Example #1
    def _validate_X_predict(self, X, check_input):
        """Validate X whenever one tries to predict, apply, predict_proba"""
        if self.tree_ is None:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csr")
            if issparse(X) and (X.indices.dtype != np.intc or
                                X.indptr.dtype != np.intc):
                raise ValueError("No support for np.int64 index based "
                                 "sparse matrices")

        n_features = X.shape[1]
        if self.n_features_ != n_features:
            raise ValueError("Number of features of the model must "
                             "match the input. Model n_features is %s and "
                             "input n_features is %s "
                             % (self.n_features_, n_features))

        return X
Example #2
    def feature_names(self) -> np.ndarray:
        """Depending on the encoder chosen retrieves the feature names
        :return:
        """
        if self.encoder is None:
            raise NotFittedError(
                'Estimator is not yet fitted. Call "fit" or "fit_transform" first.'
            )

        if isinstance(self.encoder, (LabelBinarizer, MultiLabelBinarizer)):
            fnames = self.encoder.classes_
        elif isinstance(self.encoder, CountVectorizer):
            fnames = self.encoder.get_feature_names()
        elif isinstance(self.encoder, OneHotEncoder):
            fnames = self.encoder.categories_[0]
        elif isinstance(self.encoder, FeatureHasher):
            fnames = range(self.encoder.n_features)
        else:
            raise NotImplementedError()

        return np.asarray(fnames)
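For context, a quick sketch of the CountVectorizer branch; note that `get_feature_names` was renamed `get_feature_names_out` in scikit-learn 1.0, so newer versions need the latter:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer().fit(["red fish", "blue fish"])
# Vocabulary terms come back in sorted order.
print(np.asarray(cv.get_feature_names_out()))  # ['blue' 'fish' 'red']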
Example #3
    def predict_meta_features(self, X):
        """ Get meta-features of test-data.

        Parameters
        ----------
        X : numpy array, shape = [n_samples, n_features]
            Test vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        meta-features : numpy array, shape = [n_samples, len(self.regressors)]
            meta-features for test data, where n_samples is the number of
            samples in test data and len(self.regressors) is the number
            of regressors.

        """
        if not hasattr(self, 'regr_'):
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")
        return np.column_stack([r.predict(X) for r in self.regr_])
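A minimal sketch of what the stacking step produces, with two hypothetical fitted regressors replaced by precomputed predictions:

import numpy as np

# One prediction vector per regressor, each of length n_samples.
preds = [np.array([1.0, 2.0]),   # stand-in for r1.predict(X)
         np.array([3.0, 4.0])]   # stand-in for r2.predict(X)
meta = np.column_stack(preds)
print(meta.shape)  # (2, 2) == (n_samples, len(self.regressors))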
Example #4
def decision_tree_data_from_pipeline(pipeline_):
    """Return data for a fitted pipeline with  in a restructured format

    Arguments:
        pipeline_ (PipelineBase): A pipeline with a DecisionTree-based estimator.

    Returns:
        OrderedDict: An OrderedDict of OrderedDicts describing a tree structure
    """
    if not pipeline_.model_family == ModelFamily.DECISION_TREE:
        raise ValueError(
            "Tree structure reformatting is only supported for decision tree estimators"
        )
    if not pipeline_._is_fitted:
        raise NotFittedError(
            "The DecisionTree estimator associated with this pipeline is not fitted yet. Call 'fit' "
            "with appropriate arguments before using this estimator.")
    est = pipeline_.estimator._component_obj
    feature_names = pipeline_.input_feature_names[pipeline_.estimator.name]

    return _tree_parse(est, feature_names)
Example #5
    def predict_proba(self, X):
        """ Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        avg : array-like, shape = [n_samples, n_classes]
            Weighted average probability for each class per sample.

        """
        if not hasattr(self, 'clfs_'):
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        avg = np.average(self._predict_probas(X), axis=0, weights=self.weights)
        return avg
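A small numpy sketch of the weighted averaging itself, assuming `self._predict_probas(X)` stacks per-classifier probabilities along axis 0:

import numpy as np

# Shape (n_classifiers, n_samples, n_classes): 3 classifiers, 1 sample, 2 classes.
probas = np.array([[[0.9, 0.1]],
                   [[0.6, 0.4]],
                   [[0.5, 0.5]]])
avg = np.average(probas, axis=0, weights=[2, 1, 1])  # classifier 0 counts double
print(avg)  # [[0.725 0.275]]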
Example #6
    def predict_proba(self, X):
        """Predict class probabilities for X.

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes]
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute classes_.
        """
        if not self.estimator_fitted:
            raise NotFittedError(
                'The estimator must be fitted before calling predict_proba().'
            )
        probabilistic_predictions = self.estimator.predict_proba(X)
        probabilistic_predictions = probabilistic_predictions[:, 1]
        return probabilistic_predictions / self.c
Example #7
    def transform(self, docs):
        """
        Return the topic proportions for the documents passed.
        The input `docs` should be in BOW format and can be a list of documents like : [ [(4, 1), (7, 1)], [(9, 1), (13, 1)], [(2, 1), (6, 1)] ]
        or a single document like : [(4, 1), (7, 1)]
        """
        if self.gensim_model is None:
            raise NotFittedError(
                "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method."
            )

        # The input as array of array
        check = lambda x: [x] if isinstance(x[0], tuple) else x
        docs = check(docs)
        X = [[] for _ in range(0, len(docs))]

        for k, v in enumerate(docs):
            transformed_author = self.gensim_model[v]
            X[k] = transformed_author

        return np.reshape(np.array(X), (len(docs), self.num_topics))
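A tiny sketch of the single-document normalization trick used above: a lone BOW document (whose first element is a tuple) gets wrapped into a one-document list, while a list of documents passes through unchanged:

check = lambda x: [x] if isinstance(x[0], tuple) else x
print(check([(4, 1), (7, 1)]))        # [[(4, 1), (7, 1)]] -- wrapped
print(check([[(4, 1)], [(9, 1)]]))    # unchanged: already a list of documents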
Example #8
File: IPW.py  Project: yuan596/FeatureEval
    def transform(self, X):
        """
        Perform feature reduction by selecting features within from the IPW.
        
        Parameters
        ----------
        X : pandas dataframe or numpy ndarray
            matrix used as predictors.

        
        Notes
        -----

        
        Returns
        -------
        :return value : numpy ndarray or Dataframe, same as the input
            a nxz matrix, where n is the number of samples in x 
            and z are the number of features, z is based upon the 
            threshold value. The features in the returning matrix will have 
            descending order of measure of relevancy provided by
            the IPW iteration.

        """
        if self.importances is not None:
            self.params = None
            if isinstance(X, pd.DataFrame):  # dataframe
                return X[X.columns[np.argsort(
                    self.significant_variables)[::-1]]]

            elif isinstance(X, np.ndarray):  # numpy array
                return X[:, np.argsort(self.significant_variables)[::-1]]

            else:
                raise TypeError(
                    'X must be a pandas dataframe or numpy ndarray')
        else:
            raise NotFittedError(
                'importances is not defined, use the fit method to define them'
            )
Example #9
    def transform(self, X):
        """
        Compute Betti curves.

        Parameters:
            X (list of 2d arrays): Persistence diagrams.

        Returns:
            array of ints of shape `(len(X), len(self.grid_))`: Betti numbers of the given persistence diagrams at the grid points given in `self.grid_`
        """

        if not self.is_fitted():
            raise NotFittedError("Not fitted.")

        if not X:
            X = [np.zeros((0, 2))]

        N = len(X)

        events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
        sorting = np.argsort(events)
        offsets = np.zeros(1 + N, dtype=int)
        for i in range(0, N):
            offsets[i + 1] = offsets[i] + 2 * X[i].shape[0]
        starts = offsets[0:N]
        ends = offsets[1:N + 1] - 1

        bettis = [[0] for i in range(0, N)]

        i = 0
        for x in self.grid_:
            while i < len(sorting) and events[sorting[i]] <= x:
                j = np.searchsorted(ends, sorting[i])
                delta = 1 if sorting[i] - starts[j] < len(X[j]) else -1
                bettis[j][-1] += delta
                i += 1
            for k in range(0, N):
                bettis[k].append(bettis[k][-1])

        return np.array(bettis, dtype=int)[:, 0:-1]
Example #10
def check_is_fitted(estimator, attributes, msg=None, all_or_any=all):
    """Perform is_fitted validation for estimator.
    Checks if the estimator is fitted by verifying the presence of
    "all_or_any" of the passed attributes and raises a NotFittedError with the
    given message.
    Parameters
    ----------
    estimator : estimator instance.
        estimator instance for which the check is performed.
    attributes : attribute name(s) given as string or a list/tuple of strings
        Eg.:
            ``["coef_", "estimator_", ...], "coef_"``
    msg : string
        The default error message is, "This %(name)s instance is not fitted
        yet. Call 'fit' with appropriate arguments before using this method."
        For custom messages if "%(name)s" is present in the message string,
        it is substituted for the estimator name.
        Eg. : "Estimator, %(name)s, must be fitted before sparsifying".
    all_or_any : callable, {all, any}, default all
        Specify whether all or any of the given attributes must exist.
    Returns
    -------
    None
    Raises
    ------
    NotFittedError
        If the attributes are not found.
    """
    if msg is None:
        msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
               "appropriate arguments before using this method.")

    if not hasattr(estimator, 'fit'):
        raise TypeError("%s is not an estimator instance." % (estimator))

    if not isinstance(attributes, (list, tuple)):
        attributes = [attributes]

    if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
        raise NotFittedError(msg % {'name': type(estimator).__name__})
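A minimal usage sketch for the `check_is_fitted` defined above, using a scikit-learn estimator whose `coef_` attribute only exists after `fit`:

from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression

est = LinearRegression()
try:
    check_is_fitted(est, "coef_")
except NotFittedError as err:
    print(err)  # This LinearRegression instance is not fitted yet. ...
est.fit([[0.0], [1.0]], [0.0, 1.0])
check_is_fitted(est, "coef_")  # passes silently once coef_ exists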
Example #11

def predict_segmenter(features, clf):
    """Segmentation of images using a pretrained classifier.

    Parameters
    ----------
    features : ndarray
        Array of features, with the last dimension corresponding to the number
        of features, and the other dimensions are compatible with the shape of
        the image to segment, or a flattened image.
    clf : classifier object
        trained classifier object, exposing a ``predict`` method as in
        scikit-learn's API, for example an instance of
        ``RandomForestClassifier`` or ``LogisticRegression`` classifier. The
        classifier must be already trained, for example with
        :func:`skimage.segmentation.fit_segmenter`.

    Returns
    -------
    output : ndarray
        Labeled array, built from the prediction of the classifier.
    """
    sh = features.shape
    if features.ndim > 2:
        features = features.reshape((-1, sh[-1]))

    try:
        predicted_labels = clf.predict(features)
    except NotFittedError:
        raise NotFittedError("You must train the classifier `clf` first"
                             "for example with the `fit_segmenter` function.")
    except ValueError as err:
        if err.args and 'x must consist of vectors of length' in err.args[0]:
            raise ValueError(
                err.args[0] + '\n' +
                "Maybe you did not use the same type of features for training the classifier."
            )
        else:
            raise err
    output = predicted_labels.reshape(sh[:-1])
    return output
Example #12
    def predict_proba(self, X):
        if not self.fitted_:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        if isspmatrix(X):
            _sparse_savetxt(os.path.join(loc_temp, "test.data.x"), X)
        else:
            np.savetxt(os.path.join(loc_temp, "test.data.x"),
                       X,
                       delimiter=' ',
                       fmt="%s")

        # Find latest model location
        model_glob = loc_temp + os.sep + self._file_prefix + "*"
        if not glob(model_glob):
            raise Exception('Model learning result is not found in {0}. '
                            'This is rgf_python error.'.format(loc_temp))
        latest_model_loc = sorted(glob(model_glob), reverse=True)[0]

        # Format test command
        params = []
        params.append("test_x_fn=%s" % os.path.join(loc_temp, "test.data.x"))
        params.append("prediction_fn=%s" %
                      os.path.join(loc_temp, "predictions.txt"))
        params.append("model_fn=%s" % latest_model_loc)

        cmd = (loc_exec, "predict", ",".join(params))

        output = subprocess.Popen(cmd,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT,
                                  universal_newlines=True).communicate()

        if self.verbose:
            for k in output:
                print(k)

        y_pred = np.loadtxt(os.path.join(loc_temp, "predictions.txt"))
        return y_pred
Example #13
    def plot_qq(self, path: str=None, dim: tuple=(12, 8)):
        """Display a Q-Q plot from the standardized prediction residuals.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with width and length of the plot.
        """

        if self.qq is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")

            raise NotFittedError(msg.format(self.__class__.__name__))

        with plt.style.context("seaborn-whitegrid"):

            fig, ax = plt.subplots(figsize=dim)

            x = self.qq["quantiles"]
            y = self.qq["residuals"]

            ax.plot(x, x, ls="--", label="perfect model", color="darkorange", linewidth=3)
            ax.plot(x, y, label="current model", color="cornflowerblue", linewidth=3)

            ax.set_xlabel("Theoretical quantiles", fontsize=15)
            ax.set_xticks(range(int(np.floor(min(x))), int(np.ceil(max(x[x < float("inf")])))+1, 1))

            ax.set_ylabel("Standardized residuals", fontsize=15)
            ax.set_yticks(range(int(np.floor(min(y))), int(np.ceil(max(y[x < float("inf")])))+1, 1))

            ax.legend(loc="best")
            ax.set_title("Q-Q plot", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

        plt.show()
Example #14
File: utils.py  Project: vmarkovtsev/rgf
    def predict_proba(self, X):
        """
        Predict class probabilities for X.

        The predicted class probabilities of an input sample are computed.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape = [n_samples, n_classes].
            The class probabilities of the input samples.
            The order of the classes corresponds to that in the attribute classes_.
        """
        if self._fitted is None:
            raise NotFittedError(NOT_FITTED_ERROR_DESC)
        X = check_array(X, accept_sparse=True)
        self._check_n_features(X.shape[1])

        if self._n_classes == 2:
            y = self._estimators[0].predict(X)
            y = sigmoid(y)
            y = np.c_[y, 1 - y]
        else:
            y = np.zeros((X.shape[0], self._n_classes))
            for i, clf in enumerate(self._estimators):
                class_proba = clf.predict(X)
                y[:, i] = class_proba

            if self.calc_prob == "sigmoid":
                y = sigmoid(y)
                normalizer = np.sum(y, axis=1)[:, np.newaxis]
                normalizer[normalizer == 0.0] = 1.0
                y /= normalizer
            else:
                y = softmax(y)
        return y
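A small numpy sketch of the two calibration modes above, assuming `sigmoid` and `softmax` follow the usual element-wise and row-wise definitions (these helpers are assumptions, not the project's own code):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))  # shift by max for stability
    return e / e.sum(axis=1, keepdims=True)

scores = np.array([[2.0, -1.0, 0.5]])     # raw per-class scores, one sample
s = sigmoid(scores)
s /= s.sum(axis=1, keepdims=True)         # "sigmoid" mode: rescale rows to sum to 1
print(s.round(3))                         # [[0.497 0.152 0.351]]
print(softmax(scores).round(3))           # [[0.786 0.039 0.175]]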
Example #15
    def identify_low_importance(self, cumulative_importance):
        """
        Finds the lowest importance features not needed to account for `cumulative_importance` 
        of the feature importance from the gradient boosting machine. As an example, if cumulative
        importance is set to 0.95, this will retain only the most important features needed to 
        reach 95% of the total feature importance. The identified features are those not needed.

        Parameters
        --------
        cumulative_importance : float between 0 and 1
            The fraction of cumulative importance to account for 

        """

        self.cumulative_importance = cumulative_importance

        # The feature importances need to be calculated before running
        if self.feature_importances is None:
            raise NotFittedError(
                'Feature importances have not yet been determined. Call the `identify_zero_importance` method first.'
            )

        # Make sure most important features are on top
        self.feature_importances = self.feature_importances.sort_values(
            'cumulative_importance')

        # Identify the features not needed to reach the cumulative_importance
        record_low_importance = self.feature_importances[
            self.feature_importances['cumulative_importance'] >
            cumulative_importance]

        to_drop = list(record_low_importance['feature'])

        self.record_low_importance = record_low_importance
        self.removal_ops['low_importance'] = to_drop

        print(
            '%d features that do not contribute to cumulative importance of %0.2f.\n'
            % (len(self.removal_ops['low_importance']),
               self.cumulative_importance))
Example #16
    def inverse_transform(
        self,
        y: SUPPORTED_TARGET_TYPES,
    ) -> np.ndarray:
        """
        Revert any encoding transformation done on a target array

        Args:
            y (Union[np.ndarray, pd.DataFrame, pd.Series]):
                Target array to be transformed back to original form before encoding
        Returns:
            np.ndarray:
                The transformed array
        """
        if not self._is_fitted:
            raise NotFittedError(
                "Cannot call inverse_transform on a validator that is not fitted"
            )

        if self.encoder is None:
            return y
        shape = np.shape(y)
        if len(shape) > 1:
            y = self.encoder.inverse_transform(y)
        else:
            # The targets should be a flattened array, hence reshape with -1
            if hasattr(y, 'iloc'):
                y = cast(pd.DataFrame, y)
                y = self.encoder.inverse_transform(y.to_numpy().reshape(
                    -1, 1)).reshape(-1)
            else:
                y = self.encoder.inverse_transform(np.array(y).reshape(
                    -1, 1)).reshape(-1)

        # Inverse transform returns a numpy array of type object
        # This breaks certain metrics as accuracy, which makes type_of_target be unknown
        # If while fit a dtype was observed, we try to honor that dtype
        if self.dtype is not None:
            y = y.astype(self.dtype)
        return y
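A short sketch of why the flattened branch reshapes with -1: encoders like scikit-learn's OrdinalEncoder expect 2D input, so a 1D target array is lifted to a single column and flattened back afterwards (OrdinalEncoder here is an illustrative stand-in for `self.encoder`):

import numpy as np
from sklearn.preprocessing import OrdinalEncoder

enc = OrdinalEncoder()
y = np.array(["cat", "dog", "cat"])
codes = enc.fit_transform(y.reshape(-1, 1)).reshape(-1)       # 1D -> column -> 1D
back = enc.inverse_transform(codes.reshape(-1, 1)).reshape(-1)
print(codes, back)  # [0. 1. 0.] ['cat' 'dog' 'cat']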
Example #17
File: base.py  Project: pi19404/skflow
    def save(self, path):
        """Saves checkpoints and graph to given path.

        Args:
            path: Folder to save model to.
        """
        if not self._initialized:
            raise NotFittedError()

        # Currently Saver requires absolute path to work correctly.
        path = os.path.abspath(path)

        if not os.path.exists(path):
            os.makedirs(path)
        if not os.path.isdir(path):
            raise ValueError("Path %s should be a directory to save"
                             "checkpoints and graph." % path)
        with open(os.path.join(path, 'model.def'), 'w') as fmodel:
            all_params = self.get_params()
            params = {}
            for key, value in all_params.items():
                if not callable(value) and value is not None:
                    params[key] = value
            params['class_name'] = type(self).__name__
            fmodel.write(
                json.dumps(params,
                           default=lambda o: o.__dict__
                           if hasattr(o, '__dict__') else None))
        with open(os.path.join(path, 'endpoints'), 'w') as foutputs:
            foutputs.write(
                '%s\n%s\n%s\n%s' %
                (self._inp.name, self._out.name, self._model_predictions.name,
                 self._model_loss.name))
        with open(os.path.join(path, 'graph.pbtxt'), 'w') as fgraph:
            fgraph.write(str(self._graph.as_graph_def()))
        with open(os.path.join(path, 'saver.pbtxt'), 'w') as fsaver:
            fsaver.write(str(self._saver.as_saver_def()))
        self._saver.save(self._session,
                         os.path.join(path, 'model'),
                         global_step=self._global_step)
Example #18
    def predict(self, X, **kwargs):
        """Returns predictions for the given test data.

        Arguments:
            X: array-like, shape `(n_samples, n_features)`
                Test samples where `n_samples` is the number of samples
                and `n_features` is the number of features.
            **kwargs: dictionary arguments
                Legal arguments are the arguments of `self.model_.predict`.

        Returns:
            preds: array-like, shape `(n_samples,)`
                Predictions.
        """
        # check if fitted
        if not self.is_fitted_:
            raise NotFittedError(
                "Estimator needs to be fit before `predict` " "can be called"
            )

        # basic input checks
        X = check_array(X, allow_nd=True, dtype=["float64", "int"])

        # pre process X
        X, _ = self._pre_process_X(X)

        # filter kwargs and get attributes for predict
        kwargs = self._filter_params(
            self.model_.predict, params_to_check=kwargs
        )
        predict_args = self._filter_params(self.model_.predict)

        # predict with Keras model
        pred_args = {**predict_args, **kwargs}
        y_pred = self.model_.predict(X, **pred_args)

        # post process y
        y, _ = self._post_process_y(y_pred)
        return y
Example #19
    def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
        """Plot ROC curve of the model.

        Parameters
        ----------
        path : str, optional
            Path to store the figure.
        dim : tuple, optional
            Tuple with width and length of the plot.
        """

        if self.roc_curve is None:
            msg = ("This {} instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this method.")

            raise NotFittedError(msg.format(self.__class__.__name__))

        auc = float(self.scalar_metrics.loc["AUC"])

        with plt.style.context("seaborn-whitegrid"):

            fig, ax = plt.subplots(figsize=dim)

            ax.plot(self.roc_curve["fpr"],
                    self.roc_curve["tpr"],
                    color="cornflowerblue", linewidth=3,
                    label="ROC curve (area = {s:.3})".format(s=auc))

            ax.plot([0, 1], [0, 1], color="darkorange", linewidth=3,
                    linestyle="--")
            ax.set_xlabel("False Positive Rate", fontsize=15)
            ax.set_ylabel("True Positive Rate", fontsize=15)
            ax.legend(loc="lower right")
            ax.set_title("ROC curve", fontsize=20)

            if path:
                plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

        plt.show()
Example #20

    def predict(self, X):
        if not hasattr(self, 'estimators_'):
            raise NotFittedError("Must fit clusters before predicting.")

        # this returns -1 if any of the values squared are too large
        # models with numerical instability will fail.
        clusters = self.clusterer_.predict(X)

        y_tmp = []
        idx = []
        for c, est in self.estimators_.items():
            mask = clusters == c
            if mask.any():
                idx.append(np.flatnonzero(mask))
                y_tmp.append(est.predict(X[safe_mask(X, mask)]))

        y_tmp = np.concatenate(y_tmp)
        idx = np.concatenate(idx)
        y = np.full([X.shape[0], y_tmp.shape[1]], np.nan)
        y[idx] = y_tmp

        return y
Example #21
    def predict_probabilites(self, X):

        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" %
                                 self.__class__.__name__)

        h_np, b_np, h_lenseq, b_lenseq = X['h_np'], X['b_np'], X[
            'h_seqlen'], X['b_seqlen']
        h_np, b_np, h_lenseq, b_lenseq = self.get_truncted_data(
            h_np, b_np, h_lenseq, b_lenseq)
        if h_np.shape[1] < self.h_max_length or b_np.shape[
                1] < self.b_max_length:
            h_np, b_np = self.pad_data(h_np, b_np)

        with self._session.as_default() as sess:
            return self._probabilites.eval(
                feed_dict={
                    self._X_head: h_np,
                    self._X_body: b_np,
                    self._X_head_length: h_lenseq,
                    self._X_body_length: b_lenseq
                })
Example #22
def _insert_error_scores(results, error_score):
    """Insert error in `results` by replacing them inplace with `error_score`.
    This only applies to multimetric scores because `_fit_and_score` will
    handle the single metric case.
    """
    successful_score = None
    failed_indices = []
    for i, result in enumerate(results):
        if result["fit_failed"]:
            failed_indices.append(i)
        elif successful_score is None:
            successful_score = result["test_scores"]

    if successful_score is None:
        raise NotFittedError("All estimators failed to fit")

    if isinstance(successful_score, dict):
        formatted_error = {name: error_score for name in successful_score}
        for i in failed_indices:
            results[i]["test_scores"] = formatted_error.copy()
            if "train_scores" in results[i]:
                results[i]["train_scores"] = formatted_error.copy()
Example #23
    def _compute_output(self, X):
        """Get the outputs of the network, for use in prediction methods."""

        if not self._is_fitted:
            raise NotFittedError("Call fit before prediction")

        X = self._check_X(X)

        # Make predictions in batches.
        pred_batches = []
        start_idx = 0
        n_examples = X.shape[0]
        with self.graph_.as_default():
            while start_idx < n_examples:
                X_batch = \
                    X[start_idx:min(start_idx + self.batch_size, n_examples)]
                feed_dict = self._make_feed_dict(X_batch)
                start_idx += self.batch_size
                pred_batches.append(
                    self._session.run(self.output_layer_, feed_dict=feed_dict))
        y_pred = np.concatenate(pred_batches)
        return y_pred
Example #24
    def _fit(self, x, y, verbose=False, load=False):
        """
        Args:
            y: Nx1 ndarray observed value.
            x: NxD ndarry features.

        Returns:

        """
        x, y = Module.validate(x, y)

        l_x, l_y = np.log(x + self.eps), np.log(y + self.eps)
        y_zero_one = (y > 0.0).astype(int)

        if y_zero_one.max() == y_zero_one.min():
            raise NotFittedError(
                "Logistic model couldn't fit, because the number of classes is <2"
            )

        self.log_reg.fit(x, y_zero_one)
        sample_weight = self.log_reg.predict_proba(x)[:, 1]

        # Linear regression under log mode.
        self.linear_reg.fit(X=l_x, y=l_y, sample_weight=sample_weight)
        self.fitted = self.linear_reg.predict(l_x)
        self.residual = (self.fitted - l_y)

        # Grid fit for bandwidth.
        if load is False:

            param = grid_fit_kde(self.residual)
            self.kde = KernelDensity(bandwidth=param["bandwidth"])
            self.kde.fit(self.residual)

        else:
            self.kde = pickle.load(open("all_kde.kd", "rb"))
        # Mark the module as fitted (this overwrites the in-sample
        # predictions stored in self.fitted above).
        self.fitted = True
        #logger.debug("KDE bandwidth %s"%self.kde.bandwidth)
        return self
Example #25
    def predict(self, X):
        """
        Predicts the targets using the trained ELM regressor.

        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)

        Returns
        -------
        y : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
            The predicted targets
        """
        if self._input_to_node is None or self._regressor is None:
            raise NotFittedError(self)

        hidden_layer_state = self._input_to_node.transform(X)
        hidden_layer_state = self._node_to_node.transform(hidden_layer_state)

        return ((self._node_to_node._y_pred[:-1, :]) -
                self.node_to_node.teacher_shift
                ) / self.node_to_node.teacher_scaling
Example #26
    def transform(self, docs):
        """
        Return the vector representations for the input documents.
        The input `docs` should be a list of lists like : [ ['calculus', 'mathematical'], ['geometry', 'operations', 'curves'] ]
        or a single document like : ['calculus', 'mathematical']
        """
        if self.gensim_model is None:
            raise NotFittedError(
                "This model has not been fitted yet. Call 'fit' with appropriate arguments before using this method."
            )

        # The input as array of array
        check = lambda x: [x] if isinstance(x[0], string_types) else x
        docs = check(docs)
        X = [[] for _ in range(0, len(docs))]

        for k, v in enumerate(docs):
            doc_vec = self.gensim_model.infer_vector(v)
            X[k] = doc_vec

        return np.reshape(np.array(X),
                          (len(docs), self.gensim_model.vector_size))
Example #27
    def transform(self, X, y=None):
        """Encode data with the autoencoder.

        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples, n_features]
            Data to encode

        Returns
        -------
        numpy array of shape [n_samples, hidden_units[-1]]
            Encoded data.
        """

        if not self._is_fitted:
            raise NotFittedError("Call fit before transform!")

        # For sparse input, make the input a CSR matrix since it can be
        # indexed by row.
        X = check_array(X, accept_sparse=['csr'])

        # Check input data against internal data.
        # Raises an error on failure.
        self._check_data(X)

        # Make predictions in batches.
        pred_batches = []
        start_idx = 0
        n_examples = X.shape[0]
        with self.graph_.as_default():
            while start_idx < n_examples:
                X_batch = \
                    X[start_idx:min(start_idx + self.batch_size, n_examples)]
                feed_dict = self._make_feed_dict(X_batch, training=False)
                start_idx += self.batch_size
                pred_batches.append(
                    self._session.run(self._encoded_values,
                                      feed_dict=feed_dict))
        return np.concatenate(pred_batches)
Example #28
    def transform(self, df, **transform_params):
        """
        Label encoding "cols" of "df" using the fitting parameters

        :param df: Dataframe
        :param transform_params:
        :return:
        """
        if not self._is_fitted:
            raise NotFittedError("Fitting was not performed")
        _is_cols_subset_of_df_cols(self.cols, df)

        df = df.copy()
        label_enc_dict = {}
        for col in self.cols:
            label_enc_dict[col] = self.les[col].transform(df[col])

        labelenc_cols = pd.DataFrame(label_enc_dict, index=df.index)

        for col in self.cols:
            df[col] = labelenc_cols[col]
        return df
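A compact sketch of the per-column pattern above, assuming `self.les` maps column names to fitted scikit-learn LabelEncoder instances:

import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.DataFrame({"color": ["red", "blue", "red"]})
le = LabelEncoder().fit(df["color"])      # classes_ == ['blue', 'red']
print(le.transform(df["color"]))          # [1 0 1]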
Example #29
    def transform(self, GRFData):
        """Scale the values of all force components using the previously fitted scaler.

        Parameters:
        GRFData : dictionary containing the data for all force components.
        Input data in the following form:
        'f_v": num_samples x num_dimensions
        'f_ap": num_samples x num_dimensions 
        'f_ml": num_samples x num_dimensions 
        'cop_ap": num_samples x num_dimensions 
        'cop_ml": num_samples x num_dimensions 

        ----------
        Returns:
        transformed_GRFData : dictionary containing the transformed values for all force components.
        The output data has the same form as the input data.

        ----------
        Raises:
        NotFittedError : If the scaler has not been fitted to data prior to calling this function.

        ValueError: If GRFData is not a dictionary or does not contain values for one of the force components.
        """

        if not self.isFitted:
            raise NotFittedError(
                "The scaler has n ot been fitted to data. Call 'fit()' before calling 'transform()'."
            )

        self.__is_valid_dict(GRFData)
        transformed_GRFData = {}
        for component in self.comp_list:
            len_series = GRFData[component].shape[1]
            transformed_data = self.scaler[component].transform(
                np.reshape(GRFData[component], (-1, 1)))
            transformed_GRFData[component] = np.reshape(
                transformed_data, (-1, len_series))

        return transformed_GRFData
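A short sketch of the reshape-scale-reshape pattern above, using scikit-learn's StandardScaler as an illustrative stand-in for `self.scaler[component]`:

import numpy as np
from sklearn.preprocessing import StandardScaler

data = np.arange(6.0).reshape(2, 3)                 # 2 samples x 3 time points
scaler = StandardScaler().fit(data.reshape(-1, 1))  # fit on one long column
out = scaler.transform(data.reshape(-1, 1)).reshape(-1, 3)
print(out.shape)  # (2, 3): same layout, values scaled as a single series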
Example #30
File: dcca.py  Project: mvlearn/mvlearn
    def transform(self, Xs, return_loss=False):
        r"""
        Embeds data matrices using the trained deep networks and fitted CCA
        projection matrices. May be used for out-of-sample embeddings.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)
            A list of data matrices from each view to transform based on the
            prior fit function. If view_idx defined, then Xs is a 2D data
            matrix corresponding to a single view.
        return_loss : bool, default=False
            If ``True``, returns the loss along with the transformed
            data in a tuple.

        Returns
        -------
        Xs_transformed : list of array-likes or array-like
            Transformed samples. Same structure as Xs, but potentially
            different n_features_i.
        loss : float
            Average loss over data, defined as negative correlation of
            transformed views. Only returned if ``return_loss=True``.
        """

        if not self.is_fit:
            raise NotFittedError("Must call fit function before transform")
        Xs = check_Xs(Xs, multiview=True)
        x1 = torch.DoubleTensor(Xs[0])
        x2 = torch.DoubleTensor(Xs[1])

        with torch.no_grad():
            losses, outputs = self._get_outputs(x1, x2)
            outputs = self.linear_cca_.transform(outputs[0], outputs[1])
            if return_loss:
                return outputs, np.mean(losses)
            return outputs