Example #1
    def _validate_X_predict(self, X):
        """Validate X whenever one tries to predict, apply, predict_proba"""
        if self.estimators_ is None or len(self.estimators_) == 0:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        return self.estimators_[0]._validate_X_predict(X, check_input=True)
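
For context, here is a brief usage sketch (not part of the original source) of how a caller typically handles the NotFittedError raised by a guard like the one above; the scikit-learn estimator and data below are illustrative assumptions only.

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.exceptions import NotFittedError

X = np.random.rand(10, 4)
clf = RandomForestClassifier()

try:
    clf.predict(X)  # fit() has not been called, so the estimator's guard raises
except NotFittedError as exc:
    print(f"caught expected error: {exc}")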
Example #2
    def predict(self, X):
        """Classify the output for given data

        Parameters
        ----------

        X : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject
            The number of voxels should be according to each subject at
            the moment of training the model.

        Returns
        -------
        p: list of arrays, element i has shape=[samples_i]
            Predictions for each data sample.
        """
        # Check if the model exists
        if not hasattr(self, 'w_'):
            raise NotFittedError("The model fit has not been run yet.")

        # Check the number of subjects
        if len(X) != len(self.w_):
            raise ValueError("The number of subjects does not match the one"
                             " in the model.")

        X_shared = self.transform(X)
        p = [None] * len(X_shared)
        for subject in range(len(X_shared)):
            sumexp, _, exponents = utils.sumexp_stable(
                self.theta_.T.dot(X_shared[subject]) + self.bias_)
            p[subject] = self.classes_[(exponents /
                                        sumexp[np.newaxis, :]).argmax(axis=0)]

        return p
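
The prediction above is the argmax of a numerically stable softmax over classes. Below is a minimal sketch of what a helper like `utils.sumexp_stable` appears to compute, matching the `sumexp, _, exponents = ...` unpacking used here; the actual brainiak implementation may differ in details.

import numpy as np

def sumexp_stable(data):
    # Shift each column by its maximum before exponentiating so the largest
    # exponent is 0, which avoids overflow.
    max_value = data.max(axis=0)
    exponents = np.exp(data - max_value[np.newaxis, :])
    sumexp = exponents.sum(axis=0)
    return sumexp, max_value, exponents

# exponents / sumexp[np.newaxis, :] is then the per-sample softmax over classes,
# and its argmax along axis 0 picks the predicted class for each sample.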
Example #3
    def transform(self, X):
        """Use the model to transform new data to Shared Response space
        Parameters
        ----------
        X : list of 2D arrays, element i has shape=[voxels_i, timepoints_i]
            Each element in the list contains the fMRI data of one subject.
        Returns
        -------
        r : list of 2D arrays, element i has shape=[features_i, timepoints_i]
            Shared responses from input data (X)
        """
        # Check if the model exists
        if not hasattr(self, 'w_'):
            raise NotFittedError("The model fit has not been run yet.")

        # Check the number of subjects
        if len(X) != len(self.w_):
            raise ValueError("The number of subjects does not match the one"
                             " in the model.")

        r = [None] * len(X)
        s = [None] * len(X)
        for subject in range(len(X)):
            if X[subject] is not None:
                r[subject], s[subject] = self._transform_new_data(X[subject],
                                                                  subject)

        # modified from https://github.com/brainiak/brainiak/blob/ee093597c6c11597b0a59e95b48d2118e40394a5/brainiak/funcalign/rsrm.py#L191
        # to only return the shared response, rather than BOTH the shared responses and the original data
        return r
Example #4
    def _get_support_mask(self):
        if hasattr(self, 'estimator_'):
            if isinstance(self.estimator_, dict):
                estimators = self.estimator_
            else:
                estimators = self.estimator_.estimators_
        else:
            raise NotFittedError('Fit the model before transform')

        # if len(estimators) is already 1, no further feature selection is reasonable
        if self.criterion is None or len(estimators) == 1:
            if len(estimators) == 1:
                warn('Skipping ROI feature selection, because otherwise no ROI would be left.')
            return list(estimators.keys())
        else:
            scores = dict()
            for roi_id, estimator in estimators.items():
                scores[roi_id] = np.mean(_get_feature_importances(estimator))

            scores_sorted = sorted(scores.items(), key=lambda x: x[1], reverse=True)

            if self.criterion < 1:  # proportion
                return [x[0] for x in scores_sorted[:max(1, round(self.criterion * len(scores)))]]
            else:
                return [x[0] for x in scores_sorted[:self.criterion]]
Example #5
    def transform(self, X):
        """Transform data _X according to the fitted model.
        Parameters
        ----------
        X : array-like or sparse matrix, shape=(n_samples, n_features)
            Document word matrix.
        Returns
        -------
        doc_topic_distr : shape=(n_samples, n_topics)
            Document topic distribution for X.
        """

        if not hasattr(self, 'components_'):
            raise NotFittedError("no 'components_' attribute in model."
                                 " Please fit model first.")

        # make sure feature size is the same in fitted model and in X
        X = self._check_non_neg_array(X, "LatentDirichletAllocation.transform")
        n_samples, n_features = X.shape
        if n_features != self.components_.shape[1]:
            raise ValueError("The provided data has %d dimensions while "
                             "the model was trained with feature size %d." %
                             (n_features, self.components_.shape[1]))

        doc_topic_distr, _ = self._e_step(X,
                                          cal_sstats=False,
                                          random_init=False)
        # normalize doc_topic_distr
        doc_topic_distr /= doc_topic_distr.sum(axis=1)[:, np.newaxis]
        return doc_topic_distr
Example #6
    def decision_function(self, X):
        if not self.is_fitted:
            raise NotFittedError(
                "This EasyMKL instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
            )

        if self.kernel == 'precomputed':
            K = check_KL_Y(X, self.Y)
        else:
            X = check_array(X,
                            accept_sparse='csr',
                            dtype=np.float64,
                            order="C")
            if X.shape[1] != self.n_f:
                raise ValueError("The number of feature in X not correspond")
                #K = self.K.set_test(X)
            K = kernel_list(self.X, X, self.K)

        if self.multiclass_:
            return self.cls.decision_function(X)

        YY = matrix(np.diag(list(matrix(self.Y))))
        ker_matrix = matrix(summation(K, self.weights))
        z = ker_matrix * YY * self.gamma
        z = z - self.bias
        return np.array(list(z))
Example #7
    def predict_proba(self, X):
        """
        Returns the posterior probabilities of each class for data X.

        Parameters
        ---
        X : array of shape [n_samples, n_features]
            the transformed input data

        Raises
        ---
        NotFittedError :
            when the model has not yet been fit for this transformation
        """
        if not self.is_fitted():
            msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this transformer.")
            raise NotFittedError(msg % {"name": type(self).__name__})

        X = check_array(X)
        votes_per_example = self.knn.predict_proba(X)

        if len(self.missing_label_indices) > 0:
            for i in self.missing_label_indices:
                new_col = np.zeros(votes_per_example.shape[0])
                votes_per_example = np.insert(votes_per_example,
                                              i,
                                              new_col,
                                              axis=1)

        return votes_per_example
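
The loop above re-inserts an all-zero probability column for every class that was absent from the training labels, so the returned matrix has one column per known class. A small illustration of that np.insert pattern (the numbers are made up):

import numpy as np

probs = np.array([[0.7, 0.3],
                  [0.4, 0.6]])
# Insert an all-zero column at position 1 for a class with no votes.
with_missing = np.insert(probs, 1, np.zeros(probs.shape[0]), axis=1)
# array([[0.7, 0. , 0.3],
#        [0.4, 0. , 0.6]])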
Example #8
    def transform(self, X):
        """Use the model to transform new data to Shared Response space

        Parameters
        ----------

        X : list of 2D arrays, element i has shape=[voxels_i, timepoints_i]
            Each element in the list contains the fMRI data of one subject.

        Returns
        -------

        r : list of 2D arrays, element i has shape=[features_i, timepoints_i]
            Shared responses from input data (X)

        s : list of 2D arrays, element i has shape=[voxels_i, timepoints_i]
            Individual data obtained from fitting model to input data (X)
        """
        # Check if the model exists
        if not hasattr(self, 'w_'):
            raise NotFittedError("The model fit has not been run yet.")

        # Check the number of subjects
        if len(X) != len(self.w_):
            raise ValueError("The number of subjects does not match the one"
                             " in the model.")

        r = [None] * len(X)
        s = [None] * len(X)
        for subject in range(len(X)):
            if X[subject] is not None:
                r[subject], s[subject] = self._transform_new_data(X[subject],
                                                                  subject)

        return r, s
Example #9
    def predict(self, X, transformer_ids=None):
        """
        Predicts the most likely class per input example.

        Uses the predict_proba method to get the mean vote per id.
        Returns the class with the highest vote.

        Parameters:
        -----------
        X : ndarray
            Input data matrix.

        transformer_ids : list, default=None
            A list with all transformer ids. Defaults to None if no transformer ids
            are given.

        Returns:
        -----------
        The class with the highest vote for each input example, chosen by the
        argmax of the mean votes (returned as an ndarray).
        """
        if not self.is_fitted():
            msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this decider.")
            raise NotFittedError(msg % {"name": type(self).__name__})

        vote_overall = self.predict_proba(X, transformer_ids=transformer_ids)
        return self.classes[np.argmax(vote_overall, axis=1)]
Example #10
    def transform_subject(self, X):
        """Transform a new subject using the existing model

        Parameters
        ----------

        X : 2D array, shape=[voxels, timepoints]
            The fMRI data of the new subject.

        Returns
        -------

        w : 2D array, shape=[voxels, features]
            Orthogonal mapping `W_{new}` for new subject

        s : 2D array, shape=[voxels, timepoints]
            Individual term `S_{new}` for new subject
        """
        # Check if the model exists
        if not hasattr(self, 'w_'):
            raise NotFittedError("The model fit has not been run yet.")

        # Check the number of TRs in the subject
        if X.shape[1] != self.r_.shape[1]:
            raise ValueError("The number of timepoints(TRs) does not match the"
                             "one in the model.")

        s = np.zeros_like(X)
        for i in range(self.n_iter):
            w = self._update_transform_subject(X, s, self.r_)
            s = self._shrink(X - w.dot(self.r_), self.lam)

        return w, s
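
The loop above alternates between refitting the orthogonal map `w` and shrinking the residual to obtain the sparse individual term `s`. As a point of reference, a minimal sketch of the elementwise soft-thresholding operator that a helper like `_shrink` typically implements (the exact brainiak version may differ):

import numpy as np

def soft_threshold(v, lam):
    # Elementwise shrinkage: entries within [-lam, lam] become zero,
    # everything else moves toward zero by lam.
    return np.sign(v) * np.maximum(np.abs(v) - lam, 0.0)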
Example #11
    def predict(self, X):
        """Perform classification on samples in X.
        
        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Matrix containing new samples
        
        Returns
        -------
        y_pred : array, shape = [n_samples]
            The value of prediction for each sample
        """

        if not self.is_fitted:
            raise NotFittedError(
                "This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
            )
        X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")
        if self.multiclass_:
            return self.cls.predict(X)

        return np.array([
            self.classes_[1] if p >= 0 else self.classes_[0]
            for p in self.decision_function(X)
        ])
Example #12
    def vote(self, X):
        """
        Returns the posterior probabilities of each class for data X.

        Parameters
        ---
        X : array of shape [n_samples, n_features]
            the transformed input data

        Raises
        ---
        NotFittedError :
            when the model has not yet been fit for this transformation
        """
        if not self.is_fitted():
            msg = (
                "This %(name)s instance is not fitted yet. Call 'fit' with "
                "appropriate arguments before using this voter."
            )
            raise NotFittedError(msg % {"name": type(self).__name__})

        votes_per_example = []
        for x in X:
            if x in self.leaf_to_posterior:
                votes_per_example.append(self.leaf_to_posterior[x])
            else:
                votes_per_example.append(self.uniform_posterior)
        return np.array(votes_per_example)
Example #13
 def decision_function(self, X):
     """Distance of the samples in X to the separating hyperplane.
     
     Parameters
     ----------
     X : array-like, shape = [n_samples, n_features]
     
     Returns
     -------
     Z : array-like, shape = [n_samples, 1]
         Returns the decision function of the samples.
     """
     
     if not self.is_fitted:
         raise NotFittedError("This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.")
     X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")
     
     if self.multiclass_:
         return self.cls.decision_function(X)
     
     Kf = self.__kernel_definition__()
     YY = matrix(np.diag(list(matrix(self.Y))))
     ker_matrix = matrix(Kf(X, self.X).astype(np.double))
     z = ker_matrix * YY * self.gamma
     z = z - self.bias
     return np.array(list(z))
Example #14
    def predict_wp(self, plays):
        """Estimate the win probability for a set of plays.

        Basically a simple wrapper around ``WPModel.model.predict_proba``,
        takes in a DataFrame and then spits out an array of predicted
        win probabilities.

        Parameters
        ----------
        plays : Pandas DataFrame
            The input data to use to make the predictions.

        Returns
        -------
        Numpy array, of length ``len(plays)``
            Predicted probability that the offensive team in each play
            will go on to win the game.

        Raises
        ------
        NotFittedError
            If the model hasn't been fit.
        """
        if self.training_seasons is None:
            raise NotFittedError("Must fit model before predicting WP.")

        return self.model.predict_proba(plays)[:, 1]
Example #15
    def plot_validation(self, axis=None, **kwargs):
        """Plot the validation data.

        Parameters
        ----------
        axis : matplotlib.pyplot.axis object or ``None`` (default=``None``)
            If provided, the validation line will be overlaid on ``axis``.
            Otherwise, a new figure and axis will be generated and plotted on.
        **kwargs
            Arguments to ``axis.plot``.

        Returns
        -------
        matplotlib.pyplot.axis
            The axis the plot was made on.

        Raises
        ------
        NotFittedError
            If the model hasn't been fit **and** validated.
        """

        if self.sample_probabilities is None:
            raise NotFittedError("Must validate model before plotting.")

        import matplotlib.pyplot as plt
        if axis is None:
            axis = plt.figure().add_subplot(111)
            axis.plot([0, 100], [0, 100], ls="--", lw=2, color="black")
            axis.set_xlabel("Predicted WP")
            axis.set_ylabel("Actual WP")
        axis.plot(self.sample_probabilities, self.predicted_win_percents,
                  **kwargs)

        return axis
Example #16
    def transform(self, X, y=None):
        """Use the model to transform matrix to Shared Response space

        Parameters
        ----------

        X : list of 2D arrays, element i has shape=[voxels_i, samples_i]
            Each element in the list contains the fMRI data of one subject;
            note that the number of voxels and samples can vary across
            subjects.

        y : not used, as this method only applies the learned mappings


        Returns
        -------

        s : list of 2D arrays, element i has shape=[features_i, samples_i]
            Shared responses from input data (X)
        """

        # Check if the model exists
        if not hasattr(self, 'w_'):
            raise NotFittedError("The model fit has not been run yet.")

        # Check the number of subjects
        if len(X) != len(self.w_):
            raise ValueError("The number of subjects does not match the one"
                             " in the model.")

        s = [None] * len(X)
        for subject in range(len(X)):
            s[subject] = self.w_[subject].T.dot(X[subject])

        return s
Example #17
    def predict(self, X):
        """The predicted value of an input sample is a vote by the RGFRegressor.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_samples, n_features]
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        y : array of shape = [n_samples]
            The predicted values.
        """
        if not self.fitted:
            raise NotFittedError("Estimator not fitted, "
                                 "call `fit` before exploiting the model.")

        # Store the test set into RGF format
        np.savetxt(os.path.join(loc_temp, "test.data.x"),
                   X,
                   delimiter=' ',
                   fmt="%s")

        # Find latest model location
        model_glob = loc_temp + os.sep + self.file_prefix + "*"
        if not glob(model_glob):
            raise Exception(
                'Model learning result is not found @{0}. This is rgf_python error.'
                .format(loc_temp))
        latest_model_loc = sorted(glob(model_glob), reverse=True)[0]

        # Format test command
        params = []
        params.append("test_x_fn=%s" % os.path.join(loc_temp, "test.data.x"))
        params.append("prediction_fn=%s" %
                      os.path.join(loc_temp, "predictions.txt"))
        params.append("model_fn=%s" % latest_model_loc)
        cmd = "%s predict %s" % (loc_exec, ",".join(params))  # 2>&1

        output = platform_specific_Popen(cmd,
                                         stdout=subprocess.PIPE,
                                         shell=True).communicate()

        if self.verbose:
            for k in output:
                print(k)

        y_pred = np.loadtxt(os.path.join(loc_temp, "predictions.txt"))

        # Clean temp directory
        if self.clean:
            model_glob = loc_temp + os.sep + "*"

            for fn in glob(model_glob):
                if "predictions.txt" in fn or self.prefix in fn or "train.data." in fn or "test.data." in fn:
                    os.remove(fn)
        return y_pred
Example #18
 def _check_fit(self):
     '''
     raise a NotFittedError if the model isn't fit
     '''
     if not self.fitted:
         msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                "appropriate arguments before using this estimator.")
         raise NotFittedError(msg % {"name": type(self).__name__})
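
Below is a self-contained sketch of how such a guard is typically wired into an estimator; the `TinyEstimator` class is purely illustrative and not part of the original source.

from sklearn.exceptions import NotFittedError

class TinyEstimator:
    def __init__(self):
        self.fitted = False

    def fit(self, X, y=None):
        self.mean_ = sum(X) / len(X)
        self.fitted = True
        return self

    def _check_fit(self):
        if not self.fitted:
            msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this estimator.")
            raise NotFittedError(msg % {"name": type(self).__name__})

    def predict(self, X):
        self._check_fit()  # raises NotFittedError until fit() has run
        return [self.mean_ for _ in X]

# TinyEstimator().predict([1, 2, 3])           -> raises NotFittedError
# TinyEstimator().fit([1, 2, 3]).predict([4])  -> [2.0]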
Example #19
    def predict(self, X, transformer_ids=None):
        if not self.is_fitted():
            msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this decider.")
            raise NotFittedError(msg % {"name": type(self).__name__})

        vote_overall = self.predict_proba(X, transformer_ids=transformer_ids)
        return self.classes[np.argmax(vote_overall, axis=1)]
Example #20
    def transform(self, *_):
        # Raise (rather than assert) so the checks survive `python -O` and a
        # proper NotFittedError reaches the caller.
        if self.data is None:
            raise NotFittedError('must fit aligner before transforming data')
        for r in self.required:
            if not hasattr(self, r):
                raise NotFittedError(f'missing fitted attribute: {r}')

        if self.transformer is None:
            # Emit the warning rather than just instantiating it (assumes the
            # standard-library `warnings` module is imported at module level).
            warnings.warn(
                'null transform function; returning without fitting alignment '
                'model', RuntimeWarning)
            return

        data = trim_and_pad(dw.unstack(self.data))
        required_params = {r: getattr(self, r) for r in self.required}
        return self.transformer(
            data, **dw.core.update_dict(required_params, self.kwargs))
Example #21
    def get_predictions(self, X):
        assert X.ndim == 2, "Classifier prediction data X.ndim is %d instead of 2" %X.ndim

        # get classes
        try:
            return self.linSVC_obj.predict(X)
        except NotFittedError:
            raise NotFittedError("Classification model cannot preidct without being trained first. " \
                                 + "Train the classification model at least once to prevent this error.")
Example #22
 def predict(self, X):
     if not self.is_fitted:
         raise NotFittedError(
             "This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
         )
     KL = process_list(X, self.generator)
     if self.multiclass_:
         return self.clf.predict(KL)
     return self.estimator.predict(self.how_to(KL, self.weights))
Example #23
 def predict(self, X):
     if not self.is_fitted:
         raise NotFittedError(
             "This EasyMKL instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
         )
     if self.multiclass_:
         return self.cls.predict(X)
     return np.array([
         self.classes_[1] if p >= 0 else self.classes_[0]
         for p in self.decision_function(X)
     ])
Example #24
    def predict(self, X, transformer_ids=None):
        if not self.is_fitted():
            msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this transformer.")
            raise NotFittedError(msg % {"name": type(self).__name__})

        X = check_array(X)

        yhats = self.ensemble_represetations(X)

        return self.knn.predict(yhats)
Example #25
 def _check_test(self, X):
     if not self.is_fitted:
         raise NotFittedError("This EasyMKL instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.")
     if self.kernel == 'precomputed':
         KL = check_KL_Y(X, self.Y)
     else:
         X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")
         if X.shape[1] != self.X.shape[1]:  # self.n_f:
             raise ValueError("The number of features in X does not match "
                              "the number seen during fit.")
         KL = self.gen(self.X, X).make_a_list(self.n_kernels).to_array()
     return KL
Example #26
 def check_voter_fit_(self):
     '''
     raise a NotFittedError if the voter isn't fit
     '''
     if not self.voter_fitted_:
         msg = ("This %(name)s instance's voter is not fitted yet. "
                "Call 'fit_voter' or 'fit' with appropriate arguments "
                "before using this estimator.")
         raise NotFittedError(msg % {"name": type(self).__name__})
Example #27
    def vote(self, X):
        """
        Doc strings here.
        """
        if not self.is_fitted():
            msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                   "appropriate arguments before using this transformer.")
            raise NotFittedError(msg % {"name": type(self).__name__})

        X = check_array(X)
        return self.voter.predict(X)
Example #28
    def _predict(self, comparison_vectors, return_type):

        try:
            prediction = self.classifier.predict(
                comparison_vectors.to_numpy())  # .as_matrix() was removed in newer pandas
        except NotFittedError:
            raise NotFittedError(
                "This {} is not fitted yet. Call 'learn' with appropriate "
                "arguments before using this method.".format(
                    type(self).__name__))

        return self._return_result(prediction, return_type, comparison_vectors)
Example #29
 def decision_function(self, X):
     if not self.is_fitted:
         raise NotFittedError(
             "This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
         )
     if self.multiclass_:
         raise ValueError(
             'Scores are not available for multiclass problems, use predict'
         )
     KL = process_list(
         X, self.generator)  # X can be a samples matrix or Kernel List
     return self.estimator.decision_function(self.how_to(KL, self.weights))
Example #30
 def get_sparse_features(self, whitened_patches):
     # assert correct dimensionality of input data
     if whitened_patches.ndim == 3:
         whitened_patches = whitened_patches.reshape(
             (whitened_patches.shape[0], -1))
     assert whitened_patches.ndim == 2, "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
     try:
         sparse_code = self.DL_obj.transform(whitened_patches)
     except NotFittedError:
         raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, " \
                              + "therefore Sparse Codes cannot be extracted. Train the feature extraction model " \
                              + "at least once to prevent this error.")
     return sparse_code