def _validate_X_predict(self, X): """Validate X whenever one tries to predict, apply, predict_proba""" if self.estimators_ is None or len(self.estimators_) == 0: raise NotFittedError("Estimator not fitted, " "call `fit` before exploiting the model.") return self.estimators_[0]._validate_X_predict(X, check_input=True)
def predict(self, X):
    """Classify every sample of every subject.

    Parameters
    ----------
    X : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject.
        The number of voxels should match what each subject had when the
        model was trained.

    Returns
    -------
    p : list of arrays, element i has shape=[samples_i]
        Predicted class for each data sample, taken from ``self.classes_``.

    Raises
    ------
    NotFittedError
        If the model has no ``w_`` attribute.
    ValueError
        If ``X`` does not hold one entry per fitted subject.
    """
    # The presence of `w_` is what marks the model as fitted.
    if not hasattr(self, 'w_'):
        raise NotFittedError("The model fit has not been run yet.")

    if len(X) != len(self.w_):
        raise ValueError("The number of subjects does not match the one"
                         " in the model.")

    predictions = []
    for shared in self.transform(X):
        activations = self.theta_.T.dot(shared) + self.bias_
        sumexp, _, exponents = utils.sumexp_stable(activations)
        # Normalise per sample, then pick the highest-scoring class row.
        winners = (exponents / sumexp[np.newaxis, :]).argmax(axis=0)
        predictions.append(self.classes_[winners])
    return predictions
def transform(self, X):
    """Project new data into the Shared Response space.

    Parameters
    ----------
    X : list of 2D arrays, element i has shape=[voxels_i, timepoints_i]
        Each element in the list contains the fMRI data of one subject.
        An element may be ``None``; that subject is skipped.

    Returns
    -------
    r : list of 2D arrays, element i has shape=[features_i, timepoints_i]
        Shared responses from input data (X). Entries stay ``None`` for
        subjects whose input was ``None``.

    Raises
    ------
    NotFittedError
        If the model has no ``w_`` attribute.
    ValueError
        If ``X`` does not hold one entry per fitted subject.

    Notes
    -----
    Modified from
    https://github.com/brainiak/brainiak/blob/ee093597c6c11597b0a59e95b48d2118e40394a5/brainiak/funcalign/rsrm.py#L191
    to only return the shared response, rather than both the shared
    responses and the individual terms.
    """
    if not hasattr(self, 'w_'):
        raise NotFittedError("The model fit has not been run yet.")

    if len(X) != len(self.w_):
        raise ValueError("The number of subjects does not match the one"
                         " in the model.")

    shared = [None] * len(X)
    for idx, subject_data in enumerate(X):
        if subject_data is None:
            continue
        # The second element (individual term) is intentionally discarded.
        shared[idx], _individual = self._transform_new_data(subject_data, idx)
    return shared
def _get_support_mask(self): if hasattr(self, 'estimator_'): if isinstance(self.estimator_, dict): estimators = self.estimator_ else: estimators = self.estimator_.estimators_ else: raise NotFittedError('Fit the model before transform') # if len(estimators) is already 1, no further feature selection reasonable if self.criterion is None or len(estimators) == 1: if len(estimators) == 1: warn('Skipping ROI feature selection, because otherwise no ROI would be left.') return list(estimators.keys()) else: scores = dict() for roi_id, estimator in estimators.items(): scores[roi_id] = np.mean(_get_feature_importances(estimator)) scores_sorted = sorted(scores.items(), key=lambda x: x[1], reverse=True) if self.criterion < 1: # proportion return [x[0] for x in scores_sorted[:max(1, round(self.criterion * len(scores)))]] else: return [x[0] for x in scores_sorted[:self.criterion]]
def transform(self, X):
    """Transform data X according to the fitted model.

    Parameters
    ----------
    X : array-like or sparse matrix, shape=(n_samples, n_features)
        Document word matrix.

    Returns
    -------
    doc_topic_distr : shape=(n_samples, n_topics)
        Document topic distribution for X; each row is divided by its sum.

    Raises
    ------
    NotFittedError
        If the model has no ``components_`` attribute.
    ValueError
        If the number of features in X differs from the fitted model.
    """
    if not hasattr(self, 'components_'):
        raise NotFittedError("no 'components_' attribute in model."
                             " Please fit model first.")

    X = self._check_non_neg_array(X, "LatentDirichletAllocation.transform")

    # The feature count must agree with the fitted components.
    _, n_features = X.shape
    expected_features = self.components_.shape[1]
    if n_features != expected_features:
        raise ValueError("The provided data has %d dimensions while "
                         "the model was trained with feature size %d." %
                         (n_features, expected_features))

    doc_topic_distr, _ = self._e_step(X, cal_sstats=False,
                                      random_init=False)

    # Normalise in place so every row sums to one.
    row_totals = doc_topic_distr.sum(axis=1)[:, np.newaxis]
    doc_topic_distr /= row_totals
    return doc_topic_distr
def decision_function(self, X):
    """Return the decision values of the fitted model for ``X``.

    Parameters
    ----------
    X : array-like
        Test samples, or precomputed kernels when
        ``self.kernel == 'precomputed'`` (validated by ``check_KL_Y``).

    Returns
    -------
    Decision values. In the multiclass case they come from
    ``self.cls.decision_function``; otherwise they are a numpy array built
    from the weighted kernel combination.

    Raises
    ------
    NotFittedError
        If the instance is not fitted.
    ValueError
        If ``X`` does not have ``self.n_f`` columns (non-precomputed case).
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This EasyMKL instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
        )

    if self.kernel == 'precomputed':
        K = check_KL_Y(X, self.Y)
    else:
        X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")
        if X.shape[1] != self.n_f:
            raise ValueError("The number of feature in X not correspond")
        K = kernel_list(self.X, X, self.K)

    if self.multiclass_:
        return self.cls.decision_function(X)

    # NOTE(review): `matrix` is presumably cvxopt's dense matrix -- confirm.
    YY = matrix(np.diag(list(matrix(self.Y))))
    ker_matrix = matrix(summation(K, self.weights))
    z = ker_matrix * YY * self.gamma
    z = z - self.bias
    return np.array(list(z))
def predict_proba(self, X):
    """
    Returns the posterior probabilities of each class for data X.

    Parameters
    ----------
    X : array of shape [n_samples, n_features]
        the transformed input data

    Returns
    -------
    The output of ``self.knn.predict_proba`` with one all-zero column
    inserted at every index listed in ``self.missing_label_indices``.

    Raises
    ------
    NotFittedError : when the model has not yet been fit for this transformation
    """
    if not self.is_fitted():
        msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
               "appropriate arguments before using this transformer.")
        raise NotFittedError(msg % {"name": type(self).__name__})

    X = check_array(X)
    votes = self.knn.predict_proba(X)

    missing = self.missing_label_indices
    if len(missing) == 0:
        return votes

    # Insert the columns one at a time, in the order the indices are stored.
    for column in missing:
        zero_column = np.zeros(votes.shape[0])
        votes = np.insert(votes, column, zero_column, axis=1)
    return votes
def transform(self, X):
    """Project new data into the Shared Response space.

    Parameters
    ----------
    X : list of 2D arrays, element i has shape=[voxels_i, timepoints_i]
        Each element in the list contains the fMRI data of one subject.
        An element may be ``None``; that subject is skipped.

    Returns
    -------
    r : list of 2D arrays, element i has shape=[features_i, timepoints_i]
        Shared responses from input data (X)
    s : list of 2D arrays, element i has shape=[voxels_i, timepoints_i]
        Individual data obtained from fitting model to input data (X)

    Both lists keep ``None`` at the positions of skipped subjects.

    Raises
    ------
    NotFittedError
        If the model has no ``w_`` attribute.
    ValueError
        If ``X`` does not hold one entry per fitted subject.
    """
    if not hasattr(self, 'w_'):
        raise NotFittedError("The model fit has not been run yet.")

    n_subjects = len(X)
    if n_subjects != len(self.w_):
        raise ValueError("The number of subjects does not match the one"
                         " in the model.")

    shared = [None] * n_subjects
    individual = [None] * n_subjects
    for idx, subject_data in enumerate(X):
        if subject_data is None:
            continue
        shared[idx], individual[idx] = self._transform_new_data(
            subject_data, idx)
    return shared, individual
def predict(self, X, transformer_ids=None):
    """
    Predicts the most likely class per input example.

    Calls ``predict_proba`` and returns, for every row of the result, the
    entry of ``self.classes`` at the position of the highest vote.

    Parameters
    ----------
    X : ndarray
        Input data matrix.
    transformer_ids : list, default=None
        A list with all transformer ids, forwarded to ``predict_proba``.

    Returns
    -------
    The classes with the highest vote, one per input example.

    Raises
    ------
    NotFittedError
        When ``is_fitted()`` is false.
    """
    if not self.is_fitted():
        template = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                    "appropriate arguments before using this decider.")
        raise NotFittedError(template % {"name": type(self).__name__})

    vote_overall = self.predict_proba(X, transformer_ids=transformer_ids)
    winners = np.argmax(vote_overall, axis=1)
    return self.classes[winners]
def transform_subject(self, X):
    """Transform a new subject using the existing model

    Parameters
    ----------
    X : 2D array, shape=[voxels, timepoints]
        The fMRI data of the new subject.

    Returns
    -------
    w : 2D array, shape=[voxels, features]
        Orthogonal mapping `W_{new}` for new subject
    s : 2D array, shape=[voxels, timepoints]
        Individual term `S_{new}` for new subject

    Raises
    ------
    NotFittedError
        If the model has no ``w_`` attribute.
    ValueError
        If the number of columns of ``X`` differs from ``self.r_``.
    """
    # Check if the model exist
    if not hasattr(self, 'w_'):
        raise NotFittedError("The model fit has not been run yet.")

    # Check the number of TRs in the subject
    if X.shape[1] != self.r_.shape[1]:
        # Trailing space added: the two literals used to join as "theone".
        raise ValueError("The number of timepoints(TRs) does not match the "
                         "one in the model.")

    s = np.zeros_like(X)
    # NOTE: `w` is only bound inside the loop, so `n_iter` must be >= 1.
    for _ in range(self.n_iter):
        w = self._update_transform_subject(X, s, self.r_)
        s = self._shrink(X - w.dot(self.r_), self.lam)

    return w, s
def predict(self, X):
    """Perform classification on samples in X.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Matrix containing new samples

    Returns
    -------
    y_pred : array, shape = [n_samples]
        The value of prediction for each sample. In the binary case a
        non-negative decision value maps to ``classes_[1]``, a negative one
        to ``classes_[0]``.

    Raises
    ------
    NotFittedError
        If the instance is not fitted.
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
        )

    X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")

    if self.multiclass_:
        return self.cls.predict(X)

    return np.array([
        self.classes_[1] if p >= 0 else self.classes_[0]
        for p in self.decision_function(X)
    ])
def vote(self, X):
    """
    Returns the posterior probabilities of each class for data X.

    Parameters
    ----------
    X : iterable
        The transformed input data. Every element is looked up as a key of
        ``self.leaf_to_posterior``.

    Returns
    -------
    numpy array with one posterior per element of X; elements that are not
    a key of ``self.leaf_to_posterior`` get ``self.uniform_posterior``.

    Raises
    ------
    NotFittedError : when the model has not yet been fit for this transformation
    """
    if not self.is_fitted():
        msg = (
            "This %(name)s instance is not fitted yet. Call 'fit' with "
            "appropriate arguments before using this voter."
        )
        raise NotFittedError(msg % {"name": type(self).__name__})

    # One hashed lookup per sample, instead of rebuilding and scanning a
    # list of all keys for every sample.
    posteriors = self.leaf_to_posterior
    fallback = self.uniform_posterior
    return np.array([posteriors.get(x, fallback) for x in X])
def decision_function(self, X):
    """Distance of the samples in X to the separating hyperplane.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    Z : array-like, shape = [n_samples, 1]
        Returns the decision function of the samples. In the multiclass
        case the result comes from ``self.cls.decision_function``.

    Raises
    ------
    NotFittedError
        If the instance is not fitted.
    """
    if not self.is_fitted:
        raise NotFittedError("This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.")

    X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")

    if self.multiclass_:
        return self.cls.decision_function(X)

    # Kernel between the test samples and the stored training samples.
    Kf = self.__kernel_definition__()
    # NOTE(review): `matrix` is presumably cvxopt's dense matrix -- confirm.
    YY = matrix(np.diag(list(matrix(self.Y))))
    ker_matrix = matrix(Kf(X, self.X).astype(np.double))
    z = ker_matrix * YY * self.gamma
    z = z - self.bias
    return np.array(list(z))
def predict_wp(self, plays):
    """Estimate the win probability for a set of plays.

    A thin wrapper around ``self.model.predict_proba`` that keeps only the
    second column of the result.

    Parameters
    ----------
    plays : Pandas DataFrame
        The input data to use to make the predictions.

    Returns
    -------
    Numpy array, of length ``len(plays)``
        Predicted probability that the offensive team in each play
        will go on to win the game.

    Raises
    ------
    NotFittedError
        If the model hasn't been fit (``training_seasons`` is ``None``).
    """
    if self.training_seasons is None:
        raise NotFittedError("Must fit model before predicting WP.")

    class_probabilities = self.model.predict_proba(plays)
    return class_probabilities[:, 1]
def plot_validation(self, axis=None, **kwargs):
    """Plot the validation data.

    Draws a dashed black reference line from (0, 0) to (100, 100), labels
    the axes and then plots ``self.predicted_win_percents`` against
    ``self.sample_probabilities``.

    Parameters
    ----------
    axis : matplotlib.pyplot.axis object or ``None`` (default=``None``)
        If provided, the validation line will be overlaid on ``axis``.
        Otherwise, a new figure and axis will be generated and plotted on.
    **kwargs
        Arguments to ``axis.plot``.

    Returns
    -------
    matplotlib.pylot.axis
        The axis the plot was made on.

    Raises
    ------
    NotFittedError
        If the model hasn't been fit **and** validated.
    """
    if self.sample_probabilities is None:
        raise NotFittedError("Must validate model before plotting.")

    if axis is None:
        # Imported only when a figure has to be created, so callers that
        # supply their own axis do not trigger a pyplot import.
        import matplotlib.pyplot as plt
        axis = plt.figure().add_subplot(111)

    axis.plot([0, 100], [0, 100], ls="--", lw=2, color="black")
    axis.set_xlabel("Predicted WP")
    axis.set_ylabel("Actual WP")
    axis.plot(self.sample_probabilities,
              self.predicted_win_percents,
              **kwargs)

    return axis
def transform(self, X, y=None):
    """Use the model to transform matrix to Shared Response space

    Each subject's data is multiplied by the transpose of that subject's
    fitted mapping ``w_``.

    Parameters
    ----------
    X : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject
        note that number of voxels and samples can vary across subjects.
    y : not used as it only applies the mappings

    Returns
    -------
    s : list of 2D arrays, element i has shape=[features_i, samples_i]
        Shared responses from input data (X)

    Raises
    ------
    NotFittedError
        If the model has no ``w_`` attribute.
    ValueError
        If ``X`` does not hold one entry per fitted subject.
    """
    if not hasattr(self, 'w_'):
        raise NotFittedError("The model fit has not been run yet.")

    if len(X) != len(self.w_):
        raise ValueError("The number of subjects does not match the one"
                         " in the model.")

    return [
        self.w_[idx].T.dot(subject_data)
        for idx, subject_data in enumerate(X)
    ]
def predict(self, X):
    """Predict values for ``X`` by running the external RGF executable.

    The samples are written to ``test.data.x`` in ``loc_temp``, the most
    recent model file matching ``self.file_prefix`` is selected, the
    executable at ``loc_exec`` is run in ``predict`` mode, and the
    predictions are read back from ``predictions.txt``.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples. They are written with ``np.savetxt`` using a
        space delimiter and ``fmt="%s"``.

    Returns
    -------
    y : array of shape = [n_samples]
        The predicted values, as loaded by ``np.loadtxt``.

    Raises
    ------
    NotFittedError
        If ``self.fitted`` is false.
    Exception
        If no model file matching ``self.file_prefix`` exists in
        ``loc_temp``.
    """
    if not self.fitted:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    # Store the test set into RGF format
    np.savetxt(os.path.join(loc_temp, "test.data.x"), X, delimiter=' ', fmt="%s")

    # Find latest model location: model files share `file_prefix`, and the
    # one that sorts last by name is taken as the latest.
    model_glob = loc_temp + os.sep + self.file_prefix + "*"
    if not glob(model_glob):
        raise Exception(
            'Model learning result is not found @{0}. This is rgf_python error.'
            .format(loc_temp))
    latest_model_loc = sorted(glob(model_glob), reverse=True)[0]

    # Format test command: comma-separated key=value parameters
    params = []
    params.append("test_x_fn=%s" % os.path.join(loc_temp, "test.data.x"))
    params.append("prediction_fn=%s" % os.path.join(loc_temp, "predictions.txt"))
    params.append("model_fn=%s" % latest_model_loc)
    cmd = "%s predict %s" % (loc_exec, ",".join(params))  # 2>&1

    # Run the executable and wait for it to finish; stdout is captured.
    output = platform_specific_Popen(cmd, stdout=subprocess.PIPE, shell=True).communicate()

    if self.verbose:
        for k in output:
            print(k)

    y_pred = np.loadtxt(os.path.join(loc_temp, "predictions.txt"))

    # Clean temp directory
    # NOTE(review): this filter reads `self.prefix` while the model lookup
    # above uses `self.file_prefix` -- confirm both attributes exist.
    if self.clean:
        model_glob = loc_temp + os.sep + "*"
        for fn in glob(model_glob):
            if "predictions.txt" in fn or self.prefix in fn or "train.data." in fn or "test.data." in fn:
                os.remove(fn)

    return y_pred
def _check_fit(self): ''' raise a NotFittedError if the model isn't fit ''' if not self.fitted: msg = ("This %(name)s instance is not fitted yet. Call 'fit' with " "appropriate arguments before using this estimator.") raise NotFittedError(msg % {"name": type(self).__name__})
def predict(self, X, transformer_ids=None):
    """Return, for each input example, the class with the highest vote.

    The votes come from ``self.predict_proba(X, transformer_ids=...)``; the
    position of each row's maximum indexes ``self.classes``.

    Raises
    ------
    NotFittedError
        When ``is_fitted()`` is false.
    """
    if self.is_fitted():
        votes = self.predict_proba(X, transformer_ids=transformer_ids)
        best = np.argmax(votes, axis=1)
        return self.classes[best]

    template = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                "appropriate arguments before using this decider.")
    raise NotFittedError(template % {"name": type(self).__name__})
def transform(self, *_):
    """Apply the fitted alignment to the stored data.

    Positional arguments are accepted and ignored; the data comes from
    ``self.data``.

    Returns
    -------
    The result of ``self.transformer`` called with the unstacked,
    trimmed-and-padded data and the fitted parameters named in
    ``self.required`` (updated with ``self.kwargs``), or ``None`` when no
    transformer is set.

    Raises
    ------
    AssertionError
        If ``self.data`` is ``None`` or a required fitted attribute is
        missing. The assertion carries a ``NotFittedError`` as its payload.

    Warns
    -----
    RuntimeWarning
        If ``self.transformer`` is ``None``.
    """
    import warnings

    # NOTE(review): these checks are asserts, so they vanish under
    # ``python -O`` and surface as AssertionError rather than
    # NotFittedError. Kept as-is so callers see the same exception type.
    assert self.data is not None, NotFittedError(
        'must fit aligner before transforming data')
    for r in self.required:
        assert hasattr(self, r), NotFittedError(f'missing fitted attribute: {r}')

    if self.transformer is None:
        # The warning used to be instantiated but never emitted.
        warnings.warn(
            'null transform function; returning without fitting alignment model',
            RuntimeWarning,
            stacklevel=2)
        return None

    data = trim_and_pad(dw.unstack(self.data))
    required_params = {r: getattr(self, r) for r in self.required}
    return self.transformer(
        data, **dw.core.update_dict(required_params, self.kwargs))
def get_predictions(self, X):
    """Return the classes predicted by ``self.linSVC_obj`` for ``X``.

    Parameters
    ----------
    X : array with ``ndim == 2``
        Data to classify.

    Raises
    ------
    AssertionError
        If ``X`` is not two-dimensional.
    NotFittedError
        If the underlying classifier reports that it is not fitted.
    """
    assert X.ndim == 2, "Classifier prediction data X.ndim is %d instead of 2" % X.ndim

    # get classes
    try:
        return self.linSVC_obj.predict(X)
    except NotFittedError:
        # Re-raised with a message specific to this model ("preidct" typo
        # corrected).
        raise NotFittedError(
            "Classification model cannot predict without being trained first. "
            "Train the classification model at least once to prevent this error.")
def predict(self, X):
    """Predict labels for ``X``.

    ``X`` is first turned into a kernel list by ``process_list``. In the
    multiclass case the list goes to ``self.clf.predict``; otherwise it is
    combined through ``self.how_to`` with ``self.weights`` and passed to
    ``self.estimator.predict``.

    Raises
    ------
    NotFittedError
        If the instance is not fitted.
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
        )

    KL = process_list(X, self.generator)

    if self.multiclass_:
        return self.clf.predict(KL)

    combined = self.how_to(KL, self.weights)
    return self.estimator.predict(combined)
def predict(self, X):
    """Predict labels for ``X``.

    In the multiclass case the call is forwarded to ``self.cls.predict``.
    Otherwise every value from ``self.decision_function(X)`` is mapped to
    ``classes_[1]`` when it is non-negative and to ``classes_[0]`` when it
    is negative.

    Returns
    -------
    The multiclass predictor's output, or a numpy array of labels.

    Raises
    ------
    NotFittedError
        If the instance is not fitted.
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This EasyMKL instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
        )

    if self.multiclass_:
        return self.cls.predict(X)

    return np.array([
        self.classes_[1] if p >= 0 else self.classes_[0]
        for p in self.decision_function(X)
    ])
def predict(self, X, transformer_ids=None):
    """Predict with ``self.knn`` on the ensemble representation of ``X``.

    ``X`` is validated with ``check_array``, mapped through
    ``self.ensemble_represetations`` and handed to ``self.knn.predict``.
    ``transformer_ids`` is accepted but not used here.

    Raises
    ------
    NotFittedError
        When ``is_fitted()`` is false.
    """
    if not self.is_fitted():
        template = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                    "appropriate arguments before using this transformer.")
        raise NotFittedError(template % {"name": type(self).__name__})

    checked = check_array(X)
    representations = self.ensemble_represetations(checked)
    return self.knn.predict(representations)
def _check_test(self, X):
    """Validate test input and return the matching kernel list.

    With a precomputed kernel, ``X`` is validated against ``self.Y`` by
    ``check_KL_Y``. Otherwise ``X`` is validated with ``check_array``, its
    column count is compared with the training data ``self.X``, and the
    kernels are generated through ``self.gen``.

    Raises
    ------
    NotFittedError
        If the instance is not fitted.
    ValueError
        If ``X`` has a different number of columns than ``self.X``.
    """
    if not self.is_fitted:
        raise NotFittedError("This EasyMKL instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.")

    if self.kernel == 'precomputed':
        return check_KL_Y(X, self.Y)

    X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C")
    if X.shape[1] != self.X.shape[1]:
        raise ValueError("The number of feature in X not correspond")
    return self.gen(self.X, X).make_a_list(self.n_kernels).to_array()
def check_voter_fit_(self):
    '''
    Raise a NotFittedError when ``self.voter_fitted_`` is false; otherwise
    do nothing.
    '''
    if self.voter_fitted_:
        return

    template = (
        "This %(name)s instance's voter is not fitted yet. "
        "Call 'fit_voter' or 'fit' with appropriate arguments "
        "before using this estimator."
    )
    raise NotFittedError(template % {"name": type(self).__name__})
def vote(self, X):
    """
    Return the predictions of ``self.voter`` for ``X``.

    ``X`` is validated with ``check_array`` before being passed to
    ``self.voter.predict``.

    Raises
    ------
    NotFittedError
        When ``is_fitted()`` is false.
    """
    if not self.is_fitted():
        template = ("This %(name)s instance is not fitted yet. Call 'fit' with "
                    "appropriate arguments before using this transformer.")
        raise NotFittedError(template % {"name": type(self).__name__})

    checked = check_array(X)
    return self.voter.predict(checked)
def _predict(self, comparison_vectors, return_type): try: prediction = self.classifier.predict( comparison_vectors.as_matrix()) except NotFittedError: raise NotFittedError( "This {} is not fitted yet. Call 'learn' with appropriate " "arguments before using this method.".format( type(self).__name__)) return self._return_result(prediction, return_type, comparison_vectors)
def decision_function(self, X):
    """Return the decision scores of the binary estimator for ``X``.

    Parameters
    ----------
    X : samples matrix or kernel list
        Converted to a kernel list by ``process_list``.

    Returns
    -------
    The output of ``self.estimator.decision_function`` on the kernel list
    combined through ``self.how_to`` with ``self.weights``.

    Raises
    ------
    NotFittedError
        If the instance is not fitted.
    ValueError
        For multiclass problems, where scores are not available.
    """
    if not self.is_fitted:
        raise NotFittedError(
            "This KOMD instance is not fitted yet. Call 'fit' with appropriate arguments before using this method."
        )

    if self.multiclass_:
        raise ValueError(
            'Scores are not available for multiclass problems, use predict'
        )

    KL = process_list(X, self.generator)  # X can be a samples matrix or Kernel List
    return self.estimator.decision_function(self.how_to(KL, self.weights))
def get_sparse_features(self, whitened_patches):
    """Return the sparse code of the patches from ``self.DL_obj``.

    Parameters
    ----------
    whitened_patches : array with ``ndim`` 2 or 3
        Three-dimensional input is flattened to
        ``(whitened_patches.shape[0], -1)`` first.

    Raises
    ------
    AssertionError
        If the (possibly reshaped) input is not two-dimensional.
    NotFittedError
        If ``self.DL_obj`` reports that it is not fitted.
    """
    # Flatten each patch so the data becomes (n_patches, n_values).
    if whitened_patches.ndim == 3:
        n_patches = whitened_patches.shape[0]
        whitened_patches = whitened_patches.reshape((n_patches, -1))

    assert whitened_patches.ndim == 2, (
        "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim)

    try:
        return self.DL_obj.transform(whitened_patches)
    except NotFittedError:
        raise NotFittedError(
            "Feature extraction dictionary has not yet been learnt for this model, "
            "therefore Sparse Codes cannot be extracted. Train the feature extraction model "
            "at least once to prevent this error.")