def score(self, X, y, sample_weight=None):
    """
    Returns the mean accuracy on the given test data and labels.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Test samples.
    y : array, shape (n_samples,)
        True labels for X.
    sample_weight : array, shape (n_samples,), optional
        Sample weights.

    Returns
    -------
    score : float
        Mean accuracy of self.predict(X) w.r.t. y.
    """
    # Check input arrays
    X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
    y = check_array(y.astype(int), ndim=1, dtype='numeric',
                    force_all_finite=True)

    # Check input arrays are the same length
    if X.shape[0] != y.size:
        raise ValueError("Number of samples in X and y must be the same: "
                         "{} vs {}".format(X.shape[0], y.size))

    return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
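# Usage sketch (illustrative, not from this module): `plda`, `X_test`,
# `y_test` and `w` are assumed names for a fitted estimator, held-out data
# and optional per-sample weights.
#
#     acc = plda.score(X_test, y_test)                  # mean accuracy in [0, 1]
#     w_acc = plda.score(X_test, y_test, sample_weight=w)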
def transform(self, X, Y=None):
    """
    Apply the dimension reduction learned on the train data.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input X data.
    Y : array, shape (n_samples, n_targets) or None (default=None)
        Input Y data. If Y=None, then only the transformed X data are
        returned.

    Returns
    -------
    X_new : array, shape (n_samples, n_components)
        Transformed X data.
    Y_new : array, shape (n_samples, n_components)
        Transformed Y data. If Y=None, only X_new is returned.
    """
    self._check_is_fitted()

    X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

    if Y is None:
        return self.cca.transform(X, Y=None, copy=True)
    else:
        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
        X_new, Y_new = self.cca.transform(X, Y=Y, copy=True)

        # If n_components=1, reshape Y_new so it is 2D
        if self.n_components_ == 1:
            n_samples = Y_new.shape[0]
            Y_new = Y_new.reshape((n_samples, 1))

        return X_new, Y_new
def inverse_transform(self, X, Y=None):
    """
    Transform data back to its original space.

    Note: this is not exact!

    Parameters
    ----------
    X : array, shape (n_samples, n_components)
        Transformed X data.
    Y : array, shape (n_samples, n_components) or None (default=None)
        Transformed Y data. If Y=None, only the X data are transformed
        back to the original space.

    Returns
    -------
    X_original : array, shape (n_samples, n_features)
        X data transformed back into original space.
    Y_original : array, shape (n_samples, n_targets)
        Y data transformed back into original space. If Y=None, only
        X_original is returned.
    """
    self._check_is_fitted()

    # Check X is in the transformed space
    X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
    if X.shape[1] != self.n_components_:
        raise ValueError("X has {} features per sample. "
                         "Expecting {}".format(X.shape[1],
                                               self.n_components_))

    # Invert X into the original space
    X_original = np.dot(X, self.components_) + self.mean_

    if Y is None:
        return X_original
    else:
        # Check Y is in the transformed space
        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
        if Y.shape[1] != self.n_components_:
            raise ValueError("Y has {} features per sample. "
                             "Expecting {}".format(Y.shape[1],
                                                   self.n_components_))

        # Invert Y into the original space
        Y_original = np.dot(Y, self.components_y_) + self.mean_y_
        return X_original, Y_original
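# Round-trip sketch for the transform/inverse_transform pair above
# (illustrative; `model`, `X`, `Y` are assumed names, and as the docstring
# notes the inversion is approximate, not exact):
#
#     X_new, Y_new = model.transform(X, Y)
#     X_back, Y_back = model.inverse_transform(X_new, Y_new)
#     recon_err = np.abs(X - X_back).max()   # residual of the approximation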
def fit(self, X, y):
    """
    Fit the PLDA model according to the given training data and
    parameters.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Training data.
    y : array, shape (n_samples,)
        Target values.

    Returns
    -------
    self : object
        The fitted estimator.
    """
    # Check input arrays
    X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
    y = check_array(y.astype(int), ndim=1, dtype='numeric',
                    force_all_finite=True)

    # Check input arrays are the same length
    if X.shape[0] != y.size:
        raise ValueError("Number of samples in X and y must be the same: "
                         "{} vs {}".format(X.shape[0], y.size))

    # Check that n_components does not exceed the maximum possible
    max_components = min(X.shape)
    if self.n_components_ is None:
        self.n_components_ = max_components
    elif self.n_components_ > max_components:
        self.n_components_ = max_components
        warnings.warn("n_components exceeds maximum possible components. "
                      "Setting n_components = {}".format(max_components))

    # Set useful data attributes
    self.classes_ = np.unique(y)
    self.mean_ = X.mean(axis=0)
    self.class_means_ = self._class_means(X, y)

    self._solve_eigen(X, y)

    # Adjust coefficients and intercept for binary classification problems
    if self.classes_.size == 2:
        self.coef_ = np.array(self.coef_[1, :] - self.coef_[0, :], ndmin=2)
        self.intercept_ = np.array(self.intercept_[1] - self.intercept_[0],
                                   ndmin=1)

    # Transform the data so we can get the explained variance
    self.explained_variance_ = np.dot(X, self.components_.T).var(axis=0)

    self.is_fitted = True
    return self
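# Fitting sketch (illustrative; the PLDA constructor arguments shown are an
# assumption, not confirmed by this module):
#
#     rng = np.random.RandomState(0)
#     X = rng.randn(100, 5)
#     y = rng.randint(0, 3, size=100)          # three classes, labels 0..2
#     model = PLDA(n_components=2).fit(X, y)
#     model.classes_                           # -> array([0, 1, 2])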
def decision_function(self, X):
    """
    Predict confidence scores for samples.

    The confidence score for a sample is the signed distance of that
    sample to the hyperplane.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input data.

    Returns
    -------
    scores : array, shape (n_samples,) if n_classes == 2 else (n_samples, n_classes)
        Confidence scores per (sample, class) combination. In the binary
        case, the confidence score for self.classes_[1], where > 0 means
        this class would be predicted.
    """
    self._check_is_fitted()

    # Check input array
    X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

    # Check the number of features is correct
    n_feat = self.components_.shape[1]
    if X.shape[1] != n_feat:
        raise ValueError("X has {} features per sample. "
                         "Expecting {}".format(X.shape[1], n_feat))

    scores = np.dot(X, self.coef_.T) + self.intercept_
    return scores.ravel() if scores.shape[1] == 1 else scores
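# Score-inspection sketch (illustrative names). In the multiclass case the
# predicted label corresponds to the argmax of the per-class scores:
#
#     scores = model.decision_function(X_test)      # (n_samples, n_classes)
#     labels = model.classes_[scores.argmax(axis=1)]
#     # Binary case: scores is 1-D; classes_[1] is predicted where scores > 0.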
def inverse_transform(self, X):
    """
    Transform data back to its original space.

    Note: if n_components is less than the maximum, information will be
    lost, so reconstructed data will not exactly match the original data.

    Parameters
    ----------
    X : array, shape (n_samples, n_components)
        New data.

    Returns
    -------
    X_original : array, shape (n_samples, n_features)
        Data transformed back into original space.
    """
    self._check_is_fitted()

    X = check_array(X, ndim=2)

    # Check data dimensions
    if X.shape[1] != self.n_components_:
        raise ValueError("X has {} features per sample. "
                         "Expecting {}".format(X.shape[1],
                                               self.n_components_))

    # Inverse transform
    X_original = self.mean_ + np.dot(X, self.components_)
    return X_original
def transform(self, X):
    """
    Transform data.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Input data.

    Returns
    -------
    X_new : array, shape (n_samples, n_components)
        Transformed data.
    """
    self._check_is_fitted()

    # Check input array
    X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

    # Check the number of features is correct
    n_feat = self.components_.shape[1]
    if X.shape[1] != n_feat:
        raise ValueError("X has {} features per sample. "
                         "Expecting {}".format(X.shape[1], n_feat))

    # Transform data, keeping only the first n_components dimensions
    X_new = np.dot(X, self.components_.T)
    return X_new[:, :self.n_components_]
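# Reconstruction sketch for the PLDA transform/inverse_transform pair above
# (illustrative names; recovery is only exact when n_components equals the
# maximum, as the inverse_transform docstring notes):
#
#     X_new = model.transform(X)               # (n_samples, n_components)
#     X_back = model.inverse_transform(X_new)  # (n_samples, n_features)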
def fit(self, X, Y):
    """
    Fit the model to data.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Training vectors, where n_samples is the number of samples and
        n_features is the number of predictors.
    Y : array, shape (n_samples, n_targets)
        Target vectors, where n_samples is the number of samples and
        n_targets is the number of response variables.

    Returns
    -------
    self : object
        The fitted estimator.
    """
    X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
    Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)

    if X.shape[0] != Y.shape[0]:
        raise ValueError("Number of samples in X and Y must be the same: "
                         "{} vs {}".format(X.shape[0], Y.shape[0]))

    if self.n_components_ > X.shape[1]:
        raise ValueError("n_components exceeds number of features in X: "
                         "{} > {}".format(self.n_components_, X.shape[1]))

    if self.n_components_ > Y.shape[1]:
        raise ValueError("n_components exceeds number of targets in Y: "
                         "{} > {}".format(self.n_components_, Y.shape[1]))

    self.cca.fit(X, Y)

    self.components_ = self.cca.x_weights_.T
    self.components_y_ = self.cca.y_weights_.T
    self.mean_ = self.cca.x_mean_
    self.mean_y_ = self.cca.y_mean_

    # Get the explained variance of the transformed data
    self.explained_variance_ = self.cca.x_scores_.var(axis=0)
    self.explained_variance_y_ = self.cca.y_scores_.var(axis=0)

    self.is_fitted = True
    return self
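# Attribute sketch after fitting (illustrative; assumes `self.cca` is an
# sklearn-style CCA exposing x_weights_, y_weights_, x_mean_ and y_mean_,
# as the assignments above imply):
#
#     model.fit(X, Y)
#     model.components_.shape      # (n_components, n_features)
#     model.components_y_.shape    # (n_components, n_targets)
#     model.explained_variance_    # per-component variance of the X scores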
def predict_transformed(self, X_trans):
    """
    Predict class labels for data that have already been transformed by
    self.transform(X).

    This is useful for plotting classification boundaries.

    Note: due to arithmetic discrepancies, this may return slightly
    different class labels from self.predict(X).

    Parameters
    ----------
    X_trans : array, shape (n_samples, n_components)
        Test samples that have already been transformed into PLDA space.

    Returns
    -------
    y : array, shape (n_samples,)
        Predicted class labels for X_trans.
    """
    self._check_is_fitted()

    # Check input array
    X_trans = check_array(X_trans, ndim=2)

    # Make sure this is a set of transformed data
    if X_trans.shape[1] != self.n_components_:
        raise ValueError("Number of features in X_trans must match "
                         "n_components: {}".format(self.n_components_))

    # Transform the class means into PLDA space
    mean_trans = self.transform(self.class_means_)

    # Compute the distance between each sample and each class mean
    # (broadcasting subtracts the mean from every row of X_trans)
    n_samples = X_trans.shape[0]
    n_classes = mean_trans.shape[0]
    dists = np.zeros((n_samples, n_classes))
    for i, mean in enumerate(mean_trans):
        dists[:, i] = np.linalg.norm(X_trans - mean, axis=1)

    # Classify by the shortest distance to a class mean
    ind = dists.argmin(axis=1)
    return self.classes_[ind]
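# Boundary-plotting sketch (illustrative names; assumes n_components == 2 so
# PLDA space is a plane): evaluate predict_transformed on a grid in the
# transformed space and contour the labels.
#
#     xx, yy = np.meshgrid(np.linspace(-4, 4, 200), np.linspace(-4, 4, 200))
#     grid = np.c_[xx.ravel(), yy.ravel()]      # grid points in PLDA space
#     zz = model.predict_transformed(grid).reshape(xx.shape)
#     # e.g. plt.contourf(xx, yy, zz, alpha=0.3) with matplotlib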