def test_np_matrix():
    """Confirm that input validation code does not return np.matrix"""
    X = np.arange(12).reshape(3, 4)

    assert_false(isinstance(as_float_array(X), np.matrix))
    assert_false(isinstance(as_float_array(np.matrix(X)), np.matrix))
    assert_false(isinstance(as_float_array(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csr(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csr(sp.csc_matrix(X)), np.matrix))

    assert_false(isinstance(atleast2d_or_csc(X), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(np.matrix(X)), np.matrix))
    assert_false(isinstance(atleast2d_or_csc(sp.csr_matrix(X)), np.matrix))

    assert_false(isinstance(safe_asarray(X), np.matrix))
    assert_false(isinstance(safe_asarray(np.matrix(X)), np.matrix))
    assert_false(isinstance(safe_asarray(sp.lil_matrix(X)), np.matrix))

    assert_true(atleast2d_or_csr(X, copy=False) is X)
    assert_false(atleast2d_or_csr(X, copy=True) is X)
    assert_true(atleast2d_or_csc(X, copy=False) is X)
    assert_false(atleast2d_or_csc(X, copy=True) is X)
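# A minimal sketch (not from the original test suite) of the behaviour checked
# above: validation helpers are expected to hand back plain ndarrays or scipy
# sparse matrices, never np.matrix. np.asarray is assumed here as a stand-in
# for the conversion the helpers perform internally.
import numpy as np

M = np.matrix(np.arange(12).reshape(3, 4))
A = np.asarray(M)
assert isinstance(M, np.matrix)
assert not isinstance(A, np.matrix)   # converted to a plain ndarray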
def _joint_log_likelihood(self, X):
    """Calculate the posterior log probability of the samples X."""
    X = atleast2d_or_csr(X)

    neg_prob = np.log(1 - np.exp(self.feature_log_prob_))
    jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T)
    jll += self.class_log_prior_ + neg_prob.sum(axis=1)
    return jll
def predict(self, X, n_neighbors=1):
    """Perform classification on an array of test vectors X.

    The predicted class C for each sample in X is returned.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]

    Returns
    -------
    C : array, shape = [n_samples]

    Notes
    -----
    The default prediction uses KNeighborsClassifier; if the instance
    reduction algorithm is to be performed with another classifier, this
    method should be explicitly overridden and documented accordingly.
    """
    X = atleast2d_or_csr(X)

    if not hasattr(self, "X_") or self.X_ is None:
        raise AttributeError("Model has not been trained yet.")

    if not hasattr(self, "y_") or self.y_ is None:
        raise AttributeError("Model has not been trained yet.")

    if self.classifier is None:
        self.classifier = KNeighborsClassifier(n_neighbors=n_neighbors)

    self.classifier.fit(self.X_, self.y_)
    return self.classifier.predict(X)
def decision_function(self, X):
    """Predict confidence scores for samples.

    The confidence score for a sample is the signed distance of that
    sample to the hyperplane.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = (n_samples, n_features)
        Samples.

    Returns
    -------
    array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
        Confidence scores per (sample, class) combination. In the binary
        case, confidence score for self.classes_[1] where >0 means this
        class would be predicted.
    """
    # handle regression (least-squares loss)
    if not self.is_classif:
        return LinearModel.decision_function(self, X)

    X = atleast2d_or_csr(X)
    n_features = self.coef_.shape[1]
    if X.shape[1] != n_features:
        raise ValueError("X has %d features per sample; expecting %d"
                         % (X.shape[1], n_features))

    scores = safe_sparse_dot(X, self.coef_.T,
                             dense_output=True) + self.intercept_
    return scores.ravel() if scores.shape[1] == 1 else scores
def fit(self, X, y):
    if sparse.issparse(y):
        y = np.asarray(y.todense())

    self._enc = LabelEncoder()
    y = self._enc.fit_transform(y)
    if len(self.classes_) != 2:
        raise ValueError("The number of classes must be 2; "
                         "use sklearn.multiclass for more classes.")

    # The LabelEncoder maps the binary labels to 0 and 1, but the
    # training algorithm requires the labels to be -1 and +1.
    y[y == 0] = -1

    X = atleast2d_or_csr(X, dtype=np.float64, order="C")
    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y have incompatible shapes.\n"
                         "X has %s samples, but y has %s." %
                         (X.shape[0], y.shape[0]))

    self.weight_vector = WeightVector(X)

    if self.loop_type == constants.LOOP_BALANCED_STOCHASTIC:
        pegasos.train_stochastic_balanced(self, X, y)
    elif self.loop_type == constants.LOOP_STOCHASTIC:
        pegasos.train_stochastic(self, X, y)
    else:
        raise ValueError('%s: unknown loop type' % self.loop_type)

    return self
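# Hedged usage sketch (toy data, hypothetical variable names) of the label
# handling in the fit above: LabelEncoder maps the two classes to {0, 1},
# then 0 is remapped to -1 because the Pegasos-style solver expects {-1, +1}.
import numpy as np
from sklearn.preprocessing import LabelEncoder

y_raw = np.array(["spam", "ham", "ham", "spam"])
enc = LabelEncoder()
y = enc.fit_transform(y_raw).astype(int)   # -> [1, 0, 0, 1]
y[y == 0] = -1                             # -> [1, -1, -1, 1]
print(enc.classes_, y)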
def fit(self, X, y):
    """Fit the model to the data X and target y.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    y : numpy array of shape (n_samples)
        Subset of the target values.

    Returns
    -------
    self
    """
    self.dim_image = int(np.sqrt(X.shape[1]))  # images are assumed square
    self.n_classes = y.shape[1]

    self._validate_params()
    self._init_fit()
    self._init_param()

    X = atleast2d_or_csr(X)
    X = np.reshape(X, (-1, self.dim_image, self.dim_image))

    self.n_outputs = y.shape[1]
    self._backprop_lbfgs(X, y)

    return self
def predict(self, X):
    """Predict using the multi-layer perceptron model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)

    Returns
    -------
    array, shape (n_samples)
        Predicted target values per element in X.
    """
    X = atleast2d_or_csr(X)
    scores = self.decision_function(X)

    if len(scores.shape) == 1 or self.multi_label is True:
        scores = logistic_sigmoid(scores)
        results = (scores > 0.5).astype(np.int)

        if self.multi_label:
            return self._lbin.inverse_transform(results)
    else:
        scores = _softmax(scores)
        results = scores.argmax(axis=1)

    return self.classes_[results]
def scatter(data, labels=None, title=None, name=None):
    """2d PCA scatter plot with optional class info.

    Return the PCA model to be able to introspect the components
    or transform new data with the same model.
    """
    data = atleast2d_or_csr(data)

    if data.shape[1] == 2:
        # No need for a PCA:
        data_2d = data
        pca = None  # nothing to return in this case
    else:
        pca = RandomizedPCA(n_components=2)
        data_2d = pca.fit_transform(data)

    for i, c, m in zip(np.unique(labels), cycle(COLORS), cycle(MARKERS)):
        plt.scatter(data_2d[labels == i, 0], data_2d[labels == i, 1],
                    c=c, marker=m, label=i, alpha=0.5)

    plt.legend(loc='best')

    if title is None:
        title = "2D PCA scatter plot"
    if name is not None:
        title += " for " + name

    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.title(title)

    return pca
def test_atleast2d_or_sparse():
    for typ in [sp.csr_matrix, sp.dok_matrix, sp.lil_matrix, sp.coo_matrix]:
        X = typ(np.arange(9, dtype=float).reshape(3, 3))

        Y = atleast2d_or_csr(X, copy=True)
        assert_true(isinstance(Y, sp.csr_matrix))
        Y.data[:] = 1
        assert_array_equal(X.toarray().ravel(), np.arange(9))

        Y = atleast2d_or_csc(X, copy=False)
        Y.data[:] = 4
        assert_true(np.all(X.data == 4)
                    if isinstance(X, sp.csc_matrix)
                    else np.all(X.toarray().ravel() == np.arange(9)))

        Y = atleast2d_or_csr(X, dtype=np.float32)
        assert_true(Y.dtype == np.float32)
def fit(self, X, y=None):
    """Fit the model to the data X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    self
    """
    X = atleast2d_or_csr(X, dtype=np.float64, order="C")
    n_samples, n_features = X.shape

    self._init_fit(n_features)
    self._init_param()
    self._init_t_eta_()

    if self.shuffle_data:
        X, y = shuffle(X, y, random_state=self.random_state)

    # l-bfgs does not use mini-batches
    if self.algorithm == 'l-bfgs':
        batch_size = n_samples
    else:
        batch_size = np.clip(self.batch_size, 0, n_samples)

    n_batches = n_samples // batch_size
    batch_slices = list(
        gen_even_slices(n_batches * batch_size, n_batches))

    # preallocate memory
    a_hidden = np.empty((batch_size, self.n_hidden))
    a_output = np.empty((batch_size, n_features))
    delta_o = np.empty((batch_size, n_features))

    if self.algorithm == 'sgd':
        prev_cost = np.inf

        for i in xrange(self.max_iter):
            for batch_slice in batch_slices:
                cost = self.backprop_sgd(X[batch_slice], n_features,
                                         batch_size, delta_o,
                                         a_hidden, a_output)

            if self.verbose:
                print("Iteration %d, cost = %.2f" % (i, cost))
            if abs(cost - prev_cost) < self.tol:
                break
            prev_cost = cost
            self.t_ += 1

    elif self.algorithm == 'l-bfgs':
        self._backprop_lbfgs(X, n_features, a_hidden,
                             a_output, delta_o, n_samples)

    return self
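# Small sketch (assuming sklearn.utils.gen_even_slices) of the mini-batch
# slicing used in the fit above: the training set is cut into n_batches even
# slices covering the first n_batches * batch_size rows.
from sklearn.utils import gen_even_slices

n_samples, batch_size = 100, 32
n_batches = n_samples // batch_size
batch_slices = list(gen_even_slices(n_batches * batch_size, n_batches))
print(batch_slices)   # [slice(0, 32, None), slice(32, 64, None), slice(64, 96, None)]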
def _to_csr(self, X):
    """Check & convert X to CSR format."""
    X = atleast2d_or_csr(X)
    if not sp.issparse(X):
        X = sp.csr_matrix(X)
    return X
def fit(self, X, Y):
    """Fit the model to the data X and target Y.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    Y : numpy array of shape [n_samples]
        Subset of the target values.

    Returns
    -------
    self
    """
    self.n_layers = len(self.n_hidden)

    X = atleast2d_or_csr(X, dtype=np.float64, order="C")
    n_outputs = Y.shape[1]
    n_samples, n_features = X.shape

    self._init_fit(X, Y, n_features, n_outputs)
    self._init_param()

    if self.shuffle_data:
        X, Y = shuffle(X, Y, random_state=self.random_state)

    self.batch_size = np.clip(self.batch_size, 0, n_samples)
    n_batches = n_samples // self.batch_size
    batch_slices = list(
        gen_even_slices(n_batches * self.batch_size, n_batches))

    # l-bfgs does not work well with batches
    if self.algorithm == 'l-bfgs':
        self.batch_size = n_samples

    # preallocate memory
    a_hidden = [0] * self.n_layers
    a_output = np.empty((self.batch_size, n_outputs))
    delta_o = np.empty((self.batch_size, n_outputs))

    # print 'Fine tuning...'
    if self.algorithm == 'sgd':
        eta = self.eta0
        t = 1
        prev_cost = np.inf

        for i in xrange(self.max_iter):
            for batch_slice in batch_slices:
                cost, eta = self.backprop_sgd(X[batch_slice], Y[batch_slice],
                                              self.batch_size, a_hidden,
                                              a_output, delta_o, t, eta)

            if self.verbose:
                print("Iteration %d, cost = %.2f" % (i, cost))
            if abs(cost - prev_cost) < self.tol:
                break
            prev_cost = cost
            t += 1

    elif self.algorithm == 'l-bfgs':
        self._backprop_lbfgs(X, Y, n_features, n_outputs,
                             n_samples, a_hidden, a_output, delta_o)

    return self
def decision_function(self, X):
    X = atleast2d_or_csr(X)

    # compute hidden layer activations
    self.hidden_activations_ = self._get_hidden_activations(X)

    output = safe_sparse_dot(self.hidden_activations_, self.coef_output_)
    return output
def predict(self, X):
    X = atleast2d_or_csr(X)
    scores = self.decision_function(X)
    # if len(scores.shape) == 1:
    #     scores = logistic_sigmoid(scores)
    #     results = (scores > 0.5).astype(np.int)
    # else:
    #     scores = _softmax(scores)
    #     results = scores.argmax(axis=1)
    # self.classes_[results]
    return self._lbin.inverse_transform(scores)
def predict(self, X):
    """Predict using the multi-layer perceptron model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]

    Returns
    -------
    array, shape = [n_samples]
        Predicted target values per element in X.
    """
    X = atleast2d_or_csr(X)
    return super(DBNRegressor, self).decision_function(X)
def transform(self, features):
    features = atleast2d_or_csr(features)
    if self.mean_ is not None:
        features = features - self.mean_
    features = np.dot(features, self.U_reduce)
    ##features = safe_sparse_dot(features, self.components.T)
    ## features = np.dot(np.transpose(self.U[:, :self.k_components]), features)
    #print 'features dimensions : ', features.shape
    return features
def fit(self, X, y=None):
    """Generate a random hidden layer.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape [n_samples, n_features]
        Training set: only the shape is used to generate random component
        values for hidden units.

    y : is not used: placeholder to allow for usage in a Pipeline.

    Returns
    -------
    self
    """
    X = atleast2d_or_csr(X)

    self._generate_components(X)

    return self
def fit_transform(self, X, y=None):
    """Apply dimensionality reduction on X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    X = self._fit(atleast2d_or_csr(X))
    X = safe_sparse_dot(X, self.components_.T)
    return X
def transform(self, X, y=None):
    """Generate the random hidden layer's activations given X as input.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape [n_samples, n_features]
        Data to transform.

    y : is not used: placeholder to allow for usage in a Pipeline.

    Returns
    -------
    X_new : numpy array of shape [n_samples, n_components]
    """
    X = atleast2d_or_csr(X)

    if self.components_ is None:
        raise ValueError('No components initialized')

    return self._compute_hidden_activations(X)
def score(self, X):
    """Compute a lower bound on the data; very naive implementation.

    Parameters
    ----------
    X : array-like, shape (N, n_features)
        The data that needs to be fitted to and transformed.

    Returns
    -------
    lower bound : float
        The lower bound on the log likelihood.
    """
    v = atleast2d_or_csr(X)
    rng = check_random_state(self.random_state)
    gradients, lowerbound = self._computeGradients(v.T, rng)

    return lowerbound / X.shape[0]
def inverse_transform(self, X, dict_type=dict, inverse_onehot=True):
    """Transform array or sparse matrix X back to feature mappings.

    X must have been produced by this DictVectorizer's transform or
    fit_transform method; it may only have passed through transformers
    that preserve the number of features and their order.

    In the case of one-hot/one-of-K coding, the constructed feature
    names and values are returned rather than the original ones.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Sample matrix.
    dict_type : callable, optional
        Constructor for feature mappings. Must conform to the
        collections.Mapping API.
    inverse_onehot : boolean, optional
        Whether one-hot encoded features should be mapped back to their
        original name/value pairs.

    Returns
    -------
    D : list of dict_type objects, length = n_samples
        Feature mappings for the samples in X.
    """
    X = atleast2d_or_csr(X)  # COO matrix is not subscriptable
    n_samples = X.shape[0]

    names = self.feature_names_
    dicts = [dict_type() for _ in xrange(n_samples)]

    if sp.issparse(X):
        for i, j in zip(*X.nonzero()):
            if inverse_onehot and names[j] in self._onehot_dict and X[i, j]:
                dicts[i][self._onehot_dict[names[j]][0]] = \
                    self._onehot_dict[names[j]][1]
            else:
                dicts[i][names[j]] = X[i, j]
    else:
        for i, d in enumerate(dicts):
            for j, v in enumerate(X[i, :]):
                if v != 0:
                    d[names[j]] = X[i, j]

    return dicts
def transform(self, X):
    """Transform the data X according to the fitted NMF model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Data matrix to be transformed by the model.

    Returns
    -------
    data : array, [n_samples, n_components]
        Transformed data.
    """
    X = atleast2d_or_csr(X)
    H = np.zeros((X.shape[0], self.n_components))
    for j in xrange(0, X.shape[0]):
        H[j, :], _ = nnls(self.components_.T, X[j, :])
    return H
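# Minimal sketch of the per-sample step above, assuming scipy.optimize.nnls,
# which solves argmin_h ||A h - b||_2 subject to h >= 0. Here A plays the role
# of self.components_.T and b the role of one row of X.
import numpy as np
from scipy.optimize import nnls

rng = np.random.RandomState(0)
A = np.abs(rng.randn(6, 3))          # (n_features, n_components)
h_true = np.array([0.5, 0.0, 1.2])   # non-negative encoding
b = A.dot(h_true)                    # a noiseless sample
h, residual = nnls(A, b)
assert np.allclose(h, h_true, atol=1e-8) and residual < 1e-8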
def fit(self, X, y=None, **fit_params):
    X = atleast2d_or_csr(X)

    if sp.issparse(X):
        # The indices attribute of a sparse matrix in CSR format contains
        # the column numbers of non-zero elements. Counting the number of
        # occurrences of each value in this array therefore gives the
        # number of non-zero elements per column. The minlength argument
        # guarantees even zero-count columns at the end of the matrix are
        # included.
        assert X.format == "csr"
        self.feature_importances_ = np.bincount(X.indices,
                                                minlength=X.shape[1])
    else:
        self.feature_importances_ = (X > 0).sum(axis=0)

    log.debug("MinCountFilter removed {} of {} features".format(
        np.sum(self.feature_importances_ < self.threshold),
        X.shape[1]))

    return self
def decision_function(self, X):
    """Compute the values of the decision function for samples in X.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)

    Returns
    -------
    array, shape (n_samples)
        Predicted target values per element in X.
    """
    X = atleast2d_or_csr(X)

    a_hidden = self.activation_func(
        safe_sparse_dot(X, self.coef_hidden_) + self.intercept_hidden_)
    output = safe_sparse_dot(a_hidden, self.coef_output_) + \
        self.intercept_output_

    if output.shape[1] == 1:
        output = output.ravel()

    return output
def _joint_log_likelihood(self, X):
    """Calculate the posterior log probability of the samples X."""
    X = atleast2d_or_csr(X)

    if self.binarize is not None:
        X = binarize(X, threshold=self.binarize)

    n_classes, n_features = self.feature_log_prob_.shape
    n_samples, n_features_X = X.shape

    if n_features_X != n_features:
        raise ValueError("Expected input with %d features, got %d instead"
                         % (n_features, n_features_X))

    neg_prob = np.log(1 - np.exp(self.feature_log_prob_))
    # Compute neg_prob · (1 - X).T as ∑neg_prob - X · neg_prob
    jll = safe_sparse_dot(X, (self.feature_log_prob_ - neg_prob).T)
    jll += self.class_log_prior_ + neg_prob.sum(axis=1)

    return jll
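# Hedged numeric check (toy shapes, dense arrays) of the identity the comment
# above relies on for Bernoulli-style likelihoods:
#   X·log(p).T + (1-X)·log(1-p).T == X·(log p - log(1-p)).T + sum_j log(1-p)_cj
import numpy as np

rng = np.random.RandomState(0)
X = rng.randint(0, 2, size=(5, 4)).astype(float)   # binary features
p = rng.uniform(0.1, 0.9, size=(3, 4))             # P(feature j | class c)
log_p, log_q = np.log(p), np.log(1 - p)
direct = X.dot(log_p.T) + (1 - X).dot(log_q.T)
factored = X.dot((log_p - log_q).T) + log_q.sum(axis=1)
assert np.allclose(direct, factored)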
def predict(self, X):
    """Predict using the multi-layer perceptron model.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]

    Returns
    -------
    array, shape = [n_samples]
        Predicted target values per element in X.
    """
    X = atleast2d_or_csr(X)
    scores = super(DBNClassifier, self).decision_function(X)

    if len(scores.shape) == 1:
        scores = logistic_sigmoid(scores)
        indices = (scores > 0.5).astype(np.int)
    else:
        scores = _softmax(scores)
        indices = scores.argmax(axis=1)

    return self._lbin.classes_[indices]
def transform(self, X, y=None):
    """Apply dimensionality reduction on X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        New data, where n_samples is the number of samples
        and n_features is the number of features.

    Returns
    -------
    X_new : array-like, shape (n_samples, n_components)
    """
    # XXX remove scipy.sparse support here in 0.16
    X = atleast2d_or_csr(X)
    if self.mean_ is not None:
        X = X - self.mean_
    X = safe_sparse_dot(X, self.components_.T)
    return X
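# Short usage sketch (assuming scikit-learn's PCA with whiten=False, which
# applies the same centre-then-project steps as the transform above).
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(50, 5)
pca = PCA(n_components=2).fit(X)
X_2d = pca.transform(X)
assert np.allclose(X_2d, (X - pca.mean_).dot(pca.components_.T))
print(X_2d.shape)   # (50, 2)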
def decision_function(self, X):
    """Compute the values of the decision function for samples in X.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)

    Returns
    -------
    array, shape (n_samples)
        Predicted target values per element in X.
    """
    X = atleast2d_or_csr(X)
    X = np.reshape(X, (-1, self.dim_image, self.dim_image))

    output, _, _ = self.forward_pass(X)
    if output.shape[1] == 1:
        output = output.ravel()

    return output
def partial_fit(self, X, y, classes):
    """Fit the model to a batch of training data X and targets y.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Subset of training data.

    y : numpy array of shape [n_samples]
        Subset of target values.

    classes : array-like, shape = [n_classes]
        List of all the classes that can possibly appear in the y vector.
        Must be provided at the first call to partial_fit; can be omitted
        in subsequent calls.

    Returns
    -------
    self : returns an instance of self.
    """
    X = atleast2d_or_csr(X, dtype=np.float64, order="C")
    _, n_features = X.shape

    self._init_param()

    if self.classes_ is None and classes is None:
        raise ValueError("classes must be passed on the first call "
                         "to partial_fit.")
    elif classes is not None and self.classes_ is not None:
        if not np.all(self.classes_ == np.unique(classes)):
            raise ValueError("`classes` is not the same as on last call "
                             "to partial_fit.")
    elif classes is not None:
        self._lbin = LabelBinarizer(classes=classes)
        Y = self._lbin.fit_transform(y)
        self._init_fit(n_features, Y.shape[1])
    else:
        Y = self._lbin.transform(y)

    self.backprop_naive(X, Y, 1)

    return self
def partial_fit(self, X, y):
    """Fit the model to a batch of training data X and targets y.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Subset of training data.

    y : numpy array of shape (n_samples)
        Subset of target values.

    Returns
    -------
    self : returns an instance of self.
    """
    X = atleast2d_or_csr(X)

    self.n_outputs = y.shape[1]
    n_samples, self.n_features = X.shape
    self._validate_params()

    if self.coef_hidden_ is None:
        self._init_fit()
        self._init_param()

    if self.t_ is None or self.eta_ is None:
        self._init_t_eta_()

    a_hidden, a_output, delta_o = self._preallocate_memory(n_samples)

    cost = self._backprop_sgd(X, y, n_samples, a_hidden, a_output, delta_o)
    if self.verbose:
        print("Iteration %d, cost = %.2f" % (self.t_, cost))
    self.t_ += 1

    return self
def fit(self, X, y=None, **fit_params):
    X = atleast2d_or_csr(X)

    if sp.issparse(X):
        # The indices attribute of a sparse matrix in CSR format contains
        # the column numbers of non-zero elements. Counting the number of
        # occurrences of each value in this array therefore gives the
        # number of non-zero elements per column. The minlength argument
        # guarantees even zero-count columns at the end of the matrix are
        # included.
        counts = np.bincount(X.indices, minlength=X.shape[1])
    else:
        counts = (X > 0).sum(axis=0)

    max_count = self.max_freq * X.shape[0]

    # All features with a count above max_count become "0" whereas all
    # others become "1". Using a threshold of "1", transform will then
    # select the features with a count below or equal to max_count.
    self.feature_importances_ = (counts <= max_count).astype("i")

    log.debug("MaxFreqFilter removed {} of {} features".format(
        X.shape[1] - self.feature_importances_.sum(),
        X.shape[1]))

    return self
def fit(self, X, y):
    """Fit the model to the data X and target y.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    y : numpy array of shape (n_samples)
        Subset of the target values.

    Returns
    -------
    self
    """
    X = atleast2d_or_csr(X)

    self._validate_params()

    n_samples, self.n_features = X.shape
    self.n_outputs = y.shape[1]

    if not self.warm_start:
        self._init_t_eta_()
        self._init_fit()
        self._init_param()
    else:
        if self.t_ is None or self.coef_hidden_ is None:
            self._init_t_eta_()
            self._init_fit()
            self._init_param()

    if self.shuffle:
        X, y = shuffle(X, y, random_state=self.random_state)

    # l-bfgs does not use mini-batches
    if self.algorithm == 'l-bfgs':
        batch_size = n_samples
    else:
        batch_size = np.clip(self.batch_size, 0, n_samples)

    n_batches = n_samples // batch_size
    batch_slices = list(
        gen_even_slices(n_batches * batch_size, n_batches))

    # preallocate memory
    a_hidden, a_output, delta_o = self._preallocate_memory(batch_size)

    if self.algorithm == 'sgd':
        prev_cost = np.inf

        for i in xrange(self.max_iter):
            for batch_slice in batch_slices:
                cost = self._backprop_sgd(X[batch_slice], y[batch_slice],
                                          batch_size, a_hidden, a_output,
                                          delta_o)

            if self.verbose:
                print("Iteration %d, cost = %.2f" % (i, cost))
            if abs(cost - prev_cost) < self.tol:
                break
            prev_cost = cost
            self.t_ += 1

    elif self.algorithm == 'l-bfgs':
        self._backprop_lbfgs(X, y, n_samples, a_hidden, a_output, delta_o)

    return self
def fit_transform(self, X, y=None):
    """Learn an NMF model for the data X and return the transformed data.

    This is more efficient than calling fit followed by transform.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Data matrix to be decomposed.

    Returns
    -------
    data : array, [n_samples, n_components]
        Transformed data.
    """
    X = atleast2d_or_csr(X)
    check_non_negative(X, "NMF.fit")

    n_samples, n_features = X.shape

    if not self.n_components:
        self.n_components_ = n_features
    else:
        self.n_components_ = self.n_components

    W, H = self._init(X)

    gradW = (np.dot(W, np.dot(H, H.T))
             - safe_sparse_dot(X, H.T, dense_output=True))
    gradH = (np.dot(np.dot(W.T, W), H)
             - safe_sparse_dot(W.T, X, dense_output=True))
    init_grad = norm(np.r_[gradW, gradH.T])
    tolW = max(0.001, self.tol) * init_grad  # why max?
    tolH = tolW
    tol = self.tol * init_grad

    for n_iter in range(1, self.max_iter + 1):
        # stopping condition, as discussed in the paper
        proj_norm = norm(np.r_[gradW[np.logical_or(gradW < 0, W > 0)],
                               gradH[np.logical_or(gradH < 0, H > 0)]])
        if proj_norm < tol:
            break

        # update W
        W, gradW, iterW = self._update_W(X, H, W, tolW)
        if iterW == 1:
            tolW = 0.1 * tolW

        # update H
        H, gradH, iterH = self._update_H(X, H, W, tolH)
        if iterH == 1:
            tolH = 0.1 * tolH

    if not sp.issparse(X):
        error = norm(X - np.dot(W, H))
    else:
        sqnorm_X = np.dot(X.data, X.data)
        norm_WHT = trace_dot(np.dot(np.dot(W.T, W), H), H)
        cross_prod = trace_dot((X * H.T), W)
        error = sqrt(sqnorm_X + norm_WHT - 2. * cross_prod)

    self.reconstruction_err_ = error
    self.comp_sparseness_ = _sparseness(H.ravel())
    self.data_sparseness_ = _sparseness(W.ravel())

    H[H == 0] = 0  # fix up negative zeros
    self.components_ = H

    if n_iter == self.max_iter:
        warnings.warn("Iteration limit reached during fit")

    return W, H
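# Hedged illustration (toy arrays) of the projected-gradient norm used as the
# stopping criterion above: only entries where the KKT conditions are not yet
# satisfied (negative gradient, or a strictly positive variable) contribute.
import numpy as np

W = np.array([[0.0, 0.5],
              [1.0, 0.0]])
gradW = np.array([[0.3, -0.2],
                  [0.1, 0.4]])
mask = np.logical_or(gradW < 0, W > 0)
proj_norm = np.linalg.norm(gradW[mask])
print(mask)
print(proj_norm)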
def dump_svmlight_file(X, y, f, zero_based=True, comment=None, query_id=None):
    """Dump the dataset in svmlight / libsvm file format.

    This format is a text-based format, with one sample per line. It does
    not store zero valued features, hence it is suitable for sparse
    datasets.

    The first element of each line can be used to store a target variable
    to predict.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    f : string or file-like in binary mode
        If string, specifies the path that will contain the data.
        If file-like, data will be written to f. f should be opened in
        binary mode.

    zero_based : boolean, optional
        Whether column indices should be written zero-based (True) or
        one-based (False).

    comment : string, optional
        Comment to insert at the top of the file. This should be either a
        Unicode string, which will be encoded as UTF-8, or an ASCII byte
        string. If a comment is given, then it will be preceded by one that
        identifies the file as having been dumped by scikit-learn. Note
        that not all tools grok comments in SVMlight files.

    query_id : array-like, shape = [n_samples]
        Array containing pairwise preference constraints (qid in svmlight
        format).
    """
    if comment is not None:
        # Convert comment string to list of lines in UTF-8.
        # If a byte string is passed, then check whether it's ASCII;
        # if a user wants to get fancy, they'll have to decode themselves.
        # Avoid mention of str and unicode types for Python 3.x compat.
        if isinstance(comment, bytes):
            comment.decode("ascii")     # just for the exception
        else:
            comment = comment.encode("utf-8")
        if six.b("\0") in comment:
            raise ValueError("comment string contains NUL byte")

    y = np.asarray(y)
    if y.ndim != 1:
        raise ValueError("expected y of shape (n_samples,), got %r"
                         % (y.shape,))

    Xval = atleast2d_or_csr(X)
    if Xval.shape[0] != y.shape[0]:
        raise ValueError("X.shape[0] and y.shape[0] should be the same, got"
                         " %r and %r instead." % (Xval.shape[0], y.shape[0]))

    # We had some issues with CSR matrices with unsorted indices (e.g. #1501),
    # so sort them here, but first make sure we don't modify the user's X.
    # TODO We can do this cheaper; sorted_indices copies the whole matrix.
    if Xval is X and hasattr(Xval, "sorted_indices"):
        X = Xval.sorted_indices()
    else:
        X = Xval
        if hasattr(X, "sort_indices"):
            X.sort_indices()

    if query_id is not None:
        query_id = np.asarray(query_id)
        if query_id.shape[0] != y.shape[0]:
            raise ValueError("expected query_id of shape (n_samples,), got %r"
                             % (query_id.shape,))

    one_based = not zero_based

    if hasattr(f, "write"):
        _dump_svmlight(X, y, f, one_based, comment, query_id)
    else:
        with open(f, "wb") as f:
            _dump_svmlight(X, y, f, one_based, comment, query_id)
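# Usage sketch assuming the scikit-learn public functions of the same names
# (sklearn.datasets.dump_svmlight_file / load_svmlight_file); the file path
# used here is hypothetical.
import numpy as np
from sklearn.datasets import dump_svmlight_file, load_svmlight_file

X = np.array([[0.0, 1.5],
              [2.0, 0.0]])
y = np.array([0, 1])
dump_svmlight_file(X, y, "/tmp/example.svmlight", zero_based=True)
X2, y2 = load_svmlight_file("/tmp/example.svmlight")
assert np.allclose(X2.toarray(), X) and np.array_equal(y2, y)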
def fit_transform(self, X, y=None):
    """Learn an NMF model for the data X and return the transformed data.

    This is more efficient than calling fit followed by transform.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Data matrix to be decomposed.

    Returns
    -------
    data : array, [n_samples, n_components]
        Transformed data.
    """
    X = atleast2d_or_csr(X)
    check_non_negative(X, "NMF.fit")

    n_samples, n_features = X.shape

    if not self.n_components:
        self.n_components = n_features

    W, H = self._init(X)

    gradW = (np.dot(W, np.dot(H, H.T))
             - safe_sparse_dot(X, H.T, dense_output=True))
    gradH = (np.dot(np.dot(W.T, W), H)
             - safe_sparse_dot(W.T, X, dense_output=True))
    init_grad = norm(np.r_[gradW, gradH.T])
    tolW = max(0.001, self.tol) * init_grad  # why max?
    tolH = tolW

    for n_iter in xrange(1, self.max_iter + 1):
        # stopping condition, as discussed in the paper
        proj_norm = norm(np.r_[gradW[np.logical_or(gradW < 0, W > 0)],
                               gradH[np.logical_or(gradH < 0, H > 0)]])
        if proj_norm < self.tol * init_grad:
            break

        # update W
        W, gradW, iterW = self._update_W(X, H, W, tolW)
        W = W.T
        gradW = gradW.T
        if iterW == 1:
            tolW = 0.1 * tolW

        # update H
        H, gradH, iterH = self._update_H(X, H, W, tolH)
        if iterH == 1:
            tolH = 0.1 * tolH

    self.comp_sparseness_ = _sparseness(H.ravel())
    self.data_sparseness_ = _sparseness(W.ravel())

    if not sp.issparse(X):
        self.reconstruction_err_ = norm(X - np.dot(W, H))

    self.components_ = H

    if n_iter == self.max_iter:
        warnings.warn("Iteration limit reached during fit")

    return W
def _joint_log_likelihood(self, X):
    """Calculate the posterior log probability of the samples X."""
    X = atleast2d_or_csr(X)
    return (safe_sparse_dot(X, self.feature_log_prob_.T)
            + self.class_log_prior_)
def partial_fit(self, X, y, classes=None, sample_weight=None):
    """Incremental fit on a batch of samples.

    This method is expected to be called several times consecutively
    on different chunks of a dataset so as to implement out-of-core
    or online learning.

    This is especially useful when the whole dataset is too big to fit in
    memory at once.

    This method has some performance overhead, hence it is better to call
    partial_fit on chunks of data that are as large as possible
    (as long as they fit in the memory budget) to hide the overhead.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape = [n_samples]
        Target values.

    classes : array-like, shape = [n_classes]
        List of all the classes that can possibly appear in the y vector.

        Must be provided at the first call to partial_fit, can be omitted
        in subsequent calls.

    sample_weight : array-like, shape = [n_samples], optional
        Weights applied to individual samples (1. for unweighted).

    Returns
    -------
    self : object
        Returns self.
    """
    X = atleast2d_or_csr(X, dtype=np.float64)
    _, n_features = X.shape

    if _check_partial_fit_first_call(self, classes):
        # This is the first call to partial_fit:
        # initialize various cumulative counters
        n_effective_classes = len(classes) if len(classes) > 1 else 2
        self.class_count_ = np.zeros(n_effective_classes, dtype=np.float64)
        self.feature_count_ = np.zeros((n_effective_classes, n_features),
                                       dtype=np.float64)

    Y = label_binarize(y, classes=self.classes_)
    if Y.shape[1] == 1:
        Y = np.concatenate((1 - Y, Y), axis=1)

    n_samples, n_classes = Y.shape

    if X.shape[0] != Y.shape[0]:
        msg = "X.shape[0]=%d and y.shape[0]=%d are incompatible."
        raise ValueError(msg % (X.shape[0], y.shape[0]))

    # convert to float to support sample weight consistently
    Y = Y.astype(np.float64)
    if sample_weight is not None:
        Y *= array2d(sample_weight).T

    # Count raw events from data before updating the class log prior
    # and feature log probas
    self._count(X, Y)

    # XXX: OPTIM: we could introduce a public finalization method to
    # be called by the user explicitly just once after several consecutive
    # calls to partial_fit and prior to any call to predict[_[log_]proba]
    # to avoid computing the smooth log probas at each call to partial_fit
    self._update_feature_log_prob()
    self._update_class_log_prior()
    return self
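# Usage sketch of the partial_fit contract documented above, assuming
# scikit-learn's MultinomialNB (which follows the same protocol): the full
# list of classes is required on the first call only.
import numpy as np
from sklearn.naive_bayes import MultinomialNB

rng = np.random.RandomState(0)
X = rng.randint(0, 5, size=(100, 10))
y = rng.randint(0, 3, size=100)

clf = MultinomialNB()
for start in range(0, 100, 25):                 # four chunks of 25 samples
    chunk = slice(start, start + 25)
    if start == 0:
        clf.partial_fit(X[chunk], y[chunk], classes=np.array([0, 1, 2]))
    else:
        clf.partial_fit(X[chunk], y[chunk])
print(clf.predict(X[:5]))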