def test_parameter_checking():
    # Invalid hyper-parameter values and negative input must raise ValueError
    # with an informative message.
    X = np.ones((2, 2))
    bad_value = "spam"

    with ignore_warnings(category=FutureWarning):
        # TODO remove in 1.2
        msg = "Invalid regularization parameter: got 'spam' instead of one of"
        with pytest.raises(ValueError, match=msg):
            NMF(regularization=bad_value).fit(X)

    msg = "Invalid beta_loss parameter: solver 'cd' does not handle beta_loss = 1.0"
    with pytest.raises(ValueError, match=msg):
        NMF(solver="cd", beta_loss=1.0).fit(X)

    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        NMF().fit(-X)
    estimator = NMF(2, tol=0.1).fit(X)
    with pytest.raises(ValueError, match=msg):
        estimator.transform(-X)
    with pytest.raises(ValueError, match=msg):
        nmf._initialize_nmf(-X, 2, "nndsvd")

    # NNDSVD-family inits need n_components <= min(n_samples, n_features).
    for init in ("nndsvd", "nndsvda", "nndsvdar"):
        msg = re.escape(
            f"init = '{init}' can only be used when "
            "n_components <= min(n_samples, n_features)"
        )
        with pytest.raises(ValueError, match=msg):
            NMF(3, init=init).fit(X)
        with pytest.raises(ValueError, match=msg):
            MiniBatchNMF(3, init=init).fit(X)
        with pytest.raises(ValueError, match=msg):
            nmf._initialize_nmf(X, 3, init)
def test_parameter_checking():
    # Here we only check for invalid parameter values that are not already
    # automatically tested in the common tests.
    data = np.ones((2, 2))

    msg = "Invalid beta_loss parameter: solver 'cd' does not handle beta_loss = 1.0"
    with pytest.raises(ValueError, match=msg):
        NMF(solver="cd", beta_loss=1.0).fit(data)

    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        NMF().fit(-data)
    model = NMF(2, tol=0.1).fit(data)
    with pytest.raises(ValueError, match=msg):
        model.transform(-data)
    with pytest.raises(ValueError, match=msg):
        nmf._initialize_nmf(-data, 2, "nndsvd")

    # NNDSVD-family inits need n_components <= min(n_samples, n_features).
    for init in ("nndsvd", "nndsvda", "nndsvdar"):
        msg = re.escape(
            f"init = '{init}' can only be used when "
            "n_components <= min(n_samples, n_features)"
        )
        with pytest.raises(ValueError, match=msg):
            NMF(3, init=init).fit(data)
        with pytest.raises(ValueError, match=msg):
            MiniBatchNMF(3, init=init).fit(data)
        with pytest.raises(ValueError, match=msg):
            nmf._initialize_nmf(data, 3, init)
def test_initialize_variants():
    # The 'nndsvda' and 'nndsvdar' variants must agree with the basic
    # 'nndsvd' initialization everywhere the basic version is non-zero
    # (they only fill in the zeros differently).
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))

    W0, H0 = nmf._initialize_nmf(data, 10, init='nndsvd')
    Wa, Ha = nmf._initialize_nmf(data, 10, init='nndsvda')
    War, Har = nmf._initialize_nmf(data, 10, init='nndsvdar', random_state=0)

    for reference, candidate in [(W0, Wa), (W0, War), (H0, Ha), (H0, Har)]:
        nonzero = reference != 0
        assert_almost_equal(candidate[nonzero], reference[nonzero])
def test_init_default_deprecation():
    # Relying on the default 'init' must emit a FutureWarning from every
    # public entry point.
    msg = (
        r"The 'init' value, when 'init=None' and "
        r"n_components is less than n_samples and "
        r"n_features, will be changed from 'nndsvd' to "
        r"'nndsvda' in 1.1 \(renaming of 0.26\)."
    )
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(6, 5))

    for trigger in (
        lambda: nmf._initialize_nmf(data, 3),
        lambda: NMF().fit(data),
        lambda: non_negative_factorization(data),
    ):
        with pytest.warns(FutureWarning, match=msg):
            trigger()
def test_initialize_nn_output():
    # Every supported initialization scheme must yield non-negative factors.
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))
    for init in ('random', 'nndsvd', 'nndsvda', 'nndsvdar'):
        W, H = nmf._initialize_nmf(data, 10, init=init, random_state=0)
        assert not (W < 0).any()
        assert not (H < 0).any()
def test_nmf_decreasing():
    # The objective function must decrease at every iteration, for every
    # (solver, beta_loss) combination that is implemented.
    n_samples = 20
    n_features = 15
    n_components = 10
    alpha = 0.1
    l1_ratio = 0.5
    tol = 0.

    # Shared non-negative data and starting point for all runs.
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.abs(X, X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    for solver in ('cd', 'mu'):
        for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
            if solver != 'mu' and beta_loss != 2:
                # 'cd' only supports the Frobenius loss; skip the rest.
                continue

            W, H = W0.copy(), H0.copy()
            last_loss = None
            for _ in range(30):
                # Run one more iteration, warm-started from the previous
                # factors.
                W, H, _ = non_negative_factorization(
                    X, W, H, beta_loss=beta_loss, init='custom',
                    n_components=n_components, max_iter=1, alpha=alpha,
                    solver=solver, tol=tol, l1_ratio=l1_ratio, verbose=0,
                    regularization='both', random_state=0, update_H=True)

                loss = nmf._beta_divergence(X, W, H, beta_loss)
                if last_loss is not None:
                    assert last_loss > loss
                last_loss = loss
def test_minibatch_nmf_partial_fit():
    # Check fit / partial_fit equivalence. Applicable only with fresh restarts.
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(100, 5))

    n_samples = X.shape[0]
    n_components = 5
    batch_size = 10
    max_iter = 2

    mbnmf1 = MiniBatchNMF(
        n_components=n_components,
        init="custom",
        random_state=0,
        max_iter=max_iter,
        batch_size=batch_size,
        tol=0,
        max_no_improvement=None,
        fresh_restarts=False,
    )
    mbnmf2 = MiniBatchNMF(n_components=n_components, init="custom", random_state=0)

    # Force the same init of H (W is recomputed anyway) to be able to compare results.
    W, H = nmf._initialize_nmf(
        X, n_components=n_components, init="random", random_state=0
    )

    mbnmf1.fit(X, W=W, H=H)
    for i in range(max_iter):
        # BUG FIX: feed non-overlapping mini-batches that cover all of X
        # (X[0:10], X[10:20], ...), mirroring what `fit` does internally.
        # The previous code iterated `for j in range(batch_size)` over the
        # overlapping slices X[j:j + batch_size], so partial_fit saw
        # repeated rows from only the first 19 samples and could not match
        # the components learned by `fit`.
        for j in range(n_samples // batch_size):
            batch = X[j * batch_size : (j + 1) * batch_size]
            mbnmf2.partial_fit(batch, W=W[:batch_size], H=H)

    assert mbnmf1.n_steps_ == mbnmf2.n_steps_
    assert_allclose(mbnmf1.components_, mbnmf2.components_)
def test_initialize_close():
    # The NNDSVD reconstruction error must not exceed the spread of the
    # entries in the matrix (norm of the mean-centered data).
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))
    W, H = nmf._initialize_nmf(data, 10, init='nndsvd')
    reconstruction_err = linalg.norm(np.dot(W, H) - data)
    spread = linalg.norm(data - data.mean())
    assert reconstruction_err <= spread
def test_parameter_checking():
    # Invalid hyper-parameter values and negative input must raise
    # ValueError with an informative message.
    X = np.ones((2, 2))
    bad_value = 'spam'
    # FIXME : should be removed in 1.1
    init = 'nndsvda'

    # (constructor kwargs, expected error message) pairs, checked in order.
    invalid_params = [
        (dict(solver=bad_value, init=init),
         "Invalid solver parameter: got 'spam' instead of one of"),
        (dict(init=bad_value),
         "Invalid init parameter: got 'spam' instead of one of"),
        (dict(regularization=bad_value, init=init),
         "Invalid regularization parameter: got 'spam' instead of one of"),
        (dict(solver='mu', init=init, beta_loss=bad_value),
         "Invalid beta_loss parameter: got 'spam' instead of one"),
        (dict(solver='cd', init=init, beta_loss=1.0),
         "Invalid beta_loss parameter: solver 'cd' does not handle "
         "beta_loss = 1.0"),
    ]
    for params, msg in invalid_params:
        with pytest.raises(ValueError, match=msg):
            NMF(**params).fit(X)

    msg = "Negative values in data passed to"
    with pytest.raises(ValueError, match=msg):
        NMF(init=init).fit(-X)
    with pytest.raises(ValueError, match=msg):
        nmf._initialize_nmf(-X, 2, 'nndsvd')
    model = NMF(2, tol=0.1, init=init).fit(X)
    with pytest.raises(ValueError, match=msg):
        model.transform(-X)

    # NNDSVD-family inits need n_components <= min(n_samples, n_features).
    for init in ('nndsvd', 'nndsvda', 'nndsvdar'):
        msg = re.escape(
            "init = '{}' can only be used when "
            "n_components <= min(n_samples, n_features)".format(init)
        )
        with pytest.raises(ValueError, match=msg):
            NMF(3, init=init).fit(X)
        with pytest.raises(ValueError, match=msg):
            nmf._initialize_nmf(X, 3, init)
def test_nmf_decreasing(solver):
    # The regularized objective must decrease at every iteration for the
    # given solver, over all implemented beta_loss values.
    n_samples = 20
    n_features = 15
    n_components = 10
    alpha = 0.1
    l1_ratio = 0.5
    tol = 0.0

    # Shared non-negative data and starting point for all runs.
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.abs(X, X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init="random",
                                 random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1.0, 2.0, 2.5):
        if solver != "mu" and beta_loss != 2:
            # 'cd' only supports the Frobenius loss; skip the rest.
            continue

        W, H = W0.copy(), H0.copy()
        last_loss = None
        for _ in range(30):
            # Run one more iteration, warm-started from the previous factors.
            W, H, _ = non_negative_factorization(
                X, W, H, beta_loss=beta_loss, init="custom",
                n_components=n_components, max_iter=1, alpha_W=alpha,
                solver=solver, tol=tol, l1_ratio=l1_ratio, verbose=0,
                random_state=0, update_H=True,
            )

            # Full objective: beta-divergence plus the L1/L2 penalties on
            # W and H (same terms, same summation order as the solver).
            loss = (
                nmf._beta_divergence(X, W, H, beta_loss)
                + alpha * l1_ratio * n_features * W.sum()
                + alpha * l1_ratio * n_samples * H.sum()
                + alpha * (1 - l1_ratio) * n_features * (W**2).sum()
                + alpha * (1 - l1_ratio) * n_samples * (H**2).sum()
            )

            if last_loss is not None:
                assert last_loss > loss
            last_loss = loss
def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
    """Learn an NMF model for X with the projected gradient solver.

    Parameters
    ----------
    X : array-like or sparse matrix of shape (n_samples, n_features)
        Non-negative data matrix to decompose.
    y : Ignored
        Present for API consistency.
    W : array-like of shape (n_samples, n_components), optional
        Initial guess for W; only validated/used when ``self.init ==
        'custom'`` and ``update_H`` is True.
    H : array-like of shape (n_components, n_features), optional
        Initial guess for H; used with ``init == 'custom'`` or, fixed,
        when ``update_H`` is False (transform mode).
    update_H : bool, default=True
        If True, optimize both W and H (fit). If False, keep H fixed and
        solve only for W (transform).

    Returns
    -------
    W, H, n_iter : the learned factors and the number of iterations run.
    """
    # Validate input: accept CSR/CSC sparse matrices, reject negative values.
    X = check_array(X, accept_sparse=('csr', 'csc'))
    check_non_negative(X, "NMF (input X)")

    n_samples, n_features = X.shape
    n_components = self.n_components
    if n_components is None:
        # Default to a full-rank factorization.
        n_components = n_features

    # Scalar hyper-parameter validation, each with an explicit message.
    if (not isinstance(n_components, numbers.Integral) or
            n_components <= 0):
        raise ValueError("Number of components must be a positive integer;"
                         " got (n_components=%r)" % n_components)
    if (not isinstance(self.max_iter, numbers.Integral) or
            self.max_iter < 0):
        raise ValueError("Maximum number of iterations must be a positive "
                         "integer; got (max_iter=%r)" % self.max_iter)
    if not isinstance(self.tol, numbers.Number) or self.tol < 0:
        raise ValueError("Tolerance for stopping criteria must be "
                         "positive; got (tol=%r)" % self.tol)

    # check W and H, or initialize them
    if self.init == 'custom' and update_H:
        # Both factors supplied by the caller; only check their shapes.
        _check_init(H, (n_components, n_features), "NMF (input H)")
        _check_init(W, (n_samples, n_components), "NMF (input W)")
    elif not update_H:
        # Transform mode: H is fixed, W is solved for from scratch.
        _check_init(H, (n_components, n_features), "NMF (input H)")
        W = np.zeros((n_samples, n_components))
    else:
        # Fresh initialization according to self.init.
        W, H = _initialize_nmf(X, n_components, init=self.init,
                               random_state=self.random_state)

    if update_H:  # fit_transform
        W, H, n_iter = _fit_projected_gradient(
            X, W, H, self.tol, self.max_iter, self.nls_max_iter,
            self.alpha, self.l1_ratio)
    else:  # transform
        # Solve the NNLS subproblem for W with H fixed; the problem is
        # transposed so the solver updates the factor in the "H" position.
        Wt, _, n_iter = _nls_subproblem(X.T, H.T, W.T, self.tol,
                                        self.nls_max_iter,
                                        alpha=self.alpha,
                                        l1_ratio=self.l1_ratio)
        W = Wt.T

    if n_iter == self.max_iter and self.tol > 0:
        # Only warn about non-convergence when a tolerance was requested.
        warnings.warn("Maximum number of iteration %d reached. Increase it"
                      " to improve convergence." % self.max_iter,
                      ConvergenceWarning)

    return W, H, n_iter
def test_nmf_multiplicative_update_sparse():
    # The 'mu' solver must give (almost) the same result on sparse and dense
    # input, and results must be continuous w.r.t. the beta_loss parameter.
    n_samples = 20
    n_features = 10
    n_components = 5
    alpha = 0.1
    l1_ratio = 0.5
    n_iter = 20

    rng = np.random.mtrand.RandomState(1337)
    X = np.abs(rng.randn(n_samples, n_features))
    X_csr = sp.csr_matrix(X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    def factorize(data, beta):
        # Fresh copies so every run starts from the same initialization.
        W, H = W0.copy(), H0.copy()
        return non_negative_factorization(
            data, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        # Reference with dense array X vs the same run on sparse X.
        W1, H1, _ = factorize(X, beta_loss)
        W2, H2, _ = factorize(X_csr, beta_loss)
        assert_array_almost_equal(W1, W2, decimal=7)
        assert_array_almost_equal(H1, H2, decimal=7)

        # A tiny perturbation of beta_loss should barely change the result,
        # even though some beta values trigger specialized code paths.
        W3, H3, _ = factorize(X_csr, beta_loss - 1.e-5)
        assert_array_almost_equal(W1, W3, decimal=4)
        assert_array_almost_equal(H1, H3, decimal=4)
def test_beta_divergence():
    # _beta_divergence must match the dense reference implementation
    # (_beta_divergence_dense) for both dense and sparse input.
    n_samples = 20
    n_features = 10
    n_components = 5
    beta_losses = [0., 0.5, 1., 1.5, 2.]

    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = sp.csr_matrix(X)
    W, H = nmf._initialize_nmf(X, n_components, init='random',
                               random_state=42)

    for beta in beta_losses:
        expected = _beta_divergence_dense(X, W, H, beta)
        assert_almost_equal(expected, nmf._beta_divergence(X, W, H, beta),
                            decimal=7)
        assert_almost_equal(expected,
                            nmf._beta_divergence(X_csr, W, H, beta),
                            decimal=7)
def run_bench(X, clfs, plot_name, n_components, tol, alpha, l1_ratio):
    # Benchmark every estimator in `clfs` over several initializations and
    # iteration budgets, collect (loss, time) records, and plot the results.
    start = time()
    records = []

    for name, clf_type, iter_range, clf_params in clfs:
        print("Training %s:" % name)
        for rs, init in enumerate(('nndsvd', 'nndsvdar', 'random')):
            print(" %s %s: " % (init, " " * (8 - len(init))), end="")
            # One shared starting point per (init, random_state) pair.
            W, H = _initialize_nmf(X, n_components, init, 1e-6, rs)

            for max_iter in iter_range:
                clf_params.update(
                    alpha=alpha,
                    l1_ratio=l1_ratio,
                    max_iter=max_iter,
                    tol=tol,
                    random_state=rs,
                    init='custom',
                    n_components=n_components,
                )
                this_loss, duration = bench_one(name, X, W, H, X.shape,
                                                clf_type, clf_params,
                                                init, n_components, rs)
                records.append((name, this_loss, duration,
                                "init='%s'" % init))
                # print("loss: %.6f, time: %.3f sec" % (this_loss, duration))
                print(".", end="")
                sys.stdout.flush()
            print(" ")

    # Use a panda dataframe to organize the results
    results_df = pandas.DataFrame(records,
                                  columns="method loss time init".split())
    print("Total time = %0.3f sec\n" % (time() - start))

    # plot the results
    plot_results(results_df, plot_name)
    return results_df