def test_initialize_variants():
    # Test NNDSVD variants correctness
    # Test that the variants 'nndsvda' and 'nndsvdar' differ from basic
    # 'nndsvd' only where the basic version has zeros.
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))
    W0, H0 = nmf._initialize_nmf(data, 10, init='nndsvd')
    Wa, Ha = nmf._initialize_nmf(data, 10, init='nndsvda')
    War, Har = nmf._initialize_nmf(data, 10, init='nndsvdar', random_state=0)

    for ref, evl in ((W0, Wa), (W0, War), (H0, Ha), (H0, Har)):
        assert_almost_equal(evl[ref != 0], ref[ref != 0])

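# A minimal standalone illustration of the property tested above: the 'a'
# variant leaves the nonzero entries of plain NNDSVD untouched and only
# fills in its zeros (with the data mean, per the 'nndsvda' scheme). This
# sketch reuses the same private `nmf._initialize_nmf` helper as the test
# and is illustrative, not part of the test suite.
def demo_nndsvd_zero_filling():
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))
    W0, _ = nmf._initialize_nmf(data, 10, init='nndsvd')
    Wa, _ = nmf._initialize_nmf(data, 10, init='nndsvda')
    # Entries that were nonzero in plain NNDSVD are unchanged...
    assert np.allclose(Wa[W0 != 0], W0[W0 != 0])
    # ...while its zeros are replaced by a positive filler value.
    assert (Wa[W0 == 0] != 0).all()
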
def test_nmf_decreasing():
    # test that the objective function is decreasing at each iteration
    n_samples = 20
    n_features = 15
    n_components = 10
    alpha = 0.1
    l1_ratio = 0.5
    tol = 0.

    # initialization
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.abs(X, X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        for solver in ('cd', 'mu'):
            if solver != 'mu' and beta_loss != 2:
                # not implemented
                continue
            W, H = W0.copy(), H0.copy()
            previous_loss = None
            for _ in range(30):
                # one more iteration starting from the previous results
                W, H, _ = non_negative_factorization(
                    X, W, H, beta_loss=beta_loss, init='custom',
                    n_components=n_components, max_iter=1, alpha=alpha,
                    solver=solver, tol=tol, l1_ratio=l1_ratio, verbose=0,
                    regularization='both', random_state=0, update_H=True)

                loss = nmf._beta_divergence(X, W, H, beta_loss)
                if previous_loss is not None:
                    assert previous_loss > loss
                previous_loss = loss

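# The one-iteration-at-a-time pattern above (max_iter=1 with init='custom')
# is also a convenient way to record a loss curve for inspection outside the
# test; a sketch, reusing the same private `nmf._beta_divergence` helper:
def loss_curve(X, W, H, beta_loss=2., n_steps=30):
    losses = []
    for _ in range(n_steps):
        W, H, _ = non_negative_factorization(
            X, W, H, n_components=W.shape[1], init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, tol=0., max_iter=1)
        losses.append(nmf._beta_divergence(X, W, H, beta_loss))
    return losses
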
def test_initialize_nn_output():
    # Test that initialization does not return negative values
    rng = np.random.mtrand.RandomState(42)
    data = np.abs(rng.randn(10, 10))
    for init in ('random', 'nndsvd', 'nndsvda', 'nndsvdar'):
        W, H = nmf._initialize_nmf(data, 10, init=init, random_state=0)
        assert not ((W < 0).any() or (H < 0).any())

def test_initialize_close():
    # Test NNDSVD error
    # Test that _initialize_nmf error is less than the standard deviation of
    # the entries in the matrix.
    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    W, H = nmf._initialize_nmf(A, 10, init='nndsvd')
    error = linalg.norm(np.dot(W, H) - A)
    sdev = linalg.norm(A - A.mean())
    assert error <= sdev

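# The bound checked above compares NNDSVD's reconstruction against the
# trivial baseline that approximates every entry of A by the global mean
# (`A - A.mean()` broadcasts the scalar mean). A sketch making that baseline
# explicit, illustrative only:
def demo_nndsvd_beats_mean_baseline():
    rng = np.random.mtrand.RandomState(0)
    A = np.abs(rng.randn(10, 10))
    W, H = nmf._initialize_nmf(A, 10, init='nndsvd')
    baseline = linalg.norm(A - np.full_like(A, A.mean()))
    assert linalg.norm(np.dot(W, H) - A) <= baseline
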
def _fit_transform(self, X, y=None, W=None, H=None, update_H=True):
    X = check_array(X, accept_sparse=('csr', 'csc'))
    check_non_negative(X, "NMF (input X)")

    n_samples, n_features = X.shape
    n_components = self.n_components
    if n_components is None:
        n_components = n_features

    if (not isinstance(n_components, numbers.Integral) or
            n_components <= 0):
        raise ValueError("Number of components must be a positive integer;"
                         " got (n_components=%r)" % n_components)
    if (not isinstance(self.max_iter, numbers.Integral) or
            self.max_iter < 0):
        raise ValueError("Maximum number of iterations must be a positive "
                         "integer; got (max_iter=%r)" % self.max_iter)
    if not isinstance(self.tol, numbers.Number) or self.tol < 0:
        raise ValueError("Tolerance for stopping criteria must be "
                         "positive; got (tol=%r)" % self.tol)

    # check W and H, or initialize them
    if self.init == 'custom' and update_H:
        _check_init(H, (n_components, n_features), "NMF (input H)")
        _check_init(W, (n_samples, n_components), "NMF (input W)")
    elif not update_H:
        _check_init(H, (n_components, n_features), "NMF (input H)")
        W = np.zeros((n_samples, n_components))
    else:
        W, H = _initialize_nmf(X, n_components, init=self.init,
                               random_state=self.random_state)

    if update_H:  # fit_transform
        W, H, n_iter = _fit_projected_gradient(
            X, W, H, self.tol, self.max_iter, self.nls_max_iter,
            self.alpha, self.l1_ratio)
    else:  # transform
        Wt, _, n_iter = _nls_subproblem(X.T, H.T, W.T, self.tol,
                                        self.nls_max_iter,
                                        alpha=self.alpha,
                                        l1_ratio=self.l1_ratio)
        W = Wt.T

    if n_iter == self.max_iter and self.tol > 0:
        warnings.warn("Maximum number of iterations %d reached. Increase "
                      "it to improve convergence." % self.max_iter,
                      ConvergenceWarning)

    return W, H, n_iter

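# Conceptually, the transform path above solves for W with H held fixed,
# which decomposes into one nonnegative least squares problem per sample.
# A minimal standalone sketch using scipy.optimize.nnls; it ignores the
# alpha/l1_ratio regularization that `_nls_subproblem` supports:
def transform_with_fixed_H(X, H):
    from scipy.optimize import nnls
    # Each row x of X gives an independent problem: min_{w >= 0} ||H.T w - x||
    W = np.empty((X.shape[0], H.shape[0]))
    for i in range(X.shape[0]):
        W[i], _ = nnls(H.T, X[i])
    return W
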
def test_nmf_multiplicative_update_sparse():
    # Compare sparse and dense input in multiplicative update NMF
    # Also test continuity of the results with respect to beta_loss parameter
    n_samples = 20
    n_features = 10
    n_components = 5
    alpha = 0.1
    l1_ratio = 0.5
    n_iter = 20

    # initialization
    rng = np.random.mtrand.RandomState(1337)
    X = rng.randn(n_samples, n_features)
    X = np.abs(X)
    X_csr = sp.csr_matrix(X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        # Reference with dense array X
        W, H = W0.copy(), H0.copy()
        W1, H1, _ = non_negative_factorization(
            X, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        # Compare with sparse X
        W, H = W0.copy(), H0.copy()
        W2, H2, _ = non_negative_factorization(
            X_csr, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        assert_array_almost_equal(W1, W2, decimal=7)
        assert_array_almost_equal(H1, H2, decimal=7)

        # Compare with almost same beta_loss, since some values have a
        # specific behavior, but the results should be continuous w.r.t.
        # beta_loss
        beta_loss -= 1.e-5
        W, H = W0.copy(), H0.copy()
        W3, H3, _ = non_negative_factorization(
            X_csr, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        assert_array_almost_equal(W1, W3, decimal=4)
        assert_array_almost_equal(H1, H3, decimal=4)

def test_beta_divergence():
    # Compare _beta_divergence with the reference _beta_divergence_dense
    n_samples = 20
    n_features = 10
    n_components = 5
    beta_losses = [0., 0.5, 1., 1.5, 2.]

    # initialization
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.clip(X, 0, None, out=X)
    X_csr = sp.csr_matrix(X)
    W, H = nmf._initialize_nmf(X, n_components, init='random',
                               random_state=42)

    for beta in beta_losses:
        ref = _beta_divergence_dense(X, W, H, beta)
        loss = nmf._beta_divergence(X, W, H, beta)
        loss_csr = nmf._beta_divergence(X_csr, W, H, beta)

        assert_almost_equal(ref, loss, decimal=7)
        assert_almost_equal(ref, loss_csr, decimal=7)

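# `_beta_divergence_dense` is defined elsewhere in this module; a hedged
# sketch of what such a naive dense reference looks like. Zero entries of X
# are dropped from the log- and power-based terms, matching the usual
# convention for the beta-divergence special cases:
#   beta = 2: half the squared Frobenius norm
#   beta = 1: generalized Kullback-Leibler
#   beta = 0: Itakura-Saito
def _beta_divergence_reference(X, W, H, beta):
    WH = np.dot(W, H)
    if beta == 2:
        return np.sum((X - WH) ** 2) / 2.
    X_nz, WH_nz = X[X != 0], WH[X != 0]
    if beta == 1:
        return np.sum(X_nz * np.log(X_nz / WH_nz)) + WH.sum() - X.sum()
    if beta == 0:
        div = X_nz / WH_nz
        return np.sum(div - np.log(div)) - X.size
    return (np.sum(X_nz ** beta) + (beta - 1) * np.sum(WH ** beta)
            - beta * np.sum(X_nz * WH_nz ** (beta - 1))) / (beta * (beta - 1))
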
def run_bench(X, clfs, plot_name, n_components, tol, alpha, l1_ratio):
    start = time()
    results = []
    for name, clf_type, iter_range, clf_params in clfs:
        print("Training %s:" % name)
        for rs, init in enumerate(('nndsvd', 'nndsvdar', 'random')):
            print("    %s %s: " % (init, " " * (8 - len(init))), end="")
            W, H = _initialize_nmf(X, n_components, init, 1e-6, rs)

            for max_iter in iter_range:
                clf_params['alpha'] = alpha
                clf_params['l1_ratio'] = l1_ratio
                clf_params['max_iter'] = max_iter
                clf_params['tol'] = tol
                clf_params['random_state'] = rs
                clf_params['init'] = 'custom'
                clf_params['n_components'] = n_components

                this_loss, duration = bench_one(name, X, W, H, X.shape,
                                                clf_type, clf_params,
                                                init, n_components, rs)

                init_name = "init='%s'" % init
                results.append((name, this_loss, duration, init_name))
                # print("loss: %.6f, time: %.3f sec" % (this_loss, duration))
                print(".", end="")
                sys.stdout.flush()
            print(" ")

    # Use a pandas dataframe to organize the results
    results_df = pandas.DataFrame(results,
                                  columns="method loss time init".split())
    print("Total time = %0.3f sec\n" % (time() - start))

    # plot the results
    plot_results(results_df, plot_name)
    return results_df
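
# A hedged usage sketch for run_bench. Each `clfs` entry is a
# (name, estimator_class, iter_range, base_params) tuple consumed by the
# module's `bench_one` helper; the classes, iteration ranges, and plot name
# below are illustrative assumptions, not the benchmark's actual
# configuration.
if __name__ == '__main__':
    from sklearn.decomposition import NMF
    X = np.abs(np.random.RandomState(0).randn(200, 100))
    clfs = [("MU-NMF", NMF, [30, 100, 300], {'solver': 'mu'}),
            ("CD-NMF", NMF, [30, 100, 300], {'solver': 'cd'})]
    run_bench(X, clfs, plot_name='nmf_benchmark', n_components=10,
              tol=0., alpha=0., l1_ratio=0.5)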