def test_nmf_regularization():
    # Compare penalized fits (alpha=0.5) with unpenalized fits (alpha=0.)
    # for a pure L1 penalty and then for a pure L2 penalty.
    n_samples = 6
    n_features = 5
    n_components = 3
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(n_samples, n_features))

    def fit_both(solver, l1_ratio):
        # Build both estimators, fit the penalized one first and then the
        # unpenalized baseline, and hand back their W and H factors.
        shared = dict(n_components=n_components, solver=solver,
                      l1_ratio=l1_ratio, random_state=42)
        penalized = nmf.NMF(alpha=0.5, **shared)
        baseline = nmf.NMF(alpha=0., **shared)
        W_pen = penalized.fit_transform(X)
        W_base = baseline.fit_transform(X)
        return W_pen, penalized.components_, W_base, baseline.components_

    def n_zeros(arr):
        # Number of entries that are exactly zero.
        return np.count_nonzero(arr == 0)

    # L1 regularization should increase the number of zeros
    for solver in ('cd', 'mu'):
        W_pen, H_pen, W_base, H_base = fit_both(solver, l1_ratio=1.)
        assert_greater(n_zeros(W_pen), n_zeros(W_base))
        assert_greater(n_zeros(H_pen), n_zeros(H_base))

    # L2 regularization should decrease the mean of the coefficients
    for solver in ('cd', 'mu'):
        W_pen, H_pen, W_base, H_base = fit_both(solver, l1_ratio=0.)
        assert_greater(W_base.mean(), W_pen.mean())
        assert_greater(H_base.mean(), H_pen.mean())
def test_parameter_checking():
    # Invalid string parameters and negative input data must be rejected
    # with a ValueError carrying the expected message.
    ones = np.ones((2, 2))
    bogus = 'spam'

    # Unknown option names for solver / init / sparseness.
    expected = "Invalid solver parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, expected,
                         nmf.NMF(solver=bogus).fit, ones)

    expected = "Invalid init parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, expected,
                         nmf.NMF(init=bogus).fit, ones)

    expected = "Invalid sparseness parameter: got 'spam' instead of one of"
    assert_raise_message(ValueError, expected,
                         nmf.NMF(sparseness=bogus).fit, ones)

    # Negative data passed to fit, to the initializer and to transform.
    expected = "Negative values in data passed to"
    assert_raise_message(ValueError, expected, nmf.NMF().fit, -ones)
    assert_raise_message(ValueError, expected,
                         nmf._initialize_nmf, -ones, 2, 'nndsvd')

    fitted = nmf.NMF(2, tol=0.1).fit(ones)
    assert_raise_message(ValueError, expected, fitted.transform, -ones)
def test_sparse_input():
    # Fitting on a dense matrix and on its sparse (CSC) copy should give
    # almost equal factors for every solver.
    # NOTE(review): another ``test_sparse_input`` is defined further down in
    # this source; if both stay in one module the later one shadows this one.
    from scipy.sparse import csc_matrix

    dense = np.abs(random_state.randn(10, 10))
    dense[:, 2 * np.arange(5)] = 0  # zero out every other column
    sparse = csc_matrix(dense)

    for solver in ('proj-grad', 'coordinate', 'greedy'):
        dense_est = nmf.NMF(solver=solver, n_components=5, init='random',
                            random_state=0, tol=1e-2)
        sparse_est = clone(dense_est)

        W_dense = dense_est.fit_transform(dense)
        W_sparse = sparse_est.fit_transform(sparse)
        H_dense = dense_est.components_
        H_sparse = sparse_est.components_

        # The stored reconstruction error must agree with one recomputed
        # from the factors of the sparse fit.
        recomputed_err = nmf._safe_compute_error(dense, W_sparse, H_sparse)
        assert_array_almost_equal(sparse_est.reconstruction_err_,
                                  recomputed_err)

        assert_array_almost_equal(W_dense, W_sparse)
        assert_array_almost_equal(H_dense, H_sparse)
def test_nmf_transform():
    # transform() applied to the training data should stay close to the
    # output of fit_transform() (compared to two decimals).
    data = np.abs(random_state.randn(6, 5))
    for solver in ('proj-grad', 'coordinate', 'greedy'):
        model = nmf.NMF(solver=solver, n_components=4, init='nndsvd',
                        random_state=0)
        from_fit = model.fit_transform(data)
        from_transform = model.transform(data)
        assert_array_almost_equal(from_fit, from_transform, decimal=2)
def test_nmf_fit_nn_output():
    # Neither factor of the decomposition may contain a negative entry,
    # whatever the solver / init combination.
    base = 5 * np.ones(5)
    steps = np.arange(1, 6)
    A = np.c_[base - steps, base + steps]

    for solver in ('proj-grad', 'coordinate', 'greedy'):
        for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar'):
            model = nmf.NMF(n_components=2, solver=solver, init=init,
                            random_state=0)
            W = model.fit_transform(A)
            found_negative = (model.components_ < 0).any() or (W < 0).any()
            assert_false(found_negative)
def get_topics(X, n_topics):
    """
    Factorize X as W*H with NMF (l1_ratio=0.5, 'nndsvd' initialization).

    :param X: vectorized input
    :param n_topics: number of topics, used as the number of NMF components
    :return: W (output of fit_transform), H (the fitted components_) and
        the fitted NMF model itself
    """
    model = sknmf.NMF(n_components=n_topics, l1_ratio=0.5, init='nndsvd')
    W = model.fit_transform(X)
    return W, model.components_, model
def test_sparse_transform():
    """Check that transform accepts sparse input (issue #2124)."""
    # NOTE(review): another ``test_sparse_transform`` follows in this source;
    # if both stay in one module the later definition shadows this one.
    from scipy.sparse import csc_matrix

    dense = np.abs(random_state.randn(5, 4))
    dense[dense > 1.0] = 0
    sparse = csc_matrix(dense)

    model = nmf.NMF()
    from_fit = model.fit_transform(sparse)
    from_transform = model.transform(sparse)

    # Compared to two decimals only: the solver was observed to be pretty
    # inconsistent between fit_transform and transform.
    assert_array_almost_equal(from_fit, from_transform, decimal=2)
def test_sparse_transform():
    # transform() must work on sparse data (issue #2124) and stay close to
    # fit_transform() for both the 'pg' and 'cd' solvers.
    dense = np.abs(random_state.randn(3, 2))
    dense[dense > 1.0] = 0
    sparse = csc_matrix(dense)

    for solver in ('pg', 'cd'):
        model = nmf.NMF(solver=solver, random_state=0, tol=1e-4,
                        n_components=2)
        from_fit = model.fit_transform(sparse)
        from_transform = model.transform(sparse)
        assert_array_almost_equal(from_fit, from_transform, decimal=1)
def draw_rec_err(l1ratio, location, X, n_topics_range=None):
    """Plot NMF reconstruction error and component density per topic count.

    For every candidate number of topics an NMF model (``init='nndsvd'``) is
    fitted on ``X``. Its reconstruction error and the fraction of non-zero
    entries in ``components_`` are printed and collected. Both curves are
    drawn as two stacked subplots and the figure is saved to ``location``.

    :param l1ratio: value forwarded to NMF as ``l1_ratio``
    :param location: destination handed to ``plt.savefig``
    :param X: data the NMF models are fitted on
    :param n_topics_range: iterable of topic counts to evaluate; when omitted
        the original sweep ``range(10, 80, 2)`` is used
    """
    import matplotlib.pyplot as plt

    if n_topics_range is None:
        n_topics_range = range(10, 80, 2)

    topic_counts = []
    errors = []
    densities = []
    for n_topics in n_topics_range:
        model = sknmf.NMF(n_components=n_topics, l1_ratio=l1ratio,
                          init='nndsvd')
        # Only the fitted attributes are needed, so W is not kept.
        model.fit(X)

        components = model.components_
        n_nonzero = len(components.nonzero()[0])
        # float() forces true division even where ``/`` on two ints would
        # truncate (Python 2 without ``from __future__ import division``).
        density = float(n_nonzero) / (components.shape[0] *
                                      components.shape[1])

        print('\nNumber of topics: {}, reconstruction error: {}'.format(
            n_topics, model.reconstruction_err_
        ))
        print('Topics: shape: {}, nonzeros: {}, density: {}'.format(
            components.shape, n_nonzero, density
        ))

        topic_counts.append(n_topics)
        errors.append(model.reconstruction_err_)
        densities.append(density)

    def draw_panel(position, values, marker, label):
        # One subplot: coloured markers plus a black connecting line.
        plt.subplot(position)
        plt.plot(topic_counts, values, marker, topic_counts, values, 'k')
        plt.title(label)
        plt.grid(True)
        plt.ylabel(label)
        plt.xlabel('Number of Topics')

    plt.figure(1)
    draw_panel(211, errors, 'bo', 'Reconstruction error')
    draw_panel(212, densities, 'ro', 'Density')
    plt.tight_layout()
    plt.savefig(location)
    plt.close()
def test_non_negative_matrix_factorization_consistency():
    # The functional interface and the NMF estimator, given the same
    # settings, should produce the same W (compared to ten decimals).
    A = np.abs(random_state.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0  # zero out every other column

    for solver in ('proj-grad', 'coordinate', 'greedy'):
        shared = dict(solver=solver, random_state=1, tol=1e-2)

        # Function API: a full factorization, then W alone with H held fixed.
        W_func, H, _ = nmf.non_negative_matrix_factorization(A, **shared)
        W_func_fixed_H, _, _ = nmf.non_negative_matrix_factorization(
            A, H=H, update_H=False, **shared)

        # Estimator API: fit_transform, then transform with the fitted H.
        estimator = nmf.NMF(**shared)
        W_est = estimator.fit_transform(A)
        W_est_transform = estimator.transform(A)

        assert_array_almost_equal(W_func, W_est, decimal=10)
        assert_array_almost_equal(W_func_fixed_H, W_est_transform,
                                  decimal=10)
def test_sparse_input():
    # A dense matrix and its sparse (CSC) copy should yield almost equal
    # W and H for both the 'pg' and 'cd' solvers.
    from scipy.sparse import csc_matrix

    dense = np.abs(random_state.randn(10, 10))
    dense[:, 2 * np.arange(5)] = 0  # zero out every other column
    sparse = csc_matrix(dense)

    for solver in ('pg', 'cd'):
        dense_est = nmf.NMF(solver=solver, n_components=5, init='random',
                            random_state=0, tol=1e-2)
        sparse_est = clone(dense_est)

        W_dense = dense_est.fit_transform(dense)
        W_sparse = sparse_est.fit_transform(sparse)

        assert_array_almost_equal(W_dense, W_sparse)
        assert_array_almost_equal(dense_est.components_,
                                  sparse_est.components_)
def pretreat_by_nmf(data, n_components=300, max_iter=5000, tol=0.05):
    """Fit an NMF model on ``data`` and return the transformed data.

    The input must be NumPy data, i.e. an array; it may be either a matrix
    or an ndarray.

    :param data: input handed to ``NMF.fit_transform``
    :param n_components: number of NMF components (default 300)
    :param max_iter: iteration limit passed to NMF (default 5000)
    :param tol: stopping tolerance passed to NMF (default 0.05)
    :return: the result of ``NMF.fit_transform(data)``
    """
    # Import NMF from the public package. The ``sklearn.decomposition.nmf``
    # submodule path was deprecated and later removed from scikit-learn,
    # whereas ``sklearn.decomposition.NMF`` is the documented location.
    from sklearn.decomposition import NMF

    nmf_model = NMF(n_components=n_components, max_iter=max_iter, tol=tol)
    return nmf_model.fit_transform(data)
def test_nmf_fit_close():
    # After fitting, the reconstruction error should stay below 0.05 for
    # every solver.
    for solver in ('proj-grad', 'coordinate', 'greedy'):
        model = nmf.NMF(5, solver=solver, init='nndsvda', random_state=0)
        # New data is drawn from the shared random_state on each iteration.
        X = np.abs(random_state.randn(6, 5))
        fitted = model.fit(X)
        assert_less(fitted.reconstruction_err_, 0.05)
def test_n_components_greater_n_features():
    # Smoke test: fitting with more components (15) than features (10)
    # should simply run.
    data = np.abs(random_state.randn(30, 10))
    model = nmf.NMF(n_components=15, sparseness='data', random_state=0,
                    tol=1e-2)
    model.fit(data)