Example #1
0
def test_nmf_regularization():
    """Check the effect of the L1 and L2 penalties on the NMF factors.

    A penalized estimator (alpha=0.5) is compared with an unpenalized one
    (alpha=0) for the 'cd' and 'mu' solvers, on the same random
    non-negative 6 x 5 matrix and with the same random_state.
    """
    n_samples, n_features, n_components = 6, 5, 3
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(n_samples, n_features))

    def fit_factors(solver, alpha, l1_ratio):
        # Fit a single estimator on X and hand back its (W, H) pair.
        estimator = nmf.NMF(n_components=n_components,
                            solver=solver,
                            alpha=alpha,
                            l1_ratio=l1_ratio,
                            random_state=42)
        W = estimator.fit_transform(X)
        return W, estimator.components_

    # With l1_ratio = 1 the penalty is expected to create more exact zeros
    # in both factors than the unpenalized fit.
    for solver in ['cd', 'mu']:
        W_regul, H_regul = fit_factors(solver, alpha=0.5, l1_ratio=1.)
        W_model, H_model = fit_factors(solver, alpha=0., l1_ratio=1.)

        assert_greater(np.count_nonzero(W_regul == 0),
                       np.count_nonzero(W_model == 0))
        assert_greater(np.count_nonzero(H_regul == 0),
                       np.count_nonzero(H_model == 0))

    # With l1_ratio = 0 the penalty is expected to shrink the coefficients,
    # i.e. lower the mean of both factors.
    for solver in ['cd', 'mu']:
        W_regul, H_regul = fit_factors(solver, alpha=0.5, l1_ratio=0.)
        W_model, H_model = fit_factors(solver, alpha=0., l1_ratio=0.)

        assert_greater(W_model.mean(), W_regul.mean())
        assert_greater(H_model.mean(), H_regul.mean())
Example #2
0
def test_parameter_checking():
    """Check that invalid option values and negative data raise ValueError."""
    A = np.ones((2, 2))
    name = 'spam'

    # Each unsupported string option must be reported with its own message
    # when the estimator is fitted.
    invalid_options = (
        ({'solver': name},
         "Invalid solver parameter: got 'spam' instead of one of"),
        ({'init': name},
         "Invalid init parameter: got 'spam' instead of one of"),
        ({'sparseness': name},
         "Invalid sparseness parameter: got 'spam' instead of one of"),
    )
    for kwargs, msg in invalid_options:
        assert_raise_message(ValueError, msg, nmf.NMF(**kwargs).fit, A)

    # Negative data must be refused by fit, by the initializer, and by
    # transform on an estimator that was fitted on valid data.
    msg = "Negative values in data passed to"
    assert_raise_message(ValueError, msg, nmf.NMF().fit, -A)
    assert_raise_message(ValueError, msg,
                         nmf._initialize_nmf, -A, 2, 'nndsvd')
    fitted = nmf.NMF(2, tol=0.1).fit(A)
    assert_raise_message(ValueError, msg, fitted.transform, -A)
Example #3
0
def test_sparse_input():
    """Check that dense and sparse inputs give the same factorization."""
    from scipy.sparse import csc_matrix

    # Dense non-negative matrix with every even-indexed column zeroed out,
    # plus a CSC copy of it.
    A = np.abs(random_state.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0
    A_sparse = csc_matrix(A)

    for solver in ('proj-grad', 'coordinate', 'greedy'):
        dense_est = nmf.NMF(solver=solver,
                            n_components=5,
                            init='random',
                            random_state=0,
                            tol=1e-2)
        sparse_est = clone(dense_est)

        W_dense = dense_est.fit_transform(A)
        W_sparse = sparse_est.fit_transform(A_sparse)
        H_dense = dense_est.components_
        H_sparse = sparse_est.components_

        # The stored error must match the one recomputed from the factors.
        expected_err = nmf._safe_compute_error(A, W_sparse, H_sparse)
        assert_array_almost_equal(sparse_est.reconstruction_err_,
                                  expected_err)

        assert_array_almost_equal(W_dense, W_sparse)
        assert_array_almost_equal(H_dense, H_sparse)
Example #4
0
def test_nmf_transform():
    """Check that transform stays close to the output of fit_transform."""
    A = np.abs(random_state.randn(6, 5))
    for solver in ('proj-grad', 'coordinate', 'greedy'):
        model = nmf.NMF(solver=solver,
                        n_components=4,
                        init='nndsvd',
                        random_state=0)
        fitted = model.fit_transform(A)
        transformed = model.transform(A)
        # Only two decimals of agreement are required.
        assert_array_almost_equal(fitted, transformed, decimal=2)
Example #5
0
def test_nmf_fit_nn_output():
    """Check that neither factor of the decomposition has negative values."""
    ramp = np.arange(1, 6)
    base = 5 * np.ones(5)
    A = np.c_[base - ramp, base + ramp]

    for solver in ('proj-grad', 'coordinate', 'greedy'):
        for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar'):
            model = nmf.NMF(n_components=2,
                            solver=solver,
                            init=init,
                            random_state=0)
            transf = model.fit_transform(A)

            negative_in_H = (model.components_ < 0).any()
            negative_in_W = (transf < 0).any()
            assert_false(negative_in_H or negative_in_W)
def get_topics(X, n_topics):
    """
    Factorize X with NMF so that X is approximated by W*H.

    The estimator is created with ``l1_ratio=0.5`` and ``init='nndsvd'``.

    :param X: vectorized input
    :param n_topics: number of topics (used as ``n_components``)
    :return: tuple ``(W, H, model)`` where ``W`` is the result of
        ``fit_transform(X)``, ``H`` is ``components_`` and ``model`` is the
        fitted estimator
    """
    model = sknmf.NMF(n_components=n_topics, l1_ratio=0.5, init='nndsvd')
    document_topics = model.fit_transform(X)
    return document_topics, model.components_, model
Example #7
0
def test_sparse_transform():
    """Check that transform accepts sparse data (issue #2124)."""
    from scipy.sparse import csc_matrix

    dense = np.abs(random_state.randn(5, 4))
    dense[dense > 1.0] = 0  # zero out the large entries
    A = csc_matrix(dense)

    model = nmf.NMF()
    fitted = model.fit_transform(A)
    transformed = model.transform(A)
    # The solver is rather inconsistent, so only two decimals are compared.
    assert_array_almost_equal(fitted, transformed, decimal=2)
Example #8
0
def test_sparse_transform():
    """Check that transform accepts sparse data for each solver (issue #2124)."""
    dense = np.abs(random_state.randn(3, 2))
    dense[dense > 1.0] = 0  # zero out the large entries
    A = csc_matrix(dense)

    for solver in ('pg', 'cd'):
        model = nmf.NMF(solver=solver,
                        random_state=0,
                        tol=1e-4,
                        n_components=2)
        fitted = model.fit_transform(A)
        transformed = model.transform(A)
        # Only one decimal of agreement is required here.
        assert_array_almost_equal(fitted, transformed, decimal=1)
def draw_rec_err(l1ratio, location, X):
    """Plot NMF reconstruction error and topic density against topic count.

    For every even number of topics from 10 to 78 an NMF model
    (``init='nndsvd'``) is fitted to ``X``.  Its reconstruction error and the
    fraction of non-zero entries in ``components_`` are printed and
    collected.  Both series are then drawn as two stacked subplots and the
    figure is written to ``location``.

    :param l1ratio: value forwarded to the estimator as ``l1_ratio``
    :param location: destination passed to ``plt.savefig``
    :param X: input passed to ``fit_transform``
    :return: None
    """
    import matplotlib.pyplot as plt

    topic_counts = []
    errors = []
    densities = []
    for n_topics in range(10, 80, 2):
        model = sknmf.NMF(n_components=n_topics, l1_ratio=l1ratio,
                          init='nndsvd')
        # Only the fitted attributes are needed; the returned W is unused.
        model.fit_transform(X)

        components = model.components_
        # Count the non-zero entries once and reuse the result.  The float
        # conversion keeps this a true division on Python 2 as well.
        n_nonzero = len(components.nonzero()[0])
        density = float(n_nonzero) / (
            components.shape[0] * components.shape[1])

        print('\nNumber of topics: {}, reconstruction error: {}'.format(
            n_topics, model.reconstruction_err_
        ))
        print('Topics: shape: {}, nonzeros: {}, density: {}'.format(
            components.shape, n_nonzero, density
        ))

        topic_counts.append(n_topics)
        errors.append(model.reconstruction_err_)
        densities.append(density)

    fig = plt.figure(1)
    try:
        plt.subplot(211)
        plt.plot(topic_counts, errors, 'bo', topic_counts, errors, 'k')
        plt.title('Reconstruction error')
        plt.grid(True)
        plt.ylabel('Reconstruction error')
        plt.xlabel('Number of Topics')

        plt.subplot(212)
        plt.plot(topic_counts, densities, 'ro', topic_counts, densities, 'k')
        plt.title('Density')
        plt.grid(True)
        plt.ylabel('Density')
        plt.xlabel('Number of Topics')
        plt.tight_layout()
        plt.savefig(location)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
Example #10
0
def test_non_negative_matrix_factorization_consistency():
    """Check that the function and the NMF class produce the same results.

    ``non_negative_matrix_factorization`` is called directly, once updating
    H and once with H fixed, and compared with ``NMF.fit_transform`` and
    ``NMF.transform`` respectively.
    """
    A = np.abs(random_state.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0

    for solver in ('proj-grad', 'coordinate', 'greedy'):
        # Settings shared by the function calls and the estimator.
        shared = dict(solver=solver, random_state=1, tol=1e-2)

        W_nmf, H, _ = nmf.non_negative_matrix_factorization(A, **shared)
        W_nmf_2, _, _ = nmf.non_negative_matrix_factorization(
            A, H=H, update_H=False, **shared)

        estimator = nmf.NMF(**shared)
        W_cls = estimator.fit_transform(A)
        W_cls_2 = estimator.transform(A)

        assert_array_almost_equal(W_nmf, W_cls, decimal=10)
        assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
Example #11
0
def test_sparse_input():
    """Check that dense and sparse inputs give the same factors."""
    from scipy.sparse import csc_matrix

    # Dense non-negative matrix with every even-indexed column zeroed out,
    # plus a CSC copy of it.
    A = np.abs(random_state.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0
    A_sparse = csc_matrix(A)

    for solver in ('pg', 'cd'):
        dense_est = nmf.NMF(solver=solver,
                            n_components=5,
                            init='random',
                            random_state=0,
                            tol=1e-2)
        sparse_est = clone(dense_est)

        W_dense = dense_est.fit_transform(A)
        W_sparse = sparse_est.fit_transform(A_sparse)
        H_dense = dense_est.components_
        H_sparse = sparse_est.components_

        assert_array_almost_equal(W_dense, W_sparse)
        assert_array_almost_equal(H_dense, H_sparse)
def pretreat_by_nmf(data, n_components=300, max_iter=5000, tol=0.05):
    """Pre-process ``data`` with NMF and return the transformed result.

    The input must be numpy data, i.e. an array; either a matrix or an
    array may be passed.

    :param data: numpy matrix/array passed to ``fit_transform``
    :param n_components: forwarded to the estimator (default 300)
    :param max_iter: forwarded to the estimator (default 5000)
    :param tol: forwarded to the estimator (default 0.05)
    :return: the output of ``fit_transform(data)``
    """
    # Import the estimator from the public package.  The
    # ``sklearn.decomposition.nmf`` submodule path was deprecated in
    # scikit-learn 0.22 and removed in 0.24.
    from sklearn.decomposition import NMF

    nmf_model = NMF(n_components=n_components, max_iter=max_iter, tol=tol)
    return nmf_model.fit_transform(data)
Example #13
0
def test_nmf_fit_close():
    """Check that the reconstruction error of the fit stays below 0.05."""
    for solver in ('proj-grad', 'coordinate', 'greedy'):
        pnmf = nmf.NMF(5, solver=solver, init='nndsvda', random_state=0)
        # A fresh matrix is drawn from the shared generator for each solver.
        X = np.abs(random_state.randn(6, 5))
        fitted = pnmf.fit(X)
        assert_less(fitted.reconstruction_err_, 0.05)
Example #14
0
def test_n_components_greater_n_features():
    """Smoke test: fit with more components (15) than features (10)."""
    A = np.abs(random_state.randn(30, 10))
    model = nmf.NMF(n_components=15,
                    sparseness='data',
                    random_state=0,
                    tol=1e-2)
    model.fit(A)