예제 #1
0
def test_l1_regularization(solver):
    """Check that an L1 penalty produces more exact zeros in the factors.

    Two CMF models that differ only in ``l1_reg`` (2.0 versus 0.0) are fitted
    on the same non-negative random matrices; the total number of zero
    entries over U, V and Z is then compared.

    Parameters
    ----------
    solver
        Forwarded unchanged as the ``solver`` argument of both CMF models.
    """
    def count_zeros(matrix):
        # Number of entries that are exactly zero.
        return matrix[matrix == 0].size

    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(6, 5))
    Y = np.abs(rng.randn(5, 4))

    shared = dict(n_components=3, solver=solver, random_state=42)
    penalized = CMF(l1_reg=2., **shared)
    baseline = CMF(l1_reg=0., **shared)

    U_pen, V_pen, Z_pen = penalized.fit_transform(X, Y)
    U_base, V_base, Z_base = baseline.fit_transform(X, Y)

    # If one matrix is full of zeros, the other matrices may end up with
    # fewer zeros, so the comparison is made on the total over all three.
    zeros_penalized = sum(count_zeros(m) for m in (U_pen, V_pen, Z_pen))
    zeros_baseline = sum(count_zeros(m) for m in (U_base, V_base, Z_base))

    assert_greater(zeros_penalized, zeros_baseline,
                   "solver: {}".format(solver))
예제 #2
0
def test_analysis():
    """Smoke test: fit on a tiny text corpus, then print the topic terms.

    ``print_topic_terms`` is called once with ``importances=False`` and once
    with ``importances=True``; nothing is asserted beyond the calls
    completing.
    """
    documents = ["hello world",
                 "goodbye world",
                 "hello goodbye"]
    rng = np.random.mtrand.RandomState(36)
    model = CMF(n_components=2, solver="newton", max_iter=1)
    vectorizer = CountVectorizer()
    term_counts = csr_matrix(vectorizer.fit_transform(documents))
    Y = np.abs(rng.randn(3, 1))
    # The transposed count matrix is what gets factorized together with Y.
    model.fit_transform(term_counts.T, Y)
    for show_importances in (False, True):
        model.print_topic_terms(vectorizer, importances=show_importances)
예제 #3
0
def test_transform_custom_init():
    """Smoke test: ``CMF.fit_transform`` accepts user-supplied U, V and Z.

    The initial factors are absolute Gaussian noise scaled by
    ``sqrt(mean / n_components)`` of the matrix they relate to (X for U and
    V, Y for Z).
    """
    n_components = 4
    rs = np.random.RandomState(0)
    X = np.abs(rs.randn(6, 5))
    Y = np.abs(rs.randn(5, 1))

    def scaled_abs_noise(reference, n_rows):
        # Draws from the shared generator, so the call order below matters.
        scale = np.sqrt(reference.mean() / n_components)
        return np.abs(scale * rs.randn(n_rows, n_components))

    U_init = scaled_abs_noise(X, 6)
    V_init = scaled_abs_noise(X, 5)
    Z_init = scaled_abs_noise(Y, 1)

    estimator = CMF(solver='newton', n_components=n_components,
                    x_init='custom', y_init='custom',
                    random_state=0)
    estimator.fit_transform(X, Y, U=U_init, V=V_init, Z=Z_init)
예제 #4
0
def test_input_method_compatibility():
    """Smoke test: every pairing of ``x_init`` and ``y_init`` can be fitted.

    Custom U, V and Z are always passed to ``fit_transform``, whatever the
    init names are, and each model runs a single iteration of the 'mu'
    solver.
    """
    n_components = 4
    rng = np.random.mtrand.RandomState(0)
    X = np.abs(rng.randn(6, 5))
    Y = np.abs(rng.randn(5, 6))

    # Draw order (U, V, Z) is kept so the generator state matches.
    x_scale = np.sqrt(X.mean() / n_components)
    U_init = np.abs(x_scale * rng.randn(6, n_components))
    V_init = np.abs(x_scale * rng.randn(5, n_components))
    y_scale = np.sqrt(Y.mean() / n_components)
    Z_init = np.abs(y_scale * rng.randn(6, n_components))

    init_names = [None, 'random', 'nndsvd', 'nndsvda', 'nndsvdar', 'custom']
    for x_init in init_names:
        for y_init in init_names:
            estimator = CMF(n_components=n_components, solver='mu',
                            x_init=x_init, y_init=y_init,
                            random_state=0, max_iter=1)
            estimator.fit_transform(X, Y, U=U_init, V=V_init, Z=Z_init)
예제 #5
0
def sparse_cmf_benchmark(solver):
    """Fit CMF for 10 iterations on a 2000x150 matrix wrapped with ``SP``.

    Ten columns are zeroed in each half of the rows before conversion.

    Parameters
    ----------
    solver
        Forwarded unchanged as the ``solver`` argument of CMF.
    """
    rng = np.random.mtrand.RandomState(42)
    dense = np.abs(rng.randn(2000, 150))
    even_columns = 2 * np.arange(10)
    # First half of the rows: clear columns 100, 102, ..., 118.
    dense[:1000, even_columns + 100] = 0
    # Second half of the rows: clear columns 0, 2, ..., 18.
    dense[1000:, even_columns] = 0
    X_sparse = SP(dense)
    Y = np.abs(rng.randn(150, 10))
    estimator = CMF(n_components=10, solver=solver,
                    random_state=42, max_iter=10)
    _u, _v, _z = estimator.fit_transform(X_sparse, Y)
예제 #6
0
def test_auto_compute_alpha():
    """Compare reconstruction errors for ``alpha=0.5`` and ``alpha="auto"``.

    The test expects the ``alpha="auto"`` model to reconstruct X worse and Y
    better than the ``alpha=0.5`` model.
    """
    rng = np.random.mtrand.RandomState(36)
    X = rng.randn(10, 10)
    Y = rng.randn(10, 5)

    common = dict(n_components=2, solver="newton",
                  x_init='svd', y_init='svd',
                  U_non_negative=False, V_non_negative=False,
                  Z_non_negative=False,
                  random_state=0, max_iter=100)
    fixed_alpha_model = CMF(alpha=0.5, **common)
    # automatic = weight * number_of_elements is constant for both X and Y
    auto_alpha_model = CMF(alpha="auto", **common)

    U1, V1, Z1 = fixed_alpha_model.fit_transform(X, Y)
    U2, V2, Z2 = auto_alpha_model.fit_transform(X, Y)

    def residual_norm(left, right, target):
        # Norm of (left @ right.T - target).
        return np.linalg.norm(np.dot(left, right.T) - target)

    x_err_auto = residual_norm(U2, V2, X)
    x_err_fixed = residual_norm(U1, V1, X)
    assert_greater(x_err_auto, x_err_fixed)

    y_err_fixed = residual_norm(V1, Z1, Y)
    y_err_auto = residual_norm(V2, Z2, Y)
    assert_greater(y_err_fixed, y_err_auto)
예제 #7
0
def test_l2_regularization(solver):
    """Check that an L2 penalty lowers the mean of each factor matrix.

    Two CMF models that differ only in ``l2_reg`` (0.0 versus 2.0) are fitted
    on the same non-negative random matrices, and the means of U, V and Z
    are compared one by one.

    Parameters
    ----------
    solver
        Forwarded unchanged as the ``solver`` argument of both CMF models.
    """
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(6, 5))
    Y = np.abs(rng.randn(5, 4))

    shared = dict(n_components=3, solver=solver, random_state=42)
    baseline = CMF(l2_reg=0., **shared)
    penalized = CMF(l2_reg=2., **shared)

    penalized_factors = U_pen, V_pen, Z_pen = penalized.fit_transform(X, Y)
    baseline_factors = U_base, V_base, Z_base = baseline.fit_transform(X, Y)

    label = "solver: {}".format(solver)
    # Compared in the order U, V, Z.
    for unpenalized, shrunk in zip(baseline_factors, penalized_factors):
        assert_greater(unpenalized.mean(), shrunk.mean(), label)
예제 #8
0
def test_logit_link_optimization():
    """Fit CMF with logit links on both inputs and check the error is small.

    X and Y are sigmoid-transformed Gaussian noise; after fitting,
    ``reconstruction_err_`` must be below 0.1.
    """
    def sigmoid(values):
        return 1 / (1 + np.exp(-values))

    rng = np.random.mtrand.RandomState(42)
    X = sigmoid(rng.randn(6, 5))
    Y = sigmoid(rng.randn(5, 4))

    estimator = CMF(n_components=5, solver="newton",
                    l2_reg=0., random_state=42,
                    x_link="logit", y_link="logit",
                    U_non_negative=False, V_non_negative=False,
                    Z_non_negative=False)

    _u, _v, _z = estimator.fit_transform(X, Y)
    assert_less(estimator.reconstruction_err_, 0.1)
예제 #9
0
def sparse_cmf_with_logits_benchmark(sample_ratio):
    """Fit the newton solver for 10 iterations on ``SP``-wrapped data.

    X is a 2000x150 matrix with ten columns zeroed in each half of the rows;
    Y is ``expit`` applied to Gaussian noise.

    Parameters
    ----------
    sample_ratio
        Forwarded unchanged as the ``sg_sample_ratio`` argument of CMF.
    """
    rng = np.random.mtrand.RandomState(42)
    dense = np.abs(rng.randn(2000, 150))
    even_columns = 2 * np.arange(10)
    # First half of the rows: clear columns 100, 102, ..., 118.
    dense[:1000, even_columns + 100] = 0
    # Second half of the rows: clear columns 0, 2, ..., 18.
    dense[1000:, even_columns] = 0
    X_sparse = SP(dense)
    Y = expit(rng.randn(150, 10))
    settings = dict(n_components=10,
                    solver="newton",
                    random_state=42,
                    sg_sample_ratio=sample_ratio,
                    max_iter=10)
    estimator = CMF(**settings)
    _u, _v, _z = estimator.fit_transform(X_sparse, Y)
예제 #10
0
def test_transform_after_fit_no_labels(solver):
    """Check that ``transform`` with no Y returns the V found during fitting.

    Parameters
    ----------
    solver
        Forwarded unchanged as the ``solver`` argument of CMF.
    """
    rng = np.random.mtrand.RandomState(36)
    X_train = rng.randn(7, 5)
    Y_train = rng.randn(5, 3)
    X_unseen = rng.randn(15, 5)

    estimator = CMF(n_components=2, solver=solver,
                    x_init='svd', y_init='svd',
                    U_non_negative=False, V_non_negative=False,
                    Z_non_negative=False,
                    random_state=0, max_iter=100)
    _u_fit, V_fit, _z_fit = estimator.fit_transform(X_train, Y_train)

    # None is passed in place of the second matrix.
    _u_new, V_new, _z_new = estimator.transform(X_unseen, None)
    assert_array_equal(V_new, V_fit)
예제 #11
0
def test_fit_nn_output(solver):
    """Check that the fitted factors U, V and Z contain no negative values.

    The check is repeated for each initialization scheme, using the same
    scheme for ``x_init`` and ``y_init``. Each assertion carries a message
    naming the factor, solver and init, so a failure identifies the
    offending configuration.

    Parameters
    ----------
    solver
        Forwarded unchanged as the ``solver`` argument of CMF.
    """
    X = np.c_[5 * np.ones(5) - np.arange(1, 6),
              5 * np.ones(5) + np.arange(1, 6)]
    Y = np.c_[5 * np.ones(5) - np.arange(1, 6),
              5 * np.ones(5) + np.arange(1, 6)].T
    for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'):
        model = CMF(n_components=2, solver=solver, x_init=init, y_init=init,
                    random_state=0)
        U, V, Z = model.fit_transform(X, Y)
        for name, factor in (("U", U), ("V", V), ("Z", Z)):
            msg = "{} has negative values (solver: {}, init: {})".format(
                name, solver, init)
            assert_false((factor < 0).any(), msg)
예제 #12
0
def test_logit_link_non_negative_optimization():
    """Check a logit link on Y combined with non-negative U and V.

    X is Gaussian noise with negative entries set to zero and Y is
    sigmoid-transformed Gaussian noise; after fitting,
    ``reconstruction_err_`` must be below 0.1.
    """
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(6, 5)
    negative_mask = X < 0
    X[negative_mask] = 0
    Y = 1 / (1 + np.exp(-rng.randn(5, 4)))

    settings = dict(n_components=5, solver="newton",
                    l2_reg=0., random_state=42, y_link="logit",
                    U_non_negative=True, V_non_negative=True,
                    Z_non_negative=False,
                    hessian_pertubation=0.2, max_iter=1000)
    estimator = CMF(**settings)

    _u, _v, _z = estimator.fit_transform(X, Y)
    assert_less(estimator.reconstruction_err_, 0.1)
예제 #13
0
def test_nonnegative_condition_for_newton_solver():
    """Check that disabling the non-negativity flags yields negative entries.

    The newton solver is fitted on non-negative X and Y with all three
    ``*_non_negative`` flags off; each of U, V and Z must then have a
    negative minimum.
    """
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(6, 5))
    Y = np.abs(rng.randn(5, 4))

    estimator = CMF(n_components=3, solver="newton",
                    l2_reg=0., random_state=42,
                    U_non_negative=False, V_non_negative=False,
                    Z_non_negative=False)

    U, V, Z = estimator.fit_transform(X, Y)

    # if one value is negative in any matrix, since X and Y are non-negative,
    # all the other matrices will need to have negative values
    for factor in (U, V, Z):
        assert_less(np.min(factor), 0)
예제 #14
0
def test_stochastic_newton_solver_sparse_input():
    """Check the newton solver gives the same factors for dense and CSR input.

    The model uses ``sg_sample_ratio=0.5``; a clone of it is fitted on the
    CSR versions of the same matrices and U, V and Z are compared with
    ``assert_array_almost_equal``.
    """
    rng = np.random.mtrand.RandomState(36)
    every_other = 2 * np.arange(5)

    dense_a = np.abs(rng.randn(10, 10))
    dense_a[:, every_other] = 0  # zero out columns 0, 2, 4, 6, 8
    sparse_a = csr_matrix(dense_a)

    dense_b = np.abs(rng.randn(10, 5))
    dense_b[every_other, :] = 0  # zero out rows 0, 2, 4, 6, 8
    sparse_b = csr_matrix(dense_b)

    dense_est = CMF(n_components=5, solver="newton",
                    x_init='svd', y_init='svd',
                    U_non_negative=False, V_non_negative=False,
                    Z_non_negative=False,
                    sg_sample_ratio=0.5, random_state=0, max_iter=1000)
    sparse_est = clone(dense_est)

    dense_factors = U1, V1, Z1 = dense_est.fit_transform(dense_a, dense_b)
    sparse_factors = U2, V2, Z2 = sparse_est.fit_transform(sparse_a, sparse_b)

    # Compared in the order U, V, Z.
    for from_dense, from_sparse in zip(dense_factors, sparse_factors):
        assert_array_almost_equal(from_dense, from_sparse)
예제 #15
0
def test_sparse_input(solver):
    """Check that CSR inputs are accepted and match the dense-input result.

    A model and its clone are fitted on the dense and CSR versions of the
    same matrices, and U, V and Z are compared with
    ``assert_array_almost_equal``.

    Parameters
    ----------
    solver
        Forwarded unchanged as the ``solver`` argument of CMF.
    """
    rng = np.random.mtrand.RandomState(42)
    every_other = 2 * np.arange(5)

    dense_a = np.abs(rng.randn(10, 10))
    dense_a[:, every_other] = 0  # zero out columns 0, 2, 4, 6, 8
    sparse_a = csr_matrix(dense_a)

    dense_b = np.abs(rng.randn(10, 5))
    dense_b[every_other, :] = 0  # zero out rows 0, 2, 4, 6, 8
    sparse_b = csr_matrix(dense_b)

    dense_est = CMF(solver=solver, n_components=5,
                    x_init='random', y_init='random',
                    random_state=0, tol=1e-2)
    sparse_est = clone(dense_est)

    dense_factors = U1, V1, Z1 = dense_est.fit_transform(dense_a, dense_b)
    sparse_factors = U2, V2, Z2 = sparse_est.fit_transform(sparse_a, sparse_b)

    # Compared in the order U, V, Z.
    for from_dense, from_sparse in zip(dense_factors, sparse_factors):
        assert_array_almost_equal(from_dense, from_sparse)
예제 #16
0
def dense_cmf_with_logits_benchmark():
    """Fit the newton solver for 10 iterations on dense random matrices.

    NOTE(review): the name mentions logits, but no link argument is passed
    to CMF here -- confirm whether that is intended.
    """
    rng = np.random.mtrand.RandomState(42)
    X = np.abs(rng.randn(2000, 150))
    Y = np.abs(rng.randn(150, 10))
    settings = dict(n_components=10, solver="newton",
                    random_state=42, max_iter=10)
    estimator = CMF(**settings)
    _u, _v, _z = estimator.fit_transform(X, Y)