Example #1
def test_non_negative_factorization_consistency():
    # Test that the function is called in the same way, either directly
    # or through the NMF class
    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0

    for init in ['random', 'nndsvd']:
        for solver in ('cd', 'mu'):
            W_nmf, H, _ = non_negative_factorization(A,
                                                     init=init,
                                                     solver=solver,
                                                     random_state=1,
                                                     tol=1e-2)
            W_nmf_2, _, _ = non_negative_factorization(A,
                                                       H=H,
                                                       update_H=False,
                                                       init=init,
                                                       solver=solver,
                                                       random_state=1,
                                                       tol=1e-2)

            model_class = NMF(init=init,
                              solver=solver,
                              random_state=1,
                              tol=1e-2)
            W_cls = model_class.fit_transform(A)
            W_cls_2 = model_class.transform(A)

            assert_array_almost_equal(W_nmf, W_cls, decimal=10)
            assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
Example #2
    def run(self):
        from numpy import array, reshape
        from sklearn.decomposition import non_negative_factorization
        from sklearn.ensemble import GradientBoostingRegressor

        ##########################Learning#################################
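        # presumably this maps separate L1 (self.beta) and squared-L2 (self.alpha) penalties onto
        # sklearn's combined scheme alpha * l1_ratio * ||W||_1 + 0.5 * alpha * (1 - l1_ratio) * ||W||_F^2,
        # which gives alpha = 2 * self.alpha + self.beta and l1_ratio = self.beta / (2 * self.alpha + self.beta)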
        W, H, _ = non_negative_factorization(X=self.train.data,
                                             n_components=self.train.data.shape[1],
                                             regularization='transformation',
                                             alpha=2 * self.alpha + self.beta,
                                             l1_ratio=self.beta / (2 * self.alpha + self.beta))

        Y = reshape(self.train.occupancy, (-1,))
        gblsr = GradientBoostingRegressor(loss='ls', n_estimators=500).fit(W, Y)
        ####################################################################

        #############################Prediction#############################

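        # solve for the test-set weights W while keeping the learned dictionary H fixed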
        W, _, _ = non_negative_factorization(X=self.test.data,
                                             H=H,
                                             update_H=False,
                                             n_components=self.test.data.shape[1],
                                             regularization='transformation',
                                             alpha=2 * self.alpha + self.beta,
                                             l1_ratio=self.beta / (2 * self.alpha + self.beta))
        Y = gblsr.predict(W)
        Y[Y < 0] = 0
        predict_occupancy = array(Y)
        ####################################################################

        return reshape(predict_occupancy, (-1, 1))
Example #3
    def apply(self, k=-1, alpha=1.0, l1=0.75, max_iter=100, rel_err=1e-3):
        if k == -1:
            k = self.num_cluster
        X_t = self.pre_processing()
        X = X_t.T

        fixed_W = pd.get_dummies(self.labels)
        fixed_W_t = fixed_W.T  # interpret W as H (transposed): sklearn can only fix H while optimizing W, so we swap the two matrices' roles
        learned_H_t, fixed_W_t_same, n_iter = decomp.non_negative_factorization(
            X_t.astype(float), n_components=k, init='custom', random_state=0,
            update_H=False, H=fixed_W_t.astype(float), alpha=alpha, l1_ratio=l1,
            max_iter=max_iter, shuffle=True, solver='cd', tol=rel_err, verbose=0)

        assert(np.all(fixed_W_t == fixed_W_t_same))
        #self.cluster_labels = np.argmax(fixed_W_t_same.T, axis=1)

        # Now take the learned H, fix it and learn W to see how well it worked
        learned_W, learned_H_fix, n_iter = decomp.non_negative_factorization(
            X.astype(float), n_components=k, init='custom', random_state=0,
            update_H=False, H=learned_H_t.T, alpha=alpha, l1_ratio=l1,
            max_iter=max_iter, shuffle=True, solver='cd', tol=rel_err, verbose=0)

        assert(np.all(learned_H_t.T == learned_H_fix))
        self.cluster_labels = np.argmax(learned_W, axis=1)

        if np.any(np.isnan(learned_H_t)):
            raise Exception('H contains NaNs (alpha={0}, k={1}, l1={2}, data={3}x{4}'.format(
                alpha, k, l1, X.shape[0], X.shape[1]))
        if np.any(np.isnan(fixed_W_t)):
            raise Exception('W contains NaNs (alpha={0}, k={1}, l1={2}, data={3}x{4}'.format(
                alpha, k, l1, X.shape[0], X.shape[1]))

        #self.print_reconstruction_error(X, fixed_W_t, learned_H_t)
        self.dictionary = learned_H_t
        self.data_matrix = fixed_W_t
Example #4
def norm_nmf(data,
             k,
             init_weights=None,
             init_means=None,
             normalize_w=True,
             return_cost=True,
             write_progress_file=None,
             **kwargs):
    """
    Args:
        data (array): dense or sparse array with shape (genes, cells)
        k (int): number of cell types
        normalize_w (bool): True if W should be normalized (so that each column sums to 1)
        init_weights (array, optional): Initial value for W. Default: None
        init_means (array, optional): Initial value for M. Default: None
        **kwargs: misc arguments to NMF

    Returns:
        Two matrices: M of shape (genes, k) and W of shape (k, cells).
        If return_cost is True, the squared-error reconstruction cost is also returned.
    """
    data = cell_normalize(data)
    init = None
    if init_weights is not None or init_means is not None:
        init = 'custom'
        if init_weights is None:
            # solve for the cell weights W with the gene factor M held fixed
            init_weights_, _, n_iter = non_negative_factorization(
                data.T,
                n_components=k,
                init='custom',
                update_H=False,
                H=init_means.T)
            init_weights = init_weights_.T
        elif init_means is None:
            # solve for the gene factor M with the cell weights W held fixed
            init_means, _, n_iter = non_negative_factorization(data,
                                                               n_components=k,
                                                               init='custom',
                                                               update_H=False,
                                                               H=init_weights)
        init_means = init_means.copy(order='C')
        init_weights = init_weights.copy(order='C')
    nmf = NMF(k, init=init, **kwargs)
    if write_progress_file is not None:
        progress = open(write_progress_file, 'w')
        progress.write(str(0))
        progress.close()
    M = nmf.fit_transform(data, W=init_means, H=init_weights)
    W = nmf.components_
    if normalize_w:
        W = W / W.sum(0)
    if return_cost:
        cost = 0
        if sparse.issparse(data):
            ws = sparse.csr_matrix(M)
            hs = sparse.csr_matrix(W)
            cost = 0.5 * ((data - ws.dot(hs)).power(2)).sum()
        else:
            cost = 0.5 * ((data - M.dot(W))**2).sum()
        return M, W, cost
    else:
        return M, W
Example #5
def test_nmf_custom_init_dtype_error():
    # Check that an error is raised if custom H and/or W don't have the same
    # dtype as X.
    rng = np.random.RandomState(0)
    X = rng.random_sample((20, 15))
    H = rng.random_sample((15, 15)).astype(np.float32)
    W = rng.random_sample((20, 15))

    with pytest.raises(TypeError, match="should have the same dtype as X"):
        NMF(init='custom').fit(X, H=H, W=W)

    with pytest.raises(TypeError, match="should have the same dtype as X"):
        non_negative_factorization(X, H=H, update_H=False)
Example #6
def test_non_negative_factorization_consistency():
    # Test that the function is called in the same way, either directly
    # or through the NMF class
    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0

    W_nmf, H, _ = non_negative_factorization(A, random_state=1, tol=1e-2)
    W_nmf_2, _, _ = non_negative_factorization(A, H=H, update_H=False, random_state=1, tol=1e-2)

    model_class = NMF(random_state=1, tol=1e-2)
    W_cls = model_class.fit_transform(A)
    W_cls_2 = model_class.transform(A)
    assert_array_almost_equal(W_nmf, W_cls, decimal=10)
    assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
Example #7
def test_init_default_deprecation():
    # Test FutureWarning on init default
    msg = (r"The 'init' value, when 'init=None' and "
           r"n_components is less than n_samples and "
           r"n_features, will be changed from 'nndsvd' to "
           r"'nndsvda' in 1.1 \(renaming of 0.26\).")
    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(6, 5))
    with pytest.warns(FutureWarning, match=msg):
        nmf._initialize_nmf(A, 3)
    with pytest.warns(FutureWarning, match=msg):
        NMF().fit(A)
    with pytest.warns(FutureWarning, match=msg):
        non_negative_factorization(A)
Example #8
def run_NMF():

    true_labels = labels

    adjs = [
        adjacency_matrix, adjacency_matrix_weights, adjacency_matrix_similarity
    ]
    names = [
        'Adjacency Matrix: no weights\n',
        'Adjacency Matrix: likes-dislikes weights\n',
        'Adjacency Matrix: similarity weights\n'
    ]

    for adj, name in zip(adjs, names):

        nmf_factorization = non_negative_factorization(adj,
                                                       n_components=2,
                                                       init='random')
        W = nmf_factorization[0]
        W = pd.DataFrame(W)

        H = nmf_factorization[1]
        H = pd.DataFrame(H)

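        # assign each column of H (one per node) to whichever of the two components has the larger coefficient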
        clusters = [
            1 if (H.iloc[0, i] < H.iloc[1, i]) else 0
            for i in range(H.shape[1])
        ]
        clusters = pd.Series(clusters)
        predicted_labels = list(clusters)

        print(name)
        print(classification_report(true_labels, predicted_labels))
        print('--------------------------------------------\n')
Example #9
    def h_to_a(self, h_comp, w, adj):
        # TODO construct kernel from random walk theory
        # TODO random walk is fast but least accurate model among graph completion algos
        # TODO check the literature for online nmf (OMF)
        # transform graph to kernel to parameterize
        # fNRI factorization of edges using the softmax
        #  make it variational, make 10 projections to generate 10 different permutations of adjacency
        # adj is all ones at init, assuming a fully connected graph
        # use NMF to sparsify adj: keep the more plausible connections and yield a less dense graph
        adj_mat_list = []
        for k in range(10):
            w, h, n_iter = sk_dec.non_negative_factorization(
                X=adj,
                H=h_comp[k],
                W=w,
                init='custom',
                n_components=adj.shape[0])
            # tf tensors do not support item assignment, so collect and stack the reconstructions
            adj_mat_list.append(tf.matmul(w, h))
        adj_mat_vec = tf.cast(tf.stack(adj_mat_list), tf.float32)

        # edges = gumbel_softmax(logits, tau=args.temp, hard=args.hard)
        # prob = my_softmax(logits, -1)
        # loss_kl = kl_categorical_uniform(prob, args.num_atoms, edge_types)
        return adj_mat_vec
Example #10
    def apply(self, k=-1, alpha=1.0, l1=0.75, max_iter=100, rel_err=1e-3):
        if k == -1:
            k = self.num_cluster
        X = self.pre_processing()

        fixed_W = pd.get_dummies(self.labels)
        fixed_W_t = fixed_W.T  # interpret W as H (transposed): sklearn can only fix H while optimizing W, so we swap the two matrices' roles
        learned_H_t, fixed_W_t_same, n_iter = decomp.non_negative_factorization(
            X.astype(float), n_components=k, init='custom', random_state=0,
            update_H=False, H=fixed_W_t.astype(float), alpha=alpha, l1_ratio=l1,
            max_iter=max_iter, shuffle=True, solver='cd', tol=rel_err, verbose=0)

        init_W = fixed_W_t_same.T
        init_H = learned_H_t.T

        nmf = decomp.NMF(alpha=alpha, init='custom', l1_ratio=l1, max_iter=max_iter,
                         n_components=k, random_state=0, shuffle=True, solver='cd',
                         tol=rel_err, verbose=0)
        W = nmf.fit_transform(X.T, W=init_W, H=init_H)
        H = nmf.components_
        self.cluster_labels = np.argmax(W, axis=1)

        if np.any(np.isnan(H)):
            raise Exception('H contains NaNs (alpha={0}, k={1}, l1={2}, data={3}x{4}'.format(
                alpha, k, l1, X.shape[0], X.shape[1]))
        if np.any(np.isnan(W)):
            raise Exception('W contains NaNs (alpha={0}, k={1}, l1={2}, data={3}x{4}'.format(
                alpha, k, l1, X.shape[0], X.shape[1]))

        # self.print_reconstruction_error(X, W, H)
        self.dictionary = H.T
        self.data_matrix = W.T
Example #11
    def compute_exposure(self, X, P):
        ''' 
        Compute exposures from given signatures
        '''

        # Initialize NMF object with components equal to the signatures.
        # then run nmf.transform(X).
        K, M = P.shape

        # A hacky way to call sklearn's nmf function...
        nmf =  self._new_NMF_model(K)
        E, P_, n_iter_ = non_negative_factorization(
            X=X, W=None, H=P, n_components=K,
            init=nmf.init, update_H=False, solver=nmf.solver,
            beta_loss=nmf.beta_loss, tol=nmf.tol, max_iter=nmf.max_iter,
            alpha=nmf.alpha, l1_ratio=nmf.l1_ratio, regularization='both',
            random_state=nmf.random_state, verbose=nmf.verbose,
            shuffle=nmf.shuffle)

        assert(np.allclose(P_, P))

        # TODO: add new feature to change norm to be
        # KL or Itakura-Saito divergence
        err = np.linalg.norm(X - E.dot(P), 'fro')
        return E, err
Example #12
def thresholding(X, rank=0, W_ini=[], H_ini=[]):
    ''' Algorithm of thresholding from Binary Matrix Factorization with Applications by Zhang
    '''
    if (rank == 0 and (W_ini == [] or H_ini == [])):
        print(" You have to put initializations or a rank")
        return
    if (W_ini == [] or H_ini == []):
        W_ini, H_ini, thash = non_negative_factorization(X,
                                                         n_components=rank,
                                                         solver='mu')
    W_ini, H_ini = utils.normalization(W_ini, H_ini)
    II = np.max(H_ini)
    testh = np.linspace(0, II, int((II - 0) / 0.01))
    ll = np.max(W_ini)
    testw = np.linspace(0, ll, int((ll - 0) / 0.01))
    temp = 10**10
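    # exhaustive grid search (step 0.01) over binarization thresholds for H and W, keeping
    # the pair that minimizes the Frobenius distance between X and the clipped reconstruction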
    for i in range(len(testh)):
        newH = signstar(H_ini, testh[i])
        for j in range(len(testw)):
            newW = signstar(W_ini, testw[j])
            X_res = np.dot(newW, newH.T)
            X_res[X_res > 1] = 1
            newtemp = utils.frobenius(X, X_res)
            if newtemp < temp:
                temp = newtemp
                h = testh[i]
                w = testw[j]
    return (w, h)
Example #13
def test_nmf_decreasing():
    # test that the objective function is decreasing at each iteration
    n_samples = 20
    n_features = 15
    n_components = 10
    alpha = 0.1
    l1_ratio = 0.5
    tol = 0.

    # initialization
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.abs(X, X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        for solver in ('cd', 'mu'):
            if solver != 'mu' and beta_loss != 2:
                # not implemented
                continue
            W, H = W0.copy(), H0.copy()
            previous_loss = None
            for _ in range(30):
                # one more iteration starting from the previous results
                W, H, _ = non_negative_factorization(
                    X, W, H, beta_loss=beta_loss, init='custom',
                    n_components=n_components, max_iter=1, alpha=alpha,
                    solver=solver, tol=tol, l1_ratio=l1_ratio, verbose=0,
                    regularization='both', random_state=0, update_H=True)

                loss = nmf._beta_divergence(X, W, H, beta_loss)
                if previous_loss is not None:
                    assert_greater(previous_loss, loss)
                previous_loss = loss
Example #14
    def delete_word_from_topic(self, topic_to_delete_from, word_to_delete,
                               top_words_in_topic):
        self.W = np.copy(self.nmf_matrix)
        H = np.copy(self.nmf_components)

        index_of_word_to_remove = self.features.index(
            word_to_delete.replace(' ', '_'))

        H[topic_to_delete_from][index_of_word_to_remove] = 0

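        # re-fit only the document-topic weights W, keeping the edited topic-word matrix H fixed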
        self.W, self.H, n_iter = non_negative_factorization(
            self.vectorized_out,
            n_components=self.nr_of_topics,
            init='custom',
            random_state=0,
            update_H=False,
            H=H)

        self.nmf_matrix = np.copy(self.W)
        self.nmf_components = np.copy(self.H)

        self.doc_topic_dists = self.nmf_matrix / self.nmf_matrix.sum(
            axis=1)[:, None]
        self.doc_topic_dists = np.nan_to_num(self.doc_topic_dists,
                                             nan=1 / self.nr_of_topics)
        self.top_words_map = self._top_words_map()
        self.doc_topic_matrix_df = self._doc_topic_matrix_df()
Example #15
def runCustom(sig, mix):

    print("mix: ", mix.shape)
    print("sig: ", sig.shape)

    # the roles of W and H are reversed in this case
    # because sklearn nmf only lets us fix H whereas we want to fix W so we must reverse
    # the roles and transpose
    # H is now the signature matrix
    # W is now the mix matrix
    print("running NMF with %d components" % sig.shape[1])
    W, H, n_iter = non_negative_factorization(mix.T,
                                              n_components=sig.shape[1],
                                              init='custom',
                                              solver='mu',
                                              beta_loss='kullback-leibler',
                                              max_iter=10000000,
                                              tol=1e-13,
                                              random_state=123456,
                                              update_H=False,
                                              H=sig.T)

    # sum to 1 for each row
    W = W / W.sum(axis=1, keepdims=1)

    return W.T, H.T
Example #16
def learn_representation(audio: np.ndarray,
                         win_length: int = 1024,
                         n_components: int = 100,
                         max_iter: int = 400,
                         init: str = None,
                         W: np.ndarray = None,
                         H: np.ndarray = None):
    mags, phases = get_magphase(audio, win_length=win_length)
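    # sparse NMF of the magnitude spectrogram: multiplicative updates with KL divergence and an L1-only penalty (l1_ratio=1.0)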
    components, weights, n_iters = non_negative_factorization(
        mags,
        init=init,
        W=W,
        H=H,
        n_components=n_components,
        beta_loss="kullback-leibler",
        solver="mu",
        l1_ratio=1.0,
        alpha=0.1,
        max_iter=max_iter)
    # model = DictionaryLearning(n_components=n_components,
    #                            tol=1e-1,
    #                            fit_algorithm="cd",
    #                            transform_algorithm="lasso_cd",
    #                            positive_code=True,
    #                            positive_dict=True,
    #                            max_iter=max_iter)
    # weights = model.fit_transform(mags.T).T
    # components = model.components_.T
    # n_iters = model.n_iter_
    return components, weights, n_iters
Example #17
def nmf_pooling(A, levels, binarize=False):
    S_list = []
    A_list = []
    S_prev = sp.eye(A.shape[0], dtype=np.float32)
    for i in range(max(levels) + 1):
        A = sp.csr_matrix(A, dtype=np.float32)
        if i in levels:
            A_list.append(A)
        n_nodes = A.shape[0]
        n_comp = np.maximum(n_nodes // 2, 2)
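        # factorize the adjacency matrix; H (n_comp x n_nodes) acts as a soft assignment matrix
        # and the coarsened adjacency is H A H^T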
        _, H, _ = non_negative_factorization(A, n_components=n_comp, init='random', random_state=0, max_iter=10)
        H = sp.csr_matrix(H, dtype=np.float32)
        A = (H.dot(A)).dot(H.T)

        # binarize H (hard cluster assignment)
        if binarize:
            H = H.toarray()
            S_i = np.zeros_like(H)
            S_i[np.arange(len(H)), H.argmax(1)] = 1
            S_i = sp.csr_matrix(S_i, dtype=np.float32)
        else:
            S_i = H

        # save the right pooling matrices
        S_prev = S_i.dot(S_prev)
        if i + 1 == max(levels) + 1:
            S_list.append(S_prev)
        elif i + 1 in levels:
            S_list.append(S_prev)
            S_prev = sp.eye(A.shape[0], dtype=np.float32)

    return S_list, A_list
Example #18
    def fit_transform_split(self, topics, fixed_H, column):
        self.nr_of_topics = topics

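        # duplicate the chosen topic's column so the document-topic matrix matches the new topic count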
        repeats = np.ones(len(self.nmf_matrix.T), dtype=int)
        repeats[column] = int(2)
        W = np.repeat(self.nmf_matrix.T, repeats, axis=0)
        self.W = W.T

        self.W = np.ascontiguousarray(self.W, dtype=np.float64)

        # self.W, self.H, n_iter = non_negative_factorization(self.vectorized_out, n_components=topics, init='custom', random_state=0, update_H=True, H=fixed_H, W=self.W)
        self.W, self.H, n_iter = non_negative_factorization(
            self.vectorized_out,
            n_components=topics,
            init='custom',
            random_state=0,
            update_H=False,
            H=fixed_H)

        self.nmf_matrix = np.copy(self.W)
        self.nmf_components = np.copy(self.H)

        self.doc_topic_dists = self.nmf_matrix / self.nmf_matrix.sum(
            axis=1)[:, None]
        self.doc_topic_dists = np.nan_to_num(self.doc_topic_dists,
                                             nan=1 / self.nr_of_topics)
        self.top_words_map = self._top_words_map()
        self.doc_topic_matrix_df = self._doc_topic_matrix_df()

        return
Example #19
def lsa_compute(word_doc_matrix, n_topics: int, method='SVD', max_nmf_iter=10):
    """
    Computes lsa on word_doc_matrix, using factorization functions from `sklearn`.
    If `method` is "SVD" (default), it will use `randomized_svd`.
    If `method` is "NMF", it will use `non_negative_factorization`.

    Args:
        word_doc_matrix (matrix): matrix to factorize
        n_topics (int): number of "topics" to extract
        method (str): factorization method
        max_nmf_iter (int, optional): Sets the max number of iterations
            when calling `non_negative_factorization`. Default is 10.

    Returns:
        tuple of word_topic_matrix, topic_doc_matrix
    """

    logging.info(f"Computing LSA using {method} method...")

    if method == "SVD":
        U, _, VT = randomized_svd(word_doc_matrix, n_topics)
        return U, VT
    elif method == "NMF":
        W, H, _ = non_negative_factorization(
            word_doc_matrix,
            n_components=n_topics,
            max_iter=max_nmf_iter,
            random_state=0,
        )
        return W, H
    else:
        raise ValueError(f"ERROR: invalid value for method argument")
Example #20
def decompose_with_dict(spec, dic, max_iter=6000, alpha=0.5):
    """
    get H with V and W

    Example:
    >>> V = 10*np.random.rand(100, 200)
    >>> W, H = decompose(V, k=50)
    >>> H2 = decompose_with_dict(V, W)

    :param spec: input matrix V
    :param dic: fixed dictionary W
    :param max_iter: maximum number of iterations
    :param alpha: regularization strength
    :return: activation matrix H
    """
    k = dic.shape[1]
    _act, _, n_iter = non_negative_factorization(np.transpose(spec),
                                            H=np.transpose(dic),
                                            update_H=False,
                                            alpha=alpha,
                                            l1_ratio=1,
                                            n_components=k,
                                            solver='cd',
                                            max_iter=max_iter)
    return np.transpose(_act)
Example #21
def test_nmf_multiplicative_update_sparse():
    # Compare sparse and dense input in multiplicative update NMF
    # Also test continuity of the results with respect to beta_loss parameter
    n_samples = 20
    n_features = 10
    n_components = 5
    alpha = 0.1
    l1_ratio = 0.5
    n_iter = 20

    # initialization
    rng = np.random.mtrand.RandomState(1337)
    X = rng.randn(n_samples, n_features)
    X = np.abs(X)
    X_csr = sp.csr_matrix(X)
    W0, H0 = nmf._initialize_nmf(X, n_components, init='random',
                                 random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1., 2., 2.5):
        # Reference with dense array X
        W, H = W0.copy(), H0.copy()
        W1, H1, _ = non_negative_factorization(
            X, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        # Compare with sparse X
        W, H = W0.copy(), H0.copy()
        W2, H2, _ = non_negative_factorization(
            X_csr, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        assert_array_almost_equal(W1, W2, decimal=7)
        assert_array_almost_equal(H1, H2, decimal=7)

        # Compare with almost same beta_loss, since some values have a specific
        # behavior, but the results should be continuous w.r.t beta_loss
        beta_loss -= 1.e-5
        W, H = W0.copy(), H0.copy()
        W3, H3, _ = non_negative_factorization(
            X_csr, W, H, n_components, init='custom', update_H=True,
            solver='mu', beta_loss=beta_loss, max_iter=n_iter, alpha=alpha,
            l1_ratio=l1_ratio, regularization='both', random_state=42)

        assert_array_almost_equal(W1, W3, decimal=4)
        assert_array_almost_equal(H1, H3, decimal=4)
Example #22
def test_non_negative_factorization_consistency(init, solver, alpha_W,
                                                alpha_H):
    # Test that the function is called in the same way, either directly
    # or through the NMF class
    max_iter = 500
    rng = np.random.mtrand.RandomState(42)
    A = np.abs(rng.randn(10, 10))
    A[:, 2 * np.arange(5)] = 0

    W_nmf, H, _ = non_negative_factorization(
        A,
        init=init,
        solver=solver,
        max_iter=max_iter,
        alpha_W=alpha_W,
        alpha_H=alpha_H,
        random_state=1,
        tol=1e-2,
    )
    W_nmf_2, H, _ = non_negative_factorization(
        A,
        H=H,
        update_H=False,
        init=init,
        solver=solver,
        max_iter=max_iter,
        alpha_W=alpha_W,
        alpha_H=alpha_H,
        random_state=1,
        tol=1e-2,
    )

    model_class = NMF(
        init=init,
        solver=solver,
        max_iter=max_iter,
        alpha_W=alpha_W,
        alpha_H=alpha_H,
        random_state=1,
        tol=1e-2,
    )
    W_cls = model_class.fit_transform(A)
    W_cls_2 = model_class.transform(A)

    assert_allclose(W_nmf, W_cls)
    assert_allclose(W_nmf_2, W_cls_2)
Example #23
    def _assert_nmf_no_nan(X, beta_loss):
        W, H, _ = non_negative_factorization(X,
                                             n_components=n_components,
                                             solver='mu',
                                             beta_loss=beta_loss,
                                             random_state=0,
                                             max_iter=1000)
        assert_false(np.any(np.isnan(W)))
        assert_false(np.any(np.isnan(H)))
Example #24
    def _assert_nmf_no_nan(X, beta_loss):
        W, H, _ = non_negative_factorization(
            X,
            init="random",
            n_components=n_components,
            solver="mu",
            beta_loss=beta_loss,
            random_state=0,
            max_iter=1000,
        )
        assert not np.any(np.isnan(W))
        assert not np.any(np.isnan(H))
Example #25
def test_nmf_decreasing(solver):
    # test that the objective function is decreasing at each iteration
    n_samples = 20
    n_features = 15
    n_components = 10
    alpha = 0.1
    l1_ratio = 0.5
    tol = 0.0

    # initialization
    rng = np.random.mtrand.RandomState(42)
    X = rng.randn(n_samples, n_features)
    np.abs(X, X)
    W0, H0 = nmf._initialize_nmf(X,
                                 n_components,
                                 init="random",
                                 random_state=42)

    for beta_loss in (-1.2, 0, 0.2, 1.0, 2.0, 2.5):
        if solver != "mu" and beta_loss != 2:
            # not implemented
            continue
        W, H = W0.copy(), H0.copy()
        previous_loss = None
        for _ in range(30):
            # one more iteration starting from the previous results
            W, H, _ = non_negative_factorization(
                X,
                W,
                H,
                beta_loss=beta_loss,
                init="custom",
                n_components=n_components,
                max_iter=1,
                alpha_W=alpha,
                solver=solver,
                tol=tol,
                l1_ratio=l1_ratio,
                verbose=0,
                random_state=0,
                update_H=True,
            )

            loss = (nmf._beta_divergence(X, W, H, beta_loss) +
                    alpha * l1_ratio * n_features * W.sum() +
                    alpha * l1_ratio * n_samples * H.sum() + alpha *
                    (1 - l1_ratio) * n_features * (W**2).sum() + alpha *
                    (1 - l1_ratio) * n_samples * (H**2).sum())
            if previous_loss is not None:
                assert previous_loss > loss
            previous_loss = loss
Example #26
    def _nmf(self, X, nmf_kwargs):
        """
        Parameters
        ----------
        X : pandas.DataFrame,
            Normalized counts dataFrame to be factorized.

        nmf_kwargs : dict,
            Arguments to be passed to ``non_negative_factorization``

        """
        (usages, spectra, niter) = non_negative_factorization(X, **nmf_kwargs)

        return spectra, usages
Example #27
def nmf():
    product_tensor = np.load('data/product_tensor.npy')
    X = product_tensor.sum(1)

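    # KL-divergence NMF with multiplicative updates; W and H.T are saved below as the user and product embeddings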
    W, H, n_iter = non_negative_factorization(X,
                                              n_components=10,
                                              max_iter=500,
                                              regularization='both',
                                              init='random',
                                              solver='mu',
                                              beta_loss='kullback-leibler')
    print(n_iter, W.shape, H.shape)
    np.save('predictions/user_embed.npy', W)
    np.save('predictions/prod_embed.npy', H.T)
Example #28
def nmf_sklearn(V, k, W=None, H=None, beta_loss="frobenius", verbose=False):
    """
    NMF with sklearn.
    """
    f = V.shape[0]
    t = V.shape[1]
    if W is None:
        W = np.random.uniform(size=(f, k))
    if H is None:
        H = np.random.uniform(size=(k, t))

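    # both W and H serve as custom starting points and are refined with multiplicative updates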
    W, H, _ = non_negative_factorization(V, W, H, k, init="custom", solver="mu", beta_loss=beta_loss, verbose=verbose)

    return W, H
Example #29
def split_once_sklearn(
    X,
    subset,
    W_parent,
    random_state: mtrand.RandomState,
    dtype: Union[np.float32, np.float64],
    tol,
    maxiter,
    init,
):
    m = X.shape[0]
    if len(subset) <= 3:
        cluster_subset = np.ones(len(subset), dtype=dtype)
        W_buffer_one = np.zeros((m, 2), dtype=dtype)
        H_buffer_one = np.zeros((2, len(subset)), dtype=dtype)
        priority_one = -1
    else:
        term_subset = np.where(np.sum(X[:, subset], axis=1) != 0)[0]
        X_subset = X[term_subset, :][:, subset]
        W = random_state.rand(len(term_subset), 2)
        H = random_state.rand(2, len(subset))
        W, H, n_iter_ = non_negative_factorization(
            X=X_subset,
            W=W,
            H=H,
            n_components=2,
            init=init,
            update_H=True,
            solver="cd",
            beta_loss=2,
            tol=tol,
            max_iter=maxiter,
            alpha=0,
            l1_ratio=0,
            regularization="both",
            random_state=random_state,
            verbose=0,
            shuffle=False,
        )
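        # hard-assign each column in the subset to whichever of the two factors gives it the larger coefficient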
        cluster_subset = np.argmax(H, axis=0)
        W_buffer_one = np.zeros((m, 2), dtype=dtype)
        W_buffer_one[term_subset, :] = W
        H_buffer_one = H
        if len(np.unique(cluster_subset)) > 1:
            priority_one = compute_priority(W_parent, W_buffer_one, dtype=dtype)
        else:
            priority_one = -1
    return cluster_subset, W_buffer_one, H_buffer_one, priority_one
Example #30
def nmf_init(data, clusters, k, init='enhanced'):
    """
    runs enhanced NMF initialization from clusterings (Gong 2013)

    There are 3 options for init:
        enhanced - uses EIn-NMF from Gong 2013
        basic - uses means for W, assigns H such that the chosen cluster for a given cell has value 0.75 and all others have 0.25/(k-1).
        nmf - uses means for W, and assigns H using the NMF objective while holding W constant.
    """
    init_w = np.zeros((data.shape[0], k))
    if sparse.issparse(data):
        for i in range(k):
            if data[:, clusters == i].shape[1] == 0:
                point = np.random.randint(0, data.shape[1])
                init_w[:, i] = data[:, point].toarray().flatten()
            else:
                init_w[:, i] = np.array(data[:,
                                             clusters == i].mean(1)).flatten()
    else:
        for i in range(k):
            if data[:, clusters == i].shape[1] == 0:
                point = np.random.randint(0, data.shape[1])
                init_w[:, i] = data[:, point].flatten()
            else:
                init_w[:, i] = data[:, clusters == i].mean(1)
    init_h = np.zeros((k, data.shape[1]))
    if init == 'enhanced':
        distances = np.zeros((k, data.shape[1]))
        for i in range(k):
            for j in range(data.shape[1]):
                distances[i, j] = np.sqrt(
                    ((data[:, j] - init_w[:, i])**2).sum())
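        # EIn-NMF soft assignment: the weight of cell j for cluster i is 1 / sum_l (d_ij / d_lj)^2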
        for i in range(k):
            for j in range(data.shape[1]):
                init_h[i, j] = 1 / (
                    (distances[:, j] / distances[i, j])**(-2)).sum()
    elif init == 'basic':
        init_h = initialize_from_assignments(clusters, k)
    elif init == 'nmf':
        init_h_, _, n_iter = non_negative_factorization(data.T,
                                                        n_components=k,
                                                        init='custom',
                                                        update_H=False,
                                                        H=init_w.T)
        init_h = init_h_.T
    return init_w, init_h
Example #31
def decompose(spec, k, max_iter=6000, alpha=0.5):
    """
    basic NMF tool, use it to get W and H

    Example:
    >>> V = 10 * np.random.rand(100, 3000)
    >>> W, H = decompose(V, 50)

    :param spec: input matrix V
    :param k: number of components
    :param max_iter: maximum number of iterations
    :param alpha: regularization strength
    """

    _dic, _act, n_iter = non_negative_factorization(spec,
                                                     n_components=k,
                                                     solver='cd',
                                                     alpha=alpha,
                                                     l1_ratio=1,
                                                     max_iter=max_iter)
    return _dic, _act
Example #32
    def _assert_nmf_no_nan(X, beta_loss):
        W, H, _ = non_negative_factorization(
            X, init='random', n_components=n_components, solver='mu',
            beta_loss=beta_loss, random_state=0, max_iter=1000)
        assert not np.any(np.isnan(W))
        assert not np.any(np.isnan(H))