Ejemplo n.º 1
0
    def learnModel(self, X):
        """
        Factorise X with nimfa and return the reconstruction(s) ``W.dot(H)``.

        When ``self.rank`` is an ``int`` a single model of that rank is fitted
        and one reconstructed matrix is returned. Otherwise ``self.rank`` is
        indexed like an array of ranks: one model is fitted per entry, every
        fit after the first is handed the previous W and H, and the list of
        reconstructions is returned.
        """
        single_rank = isinstance(self.rank, int)
        shared = {"method": self.method, "max_iter": self.maxIter}

        if single_rank:
            result = nimfa.mf_run(nimfa.mf(X, rank=self.rank, **shared))
            basis = result.basis()
            mixture = result.coef()
            return basis.dot(mixture)

        # Several ranks: the first fit starts without previous factors.
        result = nimfa.mf_run(nimfa.mf(X, rank=self.rank[0], **shared))
        basis = result.basis()
        mixture = result.coef()
        reconstructions = [basis.dot(mixture)]

        for position in range(1, self.rank.shape[0]):
            # Hand the factors of the previous fit to the next model.
            next_model = nimfa.mf(X, rank=self.rank[position],
                                  W=basis, H=mixture, **shared)
            result = nimfa.mf_run(next_model)
            basis = result.basis()
            mixture = result.coef()
            reconstructions.append(basis.dot(mixture))

        return reconstructions
Ejemplo n.º 2
0
def run_nmf(V):
    """
    Fit standard NMF to V twice and report each fit through ``print_info``.

    The first model uses the 'euclidean' update with the 'fro' objective, the
    second the 'divergence' update with the 'div' objective. Both share
    rank 10, "random_vcol" seeding and ``max_iter=12``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    shared = dict(seed="random_vcol",
                  rank=10,
                  method="nmf",
                  max_iter=12,
                  initialize_only=True)
    # (update rule, objective) pairs, in the order they are run.
    variants = (('euclidean', 'fro'), ('divergence', 'div'))
    for update_rule, objective_name in variants:
        model = nimfa.mf(V, update=update_rule, objective=objective_name,
                         **shared)
        print_info(nimfa.mf_run(model))
Ejemplo n.º 3
0
def nmf(Xtrn, Xtst):
    """
    Factorise every row of the train and test matrices with LSNMF.

    Each row is factorised on its own and the basis of that fit is stored as
    the matching row of the output. ``my_rank``, ``iters`` and ``file_name``
    are read from the enclosing module.

    :param Xtrn: Training matrix, indexed as ``Xtrn[i, :]`` per row.
    :param Xtst: Test matrix, indexed as ``Xtst[i, :]`` per row.
    :returns: Tuple ``(Xtrn_nmf, Xtst_nmf)`` of arrays with one row per input
        row and ``my_rank`` columns.
    """

    def _factorize_rows(X):
        # Fit one model per row of X and collect the bases.
        n_rows = np.shape(X)[0]
        bases = np.zeros((n_rows, my_rank))
        for i in xrange(n_rows):
            model = nimfa.mf(X[i, :], method='lsnmf', max_iter=iters,
                             rank=my_rank)
            bases[i, :] = nimfa.mf_run(model).basis()
            if i % 10000 == 0:
                print(file_name + ': \t iter ' + str(i))
        return bases

    print(file_name + ': Running non-negative matrix facorization w/ rank = ' + str(my_rank))
    print(file_name + ': \t on traning...')
    Xtrn_nmf = _factorize_rows(Xtrn)
    print(file_name + ' \t on testing...')
    # The original test loop ran the last *training* model for every test
    # row; sharing one helper makes each row use its own model.
    Xtst_nmf = _factorize_rows(Xtst)

    return (Xtrn_nmf, Xtst_nmf)
Ejemplo n.º 4
0
def run_nmf(V):
    """
    Run standard nonnegative matrix factorization in two configurations.

    A Euclidean fit (update 'euclidean', objective 'fro') is run first and a
    divergence fit (update 'divergence', objective 'div') second. Each result
    is passed to ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10

    def fit_and_report(update_rule, objective_name):
        # Build the model with the common settings, run it and report it.
        settings = {
            "seed": "random_vcol",
            "rank": rank,
            "method": "nmf",
            "max_iter": 12,
            "initialize_only": True,
            "update": update_rule,
            "objective": objective_name,
        }
        print_info(nimfa.mf_run(nimfa.mf(V, **settings)))

    fit_and_report('euclidean', 'fro')
    fit_and_report('divergence', 'div')
Ejemplo n.º 5
0
def run_bd(V):
    """
    Fit a Bayesian decomposition ("bd") model to V and report it.

    The array-valued options are all-zero matrices shaped from V and the
    rank. The fitted model is passed to ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    bd_options = {
        "alpha": np.mat(np.zeros((V.shape[0], rank))),
        "beta": np.mat(np.zeros((rank, V.shape[1]))),
        "theta": 0.0,
        "k": 0.0,
        "sigma": 1.0,
        "skip": 100,
        "stride": 1,
        "n_w": np.mat(np.zeros((rank, 1))),
        "n_h": np.mat(np.zeros((rank, 1))),
        "n_sigma": False,
    }
    model = nimfa.mf(V, seed="random_c", rank=rank, method="bd",
                     max_iter=12, initialize_only=True, **bd_options)
    print_info(nimfa.mf_run(model))
Ejemplo n.º 6
0
def factorize(V):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix.

    Progress and a short statistics report are printed along the way.

    :param V: The MovieLens data matrix.
    :type V: `scipy.sparse.csr_matrix`
    :returns: Basis and mixture matrices of the fitted factorization model.
    """
    settings = dict(seed="random_vcol",
                    rank=12,
                    method="snmf",
                    max_iter=15,
                    initialize_only=True,
                    version='r',
                    eta=1.,
                    beta=1e-4,
                    i_conv=10,
                    w_min_change=0)
    model = nimfa.mf(V, **settings)
    print("Performing %s %s %d factorization ..." % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")

    sparse_w, sparse_h = fit.fit.sparseness()
    pad = " " * 12  # continuation lines of the report are indented 12 spaces
    report = ("Stats:\n"
              + pad + "- iterations: %d\n"
              + pad + "- Euclidean distance: %5.3f\n"
              + pad + "- Sparseness basis: %5.3f, mixture: %5.3f")
    values = (fit.fit.n_iter, fit.distance(metric='euclidean'), sparse_w, sparse_h)
    print(report % values)
    return fit.basis(), fit.coef()
Ejemplo n.º 7
0
def run_snmnmf(V, V1):
    """
    Run sparse network-regularized multiple NMF on two target matrices.

    ``A`` and ``B`` are random sparse CSR matrices (density 0.7) sized from
    the column counts of ``V1`` and ``V``. Quality measures are printed for
    each target through ``print_info``.

    :param V: First target matrix to estimate.
    :type V: :class:`numpy.matrix`
    :param V1: Second target matrix to estimate.
    :type V1: :class:`numpy.matrix`
    """
    rank = 10
    # A is generated before B, as in the keyword order of the model call.
    network_a = abs(sp.rand(V1.shape[1], V1.shape[1], density=0.7, format='csr'))
    network_b = abs(sp.rand(V.shape[1], V1.shape[1], density=0.7, format='csr'))
    penalties = {"gamma": 0.01, "gamma_1": 0.01, "lamb": 0.01, "lamb_1": 0.01}

    model = nimfa.mf(target=(V, V1), seed="random_c", rank=rank,
                     method="snmnmf", max_iter=12, initialize_only=True,
                     A=network_a, B=network_b, **penalties)
    fit = nimfa.mf_run(model)

    # idx 0: first target and mixture matrix; idx 1: second target and
    # mixture matrix.
    for target_index in (0, 1):
        print_info(fit, idx=target_index)
Ejemplo n.º 8
0
    def run(self,
            seed='random_vcol',
            method='nmf',
            rank=3,
            max_iter=65,
            display_N_tokens=5,
            display_N_documents=3):
        """
        Factorise the term-document matrix and split documents by the result.

        The term-document matrix is rebuilt without PCA, factorised with
        nimfa, and the basis/coefficient matrices are handed to
        ``split_documents`` and ``showfeatures``.

        :param seed: nimfa seeding method name.
        :param method: nimfa factorisation method name.
        :param rank: Factorisation rank.
        :param max_iter: Value passed to nimfa as ``max_iter``.
        :param display_N_tokens: Forwarded to ``split_documents``.
        :param display_N_documents: Forwarded to ``split_documents``.
        """
        # Re-initialise clusters
        if self.clusters != []:
            self.clusters = []

        # We cannot perform PCA with NMF because we only want non-negative
        # vectors.
        self.construct_term_doc_matrix(pca=False)
        V = self.td_matrix
        model = nimfa.mf(V,
                         seed=seed,
                         method=method,
                         rank=rank,
                         max_iter=max_iter)
        fitted = nimfa.mf_run(model)
        w = fitted.basis()
        h = fitted.coef()
        self.split_documents(w,
                             h,
                             self.document_dict,
                             self.attributes,
                             display_N_tokens=display_N_tokens,
                             display_N_documents=display_N_documents)

        # Materialise the values once: a dict view cannot be indexed on
        # Python 3, and on Python 2 ``values()`` would rebuild the list for
        # every row.
        documents = list(self.document_dict.values())
        # Just testing remove it
        self.showfeatures(
            w, h,
            [documents[i]["raw"] for i in range(numpy.shape(w)[0])],
            self.attributes)
Ejemplo n.º 9
0
def decompose_nmf(spectrum_array, n_spectra):
    """
    Decompose spectra with divergence-based NMF.

    :param spectrum_array: Target matrix passed to nimfa.
    :param n_spectra: Factorisation rank.
    :returns: Tuple ``(a, coeffs)``: the transposed basis and the
        coefficient matrix, both converted to numpy arrays.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('\nDecomposing spectra using NMF...')
    fctr = nimfa.mf(spectrum_array, method="nmf", max_iter=10000,
                    rank=n_spectra, update='divergence', objective='div')
    fctr_res = nimfa.mf_run(fctr)
    a = n.transpose(n.array(fctr_res.basis()))
    coeffs = n.array(fctr_res.coef())
    return a, coeffs
Ejemplo n.º 10
0
def factorization(V, rank=4):
    """
    Use NMF to factorize V.

    :param V: Target matrix.
    :param rank: Factorization rank.
    :returns: dict with keys ``"projection"`` (pseudo-inverse of the dense
        basis), ``"feature"`` (``projection * V``), ``"basis"`` and
        ``"coef"`` (dense coefficient matrix).
    """
    fctr = nimfa.mf(
        V,
        method="nmf",
        max_iter=30,
        rank=rank,
        update="divergence",
        objective="div",
        callback_init=init_info,
        callback=init_info,
    )
    fctr_res = nimfa.mf_run(fctr)
    # Parenthesised single-argument prints work on both Python 2 and 3.
    print("calculate generized inverse")
    projection = pinv(fctr_res.basis().todense())
    print("inverse finished")
    return {
        "projection": projection,
        "feature": (projection * V),
        "basis": fctr_res.basis(),
        "coef": fctr_res.coef().todense(),
    }
Ejemplo n.º 11
0
def factorize(V):
    """
    Perform LSNMF factorization on the ORL faces data matrix.

    Progress and a short statistics report are printed along the way.

    :param V: The ORL faces data matrix.
    :type V: `numpy.matrix`
    :returns: Basis and mixture matrices of the fitted factorization model.
    """
    model = nimfa.mf(V,
                     seed="random_vcol",
                     rank=25,
                     method="lsnmf",
                     max_iter=50,
                     initialize_only=True,
                     sub_iter=10,
                     inner_sub_iter=10,
                     beta=0.1,
                     min_residuals=1e-8)
    # All prints use the single-argument call form, which produces the same
    # output on Python 2 and Python 3.
    print("Performing %s %s %d factorization ..." % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")
    pad = " " * 12  # continuation lines of the report are indented 12 spaces
    report = ("Stats:\n"
              + pad + "- iterations: %d\n"
              + pad + "- final projected gradients norm: %5.3f\n"
              + pad + "- Euclidean distance: %5.3f")
    print(report % (fit.fit.n_iter, fit.distance(), fit.distance(metric='euclidean')))
    return fit.basis(), fit.coef()
Ejemplo n.º 12
0
 def run(self, **params):
     """
     Run nimfa's "snmnmf" method on the miRNA and mRNA matrices.

     The data is consolidated on first use. Results are stored on the
     instance as ``W``, ``H1_miRNA``, ``H2_genes`` and ``performance``.

     :param params: Must contain ``'seed'`` (e.g. "random_c"), ``'rank'``
         (e.g. 50) and ``'max_iter'`` (e.g. 500).
     """
     if not self.dataConsolided:
         print("NIMFA_SNMNMF: preparing data")
         self.consolideTheData()
         self.dataConsolided = True
     print("NIMFA_SNMNMF: starting")

     # ``DataFrame.as_matrix()`` was removed from pandas; ``.values`` is the
     # equivalent that exists in old and new releases alike.
     # NOTE(review): assumes miRNA/mRNA are pandas DataFrames - confirm.
     V = self.miRNA.values
     V1 = self.mRNA.values
     A = csr_matrix(self.gene2gene)
     B = csr_matrix(self.miRNA2gene)

     fctr = nimfa.mf(target=(V, V1),
                     seed=params['seed'],
                     rank=params['rank'],
                     method="snmnmf",
                     max_iter=params['max_iter'],
                     initialize_only=True,
                     A=A,
                     B=B,
                     n_run=3,
                     gamma=self.g1,
                     gamma_1=self.g2,
                     lamb=self.l1,
                     lamb_1=self.l2)
     fctr_res = nimfa.mf_run(fctr)
     print("NIMFA_SNMNMF: done")
     # extract the results
     self.W = DataFrame(fctr_res.basis(), index=self.miRNA.index)
     self.H1_miRNA = DataFrame(fctr_res.coef(0), columns=self.miRNA.columns)
     self.H2_genes = DataFrame(fctr_res.coef(1), columns=self.mRNA.columns)
     self.performance = NIMFA_SNMNMFPerformance(fctr_res)
    def nmfMatrix(self, V):
        """
        Factorise V with nimfa's "lsnmf" method and print diagnostics.

        The target, both factors, their product and several quality measures
        are printed.

        :param V: Target matrix; converted with ``np.array`` before use.
        :returns: Tuple ``(W, H)`` of basis and coefficient matrices.
        """
        # Every print uses the single-argument call form so the output is the
        # same on Python 2 and Python 3.
        print("---")
        print("NMF")
        print("---")

        V = np.array(V)
        print("Target matrix")
        print(V)

        fctr = nimfa.mf(V, seed='random_vcol', method='lsnmf', rank=40, max_iter=10)
        fctr_res = nimfa.mf_run(fctr)

        W = fctr_res.basis()
        print("Basis matrix")
        print(W)
        H = fctr_res.coef()
        print("Coef")
        print(H)

        print("Estimate")
        print(np.dot(W, H))

        print('Rss: %5.4f' % fctr_res.fit.rss())
        print('Evar: %5.4f' % fctr_res.fit.evar())
        print('K-L divergence: %5.4f' % fctr_res.distance(metric='kl'))
        # sparseness() supplies both values for the two placeholders.
        print('Sparseness, W: %5.4f, H: %5.4f' % fctr_res.fit.sparseness())

        return W, H
Ejemplo n.º 14
0
def factorize(V):
    """
    Perform NMF - Divergence factorization on the sparse Medlars data matrix.

    Progress and a short statistics report are printed along the way.

    :param V: The Medlars data matrix.
    :type V: `scipy.sparse.csr_matrix`
    :returns: Basis and mixture matrices of the fitted factorization model.
    """
    model = nimfa.mf(V,
                     seed="random_vcol",
                     rank=12,
                     method="nmf",
                     max_iter=15,
                     initialize_only=True,
                     update='divergence',
                     objective='div')
    # All prints use the single-argument call form, which produces the same
    # output on Python 2 and Python 3.
    print("Performing %s %s %d factorization ..." % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")
    sparse_w, sparse_h = fit.fit.sparseness()
    pad = " " * 12  # continuation lines of the report are indented 12 spaces
    report = ("Stats:\n"
              + pad + "- iterations: %d\n"
              + pad + "- KL Divergence: %5.3f\n"
              + pad + "- Euclidean distance: %5.3f\n"
              + pad + "- Sparseness basis: %5.3f, mixture: %5.3f")
    print(report % (fit.fit.n_iter, fit.distance(),
                    fit.distance(metric='euclidean'), sparse_w, sparse_h))
    return fit.basis(), fit.coef()
Ejemplo n.º 15
0
def factorize(V):
    """
    Factorise the sparse Medlars data matrix with divergence-based NMF.

    Progress and a short statistics report are printed along the way.

    :param V: The Medlars data matrix.
    :type V: `scipy.sparse.csr_matrix`
    :returns: Basis and mixture matrices of the fitted factorization model.
    """
    settings = {
        "seed": "random_vcol",
        "rank": 12,
        "method": "nmf",
        "max_iter": 15,
        "initialize_only": True,
        "update": 'divergence',
        "objective": 'div',
    }
    model = nimfa.mf(V, **settings)
    banner = "Performing %s %s %d factorization ..."
    print(banner % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")

    sparse_w, sparse_h = fit.fit.sparseness()
    pad = " " * 12  # continuation lines of the report are indented 12 spaces
    report = ("Stats:\n"
              + pad + "- iterations: %d\n"
              + pad + "- KL Divergence: %5.3f\n"
              + pad + "- Euclidean distance: %5.3f\n"
              + pad + "- Sparseness basis: %5.3f, mixture: %5.3f")
    values = (fit.fit.n_iter, fit.distance(),
              fit.distance(metric='euclidean'), sparse_w, sparse_h)
    print(report % values)
    return fit.basis(), fit.coef()
Ejemplo n.º 16
0
def run_snmnmf(V, V1):
    """
    Fit a sparse network-regularized multiple NMF model to V and V1.

    ``A`` and ``B`` are random sparse CSR matrices (density 0.7) sized from
    the column counts of the targets. The fit is reported once per target
    through ``print_info``.

    :param V: First target matrix to estimate.
    :type V: :class:`numpy.matrix`
    :param V1: Second target matrix to estimate.
    :type V1: :class:`numpy.matrix`
    """
    rank = 10

    def random_csr(n_rows, n_cols):
        # Random sparse matrix with 70% density, passed through abs().
        return abs(sp.rand(n_rows, n_cols, density=0.7, format='csr'))

    # A is drawn before B, matching the original keyword order.
    matrix_a = random_csr(V1.shape[1], V1.shape[1])
    matrix_b = random_csr(V.shape[1], V1.shape[1])

    model = nimfa.mf(target=(V, V1),
                     seed="random_c",
                     rank=rank,
                     method="snmnmf",
                     max_iter=12,
                     initialize_only=True,
                     A=matrix_a,
                     B=matrix_b,
                     gamma=0.01,
                     gamma_1=0.01,
                     lamb=0.01,
                     lamb_1=0.01)
    fit = nimfa.mf_run(model)
    # Quality measures for the first target and mixture matrix.
    print_info(fit, idx=0)
    # Quality measures for the second target and mixture matrix.
    print_info(fit, idx=1)
Ejemplo n.º 17
0
def factorize(V):
    """
    Perform LSNMF factorization on the CBCL faces data matrix.

    Progress and a short statistics report are printed along the way.

    :param V: The CBCL faces data matrix.
    :type V: `numpy.matrix`
    :returns: Basis and mixture matrices of the fitted factorization model.
    """
    settings = dict(seed="random_vcol",
                    rank=49,
                    method="lsnmf",
                    max_iter=50,
                    initialize_only=True,
                    sub_iter=10,
                    inner_sub_iter=10,
                    beta=0.1,
                    min_residuals=1e-8)
    model = nimfa.mf(V, **settings)
    banner = "Performing %s %s %d factorization ..."
    print(banner % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")

    sparse_w, sparse_h = fit.fit.sparseness()
    pad = " " * 12  # continuation lines of the report are indented 12 spaces
    # The Euclidean line ends with a space before its newline in the report.
    report = ("Stats:\n"
              + pad + "- iterations: %d\n"
              + pad + "- final projected gradients norm: %5.3f\n"
              + pad + "- Euclidean distance: %5.3f \n"
              + pad + "- Sparseness basis: %5.3f, mixture: %5.3f")
    values = (fit.fit.n_iter, fit.distance(),
              fit.distance(metric='euclidean'), sparse_w, sparse_h)
    print(report % values)
    return fit.basis(), fit.coef()
Ejemplo n.º 18
0
def max_guess_select(ratings, users, rank=9, user=None):
    """
    Pick a user and the unrated partner with the highest fitted value.

    A symmetric sparse matrix is filled from ``ratings``, factorised with
    nimfa's "snmf" method, and the fitted matrix is used to score candidates.

    :param ratings: Mapping whose keys are indexed as pairs and whose values
        are written into the matrix.
    :param users: Mapping from user to matrix index.
    :param rank: Factorisation rank.
    :param user: User to expand. When None, the user contained in the fewest
        rating keys is chosen.
    :returns: Tuple ``(user, rval)`` where ``rval`` is the best-scoring user
        that has no rating with ``user`` in either order.
    """
    size = len(users)
    matrix = sp.dok_matrix((size, size))
    for pair, value in ratings.items():
        row, col = users[pair[0]], users[pair[1]]
        matrix[row, col] = value
        matrix[col, row] = value

    # Run sparse matrix factorisation
    snmf_options = dict(seed="random_c",
                        rank=rank,
                        method="snmf",
                        max_iter=12,
                        initialize_only=True,
                        version='r',
                        eta=1.,
                        beta=1e-4,
                        i_conv=10,
                        w_min_change=0)
    result = nimfa.mf_run(nimfa.mf(matrix, **snmf_options))

    if user is None:
        # Pick a user to expand
        def rating_count(candidate):
            return sum(1 for key in ratings if candidate in key)

        user = min(users, key=rating_count)

    recommendations = result.fitted()

    def fitted_value(other):
        return recommendations[users[user], users[other]]

    unrated = [
        other for other in users
        if not ((other, user) in ratings or (user, other) in ratings)
    ]
    rval = max(unrated, key=fitted_value)
    return user, rval
Ejemplo n.º 19
0
def factorize(V):
    """
    Perform SNMF/R factorization on the sparse MovieLens data matrix.

    Progress and a short statistics report are printed along the way.

    :param V: The MovieLens data matrix.
    :type V: `scipy.sparse.csr_matrix`
    :returns: Basis and mixture matrices of the fitted factorization model.
    """
    model = nimfa.mf(V,
                     seed="random_vcol",
                     rank=12,
                     method="snmf",
                     max_iter=15,
                     initialize_only=True,
                     version='r',
                     eta=1.,
                     beta=1e-4,
                     i_conv=10,
                     w_min_change=0)
    # All prints use the single-argument call form, which produces the same
    # output on Python 2 and Python 3.
    print("Performing %s %s %d factorization ..." % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")
    sparse_w, sparse_h = fit.fit.sparseness()
    pad = " " * 12  # continuation lines of the report are indented 12 spaces
    report = ("Stats:\n"
              + pad + "- iterations: %d\n"
              + pad + "- Euclidean distance: %5.3f\n"
              + pad + "- Sparseness basis: %5.3f, mixture: %5.3f")
    print(report % (fit.fit.n_iter, fit.distance(metric='euclidean'), sparse_w, sparse_h))
    return fit.basis(), fit.coef()
Ejemplo n.º 20
0
def run_bd(V):
    """
    Run Bayesian decomposition on V and report the fit via ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10

    def zeros(n_rows, n_cols):
        # All-zero matrix of the requested shape.
        return np.mat(np.zeros((n_rows, n_cols)))

    # Built in the same order as the original keyword arguments.
    alpha = zeros(V.shape[0], rank)
    beta = zeros(rank, V.shape[1])
    n_w = zeros(rank, 1)
    n_h = zeros(rank, 1)

    model = nimfa.mf(V,
                     seed="random_c",
                     rank=rank,
                     method="bd",
                     max_iter=12,
                     initialize_only=True,
                     alpha=alpha,
                     beta=beta,
                     theta=0.0,
                     k=0.0,
                     sigma=1.0,
                     skip=100,
                     stride=1,
                     n_w=n_w,
                     n_h=n_h,
                     n_sigma=False)
    print_info(nimfa.mf_run(model))
Ejemplo n.º 21
0
    def run(self, **params):
        """
        Run nimfa's "snmnmf" method on the miRNA and mRNA matrices.

        The data is consolidated on first use. Results are stored on the
        instance as ``W``, ``H1_miRNA``, ``H2_genes`` and ``performance``.

        :param params: Must contain ``'seed'`` (e.g. "random_c"), ``'rank'``
            (e.g. 50) and ``'max_iter'`` (e.g. 500).
        """
        if not self.dataConsolided:
            print("NIMFA_SNMNMF: preparing data")
            self.consolideTheData()
            self.dataConsolided = True
        print("NIMFA_SNMNMF: starting")

        # ``DataFrame.as_matrix()`` was removed from pandas; ``.values`` is
        # the equivalent that exists in old and new releases alike.
        # NOTE(review): assumes miRNA/mRNA are pandas DataFrames - confirm.
        V = self.miRNA.values
        V1 = self.mRNA.values
        A = csr_matrix(self.gene2gene)
        B = csr_matrix(self.miRNA2gene)

        fctr = nimfa.mf(
            target=(V, V1),
            seed=params['seed'],
            rank=params['rank'],
            method="snmnmf",
            max_iter=params['max_iter'],
            initialize_only=True,
            A=A,
            B=B,
            n_run=1,
            gamma=self.g1,
            gamma_1=self.g2,
            lamb=self.l1,
            lamb_1=self.l2)
        fctr_res = nimfa.mf_run(fctr)
        print("NIMFA_SNMNMF: done")
        # extract the results
        self.W = DataFrame(fctr_res.basis(), index=self.miRNA.index)
        self.H1_miRNA = DataFrame(fctr_res.coef(0), columns=self.miRNA.columns)
        self.H2_genes = DataFrame(fctr_res.coef(1), columns=self.mRNA.columns)
        self.performance = NIMFA_SNMNMFPerformance(fctr_res)
Ejemplo n.º 22
0
def nmf(matrix, k=c_K):
    """
    Factorise ``matrix`` with nimfa's "lsnmf" method.

    :param matrix: Target matrix.
    :param k: Factorisation rank; defaults to the module-level ``c_K``.
    :returns: Tuple ``(basis, coef)`` of the fitted model.
    """
    settings = {
        "seed": 'random_vcol',
        "method": 'lsnmf',
        "rank": k,
        "max_iter": c_NMF_MAXITR,
    }
    outcome = nimfa.mf_run(nimfa.mf(matrix, **settings))
    return outcome.basis(), outcome.coef()
    def nmfMatrix(self, V, method, rank, maxIter):
        """
        Factorise V with nimfa, print diagnostics and return the factors.

        :param V: Target matrix; converted with ``np.array`` before use.
        :param method: nimfa method name passed to ``nimfa.mf``.
        :param rank: Factorisation rank (number of bases).
        :param maxIter: Value passed to nimfa as ``max_iter``.
        :returns: Tuple ``(NW, NH, sm)``: basis and coefficient matrices as
            ``numpy.ndarray`` plus the result of ``fctr_res.summary()``.
        """
        # Every print uses the single-argument call form so the output is the
        # same on Python 2 and Python 3.
        print("---")
        print("NMF")
        print("---")

        V = np.array(V)
        print("Target matrix")
        print(V.shape[0])
        print(V.shape[1])
        print(V)

        # NOTE(review): these random initial factors are computed but never
        # passed to nimfa.mf below. Only an earlier "lsnmf" variant with
        # W=initW, H=initH used them. The call is kept unchanged; confirm it
        # can be removed.
        initiarizer = nimfa.methods.seeding.random.Random()
        initW, initH = initiarizer.initialize(V, rank, {})

        # The number of bases and iterations for NMF come from the arguments.
        fctr = nimfa.mf(V, seed='random_vcol', method=method, rank=rank, max_iter=maxIter)
        fctr_res = nimfa.mf_run(fctr)

        W = fctr_res.basis()
        print("Basis matrix")
        print(W.shape[0])
        print(W.shape[1])
        print(W)
        H = fctr_res.coef()
        print("Coef")
        print(H.shape[0])
        print(H.shape[1])
        print(H)

        print("Estimate")
        print(np.dot(W, H))

        print('Rss: %5.4f' % fctr_res.fit.rss())
        print('Evar: %5.4f' % fctr_res.fit.evar())
        print('K-L divergence: %5.4f' % fctr_res.distance(metric='kl'))
        # sparseness() supplies both values for the two placeholders.
        print('Sparseness, W: %5.4f, H: %5.4f' % fctr_res.fit.sparseness())

        sm = fctr_res.summary()
        print(type(sm))

        # Convert to numpy.ndarray because the original type causes problems
        # when plotting.
        NW = np.asarray(W)
        NH = np.asarray(H)
        return NW, NH, sm
Ejemplo n.º 24
0
    def learnModel(self, X):
        """
        Learn X using a matrix factorisation method.

        If ``self.rank`` is an ``int`` one model of that rank is fitted and
        the product of its basis and coefficient matrices is returned.
        Otherwise ``self.rank`` is treated as an array of ranks: a model is
        fitted per entry, each one after the first receiving the previous
        factors as ``W``/``H``, and a list of the products is returned.
        """
        rank_is_scalar = isinstance(self.rank, int)

        def fit_model(rank, **warm_start):
            # Fit one model and return its (basis, coefficient) matrices.
            model = nimfa.mf(X,
                             method=self.method,
                             max_iter=self.maxIter,
                             rank=rank,
                             **warm_start)
            fitted = nimfa.mf_run(model)
            return fitted.basis(), fitted.coef()

        if rank_is_scalar:
            W, H = fit_model(self.rank)
            return W.dot(H)

        W, H = fit_model(self.rank[0])
        predXList = [W.dot(H)]

        step = 1
        n_ranks = self.rank.shape[0]
        while step < n_ranks:
            # Later fits are handed the factors of the previous one.
            W, H = fit_model(self.rank[step], W=W, H=H)
            predXList.append(W.dot(H))
            step += 1

        return predXList
Ejemplo n.º 25
0
    def _NIMFA_NMF(self, X, nBases):
        """
        Factorise X with nimfa NMF ("nndsvd" seeding) and store dense factors.

        :param X: Target matrix.
        :param nBases: Factorisation rank.
        :returns: Tuple ``(self.W, self.H)`` of the dense basis and
            coefficient matrices, which are also stored on the instance.
        """
        settings = dict(seed="nndsvd", rank=nBases, method="nmf",
                        initialize_only=True)
        result = nimfa.mf_run(nimfa.mf(X, **settings))

        basis = result.basis()
        mixture = result.coef()

        self.W = basis.todense()
        self.H = mixture.todense()
        return (self.W, self.H)
Ejemplo n.º 26
0
def cluster_nmf(vectors, num_clusters):
    """
    Cluster vectors using Non Negative Matrix Factorization.

    The vectors are transposed so that each becomes a column of the target
    matrix, which is factorised starting from random fixed factors. Each
    column of the mixture matrix is assigned the row index of its largest
    entry.

    Inputs:
    vectors      -- matrix containing rows of vectors
    num_clusters -- number of clusters to create

    Returns a list with one assignment per column of the mixture matrix.
    """
    # Every print uses the single-argument call form so the output is the
    # same on Python 2 and Python 3.
    print("Starting NMF clustering")

    start_time = time.time()

    # Run NMF
    vectors_matrix = numpy.matrix(vectors)
    vectors_matrix = vectors_matrix.transpose()
    print("Created vectors_matrix")

    # Generate random matrix factors which we will pass as fixed factors to
    # nimfa
    init_W = numpy.random.rand(vectors_matrix.shape[0], num_clusters)
    init_H = numpy.random.rand(num_clusters, vectors_matrix.shape[1])
    print("Generated random matrix factors")

    fctr = nimfa.mf(vectors_matrix,
                    method="nmf",
                    seed="fixed",
                    W=init_W,
                    H=init_H,
                    rank=num_clusters)
    fctr_res = nimfa.mf_run(fctr)
    print("NIMFA")

    # Basis matrix
    W = fctr_res.basis()
    # Mixture matrix
    H = fctr_res.coef()
    print("Extracted Basis and Mixture matrices")

    # get assignments
    assignment = []
    for index in range(H.shape[1]):
        column = list(H[:, index])
        assignment.append(column.index(max(column)))
    print("Assignments extracted")

    # Print the loss function (Euclidean distance between target matrix and
    # its estimate).
    print("Euclidean distance: %5.3e" % fctr_res.distance(metric="euclidean"))

    end_time = time.time()
    # %s applies str() to the duration, as the multi-item print statement did.
    print("Clustering required %s seconds" % (end_time - start_time))

    return assignment
Ejemplo n.º 27
0
    def _NIMFA_NMF(self, X, nBases):
        """
        Run nimfa NMF on X and keep the dense factors on the instance.

        The model is seeded with 'nndsvd' and uses ``nBases`` as its rank.

        :param X: Target matrix.
        :param nBases: Factorisation rank.
        :returns: Tuple ``(self.W, self.H)``, the dense basis and coefficient
            matrices.
        """
        options = {
            "seed": 'nndsvd',
            "rank": nBases,
            "method": "nmf",
            "initialize_only": True,
        }
        fitted = nimfa.mf_run(nimfa.mf(X, **options))
        factors = (fitted.basis(), fitted.coef())

        self.W = factors[0].todense()
        self.H = factors[1].todense()
        return (self.W, self.H)
Ejemplo n.º 28
0
def max_guess_select(ratings, users, rank=9, user=None):
    """
    Choose a user and the not-yet-rated partner with the highest fitted value.

    ``ratings`` is written symmetrically into a sparse matrix indexed through
    ``users``. The matrix is factorised with nimfa's "snmf" method and the
    fitted matrix ranks the candidates.

    :param ratings: Mapping whose keys are indexed as pairs and whose values
        are written into the matrix.
    :param users: Mapping from user to matrix index.
    :param rank: Factorisation rank.
    :param user: User to expand. When None, the user contained in the fewest
        rating keys is used.
    :returns: Tuple ``(user, rval)``.
    """
    n_users = len(users)
    matrix = sp.dok_matrix((n_users, n_users))
    for key, score in ratings.items():
        first = users[key[0]]
        second = users[key[1]]
        matrix[first, second] = score
        matrix[second, first] = score

    # Run sparse matrix factorisation
    factor = nimfa.mf(
        matrix,
        seed="random_c",
        rank=rank,
        method="snmf",
        max_iter=12,
        initialize_only=True,
        version='r',
        eta=1.,
        beta=1e-4,
        i_conv=10,
        w_min_change=0,
    )
    result = nimfa.mf_run(factor)

    if user is None:
        # Pick a user to expand
        user = min(
            users,
            key=lambda u: sum(1 for rated in ratings if u in rated),
        )

    recommendations = result.fitted()

    candidates = []
    for other in users:
        if (other, user) in ratings:
            continue
        if (user, other) in ratings:
            continue
        candidates.append(other)

    rval = max(candidates,
               key=lambda x: recommendations[users[user], users[x]])
    return user, rval
Ejemplo n.º 29
0
    def fit(self, k=100, max_iter=15, method='lsnmf'):
        """
        Factorise the preference matrix and store dense user/item factors.

        :param k: Requested rank; capped at the number of columns of the
            preference matrix.
        :param max_iter: Value passed to nimfa as ``max_iter``.
        :param method: nimfa factorisation method name.

        Sets ``self.user_matrix`` (dense basis) and ``self.item_matrix``
        (dense coefficients).
        """
        preferences = self.recommender_data.preference_matrix
        # The rank is never larger than the column count.
        k = min(k, preferences.shape[1])

        settings = {
            "seed": "random_vcol",
            "rank": k,
            "method": method,
            "max_iter": max_iter,
        }
        result = nimfa.mf_run(nimfa.mf(preferences, **settings))

        self.user_matrix = result.basis().todense()
        self.item_matrix = result.coef().todense()
Ejemplo n.º 30
0
def _factorize(matrix):
    """
    Factorize the matrix with divergence-based NMF (rank 15).

    :param matrix: Target matrix.
    :returns: Tuple ``(basis, coef)`` of the fitted model.
    """
    # Model settings
    settings = dict(seed="random_vcol",
                    rank=15,
                    method="nmf",
                    max_iter=15,
                    initialize_only=True,
                    update='divergence',
                    objective='div')
    # Build the model and fit it in one step
    fitted = mf_run(mf(matrix, **settings))
    return fitted.basis(), fitted.coef()
Ejemplo n.º 31
0
 def run(self, seed = 'random_vcol', method='nmf', rank=3, max_iter=65, display_N_tokens = 5, display_N_documents = 3):
     """
     Factorise the term-document matrix and split documents by the result.

     The term-document matrix is rebuilt without PCA, factorised with nimfa,
     and the basis/coefficient matrices are handed to ``split_documents``
     and ``showfeatures``.

     :param seed: nimfa seeding method name.
     :param method: nimfa factorisation method name.
     :param rank: Factorisation rank.
     :param max_iter: Value passed to nimfa as ``max_iter``.
     :param display_N_tokens: Forwarded to ``split_documents``.
     :param display_N_documents: Forwarded to ``split_documents``.
     """
     # Re-initialise clusters
     if self.clusters != []:
         self.clusters = []

     # We cannot perform PCA with NMF because we only want non-negative
     # vectors.
     self.construct_term_doc_matrix(pca=False)
     V = self.td_matrix
     model = nimfa.mf(V, seed=seed, method=method, rank=rank, max_iter=max_iter)
     fitted = nimfa.mf_run(model)
     w = fitted.basis()
     h = fitted.coef()
     self.split_documents(w, h, self.document_dict, self.attributes,
                          display_N_tokens=display_N_tokens,
                          display_N_documents=display_N_documents)

     # Materialise the values once: a dict view cannot be indexed on
     # Python 3, and on Python 2 ``values()`` would rebuild the list for
     # every row.
     documents = list(self.document_dict.values())
     # Just testing remove it
     self.showfeatures(w, h,
                       [documents[i]["raw"] for i in range(numpy.shape(w)[0])],
                       self.attributes)
Ejemplo n.º 32
0
def run_nmf():
    """
    Load the train/test matrices, build an NMF model and write a CSV.

    Paths (``f_in_trn``, ``f_in_tst``, ``f_out``) and settings (``my_rank``,
    ``iters``) are read from the enclosing module.
    """
    file_name = inspect.getfile(inspect.currentframe())
    # Read in pre-processed matrices; the files are closed as soon as they
    # have been parsed.
    print(file_name + ': Reading train/test matrix w/ dim = ' + f_in_trn)
    with open(f_in_trn, 'rb') as trn_file:
        Xtrn = ensure_dim(np.loadtxt(trn_file, delimiter=',', skiprows=0))
    with open(f_in_tst, 'rb') as tst_file:
        Xtst = ensure_dim(np.loadtxt(tst_file, delimiter=',', skiprows=0))

    # Run nmf
    # str() keeps the message from raising TypeError when my_rank is an int.
    print(file_name + ': Running non-negative matrix facorization w/ rank = ' + str(my_rank))
    # NOTE(review): the model is built but nimfa.mf_run is never called on
    # it, and neither it nor Xtst is used below - confirm intent.
    nmf = nimfa.mf(Xtrn, method='nmf', max_iter=iters, rank=my_rank)

    # Output submission
    # NOTE(review): Ytst is not defined in this function; presumably a
    # module-level name - verify.
    print(file_name + ': Saving csv to ' + f_out)
    colfmt = ['%i'] + ['%f'] * (Ytst.shape[1] - 1)
    np.savetxt(f_out, Ytst, delimiter=',', fmt=colfmt)
Ejemplo n.º 33
0
def run_snmf(V):
    """
    Run sparse nonnegative matrix factorization in both versions.

    SNMF/R (version 'r', "random_c" seeding) is fitted first and SNMF/L
    (version 'l', "random_vcol" seeding) second. Each fit is reported
    through ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    shared = dict(rank=rank,
                  method="snmf",
                  max_iter=12,
                  initialize_only=True,
                  eta=1.,
                  beta=1e-4,
                  i_conv=10,
                  w_min_change=0)
    # (seeding method, SNMF version) in run order: SNMF/R, then SNMF/L.
    for seeding, version in (("random_c", 'r'), ("random_vcol", 'l')):
        model = nimfa.mf(V, seed=seeding, version=version, **shared)
        print_info(nimfa.mf_run(model))
Ejemplo n.º 34
0
def run_snmf(V):
    """
    Run both sparse NMF variants (SNMF/R, then SNMF/L) on ``V``.

    The two runs differ only in seeding method and ``version``; every other
    option is shared. Each fit is reported through ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    # Options common to both variants.
    shared = dict(rank=10, method="snmf", max_iter=12, initialize_only=True,
                  eta=1., beta=1e-4, i_conv=10, w_min_change=0)

    # SNMF/R
    snmf_r = nimfa.mf(V, seed="random_c", version='r', **shared)
    print_info(nimfa.mf_run(snmf_r))

    # SNMF/L
    snmf_l = nimfa.mf(V, seed="random_vcol", version='l', **shared)
    print_info(nimfa.mf_run(snmf_l))
Ejemplo n.º 35
0
def factor_eval(data, ranks, nrun=40, method="nmf", max_iter=2000):
    """
    Collect the 'cophenetic' summary entry for each candidate rank.

    For every rank the data is factorized ``nrun`` times (with
    ``track_factor=True``) and the ``'cophenetic'`` value of the fit summary
    is printed and recorded.

    :param data: Target matrix passed to ``nimfa.mf``.
    :param ranks: Iterable of factorization ranks to evaluate.
    :param nrun: Number of runs per rank (forwarded as ``n_run``).
    :param method: Factorization method name understood by nimfa.
    :param max_iter: Maximum number of iterations per run.
    :returns: List with one 'cophenetic' value per rank, in input order.
    """
    coefs = []
    for rank in ranks:
        fctr = nimfa.mf(data,
                        method=method,
                        max_iter=max_iter,
                        rank=rank,
                        n_run=nrun,
                        track_factor=True)
        coef = nimfa.mf_run(fctr).summary()['cophenetic']
        # Parenthesized form prints the same on Python 2 and Python 3.
        print(coef)
        coefs.append(coef)
    return coefs
Ejemplo n.º 36
0
def run_one(V, rank):
    """
    Run standard NMF on leukemia data set. 50 runs of Standard NMF are performed and obtained consensus matrix
    averages all 50 connectivity matrices.

    The averaged matrix is reordered with ``reorder`` and drawn with ``plot``.

    :param V: Target matrix with gene expression data.
    :type V: `numpy.matrix` (of course it could be any format of scipy.sparse, but we will use numpy here)
    :param rank: Factorization rank.
    :type rank: `int`
    """
    print("================= Rank = %d =================" % rank)
    consensus = np.mat(np.zeros((V.shape[1], V.shape[1])))
    for i in range(50):
        # Standard NMF with Euclidean update equations is used. For initialization random Vcol method is used.
        # Objective function is the number of consecutive iterations in which the connectivity matrix has not changed.
        # We demand that factorization does not terminate before 40 consecutive iterations (conn_change) in which
        # connectivity matrix does not change. For a backup we also specify the maximum number of iterations.
        # Note that the satisfiability of one stopping criteria terminates the run (there is no chance for divergence).
        model = nimfa.mf(
            V,
            method="nmf",
            rank=rank,
            seed="random_vcol",
            max_iter=200,
            update="euclidean",
            objective="conn",
            conn_change=40,
            initialize_only=True,
        )
        fit = nimfa.mf_run(model)
        print("%2d / 50 :: %s - init: %s ran with  ... %3d / 200 iters ..." % (
            i + 1,
            fit.fit,
            fit.fit.seed,
            fit.fit.n_iter,
        ))
        # Compute connectivity matrix of factorization.
        # Again, we could use multiple runs support of the nimfa library, track factorization model across 50 runs and then
        # just call fit.consensus()
        consensus += fit.fit.connectivity()
    # averaging connectivity matrices
    consensus /= 50.0
    # reorder consensus matrix
    p_consensus = reorder(consensus)
    # plot reordered consensus matrix
    plot(p_consensus, rank)
Ejemplo n.º 37
0
def run_nsnmf(V):
    """
    Factorize ``V`` with nonsmooth NMF and report the fit via ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    settings = {
        "seed": "random",
        "rank": 10,
        "method": "nsnmf",
        "max_iter": 12,
        "initialize_only": True,
        "theta": 0.5,
    }
    fitted = nimfa.mf_run(nimfa.mf(V, **settings))
    print_info(fitted)
Ejemplo n.º 38
0
def run_pmf(V):
    """
    Factorize ``V`` with probabilistic matrix factorization and report the fit.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    settings = {
        "seed": "random_vcol",
        "rank": 10,
        "method": "pmf",
        "max_iter": 12,
        "initialize_only": True,
        "rel_error": 1e-5,
    }
    fitted = nimfa.mf_run(nimfa.mf(V, **settings))
    print_info(fitted)
Ejemplo n.º 39
0
def run_nsnmf(V):
    """
    Run nonsmooth NMF on ``V`` (rank 10, 12 iterations at most, theta 0.5).

    The model is built with ``nimfa.mf``, run with ``nimfa.mf_run`` and the
    fit is passed to ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    factorization_rank = 10
    nsnmf_model = nimfa.mf(
        V, seed="random", rank=factorization_rank, method="nsnmf",
        max_iter=12, initialize_only=True, theta=0.5)
    print_info(nimfa.mf_run(nsnmf_model))
Ejemplo n.º 40
0
def run_pmf(V):
    """
    Run probabilistic matrix factorization on ``V`` (rank 10, at most 12 iterations).

    The model is built with ``nimfa.mf``, run with ``nimfa.mf_run`` and the
    fit is passed to ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    factorization_rank = 10
    pmf_model = nimfa.mf(
        V, seed="random_vcol", rank=factorization_rank, method="pmf",
        max_iter=12, initialize_only=True, rel_error=1e-5)
    print_info(nimfa.mf_run(pmf_model))
Ejemplo n.º 41
0
def run_bmf(V):
    """
    Run binary matrix factorization on ``V`` and report the fit via ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    factorization_rank = 10
    bmf_model = nimfa.mf(
        V, seed="random_vcol", rank=factorization_rank, method="bmf",
        max_iter=12, initialize_only=True, lambda_w=1.1, lambda_h=1.1)
    print_info(nimfa.mf_run(bmf_model))
Ejemplo n.º 42
0
def run_psmf(V):
    """
    Run probabilistic sparse matrix factorization on ``V`` and report the fit.

    No seeding method is used (``seed=None``); a prior of 10 values drawn
    uniformly from [0, 1) is passed to the model.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    factorization_rank = 10
    generator = np.random.RandomState()
    prior = generator.uniform(low=0., high=1., size=10)
    psmf_model = nimfa.mf(
        V, seed=None, rank=factorization_rank, method="psmf",
        max_iter=12, initialize_only=True, prior=prior)
    print_info(nimfa.mf_run(psmf_model))
Ejemplo n.º 43
0
def run_bmf(V):
    """
    Factorize ``V`` with binary matrix factorization and report the fit.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    settings = {
        "seed": "random_vcol",
        "rank": 10,
        "method": "bmf",
        "max_iter": 12,
        "initialize_only": True,
        "lambda_w": 1.1,
        "lambda_h": 1.1,
    }
    fitted = nimfa.mf_run(nimfa.mf(V, **settings))
    print_info(fitted)
Ejemplo n.º 44
0
def nmf(X, method='sklearn', **nmfparams):
    """
    Calculates the non-negative matrix factorization of an input matrix.

    :param X: Matrix to factorize.
    :param method: Backend to use: ``'sklearn'`` (the ``NMF`` estimator) or
                   ``'nimfa'``.
    :param nmfparams: Keyword arguments forwarded unchanged to ``NMF(...)``
                      or to ``nimfa.mf(X, ...)``.
    :returns: Tuple ``(H, W, model)``. With ``'sklearn'``, ``H`` is the
              result of ``fit_transform(X)`` and ``W`` is ``components_``;
              with ``'nimfa'``, ``H`` is ``coef()`` and ``W`` is ``basis()``.
              ``model`` is the fitted estimator / fit object.
    :raises ValueError: If ``method`` is not a supported backend.
    """
    # NOTE(review): the two backends may not agree on which factor is
    # returned as H and which as W -- confirm against the callers.
    if method == 'sklearn':
        model = NMF(**nmfparams)
        H = model.fit_transform(X)
        W = model.components_
    elif method == 'nimfa':
        model_tmp = nimfa.mf(X, **nmfparams)
        model = nimfa.mf_run(model_tmp)
        H = model.coef()
        W = model.basis()
    else:
        # Fail with a clear message instead of an UnboundLocalError at return.
        raise ValueError(
            "Unknown NMF method %r; expected 'sklearn' or 'nimfa'" % (method,))

    return (H, W, model)
Ejemplo n.º 45
0
def run_psmf(V):
    """
    Factorize ``V`` with probabilistic sparse matrix factorization.

    The prior is a vector of 10 uniform samples from [0, 1); the fit is
    reported through ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    prng = np.random.RandomState()
    settings = {
        "seed": None,
        "rank": 10,
        "method": "psmf",
        "max_iter": 12,
        "initialize_only": True,
        "prior": prng.uniform(low=0., high=1., size=10),
    }
    fitted = nimfa.mf_run(nimfa.mf(V, **settings))
    print_info(fitted)
Ejemplo n.º 46
0
def nmf(X, method='sklearn', **nmfparams):
    """
    Calculates the non-negative matrix factorization of an input matrix.

    :param X: Matrix to factorize.
    :param method: Backend to use: ``'sklearn'`` (the ``NMF`` estimator) or
                   ``'nimfa'``.
    :param nmfparams: Keyword arguments forwarded unchanged to ``NMF(...)``
                      or to ``nimfa.mf(X, ...)``.
    :returns: Tuple ``(H, W, model)``. With ``'sklearn'``, ``H`` is the
              result of ``fit_transform(X)`` and ``W`` is ``components_``;
              with ``'nimfa'``, ``H`` is ``coef()`` and ``W`` is ``basis()``.
              ``model`` is the fitted estimator / fit object.
    :raises ValueError: If ``method`` is not a supported backend.
    """
    # NOTE(review): the two backends may not agree on which factor is
    # returned as H and which as W -- confirm against the callers.
    if method == 'sklearn':
        model = NMF(**nmfparams)
        H = model.fit_transform(X)
        W = model.components_
    elif method == 'nimfa':
        model_tmp = nimfa.mf(X, **nmfparams)
        model = nimfa.mf_run(model_tmp)
        H = model.coef()
        W = model.basis()
    else:
        # Fail with a clear message instead of an UnboundLocalError at return.
        raise ValueError(
            "Unknown NMF method %r; expected 'sklearn' or 'nimfa'" % (method,))

    return (H, W, model)
Ejemplo n.º 47
0
def run_lfnmf(V):
    """
    Factorize ``V`` with local Fisher NMF, starting from random factors.

    No seeding method is used; instead the initial ``W`` and ``H`` are the
    absolute values of standard-normal samples. The fit is reported through
    ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    generator = np.random.RandomState()
    # W is drawn before H, so the random stream is consumed in that order.
    initial_w = abs(generator.randn(V.shape[0], rank))
    initial_h = abs(generator.randn(rank, V.shape[1]))
    lfnmf_model = nimfa.mf(
        V, seed=None, W=initial_w, H=initial_h, rank=rank, method="lfnmf",
        max_iter=12, initialize_only=True, alpha=0.01)
    print_info(nimfa.mf_run(lfnmf_model))
Ejemplo n.º 48
0
def run_lfnmf(V):
    """
    Run local Fisher NMF on ``V`` with explicitly supplied initial factors.

    The initial basis (``V.shape[0]`` x rank) and mixture (rank x
    ``V.shape[1]``) matrices hold absolute values of standard-normal draws.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    pnrg = np.random.RandomState()
    n_rows, n_cols = V.shape[0], V.shape[1]
    settings = {
        "seed": None,
        # Dict entries are evaluated top to bottom: W is sampled before H.
        "W": abs(pnrg.randn(n_rows, rank)),
        "H": abs(pnrg.randn(rank, n_cols)),
        "rank": rank,
        "method": "lfnmf",
        "max_iter": 12,
        "initialize_only": True,
        "alpha": 0.01,
    }
    fitted = nimfa.mf_run(nimfa.mf(V, **settings))
    print_info(fitted)
Ejemplo n.º 49
0
def run_lsnmf(V):
    """
    Factorize ``V`` with least squares NMF and report the fit via ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    settings = {
        "seed": "random_vcol",
        "rank": 10,
        "method": "lsnmf",
        "max_iter": 12,
        "initialize_only": True,
        "sub_iter": 10,
        "inner_sub_iter": 10,
        "beta": 0.1,
        "min_residuals": 1e-5,
    }
    fitted = nimfa.mf_run(nimfa.mf(V, **settings))
    print_info(fitted)
Ejemplo n.º 50
0
def run_lsnmf(V):
    """
    Run least squares NMF on ``V`` (rank 10, at most 12 iterations).

    The model is built with ``nimfa.mf``, run with ``nimfa.mf_run`` and the
    fit is passed to ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    factorization_rank = 10
    lsnmf_model = nimfa.mf(
        V, seed="random_vcol", rank=factorization_rank, method="lsnmf",
        max_iter=12, initialize_only=True, sub_iter=10, inner_sub_iter=10,
        beta=0.1, min_residuals=1e-5)
    print_info(nimfa.mf_run(lsnmf_model))
def run_factorization(data):
    """
    Factorize ``data`` with sparse NMF (SNMF/R, rank 15) and print diagnostics.

    Prints the Kullback-Leibler distance, the sparseness of both factors, the
    number of iterations, every strictly positive entry next to its
    estimate, the RMSE over those entries, and finally the target matrix
    together with its estimate.

    :param data: Target matrix to factorize; it is indexed as ``data[i, j]``.
                 NOTE(review): the factors are converted with ``todense()``,
                 so a scipy.sparse input is presumably expected -- confirm
                 against the caller.
    """
    fctr = nimfa.mf(data,
                    seed="random_c",
                    rank=15,
                    method="snmf",
                    max_iter=50,
                    initialize_only=True,
                    version='r',
                    eta=1.,
                    beta=1e-4,
                    i_conv=10,
                    w_min_change=0)
    fctr_res = nimfa.mf_run(fctr)

    np.set_printoptions(precision=3)
    np.set_printoptions(suppress=True)

    # Basis matrix. It is sparse, as input data was sparse as well.
    W = fctr_res.basis()

    # Mixture matrix.
    H = fctr_res.coef()

    # Return the loss function according to Kullback-Leibler divergence. By default Euclidean metric is used.
    print("Distance Kullback-Leibler: %5.3e" % fctr_res.distance(metric="kl"))
    # Compute generic set of measures to evaluate the quality of the factorization
    sm = fctr_res.summary()
    # Print sparseness (Hoyer, 2004) of basis and mixture matrix
    print("Sparseness Basis: %5.3f  Mixture: %5.3f" % (sm['sparseness'][0], sm['sparseness'][1]))
    # Print actual number of iterations performed
    print("Iterations: %d" % sm['n_iter'])

    # Estimate of target matrix data
    data_fact = np.dot(W.todense(), H.todense())

    squared_error = 0.0
    n_observed = 0
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            # Tuple indexing works for arrays, matrices and sparse matrices;
            # chained data[i][j] does not address element (i, j) on the
            # latter two.
            if data[i, j] > 0:
                print("%s %s" % (data[i, j], data_fact[i, j]))
                squared_error += (data[i, j] - data_fact[i, j]) ** 2
                n_observed += 1

    # Root of the mean squared error over the positive entries (the raw sum
    # of squares is not an RMSE); 0.0 when there is no positive entry.
    rmse = np.sqrt(squared_error / n_observed) if n_observed else 0.0
    print("RMSE: %s" % rmse)
    print("%s %s" % (data, data_fact))
Ejemplo n.º 52
0
def factorize(data):
    """
    Perform factorization on S. cerevisiae FunCat annotated sequence data set (D1 FC seq).

    Return factorized data, this is matrix factors as result of factorization (basis and mixture matrix).

    :param data: Transformed data set containing attributes' values, class information and possibly additional meta
                 information. ``data['attr']`` is read as the target matrix; ``data['W']`` and ``data['H']`` are
                 written with the basis and mixture matrices.
    :type data: `dict`
    :returns: The same ``data`` object, updated in place.
    """
    V = data['attr']
    # Alternative configuration kept for reference: standard NMF
    # (method="nmf", rank=40, max_iter=75, update='euclidean', objective='fro').
    model = nimfa.mf(V,
                     seed="random_vcol",
                     rank=40,
                     method="snmf",
                     max_iter=5,
                     initialize_only=True,
                     version='l',
                     eta=1.,
                     beta=1e-4,
                     i_conv=10,
                     w_min_change=0)
    print("Performing %s %s %d factorization ..." % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")
    sparse_w, sparse_h = fit.fit.sparseness()
    # The KL metric is requested explicitly: distance() without a metric
    # does not report the KL divergence that the label promises.
    print("""Stats:
            - iterations: %d
            - KL Divergence: %5.3f
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter, fit.distance(metric='kl'), fit.distance(metric='euclidean'), sparse_w, sparse_h))
    data['W'] = fit.basis()
    data['H'] = fit.coef()
    return data
Ejemplo n.º 53
0
def run_one(V, rank):
    """
    Build and plot the consensus matrix of 50 standard NMF runs on the medulloblastoma data set.

    The connectivity matrices of the 50 runs are summed, averaged, reordered
    with ``reorder`` and finally drawn with ``plot``.

    :param V: Target matrix with gene expression data.
    :type V: `numpy.matrix` (of course it could be any format of scipy.sparse, but we will use numpy here)
    :param rank: Factorization rank.
    :type rank: `int`
    """
    print("================= Rank = %d =================" % rank)
    n_columns = V.shape[1]
    consensus = np.mat(np.zeros((n_columns, n_columns)))
    for run_no in range(1, 51):
        # Standard NMF, Euclidean updates, random Vcol initialization. The
        # 'conn' objective counts consecutive iterations with an unchanged
        # connectivity matrix; conn_change=40 is the count demanded before
        # stopping and max_iter=200 is the backup criterion. Whichever
        # criterion is met first ends the run.
        model = nimfa.mf(
            V, method="nmf", rank=rank, seed="random_vcol", max_iter=200,
            update='euclidean', objective='conn', conn_change=40,
            initialize_only=True)
        fitted_model = nimfa.mf_run(model).fit
        print("%2d / 50 :: %s - init: %s ran with  ... %3d / 200 iters ..." %
              (run_no, fitted_model, fitted_model.seed, fitted_model.n_iter))
        # Accumulate this run's connectivity matrix. (nimfa's multiple-run
        # support with fit.consensus() would be an alternative.)
        consensus += fitted_model.connectivity()
    # Average, then reorder and plot the consensus matrix.
    consensus /= 50.
    plot(reorder(consensus), rank)
Ejemplo n.º 54
0
def factorize(data):
    """
    Perform factorization on S. cerevisiae FunCat annotated sequence data set (D1 FC seq).

    Return factorized data, this is matrix factors as result of factorization (basis and mixture matrix).

    :param data: Transformed data set containing attributes' values, class information and possibly additional meta
                 information. ``data['attr']`` is read as the target matrix; ``data['W']`` and ``data['H']`` are
                 written with the basis and mixture matrices.
    :type data: `dict`
    :returns: The same ``data`` object, updated in place.
    """
    V = data['attr']
    # Alternative configuration kept for reference: standard NMF
    # (method="nmf", rank=40, max_iter=75, update='euclidean', objective='fro').
    model = nimfa.mf(V,
                     seed="random_vcol",
                     rank=40,
                     method="snmf",
                     max_iter=5,
                     initialize_only=True,
                     version='l',
                     eta=1.,
                     beta=1e-4,
                     i_conv=10,
                     w_min_change=0)
    # Parenthesized single-argument prints behave the same on Python 2 and 3.
    print("Performing %s %s %d factorization ..." % (model, model.seed, model.rank))
    fit = nimfa.mf_run(model)
    print("... Finished")
    sparse_w, sparse_h = fit.fit.sparseness()
    # The KL metric is requested explicitly: distance() without a metric
    # does not report the KL divergence that the label promises.
    print("""Stats:
            - iterations: %d
            - KL Divergence: %5.3f
            - Euclidean distance: %5.3f
            - Sparseness basis: %5.3f, mixture: %5.3f""" % (fit.fit.n_iter, fit.distance(metric='kl'), fit.distance(metric='euclidean'), sparse_w, sparse_h))
    data['W'] = fit.basis()
    data['H'] = fit.coef()
    return data
Ejemplo n.º 55
0
 def run(self):
     """
     Factorize the masked matrix with binary matrix factorization.

     Calls ``self.mask_bed()`` and ``NMF_Run.run(self)`` first, then builds
     a nimfa ``bmf`` model (seeded with ``nndsvd``) from
     ``self.masked_matrix`` using ``self.nmf_rank`` and ``self.max_iter``.
     The model is stored in ``self.fctr``, the fit in ``self.fctr_res``,
     and a set of quality measures is printed.
     """
     # TODO: estimate rank
     self.mask_bed()
     NMF_Run.run(self)
     import nimfa
     # Parenthesized single-argument prints behave the same on Python 2 and 3.
     print(repr(self.masked_matrix))
     print(repr(self.masked_matrix.shape))
     self.fctr = nimfa.mf(numpy.matrix(self.masked_matrix),
                          seed            = "nndsvd",
                          rank            = self.nmf_rank,
                          method          = "bmf",
                          max_iter        = self.max_iter,
                          initialize_only = True,
                          lambda_w        = 1.1,
                          lambda_h        = 1.1)
     self.fctr_res = nimfa.mf_run(self.fctr)
     print('Rss: %5.4f' % self.fctr_res.fit.rss())
     print('Evar: %5.4f' % self.fctr_res.fit.evar())
     print('K-L divergence: %5.4f' % self.fctr_res.distance(metric = 'kl'))
     # sparseness() supplies both values consumed by the format string.
     print('Sparseness, W: %5.4f, H: %5.4f' % self.fctr_res.fit.sparseness())
     print('Iteration: %d' % self.fctr_res.n_iter)
Ejemplo n.º 56
0
def probability_select(ratings, users, rank=9, user=None):
    """
    Pick the next (user, candidate) pair to rate, using a sparse NMF of the ratings.

    A symmetric user-by-user matrix is built from ``ratings`` and factorized
    with SNMF/R. Candidates are scored by blending the fitted rating with a
    cluster-affiliation term, weighted by a confidence value.

    :param ratings: Mapping from pairs of user ids to a rating value.
    :param users: Mapping from user id to its row/column index in the matrix.
    :param rank: Factorization rank (number of clusters).
    :param user: User to expand; when ``None`` the user appearing in the
                 fewest rated pairs is chosen.
    :returns: ``(user, candidate)``, or ``None`` when every pair has already
              been rated.
    """
    # Nothing left to expand: bail out before the expensive factorization.
    if len(ratings) >= len(users)**2:
        return None  # all items expanded

    matrix = sp.dok_matrix((len(users), len(users)))
    for k, v in ratings.items():
        matrix[users[k[0]], users[k[1]]] = v
        matrix[users[k[1]], users[k[0]]] = v
    # Run sparse matrix factorisation
    factor = nimfa.mf(matrix,
                      seed="random_c",
                      rank=rank,
                      method="snmf",
                      max_iter=12,
                      initialize_only=True,
                      version='r',
                      eta=1.,
                      beta=1e-4,
                      i_conv=10,
                      w_min_change=0)
    result = nimfa.mf_run(factor)

    if user is None:
        # Pick a user to expand
        user = min(users, key=lambda u: len([i for i in ratings if u in i]))

    # Clusters (F)
    clusters = result.basis()

    # Matrix (M)
    recommendations = result.fitted()

    # All rated users (U)
    user_rated = {i[0]: ratings[i] for i in ratings if user == i[1]}
    user_rated.update({i[1]: ratings[i] for i in ratings if user == i[0]})

    # Affiliations (A)
    caff = [(sum(r * clusters[users[u], x] for u, r in user_rated.items()) + 1)
            / (len(user_rated) + 1)
            for x in range(rank)]

    # Confidence (d)
    conf = sum(sum(clusters[users[u], x] for u in user_rated)
               for x in range(rank)) / clusters.sum()

    # Cluster confidences (C), normalised by the largest one
    sums = clusters.sum(axis=0).tolist()[0]
    cconf = [sum(clusters[users[u], x] for u in user_rated) / sums[x]
             for x in range(rank)]
    cconf_norm = max(cconf) or 1  # avoid dividing by zero when all are 0
    cconf = [i / cconf_norm for i in cconf]

    def score(x):
        # Blend of the fitted rating and the cluster-affiliation term.
        affinity = sum((1 - cconf[i]) * caff[i] * clusters[users[x], i]
                       for i in range(rank)) / rank
        return (conf * recommendations[users[user], users[x]]
                + (1 - conf) * affinity)

    # Find the user with the highest affinity to cluster
    candidates = {i for i in users if i not in user_rated}
    candidate = max(candidates, key=score)
    return user, candidate
Ejemplo n.º 57
0
def run_icm(V):
    """
    Factorize ``V`` with iterated conditional modes and report the fit.

    ``alpha`` (``V.shape[0]`` x rank) and ``beta`` (rank x ``V.shape[1]``)
    are filled with standard-normal samples.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    rank = 10
    generator = np.random.RandomState()
    # alpha is drawn before beta, so the random stream is consumed in that order.
    alpha = generator.randn(V.shape[0], rank)
    beta = generator.randn(rank, V.shape[1])
    icm_model = nimfa.mf(
        V, seed="nndsvd", rank=rank, method="icm", max_iter=12,
        initialize_only=True, iiter=20, alpha=alpha, beta=beta,
        theta=0., k=0., sigma=1.)
    print_info(nimfa.mf_run(icm_model))
Ejemplo n.º 58
0
	def apply( self, X, k = 2 ):
		"""
		Apply NMF to the specified document-term matrix X.

		The factorization is configured from ``self.method``,
		``self.max_iters``, ``self.init_strategy``, ``self.update`` and
		``self.test_conv``. The factors are stored in ``self.W`` and
		``self.H`` and the iteration count in ``self.n_iter``.

		:param X: Document-term matrix to factorize.
		:param k: Factorization rank.
		"""
		import nimfa
		self.W = None
		self.H = None
		# The objective follows the update rule: Frobenius for euclidean
		# updates, divergence otherwise.
		if self.update == "euclidean":
			objective = "fro"
		else:
			objective = "div"
		alg = nimfa.mf(X, method = self.method, max_iter = self.max_iters, rank = k, seed = self.init_strategy, update = self.update, objective = objective, test_conv = self.test_conv ) 
		res = nimfa.mf_run(alg)
		# TODO: fix
		try:
			self.W = res.basis().todense() 
			self.H = res.coef().todense()
		except AttributeError:
			# Factors without a todense() method are stored as returned.
			self.W = res.basis()
			self.H = res.coef()
		# last number of iterations
		self.n_iter = res.n_iter
Ejemplo n.º 59
0
def clustered_select(ratings, users, rank=9, user=None):
    """
    Pick the next (user, candidate) pair to rate from the user's strongest cluster.

    A symmetric user-by-user matrix is built from ``ratings`` and factorized
    with SNMF/R. The cluster maximising the user's affiliation is selected,
    and the unrated user with the largest weight in it becomes the candidate.

    :param ratings: Mapping from pairs of user ids to a rating value.
    :param users: Mapping from user id to its row/column index in the matrix.
    :param rank: Factorization rank (number of clusters).
    :param user: User to expand; when ``None`` the user appearing in the
                 fewest rated pairs is chosen.
    :returns: ``(user, candidate)``, or ``None`` when every pair has already
              been rated.
    """
    # Nothing left to expand: bail out before the expensive factorization.
    if len(ratings) >= len(users)**2:
        return None  # all items expanded

    matrix = sp.dok_matrix((len(users), len(users)))
    for k, v in ratings.items():
        matrix[users[k[0]], users[k[1]]] = v
        matrix[users[k[1]], users[k[0]]] = v
    # Run sparse matrix factorisation
    factor = nimfa.mf(matrix,
                      seed="random_c",
                      rank=rank,
                      method="snmf",
                      max_iter=12,
                      initialize_only=True,
                      version='r',
                      eta=1.,
                      beta=1e-4,
                      i_conv=10,
                      w_min_change=0)
    result = nimfa.mf_run(factor)

    if user is None:
        # Pick a user to expand
        user = min(users, key=lambda u: len([i for i in ratings if u in i]))
    # Pick a cluster
    clusters = result.basis()
    # Select all rated users
    user_rated = {i[0]: ratings[i] for i in ratings if user == i[1]}
    user_rated.update({i[1]: ratings[i] for i in ratings if user == i[0]})

    def affiliation(x):
        # A_u(c): rating-weighted membership of the rated users in cluster x.
        weighted = sum(r * clusters[users[u], x]
                       for u, r in user_rated.items())
        return (weighted + 1) / (len(user_rated) + 1)

    cluster = max(range(rank), key=affiliation)  # Maximise A_u(c)
    # Find the user with the highest affinity to cluster
    candidates = {i for i in users if i not in user_rated}
    candidate = max(candidates, key=lambda x: clusters[users[x], cluster])
    return user, candidate
Ejemplo n.º 60
0
# Small dense target matrix used by the factorization below.
V = np.matrix([[1, 2, 3], [4, 5, 6], [6, 7, 8]])
# Parenthesized single-argument print behaves the same on Python 2 and 3.
print(V)


# Initialization callback function
def init_info(model):
    """
    Print the initialized basis and mixture matrices of ``model``.

    :param model: Object exposing ``basis()`` and ``coef()``.
    """
    # Each header ends with a newline, so the matrix starts on its own line.
    # Single-argument print() gives the same output on Python 2 and 3.
    print("Initialized basis matrix\n%s" % (model.basis(),))
    print("Initialized  mixture matrix\n%s" % (model.coef(),))


# ICM rank 3 algorithm
# We specify callback_init parameter by passing a init_info function
# Callback is called after initialization and prior to factorization in each run.
fctr = nimfa.mf(V,
                seed="random_c",
                method="icm",
                max_iter=10,
                rank=3,
                callback_init=init_info)
fctr_res = nimfa.mf_run(fctr)

# Basis matrix.
W = fctr_res.basis()
# Parenthesized single-argument prints behave the same on Python 2 and 3.
print("Resulting basis matrix")
print(W)

# Mixture matrix.
H = fctr_res.coef()
print("Resulting mixture matrix")
print(H)

# Generic set of quality measures for the factorization.
sm = fctr_res.summary()