def nmf_library(V, W_init, correct_H):
    """Score several nimfa factorization models against a reference ``H``.

    Every model is created with ``seed=None``, ``rank=3``, ``max_iter=100``,
    ``W=W_init`` and its own all-zero 3 x 1 ``H``, and is then run once.
    After the run, the model's ``H`` attribute is flattened, passed through
    ``normalized`` and compared with ``correct_H`` using
    ``mean_absolute_error``.

    :param V: Target matrix handed to every nimfa model.
    :param W_init: Value passed as ``W`` to every model.
    :param correct_H: Reference values for the flattened, normalized ``H``.
    :return: Errors in the order Lsnmf, Nmf, Icm, Bd, Pmf.
    :rtype: list
    """
    algorithms = (nimfa.Lsnmf, nimfa.Nmf, nimfa.Icm, nimfa.Bd, nimfa.Pmf)

    # Construct all models before running any of them; each one receives a
    # freshly allocated H so no array is shared between models.
    models = [
        algorithm(V,
                  seed=None,
                  rank=3,
                  max_iter=100,
                  H=np.zeros((3, 1)),
                  W=W_init)
        for algorithm in algorithms
    ]

    # Run the factorizations in declaration order.  The returned fit objects
    # are not needed because H is read from the model itself below.
    for model in models:
        model()

    return [
        mean_absolute_error(correct_H,
                            normalized(np.array(model.H).reshape(-1)))
        for model in models
    ]
Exemple #2
0
def run_pmf(V):
    """
    Factorize ``V`` with nimfa's Pmf model and report the outcome.

    The model is configured with rank 10, the ``random_vcol`` seeding
    method, at most 12 iterations and ``rel_error=1e-5``; the result of
    running it is passed to ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    settings = {
        "seed": "random_vcol",
        "rank": 10,
        "max_iter": 12,
        "rel_error": 1e-5,
    }
    model = nimfa.Pmf(V, **settings)
    print_info(model())
Exemple #3
0
    def pmf(self, p_factorization_rank=2, p_max_iterations=10, p_runs=10):
        """Build (but do not run) a nimfa Pmf model for the stored data.

        The model is created on the transpose of ``self.m_data_matrix`` with
        ``random_vcol`` seeding, ``rel_error=1e-5`` and factor tracking
        switched on.

        :param p_factorization_rank: Value passed as ``rank``.
        :param p_max_iterations: Value passed as ``max_iter``.
        :param p_runs: Value passed as ``n_run``.
        :return: The configured ``nimfa.Pmf`` object; the caller runs it.
        """
        options = {
            "rank": p_factorization_rank,
            "seed": "random_vcol",
            "max_iter": p_max_iterations,
            "n_run": p_runs,
            "rel_error": 1e-5,
            "track_factor": True,
        }
        return nimfa.Pmf(self.m_data_matrix.T, **options)
Exemple #4
0
    def run(self, output_file):
        """Factorize ``self.mat`` and fill ``output_file`` with fitted values.

        The nimfa model selected by ``self.method`` is run on ``self.mat``
        with ``rank=self.rank`` and ``max_iter=self.iter``; the fitted matrix
        is stored in ``self.result``.  ``output_file`` is then rewritten in
        place: the first two whitespace-separated fields of every line are
        used as row and column indices into ``self.result`` and the line is
        replaced by ``"<row> <col> <fitted value>"``.

        All replacement lines are computed before the file is truncated, so
        a malformed line or an out-of-range index leaves the file untouched
        instead of half-written.

        :param output_file: Path of an existing file to rewrite in place.
        :raises SystemExit: If ``self.method`` is not a supported model name.
        :raises IndexError: If a line has fewer than two fields or an index
            is outside ``self.result``.
        :raises ValueError: If an index field is not an integer.
        """
        # Class names are resolved lazily with getattr so that only the
        # selected model has to exist in the installed nimfa version.
        model_names = {
            "nmf": "Nmf",
            "lfnmf": "Lfnmf",
            "nsnmf": "Nsnmf",
            "pmf": "Pmf",
            "psmf": "Psmf",
            "snmf": "Snmf",
            "sepnmf": "Sepnmf",
        }

        def announce(message):
            # A single parenthesised argument prints the same text under
            # both Python 2 and Python 3.
            print("%s %s" % (message, strftime("%Y-%m-%d %H:%M:%S", gmtime())))

        announce("Running non-negative MF....")
        class_name = model_names.get(self.method)
        if class_name is None:
            print("No model is being recognized, stopped.")
            sys.exit(1)

        modelnmf = getattr(nimfa, class_name)(self.mat,
                                              rank=self.rank,
                                              max_iter=self.iter)
        model = modelnmf()
        self.result = np.array(model.fitted())
        announce("Done MF!")

        announce("Write results to file.")
        with open(output_file, "r+") as handle:
            rewritten = []
            for line in handle.readlines():
                fields = line.split()
                row, col = fields[0], fields[1]
                rewritten.append(
                    "%s %s %f\n" % (row, col,
                                    self.result[int(row)][int(col)]))

            # Only now discard the old contents; everything to write is ready.
            handle.seek(0)
            handle.truncate()
            handle.writelines(rewritten)
 def create_pnmf_summary(self, data, ranks, n_runs):
     """Estimate factorization quality over a range of ranks with Pmf.

     A ``nimfa.Pmf`` model is built on ``data`` (``random_vcol`` seeding,
     at most 80 iterations, ``rel_error=1e-5``) and its ``estimate_rank``
     is called with ``what='all'``.

     :param data: Matrix handed to ``nimfa.Pmf``.
     :param ranks: Passed to ``estimate_rank`` as ``rank_range``.
     :param n_runs: Passed to ``estimate_rank`` as ``n_run``.
     :return: The result of ``estimate_rank``, also stored in
         ``self.summary``.
     """
     model = nimfa.Pmf(
         data,
         seed="random_vcol",
         max_iter=80,
         rel_error=1e-5,
     )
     rank_report = model.estimate_rank(
         rank_range=ranks,
         n_run=n_runs,
         what='all',
     )
     self.summary = rank_report
     return rank_report
def learning(method, train_matrix, train_index, data, user_list, item_list):
  """Run the learning algorithm named by ``method`` and return its result.

  The number of latent attributes (``attribute``) and, for the ``ML*``
  methods, ``eta0`` are read from module-level names defined outside this
  function.

  Supported values of ``method``:

  * ``"SVD"``: truncated SVD of ``train_matrix`` via ``svds``; writes
    ``u.csv``, ``s.csv`` and ``vt.csv`` to the working directory and
    returns ``u``.
  * ``"PMF"``: nimfa Pmf on the dense form of ``train_matrix``; returns the
    fitted matrix as an array.
  * ``"NMF"``: nimfa Nmf (Euclidean update) on ``train_matrix``; returns the
    fitted matrix converted with ``toarray()``.
  * ``"RMrate_liner"`` / ``"R2_RMrate"``: ``pv.rmrate_standard`` /
    ``pv.rmrate_square_standard``; returns the first factor ``u``.
  * ``"D1".."D5"`` and ``"ML1".."ML3"`` with suffix ``"_liner"`` or
    ``"_square"``: ``pv.rmrate`` / ``pv.rmrate_square``; returns ``u``.
    For ``ML*`` the matching ``ml.pv_ml*`` function is also called.
    The historical misspelling ``"D1_sqaure"`` is still accepted.

  :param method: Name of the algorithm, see above.
  :param train_matrix: Training matrix (the PMF branch calls ``toarray()``
      on it, so a sparse matrix is presumably expected; verify with caller).
  :param train_index: Forwarded unchanged to the ``pv`` solvers.
  :param data: Forwarded unchanged to the ``pv`` solvers.
  :param user_list: Forwarded unchanged to the ``pv`` solvers.
  :param item_list: Forwarded unchanged to the ``pv`` solvers.
  :return: See the list above; ``None`` when ``method`` is not recognised.
  """
  if method == "SVD":
    u, s, vt = svds(train_matrix, k=attribute)
    np.savetxt("u.csv", u, delimiter=",")
    np.savetxt("s.csv", s, delimiter=",")
    np.savetxt("vt.csv", vt, delimiter=",")
    return u

  if method == "PMF":
    pmf = nimfa.Pmf(train_matrix.toarray(), seed="random_vcol", rank=attribute, max_iter=50, rel_error=1e-5)
    pmf_fit = pmf()
    return np.array(pmf_fit.fitted())

  if method == "NMF":
    nmf = nimfa.Nmf(train_matrix, seed="random_vcol", rank=attribute, max_iter=100, rel_error=1e-5, update='euclidean')
    nmf_fit = nmf()
    return nmf_fit.fitted().toarray()

  # method name -> (pv solver name, ml refiner name or None)
  factor_solvers = {
      "RMrate_liner": ("rmrate_standard", None),
      "R2_RMrate": ("rmrate_square_standard", None),
      # Misspelled name used historically; kept so existing callers still work.
      "D1_sqaure": ("rmrate_square", None),
  }
  for suffix, solver_name in (("liner", "rmrate"), ("square", "rmrate_square")):
    for variant in ("D1", "D2", "D3", "D4", "D5"):
      factor_solvers["%s_%s" % (variant, suffix)] = (solver_name, None)
    for level in ("1", "2", "3"):
      factor_solvers["ML%s_%s" % (level, suffix)] = (solver_name, "pv_ml" + level)

  if method not in factor_solvers:
    return None

  solver_name, refiner_name = factor_solvers[method]
  u, v = getattr(pv, solver_name)(train_index, data, user_list, item_list, attribute)
  if refiner_name is not None:
    # The return value was never used by this function; the call is kept in
    # case the ml routine has side effects (cannot be verified from here).
    getattr(ml, refiner_name)(train_matrix, eta0, u, v, attribute)
  return u
Exemple #7
0
import numpy as np

import nimfa

# Factorize a random 40 x 100 matrix with nimfa's Pmf model.
V = np.random.rand(40, 100)
pmf = nimfa.Pmf(
    V,
    rank=10,
    max_iter=12,
    rel_error=1e-5,
    seed="random_vcol",
)
# Calling the model runs the factorization; the result is kept in pmf_fit.
pmf_fit = pmf()
Exemple #8
0
    def train(self):
        """Factorize ``self.r1``, match its items to ``self.r2`` and write the result.

        Steps, in order:

        1. Run the nimfa model named by ``self.method`` on ``self.r1`` with
           ``rank=self.rank`` and ``max_iter=self.iter``.
        2. For every item build an 11-bin histogram (bin width 0.1) of its
           values: for ``self.r1`` a zero entry is replaced by the fitted
           value and values below 1e-4 are skipped; for ``self.r2`` only
           positive entries are counted.  A histogram with more than one
           counted entry is divided by its count.
        3. Fill a cost matrix with ``self.threadFunc`` on five threads, one
           chunk of item indices per thread.
        4. Run ``hungarian.lap`` on the cost matrix, reorder the fitted rows
           by the resulting matching, transpose, and pass the result to
           ``self.writeTrans``.

        ``N_ITEM`` and ``N_USER`` are module-level names defined outside
        this method.

        :raises SystemExit: If ``self.method`` is not a supported model name.
        """
        def announce(message):
            # A single parenthesised argument prints the same text under
            # both Python 2 and Python 3.
            print("%s %s" % (message, strftime("%Y-%m-%d %H:%M:%S", gmtime())))

        def rating_bin(value):
            # 0.1-wide bins; anything that would fall past the last bin is
            # clamped to index 10.
            return min(int(math.floor(value / 0.1)), 10)

        # Run MF
        announce("Running non-negative MF....")
        # Class names are resolved lazily with getattr so that only the
        # selected model has to exist in the installed nimfa version.
        model_names = {
            "nmf": "Nmf",
            "lfnmf": "Lfnmf",
            "nsnmf": "Nsnmf",
            "pmf": "Pmf",
            "psmf": "Psmf",
            "snmf": "Snmf",
            "sepnmf": "Sepnmf",
        }
        class_name = model_names.get(self.method)
        if class_name is None:
            print("No model is being recognized, stopped.")
            sys.exit(1)

        modelnmf = getattr(nimfa, class_name)(self.r1,
                                              rank=self.rank,
                                              max_iter=self.iter)
        model = modelnmf()
        source_result = np.array(model.fitted())

        announce("Done MF!")

        # Turn vector of per user into distribution
        # And calculate the dot similarity
        # Then find the best data
        announce("Transfer user vector into distribution.")

        item_pdf1 = []
        for i in range(N_ITEM):
            count = 0
            pdf = np.zeros(11)
            for j in range(N_USER):
                t = self.r1[i][j]
                if t == 0.0:
                    # Missing entry: fall back to the fitted value.
                    t = source_result[i][j]

                # ignore the count if it is 0.
                if t < 1e-4:
                    continue

                pdf[rating_bin(t)] += 1
                count += 1
            if count > 1:
                pdf = pdf / count
            item_pdf1.append(pdf)

        item_pdf2 = []
        for i in range(N_ITEM):
            count = 0
            pdf = np.zeros(11)
            for j in range(N_USER):
                rating = self.r2[i][j]
                if rating > 0:
                    count += 1
                    # Clamped like the r1 histogram so a large rating cannot
                    # index past the last bin.
                    pdf[rating_bin(rating)] += 1
            if count > 1:
                pdf = pdf / count
            item_pdf2.append(pdf)

        announce("Calculate cost matrix....")
        # Calculate cost matrix for items
        # matrix[item r1][item r2]

        # Uses 5 threads to run this slowest part.
        partition = 5
        matrix = [[] for _ in range(partition)]

        # array_split (unlike split) also accepts an item count that is not
        # a multiple of the number of partitions.
        chunks = np.array_split(np.array(range(N_ITEM)), partition)
        threads = []
        for index in range(partition):
            thread = Thread(target=self.threadFunc,
                            args=(matrix[index], chunks[index], item_pdf1,
                                  item_pdf2))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        matrix = np.array(np.concatenate(matrix, axis=0))

        print("%s %s" % ("Matrix shape: ", matrix.shape))

        announce("Hungarian running maximum matching....")
        _match1to2, match2to1 = hungarian.lap(matrix)
        announce("End of matching!")

        # Create item-matching version
        # trans[item in r2]
        trans = np.array(
            [source_result[match2to1[item2]] for item2 in range(N_ITEM)]).T

        # Find most similar user pair
        announce("Find most similar user pair..... Write file...")

        self.writeTrans(trans)

        announce("Done, enter cpp mode")