def nmf_library(V, W_init, correct_H):
    """Compare several nimfa factorizations of ``V`` against a reference ``H``.

    Five nimfa models (Lsnmf, Nmf, Icm, Bd and Pmf) are created with
    ``seed=None``, ``rank=3`` and ``max_iter=100``. Each one receives
    ``W_init`` as its ``W`` argument and a fresh all-zero 3x1 column as its
    ``H`` argument. After every model has been run, its ``H`` attribute is
    flattened, passed through ``normalized`` and scored against ``correct_H``
    with ``mean_absolute_error``.

    :param V: Matrix handed to every factorization model.
    :param W_init: Value passed as the ``W`` keyword of every model.
    :param correct_H: Reference vector the normalized ``H`` is compared with.
    :return: Mean absolute errors in the order
        ``[lsnmf, nmf, icm, bd, pmf]``.
    """
    # nimfa.Lfnmf is deliberately absent from the comparison.
    algorithms = (nimfa.Lsnmf, nimfa.Nmf, nimfa.Icm, nimfa.Bd, nimfa.Pmf)

    # Create every model first, then run them, then score them: three
    # separate passes so that all constructions precede the first run.
    models = [
        algorithm(V, seed=None, rank=3, max_iter=100,
                  H=np.zeros((3, 1)), W=W_init)
        for algorithm in algorithms
    ]

    for model in models:
        # The fit object is not needed; H is read back from the model itself.
        model()

    return [
        mean_absolute_error(correct_H,
                            normalized(np.array(model.H).reshape(-1)))
        for model in models
    ]
def run_pmf(V):
    """
    Factorize ``V`` with nimfa's PMF model and report the resulting fit.

    The model is created with ``seed="random_vcol"``, rank 10, at most 12
    iterations and ``rel_error=1e-5``; the fit is handed to ``print_info``.

    :param V: Target matrix to estimate.
    :type V: :class:`numpy.matrix`
    """
    model = nimfa.Pmf(
        V,
        seed="random_vcol",
        rank=10,
        max_iter=12,
        rel_error=1e-5,
    )
    print_info(model())
def pmf(self, p_factorization_rank=2, p_max_iterations=10, p_runs=10):
    """Create (but do not run) a nimfa PMF model for this object's data.

    The model is built over the transpose of ``self.m_data_matrix`` with
    ``seed="random_vcol"``, ``rel_error=1e-5`` and ``track_factor=True``.

    :param p_factorization_rank: Passed to nimfa as ``rank``.
    :param p_max_iterations: Passed to nimfa as ``max_iter``.
    :param p_runs: Passed to nimfa as ``n_run``.
    :return: The configured ``nimfa.Pmf`` instance; the caller runs it.
    """
    transposed_data = self.m_data_matrix.T
    return nimfa.Pmf(
        transposed_data,
        rank=p_factorization_rank,
        seed="random_vcol",
        max_iter=p_max_iterations,
        n_run=p_runs,
        rel_error=1e-5,
        track_factor=True,
    )
def run(self, output_file):
    """Factorize ``self.mat`` and rewrite ``output_file`` with fitted values.

    The nimfa model is selected by ``self.method`` (one of ``nmf``,
    ``lfnmf``, ``nsnmf``, ``pmf``, ``psmf``, ``snmf``, ``sepnmf``) and built
    with ``rank=self.rank`` and ``max_iter=self.iter``. The fitted matrix is
    stored in ``self.result``.

    Every line of ``output_file`` is then replaced by
    ``"<row> <col> <fitted value>"``, where ``<row>`` and ``<col>`` are the
    first two whitespace-separated fields of the existing line, used as
    integer indices into ``self.result``. Any further fields on the line are
    dropped.

    :param output_file: Path of an existing file to rewrite in place.
    :raises SystemExit: With status 1 when ``self.method`` is not recognized.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    def log(message):
        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (message, strftime(timestamp_format, gmtime())))

    log("Running non-negative MF....")

    # Class names are resolved lazily so that only the selected model has to
    # exist in the installed nimfa version.
    model_names = {
        "nmf": "Nmf",
        "lfnmf": "Lfnmf",
        "nsnmf": "Nsnmf",
        "pmf": "Pmf",
        "psmf": "Psmf",
        "snmf": "Snmf",
        "sepnmf": "Sepnmf",
    }
    class_name = model_names.get(self.method)
    if class_name is None:
        print("No model is being recognized, stopped.")
        sys.exit(1)

    factorizer = getattr(nimfa, class_name)(
        self.mat, rank=self.rank, max_iter=self.iter)
    fit = factorizer()
    self.result = np.array(fit.fitted())
    log("Done MF!")

    log("Write results to file.")
    with open(output_file, "r+") as handle:
        existing_lines = handle.readlines()

        # Build all replacement lines BEFORE truncating: a malformed line
        # then raises while the file still holds its original contents.
        rewritten = []
        for line in existing_lines:
            fields = line.split()
            row, col = int(fields[0]), int(fields[1])
            rewritten.append(
                "%s %s %f\n" % (fields[0], fields[1], self.result[row][col]))

        handle.seek(0)
        handle.truncate()
        handle.writelines(rewritten)
def create_pnmf_summary(self, data, ranks, n_runs):
    """Estimate PMF quality over a range of ranks and remember the result.

    A nimfa PMF model is created for ``data`` (``seed="random_vcol"``,
    ``max_iter=80``, ``rel_error=1e-5``) and its ``estimate_rank`` is called
    with ``what='all'``.

    :param data: Matrix handed to ``nimfa.Pmf``.
    :param ranks: Passed to ``estimate_rank`` as ``rank_range``.
    :param n_runs: Passed to ``estimate_rank`` as ``n_run``.
    :return: The value returned by ``estimate_rank``; it is also stored in
        ``self.summary``.
    """
    model = nimfa.Pmf(data, seed="random_vcol", max_iter=80, rel_error=1e-5)
    rank_summary = model.estimate_rank(
        rank_range=ranks,
        n_run=n_runs,
        what='all',
    )
    self.summary = rank_summary
    return rank_summary
def learning(method, train_matrix, train_index, data, user_list, item_list):
    """Learn a latent representation with the algorithm named by ``method``.

    Supported ``method`` values:

    * ``"SVD"`` - ``svds`` on ``train_matrix``; writes ``u.csv``, ``s.csv``
      and ``vt.csv`` to the working directory and returns ``u``.
    * ``"PMF"`` / ``"NMF"`` - nimfa factorization; returns the fitted matrix
      as a dense array.
    * ``"RMrate_liner"`` / ``"R2_RMrate"`` - ``pv.rmrate_standard`` /
      ``pv.rmrate_square_standard``; returns ``u``.
    * ``"<V>_liner"`` / ``"<V>_square"`` with ``<V>`` in ``D1``..``D5``,
      ``ML1``..``ML3`` - ``pv.rmrate`` / ``pv.rmrate_square``; returns ``u``.
      The ``ML*`` variants additionally call the matching ``ml.pv_ml*``.
      The historical misspelling ``"D1_sqaure"`` is still accepted.

    The module-level names ``attribute`` and ``eta0`` are read from the
    enclosing module.

    NOTE(review): the ``D*`` variants all return the same ``u``; nothing in
    this function distinguishes them - confirm whether a per-variant
    transform was intended.

    :param method: Name of the learning algorithm (see above).
    :param train_matrix: Training matrix; ``.toarray()`` is called on it for
        ``"PMF"``, so it is presumably a scipy sparse matrix - TODO confirm.
    :param train_index: Forwarded to the ``pv.rmrate*`` functions.
    :param data: Forwarded to the ``pv.rmrate*`` functions.
    :param user_list: Forwarded to the ``pv.rmrate*`` functions.
    :param item_list: Forwarded to the ``pv.rmrate*`` functions.
    :return: The learned matrix, or ``None`` when ``method`` is unknown.
    """
    if method == "SVD":
        u, s, vt = svds(train_matrix, k=attribute)
        np.savetxt("u.csv", u, delimiter=",")
        np.savetxt("s.csv", s, delimiter=",")
        np.savetxt("vt.csv", vt, delimiter=",")
        return u

    if method == "PMF":
        pmf = nimfa.Pmf(train_matrix.toarray(), seed="random_vcol",
                        rank=attribute, max_iter=50, rel_error=1e-5)
        return np.array(pmf().fitted())

    if method == "NMF":
        nmf = nimfa.Nmf(train_matrix, seed="random_vcol", rank=attribute,
                        max_iter=100, rel_error=1e-5, update='euclidean')
        return nmf().fitted().toarray()

    if method == "RMrate_liner":
        u, _ = pv.rmrate_standard(train_index, data, user_list, item_list,
                                  attribute)
        return u

    if method == "R2_RMrate":
        u, _ = pv.rmrate_square_standard(train_index, data, user_list,
                                         item_list, attribute)
        return u

    variants = ("D1", "D2", "D3", "D4", "D5", "ML1", "ML2", "ML3")
    # Tuples (not sets) keep the lookup a plain equality comparison.
    linear_methods = tuple(name + "_liner" for name in variants)
    # "D1_sqaure" is the spelling historically required by this function;
    # it stays valid next to the corrected "D1_square".
    square_methods = tuple(name + "_square" for name in variants) + (
        "D1_sqaure",)

    if method in linear_methods:
        factorize = pv.rmrate
    elif method in square_methods:
        factorize = pv.rmrate_square
    else:
        return None

    u, v = factorize(train_index, data, user_list, item_list, attribute)

    # The ML helpers' results are not used here; the calls are kept in case
    # they act through side effects (e.g. on u or v) - TODO confirm.
    if method.startswith("ML1_"):
        ml.pv_ml1(train_matrix, eta0, u, v, attribute)
    elif method.startswith("ML2_"):
        ml.pv_ml2(train_matrix, eta0, u, v, attribute)
    elif method.startswith("ML3_"):
        ml.pv_ml3(train_matrix, eta0, u, v, attribute)

    return u
import numpy as np
import nimfa

# Minimal PMF example: factorize a random 40x100 matrix at rank 10.
V = np.random.rand(40, 100)

pmf = nimfa.Pmf(
    V,
    seed="random_vcol",
    rank=10,
    max_iter=12,
    rel_error=1e-5,
)

# Calling the model runs the factorization and yields the fit object.
pmf_fit = pmf()
def train(self):
    """Factorize ``self.r1``, match its items to ``self.r2`` and write output.

    Steps, in order:

    1. Fit the nimfa model named by ``self.method`` (``nmf``, ``lfnmf``,
       ``nsnmf``, ``pmf``, ``psmf``, ``snmf`` or ``sepnmf``) on ``self.r1``
       with ``rank=self.rank`` and ``max_iter=self.iter``.
    2. For each of the ``N_ITEM`` rows build an 11-bin histogram (bin width
       0.1, last bin open-ended) over ``N_USER`` values: once from
       ``self.r1`` with zeros filled in from the fitted matrix, once from
       the positive entries of ``self.r2``.
    3. Compute a cost matrix with ``self.threadFunc`` on 5 threads.
    4. Run ``hungarian.lap`` on the cost matrix and reorder the fitted rows
       through the second returned mapping.
    5. Pass the transposed, reordered matrix to ``self.writeTrans``.

    ``N_ITEM`` and ``N_USER`` are read from the enclosing module.

    :raises SystemExit: With status 1 when ``self.method`` is not recognized.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    n_bins = 11

    def log(message):
        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (message, strftime(timestamp_format, gmtime())))

    def bin_index(value):
        # Bin width is 0.1; anything at or above 1.0 lands in the last bin.
        return min(int(math.floor(value / 0.1)), n_bins - 1)

    # Run MF
    log("Running non-negative MF....")

    # Class names are resolved lazily so that only the selected model has to
    # exist in the installed nimfa version.
    model_names = {
        "nmf": "Nmf",
        "lfnmf": "Lfnmf",
        "nsnmf": "Nsnmf",
        "pmf": "Pmf",
        "psmf": "Psmf",
        "snmf": "Snmf",
        "sepnmf": "Sepnmf",
    }
    class_name = model_names.get(self.method)
    if class_name is None:
        print("No model is being recognized, stopped.")
        sys.exit(1)

    factorizer = getattr(nimfa, class_name)(
        self.r1, rank=self.rank, max_iter=self.iter)
    source_result = np.array(factorizer().fitted())
    log("Done MF!")

    # Turn each row of values into a distribution (normalized histogram).
    log("Transfer user vector into distribution.")

    item_pdf1 = []
    for i in range(N_ITEM):
        count = 0
        pdf = np.zeros(n_bins)
        for j in range(N_USER):
            value = self.r1[i][j]
            if value == 0.0:
                # Missing entry: fall back to the fitted value.
                value = source_result[i][j]
            # Values that are (still) effectively zero are not counted.
            if value < 1e-4:
                continue
            pdf[bin_index(value)] += 1
            count += 1
        if count > 1:
            pdf = pdf / count
        item_pdf1.append(pdf)

    item_pdf2 = []
    for i in range(N_ITEM):
        count = 0
        pdf = np.zeros(n_bins)
        for j in range(N_USER):
            value = self.r2[i][j]
            if value > 0:
                count += 1
                # Same clamped binning as for r1, so a value >= 1.1 cannot
                # index past the end of the histogram.
                pdf[bin_index(value)] += 1
        if count > 1:
            pdf = pdf / count
        item_pdf2.append(pdf)

    log("Calculate cost matrix....")

    # Cost matrix for items, matrix[item r1][item r2], computed on 5 threads
    # because this is the slowest part. array_split tolerates an N_ITEM that
    # is not a multiple of the thread count.
    partition = 5
    matrix = [[] for _ in range(partition)]
    chunks = np.array_split(np.arange(N_ITEM), partition)
    threads = []
    for rows, chunk in zip(matrix, chunks):
        # threadFunc presumably appends one cost row per item of `chunk` to
        # `rows` - TODO confirm against its definition.
        thread = Thread(target=self.threadFunc,
                        args=(rows, chunk, item_pdf1, item_pdf2))
        threads.append(thread)
        thread.start()
    for thread in threads:
        thread.join()

    matrix = np.array(np.concatenate(matrix, axis=0))
    print("%s %s" % ("Matrix shape: ", matrix.shape))

    log("Hungarian running maximum matching....")
    _, match2to1 = hungarian.lap(matrix)
    log("End of matching!")

    # Item-matched version: row k is the fitted r1 row matched to r2 item k;
    # transposed afterwards for the writer.
    trans = np.array(
        [source_result[match2to1[item2]] for item2 in range(N_ITEM)]).T

    # Find most similar user pair
    log("Find most similar user pair..... Write file...")
    self.writeTrans(trans)
    log("Done, enter cpp mode")