def recommend(self, target_pids=None, eurm_k=750):
    """Compute the estimated ratings and store them in ``self.eurm``.

    The result is ``ss.dot_product(self.s, self.urm, ...)``; nothing is
    returned.

    :param target_pids: forwarded to ``ss.dot_product`` as ``target_items``.
        Per the original author's note, ``None`` means the whole eurm is
        calculated.
    :param eurm_k: forwarded to ``ss.dot_product`` as ``k``.
    """
    self.eurm = ss.dot_product(
        self.s,
        self.urm,
        k=eurm_k,
        target_items=target_pids,
        verbose=self.verbose,
    )
def recsys(shrink):
    """Evaluate an S-Plus similarity recommender with fixed hyper-parameters.

    Reads ``urm``, ``t_urm``, ``dr``, ``ev`` and ``l`` from the enclosing
    scope.

    NOTE(review): the ``shrink`` argument is overwritten with 50 before it is
    used, so the caller's value has no effect, and ``l`` is not defined inside
    this function. Confirm which hyper-parameter this function is meant to
    tune before changing either.

    :param shrink: currently ignored (see note above).
    :return: ``(res[0:3], config)`` where ``res`` is the evaluator output and
        ``config`` is the formatted hyper-parameter string.
    """
    t1, t2, c = 0.25, 0.65, 0.4
    shrink = 50  # rebinding kept on purpose: preserves the existing behaviour
    k = 200
    config = (
        'l=%.2f t1=%.2f t2=%.2f c=%.2f k=%d shrink=%d binary=False'
        % (l, t1, t2, c, k, shrink)
    )

    similarity = ss.s_plus_similarity(
        urm.T, urm,
        k=k, t1=t1, t2=t2, c=c, l=l,
        normalization=True, shrink=shrink, binary=False, verbose=True,
    )

    # Compute the ratings, then drop the similarity before the next step.
    ratings = ss.dot_product(t_urm, similarity.T, k=750)
    del similarity
    ratings = eurm_remove_seed(ratings, dr)

    # Evaluation
    res = ev.evaluate(eurm_to_recommendation_list(ratings), 'ciao', verbose=False)
    del ratings
    return res[0:3], config
def recsys(alpha, beta):
    """Evaluate a P3alpha/RP3beta similarity recommender.

    Reads ``p_iu``, ``p_ui``, ``pop``, ``t_urm``, ``dr`` and ``ev`` from the
    enclosing scope; ``k`` and ``shrink`` are fixed at 200 and 100.

    :param alpha: forwarded to ``p3r3.p3alpha_rp3beta_similarity``.
    :param beta: forwarded to ``p3r3.p3alpha_rp3beta_similarity``.
    :return: ``(res[0:3], config)`` where ``res`` is the evaluator output and
        ``config`` is the formatted hyper-parameter string.
    """
    k = 200
    shrink = 100
    config = (
        'alpha=%.2f beta=%.2f k=%d shrink=%d binary=False'
        % (alpha, beta, k, shrink)
    )

    similarity = p3r3.p3alpha_rp3beta_similarity(
        p_iu, p_ui, pop,
        k=k, shrink=shrink, alpha=alpha, beta=beta, verbose=True, mode=1,
    )

    # Compute the ratings (the similarity is used as-is, not transposed),
    # then drop the similarity before the next step.
    ratings = ss.dot_product(t_urm, similarity, k=750)
    del similarity
    ratings = eurm_remove_seed(ratings, dr)

    # Evaluation
    res = ev.evaluate(eurm_to_recommendation_list(ratings), 'ciao', verbose=False)
    del ratings
    return res[0:3], config
def compute_model(self, top_k=100, alpha=0.85, verbose=False, store_graph=False):
    """Build the correlation graph and store it in ``self.corr_graph``.

    The graph is ``dot_product(self.urm.T, self.urm)``, with every column
    scaled by the reciprocal of the matching column sum of ``self.urm`` and
    all stored values then multiplied by ``alpha``.

    :param top_k: forwarded to ``dot_product`` as ``k``.
    :param alpha: stored in ``self.alpha`` and used to scale the graph values.
    :param verbose: when true, print progress banners and the elapsed time;
        also forwarded to ``dot_product``.
    :param store_graph: when true, save the graph with ``sps.save_npz`` under
        ``'corr_graph_top' + str(top_k)``.
    """
    self.alpha = alpha

    if verbose:
        print('[ Building Correlation Graph ]')
        start_time = time.time()

    # Reciprocal of the column sums; the epsilon avoids a division by zero.
    inverse_col_sum = 1.0 / (self.urm.sum(axis=0).A1 + 1e-8)

    graph = dot_product(self.urm.T, self.urm, verbose=verbose, k=top_k)
    graph.eliminate_zeros()
    # A (1, n) row vector broadcasts over rows, i.e. it scales each column.
    graph = graph.multiply(inverse_col_sum.reshape(1, -1)).tocsr()
    graph.data *= alpha
    self.corr_graph = graph

    if verbose:
        elapsed_seconds = int(time.time() - start_time)
        print("time: " + str(elapsed_seconds / 60))

    if store_graph:
        if verbose:
            print("[ Storing the correlation graph ]")
        sps.save_npz('corr_graph_top' + str(top_k), self.corr_graph)
def compute_rating(self, top_k=500, verbose=False, small=False):
    """Compute the estimated ratings from the column-normalised URM.

    ``self.urm`` is L1-normalised along axis 0 and multiplied by
    ``self.model`` through ``dot_product``; the result is stored in
    ``self.eurm`` as a CSR matrix.

    :param top_k: forwarded to ``dot_product`` as ``k``.
    :param verbose: when true, print a banner and the elapsed time.
    :param small: when true, ``self.urm`` is first replaced by its rows
        ``self.pid`` (this overwrites the stored URM).
    :return: the matrix stored in ``self.eurm``.
    """
    if small:
        self.urm = sps.csr_matrix(self.urm[self.pid])
    # NOTE(review): the flattened source does not show whether this conversion
    # was nested under ``if small``; it is applied unconditionally here.
    self.model = sps.csr_matrix(self.model)

    if verbose:
        print("[ Compute ratings ]")
        start_time = time.time()

    # L1-normalise each column of the URM before the product.
    column_normalised_urm = normalize(self.urm, axis=0, norm='l1')

    # Compute the eURM.
    ratings = dot_product(column_normalised_urm, self.model, k=top_k)
    self.eurm = sps.csr_matrix(ratings)

    if verbose:
        elapsed_seconds = int(time.time() - start_time)
        print("time: " + str(elapsed_seconds / 60))

    return self.eurm
def compute_rating(self, top_k=750, verbose=False, small=False):
    """Compute the estimated ratings as ``dot_product(self.model, self.urm)``.

    :param top_k: forwarded to ``dot_product`` as ``k``.
    :param verbose: when true, print a banner and the elapsed time; the flag
        is also forwarded to ``dot_product``.
    :param small: when true, ``self.model`` is first replaced by its rows
        ``self.pid`` (this overwrites the stored model).
    :return: the value stored in ``self.eurm``.
    """
    if small:
        self.model = self.model[self.pid]

    if verbose:
        print('[ Computing ratings ]')
        start_time = time.time()

    # Forward the caller's flag (it used to be hard-coded to True) so that
    # verbose=False is honoured, as in the other compute_rating variants.
    self.eurm = dot_product(self.model, self.urm, verbose=verbose, k=top_k)

    if verbose:
        print("time: " + str(int(time.time() - start_time) / 60))

    return self.eurm
def compute_rating(self, top_k=500, verbose=False, small=False, mode="offline", iter=1):
    """Compute the ItemRank ratings and return them as a CSR matrix.

    Starting from a uniform score on the stored entries of
    ``self.small_urm``, each iteration multiplies ``self.corr_graph`` by the
    current scores and adds ``self.dui``, the row-L1-normalised small URM
    scaled by ``1 - self.alpha``.

    :param top_k: forwarded to ``dot_product`` as ``k``.
    :param verbose: when true, print a banner and the elapsed time; also
        forwarded to ``dot_product``.
    :param small: when true, ``self.small_urm`` is rebuilt from rows
        ``self.pid`` of ``self.urm``; otherwise ``self.small_urm`` must
        already be set.
    :param mode: not used by this method.
    :param iter: number of iterations to run (the name shadows the builtin
        but is part of the public signature).
    :return: sparse matrix, ``self.eurm`` converted to CSR.
    """
    if small:
        # NOTE(review): the flattened source does not show how much of this
        # was nested under ``if small``; all of it is treated as nested here.
        self.small_urm = sps.csr_matrix(self.urm[self.pid])
        self.small_urm.eliminate_zeros()

    if verbose:
        print('[ Computing Ratings ]')
        start_time = time.time()

    # First pass of ItemRank: the additive term ...
    additive_term = norm_l1_row(self.small_urm)
    additive_term.data *= (1 - self.alpha)

    # ... and a uniform start value on every stored entry.
    start_scores = self.small_urm.copy()
    start_scores.data = (
        np.ones(self.small_urm.data.shape[0]) / self.corr_graph.shape[0]
    )

    # Both operands are transposed before the products below.
    self.dui = additive_term.T
    self.eurm = start_scores.T

    # Subsequent iterations of ItemRank.
    for _ in range(iter):
        propagated = dot_product(self.corr_graph, self.eurm, verbose=verbose, k=top_k)
        self.eurm = propagated + self.dui

    # Undo the transpose applied before the loop.
    self.eurm = self.eurm.T
    self.eurm.eliminate_zeros()

    if verbose:
        elapsed_seconds = int(time.time() - start_time)
        print("time: " + str(elapsed_seconds / 60))

    return self.eurm.tocsr()
def recsys(shrink):
    """Evaluate a cosine-similarity recommender for the given shrink term.

    Reads ``urm``, ``t_urm``, ``dr`` and ``ev`` from the enclosing scope;
    ``alpha`` and ``k`` are fixed at 0.4 and 200. The configuration string is
    printed before the computation starts.

    :param shrink: forwarded to ``ss.cosine_similarity``.
    :return: ``(res[0:3], config)`` where ``res`` is the evaluator output and
        ``config`` is the formatted hyper-parameter string.
    """
    config = 'alpha=0.4 k=200 shrink=%d binary=False' % shrink
    print(config)

    similarity = ss.cosine_similarity(
        urm.T, urm,
        k=200, alpha=0.4, shrink=shrink, binary=False, verbose=True,
    )

    # Compute the ratings, then drop the similarity before the next step.
    ratings = ss.dot_product(t_urm, similarity.T, k=750)
    del similarity
    ratings = eurm_remove_seed(ratings, dr)

    # Evaluation
    res = ev.evaluate(eurm_to_recommendation_list(ratings), 'ciao', verbose=False)
    del ratings
    return res[0:3], config
def compute_rating(self, topk=750, verbose=False, small=False, mode="offline"):
    """Compute the ratings as ``dot_product(self.small_urm, self.model.T)``.

    :param topk: forwarded to ``dot_product`` as ``k``.
    :param verbose: when true, print a banner and the elapsed time; also
        forwarded to ``dot_product``.
    :param small: when true, ``self.small_urm`` is rebuilt from rows
        ``self.pid`` of ``self.urm``; otherwise ``self.small_urm`` must
        already be set.
    :param mode: not used by this method.
    :return: ``self.eurm`` converted to CSR.
    """
    if small:
        # NOTE(review): the flattened source does not show how much of this
        # was nested under ``if small``; all of it is treated as nested here.
        self.small_urm = sps.csr_matrix(self.urm[self.pid])
        self.small_urm.eliminate_zeros()

    if verbose:
        print('[ Computing Ratings ]')
        start_time = time.time()

    self.eurm = dot_product(
        self.small_urm,
        self.model.T,
        verbose=verbose,
        k=topk,
    )

    if verbose:
        elapsed_seconds = int(time.time() - start_time)
        print("time: " + str(elapsed_seconds / 60))

    return self.eurm.tocsr()
def recsys(shrink):
    """Evaluate a Tversky-similarity recommender for the given shrink term.

    Reads ``urm``, ``t_urm``, ``dr`` and ``ev`` from the enclosing scope;
    ``alpha``, ``beta`` and ``k`` are fixed at 0.25, 0.65 and 200.

    :param shrink: forwarded to ``ss.tversky_similarity``.
    :return: ``(res[0:3], config)`` where ``res`` is the evaluator output and
        ``config`` is the formatted hyper-parameter string.
    """
    alpha, beta = 0.25, 0.65
    k = 200
    config = (
        'alpha=%.2f beta=%.2f k=%d shrink=%d binary=False'
        % (alpha, beta, k, shrink)
    )

    similarity = ss.tversky_similarity(
        urm.T, urm,
        k=k, alpha=alpha, beta=beta, shrink=shrink, binary=False, verbose=True,
    )

    # Compute the ratings, then drop the similarity before the next step.
    ratings = ss.dot_product(t_urm, similarity.T, k=750)
    del similarity
    ratings = eurm_remove_seed(ratings, dr)

    # Evaluation
    res = ev.evaluate(eurm_to_recommendation_list(ratings), 'ciao', verbose=False)
    del ratings
    return res[0:3], config
def compute_model(self, knn=100, verbose=False, power=1, save_model=False):
    """Build the item TF-IDF similarity model and store it in ``self.model``.

    Each row of ``self.urm`` is weighted by ``log(n_columns / row_count)``,
    where ``row_count`` is the number of stored entries in that row. The model
    is ``dot_product(weighted_urm.T, self.urm)`` with the diagonal zeroed and
    the remaining values raised to ``power``.

    :param knn: forwarded to ``dot_product`` as ``k``; also used in the file
        name when saving.
    :param verbose: when true, print progress banners and the elapsed time;
        also forwarded to ``dot_product``.
    :param power: exponent applied to the stored similarity values.
    :param save_model: when true, save the model with ``sps.save_npz`` under
        ``'tf_idf_item_sim_' + str(knn)``.
    :return: the CSR matrix stored in ``self.model``.
    """
    if verbose:
        print("[ Creating model with item TF-IDF similarity ]")
        start_time = time.time()

    # Count the stored entries per row by summing a binarised copy of the URM.
    binary_urm = sps.csr_matrix(self.urm)
    binary_urm.data = np.ones(len(self.urm.data))
    row_counts = binary_urm.sum(axis=1).A1
    # The epsilon avoids a division by zero for empty rows.
    row_idf = np.log(self.urm.shape[1] / (row_counts + 1e-8))

    # Scale every row of the URM by its IDF weight.
    weighted_urm = self.urm.multiply(row_idf.reshape(-1, 1)).tocsr()

    # Item similarity matrix; LIL format is used only to clear the diagonal.
    similarity = dot_product(weighted_urm.T, self.urm, k=knn, verbose=verbose).tolil()
    similarity.setdiag(np.zeros(similarity.shape[0]))
    similarity = similarity.tocsr()
    similarity.eliminate_zeros()
    similarity.data = np.power(similarity.data, power)
    self.model = similarity

    if save_model:
        if verbose:
            print('[ Saving the model ]')
        sps.save_npz('tf_idf_item_sim_' + str(knn), self.model)

    if verbose:
        elapsed_seconds = int(time.time() - start_time)
        print("time: " + str(elapsed_seconds / 60))

    return self.model
# INITIALIZATION
dr = Datareader(mode='offline', verbose=False, only_load=True)
ev = Evaluator(dr)
test_pids = dr.get_test_pids()
urm = dr.get_urm()
topk = 750

nlp_strict = NLPStrict(dr)
ucm_strict = nlp_strict.get_UCM()

# TVERSKY
# The similarity does not depend on the exponent being swept, so it is built
# once here instead of being recomputed on every iteration.
base_sim = tversky_similarity(ucm_strict, ucm_strict.T, k=450, alpha=0.2,
                              beta=0.5, shrink=150, target_items=test_pids)

for a in [0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.7, 2.0]:
    print('---------')
    print('TVERSKY | power =', a)

    # Work on a copy so each exponent is applied to the untouched base values.
    sim = base_sim.copy()
    sim.data = np.power(base_sim.data, a)

    # Compute eurm, keeping only the rows of the test playlists.
    eurm = dot_product(sim, urm, k=topk)
    eurm = eurm.tocsr()
    eurm = eurm[test_pids, :]

    rec_list = eurm_to_recommendation_list(eurm, datareader=dr)

    ev.evaluate(rec_list, name='nlp_strict_tversky_power=' + str(a))
# Do not train on challenge set ucm_strict_T = ucm_strict.copy() inplace_set_rows_zero(ucm_strict_T, test_pids) ucm_strict_T = ucm_strict_T.T sim = tversky_similarity(ucm_strict, ucm_strict_T, k=450, alpha=0.2, beta=0.5, shrink=150, target_items=test_pids) # Compute eurm eurm = dot_product(sim, urm, k=topk) eurm = eurm.tocsr() eurm = eurm[test_pids, :] # NLP TOKENS nlp = NLP(dr) ucm = nlp.get_UCM(data1=data1).astype(np.float64) # Do not train on challenge set ucm_T = ucm.copy() inplace_set_rows_zero(ucm_T, test_pids) ucm_T = ucm_T.T sim_lele = tversky_similarity(ucm, ucm_T,
######### MAURIZ
# NOTE(review): presumably the feature-weighting step (CFW_D_Similarity_Linalg);
# `sim_user` must already be defined before this point.
nlp = NLP(dr)
UCM = nlp.get_UCM()

cfw = CFW_D_Similarity_Linalg(
    URM_train=dr.get_urm().T,
    ICM=UCM.copy(),
    S_matrix_target=sim_user,
    URM_validation=None,
)
cfw.fit()

# Put the learned weights on a diagonal matrix and persist them.
weights = sps.diags(cfw.D_best)
sps.save_npz("ucm_weights_maurizi", weights)

# Multiply the UCM by the diagonal weight matrix and persist the result.
UCM_weighted = dot_product(UCM, weights)
sps.save_npz("ucm_fw_maurizio", UCM_weighted)

######## OURS (original comment: "NOI")
urm = dr.get_urm()
pid = dr.get_test_pids()

similarity = tversky_similarity(
    UCM_weighted, UCM_weighted.T,
    binary=False, shrink=1, alpha=0.9, beta=1,
).tocsr()