def prova():
    # NOTE(review): debug scaffold. The first exit() below makes everything
    # after it dead code; the second exit() kills the rest as well. Kept
    # byte-identical — delete or re-order the exit() calls to reactivate.
    dr = Datareader(mode='offline', only_load=True)
    print(dr.get_artist_to_tracks_dict())
    exit()
    # --- unreachable from here on ---
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    test_playlists = dr.get_test_pids()
    stopwords = STOP_WORDS
    token_weights = np.array(TOKEN_WEIGHTS)
    nlp = NLP(mode='playlists', datareader=dr, stopwords=STOP_WORDS)
    s = nlp.get_ucm()
    print(s.shape)
    evaluator = Evaluator(dr)
    ucm = nlp.get_ucm()
    # Precomputed user-user CF similarity, used to enrich the UCM.
    sim = sparse.load_npz(ROOT_DIR + '/data/cf_user_similarity.npz')
    print('Computing dot...')
    ucm = dot_product(sim, ucm, k=200)
    print('NNZ', ucm.nnz)
    exit()
    # --- second unreachable tail ---
    urm = dr.get_urm()
    # ucm = ucm.astype(np.float64)
    # inplace_csr_column_scale(ucm, token_weights)
    print('Computing similarity...')
    start = time.time()
    # Compute similarity
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)
    print('Computing eurm...')
    start = time.time()
    # Compute eurm (estimated user-rating matrix), sliced to test playlists.
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    eurm_nlp = eurm_nlp[test_playlists, :]
    # sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp), name='nlp_enriched')
def icm():
    """Offline NLP evaluation built on a track-level (ICM) Tversky similarity."""
    dr = Datareader(mode='offline', only_load=True)
    ev_off = Evaluator(dr)
    print('NLP...')
    stopwords = STOP_WORDS                      # kept for parity with sibling scripts
    token_weights = np.array(TOKEN_WEIGHTS)     # currently unused downstream
    test_pids = dr.get_test_pids()
    nlp = NLP(datareader=dr, stopwords=[], mode='tracks')

    print('Getting ucm and icm...')
    item_cm = bm25_row(nlp.get_icm())

    print('Computing similarity...')
    t0 = time.time()
    # Compute similarity
    sim = tversky_similarity(item_cm, shrink=200, alpha=0.1, beta=1).tocsr()
    print(time.time() - t0)

    urm = dr.get_urm()

    print('Computing eurm...')
    t0 = time.time()
    # Compute eurm for the test playlists only.
    eurm_nlp = dot_product(urm[test_pids, :], sim, k=500).tocsr()
    # sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    ev_off.evaluate(eurm_to_recommendation_list(eurm_nlp), name='nlp_enriched')
def online():
    """Build and persist the online NLP eurm (no stopword filtering)."""
    dr = Datareader(mode='online', only_load=True)
    print('NLP...')
    stopwords = STOP_WORDS                      # kept for parity with sibling scripts
    token_weights = np.array(TOKEN_WEIGHTS)     # currently unused downstream
    nlp = NLP(dr, stopwords=[])
    ucm = nlp.get_ucm()
    # ucm = bm25_row(ucm)
    # inplace_csr_column_scale(ucm, token_weights)
    urm = dr.get_urm_shrinked()[0]

    print('Computing similarity...')
    t0 = time.time()
    # Compute similarity
    sim = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1).tocsr()
    print(time.time() - t0)

    print('Computing eurm...')
    t0 = time.time()
    # Compute eurm
    eurm_nlp = dot_product(sim, urm, k=500).tocsr()
    print(eurm_nlp.shape)
    # Slice the last 10k rows — presumably the online test playlists sit at
    # the bottom of the matrix (mirrors the offline test slicing); confirm.
    eurm_nlp = eurm_nlp[-10000:, :]
    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz', eurm_nlp)
def evaluateRecommendationsSpotify(self):
    """Score the current model on the Spotify test playlists.

    Builds a top-500 recommendation list per test playlist from the
    model's similarity (self.W_sparse) and returns the six evaluator
    metrics in a dict.
    """
    # print("Recommender: sparsity self.W_sparse:", self.W_sparse.nnz / self.W_sparse.shape[1] / self.W_sparse.shape[0])
    profiles = self.URM_train[pids_converted]
    print("dot product")
    scores = dot_product(profiles, self.W_sparse, k=750).tocsr()
    scores = eurm_remove_seed(scores)

    recommendation_list = np.zeros((10000, 500))
    for r in range(scores.shape[0]):
        row = scores[r]
        # indices of the 500 largest scores, best first
        top = row.data.argsort()[-500:][::-1]
        tracks = row.indices[top]
        recommendation_list[r, 0:len(tracks)] = tracks

    metrics = ev.evaluate(
        recommendation_list=recommendation_list,
        name=self.configuration + "_epoca" + str(self.currentEpoch),
        return_overall_mean=True, verbose=False, show_plot=False, do_plot=True)

    keys = ("prec_t", "ndcg_t", "clicks_t", "prec_a", "ndcg_a", "clicks_a")
    return dict(zip(keys, metrics))
def evaluateRecommendationsSpotify_RECOMMENDER(recommender):
    """
    THIS FUNCTION WORKS INSIDE THE RECOMMENDER
    Free-function variant: takes the recommender object explicitly and
    returns the six evaluator metrics in a dict.
    :param recommender: fitted recommender exposing URM_train, W_sparse,
        configuration and currentEpoch
    :return: dict of metrics
    """
    profiles = recommender.URM_train[pids_converted]
    scores = dot_product(profiles, recommender.W_sparse, k=500).tocsr()

    recommendation_list = np.zeros((10000, 500))
    for r in tqdm(range(scores.shape[0]), desc="spotify rec list"):
        row = scores[r]
        # indices of the 500 largest scores, best first
        top = row.data.argsort()[-500:][::-1]
        tracks = row.indices[top]
        recommendation_list[r, 0:len(tracks)] = tracks

    metrics = ev.evaluate(
        recommendation_list=recommendation_list,
        name=recommender.configuration + "epoca" + str(recommender.currentEpoch),
        return_overall_mean=True, verbose=False, show_plot=False, do_plot=True)

    keys = ("prec_t", "ndcg_t", "clicks_t", "prec_a", "ndcg_a", "clicks_a")
    return dict(zip(keys, metrics))
def fitnessFunction(self, individual):
    """Genetic-algorithm fitness: score one vector of token weights.

    Scales each UCM column by the individual's weights, recomputes the
    playlist similarity and eurm for the test playlists, and scores the
    resulting recommendations.

    :param individual: sequence of per-token weights (one per UCM column)
    :return: 1-tuple ``(score,)`` — DEAP-style fitness, the mean of the
        track-level precision on categories 1 and 2
    """
    # Convert list into a numpy array
    individual = np.array(individual)

    # Make a copy of the UCM and scale it column-wise; the copy (as float64)
    # keeps the shared self.UCM unmutated across individuals.
    if self.verbose:
        print('Filtering UCM...')
        start = time.time()
    UCM_filtered = self.UCM.copy()
    UCM_filtered = UCM_filtered.astype(np.float64)
    inplace_csr_column_scale(UCM_filtered, individual)
    if self.verbose:
        print('UCM filtered in', time.time() - start, 'sec')

    # Compute similarity, restricted to the test playlists.
    if self.verbose:
        print('Computing similarity...')
        start = time.time()
    similarity = tversky_similarity(UCM_filtered, shrink=200, alpha=0.1, beta=1,
                                    target_items=self.test_playlists_indices,
                                    binary=False)
    similarity = similarity.tocsr()
    if self.verbose:
        print('Similarity computed in', time.time() - start, 'sec')

    # Compute eurm
    if self.verbose:
        print('Computing eurm...')
        start = time.time()
    eurm = dot_product(similarity, self.URM_train, k=500)
    if self.verbose:
        print('eurm computed in', time.time() - start, 'sec')
        # FIX: this progress message (and the one for 'current' below) was
        # printed unconditionally, inconsistent with every other progress
        # message in this method; now guarded by self.verbose.
        print('Converting eurm in csr...')
        start = time.time()
    eurm = eurm.tocsr()
    eurm = eurm[self.test_playlists_indices, :]
    if self.verbose:
        print('eurm converted in', time.time() - start, 'sec')

    # Evaluate: mean track-level precision over categories 1 and 2.
    rec_list = eurm_to_recommendation_list(eurm)
    if self.verbose:
        print('current', self.current)
    score_cat_1 = self.evaluator.evaluate_single_metric(rec_list, name='Genetic', metric='prec',
                                                        level='track', cat=1, verbose=False)
    score_cat_2 = self.evaluator.evaluate_single_metric(rec_list, name='Genetic', metric='prec',
                                                        level='track', cat=2, verbose=False)
    score = (score_cat_1 + score_cat_2) / 2
    self.current += 1
    if self.verbose:
        print(score)
        print("Numfeatures {}".format(np.sum(individual)))
        print('\n')
    return score,
def get_eurm_from_icm(self, urm, test_playlists):
    """
    Compute the ICM, then the similarity and return the EURM sliced for test playlists.
    :param urm: the full user-rating matrix
    :param test_playlists: the pids of the test playlists
    :return: eurm: the estimated eurm of shape (10K, 2M)
    """
    self.urm = urm
    self.get_similarity_from_icm()

    if self.verbose:
        # FIX: message said 'similarity from ucm' while this method computes
        # the eurm from the icm similarity.
        print('Computing eurm from icm...')

    # NOTE(review): `self.datareader.__online()` goes through Python private
    # name mangling (it resolves to `_<ThisClass>__online` on the datareader
    # object) — confirm Datareader really exposes that attribute.
    if self.datareader.__online():
        # FIX: was `self.urm[-10000, :]` — a single row. The online test
        # playlists are the LAST 10000 rows (matches the slicing used in
        # the online() script), so the slice needs the colon.
        self.eurm = dot_product(self.urm[-10000:, :], self.similarity_icm, k=500)
    else:
        self.eurm = dot_product(self.urm[test_playlists, :], self.similarity_icm, k=500)

    self.eurm = self.eurm.tocsr()
    return self.eurm
def evaluate_shrinked(W_sparse, urm_shrinked, pids_shrinked):
    """Evaluate a playlist-similarity matrix against the shrinked URM.

    NOTE(review): another function with this same name exists later in the
    file; whichever is defined last wins at import time.

    :param W_sparse: playlist-playlist similarity
    :param urm_shrinked: shrinked user-rating matrix
    :param pids_shrinked: row indices of the test playlists
    """
    rows = W_sparse[pids_shrinked]
    scores = dot_product(rows, urm_shrinked, k=750).tocsr()
    scores = eurm_remove_seed(eurm=scores)
    ev.evaluate(recommendation_list=eurm_to_recommendation_list(scores),
                name="slim_structure_parametribase_BPR_epoca_0_noepoche",
                return_overall_mean=False, show_plot=False, do_plot=True)
def evaluate_shrinked(W_sparse, urm_shrinked, pids_shrinked):
    """Evaluate by dotting user profiles with the item similarity.

    NOTE(review): duplicates the name of an earlier function in this file;
    if both live in the same module, this later definition shadows it.

    :param W_sparse: item-item similarity
    :param urm_shrinked: shrinked user-rating matrix
    :param pids_shrinked: row indices of the test playlists
    """
    profiles = urm_shrinked[pids_shrinked]
    scores = dot_product(profiles, W_sparse, k=500).tocsr()

    recommendation_list = np.zeros((10000, 500))
    for r in tqdm(range(scores.shape[0]), desc="spotify rec list shrinked"):
        row = scores[r]
        # indices of the 500 largest scores, best first
        top = row.data.argsort()[-500:][::-1]
        tracks = row.indices[top]
        recommendation_list[r, 0:len(tracks)] = tracks

    ev.evaluate(recommendation_list=recommendation_list,
                name="slim_structure_parametribase_BPR_epoca_0_noepoche",
                return_overall_mean=False, show_plot=False, do_plot=True)
def new():
    """Offline eurm built directly as UCM · ICMᵀ (both BM25-weighted), no
    intermediate similarity matrix; saves the result and evaluates it."""
    dr = Datareader(mode='offline', only_load=True)
    ev_off = Evaluator(dr)
    print('NLP...')
    stopwords = STOP_WORDS                      # kept for parity with sibling scripts
    token_weights = np.array(TOKEN_WEIGHTS)     # currently unused downstream
    test_pids = dr.get_test_pids()
    nlp = NLP(datareader=dr, stopwords=[], mode='both')

    print('Getting ucm and icm...')
    ucm = bm25_row(nlp.get_ucm())
    icm = bm25_row(nlp.get_icm())
    icm_T = icm.T
    # ucm = bm25_row(ucm)
    # urm = datareader.get_urm()

    print('Computing eurm...')
    t0 = time.time()
    eurm_nlp = dot_product(ucm[test_pids, :], icm_T, k=500)
    print(time.time() - t0)

    print('Converting to csr...')
    eurm_nlp = eurm_nlp.tocsr()
    print(eurm_nlp.shape)
    # eurm_nlp = eurm_nlp[test_playlists:, :]
    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_new_method_offline.npz', eurm_nlp)
    ev_off.evaluate(eurm_to_recommendation_list(eurm_nlp), name='nlp_new_method',
                    show_plot=False)
# Script fragment: BM25-weighted NLP similarity → eurm → evaluation.
# Relies on `dr`, `ucm`, `urm`, `evaluator` and `nome` defined earlier
# (outside this view) — TODO confirm their origin before refactoring.
test_playlists = dr.get_test_pids()
print('ucm', ucm.shape)
print('Computing similarity...')
start = time.time()
# Compute similarity (note shrink=1 here, unlike the shrink=200 scripts)
ucm = bm25_row(ucm)
similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1)
similarity = similarity.tocsr()
print(time.time() - start)
print('Computing eurm...')
start = time.time()
# Compute eurm, sliced to the test playlists
eurm = dot_product(similarity, urm, k=500)
eurm = eurm.tocsr()
eurm = eurm[test_playlists, :]
print('eurm', eurm.shape)
print(time.time() - start)
# Evaluating — persists both the eurm and the recommendation list first
rec_list = eurm_to_recommendation_list(eurm)
sps.save_npz("nlp_eurm_online_bm25.npz", eurm, compressed=False)
np.save("nlp_rec_list_online_bm25", rec_list)
evaluator.evaluate(rec_list, name='AAANLP_bm25_' + nome, verbose=True, show_plot=False)
# Script fragment: user-artist content-based eurm.
# Relies on `playlists`, `artists`, `dr`, `pids`, `urm` defined earlier
# (outside this view) — TODO confirm their origin before refactoring.
print('ucm...')
# Binary playlist-x-artist matrix; 1049361 is presumably the total number
# of playlists in the dataset — verify against the Datareader.
ucm = sparse.csr_matrix((np.ones(len(playlists)), (playlists, artists)),
                        shape=(1049361, len(dr.get_artists())))
ucm = ucm.tocsr()
ucm = ucm[pids]
print(ucm.shape)
ucm = bm25_row(ucm)
print('similarity..')
sim = tversky_similarity(ucm, ucm.T, shrink=200, alpha=0.1, beta=1, k=800,
                         verbose=1, binary=False)
sim = sim.tocsr()
test_pids = list(dr.get_test_pids())
eurm = dot_product(sim, urm, k=750)
eurm = eurm.tocsr()
eurm = eurm[test_pids, :]
sparse.save_npz('eurm_artists.npz', eurm)
#ev.evaluate(eurm_to_recommendation_list(eurm), name='cbf_user_artist', show_plot=False)
exit()
lanca2=lanca2) ucm = nlp.get_UCM(data1=data1) urm = dr.get_urm() test_playlists = dr.get_test_pids() ucm = bm25_row(ucm) similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1) similarity = similarity.tocsr() #eurm eurm = dot_product(similarity, urm, k=topk) eurm = eurm.tocsr() eurm = eurm[test_playlists, :] rec_list = eurm_to_recommendation_list(eurm) sps.save_npz(mode + "_" + name + "_bm25.npz", eurm, compressed=False) np.save(mode + "_" + name + "_bm25", rec_list) #evaluate ev = Evaluator(dr) ev.evaluate(rec_list, name=name, verbose=True, show_plot=False) if mode == "online": nlp = NLP(dr,
lambda_j=lambda_j, learning_rate=learning_rate, topK=topk, sgd_mode='adam', gamma=0.999, beta_1=beta_1, beta_2=beta_2, stop_on_validation=True, lower_validatons_allowed=1, validation_metric="ndcg_t", validation_function=evaluate_for_online, validation_every_n=1) # calculating eurm, evaluation, save user_profile_batch = slim.URM_train[pids_converted] eurm = dot_product(user_profile_batch, slim.W_sparse, k=500).tocsr() recommendation_list = eurm_to_recommendation_list(eurm) sps.save_npz(ROOT_DIR + "/results/" + complete_name + ".npz", eurm, compressed=False) ev.evaluate(recommendation_list=recommendation_list, name=complete_name) elif mode == "online": ####### DATA INIZIALIZATION ONLINE ################# dummy_variable = 0 dr = Datareader(mode="online", only_load=True, verbose=False) pids = dr.get_test_pids() urm = dr.get_urm()
nlp_strict = NLPStrict(dr) # Get ucm ucm = nlp_strict.get_UCM() # Compute similarity (playlists x playlists) sim = tversky_similarity(ucm, ucm.T, shrink=200, alpha=0.1, beta=1, k=knn) sim = sim.tocsr() # Recommendation eurm = dot_product(sim, urm, k=topk) eurm = eurm.tocsr() eurm = eurm[test_pids, :] rec_list = eurm_to_recommendation_list(eurm, dr) if save_eurm: sps.save_npz(mode + "_" + name + ".npz", eurm, compressed=False) # Submission ev.evaluate(rec_list, name=name) elif mode == 'online': # Setup dr = Datareader(mode=mode, verbose=False, only_load=True) sb = Submitter(dr)