def icm():
    """Evaluate offline recommendations derived from the NLP track ICM.

    Pipeline, as visible in this function:
      1. load the offline ``Datareader`` and build an ``Evaluator`` on it;
      2. build the ICM with ``NLP(mode='tracks')`` (no stopwords) and pass it
         through ``bm25_row``;
      3. compute a Tversky similarity on the ICM (shrink=200, alpha=0.1,
         beta=1) and convert it to CSR;
      4. multiply the test-playlist rows of the URM by that similarity
         (``k=500``) to obtain the estimated URM (eurm);
      5. convert the eurm to a recommendation list and evaluate it under the
         name ``'nlp_enriched'``.

    The function takes no arguments and returns nothing; it prints progress
    and the similarity computation time. This function itself does not save
    the eurm to disk.
    """
    datareader = Datareader(mode='offline', only_load=True)
    evaluator = Evaluator(datareader)

    print('NLP...')
    test_playlists = datareader.get_test_pids()
    nlp = NLP(datareader=datareader, stopwords=[], mode='tracks')

    print('Getting ucm and icm...')
    # Named item_content (not icm) so the local does not shadow this function.
    item_content = bm25_row(nlp.get_icm())

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(item_content, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    urm = datareader.get_urm()

    print('Computing eurm...')
    # Only the rows of the test playlists are needed for the evaluation.
    eurm_nlp = dot_product(urm[test_playlists, :], similarity, k=500)
    eurm_nlp = eurm_nlp.tocsr()

    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp), name='nlp_enriched')
def online():
    """Build the online NLP-based estimated URM and save it to disk.

    Pipeline, as visible in this function:
      1. load the online ``Datareader``;
      2. build the UCM with ``NLP`` (no stopwords) and use it as-is (no BM25
         re-weighting and no per-token column scaling are applied);
      3. compute a Tversky similarity on the UCM (shrink=200, alpha=0.1,
         beta=1) and convert it to CSR;
      4. multiply the similarity by the shrinked URM (``k=500``);
      5. keep the last 10000 rows of the result and save them to
         ``ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz'``.

    The function takes no arguments and returns nothing; it prints progress,
    the similarity computation time and the shape of the full eurm.
    """
    datareader = Datareader(mode='online', only_load=True)

    print('NLP...')
    nlp = NLP(datareader, stopwords=[])
    ucm = nlp.get_ucm()

    # get_urm_shrinked() returns an indexable result; only element 0 is used.
    urm = datareader.get_urm_shrinked()[0]

    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    print('Computing eurm...')
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    print(eurm_nlp.shape)

    # NOTE(review): presumably the test playlists are the last 10000 rows of
    # the matrix -- confirm against Datareader before changing this value.
    rows_to_keep = 10000
    eurm_nlp = eurm_nlp[-rows_to_keep:, :]

    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_no_stop_online.npz', eurm_nlp)
def fitnessFunction(self, individual):
    """Score one individual by the precision of the recommendations it yields.

    The individual is used as a vector of per-column scaling factors for the
    UCM. The scaled UCM drives a Tversky similarity restricted to the test
    playlists, which is multiplied by the training URM to obtain an estimated
    URM; the resulting recommendations are scored with the evaluator.

    Args:
        individual: sequence of numbers, one per UCM column (it is passed as
            the ``scale`` argument of ``inplace_csr_column_scale``).

    Returns:
        A one-element tuple ``(score,)`` where ``score`` is the mean of the
        track-level ``'prec'`` metric on categories 1 and 2.
        NOTE(review): the tuple form is presumably what the genetic-algorithm
        framework expects for a fitness value -- confirm against the caller.

    Side effects:
        Increments ``self.current`` and prints progress (the ``'current'``
        line is printed regardless of ``self.verbose``).
    """
    # Convert list into a numpy array
    individual = np.array(individual)
    verbose = self.verbose

    # Build a float64 copy of the UCM and scale each column by its weight.
    if verbose:
        print('Filtering UCM...')
        start = time.time()
    # astype() returns a new matrix by default, so self.UCM is left untouched
    # by the in-place scaling below without needing a separate .copy() first.
    UCM_filtered = self.UCM.astype(np.float64)
    inplace_csr_column_scale(UCM_filtered, individual)
    if verbose:
        print('UCM filtered in', time.time() - start, 'sec')

    # Compute similarity
    if verbose:
        print('Computing similarity...')
        start = time.time()
    similarity = tversky_similarity(
        UCM_filtered,
        shrink=200,
        alpha=0.1,
        beta=1,
        target_items=self.test_playlists_indices,
        binary=False,
    )
    similarity = similarity.tocsr()
    if verbose:
        print('Similarity computed in', time.time() - start, 'sec')

    # Compute eurm
    if verbose:
        print('Computing eurm...')
        start = time.time()
    eurm = dot_product(similarity, self.URM_train, k=500)
    if verbose:
        print('eurm computed in', time.time() - start, 'sec')
        print('Converting eurm in csr...')
        start = time.time()
    eurm = eurm.tocsr()
    # Only the rows of the test playlists are evaluated.
    eurm = eurm[self.test_playlists_indices, :]
    if verbose:
        print('eurm converted in', time.time() - start, 'sec')

    # Evaluate
    rec_list = eurm_to_recommendation_list(eurm)
    print('current', self.current)

    category_scores = [
        self.evaluator.evaluate_single_metric(
            rec_list,
            name='Genetic',
            metric='prec',
            level='track',
            cat=category,
            verbose=False,
        )
        for category in (1, 2)
    ]
    score = (category_scores[0] + category_scores[1]) / 2

    self.current += 1

    if verbose:
        print(score)
        print("Numfeatures {}".format(np.sum(individual)))
        print('\n')

    # Trailing comma: the fitness is returned as a one-element tuple.
    return score,
def get_similarity_from_icm(self):
    """Compute, cache and return the Tversky similarity built from the ICM.

    Calls ``self.get_icm()`` first and then reads ``self.icm``; the similarity
    (shrink=200, alpha=0.1, beta=1) is converted to CSR, stored in
    ``self.similarity_icm`` and returned. A progress message is printed when
    ``self.verbose`` is truthy.

    Returns:
        The CSR similarity matrix also stored in ``self.similarity_icm``.
    """
    # Return value is discarded: the matrix is read back from self.icm below.
    # NOTE(review): presumably get_icm() populates self.icm -- confirm.
    self.get_icm()

    if self.verbose:
        print('Computing similarity from icm...')

    tversky_params = dict(shrink=200, alpha=0.1, beta=1)
    item_similarity = tversky_similarity(self.icm, **tversky_params)

    self.similarity_icm = item_similarity.tocsr()
    return self.similarity_icm
def prova():
    """Scratch/debug entry point ("prova" is Italian for "test").

    As written, this function loads the offline ``Datareader``, prints the
    result of ``get_artist_to_tracks_dict()`` and terminates the interpreter.
    Everything after the first exit call is unreachable; it is kept here
    unchanged because it documents an experimental pipeline (UCM enriched
    through a collaborative user similarity, then Tversky similarity, eurm
    and evaluation) that a maintainer may want to re-enable.

    Raises:
        SystemExit: always, right after the artist-to-tracks dict is printed.
    """
    # sys.exit() is used instead of the bare exit() helper: exit() is injected
    # by the ``site`` module for interactive use and is not guaranteed to
    # exist (e.g. ``python -S`` or frozen builds).
    import sys

    dr = Datareader(mode='offline', only_load=True)
    print(dr.get_artist_to_tracks_dict())
    sys.exit()

    # NOTE(review): unreachable from here on because of the exit above.
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    test_playlists = dr.get_test_pids()

    stopwords = STOP_WORDS
    token_weights = np.array(TOKEN_WEIGHTS)

    nlp = NLP(mode='playlists', datareader=dr, stopwords=STOP_WORDS)
    s = nlp.get_ucm()
    print(s.shape)

    evaluator = Evaluator(dr)

    ucm = nlp.get_ucm()
    sim = sparse.load_npz(ROOT_DIR + '/data/cf_user_similarity.npz')

    print('Computing dot...')
    ucm = dot_product(sim, ucm, k=200)
    print('NNZ', ucm.nnz)
    # Second early exit: the similarity/eurm/evaluation steps below would be
    # skipped even if the first exit were removed.
    sys.exit()

    urm = dr.get_urm()

    # ucm = ucm.astype(np.float64)
    # inplace_csr_column_scale(ucm, token_weights)

    print('Computing similarity...')
    start = time.time()
    # Compute similarity
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1)
    similarity = similarity.tocsr()
    print(time.time() - start)

    print('Computing eurm...')
    start = time.time()
    # Compute eurm
    eurm_nlp = dot_product(similarity, urm, k=500)
    eurm_nlp = eurm_nlp.tocsr()
    eurm_nlp = eurm_nlp[test_playlists, :]

    #sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp), name='nlp_enriched')
nlp = NLP2(dr, stopwords=[], norm=norm,work=work,split=split,date=date, skip_words=skip_words, porter=porter,porter2=porter2,lanca=lanca,lanca2=lanca2) # new_titles, occ_full, occ_single = nlp.fit( verbose=False, workout=True, normalize=True, date=True, lancaster=False, # porter=False, underscore=True, double_fit=False) ucm = nlp.get_UCM(data1=data1) urm = dr.get_urm() test_playlists = dr.get_test_pids() print('ucm', ucm.shape) print('Computing similarity...') start = time.time() # Compute similarity ucm= bm25_row(ucm) similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1) similarity = similarity.tocsr() print(time.time() - start) print('Computing eurm...') start = time.time() # Compute eurm eurm = dot_product(similarity, urm, k=500) eurm = eurm.tocsr() eurm = eurm[test_playlists, :] print('eurm', eurm.shape) print(time.time() - start) # Evaluating