def offline():
    """Run the full offline boosting pipeline on the combined ensemble and evaluate it."""
    # Data access + evaluation helpers
    datareader = Datareader(mode='offline', only_load=True, verbose=False)
    evaluator = Evaluator(datareader)

    # Load the two ensembled EURMs and merge them category-wise
    lele_matrix = sparse.load_npz(
        ROOT_DIR + '/data/lele/ensembled_CLUSTERARTISTScat4-5-6-8-10_offline.npz')
    std_matrix = sparse.load_npz(ROOT_DIR + '/data/lele/ensembled_SUBCREATIVA_offline.npz')
    ensemble = combine_two_eurms(lele_matrix, std_matrix, cat_first=[4, 5, 6, 8, 10])

    # Alternative: load a pre-combined matrix instead
    # ensemble = sparse.load_npz(ROOT_DIR + '/data/ensembled_creativeFIRE_offline.npz')
    similarity = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')

    # TOPBOOST (disabled)
    # topb = TopBoost(datareader, ensemble, similarity)
    # ensemble = topb.boost_eurm(categories=[9], top_k=100, gamma=0.01)

    # HOLEBOOST: two passes with different (category, k) settings
    hole_boost = HoleBoost(similarity=similarity, eurm=ensemble,
                           datareader=datareader, norm=norm_l1_row)
    ensemble = hole_boost.boost_eurm(categories=[8], k=300, gamma=1)
    hole_boost = HoleBoost(similarity=similarity, eurm=ensemble,
                           datareader=datareader, norm=norm_l1_row)
    ensemble = hole_boost.boost_eurm(categories=[10], k=150, gamma=1)

    # TAILBOOST
    tail_boost = TailBoost(similarity=similarity, eurm=ensemble,
                           datareader=datareader, norm=norm_l2_row)
    ensemble = tail_boost.boost_eurm(categories=[9, 7, 6, 5],
                                     last_tracks=[10, 3, 3, 3],
                                     k=[100, 80, 100, 100],
                                     gamma=[0.01, 0.01, 0.01, 0.01])

    # ALBUMBOOST
    album_boost = AlbumBoost(datareader, ensemble)
    ensemble = album_boost.boost_eurm(categories=[3, 4, 7, 9], gamma=2,
                                      top_k=[3, 3, 10, 40])

    # MATCHBOOST (disabled)
    # mb = MatchBoost(datareader=datareader, eurm=ensemble, top_k_alb=5000, top_k_art=10000)
    # ensemble, pids = mb.boost_eurm(categories='all', k_art=300, k_alb=300, gamma_art=0.1, gamma_alb=0.1)

    # EVALUATION (the boosted matrix is also saved to disk)
    rec_list = eurm_to_recommendation_list(ensemble, datareader=datareader)
    sparse.save_npz('FINAL.npz', ensemble)
    evaluator.evaluate(rec_list, name='LELE_boosts.csv')
def icm():
    """Track-level NLP pipeline: BM25 ICM -> Tversky similarity -> EURM -> evaluation."""
    dr = Datareader(mode='offline', only_load=True)
    ev = Evaluator(dr)

    print('NLP...')
    stopwords = STOP_WORDS                   # kept for parity with sibling pipelines (unused here)
    token_weights = np.array(TOKEN_WEIGHTS)  # idem
    test_playlists = dr.get_test_pids()
    nlp = NLP(datareader=dr, stopwords=[], mode='tracks')

    print('Getting ucm and icm...')
    item_matrix = bm25_row(nlp.get_icm())

    print('Computing similarity...')
    t0 = time.time()
    sim = tversky_similarity(item_matrix, shrink=200, alpha=0.1, beta=1).tocsr()
    print(time.time() - t0)

    urm = dr.get_urm()

    print('Computing eurm...')
    t0 = time.time()
    # Rows restricted to the test playlists before the multiplication
    eurm_nlp = dot_product(urm[test_playlists, :], sim, k=500).tocsr()
    # sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    ev.evaluate(eurm_to_recommendation_list(eurm_nlp), name='nlp_enriched')
def grid_holeboost():
    """Grid search over HoleBoost's k and gamma on the offline combined ensemble."""
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    evaluator = Evaluator(dr)

    # Load the two ensembled EURMs and merge them category-wise
    eurm_a = sparse.load_npz(
        ROOT_DIR + '/data/lele/ensembled_CLUSTERARTISTScat4-5-6-8-10_offline.npz')
    eurm_b = sparse.load_npz(ROOT_DIR + '/data/lele/ensembled_SUBCREATIVA_offline.npz')
    combined = combine_two_eurms(eurm_a, eurm_b, cat_first=[4, 5, 6, 8, 10])

    similarity = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')

    # Each configuration boosts the *original* combined matrix
    for neighbors in [50, 100, 150, 200, 250, 300, 350, 400]:
        for gamma in [1, 2, 5, 10]:
            booster = HoleBoost(similarity=similarity, eurm=combined,
                                datareader=dr, norm=norm_l1_row)
            boosted = booster.boost_eurm(categories=[8, 10], k=neighbors, gamma=gamma)
            recommendations = eurm_to_recommendation_list(boosted, datareader=dr)
            print('--------------------------------------------------------------------------')
            print('K =', neighbors)
            print('G =', gamma)
            evaluator.evaluate(recommendations, name='hb', save=False)
def prova():
    """Scratchpad experiment: prints the artist->tracks dict and exits.

    Everything after the first exit() is dead code kept for later
    experimentation (a UCM dot-product test, then an unreachable NLP
    similarity pipeline).
    """
    dr = Datareader(mode='offline', only_load=True)
    print(dr.get_artist_to_tracks_dict())
    exit()

    # --- unreachable below: UCM x CF-user-similarity experiment ---
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    test_playlists = dr.get_test_pids()
    stopwords = STOP_WORDS
    token_weights = np.array(TOKEN_WEIGHTS)
    nlp = NLP(mode='playlists', datareader=dr, stopwords=STOP_WORDS)
    s = nlp.get_ucm()
    print(s.shape)
    evaluator = Evaluator(dr)
    ucm = nlp.get_ucm()
    sim = sparse.load_npz(ROOT_DIR + '/data/cf_user_similarity.npz')
    print('Computing dot...')
    ucm = dot_product(sim, ucm, k=200)
    print('NNZ', ucm.nnz)
    exit()

    # --- unreachable below: full NLP tversky pipeline ---
    urm = dr.get_urm()
    # ucm = ucm.astype(np.float64)
    # inplace_csr_column_scale(ucm, token_weights)
    print('Computing similarity...')
    start = time.time()
    similarity = tversky_similarity(ucm, shrink=200, alpha=0.1, beta=1).tocsr()
    print(time.time() - start)
    print('Computing eurm...')
    start = time.time()
    eurm_nlp = dot_product(similarity, urm, k=500).tocsr()
    eurm_nlp = eurm_nlp[test_playlists, :]
    # sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_weighted_offline.npz', eurm_nlp)
    evaluator.evaluate(eurm_to_recommendation_list(eurm_nlp), name='nlp_enriched')
def main(args):
    """Train the model, periodically evaluate/checkpoint, then plot the curves.

    args must provide: log_dir, epochs, eval_step, save_step, exp_dir (plus
    whatever get_data/BaseModel read from it).
    """
    sys.stdout = Logger(args.log_dir)

    train_loader, test_loader = get_data(args)
    model = BaseModel(args)
    evaluator = Evaluator(model=model, data_loader=test_loader)

    best_acc = evaluator.evaluate()   # accuracy before any training
    accuracies = [best_acc]
    losses = []

    for epoch in range(1, args.epochs + 1):
        print("Epoch", epoch)
        epoch_loss = 0
        for batch in tqdm(train_loader):
            model.set_input(batch)
            model.optimize_parameters()
            epoch_loss += model.get_loss()
        print("Epoch finished with loss", epoch_loss)
        losses.append(epoch_loss)

        if epoch % args.eval_step == 0:
            acc = evaluator.evaluate()
            accuracies.append(acc)
            best_acc = max(acc, best_acc)
            print("[Epoch {}] Accuracy:{:.2f}, Best Accuracy:{:.2f}".format(
                epoch, acc, best_acc))
        if epoch % args.save_step == 0:
            model.save_model(epoch)
        model.update_lr()

    def _save_curve(values, ylabel, filename):
        # One fresh figure per curve, written under the experiment dir.
        plt.figure()
        plt.plot(range(len(values)), values)
        plt.xlabel('Epochs')
        plt.ylabel(ylabel)
        plt.savefig(os.path.join(args.exp_dir, filename))

    _save_curve(losses, 'Training Loss', 'losses.png')
    _save_curve(accuracies, 'Test Accuracy', 'accuracies.png')
def grid_tailboost():
    """Grid search over TailBoost hyperparameters (last_tracks, k, gamma).

    Each configuration boosts the *original* combined ensemble and is
    evaluated on the offline test set; results are printed, not saved.
    """
    datareader = Datareader(mode='offline', only_load=True, verbose=False)
    ev = Evaluator(datareader)

    # LOAD AND COMBINE the two ensembled EURMs category-wise
    eurm_lele = sparse.load_npz(
        ROOT_DIR + '/data/lele/ensembled_CLUSTERARTISTScat4-5-6-8-10_offline.npz')
    eurm_std = sparse.load_npz(ROOT_DIR + '/data/lele/ensembled_SUBCREATIVA_offline.npz')
    eurm_ens = combine_two_eurms(eurm_lele, eurm_std, cat_first=[4, 5, 6, 8, 10])

    sim = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')

    # TAILBOOST grid search
    for lt in [2, 3, 5, 6, 10]:
        for k in [20, 50, 80, 100, 150]:
            for g in [0.005, 0.01, 0.02, 0.05]:
                tb = TailBoost(similarity=sim, eurm=eurm_ens,
                               datareader=datareader, norm=norm_l2_row)
                # BUGFIX: the boosted matrix used to be assigned back to
                # eurm_ens, so each grid iteration boosted the already
                # boosted output of the previous one and the search results
                # were compounded. Keep it in its own variable instead
                # (same pattern as grid_holeboost).
                eurm_boosted = tb.boost_eurm(categories=[9, 7, 6, 5],
                                             last_tracks=[lt, lt, lt, lt],
                                             k=[k, k, k, k],
                                             gamma=[g, g, g, g])
                rec_list = eurm_to_recommendation_list(eurm_boosted,
                                                       datareader=datareader)
                print('--------------------------------------------------------------------------')
                print('LT =', lt)
                print('K =', k)
                print('G =', g)
                ev.evaluate(rec_list, name='tb', save=False)
def new():
    """UCM x ICM^T NLP method: BM25 on both matrices, then a direct dot product."""
    dr = Datareader(mode='offline', only_load=True)
    ev = Evaluator(dr)

    print('NLP...')
    stopwords = STOP_WORDS                   # kept for parity with sibling pipelines (unused here)
    token_weights = np.array(TOKEN_WEIGHTS)  # idem
    test_playlists = dr.get_test_pids()
    nlp = NLP(datareader=dr, stopwords=[], mode='both')

    print('Getting ucm and icm...')
    ucm = bm25_row(nlp.get_ucm())
    icm = bm25_row(nlp.get_icm())
    icm_T = icm.T
    # ucm = bm25_row(ucm)
    # urm = dr.get_urm()

    print('Computing eurm...')
    t0 = time.time()
    eurm_nlp = dot_product(ucm[test_playlists, :], icm_T, k=500)
    print(time.time() - t0)

    print('Converting to csr...')
    eurm_nlp = eurm_nlp.tocsr()
    print(eurm_nlp.shape)
    # eurm_nlp = eurm_nlp[test_playlists:, :]

    sparse.save_npz(ROOT_DIR + '/data/eurm_nlp_new_method_offline.npz', eurm_nlp)
    ev.evaluate(eurm_to_recommendation_list(eurm_nlp),
                name='nlp_new_method',
                show_plot=False)
class CF_AL_BM25:
    """Collaborative filtering on a playlist x feature UCM with optional
    BM25 weighting.

    A playlist-playlist similarity ('tversky' or 'dot') is built from the
    UCM and multiplied with the URM to produce the estimated user-rating
    matrix (EURM). The challenge/test playlist rows are zeroed in the
    transposed operand so the model never learns from the challenge set.
    """

    def __init__(self, urm, ucm, binary=False, verbose=True, mode='offline',
                 datareader=None, verbose_evaluation=True, bm25=False,
                 similarity='tversky'):
        """Store the URM and pre-compute both orientations of the UCM.

        NOTE(review): binary=True mutates the caller's ucm in place.
        """
        assert (mode in ('offline', 'online'))
        if binary:
            # Flatten interaction counts to 0/1
            ucm.data = np.ones(ucm.data.shape[0])
        self.urm = urm
        self.binary = binary
        self.verbose = verbose
        self.verbose_ev = verbose_evaluation
        self.dr = datareader
        self.mode = mode
        self.similarity = similarity
        self.bm25 = bm25

        # Don't learn from the challenge set: zero its rows in the copy
        # used for the transposed operand of the similarity.
        ucm_aux = ucm.copy()
        ut.inplace_set_rows_zero(X=ucm_aux, target_rows=self.dr.get_test_pids())
        ucm_aux.eliminate_zeros()

        if self.bm25:
            self.m_ui = bm25_row(ucm.copy()).tocsr()
            self.m_iu = bm25_col(ucm_aux.T.copy()).tocsr()
        else:
            self.m_ui = ucm.copy().tocsr()
            self.m_iu = ucm_aux.T.copy().tocsr()

        # Ground truth exists offline only, so the evaluator does too.
        if mode == 'offline':
            self.ev = Evaluator(self.dr)

    def model(self, alpha=1, beta=1, k=200, shrink=0, power=1, threshold=0,
              target_items=None):
        """Build the playlist-playlist similarity self.s.

        If target_items is None the similarity is restricted to the test
        playlists (we only ever need s*urm for those rows). power != 1
        raises every similarity value element-wise.
        """
        if target_items is None:
            target_items = self.dr.get_test_pids()
        self.alpha, self.beta = alpha, beta
        self.k = k
        self.power = power
        self.shrink, self.threshold = shrink, threshold
        if self.similarity == 'tversky':
            self.s = ss.tversky_similarity(
                self.m_ui, self.m_iu, k=k, shrink=shrink, alpha=alpha,
                beta=beta, threshold=threshold, verbose=self.verbose,
                target_items=target_items)
        elif self.similarity == 'dot':
            self.s = ss.dot_product_similarity(
                self.m_ui, self.m_iu, k=k, shrink=shrink, threshold=threshold,
                verbose=self.verbose, target_items=target_items)
        else:
            print('ERROR, similarity not implemented')
        if power != 1:
            self.s.data = np.power(self.s.data, power)

    def recommend(self, target_pids=None, eurm_k=750):
        """Compute self.eurm = s * urm; whole EURM when target_pids is None."""
        self.eurm = ss.dot_product(self.s, self.urm, k=eurm_k,
                                   target_items=target_pids,
                                   verbose=self.verbose)
        # TODO: here we can try some postprocessing on eurm if complete
        # (like normalize for column)

    #### METHODS FOR OFFLINE MODE ####
    def fast_recommend(self, target_pids=None, eurm_k=750):
        """Recommend only for the test playlists (offline shortcut)."""
        assert (self.mode == 'offline')
        if target_pids is None:
            target_pids = self.dr.get_test_pids()
        self.recommend(target_pids=target_pids, eurm_k=eurm_k)

    def fast_evaluate_eurm(self, target_pids=None):
        """Evaluate self.eurm with the fast evaluator; returns (mean, dataframe)."""
        assert (self.mode == 'offline')
        res = self.ev.fast_evaluate_eurm(self.eurm, target_pids=target_pids,
                                         verbose=self.verbose_ev)
        return res

    def evaluate_eurm(self, target_pids):
        """Full evaluation: strip seeds, build the rec list, evaluate."""
        assert (self.mode == 'offline')
        eurm = sps.csr_matrix(self.eurm[target_pids])
        eurm = post.eurm_remove_seed(eurm, self.dr)
        rec_list = post.eurm_to_recommendation_list(eurm)
        res = self.ev.evaluate(rec_list, str(self), verbose=self.verbose_ev,
                               return_result='all')
        return res

    #### UTILITY METHODS ####
    def clear_similarity(self):
        # Free the similarity matrix (can be large).
        del self.s

    def clear_eurm(self):
        # Free the estimated user-rating matrix.
        del self.eurm

    def save_similarity(self, name_file, compressed=False):
        sps.save_npz(name_file, self.s, compressed)

    def save_small_eurm(self, name_file, target_pids, compressed=True):
        # Save only the rows of eurm for the given playlists.
        eurm = sps.csr_matrix(self.eurm[target_pids])
        sps.save_npz(name_file, eurm, compressed)

    #### OVERRIDE METHODS ####
    def __str__(self):
        # Human-readable tag with the current hyperparameters (set by model()).
        name = (
            'CF_AL_BM25: alpha=%.3f, beta=%.3f, k=%d, shrink=%d, power=%.3f, threshold=%.5f, binary=%s, bm25=%s'
            % (self.alpha, self.beta, self.k, self.shrink, self.power,
               self.threshold, str(self.binary), str(self.bm25)))
        return name

    #### TUNING METHODS ####
    def tune_alpha_beta(self, range_alpha=np.arange(0, 1.1, 0.1),
                        range_beta=np.arange(0, 1.1, 0.1), k=200, shrink=0,
                        threshold=0, power=1, verbose_tune=True,
                        filename='tuning_bm25_alpha_beta', overwrite=False,
                        save_mean=True, save_full=True):
        """Grid search over (alpha, beta); logs mean and/or full metrics."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for alpha in range_alpha:
            for beta in range_beta:
                self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                           power=power, threshold=threshold)
                self.fast_recommend()
                self.clear_similarity()
                mean, df_all_values = self.fast_evaluate_eurm()
                self.clear_eurm()
                s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                    mean[0], mean[1], mean[2])
                if verbose_tune:
                    print(str(self) + '\n' + s_mean)
                if save_mean:
                    tp.print_mean_values(str(self), mean)
                if save_full:
                    tp.print_full_values(description=str(self),
                                         dict_val={
                                             'alpha': alpha,
                                             'beta': beta
                                         },
                                         dataframe=df_all_values)
        # NOTE(review): unlike the other tuners this one never called
        # tp.make_pdf_full(); kept as-is to preserve behavior.

    # use this tuning method only with beta=0
    def tune_alpha(self, range_alpha=np.arange(0.0, 2, 0.1), beta=0, power=1,
                   k=100, shrink=0, threshold=0, verbose_tune=True,
                   filename='tuning_bm25_k', overwrite=False, save_mean=True,
                   save_full=True):
        """Sweep alpha at fixed beta (meant to be used with beta=0)."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for alpha in range_alpha:
            self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                       power=power, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            # save values
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                # BUGFIX: was dict_val={'beta': k} — it logged the wrong
                # parameter name and the wrong value for the swept alpha.
                tp.print_full_values(description=str(self),
                                     dict_val={'alpha': alpha},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_power(self, range_power=np.arange(0.5, 1.5, 0.1), k=100,
                   shrink=0, threshold=0, verbose_tune=False, alpha=1, beta=1,
                   filename='tuning_bm25_alpha', overwrite=False,
                   save_mean=True, save_full=True):
        """Sweep the element-wise exponent on one pre-built similarity."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        # Build the model once with power=1 and re-apply each power on top
        # of the saved data vector — much faster than rebuilding every time.
        self.model(alpha=alpha, beta=beta, k=k, shrink=shrink, power=1,
                   threshold=threshold)
        save_data = self.s.data
        for power in range_power:
            self.s.data = np.power(save_data, power)
            self.power = power
            self.fast_recommend()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'power': power},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_beta(self, range_beta=np.arange(0.0, 2, 0.1), alpha=1, power=1,
                  k=100, shrink=0, threshold=0, verbose_tune=True,
                  filename='tuning_bm25_k', overwrite=False, save_mean=True,
                  save_full=True):
        """Sweep beta at fixed alpha."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for beta in range_beta:
            self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                       power=power, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            # save values
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                # BUGFIX: was dict_val={'beta': k} — it logged k instead of
                # the swept beta value.
                tp.print_full_values(description=str(self),
                                     dict_val={'beta': beta},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_k(self, range_k=np.arange(25, 300, 25), alpha=1, beta=0, power=1,
               shrink=0, threshold=0, verbose_tune=True,
               filename='tuning_bm25_k', overwrite=False, save_mean=True,
               save_full=True):
        """Sweep the number of neighbours k; model rebuilt for each value."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for k in range_k:
            self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                       power=power, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'k': k},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_shrink(self, range_shrink=np.arange(25, 300, 25), alpha=1,
                    beta=0, power=1, k=200, threshold=0, verbose_tune=True,
                    filename='tuning_bm25_shrink', overwrite=False,
                    save_mean=True, save_full=True):
        """Sweep the shrink term; model rebuilt for each value."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for shrink in range_shrink:
            self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                       power=power, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'shrink': shrink},
                                     dataframe=df_all_values)
        tp.make_pdf_full()
URM_validation=None) cfw.fit() weights = sps.diags(cfw.D_best) sps.save_npz("ICM_fw_maurizio", weights) ICM_weighted = ICM.dot(weights) sps.save_npz("ICM_fw_maurizio", ICM_weighted) ######## NOI urm = dr.get_urm() pid = dr.get_test_pids() cbfi = Knn_content_item() cbfi.fit(urm, ICM_weighted, pid) cbfi.compute_model(top_k=knn, sm_type=COSINE, shrink=0, binary=False, verbose=True) cbfi.compute_rating(top_k=topk, verbose=True, small=True) sps.save_npz(complete_name + ".npz", cbfi.eurm) ev = Evaluator(dr) ev.evaluate(recommendation_list=eurm_to_recommendation_list(cbfi.eurm), name=complete_name)
import time name = "ensemble-" + mode + "-data-" + time.strftime( "%x") + "-" + time.strftime("%X") name = name.replace("/", "_") sps.save_npz("results/" + name + ".npz", res) print("[ Initizalizing Datereader ]") dr = Datareader(verbose=False, mode=mode, only_load="False") res = eurm_to_recommendation_list(res, datareader=dr) if mode == "offline": print("[ Initizalizing Evaluator ]") ev = Evaluator(dr) ev.evaluate(res, name="ens") if mode == "online": print("[ Initizalizing Submitter ]") sb = Submitter(dr) sb.submit(recommendation_list=res, name=name, track="main", verify=True, gzipped=False) # # # # if type == "splitted": # mode = "offline"
result_dict[song] = fs.freq for song_predicted in result_dict: pred[i,song_predicted] = result_dict[song_predicted] eurm = eurm_remove_seed(pred , dr ) rec_list = eurm_to_recommendation_list(eurm) ev.evaluate(rec_list, "cat2_top",verbose=True, do_plot=True, show_plot=True, save=True, ) # seuences: [15565, 6186, 6288, 6292, 6294, 6295, 6298, 6310, 6334, 6336, 6337, 6339, 6340, 6362, 6380, 6387, 7597, 7603, 7604, 7605, 7606, 7607, 6173, 6077, 6040, 6027, 74, 76, 77, 81, 282, 768, 2163, 2506, 2507, 2508, 7609, 3084, 3166, 3183, 3282, 3283, 3697, 4211, 4420, 4443, 4493, 6019, 3162, 73, 8408, 8460, 15544, 15545, 15546, 15547, 15548, 15549, 15550, 15551, 15552, 15553, 15554, 15555, 15556, 15557, 15558, 15559, 15560, 15561, 15562, 15563, 15564, 15543, 15503, 15152, 14809, 8484, 8940, 10480, 10527, 10820, 11192, 11200, 11482, 11500, 11512, 8409, 12605, 12710, 12714, 12716, 12728, 12794, 13689, 13692, 14467, 14797, 14801, 12610, 51] # seuences: [11500] # # # # [[11500], [12714]], 62 # [[11500], [70]], 62 # [[11500], [64]], 70 # [[11500], [14809]], 71 # [[11500], [13893]], 72 # [[11500], [69]], 81 # [[11500], [69], [68]], 46
import numpy as np
import sys

# Depopularize the album UCM (zero the 100 most popular albums), then run
# user-based CBF through album co-occurrence and evaluate offline.
datareader = Datareader(mode='offline', only_load=True, verbose=False)
evaluator = Evaluator(datareader)

urm = datareader.get_urm()
ucm_album = datareader.get_ucm_albums()

# Album popularity = column sums of the playlist x album matrix
albums_pop = ucm_album.sum(axis=0).A1
top_album_cols = np.argsort(albums_pop)[::-1][:100]
ut.inplace_set_cols_zero(ucm_album, top_album_cols)
ucm_album = bm25_row(ucm_album)

print('Similarity..')
similarity = tversky_similarity(ucm_album, ucm_album.T, shrink=200, alpha=0.1,
                                beta=1, k=800, verbose=1, binary=False).tocsr()

test_pids = list(datareader.get_test_pids())
eurm = dot_product(similarity, urm, k=750).tocsr()[test_pids, :]

sparse.save_npz('eurm_albums_depop_100_offline.npz', eurm)
eurm = eurm_remove_seed(eurm, datareader)
evaluator.evaluate(eurm_to_recommendation_list(eurm),
                   name='cbuser_album_depop_100',
                   show_plot=False)
for token in tokens: playlists_with_tokens.extend( ucm_csc.indices[ucm_csc.indptr[token]:ucm_csc.indptr[token + 1]]) urm_tmp = urm_csr[playlists_with_tokens] track_total_interactions = np.array(urm_tmp.sum(axis=0)).astype( np.int32)[0, :] # like ravel top_pop = track_total_interactions.argsort()[-750:][::-1] rec_list[i] = top_pop i += 1 np.save("nlp_toketoppop_rec_list_offline", rec_list) eurm = rec_list_to_eurm(rec_list=rec_list) eurm = eurm_remove_seed(eurm, dr) rec_list = eurm_to_recommendation_list(eurm) ev.evaluate( rec_list, "WEILA2_toktoktop_pop", verbose=True, do_plot=True, show_plot=True, save=True, )
# TopPop Album album = artists_dic[track_ind] playlists = ucm_album.indices[ucm_album.indptr[album]:ucm_album. indptr[album + 1]] top = urm[playlists].sum(axis=0).A1.astype(np.int32) track_ind_rec = top.argsort()[-501:][::-1] eurm2[row, track_ind_rec] = top[track_ind_rec] eurm1 = eurm1.tocsr()[pids_all] eurm2 = eurm2.tocsr()[pids_all] eurm1 = eurm_remove_seed(eurm1, dr) eurm2 = eurm_remove_seed(eurm2, dr) sps.save_npz("test1.npz", eurm1) rec_list1 = eurm_to_recommendation_list(eurm1) rec_list2 = eurm_to_recommendation_list(eurm2) rec_list3 = append_rec_list(rec_list1 + rec_list2) ev = Evaluator(dr) ev.evaluate(rec_list1, name="enstest", level='track') ev.evaluate(rec_list2, name="enstest", level='track') ev.evaluate(rec_list3, name="enstest", level='track') # rec.append(list(top_p))
# INITIALIZATION
datareader = Datareader(mode='offline', verbose=False, only_load=True)
evaluator = Evaluator(datareader)
test_pids = datareader.get_test_pids()
urm = datareader.get_urm()
topk = 750

nlp_strict = NLPStrict(datareader)
ucm_strict = nlp_strict.get_UCM()

# TVERSKY: sweep the exponent applied element-wise to the similarity values
for power in [0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.7, 2.0]:
    print('---------')
    print('TVERSKY | power =', power)
    similarity = tversky_similarity(ucm_strict, ucm_strict.T, k=450,
                                    alpha=0.2, beta=0.5, shrink=150,
                                    target_items=test_pids)
    similarity.data = np.power(similarity.data, power)

    # EURM restricted to the test playlists
    eurm = dot_product(similarity, urm, k=topk).tocsr()[test_pids, :]

    rec_list = eurm_to_recommendation_list(eurm, datareader=datareader)
    evaluator.evaluate(rec_list, name='nlp_strict_tversky_power=' + str(power))
start = time.time() # Compute similarity similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1) similarity = similarity.tocsr() print(time.time() - start) print('Computing eurm...') start = time.time() # Compute eurm eurm = dot_product(similarity, urm, k=500) eurm = eurm.tocsr() eurm = eurm[test_playlists, :] print('eurm', eurm.shape) print(time.time() - start) # Evaluating rec_list = eurm_to_recommendation_list(eurm) (prec_t, ndcg_t, clicks_t, prec_a, ndcg_a, clicks_a) = evaluator.evaluate(rec_list, return_overall_mean=True, name='AAANLP_' + nome, verbose=True, show_plot=False) # gc.collect() # del eurm, rec_list, similarity, nlp, test_playlists, start, nome # gc.collect() np.save("ret", [clicks_t])
#Computing similarity/model rec.compute_model(top_k=knn, sm_type=sm.COSINE, shrink=200, alpha=0.1, beta=1, binary=True, verbose=True) #Computing ratings rec.compute_rating(top_k=topk, verbose=True, small=True) #evaluation and saving sps.save_npz(complete_name + ".npz", rec.eurm) ev.evaluate(recommendation_list=eurm_to_recommendation_list(rec.eurm), name=name) if mode == "online": """Submission""" #Data initialization dr = Datareader(verbose=True, mode=mode, only_load=False) #Recommender algorithm initialization rec = Knn_collaborative_item() #Submitter initialization sb = Submitter(dr) #Getting for the recommender algorithm urm = dr.get_urm() pid = dr.get_test_pids()
sequences = load_obj(path=ROOT_DIR+'/data/cat1/', name='sequences_cat1_'+str(i)) popularity = len(sequences) preds_line = np.zeros(2262292) for seq in fpgrowth(sequences,supp= -popularity/costante_di_popolarita, target='m'): for song in seq[0]: preds_line[song]+= seq[1]*(len(seq[0])-1)*(len(seq[0])-1) vals = fast_argpart(preds_line) pred_lil[i,vals] = preds_line[vals] eurm = sps.csr_matrix(pred_lil) eurm = eurm_remove_seed(eurm , dr ) rec_list = eurm_to_recommendation_list(eurm) ev.evaluate(rec_list, "cat2_spm_max",verbose=True, do_plot=True, show_plot=True, save=True ) exit() # # parallel association rule. import gc target = 'm' costante_di_pop = 15 # In[9]:
knn = 500 topk = 750 # LOAD EURM eurm = sparse.load_npz( ROOT_DIR + '/data/ensemble_per_cat_offline_new_data_32_maggio.npz') rec_list = eurm_to_recommendation_list(eurm, datareader=dr) # SIMILARITIES # ucm_album = dr.get_ucm_albums() # sim_album = tversky_similarity(ucm_album.T, ucm_album, shrink=200, # alpha=0.1, beta=1, k=knn, verbose=1, binary=False) # sim_album = sim_album.tocsr() sim_album = sparse.load_npz(ROOT_DIR + '/data/sim_album.npz') # ucm_artist = dr.get_ucm_artists() # sim_artist = tversky_similarity(ucm_artist.T, ucm_artist, shrink=200, # alpha=0.1, beta=1, k=knn, verbose=1, binary=False) # sim_artist = sim_artist.tocsr() sim_artist = sparse.load_npz(ROOT_DIR + '/data/sim_artist.npz') # TWOBOOST rec_list_new = two_boost(rec_list, dr, sim_al=sim_album, sim_ar=sim_artist, prob=[0.85, 0.1, 0.05]) # EVALUATION ev.evaluate(rec_list_new, name='toptwo')
class CB_AR_BM25:
    """Content-based recommender over a BM25-weighted ICM.

    Builds an item-item p3alpha similarity from the ICM and multiplies the
    URM with its transpose to obtain the estimated user-rating matrix
    (EURM, stored in self.eurm).
    """

    def __init__(self, icm, urm, binary=False, verbose=True, mode='offline',
                 datareader=None, verbose_evaluation=True):
        # icm: item x content-feature matrix; urm: playlist x item matrix.
        # NOTE(review): binary=True mutates the caller's urm in place.
        assert (mode in ('offline', 'online'))
        if binary:
            urm.data = np.ones(urm.data.shape[0])
        self.urm = urm
        # Both orientations of the ICM, each BM25-weighted column-wise.
        self.m_ic = pre.bm25_col(icm.copy()).tocsr()
        self.m_ci = pre.bm25_col(icm.T.copy()).tocsr()
        self.binary = binary
        self.verbose = verbose
        self.verbose_ev = verbose_evaluation
        self.dr = datareader
        self.mode = mode
        # Ground truth exists offline only, so the evaluator does too.
        if mode == 'offline':
            self.ev = Evaluator(self.dr)

    def model(self, alpha=1, k=200, shrink=0, threshold=0, target_items=None):
        # Build the item-item similarity self.s. If target_items is None
        # the whole similarity is computed.
        self.alpha = alpha
        self.k = k
        self.shrink, self.threshold = shrink, threshold
        self.s = ss.p3alpha_similarity(self.m_ic, self.m_ci, k=k,
                                       shrink=shrink, alpha=alpha,
                                       threshold=threshold,
                                       verbose=self.verbose,
                                       target_items=target_items)

    def recommend(self, target_pids=None, eurm_k=750):
        # Compute self.eurm = urm * s.T; if target_pids is None the whole
        # eurm is computed.
        # NOTE(review): the original author was unsure whether s or s.T is
        # the right operand here ("or s.T????") — confirm against
        # ss.dot_product's contract before relying on this.
        self.eurm = ss.dot_product(self.urm, self.s.T, k=eurm_k,
                                   target_items=target_pids,
                                   verbose=self.verbose)
        # TODO: here we can try some postprocessing on eurm if complete
        # (like normalize for column)

    #### METHODS FOR OFFLINE MODE ####
    def fast_recommend(self, target_pids=None, eurm_k=750):
        # Recommend only for the test playlists (offline shortcut).
        assert (self.mode == 'offline')
        if target_pids is None:
            target_pids = self.dr.get_test_pids()
        self.recommend(target_pids=target_pids, eurm_k=eurm_k)

    def fast_evaluate_eurm(self, target_pids=None):
        # Fast evaluation of self.eurm; returns (mean metrics, dataframe).
        assert (self.mode == 'offline')
        res = self.ev.fast_evaluate_eurm(self.eurm, target_pids=target_pids,
                                         verbose=self.verbose_ev)
        return res

    def evaluate_eurm(self, target_pids):
        # Full evaluation: strip seed tracks, build a rec list, evaluate.
        assert (self.mode == 'offline')
        eurm = sps.csr_matrix(self.eurm[target_pids])
        eurm = post.eurm_remove_seed(eurm, self.dr)
        rec_list = post.eurm_to_recommendation_list(eurm)
        res = self.ev.evaluate(rec_list, str(self), verbose=self.verbose_ev,
                               return_result='all')
        return res

    #### UTILITY METHODS ####
    def clear_similarity(self):
        # Free the similarity matrix (can be large).
        del self.s

    def clear_eurm(self):
        # Free the estimated user-rating matrix.
        del self.eurm

    def save_similarity(self, name_file, compressed=False):
        sps.save_npz(name_file, self.s, compressed)

    def save_small_eurm(self, name_file, target_pids, compressed=True):
        # Save only the rows of eurm for the given playlists.
        eurm = sps.csr_matrix(self.eurm[target_pids])
        sps.save_npz(name_file, eurm, compressed)

    #### OVERRIDE METHODS ####
    def __str__(self):
        # Human-readable tag with the current hyperparameters (set by model()).
        name = (
            'CB_AR_BM25: alpha=%.3f, k=%d, shrink=%d, threshold=%.5f, binary=%s'
            % (self.alpha, self.k, self.shrink, self.threshold, str(
                self.binary)))
        return name

    #### TUNING METHODS ####
    def tune_alpha(self, range_alpha=np.arange(0.5, 1.5, 0.1), k=100,
                   shrink=0, threshold=0, verbose_tune=False,
                   filename='tuning_bm25_alpha', overwrite=False,
                   save_mean=True, save_full=True):
        # Sweep the p3alpha exponent. The similarity is built once with
        # alpha=1 and each alpha is applied as an element-wise power on the
        # saved data vector instead of rebuilding the model every time.
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        self.model(
            alpha=1, k=k, shrink=shrink,
            threshold=threshold)  #exploit this trick to generate fastest model
        save_data = self.s.data
        for alpha in range_alpha:
            self.s.data = save_data
            self.s.data = np.power(self.s.data, alpha)
            self.alpha = alpha
            self.fast_recommend()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'alpha': alpha},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_k(self, range_k=np.arange(25, 300, 25), alpha=1, shrink=0,
               threshold=0, verbose_tune=False, filename='tuning_bm25_k',
               overwrite=False, save_mean=True, save_full=True):
        # Sweep the number of neighbours k; model rebuilt for each value.
        # NOTE(review): only this tuner passes verbose= to TunePrint —
        # confirm TunePrint accepts that keyword.
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite, verbose=verbose_tune)
        for k in range_k:
            self.model(alpha=alpha, k=k, shrink=shrink, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            # save values
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'k': k},
                                     dataframe=df_all_values)
        tp.make_pdf_full()

    def tune_shrink(self, range_shrink=np.arange(25, 300, 25), k=200, alpha=1,
                    threshold=0, verbose_tune=False,
                    filename='tuning_bm25_shrink', overwrite=False,
                    save_mean=True, save_full=True):
        # Sweep the shrink term; model rebuilt for each value.
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for shrink in range_shrink:
            self.model(alpha=alpha, k=k, shrink=shrink, threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            # save values
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(description=str(self),
                                     dict_val={'shrink': shrink},
                                     dataframe=df_all_values)
        tp.make_pdf_full()
import utils.pre_processing as pre
from boosts.hole_boost import HoleBoost
from utils.datareader import Datareader
from utils.definitions import ROOT_DIR
from utils.evaluator import Evaluator
from utils.post_processing import eurm_to_recommendation_list

# Initialization
datareader = Datareader(mode='offline', only_load=True)
evaluator = Evaluator(datareader)

# Load matrices
eurm = sparse.load_npz(ROOT_DIR + '/data/eurm_rp3_offline.npz')
sim = sparse.load_npz(ROOT_DIR + '/data/sim_offline.npz')
print('Loaded')

# L2 normalization of both matrices
eurm = pre.norm_l2_row(eurm)
sim = pre.norm_l2_row(sim)

# HoleBoost over every category but the first
hole_boost = HoleBoost(sim, eurm, datareader)
boosted = hole_boost.boost_eurm(categories=[2, 3, 4, 5, 6, 7, 8, 9, 10],
                                k=200, gamma=10)
# sparse.save_npz(ROOT_DIR + '/data/eurm_boosted_online.npz', boosted)
rec_list = eurm_to_recommendation_list(boosted)

# Evaluation
evaluator.evaluate(rec_list, name='rp3_l2_all_200_10', save=True, show_plot=False)
# SVD experiment parameters
n_factors = 100
top_k = 750
mode = 'online'

if mode == 'offline':
    # Initialization
    datareader = Datareader(mode='offline', only_load=True, verbose=False)
    evaluator = Evaluator(datareader)

    # Prediction
    eurm = compute_SVD(datareader, n_factors, top_k, save_eurm=True)

    # Evaluation
    print('N_FACTORS =', n_factors)
    evaluator.evaluate(eurm_to_recommendation_list(eurm, datareader=datareader),
                       name='svd_' + str(n_factors))
elif mode == 'online':
    # Initialization
    datareader = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(datareader)

    # Prediction
    eurm = compute_SVD(datareader, n_factors, top_k, save_eurm=True)

    # Submission
    submitter.submit(eurm_to_recommendation_list_submission(eurm,
                                                            datareader=datareader),
                     name='svd_' + str(n_factors))
else:
    print('Wrong mode!')
class CF_IB_BM25:
    """Item-based collaborative filtering over a BM25-weighted URM.

    Builds a p3alpha/rp3beta item-item similarity from the BM25-weighted
    user-rating matrix, produces an estimated URM (eurm), and offers
    offline evaluation plus grid-search tuning helpers.
    """

    def __init__(self, urm, pop=None, binary=False, verbose=True, mode='offline',
                 datareader=None, verbose_evaluation=True):
        """Store the URM and precompute its BM25-weighted views.

        urm: sparse user-rating matrix (playlists x tracks).
        pop: per-item popularity; derived from column sums when None.
        binary: when True the URM interactions are flattened to 1.
        mode: 'offline' (builds an Evaluator) or 'online'.
        """
        assert (mode in ('offline', 'online'))
        if binary:
            # NOTE(review): this binarizes the CALLER's matrix in place —
            # presumably intentional to save memory, but confirm.
            urm.data = np.ones(urm.data.shape[0])
        if pop is None:
            # Item popularity = column sums of the URM.
            self.pop = urm.sum(axis=0).A1
        else:
            self.pop = pop
        self.urm = urm
        # BM25-weighted user->item and item->user matrices.
        self.m_ui = pre.bm25_row(urm.copy()).tocsr()
        self.m_iu = pre.bm25_row(urm.T.copy()).tocsr()
        self.binary = binary
        self.verbose = verbose
        self.verbose_ev = verbose_evaluation
        self.dr = datareader
        self.mode = mode
        if mode == 'offline':
            self.ev = Evaluator(self.dr)

    def model(self, alpha=1, beta=0, k=200, shrink=0, threshold=0, rp3_mode=0,
              target_items=None):
        """Build the item-item similarity ``self.s`` (p3alpha/rp3beta)."""
        #if target_items is None it calculate the whole similarity
        self.alpha, self.beta = alpha, beta
        self.k = k
        self.shrink, self.threshold = shrink, threshold
        self.rp3_mode = rp3_mode
        self.s = p3r3.p3alpha_rp3beta_similarity(self.m_iu, self.m_ui, self.pop,
                                                 k=k, shrink=shrink, alpha=alpha,
                                                 beta=beta, threshold=threshold,
                                                 verbose=self.verbose,
                                                 mode=rp3_mode,
                                                 target_items=target_items)

    def recommend(self, target_pids=None, eurm_k=750):
        """Compute the estimated URM ``self.eurm`` = urm * similarity."""
        #if target_pids is None it calculate the whole eurm
        self.eurm = ss.dot_product(self.urm, self.s, k=eurm_k,
                                   target_items=target_pids,
                                   verbose=self.verbose)
        # TODO: here we can try some postprocessing on eurm if complete (like normalize for column)

    #### METHODS FOR OFFLINE MODE ####
    def fast_recommend(self, target_pids=None, eurm_k=750):
        """Recommend only for the datareader's test playlists (offline)."""
        assert (self.mode == 'offline')
        if target_pids is None:
            target_pids = self.dr.get_test_pids()
        self.recommend(target_pids=target_pids, eurm_k=eurm_k)

    def fast_evaluate_eurm(self, target_pids=None):
        """Evaluate ``self.eurm`` via the Evaluator's fast path."""
        assert (self.mode == 'offline')
        res = self.ev.fast_evaluate_eurm(self.eurm, target_pids=target_pids,
                                         verbose=self.verbose_ev)
        return res

    def evaluate_eurm(self, target_pids):
        """Full evaluation: remove seeds, build rec lists, score them."""
        assert (self.mode == 'offline')
        eurm = sps.csr_matrix(self.eurm[target_pids])
        eurm = post.eurm_remove_seed(eurm, self.dr)
        rec_list = post.eurm_to_recommendation_list(eurm)
        res = self.ev.evaluate(rec_list, str(self), verbose=self.verbose_ev,
                               return_result='all')
        return res

    #### UTILITY METHODS ####
    def clear_similarity(self):
        # Free the similarity matrix (rebuild with model()).
        del self.s

    def clear_eurm(self):
        # Free the estimated URM (rebuild with recommend()).
        del self.eurm

    def save_similarity(self, name_file, compressed=False):
        sps.save_npz(name_file, self.s, compressed)

    def save_small_eurm(self, name_file, target_pids, compressed=True):
        # Persist only the rows of the eurm for the given playlists.
        eurm = sps.csr_matrix(self.eurm[target_pids])
        sps.save_npz(name_file, eurm, compressed)

    #### OVERRIDE METHODS ####
    def __str__(self):
        """Human-readable tag of the current hyper-parameter configuration."""
        name = ('CF_IB_BM25: alpha=%.3f, beta=%.3f, k=%d, shrink=%d, '
                'threshold=%.5f, binary=%s, rp3mode=%d'
                % (self.alpha, self.beta, self.k, self.shrink, self.threshold,
                   str(self.binary), self.rp3_mode))
        return name

    #### TUNING METHODS ####
    def tune_alpha_beta(self, range_alpha=np.arange(0, 1.1, 0.1),
                        range_beta=np.arange(0, 1.1, 0.1), k=200, shrink=0,
                        threshold=0, verbose_tune=True,
                        filename='tuning_bm25_alpha_beta', overwrite=False,
                        save_mean=True, save_full=True):
        """Grid-search alpha x beta, logging metrics through TunePrint."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for alpha in range_alpha:
            for beta in range_beta:
                self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                           threshold=threshold)
                self.fast_recommend()
                # Free the similarity as soon as the eurm is computed.
                self.clear_similarity()
                mean, df_all_values = self.fast_evaluate_eurm()
                self.clear_eurm()
                s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                    mean[0], mean[1], mean[2])
                if verbose_tune:
                    print(str(self) + '\n' + s_mean)
                # save values
                if save_mean:
                    tp.print_mean_values(str(self), mean)
                if save_full:
                    tp.print_full_values(str(self), df_all_values)

    def tune_k(self, range_k=np.arange(25, 300, 25), alpha=1, beta=0, shrink=0,
               threshold=0, verbose_tune=True, filename='tuning_bm25_k',
               overwrite=False, save_mean=True, save_full=True):
        """Grid-search the neighborhood size k at fixed alpha/beta."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for k in range_k:
            self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            # save values
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(str(self), df_all_values)

    def tune_shrink(self, range_shrink=np.arange(25, 300, 25), alpha=1, beta=0,
                    k=200, threshold=0, verbose_tune=True,
                    filename='tuning_bm25_shrink', overwrite=False,
                    save_mean=True, save_full=True):
        """Grid-search the shrink term; mirrors ``tune_k``."""
        tp = TunePrint(filename=filename, full=save_full, mean=save_mean,
                       overwrite=overwrite)
        for shrink in range_shrink:
            self.model(alpha=alpha, beta=beta, k=k, shrink=shrink,
                       threshold=threshold)
            self.fast_recommend()
            self.clear_similarity()
            mean, df_all_values = self.fast_evaluate_eurm()
            self.clear_eurm()
            s_mean = 'P = %1.4f, NDCG = %1.4f, CLICK = %1.4f' % (
                mean[0], mean[1], mean[2])
            if verbose_tune:
                print(str(self) + '\n' + s_mean)
            # save values
            if save_mean:
                tp.print_mean_values(str(self), mean)
            if save_full:
                tp.print_full_values(str(self), df_all_values)
# NOTE(review): chunk opens mid-loop — `arg` and `w` are bound above this
# excerpt (each `arg` row appears to hold a label plus per-model weights).
print(arg)
# NOTE(review): np.float was deprecated in NumPy 1.20 and later removed;
# on modern NumPy this line needs plain `float` (or np.float64).
best = list(arg[1:].astype(np.float))
w.append(best)

# Rebuild the ensemble one category at a time (cats 1..10).
for i in tqdm(range(1,11)):
    if mode == "offline":
        CBF_ALBUM = sps.load_npz(mode+"/offline-cbf_item_album-cat"+str(i)+".npz")
        CBF_ARTISTA = sps.load_npz(mode+"/offline-cbf_item_artist-cat"+str(i)+".npz")
        # NOTE(review): str(1) always loads the cat-1 NLP matrix while every
        # other model uses str(i) — looks like a bug; confirm intent.
        NLP = norm_max_row(sps.load_npz(mode + "/nlp_eurm_offline_bm25-cat" + str(1) + ".npz"))
        RP3BETA = sps.load_npz(mode+"/offline-rp3beta-cat"+str(i)+".npz")
        CF_USER = sps.load_npz(mode + "/cfu_eurm-cat"+str(i)+".npz")
        SLIM = sps.load_npz(mode +"/slim_bpr_completo_test1-cat"+str(i)+".npz")
        CBF_USER_ARTIST = sps.load_npz(mode +"/eurm_cbfu_artists_offline-cat"+str(i)+".npz")
        matrix = [CBF_ALBUM, CBF_ARTISTA, NLP, RP3BETA, CF_USER, SLIM, CBF_USER_ARTIST]
        we = w[i-1]  # weights tuned for this category
        res.append(ensembler(matrix, we, normalization_type="lele"))

# Stack the per-category blocks back into a single eurm.
ret = sps.vstack(res).tocsr()

if mode == "offline":
    ev.evaluate(eurm_to_recommendation_list(ret), "best_test", verbose=True)
    # sps.save_npz("ensemble_per_cat_"+mode+"_new_data_28_maggio.npz", ret)

if mode == "online":
    sb = Submitter(dr)
    sb.submit(recommendation_list=eurm_to_recommendation_list_submission(ret),
              name="best_test", track="main", verify=True, gzipped=False)
# NOTE(review): chunk opens mid-call — these are the trailing keyword
# arguments of a similarity computation started above this excerpt.
verbose=1, binary=False)
sim = sim.tocsr()

# Prediction: propagate ratings through the similarity, keep test rows only.
eurm = dot_product(sim, urm, k=topk)
eurm = eurm.tocsr()
eurm = eurm[test_pids, :]

# Save eurm
if save_eurm:
    sps.save_npz('eurm_' + name + '_' + mode + '.npz', eurm)

# Evaluation
ev.evaluate(recommendation_list=eurm_to_recommendation_list(
    eurm, datareader=dr), name=complete_name)

# NOTE(review): this `elif` pairs with an `if mode == "offline"` branch above
# the excerpt; the branch below also continues past the end of the excerpt.
elif mode == "online":
    # Initialization
    dr = Datareader(verbose=False, mode=mode, only_load=True)
    test_pids = list(dr.get_test_pids())
    sb = Submitter(dr)
    urm = dr.get_urm()

    # UCM
    # NOTE(review): variable is named `ucm_artists` but it is built from
    # get_ucm_albums() — confirm which feature was intended.
    ucm_artists = dr.get_ucm_albums()
    ucm_artists = bm25_row(ucm_artists)

    # Do not train on challenge set
    ucm_artists_T = ucm_artists.copy()
# User-content matrix built from album metadata (duplicates removed).
ucm_album = dr.get_ucm_albums(remove_duplicates=True)
#ucm = dr.get_ucm_followers(n_clusters)
#ucm = sparse.hstack((ucm_album, ucm_followers))
#ucm = bm25_row(ucm)

# Playlist-playlist Tversky similarity, restricted to the test playlists.
print('Similarity..')
sim = tversky_similarity(ucm_album, ucm_album.T, shrink=200,
                         target_items=test_pids,
                         alpha=0.1, beta=1, k=knn,
                         verbose=1, binary=False).tocsr()

# Estimated URM: propagate ratings through the similarity and keep only
# the rows of the test playlists.
eurm = dot_product(sim, urm, k=topk).tocsr()[test_pids, :]

# Offline scoring.
ev.evaluate(recommendation_list=eurm_to_recommendation_list(eurm, datareader=dr),
            name='ucm_album_followers')
#Computing similarity/model rec.compute_model(top_k=knn, sm_type=tversky_similarity, shrink=200, alpha=0.1, beta=1, binary=True, verbose=True) #Computing ratings rec.compute_rating(top_k=topk, verbose=True, small=True) #evaluation and saving sps.save_npz(complete_name + ".npz", rec.eurm) ev = Evaluator(dr) ev.evaluate(eurm_to_recommendation_list(rec.eurm), name=complete_name) elif mode == "online": """Submission""" #Data initialization dr = Datareader(verbose=True, mode=mode, only_load=False) #Recommender algorithm initialization rec = Knn_collabrative_user() #Getting for the recommender algorithm urm = dr.get_urm() pid = dr.get_test_pids() #Fitting data rec.fit(urm, pid)
# BM25-weight the UCM, then build the token-based Tversky similarity.
ucm = bm25_row(ucm)
similarity = tversky_similarity(ucm, binary=False, shrink=1,
                                alpha=0.1, beta=1).tocsr()
print(time.time() - start)

print('Computing eurm...')
start = time.time()

# Estimated URM: similarity * URM with top-500 per row, then keep only
# the rows of the test playlists.
eurm = dot_product(similarity, urm, k=500).tocsr()[test_playlists, :]
print('eurm', eurm.shape)
print(time.time() - start)

# Persist both the sparse eurm and the derived recommendation lists,
# then score the run.
rec_list = eurm_to_recommendation_list(eurm)
sps.save_npz("nlp_eurm_online_bm25.npz", eurm, compressed=False)
np.save("nlp_rec_list_online_bm25", rec_list)
evaluator.evaluate(rec_list, name='AAANLP_bm25_'+nome, verbose=True, show_plot=False)
import sys
from scipy import sparse
import numpy as np
import utils.pre_processing as pre
from utils.definitions import *
from utils.datareader import Datareader
from utils.evaluator import Evaluator
from utils.pre_processing import *
from utils.post_processing import *

# Grid-search script: blend the tuned NLP eurm with a top-pop eurm for
# category-1 playlists, over k (pop filter depth) and a (mixing weight).
dr = Datareader(mode='offline', only_load=True, verbose=False)
ev = Evaluator(dr)

urm = dr.get_urm(binary=True)
urm_csc = urm.tocsc(copy=True)

sim_nlp = sparse.load_npz(ROOT_DIR + '/data/sim_nlp_lele.npz')

# FIX: the NLP eurm does not depend on k or a, so load and normalize it
# once instead of re-reading it from disk on every iteration of the k loop.
eurm_nlp = sparse.load_npz(ROOT_DIR + '/data/nlp_fusion_tuned_offline.npz')
eurm_nlp = norm_l1_row(eurm_nlp)

for k in [1, 2, 3, 4, 5]:
    # Top-pop eurm filtered for category 1, L1-normalized per row.
    eurm_top = dr.get_eurm_top_pop_filter_cat_1(sim_nlp, k, topk=500)
    eurm_top = norm_l1_row(eurm_top)

    for a in [0.05, 0.10, 0.15, 0.20]:
        # Convex combination of the two normalized eurms.
        eurm = eurm_nlp * (1.0 - a) + eurm_top * a
        rec_list = eurm_to_recommendation_list(eurm, datareader=dr)
        ev.evaluate(rec_list, name='pop_first_k=' + str(k) + '_a=' + str(a))
# Previously-ensembled inputs, kept for reference:
# rp3b = sps.load_npz(ROOT_DIR + "/data/sub/EURM-rp3beta-online.npz")
# knn_c_i_al = sps.load_npz(ROOT_DIR + "/data/sub/KNN CONTENT ITEM-album-top_k=850-sm_type=cosine-shrink=100.npz")
# knn_c_i_ar = sps.load_npz(ROOT_DIR + "/data/sub/KNN CONTENT ITEM-artist-top_k=850-sm_type=cosine-shrink=100.npz")
nlp = sps.load_npz(ROOT_DIR + "/data/eurm_nlp_offline.npz")
# cf_u = sps.load_npz(ROOT_DIR + "/data/sub/eurm_cfu_online.npz")
eurm_ens = sps.load_npz(ROOT_DIR + "/data/ENSEMBLED.npz")

#matrix = [rp3b, knn_c_i_ar, knn_c_i_al, nlp, cf_u]
#eurm_ens = ensembler(matrix, [0.720, 0.113, 0.177, 0.194, 1.0], normalization_type="max")

# HoleBoost pass on categories 8 and 10.
hole_boost = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l1_row)
eurm_ens = hole_boost.boost_eurm(categories=[8, 10], k=300, gamma=5)

# TailBoost ("nineboost") pass driven by the last seed tracks.
tail_boost = TailBoost(similarity=sim, eurm=eurm_ens, datareader=dr, norm=norm_l2_row)
eurm_ens = tail_boost.boost_eurm(last_tracks=10, k=100, gamma=0.01)

rec_list = eurm_to_recommendation_list(eurm_ens)
rec_list_nlp = eurm_to_recommendation_list(nlp)

# Category-1 (title-only) playlists: override the ensemble with pure NLP.
indices = dr.get_test_pids_indices(cat=1)
for idx in indices:
    rec_list[idx] = rec_list_nlp[idx]

# Score the patched recommendation lists.
ev.evaluate(rec_list, name='ens_with_cfu_nineboosted', show_plot=False)