def online():
    """Build the creative-track ensemble EURM, apply the boosts and submit it online."""
    # Data access + submission helpers
    datareader = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(datareader)

    # Load the two ensembled EURMs and merge them: the clustered matrix wins on
    # categories 3-4-5-8-10, the standard one everywhere else.
    eurm_clustered = sparse.load_npz(
        ROOT_DIR + '/data/jess/ensembled_CLUSTERARTISTS_CREATIVA_cat3-4-5-8-10_online.npz'
    )
    eurm_standard = sparse.load_npz(ROOT_DIR + '/data/jess/ensembled_creativeFIRE_online.npz')
    eurm_ens = combine_two_eurms(eurm_clustered, eurm_standard, cat_first=[3, 4, 5, 8, 10])

    # Track-track similarity used by the boosts below
    # eurm_ens = sparse.load_npz(ROOT_DIR + '/data/ensembled_creativeFIRE_online.npz')
    sim = sparse.load_npz(ROOT_DIR + '/data/sim_online.npz')

    # HoleBoost: category 8 first, then category 10 with its own (k, gamma);
    # the booster is rebuilt so it sees the already-boosted matrix.
    booster = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=datareader, norm=norm_l1_row)
    eurm_ens = booster.boost_eurm(categories=[8], k=300, gamma=1)
    booster = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=datareader, norm=norm_l1_row)
    eurm_ens = booster.boost_eurm(categories=[10], k=150, gamma=1)

    # TailBoost on categories 9, 7, 6, 5
    booster = TailBoost(similarity=sim, eurm=eurm_ens, datareader=datareader, norm=norm_l2_row)
    eurm_ens = booster.boost_eurm(categories=[9, 7, 6, 5],
                                  last_tracks=[10, 3, 3, 3],
                                  k=[100, 80, 100, 100],
                                  gamma=[0.01, 0.01, 0.01, 0.01])

    # AlbumBoost on categories 3, 4, 7, 9
    booster = AlbumBoost(datareader, eurm_ens)
    eurm_ens = booster.boost_eurm(categories=[3, 4, 7, 9], gamma=2, top_k=[3, 3, 10, 40])

    # MatchBoost (left disabled)
    # mb = MatchBoost(datareader=dr, eurm=eurm_ens, top_k_alb=5000, top_k_art=10000)
    # eurm_ens, pids = mb.boost_eurm(categories='all', k_art=20, k_alb=20, gamma_art=1.0, gamma_alb=1.0)

    # Turn the boosted EURM into an ordered recommendation list and submit it
    rec_list = eurm_to_recommendation_list_submission(eurm_ens, datareader=datareader)
    submitter.submit(rec_list, name='ens_30_june_jess+lele_boosts', track='creative')
def main(argv):
    """Assemble the sbatch/general configs and launch the cluster job submitter."""
    # Per-job sbatch configuration
    sbatch_cfg = {
        'account': 'rrg-whitem',        # account name
        'job-name': 'catcher',          # job name
        'time': '0-10:00:00',           # wall-clock time limit
        # NOTE(review): this key keeps its leading dashes unlike the others —
        # presumably intentional; verify against Submitter's option handling.
        '--cpus-per-task': 1,           # GPU/CPU type
        'mem-per-cpu': '2000M',         # memory per CPU
        'mail-user': '******'           # notification e-mail address
    }
    # sbatch configs backup for different games
    # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'catcher', '0-10:00:00', '2000M'
    # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'copter', '0-05:00:00', '2000M'
    # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'lunar', '0-07:00:00', '2000M'
    # sbatch_cfg['job-name'], sbatch_cfg['time'], sbatch_cfg['mem-per-cpu'] = 'minatar', '1-08:00:00', '4000M'

    # Submitter-level configuration
    general_cfg = {
        'user': '******',                   # user name
        'script-path': './sbatch.sh',       # sbatch script path
        'check-time-interval': 5,           # check interval, in minutes
        'clusters': {'Cedar': 3000},        # clusters info: {name: capacity}
        'job-list': list(range(1, 30 + 1))  # job indexes list
    }

    # Output directory for this job name, then hand both configs to the submitter
    make_dir(f"output/{sbatch_cfg['job-name']}")
    job_submitter = Submitter(general_cfg, sbatch_cfg)
    job_submitter.submit()
def submission(boost, eurm_ens, sim, name):
    """
    Create an online submission from an ensembled eurm, optionally boosted.

    :param boost: apply boosts
    :param eurm_ens: eurm from ensemble (10k x 2.2M)
    :param sim: similarity matrix (tracks x tracks)
    :param name: name of the submission
    """
    # Data access + submission helpers
    datareader = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(datareader)

    if boost:
        # HoleBoost on categories 8 and 10
        booster = HoleBoost(similarity=sim, eurm=eurm_ens, datareader=datareader, norm=norm_l1_row)
        eurm_ens = booster.boost_eurm(categories=[8, 10], k=300, gamma=5)

        # TailBoost on categories 9, 7, 6, 5
        booster = TailBoost(similarity=sim, eurm=eurm_ens, datareader=datareader, norm=norm_l2_row)
        eurm_ens = booster.boost_eurm(categories=[9, 7, 6, 5],
                                      last_tracks=[10, 3, 3, 3],
                                      k=[100, 80, 100, 100],
                                      gamma=[0.01, 0.01, 0.01, 0.01])

        # AlbumBoost on categories 3, 4, 7, 9
        booster = AlbumBoost(datareader, eurm_ens)
        eurm_ens = booster.boost_eurm(categories=[3, 4, 7, 9], gamma=2, top_k=[3, 3, 10, 40])

    # Convert to an ordered recommendation list and submit
    rec_list = eurm_to_recommendation_list_submission(eurm_ens, datareader=datareader)
    submitter.submit(rec_list, name=name)
urm = dr.get_urm() pid = dr.get_test_pids() #Fitting data rec.fit(urm, pid) #Computing similarity/model rec.compute_model(top_k=knn, sm_type=sm.TVERSKY, shrink=200, alpha=0.1, beta=1, binary=True, verbose=True) #Computing ratings rec.compute_rating(top_k=topk, verbose=True, small=True) #submission and saving sps.save_npz(complete_name + ".npz", rec.eurm) sb = Submitter(dr) sb.submit(recommendation_list=eurm_to_recommendation_list_submission( rec.eurm), name=complete_name, track="main", verify=True, gzipped=True) else: print("invalid mode.")
# Sanitise the run name so it is filesystem-safe, then persist the raw EURM.
name = name.replace("/", "_")
sps.save_npz("results/" + name + ".npz", res)

print("[ Initizalizing Datereader ]")
# NOTE(review): only_load="False" is a non-empty string and therefore truthy —
# this actually behaves like only_load=True. If False was intended, pass the bool.
dr = Datareader(verbose=False, mode=mode, only_load="False")

# Convert the EURM into an ordered recommendation list.
res = eurm_to_recommendation_list(res, datareader=dr)

# Offline: evaluate against the held-out split.
if mode == "offline":
    print("[ Initizalizing Evaluator ]")
    ev = Evaluator(dr)
    ev.evaluate(res, name="ens")

# Online: submit the recommendation list.
if mode == "online":
    print("[ Initizalizing Submitter ]")
    sb = Submitter(dr)
    sb.submit(recommendation_list=res, name=name, track="main", verify=True, gzipped=False)

#
#
#
# if type == "splitted":
#     mode = "offline"
#
#     print("[ Loading weights ]")
#     w_rprec = []
#     tmp = 0
# Parse the weight vector for this optimizer-output row and collect it.
print(arg)
# np.float64 is the same type the removed `np.float` alias resolved to
# (builtin float); NumPy >= 1.24 no longer accepts `np.float`.
best = list(arg[1:].astype(np.float64))
w.append(best)

# Ensemble the per-category EURMs (categories 1..10) with the learned weights.
for i in tqdm(range(1, 11)):
    if mode == "offline":
        CBF_ALBUM = sps.load_npz(mode + "/offline-cbf_item_album-cat" + str(i) + ".npz")
        CBF_ARTISTA = sps.load_npz(mode + "/offline-cbf_item_artist-cat" + str(i) + ".npz")
        # BUG FIX: this previously used str(1) instead of str(i), so every
        # category was ensembled with the NLP matrix of category 1.
        NLP = norm_max_row(sps.load_npz(mode + "/nlp_eurm_offline_bm25-cat" + str(i) + ".npz"))
        RP3BETA = sps.load_npz(mode + "/offline-rp3beta-cat" + str(i) + ".npz")
        CF_USER = sps.load_npz(mode + "/cfu_eurm-cat" + str(i) + ".npz")
        SLIM = sps.load_npz(mode + "/slim_bpr_completo_test1-cat" + str(i) + ".npz")
        CBF_USER_ARTIST = sps.load_npz(mode + "/eurm_cbfu_artists_offline-cat" + str(i) + ".npz")
    matrix = [CBF_ALBUM, CBF_ARTISTA, NLP, RP3BETA, CF_USER, SLIM, CBF_USER_ARTIST]
    we = w[i-1]
    res.append(ensembler(matrix, we, normalization_type="lele"))

# Stack the ten per-category slices back into a single EURM.
ret = sps.vstack(res).tocsr()

if mode == "offline":
    ev.evaluate(eurm_to_recommendation_list(ret), "best_test", verbose=True)
    # sps.save_npz("ensemble_per_cat_"+mode+"_new_data_28_maggio.npz", ret)
if mode == "online":
    sb = Submitter(dr)
    sb.submit(recommendation_list=eurm_to_recommendation_list_submission(ret),
              name="best_test", track="main", verify=True, gzipped=False)
eurm = eurm[test_pids, :] # Save eurm if save_eurm: sps.save_npz('eurm_' + name + '_' + mode + '.npz', eurm) # Evaluation ev.evaluate(recommendation_list=eurm_to_recommendation_list( eurm, datareader=dr), name=complete_name) elif mode == "online": # Initialization dr = Datareader(verbose=False, mode=mode, only_load=True) test_pids = list(dr.get_test_pids()) sb = Submitter(dr) urm = dr.get_urm() # UCM ucm_artists = dr.get_ucm_albums() ucm_artists = bm25_row(ucm_artists) # Do not train on challenge set ucm_artists_T = ucm_artists.copy() inplace_set_rows_zero(ucm_artists_T, test_pids) ucm_artists_T = ucm_artists_T.T # Similarity print('Similarity..') sim = tversky_similarity(ucm_artists, ucm_artists_T,
from scipy import sparse as sps import utils.pre_processing as pre from boosts.hole_boost import HoleBoost from boosts.match_boost import MatchBoost from boosts.tail_boost import TailBoost from boosts.album_boost import AlbumBoost from boosts.top_boost import TopBoost from utils.post_processing import * from utils.pre_processing import * from utils.submitter import Submitter from utils.ensembler import * from boosts.generate_similarity import generate_similarity # INIT dr = Datareader(mode='online', only_load=True, verbose=False) sb = Submitter(dr) ####### LOAD MATRICES AFTER BAYESIAN OPTIMIZATION ##################################### cluster1 = sps.load_npz( ROOT_DIR + '/final_npz_creative/ensembled_creativeFIRE_ar1_online.npz') cluster2 = sps.load_npz( ROOT_DIR + '/final_npz_creative/ensembled_creativeFIRE_ar2_online.npz') cluster3 = sps.load_npz( ROOT_DIR + '/final_npz_creative/ensembled_creativeFIRE_ar3_online.npz') cluster4 = sps.load_npz( ROOT_DIR + '/final_npz_creative/ensembled_creativeFIRE_ar4_online.npz') clustered_approach_online = cluster1 + cluster2 + cluster3 + cluster4 ensembled1 = sps.load_npz(
from scipy import sparse as sps
import utils.pre_processing as pre
from boosts.hole_boost import HoleBoost
from boosts.match_boost import MatchBoost
from boosts.tail_boost import TailBoost
from boosts.album_boost import AlbumBoost
from boosts.top_boost import TopBoost
from utils.post_processing import *
from utils.pre_processing import *
from utils.submitter import Submitter
from utils.ensembler import *
from boosts.generate_similarity import generate_similarity

# INIT
dr = Datareader(mode='online', only_load=True, verbose=False)
sb = Submitter(dr)

####### LOAD MATRICES AFTER BAYESIAN OPTIMIZATION #####################################
cluster1 = sps.load_npz(ROOT_DIR + '/final_npz_main/ensembled_ar1_online.npz')
cluster2 = sps.load_npz(ROOT_DIR + '/final_npz_main/ensembled_ar2_online.npz')
cluster3 = sps.load_npz(ROOT_DIR + '/final_npz_main/ensembled_ar3_online.npz')
# BUG FIX: the path contained a mis-encoded character ('/fin©al_npz_main/'),
# which made this load fail — restored to '/final_npz_main/' like its siblings.
cluster4 = sps.load_npz(ROOT_DIR + '/final_npz_main/ensembled_ar4_online.npz')

# Element-wise sum of the four per-cluster EURMs
clustered_approach_online = cluster1 + cluster2 + cluster3 + cluster4

ensembled1 = sps.load_npz(ROOT_DIR + '/final_npz_main/ensembled_MAIN_online_half1.npz')
ensembled2 = sps.load_npz(ROOT_DIR + '/final_npz_main/ensembled_MAIN_online_half2.npz')
mode = 'online'

if mode == 'offline':
    # Offline: compute the SVD predictions and evaluate on the held-out split.
    dr = Datareader(mode='offline', only_load=True, verbose=False)
    evaluator = Evaluator(dr)

    eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

    print('N_FACTORS =', n_factors)
    run_name = 'svd_' + str(n_factors)
    evaluator.evaluate(eurm_to_recommendation_list(eurm, datareader=dr), name=run_name)
elif mode == 'online':
    # Online: compute the SVD predictions on the challenge set and submit.
    dr = Datareader(mode='online', only_load=True, verbose=False)
    submitter = Submitter(dr)

    eurm = compute_SVD(dr, n_factors, top_k, save_eurm=True)

    submitter.submit(eurm_to_recommendation_list_submission(eurm, datareader=dr),
                     name='svd_' + str(n_factors))
else:
    print('Wrong mode!')
from personal.Tommaso.Recommenders.top_pop_rec import TopPopRecommender
from utils.datareader import Datareader
from utils.submitter import Submitter

"""
This script shows how to perform correctly a submission.
Basically you have to initialize a Submitter object with csv files
and then call the method submit which takes in input a numpy array
of recommendations of shape (10.000, 500).
"""

# SUBMITTER
dr = Datareader(mode='online', only_load=True)
sb = Submitter(dr)

# TOP POP
t = TopPopRecommender()
t.fit(dr.get_df_train_interactions(), dr.get_df_test_interactions())
# BUG FIX: DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
# pandas 1.0; `.values` returns the same ndarray on every pandas version.
rec_list = t.make_recommendation(dr.get_df_test_playlists()['pid'].values)

# SUBMISSION
# rec_list is an ordered list of recommendations
# This submission will be rejected due to duplicates occurrences.
sb.submit(recommendation_list=rec_list, name='top_pop', track='main', verify=True, gzipped=False)
eurm = dot_product(sim, urm, k=topk) eurm = eurm.tocsr() eurm = eurm[test_pids, :] rec_list = eurm_to_recommendation_list(eurm, dr) if save_eurm: sps.save_npz(mode + "_" + name + ".npz", eurm, compressed=False) # Submission ev = Evaluator(dr) ev.evaluate(rec_list, name=name) elif mode == 'online': # Setup sb = Submitter(dr) urm = dr.get_urm() test_pids = dr.get_test_pids() # Init object nlp_strict = NLP(dr) # Get ucm ucm = nlp_strict.get_UCM() print(ucm.shape) # Do not train on challenge set ucm_T = ucm.copy() inplace_set_rows_zero(ucm_T, test_pids).astype(np.float64) ucm_T = ucm_T.T
remove_seed=True, datareader=dr), 'rp3', verbose=True) if mode == "online": ### Submission ### #Data initialization dr = Datareader(verbose=False, mode=mode, only_load=True) #Recommender algorithm initialization rec = R_p_3_beta() #Submitter initialization sb = Submitter(dr) #Getting data ready for the recommender algorithm urm = dr.get_urm() pids = dr.get_test_pids() urm.data = np.ones(len(urm.data)) ut.inplace_set_rows_zero( X=urm, target_rows=pids) #don't learn from challange set urm.eliminate_zeros() p_ui = normalize(urm, norm="l1") p_iu = normalize(urm.T, norm="l1") top = urm.sum(axis=0).A1 # Fitting data
# Restrict the URM to the target playlists.
urm = urm[pids]

# BM25-weight the UCM and build a Tversky similarity from it.
ucm = bm25_row(ucm)
similarity = tversky_similarity(ucm, binary=False, shrink=1, alpha=0.1, beta=1)
similarity = similarity.tocsr()
print(similarity.shape, urm.shape)

# Estimated ratings: similarity x URM, keeping the top-k entries per row.
eurm = dot_product(similarity, urm, k=topk)
eurm = eurm.tocsr()
# NOTE(review): keeps only the last 10000 rows — presumably the challenge-set
# playlists occupy the final block of the matrix; confirm against the caller.
eurm = eurm[-10000:]
# Zero out tracks already present in the seed playlists.
eurm = eurm_remove_seed(eurm, dr)

rec_list = eurm_to_recommendation_list(eurm)

# Persist both the EURM and the recommendation list, then submit online.
sps.save_npz(mode + "_" + name + "_knn" + str(knn) + "_bm25.npz", eurm, compressed=False)
np.save(mode + "_" + name + "_knn" + str(knn) + "_bm25", rec_list)
sb = Submitter(dr)
sb.submit(rec_list, name=name, track="main", verify=True, gzipped=False)
norm = best_params_dict['norm'] del best_params_dict['norm'] # cutting and dot the value from ensemble eurms_full = [ value_from_bayesian * norms[norm](matrices_loaded[name][start_index:end_index]) for name, value_from_bayesian in best_params_dict.items()] # and summing up eurms_cutted[cat-1] = sum( [ matrix for matrix in eurms_full] ) # adding to reclist rec_list[start_index:end_index] = eurm_to_recommendation_list(eurm=eurms_cutted[cat-1], cat=cat, verbose=False)[start_index:end_index] eurm = eurms_cutted[0] for i in range(1,10): eurm = sps.vstack([eurm, eurms_cutted[i]]) sps.save_npz(file='../'+configuration_name+'/ensembled_'+configuration_name+'_'+mode, matrix=eurm) if mode=='offline': ev = Evaluator(dr) ev.evaluate(recommendation_list=rec_list, name=configuration_name) else: sb = Submitter(dr) sb.submit(recommendation_list=rec_list, name=configuration_name)