import os
import pickle  # BUG FIX: pickle.load() is called below but pickle was never imported
import traceback

import numpy as np
import hdbscan as clust
from sklearn.cluster import KMeans

if __name__ == '__main__':
    # Experiment driver: load train/validation/test splits and a precomputed
    # K-means (k=3) user clusterization, then evaluate per-cluster.
    # NOTE(review): this script block appears truncated — the cl_*_ind lists
    # are initialized but never filled within the visible code.
    evaluate_algorithm = True

    # delete_previous_intermediate_computations()

    filename = "hybrid_UserContentMatrix"

    # RS_Data_Loader is a project class (not defined in this file); it exposes
    # the URM splits used below — presumably built elsewhere in the package.
    dataReader = RS_Data_Loader(top10k=True, all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()

    from Base.Evaluation.Evaluator import SequentialEvaluator
    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    # Load the serialized cluster assignment produced by a previous K-means run.
    with open(
            os.path.join("IntermediateComputations",
                         "Clusterization_Kmeans_3.pkl"), 'rb') as handle:
        clusters = pickle.load(handle)

    # Per-cluster user index accumulators (one list per K-means cluster).
    cl_0_ind = []
    cl_1_ind = []
    cl_2_ind = []
def run():
    """Evaluate a 6-recommender hybrid (ItemCBF, ItemCF, UserCF, IALS,
    SLIM-BPR, SLIM-ElasticNet) with static weights, logging results to
    result_experiments/result_all_algorithms.txt.

    When evaluate_algorithm is False, a submission CSV is written instead.
    NOTE(review): this module defines run() multiple times; later definitions
    shadow this one at import time.
    """
    import time  # BUG FIX: time.time() is used below but time was not imported at file top

    evaluate_algorithm = True

    # delete_previous_intermediate_computations()
    # if not evaluate_algorithm:
    #     delete_previous_intermediate_computations()
    # else:
    #     print("ATTENTION: old intermediate computations kept, pay attention if running with all_train")

    filename = "hybrid_ICB_ICF_UCF_IALS_SLIM_ELASTIC_local_081962.csv"

    dataReader = RS_Data_Loader(all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_PageRank_train = dataReader.get_page_rank_URM()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    UCM_tfidf = dataReader.get_tfidf_artists()

    # Order matters: fit() hyperparameter lists below are positional per recommender.
    recommender_list1 = [
        ItemKNNCBFRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        IALS_numpy,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator
    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    # BUG FIX: the log file was opened and never closed (leak); use a context manager.
    with open(output_root_path + "result_all_algorithms.txt", "a") as logFile:
        try:
            recommender_class = HybridRecommender
            print("Algorithm: {}".format(recommender_class))

            '''
            Our optimal run
            '''
            recommender_list = recommender_list1

            onPop = False  # whether dynamic weights are chosen by playlist popularity

            recommender = recommender_class(URM_train,
                                            ICM,
                                            recommender_list,
                                            URM_PageRank_train=URM_PageRank_train,
                                            dynamic=False,
                                            UCM_train=UCM_tfidf,
                                            URM_validation=URM_validation,
                                            onPop=onPop)

            # Positional hyperparameters, one entry per recommender in
            # recommender_list (a -1 means "not applicable" for that model).
            recommender.fit(**{
                "topK": [10, 181, 82, -1, 761, 490],
                "shrink": [8, 0, 3, -1, -1, -1],
                "pop": [280],
                "weights": [
                    0.47412263345597117, 1.3864620551711606,
                    0.6224999770898935, 1.5498327677561246,
                    0.1993692779443738, 2.113324096784624
                ],
                "final_weights": [1, 1],
                "force_compute_sim": False,  # not evaluate_algorithm,
                "feature_weighting_index": 0,
                "epochs": 150,
                'lambda_i': [0.0],
                'lambda_j': [1.0153577332223556e-08],
                'SLIM_lr': [0.1],
                'alphaP3': [0.4121720883248633],
                'alphaRP3': [0.8582865731462926],
                'betaRP': [0.2814208416833668],
                'l1_ratio': 3.020408163265306e-06,
                'alpha': 0.0014681984611695231,
                'tfidf': [True],
                "weights_to_dweights": -1,
                "IALS_num_factors": 290,
                "IALS_reg": 0.001,
                "IALS_iters": 6,
                "IALS_scaling": 'log',
                "IALS_alpha": 40,
                "filter_top_pop_len": 0
            })

            print("TEST")
            print("Starting Evaluations...")
            # to indicate if plotting for lenght or for pop
            results_run, results_run_string, target_recommendations = \
                evaluator.evaluateRecommender(recommender,
                                              plot_stats=False,
                                              onPop=onPop)

            print("Algorithm: {}, results: \n{}".format(
                [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                results_run_string))
            logFile.write("Algorithm: {}, results: \n{} time: {}".format(
                [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                results_run_string, time.time()))
            logFile.flush()

            # Only produce the submission file on all-train runs.
            if not evaluate_algorithm:
                target_playlist = dataReader.get_target_playlist()
                md.assign_recomendations_to_correct_playlist(
                    target_playlist, target_recommendations)
                md.make_CSV_file(target_playlist, filename)
                print('File {} created!'.format(filename))

        except Exception as e:
            traceback.print_exc()
            logFile.write("Algorithm: {} - Exception: {}\n".format(
                recommender_class, str(e)))
            logFile.flush()
def run():
    """Evaluate a dynamic 10-recommender hybrid: two parallel 5-model groups
    (ItemCBF, ItemCF, UserCF, SLIM-BPR, SLIM-ElasticNet), with d_weights
    selecting one group per popularity/length level.

    NOTE(review): this module defines run() multiple times; later definitions
    shadow this one at import time.
    """
    import time  # BUG FIX: time.time() is used below but time was not imported at file top

    evaluate_algorithm = True

    # delete_previous_intermediate_computations()
    # if not evaluate_algorithm:
    #     delete_previous_intermediate_computations()
    # else:
    #     print("ATTENTION: old intermediate computations kept, pay attention if running with all_train")

    filename = "hybrid_ICB_ICF_UCF_SLIM_ELASTIC_local_08052.csv"

    dataReader = RS_Data_Loader(all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_PageRank_train = dataReader.get_page_rank_URM()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    UCM_tfidf = dataReader.get_tfidf_artists()

    # Group used for the first d_weights row.
    recommender_list1 = [
        ItemKNNCBFRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender
    ]

    # Group used for the second d_weights row (same models, different weights).
    recommender_list2 = [
        ItemKNNCBFRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator
    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    # BUG FIX: the log file was opened and never closed (leak); use a context manager.
    with open(output_root_path + "result_all_algorithms.txt", "a") as logFile:
        try:
            recommender_class = HybridRecommender
            print("Algorithm: {}".format(recommender_class))

            '''
            Our optimal run
            '''
            recommender_list = recommender_list1 + recommender_list2

            # One weight row per dynamic level; each row zeroes out the other
            # group so only one 5-model ensemble is active at a time.
            d_weights = [
                [0.5469789514168496, 1.5598358421050373, 1.1505851198615593,
                 0.2540023047558251, 0.9403502151872645] + [0] * len(recommender_list2),
                [0] * len(recommender_list1) +
                [0.5205017325111618, 1.6831295912149837, 1.6560707664775454,
                 0.3144197724407203, 1.9912784665282535]
            ]

            onPop = False  # whether dynamic weights are chosen by playlist popularity

            recommender = recommender_class(URM_train,
                                            ICM,
                                            recommender_list,
                                            URM_PageRank_train=URM_PageRank_train,
                                            dynamic=True,
                                            UCM_train=UCM_tfidf,
                                            d_weights=d_weights,
                                            URM_validation=URM_validation,
                                            onPop=onPop)

            # Hyperparameter lists are concatenated per group, mirroring
            # recommender_list1 + recommender_list2.
            recommender.fit(**{
                "topK": [10, 33, 160, 761, 490] + [10, 33, 160, 761, 490],
                "shrink": [8, 26, 2, -1, -1] + [8, 26, 2, -1, -1],
                "pop": [30],
                "weights": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "final_weights": [1, 1],
                "force_compute_sim": False,  # not evaluate_algorithm,
                "feature_weighting_index": [0, 0],
                "epochs": 150,
                'lambda_i': [0.0, 0.0],
                'lambda_j': [1.0153577332223556e-08, 1.0153577332223556e-08],
                'SLIM_lr': [0.1, 0.1],
                'alphaP3': [0.4121720883248633],
                'alphaRP3': [0.8582865731462926],
                'betaRP': [0.2814208416833668],
                'l1_ratio': [3.020408163265306e-06, 3.020408163265306e-06],
                'alpha': [0.0014681984611695231, 0.0014681984611695231],
                'tfidf': [True, True],
                "weights_to_dweights": -1,
                "filter_top_pop_len": 0
            })

            print("TEST")
            print("Starting Evaluations...")
            # to indicate if plotting for lenght or for pop
            results_run, results_run_string, target_recommendations = \
                evaluator.evaluateRecommender(recommender,
                                              plot_stats=True,
                                              onPop=onPop)

            print("Algorithm: {}, results: \n{}".format(
                [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                results_run_string))
            logFile.write("Algorithm: {}, results: \n{} time: {} \n".format(
                [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                results_run_string, time.time()))
            logFile.flush()

            # Only produce the submission file on all-train runs.
            if not evaluate_algorithm:
                target_playlist = dataReader.get_target_playlist()
                md.assign_recomendations_to_correct_playlist(
                    target_playlist, target_recommendations)
                md.make_CSV_file(target_playlist, filename)
                print('File {} created!'.format(filename))

        except Exception as e:
            traceback.print_exc()
            logFile.write("Algorithm: {} - Exception: {}\n".format(
                recommender_class, str(e)))
            logFile.flush()
def run():
    """Submission run (evaluate_algorithm=False): fit the static 5-recommender
    hybrid (ItemCBF, ItemCF, UserCF, SLIM-BPR, SLIM-ElasticNet) on all train
    data and write the recommendation CSV.

    NOTE(review): this module defines run() multiple times; this last
    definition is the one that wins at import time.
    """
    import time  # BUG FIX: time.time() is used below but time was not imported at file top

    evaluate_algorithm = False

    # delete_previous_intermediate_computations()
    # if not evaluate_algorithm:
    #     delete_previous_intermediate_computations()
    # else:
    #     print("ATTENTION: old intermediate computations kept, pay attention if running with all_train")

    filename = "hybrid_ICB_ICF_UCF_SLIM_ELASTIC_local_08052.csv"

    dataReader = RS_Data_Loader(all_train=not evaluate_algorithm)

    URM_train = dataReader.get_URM_train()
    URM_PageRank_train = dataReader.get_page_rank_URM()
    URM_validation = dataReader.get_URM_validation()
    URM_test = dataReader.get_URM_test()
    ICM = dataReader.get_ICM()
    UCM_tfidf = dataReader.get_tfidf_artists()

    # Order matters: fit() hyperparameter lists below are positional per recommender.
    # (A second, unused recommender group was removed; only this one is fit.)
    recommender_list1 = [
        ItemKNNCBFRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import SequentialEvaluator
    evaluator = SequentialEvaluator(URM_test, URM_train, exclude_seen=True)

    output_root_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    # BUG FIX: the log file was opened and never closed (leak); use a context manager.
    with open(output_root_path + "result_all_algorithms.txt", "a") as logFile:
        try:
            recommender_class = HybridRecommender
            print("Algorithm: {}".format(recommender_class))

            '''
            Our optimal run
            '''
            recommender_list = recommender_list1

            onPop = False  # whether dynamic weights are chosen by playlist popularity

            recommender = recommender_class(URM_train,
                                            ICM,
                                            recommender_list,
                                            URM_PageRank_train=URM_PageRank_train,
                                            dynamic=False,
                                            UCM_train=UCM_tfidf,
                                            URM_validation=URM_validation,
                                            onPop=onPop)

            # (Removed a block of dead locals: lambda_i, lambda_j,
            # old_similrity_matrix, num_factors, l1_ratio, alphaRP3_2,
            # betaRP3_2, num_factors_2 — none were used; fit() takes literals.)
            recommender.fit(**{
                "topK": [10, 33, 160, 761, 490],
                "shrink": [8, 26, 2, -1, -1],
                "pop": [280],
                "weights": [0.33804686720093335, 1.3092081994688194,
                            0.642288869881126, 0.18883962446529368,
                            1.9317211019160747],
                "final_weights": [1, 1],
                "force_compute_sim": False,  # not evaluate_algorithm,
                "feature_weighting_index": 0,
                "epochs": 150,
                'lambda_i': [0.0],
                'lambda_j': [1.0153577332223556e-08],
                'SLIM_lr': [0.1],
                'alphaP3': [0.4121720883248633],
                'alphaRP3': [0.8582865731462926],
                'betaRP': [0.2814208416833668],
                'l1_ratio': 3.020408163265306e-06,
                'alpha': 0.0014681984611695231,
                'tfidf': [True],
                "weights_to_dweights": -1,
                "filter_top_pop_len": 0
            })

            print("TEST")
            print("Starting Evaluations...")
            # to indicate if plotting for lenght or for pop
            results_run, results_run_string, target_recommendations = \
                evaluator.evaluateRecommender(recommender,
                                              plot_stats=False,
                                              onPop=onPop)

            print("Algorithm: {}, results: \n{}".format(
                [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                results_run_string))
            logFile.write("Algorithm: {}, results: \n{} time: {}".format(
                [rec.RECOMMENDER_NAME for rec in recommender.recommender_list],
                results_run_string, time.time()))
            logFile.flush()

            # evaluate_algorithm is False here, so the submission CSV is written.
            if not evaluate_algorithm:
                target_playlist = dataReader.get_target_playlist()
                md.assign_recomendations_to_correct_playlist(
                    target_playlist, target_recommendations)
                md.make_CSV_file(target_playlist, filename)
                print('File {} created!'.format(filename))

        except Exception as e:
            traceback.print_exc()
            logFile.write("Algorithm: {} - Exception: {}\n".format(
                recommender_class, str(e)))
            logFile.flush()
def recommend(self, user_id_array, dict_pop=None, cutoff=None, remove_seen_flag=True, remove_top_pop_flag=False, remove_CustomItems_flag=False):
    """Produce per-user rankings: blend the sub-recommenders' score matrices
    (statically or per-user via change_weights), then re-rank the top
    cutoff+10 candidates of each user with a freshly trained XGBoost model.

    Returns a list of rankings, one per user in user_id_array.
    NOTE(review): despite computing single_user, the single-user unwrapping at
    the bottom is commented out — callers always get a list of lists.
    """
    # Normalize a scalar user id to a 1-element array.
    if np.isscalar(user_id_array):
        user_id_array = np.atleast_1d(user_id_array)
        single_user = True
    else:
        single_user = False

    weights = self.weights
    if cutoff == None:
        # noinspection PyUnresolvedReferences
        cutoff = self.URM_train.shape[1] - 1
    else:
        cutoff  # NOTE(review): dead no-op statement — has no effect

    # Score 10 extra candidates so XGBoost has a larger pool to re-rank.
    cutoff_addition = 10
    cutoff_Boost = cutoff + cutoff_addition

    # compute the scores using the dot product
    # noinspection PyUnresolvedReferences
    if self.sparse_weights:
        scores = []
        # noinspection PyUnresolvedReferences
        for recommender in self.recommender_list:
            # Nested hybrids get their scores via the dedicated helper.
            if recommender.__class__ in [HybridRecommenderXGBoost]:
                scores.append(
                    self.compute_score_hybrid(
                        recommender,
                        user_id_array,
                        dict_pop,
                        remove_seen_flag=True,
                        remove_top_pop_flag=False,
                        remove_CustomItems_flag=False))
                continue

            scores_batch = recommender.compute_item_score(user_id_array)
            # scores_batch = np.ravel(scores_batch) # because i'm not using batch

            # Mask already-seen items per user before blending.
            for user_index in range(len(user_id_array)):
                user_id = user_id_array[user_index]

                if remove_seen_flag:
                    scores_batch[
                        user_index, :] = self._remove_seen_on_scores(
                            user_id, scores_batch[user_index, :])

            if remove_top_pop_flag:
                scores_batch = self._remove_TopPop_on_scores(scores_batch)

            if remove_CustomItems_flag:
                scores_batch = self._remove_CustomItems_on_scores(
                    scores_batch)

            scores.append(scores_batch)

        final_score = np.zeros(scores[0].shape)

        if self.dynamic:
            # Per-user blending: weights depend on the user's popularity or
            # playlist-length "level".
            for user_index in range(len(user_id_array)):
                user_id = user_id_array[user_index]
                # CSR row slice: the item indices the user interacted with.
                user_profile = self.URM_train.indices[
                    self.URM_train.indptr[user_id]:self.URM_train.
                    indptr[user_id + 1]]
                if self.onPop:
                    level = int(
                        ged.playlist_popularity(user_profile, dict_pop))
                else:
                    level = int(ged.lenght_playlist(user_profile))
                # weights = self.change_weights(user_id)
                weights = self.change_weights(level, self.pop)
                assert len(weights) == len(
                    scores), "Scores and weights have different lengths"

                final_score_line = np.zeros(scores[0].shape[1])
                # All-zero weight rows leave the user's scores at zero.
                if sum(weights) > 0:
                    for score, weight in zip(scores, weights):
                        final_score_line += score[user_index] * weight
                final_score[user_index] = final_score_line
        else:
            # Static blending: one weight per recommender for all users.
            for score, weight in zip(scores, weights):
                final_score += (score * weight)
    else:
        # Dense-weights path was never implemented.
        raise NotImplementedError

    # i take the 20 elements with highest scores
    relevant_items_boost = (-final_score).argpartition(
        cutoff_Boost, axis=1)[:, 0:cutoff_Boost]

    # if not self.xgb_model_ready:
    # NOTE(review): this score-sorted ranking is computed and then discarded —
    # `ranking` is reassigned to [] before the XGBoost loop below.
    relevant_items_partition = (-final_score).argpartition(
        cutoff, axis=1)[:, 0:cutoff]
    relevant_items_partition_original_value = final_score[
        np.arange(final_score.shape[0])[:, None],
        relevant_items_partition]
    relevant_items_partition_sorting = np.argsort(
        -relevant_items_partition_original_value, axis=1)
    ranking = relevant_items_partition[
        np.arange(relevant_items_partition.shape[0])[:, None],
        relevant_items_partition_sorting]

    # Creating numpy array for training XGBoost
    # NOTE(review): reloads the dataset on every recommend() call — expensive;
    # consider caching on self.
    data_reader = RS_Data_Loader()
    URM_train = data_reader.get_URM_train()

    pred_data_xgboost = self.xgboost_data_prediction(
        user_id_array, relevant_items_boost, cutoff_Boost)

    param = {
        'max_depth': 3,  # the maximum depth of each tree
        'eta': 0.3,  # the training step for each iteration
        'silent': 1,  # logging mode - quiet
        'objective':
        'multi:softprob',  # error evaluation for multiclass training
        'num_class': 2
    }  # the number of classes that exist in this datset
    num_round = 20

    ranking = []
    # Train one XGBoost model per user and use it to reorder that user's
    # boosted candidate window.
    for user_index in range(len(user_id_array)):
        user_id = user_id_array[user_index]

        # if self.user_id_XGBoost is None:
        #     self.user_id_XGBoost = np.array([user_id] * cutoff_Boost).reshape(-1, 1)
        # else:
        #     self.user_id_XGBoost = np.concatenate([self.user_id_XGBoost,
        #                                            np.array([user_id] *
        #                                                     cutoff_Boost).reshape(-1, 1)], axis=0)

        train_xgboost = self.xgboost_data_training(user_id, URM_train)

        # Labels: first half of the rows are positives, second half negatives
        # — assumes xgboost_data_training returns them in that order (TODO confirm).
        half_play = int(train_xgboost.shape[0] / 2)
        labels_train = np.array([1] * half_play + [0] * half_play)
        dtrain = xgb.DMatrix(train_xgboost, label=labels_train)
        bst = xgb.train(param, dtrain, num_round)

        # This user's slice of the flattened prediction matrix.
        user_recommendations = pred_data_xgboost[user_index *
                                                 cutoff_Boost:(user_index +
                                                               1) *
                                                 cutoff_Boost]
        dtest = xgb.DMatrix(user_recommendations)
        preds = bst.predict(dtest)

        predictions = self.reorder_songs(preds,
                                         user_recommendations)[:cutoff]

        ranking.append(predictions)
        print(user_id, predictions)

    # (Removed-in-review note: a large commented-out alternative path handled a
    # pre-trained self.xgbModel and incremental training-matrix accumulation;
    # it was dead code and is elided here.)

    # Return single list for one user, instead of list of lists
    # if single_user:
    #     ranking_list = ranking_list[0]

    return ranking