score += self.RP3alpha_scores * self.weight_initial['RP3Alpha'] if exclude_seen: score = self._filter_seen(user_id, score) ranking = score.argsort()[::-1] return ranking[:at] ################################################ Test ################################################## if __name__ == '__main__': test = False split_users = True max_map = 0 data = get_data() group_cold = None group_one = None group_two = None group_three = None userCBF_args = {'topK': 1000, 'shrink': 7950} P3alpha_args = {'topK': 66, 'alpha': 0.2731573847973295, 'normalize': True} itemCF_args = { 'topK': 12, 'shrink': 88, 'similarity': 'tversky', 'normalize': True,
for values_, rows_, cols_ in res: values.extend(values_) rows.extend(rows_) cols.extend(cols_) # generate the sparse weight matrix self.W_sparse = sps.csr_matrix((values, (rows, cols)), shape=(n_items, n_items), dtype=np.float32) if __name__ == '__main__': evaluate = True train, test = split_train_leave_k_out_user_wise(get_data()['URM_all'], k_out=1) SLIMElasticNet_args = {'l1_ratio': 1e-05, 'alpha': 0.001, 'topK': 1000} if evaluate: evaluator = EvaluatorHoldout(test, [10], target_users=get_data()['target_users']) slel = SLIMElasticNetRecommender(train) slel.fit(l1_ratio=SLIMElasticNet_args['l1_ratio'], topK=SLIMElasticNet_args['topK'], alpha=SLIMElasticNet_args['alpha']) result, result_string = evaluator.evaluateRecommender(slel) print(f"MAP: {result[10]['MAP']:.5f}")
for user_id in user_id_array: scores = np.dot(self.user_factors[user_id], self.item_factors.T) scores = np.squeeze(scores) scores_list.append(scores) return np.asarray(scores_list, dtype=np.float32) def save_model(self, folder_path, file_name = None): print("Saving not implemented...") if __name__ == '__main__': ALS_args = { 'n_factors': 433, 'iterations': 29, 'regularization': 1.707545716729426e-05, 'alpha_val' : 5 } train, test = split_train_leave_k_out_user_wise(get_data()['URM_all'], k_out=1) evaluator = EvaluatorHoldout(test, [10], target_users=get_data()['target_users']) als = ALSRecommender(train) als.fit(n_factors=ALS_args['n_factors'], regularization=ALS_args['regularization'], iterations=ALS_args['iterations'], alpha_val=ALS_args['alpha_val']) result, result_string = evaluator.evaluateRecommender(als) print(f"MAP: {result[10]['MAP']:.5f}")
def read_data_split_and_search():
    """Run a hyperparameter search for every algorithm in the local list.

    This function provides a simple example of how to tune the parameters
    of a given algorithm.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation
          quality
        - A _best_model file which contains the trained model and can be
          loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the
          fit parameters; it can be passed to
          recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with
          the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the
          results, on the test set, of the best solution chosen using the
          validation set

    The data is split twice with leave-one-out per user: first into
    train/test, then the remaining train part into train/validation.
    A failure on one algorithm is printed (with traceback) and the loop
    moves on to the next algorithm.
    """
    # Fetch the dataset once and reuse it for the split and both evaluators.
    data = get_data()

    URM_train, URM_test = split_train_leave_k_out_user_wise(
        data['URM_all'], k_out=1)
    URM_train, URM_validation = split_train_leave_k_out_user_wise(
        URM_train, k_out=1)

    output_folder_path = "result_experiments/SKOPT_prova/"

    # Create the output directory if needed; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_folder_path, exist_ok=True)

    # Uncomment the algorithms that should be tuned.
    collaborative_algorithm_list = [
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # HybridRecommender
        ALSRecommender
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # IALSRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
    ]

    from Algorithms.Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation, cutoff_list=[10], target_users=data['target_users'])
    evaluator_test = EvaluatorHoldout(
        URM_test, cutoff_list=[10], target_users=data['target_users'])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        # TODO change num of iterations here
        n_cases=100,
        # The validation evaluator doubles as the early-stopping evaluator.
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path)

    # NOTE: the algorithms are tuned sequentially. A parallel variant based
    # on Algorithms.Utils.PoolWithSubprocess.PoolWithSubprocess (one task
    # per child, pool.map over collaborative_algorithm_list) was disabled.
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            # Best effort: report the failure and continue with the next
            # algorithm instead of aborting the whole run.
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     URM_train_last_test=None,
                                     metric_to_optimize="MAP",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=5,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):
    """Run the hyperparameter search for one collaborative recommender class.

    Three paths exist, selected by ``recommender_class``:

    * TopPop, GlobalEffects, Random: no hyperparameters, so a single case
      is evaluated through ``SearchSingleCase``.
    * ItemKNNCFRecommender, UserKNNCFRecommender: one search per similarity
      type via ``run_KNNRecommender_on_similarity_type``, optionally in a
      process pool.
    * Every other supported class: a ``SearchBayesianSkopt`` search over
      the space defined in ``_collaborative_search_setup``.

    Any exception raised while setting up or running the search is caught,
    printed with its traceback and appended to ``ErrorLog.txt`` inside
    ``output_folder_path``; the function then returns normally.

    :param recommender_class: recommender class to tune; must expose
        ``RECOMMENDER_NAME``, which is used as output file name root
    :param URM_train: training matrix; a copy is passed to the recommender
    :param URM_train_last_test: optional matrix that replaces the first
        constructor argument in the "last test" arguments
    :param metric_to_optimize: metric name forwarded to the search and used
        as early-stopping validation metric
    :param evaluator_validation: evaluator given to the search object
    :param evaluator_test: evaluator given to the search object
    :param evaluator_validation_earlystopping: evaluator placed in the
        early-stopping fit arguments
    :param output_folder_path: folder for results and ``ErrorLog.txt``;
        created if missing
    :param parallelizeKNN: if true, KNN similarity types are searched in a
        ``multiprocessing.Pool`` with one process per CPU
    :param n_cases: forwarded to the search
    :param n_random_starts: forwarded to the search
    :param resume_from_saved: forwarded to the search
    :param save_model: forwarded to the search
    :param allow_weighting: forwarded to the KNN search runner
    :param similarity_type_list: similarity types for the KNN path;
        defaults to cosine, jaccard, tanimoto, asymmetric, dice, tversky
    :return: None
    """
    # Create the output directory if needed; exist_ok avoids the
    # check-then-create race of a separate os.path.exists() test.
    os.makedirs(output_folder_path, exist_ok=True)

    # TODO CHANGE VALIDATION EVERY HERE
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    # Work on copies so the caller's matrices are never touched.
    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters,
            # therefore only one evaluation is needed.
            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            recommender_input_args_last_test = _last_test_input_args(
                recommender_input_args, URM_train_last_test)

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', 'tanimoto', "asymmetric", "dice",
                    "tversky"
                ]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            recommender_input_args_last_test = _last_test_input_args(
                recommender_input_args, URM_train_last_test)

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                try:
                    pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                             similarity_type_list)
                finally:
                    # Release the worker processes even when map() raises.
                    pool.close()
                    pool.join()
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        hyperparameters_range_dictionary, recommender_input_args = (
            _collaborative_search_setup(recommender_class, URM_train,
                                        earlystopping_keywargs))

        recommender_input_args_last_test = _last_test_input_args(
            recommender_input_args, URM_train_last_test)

        # Final step, after the hyperparameter range has been defined for
        # the selected algorithm.
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        # Top-level boundary of one search run: report and log, never raise.
        message = "On recommender {} Exception {}".format(
            recommender_class, str(e))
        print(message)
        traceback.print_exc()
        _append_error_log(output_folder_path, message)


def _last_test_input_args(recommender_input_args, URM_train_last_test):
    """Return a copy of the input args built on URM_train_last_test, or None."""
    if URM_train_last_test is None:
        return None

    last_test_args = recommender_input_args.copy()
    last_test_args.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    return last_test_args


def _append_error_log(output_folder_path, message):
    """Append one line to ErrorLog.txt inside output_folder_path."""
    # os.path.join keeps the log inside the folder even when the path has
    # no trailing separator; the context manager always closes the file.
    error_log_path = os.path.join(output_folder_path, "ErrorLog.txt")
    with open(error_log_path, "a") as error_file:
        error_file.write(message + "\n")


def _collaborative_search_setup(recommender_class, URM_train,
                                earlystopping_keywargs):
    """Return (search space, SearchInputRecommenderArgs) for a tunable class.

    Raises ValueError when no search space is defined for the class.
    """
    constructor_positional_args = [URM_train]
    fit_keyword_args = {}

    if recommender_class is P3alphaRecommender:
        search_space = {
            "topK": Integer(5, 1000),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False]),
        }

    elif recommender_class is RP3betaRecommender:
        search_space = {
            "topK": Integer(70, 500),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "beta": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False]),
        }

    elif recommender_class is MatrixFactorization_FunkSVD_Cython:
        search_space = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([500]),
            "use_bias": Categorical([True, False]),
            "batch_size": Categorical(
                [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
            "num_factors": Integer(1, 200),
            "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            "negative_interactions_quota": Real(
                low=0.0, high=0.5, prior='uniform'),
        }
        fit_keyword_args = earlystopping_keywargs

    elif recommender_class is MatrixFactorization_AsySVD_Cython:
        search_space = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([500]),
            "use_bias": Categorical([True, False]),
            "batch_size": Categorical([1]),
            "num_factors": Integer(1, 200),
            "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            "negative_interactions_quota": Real(
                low=0.0, high=0.5, prior='uniform'),
        }
        fit_keyword_args = earlystopping_keywargs

    elif recommender_class is MatrixFactorization_BPR_Cython:
        search_space = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([1500]),
            "num_factors": Integer(1, 200),
            "batch_size": Categorical(
                [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
            "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
        }
        fit_keyword_args = {
            **earlystopping_keywargs,
            "positive_threshold_BPR": None
        }

    elif recommender_class is IALSRecommender:
        search_space = {
            "num_factors": Integer(1, 200),
            "confidence_scaling": Categorical(["linear", "log"]),
            "alpha": Real(low=1e-3, high=50.0, prior='log-uniform'),
            "epsilon": Real(low=1e-3, high=10.0, prior='log-uniform'),
            "reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
        }
        fit_keyword_args = earlystopping_keywargs

    elif recommender_class is PureSVDRecommender:
        search_space = {
            "num_factors": Integer(1, 350),
        }

    elif recommender_class is NMFRecommender:
        search_space = {
            "num_factors": Integer(1, 350),
            "solver": Categorical(
                ["coordinate_descent", "multiplicative_update"]),
            "init_type": Categorical(["random", "nndsvda"]),
            "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
        }

    elif recommender_class is SLIM_BPR_Cython:
        search_space = {
            "topK": Integer(5, 70),
            "epochs": Categorical([7000]),
            "symmetric": Categorical([True, False]),
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "lambda_i": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "lambda_j": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
        }
        fit_keyword_args = {
            **earlystopping_keywargs,
            "positive_threshold_BPR": None,
            'train_with_sparse_weights': None
        }

    elif recommender_class is SLIMElasticNetRecommender:
        search_space = {
            "topK": Integer(5, 1000),
            "l1_ratio": Real(low=1e-5, high=1.0, prior='log-uniform'),
            "alpha": Real(low=1e-3, high=1.0, prior='uniform'),
        }

    elif recommender_class is ALSRecommender:
        search_space = {
            "n_factors": Integer(5, 1000),
            "regularization": Real(low=1e-5, high=1.0, prior='log-uniform'),
            "iterations": Integer(10, 30),
            "alpha_val": Integer(1, 40),
        }

    elif recommender_class is HybridRecommender:
        search_space = {
            "weight_itemcf": Real(low=0.0, high=2.0, prior='uniform'),
            # "weight_slim": Real(low=0.0, high=6.0, prior='uniform'),
            "weight_p3": Real(low=0.0, high=2.0, prior='uniform'),
            "weight_rp3": Real(low=0.0, high=2.0, prior='uniform'),
            "weight_als": Real(low=0.0, high=2.0, prior='uniform'),
        }
        # The hybrid is the only class built from two positional arguments.
        constructor_positional_args = [URM_train, get_data()['UCM']]

    else:
        # Without this branch the caller would fail later with an opaque
        # "local variable referenced before assignment" error.
        raise ValueError(
            "No hyperparameter search space defined for {}".format(
                recommender_class))

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=constructor_positional_args,
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=fit_keyword_args)

    return search_space, recommender_input_args
# NOTE(review): takes `self`, so this is presumably a method of a class whose
# header lies outside this view - confirm the indentation when merging.
def save_model(self, folder_path, file_name=None):
    """Placeholder for model persistence: prints a notice and saves nothing."""
    print("Saving not implemented...")


if __name__ == '__main__':
    # False -> fit on train + test; True -> hold-out evaluation with MAP@10.
    evaluate = False

    weight_itemcf = 0.06469128422082827
    weight_p3 = 0.04997541987671707
    weight_rp3 = 0.030600333541027876
    weight_als = 0.0
    weight_slimEl = 1.0

    # Both branches fit with exactly the same weights.
    fit_weights = {
        'weight_itemcf': weight_itemcf,
        'weight_p3': weight_p3,
        'weight_rp3': weight_rp3,
        'weight_als': weight_als,
        'weight_slimel': weight_slimEl,
    }

    train, test = split_train_leave_k_out_user_wise(get_data()['URM_all'],
                                                    k_out=1)
    ucm = get_data()['UCM']

    if not evaluate:
        # Fit on all the interactions (train and test summed together).
        urm_all = train + test
        hybrid = HybridRecommender(urm_all, ucm)
        hybrid.fit(**fit_weights)
    else:
        evaluator = EvaluatorHoldout(
            test, [10], target_users=get_data()['target_users'])

        hybrid = HybridRecommender(train, ucm)
        hybrid.fit(**fit_weights)

        result, result_string = evaluator.evaluateRecommender(hybrid)
        print(f"MAP: {result[10]['MAP']:.5f}")
if self.topK != False: self.W_sparse = similarityMatrixTopK(self.W_sparse, k=self.topK) self.W_sparse = check_matrix(self.W_sparse, format='csr') self.RM = self.URM_train.dot(self.W_sparse) def get_expected_ratings(self, user_id): expected_recommendations = self.RM[user_id].todense() return np.squeeze(np.asarray(expected_recommendations)) if __name__ == '__main__': train, test = split_train_leave_k_out_user_wise(get_data()['URM_all'], k_out=1) urm = train + test evaluator = EvaluatorHoldout(test, [10]) rp3 = RP3betaRecommender(urm) rp3.fit(alpha=0.032949920239451876, beta=0.14658580479486563, normalize_similarity=True, topK=75) # rp3.evaluate_MAP_target(test, get_data()['target_users']) # result, result_string = evaluator.evaluateRecommender(rp3) write_output(rp3, get_data()['target_users']) #print(f"MAP: {result[10]['MAP']:.5f}")
neg_item_id = np.random.randint(0, self.n_items) if neg_item_id not in userSeenItems: return pos_item_id, neg_item_id def sampleTriple(self): """ Randomly samples a user and then samples randomly a seen and not seen item :return: user_id, pos_item_id, neg_item_id """ user_id = self.sampleUser() pos_item_id, neg_item_id = self.sampleItemPair(user_id) return user_id, pos_item_id, neg_item_id if __name__ == '__main__': train, test = split_train_leave_k_out_user_wise(get_data()['URM_all'], k_out=1) evaluator = EvaluatorHoldout(test, [10], target_users=get_data()['target_users']) slim = SLIM_BPR(train) slim.fit(1) result, result_string = evaluator.evaluateRecommender(slim) # write_output(itemCF, get_data()['target_users']) print(f"MAP: {result[10]['MAP']:.5f}")