from Utils.s_plus import dot_product
import scipy.sparse as sps

from Base.Evaluation.Evaluator import EvaluatorHoldout
from GraphBased.RP3betaRecommender import RP3betaRecommender
from ParameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from skopt.space import Real, Integer, Categorical
from ParameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

# NOTE: DataManager, UserKNNCFRecommender and User_CFW_D_Similarity_Linalg must
# also be imported from their project modules (the paths are repo-specific).

data = DataManager()
ucm_age, ucm_region, ucm_all = data.get_ucm()
icm_price, icm_asset, icm_sub, icm_all = data.get_icm()

# Fit a user-based CF model and take its similarity matrix as the target
# for collaborative feature weighting (CFW)
recommender_4 = UserKNNCFRecommender(data.get_urm())
recommender_4.fit(shrink=2, topK=600, normalize=True)
W_sparse_CF = recommender_4.W_sparse

cfw = User_CFW_D_Similarity_Linalg(URM_train=data.get_urm(),
                                   UCM=ucm_all.copy(),
                                   S_matrix_target=W_sparse_CF)
cfw.fit(topK=1740, add_zeros_quota=0.3528735601555612, normalize_similarity=True)

# Scale each UCM feature by its learned weight and persist the result
weights = sps.diags(cfw.D_best)
ucm_weighted = ucm_all.dot(weights)
sps.save_npz("Data/ucm_weighted.npz", ucm_weighted.tocsr())
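# Usage note: a minimal sketch of how the saved weighted UCM might be loaded
# back for a downstream model. The UserKNNCBFRecommender call is an assumption,
# mirroring how ucm_all is consumed elsewhere in this project.
ucm_weighted = sps.load_npz("Data/ucm_weighted.npz")
# rec = UserKNNCBFRecommender(ucm_weighted, data.get_urm())
# rec.fit(topK=600, shrink=2, similarity="cosine")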
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    seed = 1205

    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        #EASE_R_Recommender
        #PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
        #LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout
    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    # TODO: set the RIGHT EVALUATORS here!!!!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases=50,
                                                 n_random_starts=20,
                                                 evaluator_validation=evaluator_valid_sub,
                                                 evaluator_test=evaluator_valid_hybrid,
                                                 metric_to_optimize="MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN=False,
                                                 allow_weighting=True,
                                                 #similarity_type_list = ['cosine']
                                                 )

    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """

    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    rp3b.fit(**rp3b_params)
    print("Done")

    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    p3a.fit(**p3a_params)
    print("Done")

    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 100, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0}
    icf.fit(**icf_params)
    print("Done")

    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    ucf.fit(**ucf_params)
    print("Done")

    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'}
    icb.fit(**icb_params)
    print("Done")

    print("SlimBPR training...")
    sbpr = SLIM_BPR_Cython(URM_train, verbose=False)
    sbpr_params = {'topK': 979, 'epochs': 130, 'symmetric': False, 'sgd_mode': 'adam',
                   'lambda_i': 0.004947329669424629, 'lambda_j': 1.1534760845071758e-05,
                   'learning_rate': 0.0001}
    sbpr.fit(**sbpr_params)
    print("Done")

    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    sen.fit(**sen_params)
    print("Done")

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):
            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(seed) + '/' + recommender_names + '/'

            # If directory does not exist, create it
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the RIGHT EVALUATORS here!!!!
            runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                               URM_train=URM_train,
                                                               ICM_train=ICM_obj,
                                                               metric_to_optimize="MAP",
                                                               n_cases=50,
                                                               n_random_starts=20,
                                                               evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                               evaluator_validation=evaluator_valid_hybrid,
                                                               evaluator_test=evaluator_test,
                                                               output_folder_path=output_folder_path,
                                                               allow_weighting=False,
                                                               #similarity_type_list = ["cosine", 'jaccard'],
                                                               parallelizeKNN=False,
                                                               list_rec=rec_perm)

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
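# One detail worth noting above: `rec_perm not in combinations(list_already_seen, 3)`
# rebuilds and linearly scans a fresh generator on every loop iteration. A minimal
# equivalent sketch with a one-off set (same names as above; recommender instances
# are hashable, so tuples of them can live in a set; run_search_for_triple is a
# hypothetical stand-in for the loop body):
from itertools import combinations

seen_triples = set(combinations(list_already_seen, 3))

for rec_perm in combinations(list_recommender, 3):
    if rec_perm in seen_triples:
        continue  # this triple was already searched in a previous run
    run_search_for_triple(rec_perm)  # hypothetical: run the parameter search as above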
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    seed = 1205

    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    URM_valid_hybrid = parser.filter_URM_test_by_range(URM_train, URM_valid_hybrid, (3, -1))

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")

    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 753, 'alpha': 0.3873710051288722, 'beta': 0.0, 'normalize_similarity': False}
    rp3b.fit(**rp3b_params)
    print("Done")

    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 438, 'alpha': 0.41923120471415165, 'normalize_similarity': False}
    p3a.fit(**p3a_params)
    print("Done")

    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 565, 'shrink': 554, 'similarity': 'tversky', 'normalize': True,
                  'tversky_alpha': 1.9109121434662428, 'tversky_beta': 1.7823834698905734}
    icf.fit(**icf_params)
    print("Done")

    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    ucf.fit(**ucf_params)
    print("Done")

    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
                  'feature_weighting': 'BM25'}
    icb.fit(**icb_params)
    print("Done")

    """
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 954, 'l1_ratio': 3.87446082207643e-05, 'alpha': 0.07562657698792305}
    sen.fit(**sen_params)
    print("Done")
    """

    list_recommender = [icb, icf, ucf, p3a, rp3b]
    list_already_seen = []

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):
            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(seed) + '_3--1' + '/' + recommender_names + '/'

            # If directory does not exist, create it
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the RIGHT EVALUATORS here!!!!
            runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                               URM_train=URM_train,
                                                               ICM_train=ICM_obj,
                                                               metric_to_optimize="MAP",
                                                               n_cases=50,
                                                               n_random_starts=20,
                                                               evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                               evaluator_validation=evaluator_valid_hybrid,
                                                               evaluator_test=evaluator_test,
                                                               output_folder_path=output_folder_path,
                                                               allow_weighting=False,
                                                               # similarity_type_list = ["cosine", 'jaccard'],
                                                               parallelizeKNN=False,
                                                               list_rec=rec_perm)

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
def runParameterSearch_Collaborative(recommender_class, URM_train, ICM_1, ICM_2,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None, evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_root_path="result_experiments/",
                                     parallelizeKNN=True, n_cases=100):
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    try:
        output_root_path_rec_name = output_root_path + recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSearch(recommender_class,
                                         evaluator_validation=evaluator_validation,
                                         evaluator_test=evaluator_test)

        if recommender_class in [TopPop, Random]:
            recommender = recommender_class(URM_train)
            recommender.fit()

            output_file = open(output_root_path_rec_name + "_BayesianSearch.txt", "a")

            result_dict, result_baseline = evaluator_validation.evaluateRecommender(recommender)
            output_file.write("ParameterSearch: Best result evaluated on URM_validation. Results: {}".format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(output_root_path_rec_name + "_best_result_validation", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(recommender)
            output_file.write("ParameterSearch: Best result evaluated on URM_test. Results: {}".format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(output_root_path_rec_name + "_best_result_test", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:
            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(run_KNNCFRecommender_on_similarity_type,
                                                                      parameterSearch=parameterSearch,
                                                                      URM_train=URM_train,
                                                                      n_cases=n_cases,
                                                                      output_root_path=output_root_path_rec_name,
                                                                      metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:
            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(run_KNNCFRecommender_on_similarity_type,
                                                                      parameterSearch=parameterSearch,
                                                                      URM_train=URM_train,
                                                                      n_cases=n_cases,
                                                                      output_root_path=output_root_path_rec_name,
                                                                      metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        # if recommender_class is MultiThreadSLIM_RMSE:
        #
        #     hyperparamethers_range_dictionary = {}
        #     hyperparamethers_range_dictionary["topK"] = [50, 100]
        #     hyperparamethers_range_dictionary["l1_penalty"] = [1e-2, 1e-3, 1e-4]
        #     hyperparamethers_range_dictionary["l2_penalty"] = [1e-2, 1e-3, 1e-4]
        #
        #     recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        #                              DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        #                              DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        #                              DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        #                              DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [True, False]

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is HybridRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["w_itemcf"] = [x * 0.1 + 1 for x in range(0, 10)]
            hyperparamethers_range_dictionary["w_usercf"] = [x * 0.1 for x in range(0, 10)]
            hyperparamethers_range_dictionary["w_cbart"] = [x * 0.1 for x in range(0, 10)]
            hyperparamethers_range_dictionary["w_cbalb"] = [x * 0.1 for x in range(0, 10)]
            hyperparamethers_range_dictionary["w_slim"] = [x * 0.1 for x in range(0, 10)]
            #hyperparamethers_range_dictionary["w_svd"] = [x * 0.05 for x in range(0, 20)]
            #hyperparamethers_range_dictionary["w_rp3"] = [x * 0.05 for x in range(0, 20)]

            item = ItemKNNCFRecommender(URM_train)
            user = UserKNNCFRecommender(URM_train)
            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)
            item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)
            user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)
            SLIM.fit(l1_penalty=1e-05, l2_penalty=0, positive_only=True, topK=150,
                     alpha=0.00415637376180466)

            CBArt = ItemKNNCBFRecommender(ICM=ICM_1, URM_train=URM_train)
            CBArt.fit(topK=160, shrink=5, similarity='cosine', normalize=True, feature_weighting="none")
            CBAlb = ItemKNNCBFRecommender(ICM=ICM_2, URM_train=URM_train)
            CBAlb.fit(topK=160, shrink=5, similarity='cosine', normalize=True, feature_weighting="none")

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: {"ICM_Art": ICM_1,
                                                                       "ICM_Alb": ICM_2,
                                                                       "item": item,
                                                                       "user": user,
                                                                       "SLIM": SLIM,
                                                                       "CBArt": CBArt,
                                                                       "CBAlb": CBAlb},
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["beta"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [True]
            hyperparamethers_range_dictionary["implicit"] = [True]

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [1e-2, 1e-3, 1e-4, 1e-5]

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: {"validation_every_n": 5,
                                                                       "stop_on_validation": True,
                                                                       "evaluator_object": evaluator_validation_earlystopping,
                                                                       "lower_validatons_allowed": 20,
                                                                       "validation_metric": metric_to_optimize},
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is FunkSVD:
            hyperparamethers_range_dictionary = {}
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [0.0, 1e-03, 1e-06, 1e-09]
            hyperparamethers_range_dictionary["learning_rate"] = [1e-02, 1e-03]

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(100, 500, 10)
            hyperparamethers_range_dictionary["batch_size"] = [100, 200, 300, 400]
            hyperparamethers_range_dictionary["positive_reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["negative_reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [1e-2, 1e-3, 1e-4, 1e-5]
            hyperparamethers_range_dictionary["user_reg"] = [1e-3, 1e-4, 1e-5, 1e-6]

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {'positive_threshold': 1},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: {"validation_every_n": 5,
                                                                       "stop_on_validation": True,
                                                                       "evaluator_object": evaluator_validation_earlystopping,
                                                                       "lower_validatons_allowed": 20,
                                                                       "validation_metric": metric_to_optimize},
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = list(range(0, 250, 5))

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [800, 900, 1000, 1200]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad"]
            hyperparamethers_range_dictionary["lambda_i"] = [1e-6]
            hyperparamethers_range_dictionary["lambda_j"] = [1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [0.01, 0.001, 1e-4, 1e-5, 0.1]

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {'train_with_sparse_weights': True,
                                                                               'symmetric': True,
                                                                               'positive_threshold': 1},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: {"validation_every_n": 10,
                                                                       "stop_on_validation": True,
                                                                       "evaluator_object": evaluator_validation_earlystopping,
                                                                       "lower_validatons_allowed": 3,
                                                                       "validation_metric": metric_to_optimize},
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        ##########################################################################################################

        if recommender_class is MultiThreadSLIM_ElasticNet:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [3300, 4300, 5300, 6300, 7300]
            hyperparamethers_range_dictionary["l1_penalty"] = [1e-5, 1e-6, 1e-4, 1e-3]
            hyperparamethers_range_dictionary["l2_penalty"] = [1e-4]
            hyperparamethers_range_dictionary["alpha"] = range(0, 1)

            recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                                     DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                                     DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                                     DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                                     DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

        #########################################################################################################

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(recommenderDictionary,
                                                 n_cases=n_cases,
                                                 output_root_path=output_root_path_rec_name,
                                                 metric=metric_to_optimize)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        error_file = open(output_root_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
        error_file.close()
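# Usage note: a minimal sketch of reading back one of the pickled result
# dictionaries written by the TopPop/Random branch above. The exact file name
# depends on recommender_class.RECOMMENDER_NAME ("TopPopRecommender" is an
# assumption), and the cutoff-keyed layout assumes an EvaluatorHoldout-style
# result dict, as used elsewhere in this project with cutoff_list=[10].
import pickle

with open("result_experiments/TopPopRecommender_best_result_validation", "rb") as f:
    result_dict = pickle.load(f)

print(result_dict[10]["MAP"])  # MAP at cutoff 10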
base_recommenders = []
tested_recommenders = []
datas = [None] * anti_overfitting
tested_users = []
rec_model = []

for j in range(anti_overfitting):
    data_reader = DataReader()
    datas[j] = DataObject(data_reader, 1, random_seed=(50 + j * 10))

    # TODO: Edit here
    # TODO: If you edit here, remember to edit at the end too
    # Insert the mix of recommender systems
    rec1 = UserKNNCFRecommender(datas[j].urm_train)
    rec1.fit(shrink=1000, topK=1000, similarity="cosine", feature_weighting="TF-IDF")
    rec2 = RP3betaRecommender(datas[j].urm_train)
    rec2.fit(topK=5000, alpha=0.35, beta=0.025, implicit=True)
    rec3 = ItemKNNCFRecommender(datas[j].urm_train)
    rec3.fit(topK=200, shrink=1000, similarity="jaccard", feature_weighting="TF-IDF")
    rec4 = UserKNNCBFRecommender(datas[j].ucm_all, datas[j].urm_train)
    rec4.fit(topK=5000, shrink=5, feature_weighting="TF-IDF", similarity="euclidean")
    rec5 = ItemKNNCBFRecommender(datas[j].urm_train, datas[j].icm_all_augmented)
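# The anti_overfitting loop above builds one split per random seed so that
# candidate hybrids can be scored on several holdouts instead of one. A minimal
# sketch of the averaging step this enables; the EvaluatorHoldout usage, the
# urm_test attribute, and a filled tested_recommenders list (one fitted
# candidate per split) are assumptions based on how the splits are used here.
from Base.Evaluation.Evaluator import EvaluatorHoldout

maps = []
for j in range(anti_overfitting):
    evaluator = EvaluatorHoldout(datas[j].urm_test, cutoff_list=[10])
    result_dict, _ = evaluator.evaluateRecommender(tested_recommenders[j])
    maps.append(result_dict[10]["MAP"])

# Averaging over splits reduces the variance of a single train/valid holdout
print("Mean MAP@10 over {} splits: {:.5f}".format(anti_overfitting, sum(maps) / len(maps)))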
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    seed = 1205

    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")

    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    try:
        rp3b.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')
        print(f"{rp3b.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {rp3b.RECOMMENDER_NAME} ...")
        rp3b.fit(**rp3b_params)
        print("done.")
        rp3b.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')

    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    try:
        p3a.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')
        print(f"{p3a.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {p3a.RECOMMENDER_NAME} ...")
        p3a.fit(**p3a_params)
        print("done.")
        p3a.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')

    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True,
                  'asymmetric_alpha': 0.0}
    try:
        icf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')
        print(f"{icf.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {icf.RECOMMENDER_NAME} ...")
        icf.fit(**icf_params)
        print("done.")
        icf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')

    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    try:
        ucf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')
        print(f"{ucf.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {ucf.RECOMMENDER_NAME} ...")
        ucf.fit(**ucf_params)
        print("done.")
        ucf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')

    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    try:
        icb.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')
        print(f"{icb.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {icb.RECOMMENDER_NAME} ...")
        icb.fit(**icb_params)
        print("done.")
        icb.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    try:
        sen.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')
        print(f"{sen.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {sen.RECOMMENDER_NAME} ...")
        sen.fit(**sen_params)
        print("done.")
        sen.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')

    print("\nStart.")

    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]
    list_already_seen = []
    combinations_already_seen = []
    """
    (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b),
    (icb, p3a, sen), (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    """

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations_already_seen:
            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(seed) + '/linear_combination/' + recommender_names + '/'
            print(f"\nTESTING THE COMBO {recommender_names}")

            # If directory does not exist, create it
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: set the RIGHT EVALUATORS here!!!!
            runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                               URM_train=URM_train,
                                                               ICM_train=ICM_obj,
                                                               metric_to_optimize="MAP",
                                                               n_cases=50,
                                                               n_random_starts=20,
                                                               evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                               evaluator_validation=evaluator_valid_hybrid,
                                                               #evaluator_test=evaluator_test,
                                                               output_folder_path=output_folder_path,
                                                               allow_weighting=False,
                                                               # similarity_type_list = ["cosine", 'jaccard'],
                                                               parallelizeKNN=False,
                                                               list_rec=rec_perm)

            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
class HybridLinear10Recommender(BaseItemSimilarityMatrixRecommender):
    RECOMMENDER_NAME = "HybridLinear10Recommender"

    def __init__(self, URM_train, seed: int):
        super(HybridLinear10Recommender, self).__init__(URM_train)
        self.slimBPR = SLIM_BPR_Cython(URM_train)
        self.userKnnCF = UserKNNCFRecommender(URM_train)
        #self.itemcf = ItemKNNCFRecommender(urm)

    def fit(self, alpha=1):
        self.slimBPR.fit(epochs=135, topK=933, symmetric=False, sgd_mode='adagrad',
                         lambda_i=1.054e-05, lambda_j=1.044e-05, learning_rate=0.00029)
        self.userKnnCF.fit(topK=201, shrink=998, similarity='cosine', normalize=True,
                           feature_weighting='TF-IDF')
        self.alpha = alpha
        self.beta = 1 - alpha
        #self.gamma = alpha_gamma_ratio

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        # ATTENTION!
        # THIS METHOD WORKS ONLY IF user_id_array IS A SCALAR AND NOT AN ARRAY
        # TODO
        scores_slimBPR = self.slimBPR._compute_item_score(user_id_array=user_id_array)
        scores_userKnnCF = self.userKnnCF._compute_item_score(user_id_array=user_id_array)

        # normalization
        #slim_max = scores_slim.max()
        #rp3_max = scores_rp3.max()
        #itemcf_max = scores_itemcf.max()
        #if not slim_max == 0:
        #    scores_slim /= slim_max
        #if not rp3_max == 0:
        #    scores_rp3 /= rp3_max
        #if not itemcf_max == 0:
        #    scores_itemcf /= itemcf_max

        scores_total = self.alpha * scores_slimBPR + self.beta * scores_userKnnCF  #+ self.gamma * scores_itemcf

        return scores_total

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
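# A minimal usage sketch for the class above, assuming URM_train / URM_test
# splits as produced elsewhere in this project. alpha sweeps the blend from
# pure UserKNN-CF (alpha=0) to pure SLIM-BPR (alpha=1).
from Base.Evaluation.Evaluator import EvaluatorHoldout

rec = HybridLinear10Recommender(URM_train, seed=1205)
rec.fit(alpha=0.7)  # 0.7 * SLIM-BPR scores + 0.3 * UserKNN-CF scores

evaluator = EvaluatorHoldout(URM_test, cutoff_list=[10])
result_dict, result_string = evaluator.evaluateRecommender(rec)
print(result_string)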
URM = data.get_URM()
ICM = data.get_ICM()
URM_train = data.get_URM_train()
URM_test = data.get_URM_test()

print("URM_train shape : {}".format(URM_train.shape))
print("URM_test shape : {}".format(URM_test.shape))
print("ICM shape : {}".format(ICM.shape))

evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])
evaluator_test = EvaluatorWrapper(evaluator_test)

UserBased = UserKNNCFRecommender(URM_train)
UserBased.fit(topK=500, shrink=10)

ContentBased = ItemKNNCBFRecommender(ICM, URM_train)
ContentBased.fit(topK=5, shrink=1000)

ItemKNNCF = ItemKNNCFRecommender(URM_train)
ItemKNNCF.fit(topK=400, shrink=50)

hybridRecommender_scores = ItemKNNScoresHybridRecommender_multiple(URM_train, ItemKNNCF, ContentBased, UserBased)
hybridRecommender_scores.fit(weight_1=HYBRID_ICF_CB_UCF_WEIGHTS[0],
                             weight_2=HYBRID_ICF_CB_UCF_WEIGHTS[1],
                             weight_3=HYBRID_ICF_CB_UCF_WEIGHTS[2])

# renamed from `dict` to avoid shadowing the built-in
result_dict, _ = evaluator_test.evaluateRecommender(hybridRecommender_scores)
def __init__(self, urm_train, eurm=False):
    super(HybridNormRecommender, self).__init__(urm_train)
    self.data_folder = Path(__file__).parent.parent.absolute()
    self.eurm = eurm
    self.num_users = urm_train.shape[0]

    urm_train = check_matrix(urm_train.copy(), 'csr')

    recommender_1 = HybridGen2Recommender(urm_train, eurm=self.eurm)
    recommender_1.fit()

    # recommender_2 = ItemKNNCFRecommender(urm_train)
    # recommender_2.fit(shrink=30, topK=20)

    recommender_2 = ItemKNNCFRecommender(urm_train)
    recommender_2.fit(topK=5, shrink=500, feature_weighting='BM25', similarity='tversky',
                      normalize=False, tversky_alpha=0.0, tversky_beta=1.0)

    recommender_3 = UserKNNCFRecommender(urm_train)
    recommender_3.fit(shrink=2, topK=600, normalize=True)

    # recommender_3 = UserKNNCFRecommender(urm_train)
    # recommender_3.fit(topK=697, shrink=1000, feature_weighting='TF-IDF', similarity='tversky',
    #                   normalize=False, tversky_alpha=1.0, tversky_beta=1.0)

    recommender_4 = RP3betaRecommender(urm_train)
    recommender_4.fit(topK=16, alpha=0.03374950051351756, beta=0.24087176329409027,
                      normalize_similarity=True)

    recommender_5 = SLIM_BPR_Cython(urm_train)
    recommender_5.fit(lambda_i=0.0926694015, lambda_j=0.001697250, learning_rate=0.002391,
                      epochs=65, topK=200)

    recommender_6 = ALSRecommender(urm_train)
    recommender_6.fit(alpha=5, iterations=40, reg=0.3)

    self.recommender_1 = recommender_1
    self.recommender_2 = recommender_2
    self.recommender_3 = recommender_3
    self.recommender_4 = recommender_4
    self.recommender_5 = recommender_5
    self.recommender_6 = recommender_6

    if self.eurm:
        if Path(self.data_folder / 'Data/uicm_sparse.npz').is_file():
            print("Previous uicm_sparse found")
            self.score_matrix_1 = sps.load_npz(self.data_folder / 'Data/uicm_sparse.npz')
        else:
            print("uicm_sparse not found, creating a new one...")
            self.score_matrix_1 = self.recommender_1._compute_item_matrix_score(np.arange(self.num_users))
            sps.save_npz(self.data_folder / 'Data/uicm_sparse.npz', self.score_matrix_1)

        self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_3 = self.recommender_3._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_4 = self.recommender_4._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_5 = self.recommender_5._compute_item_matrix_score(np.arange(self.num_users))
        self.score_matrix_6 = self.recommender_6._compute_item_score(np.arange(self.num_users))

        self.score_matrix_2 = normalize(self.score_matrix_2, norm='l2', axis=1)
        self.score_matrix_3 = normalize(self.score_matrix_3, norm='l2', axis=1)
        self.score_matrix_4 = normalize(self.score_matrix_4, norm='l2', axis=1)
        self.score_matrix_5 = normalize(self.score_matrix_5, norm='l2', axis=1)
        self.score_matrix_6 = normalize(self.score_matrix_6, norm='l2', axis=1)
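# The row-wise l2 normalization above puts each model's scores on a comparable
# scale before mixing. A self-contained sketch of that pattern with toy data;
# the 0.6/0.4 blend weights are illustrative only (the real coefficients would
# come from a fit method not shown here).
import numpy as np
import scipy.sparse as sps
from sklearn.preprocessing import normalize

# Toy score matrices: 3 users x 4 items, standing in for two recommenders
scores_a = sps.csr_matrix(np.array([[4., 0., 1., 0.], [0., 2., 0., 0.], [1., 1., 1., 0.]]))
scores_b = sps.csr_matrix(np.array([[0., 9., 0., 3.], [5., 0., 0., 1.], [0., 0., 7., 0.]]))

# After l2 normalization each user's score vector has unit norm, so one
# model's larger raw magnitudes cannot dominate the blend.
scores_a = normalize(scores_a, norm='l2', axis=1)
scores_b = normalize(scores_b, norm='l2', axis=1)

blended = 0.6 * scores_a + 0.4 * scores_b
print(blended.toarray())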
class LinearHybrid002(BaseItemSimilarityMatrixRecommender):
    RECOMMENDER_NAME = "LinearHybrid002"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        super(LinearHybrid002, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: 'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True
        self.__rec1 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec1_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}

        # seed 1205: 'topK': 100, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0
        self.__rec2 = ItemKNNCFRecommender(URM_train, verbose=False)
        self.__rec2_params = {'topK': 100, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True,
                              'asymmetric_alpha': 0.0}

        # seed 1205: 'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'
        self.__rec3 = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        self.__rec3_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
                              'feature_weighting': 'BM25'}

        self.__a = self.__b = self.__c = None
        self.seed = seed
        self.__submission = submission

    def fit(self, alpha=0.5, l1_ratio=0.5):
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b

        if not self.__submission:
            try:
                self.__rec1.load_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                                       'best_for_LinearHybrid002')
                print(f"{self.__rec1.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec1.RECOMMENDER_NAME} ...")
                self.__rec1.fit(**self.__rec1_params)
                print("done.")
                self.__rec1.save_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                                       'best_for_LinearHybrid002')

            try:
                self.__rec2.load_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                                       'best_for_LinearHybrid002')
                print(f"{self.__rec2.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec2.RECOMMENDER_NAME} ...")
                self.__rec2.fit(**self.__rec2_params)
                print("done.")
                self.__rec2.save_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                                       'best_for_LinearHybrid002')

            try:
                self.__rec3.load_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                                       'best_for_LinearHybrid002')
                print(f"{self.__rec3.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec3.RECOMMENDER_NAME} ...")
                self.__rec3.fit(**self.__rec3_params)
                print("done.")
                self.__rec3.save_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                                       'best_for_LinearHybrid002')
        else:
            self.__rec1.fit(**self.__rec1_params)
            self.__rec2.fit(**self.__rec2_params)
            self.__rec3.fit(**self.__rec3_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
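# The (alpha, l1_ratio) parameterization in fit() above maps two free
# parameters in [0, 1] onto three convex weights: a = alpha * l1_ratio,
# b = alpha - a, c = 1 - a - b, so a + b + c == 1 by construction and a 2D
# search covers the whole 3-weight simplex. A quick numeric check of that
# mapping (the helper name is illustrative):
def simplex_weights(alpha, l1_ratio):
    """Map (alpha, l1_ratio) in [0, 1]^2 to three weights summing to 1,
    mirroring LinearHybrid002.fit above."""
    a = alpha * l1_ratio
    b = alpha - a
    c = 1 - a - b
    return a, b, c

a, b, c = simplex_weights(alpha=0.6, l1_ratio=0.25)
print(a, b, c, a + b + c)  # ~0.15, ~0.45, ~0.40 -- sums to 1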
for recommender_class in recommender_list:

    try:
        print("Algorithm: {}".format(recommender_class))

        recommender = recommender_class(URM_train)

        item = ItemKNNCFRecommender(URM_train)
        user = UserKNNCFRecommender(URM_train)
        SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)
        item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)
        user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)
        SLIM.fit(l1_penalty=1e-05, l2_penalty=0, positive_only=True, topK=150,
                 alpha=0.00415637376180466)

        recommender.fit(ICM_Art, ICM_Alb, item=item, user=user, SLIM=SLIM,
                        w_itemcf=1.1, w_usercf=0.6, w_cbart=0.3,