def run_permutation_BPRMF(output_folder_path, permutation_index, USER_factors_perm, ITEM_factors_perm):
    """Evaluate a matrix-factorization model built from permuted latent factors.

    The factors are handed to MatrixFactorizationCustomFactorsRecommender
    through SearchSingleCase, with no validation evaluator and without saving
    the model. Results are written under
    ``<output_folder_path>BPRMF/BPRMF_<permutation_index>/``.

    ``n_users``, ``n_items``, ``n_factors``, ``URM_train``, ``URM_validation``
    and ``evaluator_test`` are not parameters: they are read from the
    enclosing scope.

    :param output_folder_path: root output folder; concatenated as a string,
        so it is expected to end with a path separator.
    :param permutation_index: index used to name the permutation subfolder.
    :param USER_factors_perm: user factors, shape ``(n_users, n_factors)``.
    :param ITEM_factors_perm: item factors, shape ``(n_items, n_factors)``.
    :raises AssertionError: if either factor matrix has an unexpected shape.
    """
    output_folder_path_permutation = output_folder_path + "{}/{}_{}/".format("BPRMF", "BPRMF", permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    assert USER_factors_perm.shape == (n_users, n_factors)
    assert ITEM_factors_perm.shape == (n_items, n_factors)

    parameterSearch = SearchSingleCase(MatrixFactorizationCustomFactorsRecommender,
                                       evaluator_validation = None,
                                       evaluator_test = evaluator_test)

    # Fit on the permuted factors received as arguments (the ones validated
    # above), not on whatever unpermuted matrices exist in the outer scope.
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS = [URM_train + URM_validation],
        FIT_KEYWORD_ARGS = {
            "USER_factors": USER_factors_perm,
            "ITEM_factors": ITEM_factors_perm,
        })

    parameterSearch.search(recommender_input_args,
                           save_model = "no",
                           resume_from_saved = True,
                           fit_hyperparameters_values = {},
                           output_folder_path = output_folder_path_permutation,
                           output_file_name_root = MatrixFactorizationCustomFactorsRecommender.RECOMMENDER_NAME)
def run_train_with_early_stopping(dataset_name, URM_train, URM_validation,
                                  UCM_CoupledCF, ICM_CoupledCF,
                                  evaluator_validation, evaluator_test,
                                  metric_to_optimize, result_folder_path,
                                  map_mode):
    """Train CoupledCF_RecommenderWrapper with early stopping and return the search metadata.

    The hyperparameters come from ``get_hyperparameters_for_dataset`` with
    ``"map_mode"`` overridden by the argument. The search is run on
    ``URM_train``; the "last test" run uses a copy of the same input
    arguments whose first constructor argument is ``URM_train + URM_validation``.

    :return: the content of ``<RECOMMENDER_NAME>_metadata.zip`` loaded from
        ``result_folder_path`` after the search.
    """
    # Create the output folder the first time it is needed
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    fit_hyperparameters = get_hyperparameters_for_dataset(dataset_name)
    fit_hyperparameters["map_mode"] = map_mode

    earlystopping_kwargs = dict(
        validation_every_n=5,
        stop_on_validation=True,
        lower_validations_allowed=5,
        evaluator_object=evaluator_validation,
        validation_metric=metric_to_optimize,
    )

    search_runner = SearchSingleCase(CoupledCF_RecommenderWrapper,
                                     evaluator_validation=evaluator_validation,
                                     evaluator_test=evaluator_test)

    validation_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_CoupledCF, ICM_CoupledCF],
        FIT_KEYWORD_ARGS=earlystopping_kwargs)

    # Same arguments, but the URM in first position also includes the validation data
    last_test_input_args = validation_input_args.copy()
    last_test_input_args.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

    recommender_name = CoupledCF_RecommenderWrapper.RECOMMENDER_NAME

    search_runner.search(validation_input_args,
                         recommender_input_args_last_test=last_test_input_args,
                         fit_hyperparameters_values=fit_hyperparameters,
                         output_folder_path=result_folder_path,
                         output_file_name_root=recommender_name,
                         save_model="last",
                         resume_from_saved=True,
                         evaluate_on_test="last")

    metadata_loader = DataIO(result_folder_path)
    return metadata_loader.load_data(file_name=recommender_name + "_metadata.zip")
def pretrain_MFBPR(
        URM_train,
        URM_train_full,
        evaluator_validation,
        evaluator_test,
        result_folder_path,
        metric_to_optimize,
):
    """Fit MFBPR_Wrapper through SearchSingleCase using fixed hyperparameters.

    The early-stopping run is built on ``URM_train``; the "last test" run is
    built on ``URM_train_full`` with no fit keyword arguments. Output files
    go to ``result_folder_path``, which is also passed to the model as
    ``"path_partial_results"``. Nothing is returned.
    """
    search_runner = SearchSingleCase(MFBPR_Wrapper,
                                     evaluator_validation=evaluator_validation,
                                     evaluator_test=evaluator_test)

    earlystopping_kwargs = dict(
        validation_every_n=5,
        stop_on_validation=True,
        lower_validations_allowed=5,
        evaluator_object=evaluator_validation,
        validation_metric=metric_to_optimize,
    )

    validation_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        FIT_KEYWORD_ARGS=earlystopping_kwargs)

    last_test_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full])

    fixed_hyperparameters = dict(
        batch_size=512,
        epochs=500,
        embed_size=64,
        negative_sample_per_positive=1,
        learning_rate=0.05,
        path_partial_results=result_folder_path,
    )

    search_runner.search(validation_input_args,
                         recommender_input_args_last_test=last_test_input_args,
                         fit_hyperparameters_values=fixed_hyperparameters,
                         output_folder_path=result_folder_path,
                         output_file_name_root=MFBPR_Wrapper.RECOMMENDER_NAME,
                         save_model="last",
                         resume_from_saved=True,
                         evaluate_on_test="last")
def pretrain_FMwrapper(URM_train_tuning_only,
                       URM_train_full,
                       evaluator_validation,
                       evaluator_test,
                       CFM_data_class_validation,
                       CFM_data_class_full,
                       result_folder_path:str,
                       metric_to_optimize:str,
                       dataset_name):
    """Fit FM_Wrapper through SearchSingleCase, using early stopping to pick the epoch.

    Hyperparameters come from ``get_FM_hyperparameters_for_dataset``. The
    early-stopping run is built from ``URM_train_tuning_only`` and
    ``CFM_data_class_validation``; the "last test" run is built from
    ``URM_train_full`` and ``CFM_data_class_full``. Output files go to
    ``result_folder_path``. Nothing is returned.
    """
    fm_hyperparameters = get_FM_hyperparameters_for_dataset(dataset_name)

    earlystopping_kwargs = dict(
        validation_every_n=5,
        stop_on_validation=True,
        lower_validations_allowed=5,
        evaluator_object=evaluator_validation,
        validation_metric=metric_to_optimize,
    )

    search_runner = SearchSingleCase(FM_Wrapper,
                                     evaluator_validation=evaluator_validation,
                                     evaluator_test=evaluator_test)

    validation_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_tuning_only, CFM_data_class_validation],
        FIT_KEYWORD_ARGS=earlystopping_kwargs)

    last_test_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

    search_runner.search(validation_input_args,
                         recommender_input_args_last_test=last_test_input_args,
                         fit_hyperparameters_values=fm_hyperparameters,
                         output_folder_path=result_folder_path,
                         output_file_name_root=FM_Wrapper.RECOMMENDER_NAME,
                         save_model="last",
                         resume_from_saved=True,
                         evaluate_on_test="last")
def run_train_with_early_stopping(output_folder_path, permutation_index, USER_factors_perm, ITEM_factors_perm,
                                  map_mode, metric_to_optimize, evaluator_validation, evaluator_test,
                                  URM_train, URM_validation):
    """Train ConvNCF_RecommenderWrapper with early stopping from permuted pretrained factors.

    The permuted factors are written to
    ``<output_folder_path>fit_ablation_<map_mode>/<map_mode>_<permutation_index>/best_model_latent_factors``
    (``np.save`` appends ``.npy``). That folder is passed to the model as
    ``"MF_latent_factors_folder"`` together with
    ``"load_pretrained_MFBPR_if_available": True``.

    ``n_users``, ``n_items`` and ``n_factors`` are not parameters: they are
    read from the enclosing scope.

    :param output_folder_path: root output folder; concatenated as a string,
        so it is expected to end with a path separator.
    :param permutation_index: index used to name the permutation subfolder.
    :param USER_factors_perm: user factors, shape ``(n_users, n_factors)``.
    :param ITEM_factors_perm: item factors, shape ``(n_items, n_factors)``.
    :param map_mode: used in the folder name and passed to the model as ``"map_mode"``.
    :param metric_to_optimize: validation metric used for early stopping.
    :param evaluator_validation: evaluator used for early stopping and validation.
    :param evaluator_test: evaluator used for the test evaluation.
    :param URM_train: training matrix for the early-stopping run.
    :param URM_validation: added to ``URM_train`` for the "last test" run.
    :raises AssertionError: if either factor matrix has an unexpected shape.
    """
    output_folder_path_permutation = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    assert USER_factors_perm.shape == (n_users, n_factors)
    assert ITEM_factors_perm.shape == (n_items, n_factors)

    # The two matrices usually differ in their first dimension. Letting NumPy
    # convert the ragged list implicitly raises ValueError on NumPy >= 1.24,
    # so the 2-element object array is built explicitly. When the shapes are
    # equal the plain stacked array is kept, as the implicit conversion gave.
    if USER_factors_perm.shape == ITEM_factors_perm.shape:
        latent_factors = np.array([USER_factors_perm, ITEM_factors_perm])
    else:
        latent_factors = np.empty(2, dtype=object)
        latent_factors[0] = USER_factors_perm
        latent_factors[1] = ITEM_factors_perm

    np.save(output_folder_path_permutation + "best_model_latent_factors", latent_factors)

    optimal_hyperparameters = {
        "batch_size": 512,
        "epochs": 1500,
        "load_pretrained_MFBPR_if_available": True,
        "MF_latent_factors_folder": output_folder_path_permutation,
        "embedding_size": 64,
        "hidden_size": 128,
        "negative_sample_per_positive": 1,
        "negative_instances_per_positive": 4,
        "regularization_users_items": 0.01,
        "regularization_weights": 10,
        "regularization_filter_weights": 1,
        "learning_rate_embeddings": 0.05,
        "learning_rate_CNN": 0.05,
        "channel_size": [32, 32, 32, 32, 32, 32],
        "dropout": 0.0,
        "epoch_verbose": 1,
        "temp_file_folder": None,
    }

    optimal_hyperparameters["map_mode"] = map_mode

    earlystopping_hyperparameters = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize,
    }

    parameterSearch = SearchSingleCase(ConvNCF_RecommenderWrapper,
                                       evaluator_validation=evaluator_validation,
                                       evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                                                        FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

    # The "last test" run is built on train + validation data
    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

    parameterSearch.search(recommender_input_args,
                           recommender_input_args_last_test=recommender_input_args_last_test,
                           fit_hyperparameters_values=optimal_hyperparameters,
                           output_folder_path=output_folder_path_permutation,
                           output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME,
                           save_model = "last",
                           resume_from_saved=True,
                           evaluate_on_test = "last")
def run_train_with_early_stopping(URM_train_tuning_only, URM_train_full, evaluator_validation, evaluator_test, CFM_data_class_validation, CFM_data_class_full, pretrained_FM_folder_path, output_folder_path, permutation_index, map_mode, metric_to_optimize):
    """Train CFM_wrapper for one permutation of the ablation study.

    Output goes to
    ``<output_folder_path>fit_ablation_<map_mode>/<map_mode>_<permutation_index>/``.
    If that folder already contains ``<RECOMMENDER_NAME>_metadata.zip`` the
    function returns immediately without training.

    For ``permutation_index == 0`` the model is trained with early stopping.
    For any other index the number of epochs is read from the metadata file
    of permutation 0 and the model is trained on the full training data with
    that fixed number of epochs.

    Returns None.
    """

    output_folder_path_permutation = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    # A metadata file in the output folder means this permutation was already processed: skip it
    if os.path.isfile(output_folder_path_permutation + CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip"):
        return

    # NOTE(review): `permutation` is not a parameter of this function; it is
    # presumably a variable of the enclosing/module scope set by the caller's
    # loop -- confirm, or pass it in explicitly.
    article_hyperparameters = {
        'pretrain_flag': 1,
        'pretrained_FM_folder_path': pretrained_FM_folder_path,
        'hidden_factor': 64,
        'epochs': 300,
        'batch_size': 256,
        'learning_rate': 0.01,
        'lamda_bilinear': 0,
        'keep': 0.8,
        'optimizer_type': 'AdagradOptimizer',
        'batch_norm': 0,
        'verbose': False,
        'regs': '[10,1]',
        'attention_size': 32,
        'attentive_pooling': False,
        'net_channel': '[32,32,32,32,32,32]',
        'num_field': 4,
        'permutation': list(permutation),
        'map_mode': map_mode
    }

    # "epochs_min" is half of the configured number of epochs (150 with the value above)
    earlystopping_hyperparameters = {
        "epochs_min": int(article_hyperparameters["epochs"]/2),
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    # Due to the extremely long evaluation time it is computationally too expensive to run earlystopping on all
    # permutations (estimated >60 days on high end GPU)
    # So, select the epochs only at permutation 0 independently for each of the three modes: "all_map", "main_diagonal", "off_diagonal"

    # try to load selected number of epochs, if not present run earlystopping again
    folder_permutation_0 = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, 0)

    if permutation_index == 0:

        # Permutation 0: early stopping on the tuning split, "last test" run on the full training data
        parameterSearch = SearchSingleCase(CFM_wrapper,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_tuning_only, CFM_data_class_validation],
                                                            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

        parameterSearch.search(recommender_input_args,
                               recommender_input_args_last_test=recommender_input_args_last_test,
                               fit_hyperparameters_values=article_hyperparameters,
                               output_folder_path=output_folder_path_permutation,
                               output_file_name_root=CFM_wrapper.RECOMMENDER_NAME,
                               save_model = "last",
                               resume_from_saved=True,
                               evaluate_on_test = "last")

    else:

        # Other permutations: reuse the number of epochs stored in the metadata of permutation 0.
        # load_data is called unconditionally here, so permutation 0 must have been run first.
        dataIO = DataIO(folder_path = folder_permutation_0)
        data_dict = dataIO.load_data(file_name = CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip")

        selected_epochs = data_dict["hyperparameters_best"]["epochs"]

        article_hyperparameters["epochs"] = selected_epochs

        # The test evaluator is passed in both evaluator roles and the model is fitted
        # directly on the full training data, with no early-stopping keyword arguments
        parameterSearch = SearchSingleCase(CFM_wrapper,
                                           evaluator_validation=evaluator_test,
                                           evaluator_test=evaluator_test)

        recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

        parameterSearch.search(recommender_input_args_last_test,
                               recommender_input_args_last_test=None,
                               fit_hyperparameters_values=article_hyperparameters,
                               output_folder_path=output_folder_path_permutation,
                               output_file_name_root=CFM_wrapper.RECOMMENDER_NAME,
                               save_model = "best",
                               resume_from_saved=True,
                               evaluate_on_test = "best")

        # NOTE(review): the steps below are placed inside this branch because they read
        # "result_on_test_best" and the "_best_model" file, which match the save_model="best" /
        # evaluate_on_test="best" search above -- confirm against the original layout.

        # Get the data in the correct format to be readable for the data parsing script
        # Put the results in the "result_on_last" field of the metadata file
        # Change the final model file name into the _best_model_last suffix
        metadata_file_name = CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip"

        dataIO = DataIO(folder_path = output_folder_path_permutation)
        search_metadata = dataIO.load_data(file_name = metadata_file_name)

        search_metadata["result_on_last"] = search_metadata["result_on_test_best"]

        dataIO.save_data(file_name = metadata_file_name,
                         data_dict_to_save = search_metadata)

        # Reload the saved "_best_model" and save it again under the "_best_model_last" name
        recommender_object = CFM_wrapper(URM_train_full, CFM_data_class_full)
        recommender_object.load_model(output_folder_path_permutation,
                                      file_name=CFM_wrapper.RECOMMENDER_NAME + "_best_model")

        recommender_object.save_model(output_folder_path_permutation,
                                      file_name=CFM_wrapper.RECOMMENDER_NAME + "_best_model_last")
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune = False,
                               flag_DL_article_default = False,
                               flag_MF_baselines_tune = False,
                               flag_DL_tune = False,
                               flag_print_results = False):
    """Run the Mult-VAE experiment on one dataset with the "cold_user" split.

    The data is loaded through the dataset reader, the splits are checked to
    be implicit and disjoint, and then the steps selected by the flags run.
    All output goes under
    ``result_experiments/<CONFERENCE_NAME>/<ALGORITHM_NAME>_<dataset_name>_cold_user/``.

    :param dataset_name: ``"movielens20m"`` or ``"netflixPrize"``.
    :param flag_baselines_tune: run the hyperparameter search for every class
        in the collaborative baseline list.
    :param flag_DL_article_default: fit Mult_VAE_RecommenderWrapper with the
        hyperparameters hard-coded below and early stopping.
    :param flag_MF_baselines_tune: run the cold-user hyperparameter search
        for the matrix-factorization baselines.
    :param flag_DL_tune: not used by this function.
    :param flag_print_results: write the LaTeX result and time tables.
    :raises ValueError: if ``dataset_name`` is not one of the supported names.
    """
    from Conferences.WWW.MultiVAE_our_interface.Movielens20M.Movielens20MReader import Movielens20MReader
    from Conferences.WWW.MultiVAE_our_interface.NetflixPrize.NetflixPrizeReader import NetflixPrizeReader

    split_type = "cold_user"

    result_folder_path = "result_experiments/{}/{}_{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name, split_type)

    if dataset_name == "movielens20m":
        dataset = Movielens20MReader(result_folder_path, split_type = split_type)

    elif dataset_name == "netflixPrize":
        dataset = NetflixPrizeReader(result_folder_path)

    else:
        # Fail here with a clear message instead of later on an unbound `dataset`
        raise ValueError("Unsupported dataset_name '{}': expected 'movielens20m' or 'netflixPrize'".format(dataset_name))

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    # split_type is fixed above, so this branch always runs
    if split_type == "cold_user":

        # Matrix-factorization baselines are tuned separately, see collaborative_MF_algorithm_list
        collaborative_algorithm_list = [
            Random,
            TopPop,
            # UserKNNCFRecommender,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
            EASE_R_Recommender,
            SLIM_BPR_Cython,
            SLIMElasticNetRecommender,
        ]

        URM_train = dataset.URM_DICT["URM_train"].copy()
        URM_train_all = dataset.URM_DICT["URM_train_all"].copy()
        URM_validation = dataset.URM_DICT["URM_validation"].copy()
        URM_test = dataset.URM_DICT["URM_test"].copy()

        # Ensure IMPLICIT data and DISJOINT sets
        assert_implicit_data([URM_train, URM_train_all, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train_all, URM_validation, URM_test])

        from Base.Evaluation.Evaluator import EvaluatorHoldout

        evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[100])
        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[20, 50, 100])

        # Both evaluators are wrapped together with URM_train_all
        evaluator_validation = EvaluatorUserSubsetWrapper(evaluator_validation, URM_train_all)
        evaluator_test = EvaluatorUserSubsetWrapper(evaluator_test, URM_train_all)

    runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                       URM_train = URM_train,
                                                       URM_train_last_test = URM_train + URM_validation,
                                                       metric_to_optimize = metric_to_optimize,
                                                       evaluator_validation_earlystopping = evaluator_validation,
                                                       evaluator_validation = evaluator_validation,
                                                       evaluator_test = evaluator_test,
                                                       output_folder_path = result_folder_path,
                                                       parallelizeKNN = False,
                                                       allow_weighting = True,
                                                       resume_from_saved = True,
                                                       n_cases = n_cases,
                                                       n_random_starts = n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # A failing baseline is reported and skipped so the others still run
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ###### Matrix Factorization Cold users

    collaborative_MF_algorithm_list = [
        PureSVDRecommender,
        IALSRecommender,
        NMFRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
    ]

    runParameterSearch_cold_user_MF_partial = partial(runParameterSearch_cold_user_MF,
                                                      URM_train = URM_train,
                                                      URM_train_last_test = URM_train + URM_validation,
                                                      metric_to_optimize = metric_to_optimize,
                                                      evaluator_validation_earlystopping = evaluator_validation,
                                                      evaluator_validation = evaluator_validation,
                                                      evaluator_test = evaluator_test,
                                                      output_folder_path = result_folder_path,
                                                      resume_from_saved = True,
                                                      n_cases = n_cases,
                                                      n_random_starts = n_random_starts)

    if flag_MF_baselines_tune:

        for recommender_class in collaborative_MF_algorithm_list:
            try:
                runParameterSearch_cold_user_MF_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            # dataset_name was validated above, so one of the two branches always matches
            if dataset_name == "movielens20m":
                epochs = 100

            elif dataset_name == "netflixPrize":
                epochs = 200

            multiVAE_article_hyperparameters = {
                "epochs": epochs,
                "batch_size": 500,
                "total_anneal_steps": 200000,
                "p_dims": None,
            }

            multiVAE_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }

            parameterSearch = SearchSingleCase(Mult_VAE_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                FIT_KEYWORD_ARGS = multiVAE_earlystopping_hyperparameters)

            # The "last test" run is built on train + validation data
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test = recommender_input_args_last_test,
                                   fit_hyperparameters_values=multiVAE_article_hyperparameters,
                                   output_folder_path = result_folder_path,
                                   resume_from_saved = True,
                                   output_file_name_root = Mult_VAE_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(Mult_VAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows of URM_test with at least one stored entry
        n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list = None,
                                           other_algorithm_list = [Mult_VAE_RecommenderWrapper],
                                           KNN_similarity_list = KNN_similarity_to_report_list,
                                           ICM_names_list = None,
                                           UCM_names_list = None)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("article_metrics"),
                                             metrics_list = ["RECALL", "NDCG"],
                                             cutoffs_list = [20, 50, 100],
                                             table_title = None,
                                             highlight_best = True)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("all_metrics"),
                                             metrics_list = ["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR_ALL_HITS",
                                                             "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                             cutoffs_list = [50],
                                             table_title = None,
                                             highlight_best = True)

        result_loader.generate_latex_time_statistics(file_name + "{}_latex_results.txt".format("time"),
                                                     n_evaluation_users=n_test_users,
                                                     table_title = None)
def read_data_split_and_search(dataset_variant, train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the CollaborativeDL experiment on one Citeulike variant.

    The data is read with the CollaborativeVAE reader, which is pointed at
    the CollaborativeVAE result folder. Results of this experiment go under
    ``result_experiments/<CONFERENCE_NAME>/<ALGORITHM_NAME>_citeulike_<dataset_variant>_<train_interactions>/``.

    ``flag_DL_article_default`` fits CollaborativeDL_Matlab_RecommenderWrapper
    with the hyperparameters hard-coded below; ``flag_print_results`` writes
    the LaTeX result and time tables. ``flag_baselines_tune`` and
    ``flag_DL_tune`` are not used by this function.
    """
    # Using dataReader from CollaborativeVAE_our_interface as they use the same data in the same way
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    folder_pattern = "result_experiments/{}/{}_citeulike_{}_{}/"

    result_folder_path = folder_pattern.format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)
    result_folder_path_CollaborativeVAE = folder_pattern.format(
        CONFERENCE_NAME, "CollaborativeVAE", dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path_CollaborativeVAE,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, the evaluation set is chosen as a subset of the train
    # while keeping the validation interactions in the train set
    if train_interactions != 1:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])

        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    else:
        # In this case the train data will contain validation data to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])

        exclude_seen_validation = False
        URM_train_last_test = URM_train

    assert_implicit_data([URM_train_last_test])

    # Create the output folder the first time it is needed
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation,
                                            cutoff_list=[150],
                                            exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(URM_test,
                                      cutoff_list=[50, 100, 150, 200, 250, 300])

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:
            article_hyperparameters = dict(
                para_lv=10,
                para_lu=1,
                para_ln=1e3,
                batch_size=128,
                epoch_sdae=200,
                epoch_dae=200,
            )

            search_runner = SearchSingleCase(CollaborativeDL_Matlab_RecommenderWrapper,
                                             evaluator_validation=evaluator_validation,
                                             evaluator_test=evaluator_test)

            item_content_matrix = dataset.ICM_DICT["ICM_tokens_TFIDF"]

            validation_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, item_content_matrix],
                FIT_KEYWORD_ARGS={})

            # Same arguments, with the URM in first position replaced by the "last test" one
            last_test_input_args = validation_input_args.copy()
            last_test_input_args.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            search_runner.search(validation_input_args,
                                 recommender_input_args_last_test=last_test_input_args,
                                 fit_hyperparameters_values=article_hyperparameters,
                                 output_folder_path=result_folder_path,
                                 resume_from_saved=True,
                                 output_file_name_root=CollaborativeDL_Matlab_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(CollaborativeDL_Matlab_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows of URM_test with at least one stored entry
        interactions_per_user = np.ediff1d(URM_test.indptr)
        n_test_users = np.sum(interactions_per_user >= 1)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list=None,
                                           other_algorithm_list=[CollaborativeDL_Matlab_RecommenderWrapper],
                                           KNN_similarity_list=KNN_similarity_to_report_list,
                                           ICM_names_list=ICM_names_to_report_list,
                                           UCM_names_list=None)

        all_metrics = [
            "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
            "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
            "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
            "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY",
        ]

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL"],
            cutoffs_list=[50, 100, 150, 200, 250, 300],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=all_metrics,
            cutoffs_list=[150],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search_CMN(dataset_name):
    """Run the CMN experiment on one dataset.

    Steps, in order: load the data, check the splits are implicit and
    disjoint, save the popularity plot and statistics, tune the collaborative
    baselines, fit CMN_RecommenderWrapper with the hyperparameters hard-coded
    below and early stopping, then print the LaTeX time and result tables.
    All output goes under
    ``result_experiments/<CONFERENCE_NAME>/<ALGORITHM_NAME>_<dataset_name>/``.

    :param dataset_name: ``"citeulike"``, ``"epinions"`` or ``"pinterest"``.
    :raises ValueError: if ``dataset_name`` is not one of the supported names.
    """
    from Conferences.SIGIR.CMN_our_interface.CiteULike.CiteULikeReader import CiteULikeReader
    from Conferences.SIGIR.CMN_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader
    from Conferences.SIGIR.CMN_our_interface.Epinions.EpinionsReader import EpinionsReader

    if dataset_name == "citeulike":
        dataset = CiteULikeReader()

    elif dataset_name == "epinions":
        dataset = EpinionsReader()

    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader()

    else:
        # Fail here with a clear message instead of later on an unbound `dataset`
        raise ValueError("Unsupported dataset_name '{}': expected 'citeulike', 'epinions' or 'pinterest'".format(dataset_name))

    output_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    metric_to_optimize = "HIT_RATE"

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])

    # The disjointness check applied to the negative test items differs per dataset
    if dataset_name == "citeulike":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_test, URM_test_negative])

    elif dataset_name == "pinterest":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])

    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test, URM_test_negative])

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative, cutoff_list=[5])
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative, cutoff_list=[5, 10])

    runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                       URM_train = URM_train,
                                                       metric_to_optimize = metric_to_optimize,
                                                       evaluator_validation_earlystopping = evaluator_validation,
                                                       evaluator_validation = evaluator_validation,
                                                       evaluator_test = evaluator_test,
                                                       output_folder_path = output_folder_path,
                                                       parallelizeKNN = False,
                                                       allow_weighting = True,
                                                       n_cases = 35)

    # Baselines are tuned sequentially; a failing one is reported and skipped
    for recommender_class in collaborative_algorithm_list:

        try:
            runParameterSearch_Collaborative_partial(recommender_class)

        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### CMN

    try:

        temp_file_folder = output_folder_path + "{}_log/".format(ALGORITHM_NAME)

        CMN_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "hops": 3,
            "neg_samples": 4,
            "reg_l2_cmn": 1e-1,
            "reg_l2_gmf": 1e-4,
            "pretrain": True,
            "learning_rate": 1e-3,
            "verbose": False,
            "temp_file_folder": temp_file_folder
        }

        # Batch size and embedding size are set per dataset
        if dataset_name == "citeulike":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 50

        elif dataset_name == "epinions":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 40

        elif dataset_name == "pinterest":
            CMN_article_parameters["batch_size"] = 256
            CMN_article_parameters["embed_size"] = 50

        CMN_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(CMN_RecommenderWrapper,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
            FIT_KEYWORD_ARGS = CMN_earlystopping_parameters)

        parameterSearch.search(recommender_parameters,
                               fit_parameters_values=CMN_article_parameters,
                               output_folder_path = output_folder_path,
                               output_file_name_root = CMN_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:

        print("On recommender {} Exception {}".format(CMN_RecommenderWrapper, str(e)))
        traceback.print_exc()

    # Number of rows with at least one stored entry in each evaluation matrix
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr)>=1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)

    print_time_statistics_latex_table(result_folder_path = output_folder_path,
                                      dataset_name = dataset_name,
                                      results_file_prefix_name = ALGORITHM_NAME,
                                      other_algorithm_list = [CMN_RecommenderWrapper],
                                      ICM_names_to_report_list = [],
                                      n_validation_users = n_validation_users,
                                      n_test_users = n_test_users,
                                      n_decimals = 2)

    print_results_latex_table(result_folder_path = output_folder_path,
                              results_file_prefix_name = ALGORITHM_NAME,
                              dataset_name = dataset_name,
                              metrics_to_report_list = ["HIT_RATE", "NDCG"],
                              cutoffs_to_report_list = [5, 10],
                              ICM_names_to_report_list = [],
                              other_algorithm_list = [CMN_RecommenderWrapper])
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the NeuMF experiment pipeline on a single dataset.

    The function loads the train/validation/test/negative URM matrices from
    the dataset reader, runs the implicit-data and disjointness assertions,
    saves the popularity plot and statistics, and then executes the steps
    selected by the flags.

    Args:
        dataset_name: "movielens1m" or "pinterest". Any other value prints a
            message and returns immediately, before any data is read.
        flag_baselines_tune: when True, call runParameterSearch_Collaborative
            for every class in ``collaborative_algorithm_list``.
        flag_DL_article_default: when True, run NeuMF_RecommenderWrapper
            through SearchSingleCase with ``neuMF_article_hyperparameters``.
        flag_DL_tune: accepted for signature compatibility; not used here.
        flag_print_results: when True, write the LaTeX result and time tables
            through ResultFolderLoader.

    Returns:
        None.
    """
    supported_dataset_names = ("movielens1m", "pinterest")

    # Fail early with a readable message instead of hitting an unbound
    # `dataset` variable further down.
    if dataset_name not in supported_dataset_names:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    from Conferences.WWW.NeuMF_our_interface.Movielens1M.Movielens1MReader import Movielens1MReader
    from Conferences.WWW.NeuMF_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)
    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader(result_folder_path)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["Training data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation + URM_test,
         URM_train + URM_validation,
         URM_test],
        ["Full data", "Training data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "HIT_RATE"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative,
        cutoff_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    # The final model of each baseline is refit on train + validation
    # (URM_train_last_test) before being evaluated on the test split.
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # One failing baseline must not stop the remaining ones.
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            if dataset_name == "movielens1m":
                num_factors = 64
            elif dataset_name == "pinterest":
                num_factors = 16

            neuMF_article_hyperparameters = {
                "epochs": 100,
                "epochs_gmf": 100,
                "epochs_mlp": 100,
                "batch_size": 256,
                "num_factors": num_factors,
                "layers": [num_factors * 4, num_factors * 2, num_factors],
                "reg_mf": 0.0,
                "reg_layers": [0, 0, 0],
                "num_negatives": 4,
                "learning_rate": 1e-3,
                "learning_rate_pretrain": 1e-3,
                "learner": "sgd",
                "learner_pretrain": "adam",
                "pretrain": True
            }

            neuMF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            parameterSearch = SearchSingleCase(
                NeuMF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=neuMF_earlystopping_hyperparameters)

            # Same arguments, but the last fit uses train + validation.
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=neuMF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=NeuMF_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                NeuMF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of users with at least one interaction in the test split.
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[NeuMF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=[1, 5, 10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the ConvNCF experiment pipeline on a single dataset.

    The function loads the train/validation/test/negative URM matrices from
    the dataset reader, prints the negative-item statistics, runs the
    implicit-data and disjointness assertions, and then executes the steps
    selected by the flags.

    Args:
        dataset_name: "gowalla" or "yelp". Any other value prints a message
            and returns immediately.
        flag_baselines_tune: when True, call runParameterSearch_Collaborative
            for every class in ``collaborative_algorithm_list``.
        flag_DL_article_default: when True, run ConvNCF_RecommenderWrapper
            through SearchSingleCase with ``article_hyperparameters``. The
            global ConvNCF session is closed afterwards even if the search
            raises; the exception itself still propagates to the caller.
        flag_DL_tune: accepted for signature compatibility; not used here.
        flag_print_results: when True, write the LaTeX result and time tables
            through ResultFolderLoader.

    Returns:
        None.
    """
    supported_dataset_names = ("gowalla", "yelp")

    # Reject unknown names before touching the file system or the readers.
    if dataset_name not in supported_dataset_names:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "gowalla":
        dataset = GowallaReader(result_folder_path)
    else:
        dataset = YelpReader(result_folder_path)

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    print_negative_items_stats(URM_train, URM_validation, URM_test,
                               URM_test_negative)

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # URM_test_negative contains duplicates in both train and test, so it is
    # deliberately left out of the disjointness check.
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=cutoff_list_test)

    # The final model of each baseline is refit on train + validation
    # (URM_train_last_test) before being evaluated on the test split.
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # One failing baseline must not stop the remaining ones.
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        article_hyperparameters = {
            "batch_size": 512,
            "epochs": 1500,
            "epochs_MFBPR": 500,
            "embedding_size": 64,
            "hidden_size": 128,
            "negative_sample_per_positive": 1,
            "negative_instances_per_positive": 4,
            "regularization_users_items": 0.01,
            "regularization_weights": 10,
            "regularization_filter_weights": 1,
            "learning_rate_embeddings": 0.05,
            "learning_rate_CNN": 0.05,
            "channel_size": [32, 32, 32, 32, 32, 32],
            "dropout": 0.0,
            "epoch_verbose": 1,
        }

        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
            "epochs_min": 150
        }

        parameterSearch = SearchSingleCase(
            ConvNCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        # Same arguments, but the last fit uses train + validation.
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        try:
            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME)
        finally:
            # ConvNCF keeps its session in global variables: close it even
            # when the search fails, so it is not leaked.
            ConvNCF.close_session(verbose=True)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of users with at least one interaction in the test split.
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[ConvNCF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=cutoff_list_test,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=cutoff_list_validation,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search_MultiVAE(dataset_name):
    """Run the MultiVAE experiment pipeline on a single dataset.

    The function loads the "cold_user" split, runs the implicit-data and
    disjointness assertions, runs runParameterSearch_Collaborative on each
    baseline, runs MultiVAE_RecommenderWrapper through SearchSingleCase with
    the parameters in ``multiVAE_article_parameters``, and finally prints the
    LaTeX time and result tables.

    Args:
        dataset_name: "movielens20m" or "netflixPrize". Any other value
            prints a message and returns immediately, before any data is
            read.

    Returns:
        None.
    """
    supported_dataset_names = ("movielens20m", "netflixPrize")

    # Fail early with a readable message instead of hitting an unbound
    # `dataset` variable further down.
    if dataset_name not in supported_dataset_names:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    from Conferences.WWW.MultiVAE_our_interface.Movielens20M.Movielens20MReader import Movielens20MReader
    from Conferences.WWW.MultiVAE_our_interface.NetflixPrize.NetflixPrizeReader import NetflixPrizeReader

    split_type = "cold_user"

    if dataset_name == "movielens20m":
        dataset = Movielens20MReader(split_type=split_type)
    elif dataset_name == "netflixPrize":
        dataset = NetflixPrizeReader()

    output_folder_path = "result_experiments/{}/{}_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name, split_type)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    metric_to_optimize = "NDCG"

    if split_type == "cold_user":

        collaborative_algorithm_list = [
            Random,
            TopPop,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
        ]

        URM_train = dataset.URM_train.copy()
        URM_train_all = dataset.URM_train_all.copy()
        URM_validation = dataset.URM_validation.copy()
        URM_test = dataset.URM_test.copy()

        # Ensure IMPLICIT data and DISJOINT sets
        assert_implicit_data(
            [URM_train, URM_train_all, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train_all, URM_validation, URM_test])

        from Base.Evaluation.Evaluator import EvaluatorHoldout

        evaluator_validation = EvaluatorHoldout(URM_validation,
                                                cutoff_list=[100])
        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[20, 50, 100])

        # Both evaluators are wrapped together with URM_train_all.
        evaluator_validation = EvaluatorUserSubsetWrapper(
            evaluator_validation, URM_train_all)
        evaluator_test = EvaluatorUserSubsetWrapper(evaluator_test,
                                                    URM_train_all)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # Baselines are run sequentially.
    for recommender_class in collaborative_algorithm_list:
        # One failing baseline must not stop the remaining ones.
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### MultiVAE

    try:

        output_root_path_MultiVAE = output_folder_path + "{}_log/".format(
            ALGORITHM_NAME)

        if dataset_name == "movielens20m":
            epochs = 100
        elif dataset_name == "netflixPrize":
            epochs = 200

        multiVAE_article_parameters = {
            "epochs": epochs,
            "batch_size": 500,
            "total_anneal_steps": 200000,
            "p_dims": None,
        }

        multiVAE_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize,
            "temp_file_folder": output_root_path_MultiVAE
        }

        parameterSearch = SearchSingleCase(
            MultiVAE_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=multiVAE_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=multiVAE_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=MultiVAE_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:

        print("On recommender {} Exception {}".format(
            MultiVAE_RecommenderWrapper, str(e)))
        traceback.print_exc()

    # Number of users with at least one interaction in each split.
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[MultiVAE_RecommenderWrapper],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["RECALL", "NDCG"],
        cutoffs_to_report_list=[20, 50, 100],
        other_algorithm_list=[MultiVAE_RecommenderWrapper])
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the NeuRec (UNeuRec / INeuRec) experiment pipeline on one dataset.

    The function loads the train/validation/test URM matrices from the
    dataset reader, runs the implicit-data and disjointness assertions, and
    then executes the steps selected by the flags.

    Args:
        dataset_name: "movielens1m", "hetrec", "filmtrust" or "frappe". Any
            other value prints a message and returns immediately.
        flag_baselines_tune: when True, call runParameterSearch_Collaborative
            for every class in ``collaborative_algorithm_list``.
        flag_DL_article_default: when True, run UNeuRec_RecommenderWrapper
            and then INeuRec_RecommenderWrapper through SearchSingleCase with
            the per-dataset ``article_hyperparameters``.
        flag_DL_tune: accepted for signature compatibility; not used here.
        flag_print_results: when True, write the LaTeX result and time tables
            through ResultFolderLoader.

    Returns:
        None.
    """
    # Per-dataset article settings, in this order:
    # (num_neurons, num_factors, dropout_percentage, learning_rate,
    #  regularization_rate, epochs, early_stopping_epochs_min)
    article_settings_by_dataset = {
        "movielens1m": (300, 50, 0.03, 1e-4, 0.1, 1500, 800),
        "hetrec": (300, 50, 0.03, 1e-4, 0.1, 1500, 800),
        "filmtrust": (150, 40, 0.00, 5e-5, 0.1, 100, 0),
        "frappe": (300, 50, 0.03, 1e-4, 0.01, 100, 0),
    }

    # Fail early with a readable message instead of hitting unbound
    # `dataset` / `article_hyperparameters` variables further down.
    if dataset_name not in article_settings_by_dataset:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)
    elif dataset_name == "hetrec":
        dataset = MovielensHetrec2011Reader(result_folder_path)
    elif dataset_name == "filmtrust":
        dataset = FilmTrustReader(result_folder_path)
    elif dataset_name == "frappe":
        dataset = FrappeReader(result_folder_path)

    (num_neurons, num_factors, dropout_percentage, learning_rate,
     regularization_rate, epochs,
     early_stopping_epochs_min) = article_settings_by_dataset[dataset_name]

    article_hyperparameters = {
        'num_neurons': num_neurons,
        'num_factors': num_factors,
        'dropout_percentage': dropout_percentage,
        'learning_rate': learning_rate,
        'regularization_rate': regularization_rate,
        'epochs': epochs,
        'batch_size': 1024,
        'display_epoch': None,
        'display_step': None,
        'verbose': True
    }

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # use max cutoff to compute full MAP and NDCG
    max_cutoff = URM_train.shape[1] - 1

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 50, max_cutoff]

    evaluator_validation = EvaluatorHoldout(
        URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    # The final model of each baseline is refit on train + validation
    # (URM_train_last_test) before being evaluated on the test split.
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # One failing baseline must not stop the remaining ones.
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 20,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
            'epochs_min': early_stopping_epochs_min
        }

        # Both NeuRec variants use the same hyperparameters and the same
        # procedure; a failure of the first must not prevent the second.
        for neurec_wrapper_class in (UNeuRec_RecommenderWrapper,
                                     INeuRec_RecommenderWrapper):

            try:

                parameterSearch = SearchSingleCase(
                    neurec_wrapper_class,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test)

                recommender_input_args = SearchInputRecommenderArgs(
                    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                    FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

                # Same arguments, but the last fit uses train + validation.
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                    0] = URM_train + URM_validation

                parameterSearch.search(
                    recommender_input_args,
                    recommender_input_args_last_test=recommender_input_args_last_test,
                    fit_hyperparameters_values=article_hyperparameters,
                    output_folder_path=result_folder_path,
                    resume_from_saved=True,
                    output_file_name_root=neurec_wrapper_class.RECOMMENDER_NAME)

            except Exception as e:

                print("On recommender {} Exception {}".format(
                    neurec_wrapper_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of users with at least one interaction in the test split.
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                INeuRec_RecommenderWrapper, UNeuRec_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP", "NDCG", "MRR"],
            cutoffs_list=[5, 10, 50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name +
            "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search_CMN(dataset_name):
    """Run the CMN experiment pipeline on a single dataset.

    The function loads the train/validation/test/negative URM matrices,
    prints debug information for user 3, normalizes the item popularity
    returned by get_popularity and passes it to set_parameters, runs the
    data assertions, saves the popularity plot and statistics, runs
    runParameterSearch_Collaborative on each baseline (skipped when the local
    ``test_mode`` toggle is True), runs CMN_RecommenderWrapper through
    SearchSingleCase with ``CMN_article_parameters`` and finally prints the
    LaTeX time and result tables.

    Args:
        dataset_name: "citeulike", "epinions" or "pinterest". Any other
            value prints a message and returns immediately, before any data
            is read.

    Returns:
        None.
    """
    supported_dataset_names = ("citeulike", "epinions", "pinterest")

    # Fail early with a readable message instead of hitting an unbound
    # `dataset` variable further down.
    if dataset_name not in supported_dataset_names:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    from Conferences.SIGIR.CMN_our_interface.CiteULike.CiteULikeReader import CiteULikeReader
    from Conferences.SIGIR.CMN_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader
    from Conferences.SIGIR.CMN_our_interface.Epinions.EpinionsReader import EpinionsReader

    if dataset_name == "citeulike":
        dataset = CiteULikeReader()
    elif dataset_name == "epinions":
        dataset = EpinionsReader()
    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader()

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Manual debugging toggles:
    #   test_mode - skip the baselines and evaluate/report only cutoff 5
    #   limit     - keep only the first `p` rows of every matrix
    test_mode = False
    limit = False

    if limit:
        p = 700
        URM_train = URM_train[:p, :]
        URM_validation = URM_validation[:p, :]
        URM_test = URM_test[:p, :]
        URM_test_negative = URM_test_negative[:p, :]

    # Debug output recorded during an earlier run:
    #   user: 3
    #   is_relevant_current_cutoff: [ True True True False False]
    #   recommended_items_current_cutoff: [ 65 86 68 3671 1341]
    #   Warning! is_relevant_current_cutoff.sum()>1: 3
    #   relevant_items: [65 68 81 86]
    #   relevant_items_rating: [1. 1. 1. 1.]
    #   items_to_compute: [ 42 62 65 68 81 86 148 218 559 662 776 792 1164
    #     1341 1418 1491 1593 1603 1617 1697 2140 2251 2446 2517 2566 2643
    #     2719 2769 2771 3081 3133 3161 3188 3268 3409 3666 3671 3845 3864
    #     3897 3984 4272 4327 4329 4431 4519 4565 4568 4718 4812 4915 5096
    #     5128 5137 5141 5184 5217 5241 5371 5394 5415 5492 5521 5775 5798
    #     5830 5831 5931 6005 6281 6375 6558 6638 6644 6661 6705 6881 6898
    #     6939 6970 7010 7018 7147 7224 7327 7404 7453 7466 7475 7561 7764
    #     8064 8102 8222 8368 8530 8957 9101 9322 9368 9619 9782 9832]

    print('USER 3')
    print('test ', URM_test[3])
    print('train ', URM_train[3])
    print('valid ', URM_validation[3])
    print('neg ', URM_test_negative[3])

    # During execution an HR > 1 was observed. The reason is that it was
    # computed on the validation set (which has several preferred items per
    # user, not just one).
    # In the end the HR will be less than or equal to one because it is
    # computed on the test set.

    # Min-max normalization of the item popularity.
    popularity = get_popularity(URM_train)

    min_value = np.min(popularity)
    max_value = np.max(popularity)
    gap = max_value - min_value

    popularity = (popularity - min_value) / gap

    print('Luciano > min:', min_value)
    print('Luciano > max:', max_value)
    print('Luciano > normalized popularity:', popularity)

    set_parameters(popularity=popularity,
                   loss_alpha=200,
                   loss_beta=0.02,
                   loss_scale=1,
                   loss_percentile=get_percentile(popularity, 45),
                   metrics_alpha=100,
                   metrics_beta=0.03,
                   metrics_gamma=5,
                   metrics_scale=1 / 15,
                   metrics_percentile=0.45,
                   new_loss=False)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    # Alternatives tried: "WEIGHTED_HIT_RATE", "CUSTOM_HIT_RATE"
    metric_to_optimize = "HIT_RATE"

    print('metric_to_optimize:', metric_to_optimize)

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    if dataset_name == "citeulike":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_test, URM_test_negative])

    elif dataset_name == "pinterest":
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test_negative])

    else:
        assert_disjoint_matrices(
            [URM_train, URM_validation, URM_test, URM_test_negative])

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        output_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    # Test evaluation and the final results table share the same cutoffs.
    if not test_mode:
        cutoff_list_test = [5, 10]
    else:
        cutoff_list_test = [5]

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation,
                                                       URM_test_negative,
                                                       cutoff_list=[5])
    evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                 URM_test_negative,
                                                 cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # Baselines are run sequentially.
    for recommender_class in collaborative_algorithm_list:
        # One failing baseline must not stop the remaining ones.
        try:
            if not test_mode:
                runParameterSearch_Collaborative_partial(recommender_class)
            else:
                print('skipping', recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### CMN

    # Parameters from original paper:
    # {
    #   "batch_size": 128,
    #   "decay_rate": 0.9,
    #   "embed_size": 50,
    #   "filename": "data/pinterest.npz",
    #   "grad_clip": 5.0,
    #   "hops": 2,
    #   "item_count": "9916",
    #   "l2": 0.1,
    #   "learning_rate": 0.001,
    #   "logdir": "result/004/",
    #   "max_neighbors": 1586,
    #   "neg_count": 4,
    #   "optimizer": "rmsprop",
    #   "optimizer_params": "{'momentum': 0.9, 'decay': 0.9}",
    #   "pretrain": "pretrain/pinterest_e50.npz",
    #   "save_directory": "result/004/",
    #   "tol": 1e-05,
    #   "user_count": "55187"
    # }

    try:

        temp_file_folder = output_folder_path + "{}_log/".format(
            ALGORITHM_NAME)

        CMN_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "hops": 3,
            "neg_samples": 4,
            "reg_l2_cmn": 1e-1,
            "reg_l2_gmf": 1e-4,
            "pretrain": True,
            "learning_rate": 1e-3,
            "verbose": False,
            "temp_file_folder": temp_file_folder
        }

        if dataset_name == "citeulike":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 50

        elif dataset_name == "epinions":
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 40

        elif dataset_name == "pinterest":
            # A batch size of 256 was used previously.
            CMN_article_parameters["batch_size"] = 128
            CMN_article_parameters["embed_size"] = 50

        CMN_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            CMN_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=CMN_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=CMN_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=CMN_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:

        print("On recommender {} Exception {}".format(CMN_RecommenderWrapper,
                                                      str(e)))
        traceback.print_exc()

    # Number of users with at least one interaction in each split.
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[CMN_RecommenderWrapper],
        ICM_names_to_report_list=[],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["HIT_RATE", "NDCG"],
        cutoffs_to_report_list=cutoff_list_test,
        ICM_names_to_report_list=[],
        other_algorithm_list=[CMN_RecommenderWrapper])
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     allow_weighting=True,
                                     similarity_type_list=None):
    """
    Tune a single collaborative recommender and store the outcome in output_folder_path.

    Three families of recommender are handled:
      * TopPop, GlobalEffects, Random: no hyperparameters, a single SearchSingleCase run.
      * ItemKNNCFRecommender, UserKNNCFRecommender: one search per similarity type,
        delegated to run_KNNRecommender_on_similarity_type (optionally in a process pool).
      * P3alphaRecommender, RP3betaRecommender, PureSVDRecommender,
        SLIMElasticNetRecommender: SearchBayesianSkopt over the ranges defined below.

    Every exception raised while searching (including the ValueError raised for an
    unsupported recommender_class) is printed, its traceback is printed, and one line is
    appended to "ErrorLog.txt" inside output_folder_path; it is not propagated.

    :param recommender_class: recommender class to tune; must expose RECOMMENDER_NAME
    :param URM_train: training matrix, passed as the only constructor positional argument
    :param metric_to_optimize: metric name forwarded to the search
    :param evaluator_validation: evaluator forwarded to the search object
    :param evaluator_test: evaluator forwarded to the search object
    :param evaluator_validation_earlystopping: accepted for interface compatibility,
        not used by this function
    :param output_folder_path: folder for results and the error log; created if missing.
        It is concatenated with file names, so it should end with a path separator
    :param parallelizeKNN: if True the KNN similarity types are searched in a process pool
    :param n_cases: number of cases forwarded to the search
    :param allow_weighting: forwarded to run_KNNRecommender_on_similarity_type
    :param similarity_type_list: KNN similarity names; defaults to
        cosine, jaccard, asymmetric, dice, tversky
    :return: None
    """
    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters,
            # therefore only one evaluation is needed
            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            parameterSearch.search(recommender_parameters,
                                   fit_parameters_values={},
                                   output_folder_path=output_folder_path,
                                   output_file_name_root=output_file_name_root)
            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_parameters=recommender_parameters,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)

                pool.close()
                pool.join()

            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################
        # Hyperparameter ranges of the recommenders tuned with the Bayesian search

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 800),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        elif recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 800),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "beta": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        elif recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 250),
            }

        elif recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 800),
                "l1_ratio": Real(low=1e-5, high=1.0, prior='log-uniform'),
                "alpha": Real(low=1e-3, high=1.0, prior='uniform'),
            }

        else:
            # Without this branch the final search would fail on unbound local
            # variables; raise a readable error instead (logged by the handler below)
            raise ValueError(
                "Recommender class {} is not supported by "
                "runParameterSearch_Collaborative".format(recommender_class))

        # All the recommenders above share the same constructor and fit arguments
        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})

        #########################################################################################################

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_parameters,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:
        # Deliberate best-effort: one failing recommender must not stop the experiment
        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        # The context manager closes the log even if the write itself fails
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Run the DELF experiment on one dataset.

    The function loads the data split, checks it, writes popularity plots and statistics,
    then, depending on the flags, tunes the collaborative baselines, trains the two DELF
    wrappers with the article hyperparameters and generates the latex result files.

    :param dataset_name: name containing 'amazon_music', 'movielens1m_ours' or
        'movielens1m_original'; a "_remove_cold_items" part enables the cold-item filter.
        Any other name prints a message and returns None
    :param flag_baselines_tune: if True, tune every recommender of the baseline list
    :param flag_DL_article_default: if True, run DELF_MLP and DELF_EF with the
        hyperparameters defined below
    :param flag_DL_tune: not used by this function
    :param flag_print_results: if True, write the latex tables
    :return: None
    """
    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    # Ensure both experiments use the same data
    dataset_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME,
        dataset_name.replace("_remove_cold_items", ""))

    if not os.path.exists(dataset_folder_path):
        os.makedirs(dataset_folder_path)

    if 'amazon_music' in dataset_name:
        dataset = AmazonMusicReader(dataset_folder_path)
    elif 'movielens1m_ours' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="ours")
    elif 'movielens1m_original' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="original")
    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train, URM_validation, URM_test, URM_test_negative = (
        dataset.URM_DICT[URM_name].copy()
        for URM_name in ("URM_train", "URM_validation", "URM_test",
                         "URM_test_negative"))

    # Ensure IMPLICIT data and DISJOINT matrices
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    cold_items_statistics(URM_train, URM_validation, URM_test,
                          URM_test_negative)

    # Common prefix of the popularity output files
    popularity_file_prefix = result_folder_path + "{}_{}_".format(
        ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Training data", "Test data"],
                         popularity_file_prefix + "popularity_plot")

    save_popularity_statistics(
        [
            URM_train + URM_validation + URM_test,
            URM_train + URM_validation,
            URM_test,
        ],
        ["Full data", "Training data", "Test data"],
        popularity_file_prefix + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    # Cold items are ignored only in the "_remove_cold_items" variant of the experiment
    remove_cold_items = "_remove_cold_items" in dataset_name
    ignore_items_validation = get_cold_items(
        URM_train) if remove_cold_items else None
    ignore_items_test = get_cold_items(
        URM_train + URM_validation) if remove_cold_items else None

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation,
        URM_test_negative,
        cutoff_list=cutoff_list_validation,
        ignore_items=ignore_items_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test,
        URM_test_negative,
        cutoff_list=cutoff_list_test,
        ignore_items=ignore_items_test)

    # The Evaluator automatically skips users with no test interactions
    # in this case we need the evaluation done with and without cold items to be comparable
    # So we ensure the users that are included in the evaluation are the same in both cases.
    evaluator_validation.users_to_evaluate = np.arange(URM_train.shape[0])
    evaluator_test.users_to_evaluate = np.arange(URM_train.shape[0])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for baseline_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(baseline_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    baseline_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
        }

        num_factors = 64

        article_hyperparameters = {
            'epochs': 500,
            'learning_rate': 0.001,
            'batch_size': 256,
            'num_negatives': 4,
            'layers': (num_factors * 4, num_factors * 2, num_factors),
            'regularization_layers': (0, 0, 0),
            'learner': 'adam',
            'verbose': False,
        }

        # Both DELF variants are trained with the same article hyperparameters,
        # MLP first and EF second
        for DELF_wrapper_class in (DELF_MLP_RecommenderWrapper,
                                   DELF_EF_RecommenderWrapper):

            parameterSearch = SearchSingleCase(
                DELF_wrapper_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

            # The last model, evaluated on the test data, is trained on train + validation
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=DELF_wrapper_class.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DELF_MLP_RecommenderWrapper, DELF_EF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        # Metrics reported in the article
        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=cutoff_list_test,
            table_title=None,
            highlight_best=True)

        # Complete set of metrics at a single cutoff
        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search_NeuCF(dataset_name):
    """
    Run the NeuMF experiment on one dataset.

    The function loads the data split, checks it, writes popularity plots and statistics,
    tunes the collaborative baselines, trains NeuMF with the hyperparameters defined
    below and prints the latex tables.

    Exceptions raised by a baseline search or by the NeuMF search are printed together
    with their traceback and do not stop the experiment.

    :param dataset_name: either "movielens1m" or "pinterest"
    :raises ValueError: if dataset_name is not one of the supported names
    :return: None
    """
    # Fail fast with a readable message: without this check an unsupported name
    # would leave `dataset` unbound and fail later with an UnboundLocalError
    if dataset_name not in ("movielens1m", "pinterest"):
        raise ValueError(
            "Dataset name not supported, current is {}".format(dataset_name))

    from Conferences.WWW.NeuMF_our_interface.Movielens1M.Movielens1MReader import Movielens1MReader
    from Conferences.WWW.NeuMF_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader()
    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader()

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"], output_folder_path +
                         algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    collaborative_algorithm_list = [
        Random, TopPop, UserKNNCFRecommender, ItemKNNCFRecommender,
        P3alphaRecommender, RP3betaRecommender, SLIMElasticNetRecommender
    ]

    metric_to_optimize = "HIT_RATE"

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation,
                                                       URM_test_negative,
                                                       cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative,
        cutoff_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # Baselines are tuned sequentially; a failure of one does not stop the others
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### NeuMF

    try:

        # The number of factors depends on the dataset
        if dataset_name == "movielens1m":
            num_factors = 64
        elif dataset_name == "pinterest":
            num_factors = 16

        neuMF_article_parameters = {
            "epochs": 100,
            "epochs_gmf": 100,
            "epochs_mlp": 100,
            "batch_size": 256,
            "num_factors": num_factors,
            "layers": [num_factors * 4, num_factors * 2, num_factors],
            "reg_mf": 0.0,
            "reg_layers": [0, 0, 0],
            "num_negatives": 4,
            "learning_rate": 1e-3,
            "learning_rate_pretrain": 1e-3,
            "learner": "sgd",
            "learner_pretrain": "adam",
            "pretrain": True
        }

        neuMF_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            NeuMF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=neuMF_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=neuMF_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=NeuMF_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        print("On recommender {} Exception {}".format(NeuMF_RecommenderWrapper,
                                                      str(e)))
        traceback.print_exc()

    # Number of users having at least one interaction in each split
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[NeuMF_RecommenderWrapper],
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(result_folder_path=output_folder_path,
                              results_file_prefix_name=ALGORITHM_NAME,
                              dataset_name=dataset_name,
                              metrics_to_report_list=["HIT_RATE", "NDCG"],
                              cutoffs_to_report_list=[1, 5, 10],
                              other_algorithm_list=[NeuMF_RecommenderWrapper])
def read_data_split_and_search_SpectralCF(dataset_name,
                                          cold_start=False,
                                          cold_items=None,
                                          isKNN_multiprocess=True,
                                          isKNN_tune=True,
                                          isSpectralCF_train_default=True,
                                          isSpectralCF_tune=True,
                                          print_results=True):
    """
    Run the SpectralCF experiment on one dataset.

    The function loads the data split, checks it, writes popularity plots and statistics,
    then, depending on the flags, tunes the collaborative baselines, trains or tunes
    SpectralCF and prints the latex tables.

    :param dataset_name: one of "movielens1m_original", "movielens1m_ours", "hetrec",
        "amazon_instant_video"
    :param cold_start: cold-start variant of the experiment; only "movielens1m_ours"
        accepts True, the other datasets assert it is not True
    :param cold_items: forwarded to the "movielens1m_ours" reader and used in the
        output folder and result file names when cold_start is set
    :param isKNN_multiprocess: if True the baselines are tuned in a process pool,
        otherwise sequentially (printing and skipping any exception)
    :param isKNN_tune: if True the collaborative baselines are tuned
    :param isSpectralCF_train_default: if True SpectralCF is trained with the
        hyperparameters defined below
    :param isSpectralCF_tune: if True, and isSpectralCF_train_default is False,
        runParameterSearch_SpectralCF is called
    :param print_results: if True the latex tables are printed
    :raises ValueError: if dataset_name is not one of the supported names
    :return: None
    """
    if dataset_name == "movielens1m_original":
        assert (cold_start is not True)
        dataset = Movielens1MReader(type="original")

    elif dataset_name == "movielens1m_ours":
        dataset = Movielens1MReader(type="ours",
                                    cold_start=cold_start,
                                    cold_items=cold_items)

    elif dataset_name == "hetrec":
        assert (cold_start is not True)
        dataset = MovielensHetrec2011Reader()

    elif dataset_name == "amazon_instant_video":
        assert (cold_start is not True)
        dataset = AmazonInstantVideoReader()

    else:
        # Without this branch `dataset` would stay unbound and the function would
        # fail later with an UnboundLocalError
        raise ValueError(
            "Dataset name not supported, current is {}".format(dataset_name))

    if not cold_start:
        output_folder_path = "result_experiments/{}/{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)
    else:
        output_folder_path = "result_experiments/{}/{}_cold_{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, cold_items, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"], output_folder_path +
                         algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    metric_to_optimize = "RECALL"

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    if not cold_start:
        cutoff_list_validation = [50]
        cutoff_list_test = [20, 30, 40, 50, 60, 70, 80, 90, 100]
    else:
        cutoff_list_validation = [20]
        cutoff_list_test = [20]

    evaluator_validation = EvaluatorHoldout(
        URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    ################################################################################################
    ###### KNN CF

    if isKNN_tune:
        collaborative_algorithm_list = [
            Random,
            TopPop,
            UserKNNCFRecommender,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
        ]

        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            metric_to_optimize=metric_to_optimize,
            evaluator_validation_earlystopping=evaluator_validation,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test,
            output_folder_path=output_folder_path,
            parallelizeKNN=False,
            allow_weighting=True,
            n_cases=35)

        if isKNN_multiprocess:
            pool = multiprocessing.Pool(
                processes=int(multiprocessing.cpu_count()),
                maxtasksperchild=1)
            # The return value of the searches is not used
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)

            pool.close()
            pool.join()

        else:
            for recommender_class in collaborative_algorithm_list:
                try:
                    runParameterSearch_Collaborative_partial(recommender_class)
                except Exception as e:
                    print("On recommender {} Exception {}".format(
                        recommender_class, str(e)))
                    traceback.print_exc()

    ################################################################################################
    ###### SpectralCF

    # NOTE(review): because of the elif, the tuning branch runs only when
    # isSpectralCF_train_default is False, although both flags default to True.
    # Confirm this is the intended behaviour.
    if isSpectralCF_train_default:
        try:

            spectralCF_article_parameters = {
                "epochs": 1000,
                "batch_size": 1024,
                "embedding_size": 16,
                "decay": 0.001,
                "k": 3,
                "learning_rate": 1e-3,
            }

            spectralCF_earlystopping_parameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 20,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
            }

            parameterSearch = SearchSingleCase(
                SpectralCF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=spectralCF_earlystopping_parameters)

            parameterSearch.search(
                recommender_parameters,
                fit_parameters_values=spectralCF_article_parameters,
                output_folder_path=output_folder_path,
                output_file_name_root=SpectralCF_RecommenderWrapper.
                RECOMMENDER_NAME + "_article_default")

        except Exception as e:
            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    elif isSpectralCF_tune:

        try:

            spectralCF_earlystopping_parameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 20,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
                "epochs": 2000
            }

            runParameterSearch_SpectralCF(
                SpectralCF_RecommenderWrapper,
                URM_train=URM_train,
                earlystopping_parameters=spectralCF_earlystopping_parameters,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                n_cases=35,
                output_file_name_root=SpectralCF_RecommenderWrapper.
                RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### print results

    if print_results:

        # Number of users having at least one interaction in each split
        n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        if not cold_start:
            results_file_root_name = ALGORITHM_NAME
        else:
            results_file_root_name = "{}_cold_{}".format(
                ALGORITHM_NAME, cold_items)

        print_time_statistics_latex_table(
            result_folder_path=output_folder_path,
            dataset_name=dataset_name,
            results_file_prefix_name=results_file_root_name,
            other_algorithm_list=[SpectralCF_RecommenderWrapper],
            n_validation_users=n_validation_users,
            n_test_users=n_test_users,
            n_decimals=2)

        if cold_start:
            cutoffs_to_report_list = [20]
        else:
            cutoffs_to_report_list = [20, 40, 60, 80, 100]

        print_results_latex_table(
            result_folder_path=output_folder_path,
            results_file_prefix_name=results_file_root_name,
            dataset_name=dataset_name,
            metrics_to_report_list=["RECALL", "MAP"],
            cutoffs_to_report_list=cutoffs_to_report_list,
            other_algorithm_list=[SpectralCF_RecommenderWrapper])
def read_data_split_and_search(dataset_variant,
                               train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Run the CollaborativeVAE experiment on one Citeulike split.

    The function loads the data split and checks it, then, depending on the flags, tunes
    the collaborative, content and hybrid baselines, trains CollaborativeVAE with the
    hyperparameters defined below and generates the latex result files.

    :param dataset_variant: forwarded to CiteulikeReader and used in folder and file names
    :param train_interactions: forwarded to CiteulikeReader; when equal to 1 the
        validation data is allowed to overlap the train data and is not added to the
        train data of the final model
    :param flag_baselines_tune: if True, tune the baselines
    :param flag_DL_article_default: if True, run CollaborativeVAE with the
        hyperparameters defined below
    :param flag_DL_tune: not used by this function
    :param flag_print_results: if True, write the latex tables
    :return: None
    """
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train, URM_validation, URM_test = (
        dataset.URM_DICT[URM_name].copy()
        for URM_name in ("URM_train", "URM_validation", "URM_test"))

    # This ICM is removed, so the loops over ICM_DICT below do not see it
    del dataset.ICM_DICT["ICM_tokens_bool"]

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, choosing an evaluation as subset of the train
    # While keeping validation interaction in the train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation,
        cutoff_list=[150],
        exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(
        URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train_last_test,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for baseline_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(baseline_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    baseline_class, str(e)))
                traceback.print_exc()

        # Keyword arguments shared by the content and the hybrid searches
        ICM_search_kwargs = dict(URM_train=URM_train,
                                 URM_train_last_test=URM_train_last_test,
                                 metric_to_optimize=metric_to_optimize,
                                 evaluator_validation=evaluator_validation,
                                 evaluator_test=evaluator_test,
                                 output_folder_path=result_folder_path,
                                 parallelizeKNN=False,
                                 allow_weighting=True,
                                 resume_from_saved=True)

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(ItemKNNCBFRecommender,
                                           **ICM_search_kwargs,
                                           ICM_name=ICM_name,
                                           ICM_object=ICM_object.copy(),
                                           n_cases=n_cases,
                                           n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                          **ICM_search_kwargs,
                                          ICM_name=ICM_name,
                                          ICM_object=ICM_object.copy(),
                                          n_cases=n_cases,
                                          n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            cvae_recommender_article_hyperparameters = {
                "epochs": 200,
                "learning_rate_vae": 1e-2,
                "learning_rate_cvae": 1e-3,
                "num_factors": 50,
                "dimensions_vae": [200, 100],
                "epochs_vae": [50, 50],
                "batch_size": 128,
                "lambda_u": 0.1,
                "lambda_v": 10,
                "lambda_r": 1,
                "a": 1,
                "b": 0.01,
                "M": 300,
            }

            cvae_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            parameterSearch = SearchSingleCase(
                CollaborativeVAE_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]
                ],
                FIT_KEYWORD_ARGS=cvae_earlystopping_hyperparameters)

            # The last model, evaluated on the test data, uses URM_train_last_test
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=
                cvae_recommender_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeVAE_RecommenderWrapper.
                RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                CollaborativeVAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        # NOTE(review): other_algorithm_list is not defined in this function;
        # presumably it is a module-level name - confirm
        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=other_algorithm_list,
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        # Metric reported in the article
        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL"],
            cutoffs_list=[50, 100, 150, 200, 250, 300],
            table_title=None,
            highlight_best=True)

        # Complete set of metrics at a single cutoff
        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[150],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Drive the DeepCF / CoupledCF experiments for one dataset split.

    Loads the requested split, checks the interaction matrices, and then,
    depending on the flags, tunes the baselines, trains DeepCF and CoupledCF
    with fixed hyperparameters, and writes the LaTeX result tables.

    :param dataset_name: must start with "movielens1m" or "tafeng" and end
        with "_original" or "_ours"; any other name prints a message and the
        function returns without doing anything else.
    :param flag_baselines_tune: tune the collaborative, item-content and
        user-content baselines.
    :param flag_DL_article_default: train DeepCF and then CoupledCF.
    :param flag_DL_tune: accepted but not used by this function.
    :param flag_print_results: generate the LaTeX result and timing tables.
    :return: None
    """
    result_folder_path = "result_experiments/IJCAI/CoupledCF_{}/".format(dataset_name)

    # The suffix of the name selects the split variant handed to the reader
    if dataset_name.endswith("_original"):
        split_type = 'original'
    elif dataset_name.endswith("_ours"):
        split_type = 'ours'
    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    # The prefix selects the reader and the item feature matrix used by CoupledCF
    if dataset_name.startswith("movielens1m"):
        dataset = Movielens1MReader(result_folder_path, type=split_type)
        item_features_name = "ICM_all"
    elif dataset_name.startswith("tafeng"):
        dataset = TafengReader(result_folder_path, type=split_type)
        item_features_name = "ICM_original"
    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    # User features are also read from the reader's ICM_DICT container
    UCM_to_report = ["UCM_all"]
    ICM_to_report = [item_features_name]
    UCM_CoupledCF = dataset.ICM_DICT["UCM_all"]
    ICM_CoupledCF = dataset.ICM_DICT[item_features_name]

    print('Current dataset is: {}'.format(dataset_name))

    # Split the feature matrices into user-side and item-side groups by name
    UCM_dict = {}
    ICM_dict = {}
    for feature_name, feature_matrix in dataset.ICM_DICT.items():
        if "UCM" in feature_name:
            UCM_dict[feature_name] = feature_matrix
        if "ICM" in feature_name:
            ICM_dict[feature_name] = feature_matrix

    URM_train, URM_validation, URM_test, URM_test_negative = (
        dataset.URM_DICT[split_name].copy()
        for split_name in ("URM_train", "URM_validation", "URM_test", "URM_test_negative")
    )

    # Matrices are 1-indexed, so the first row is left out of the statistics
    print_negative_items_stats(
        *[matrix[1:] for matrix in (URM_train, URM_validation, URM_test, URM_test_negative)]
    )

    # Ensure IMPLICIT data and disjoint train / validation / test
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [5]
    cutoff_list_test = list(range(1, 11))

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    def _feature_search_kwargs(feature_name, feature_matrix):
        # Keyword arguments shared by the content and hybrid searches. They are
        # rebuilt on every call so each search receives its own train+validation
        # sum and its own copy of the feature matrix.
        return dict(
            URM_train=URM_train,
            URM_train_last_test=URM_train + URM_validation,
            metric_to_optimize=metric_to_optimize,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test,
            output_folder_path=result_folder_path,
            parallelizeKNN=False,
            allow_weighting=True,
            resume_from_saved=True,
            ICM_name=feature_name,
            ICM_object=feature_matrix.copy(),
            n_cases=n_cases,
            n_random_starts=n_random_starts)

    def _search_article_default(recommender_class, constructor_args,
                                fit_keyword_args, hyperparameters):
        # Single fixed-hyperparameter run: validation on URM_train, final
        # model built on URM_train + URM_validation.
        parameterSearch = SearchSingleCase(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=constructor_args,
            FIT_KEYWORD_ARGS=fit_keyword_args)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=recommender_class.RECOMMENDER_NAME)

    ################################################################################################
    ###### Baselines

    if flag_baselines_tune:

        # A failure of one baseline is reported and does not stop the others
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

        ###### Item Content Baselines
        for ICM_name, ICM_object in ICM_dict.items():
            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    **_feature_search_kwargs(ICM_name, ICM_object))

                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    **_feature_search_kwargs(ICM_name, ICM_object))
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
                traceback.print_exc()

        ###### User Content Baselines
        for UCM_name, UCM_object in UCM_dict.items():
            try:
                runParameterSearch_Content(
                    UserKNNCBFRecommender,
                    **_feature_search_kwargs(UCM_name, UCM_object))

                runParameterSearch_Hybrid(
                    UserKNN_CFCBF_Hybrid_Recommender,
                    **_feature_search_kwargs(UCM_name, UCM_object))
            except Exception as e:
                print("On CBF recommender for UCM {} Exception {}".format(UCM_name, str(e)))
                traceback.print_exc()

    ################################################################################################
    ###### DL ALGORITHM

    if flag_DL_article_default:

        model_name = dataset.DATASET_NAME
        is_tafeng = 'tafeng' in dataset_name

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
        }

        # Only the learning rate differs between tafeng and the other datasets
        learning_rate = 0.005 if is_tafeng else 0.001

        def _article_hyperparameters(wrapper_dataset_name, model_number):
            return {
                'learning_rate': learning_rate,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': wrapper_dataset_name,
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }

        # DeepCF: model 3, dataset name taken from the reader
        article_hyperparameters = _article_hyperparameters(model_name, 3)
        _search_article_default(
            DeepCF_RecommenderWrapper,
            [URM_train],
            earlystopping_hyperparameters,
            article_hyperparameters)

        # CoupledCF: model 2, explicit dataset name, user and item features as input
        article_hyperparameters = _article_hyperparameters(
            "Tafeng" if is_tafeng else "Movielens1M", 2)
        _search_article_default(
            CoupledCF_RecommenderWrapper,
            [URM_train, UCM_CoupledCF, ICM_CoupledCF],
            earlystopping_hyperparameters,
            article_hyperparameters)

    ################################################################################################
    ###### PRINT RESULTS

    if flag_print_results:

        # Number of users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[DeepCF_RecommenderWrapper, CoupledCF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_to_report,
            UCM_names_list=UCM_to_report)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=[1, 5, 10],
            table_title=None,
            highlight_best=True)

        beyond_accuracy_metrics = [
            "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM",
            "DIVERSITY_GINI", "SHANNON_ENTROPY",
        ]
        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=beyond_accuracy_metrics,
            cutoffs_list=[5],
            table_title=None,
            highlight_best=True)

        all_metrics = [
            "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE",
            "ARHR_ALL_HITS", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
            "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY",
        ]
        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=all_metrics,
            cutoffs_list=[5],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               cold_start=False,
                               cold_items=None,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the SpectralCF experiment pipeline on one dataset.

    Loads the dataset, checks and plots the interaction data and then,
    depending on the flags, tunes the collaborative baselines, trains
    SpectralCF with fixed hyperparameters, tunes SpectralCF, and writes the
    LaTeX result tables.

    :param dataset_name: one of "movielens1m_original", "movielens1m_ours",
        "hetrec", "amazon_instant_video". Any other name prints a message and
        the function returns without running anything.
    :param cold_start: selects the cold-start result folder and cutoffs; only
        "movielens1m_ours" accepts it, the other datasets assert it is not True.
    :param cold_items: forwarded to the reader and used in the folder and
        file names when cold_start is set.
    :param flag_baselines_tune: tune the collaborative baselines.
    :param flag_DL_article_default: train SpectralCF with fixed hyperparameters.
    :param flag_DL_tune: run the SpectralCF hyperparameter search.
    :param flag_print_results: generate the LaTeX result and timing tables.
    :return: None
    """
    if not cold_start:
        result_folder_path = "result_experiments/{}/{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)
    else:
        result_folder_path = "result_experiments/{}/{}_cold_{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, cold_items, dataset_name)

    if dataset_name == "movielens1m_original":
        assert (cold_start is not True)
        dataset = Movielens1MReader(result_folder_path, type="original")

    elif dataset_name == "movielens1m_ours":
        dataset = Movielens1MReader(result_folder_path,
                                    type="ours",
                                    cold_start=cold_start,
                                    cold_items=cold_items)

    elif dataset_name == "hetrec":
        assert (cold_start is not True)
        dataset = MovielensHetrec2011Reader(result_folder_path)

    elif dataset_name == "amazon_instant_video":
        assert (cold_start is not True)
        dataset = AmazonInstantVideoReader(result_folder_path)

    else:
        # Without this branch an unknown name left `dataset` unbound and the
        # function crashed with UnboundLocalError on the next statement.
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # Create the result directory if needed; exist_ok avoids the
    # check-then-create race when several runs start together
    os.makedirs(result_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Train data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
        ["URM_all", "URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    if not cold_start:
        cutoff_list_validation = [50]
        cutoff_list_test = [20, 30, 40, 50, 60, 70, 80, 90, 100]
    else:
        cutoff_list_validation = [20]
        cutoff_list_test = [20]

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    ################################################################################################
    ###### KNN CF

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        # A failure of one baseline is reported and does not stop the others
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:
            spectralCF_article_hyperparameters = {
                "epochs": 1000,
                "batch_size": 1024,
                "embedding_size": 16,
                "decay": 0.001,
                "k": 3,
                "learning_rate": 1e-3,
            }

            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
            }

            parameterSearch = SearchSingleCase(
                SpectralCF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=spectralCF_earlystopping_hyperparameters)

            # The final model is built on train + validation data
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=spectralCF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=SpectralCF_RecommenderWrapper.RECOMMENDER_NAME + "_article_default")

        except Exception as e:
            print("On recommender {} Exception {}".format(SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    if flag_DL_tune:

        try:
            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
                "epochs": 2000,
            }

            runParameterSearch_SpectralCF(
                SpectralCF_RecommenderWrapper,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                earlystopping_hyperparameters=spectralCF_earlystopping_hyperparameters,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=result_folder_path,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                output_file_name_root=SpectralCF_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        file_name = "{}..//{}_{}_".format(
            result_folder_path,
            ALGORITHM_NAME if not cold_start else "{}_cold_{}".format(ALGORITHM_NAME, cold_items),
            dataset_name)

        if cold_start:
            cutoffs_to_report_list = [20]
        else:
            cutoffs_to_report_list = [20, 40, 60, 80, 100]

        # NOTE(review): other_algorithm_list is not assigned anywhere in this
        # function; presumably it is a module-level name - confirm it exists.
        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=other_algorithm_list,
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL", "MAP"],
            cutoffs_list=cutoffs_to_report_list,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM",
                "DIVERSITY_GINI", "SHANNON_ENTROPY",
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR",
                "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY",
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search_MCRec(dataset_name):
    """Run the baselines and MCRec on one dataset and print the LaTeX tables.

    Collaborative, content-based and hybrid baselines are tuned for every
    supported dataset; the MCRec model itself is only trained when
    dataset_name is "movielens100k".

    :param dataset_name: one of "movielens100k", "yelp", "lastfm". Any other
        name prints a message and the function returns without running
        anything.
    :return: None
    """
    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader
    from Conferences.KDD.MCRec_our_interface.LastFM.LastFMReader import LastFMReader
    from Conferences.KDD.MCRec_our_interface.Yelp.YelpReader import YelpReader

    if dataset_name == "movielens100k":
        dataset = Movielens100KReader()

    elif dataset_name == "yelp":
        dataset = YelpReader()

    elif dataset_name == "lastfm":
        dataset = LastFMReader()

    else:
        # Without this branch an unknown name left `dataset` unbound and the
        # function crashed with UnboundLocalError a few lines below.
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    output_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()
    URM_test_negative = dataset.URM_test_negative.copy()

    # Ensure IMPLICIT data and disjoint matrices (negative samples included)
    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test, URM_test_negative])

    # Create the result directory if needed; exist_ok avoids the
    # check-then-create race when several runs start together
    os.makedirs(output_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    if dataset_name == "movielens100k":
        # For this dataset the validation interactions are merged into the
        # training matrix, so they are "seen" items and the validation
        # evaluator must not exclude them.
        URM_train += URM_validation
        evaluator_validation = EvaluatorNegativeItemSample(
            URM_validation, URM_test_negative, cutoff_list=[10], exclude_seen=False)
    else:
        evaluator_validation = EvaluatorNegativeItemSample(
            URM_validation, URM_test_negative, cutoff_list=[10])

    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative, cutoff_list=[10])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
    ]

    metric_to_optimize = "PRECISION"

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        n_cases=35)

    # A failure of one baseline is reported and does not stop the others
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### Content Baselines

    ICM_dictionary = dataset.ICM_dict
    ICM_name_list = ICM_dictionary.keys()

    for ICM_name in ICM_name_list:
        try:
            ICM_object = ICM_dictionary[ICM_name]

            runParameterSearch_Content(
                ItemKNNCBFRecommender,
                URM_train=URM_train,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                parallelizeKNN=False,
                ICM_name=ICM_name,
                ICM_object=ICM_object.copy(),
                n_cases=35)

        except Exception as e:
            print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### Hybrid

    for ICM_name in ICM_name_list:
        try:
            ICM_object = ICM_dictionary[ICM_name]

            runParameterSearch_Hybrid(
                ItemKNN_CFCBF_Hybrid_Recommender,
                URM_train=URM_train,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                parallelizeKNN=False,
                ICM_name=ICM_name,
                ICM_object=ICM_object,
                allow_weighting=True,
                n_cases=35)

        except Exception as e:
            print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### MCRec

    if dataset_name == "movielens100k":

        # The validation evaluator created above already uses
        # exclude_seen=False, because URM_validation is part of URM_train here.

        MCRec_article_parameters = {
            "epochs": 100,
            "latent_dim": 128,
            "reg_latent": 0,
            "layers": [512, 256, 128, 64],
            "reg_layes": [0, 0, 0, 0],
            "learning_rate": 1e-3,
            "batch_size": 256,
            "num_negatives": 4,
        }

        MCRec_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize,
        }

        parameterSearch = SearchSingleCase(
            MCRecML100k_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=MCRec_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=MCRec_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ###### Report

    # Number of users with at least one validation / test interaction
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    ICM_names_to_report_list = ["ICM_genre"]

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[MCRecML100k_RecommenderWrapper],
        ICM_names_to_report_list=ICM_names_to_report_list,
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["PRECISION", "RECALL", "NDCG"],
        cutoffs_to_report_list=[10],
        ICM_names_to_report_list=ICM_names_to_report_list,
        other_algorithm_list=[MCRecML100k_RecommenderWrapper])
def read_data_split_and_search_CollaborativeVAE(dataset_variant, train_interactions):
    """Run the baselines and CollaborativeVAE on one Citeulike split.

    Tunes the collaborative, content-based and hybrid baselines, trains
    CollaborativeVAE with fixed hyperparameters and early stopping, then
    prints the timing and result LaTeX tables.

    :param dataset_variant: forwarded to CiteulikeReader and used in the
        output folder and report names.
    :param train_interactions: forwarded to CiteulikeReader and used in the
        output folder and report names.
    :return: None
    """
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    dataset = CiteulikeReader(dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    output_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    # Work on private copies of the three interaction matrices
    URM_train, URM_validation, URM_test = (
        matrix.copy()
        for matrix in (dataset.URM_train, dataset.URM_validation, dataset.URM_test)
    )

    # Ensure IMPLICIT data and disjoint splits
    all_splits = [URM_train, URM_validation, URM_test]
    assert_implicit_data(list(all_splits))
    assert_disjoint_matrices(list(all_splits))

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
    ]

    metric_to_optimize = "RECALL"

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[150])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=list(range(50, 301, 50)))

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        n_cases=35)

    # A failure of one baseline is reported and does not stop the others
    for recommender_class in collaborative_algorithm_list:
        try:
            runParameterSearch_Collaborative_partial(recommender_class)
        except Exception as e:
            print("On recommender {} Exception {}".format(recommender_class, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### Content and Hybrid Baselines

    ICM_title_abstract = dataset.ICM_title_abstract.copy()

    # Arguments common to the content-based and the hybrid search; only
    # allow_weighting differs between the two calls
    feature_search_kwargs = dict(
        URM_train=URM_train,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        ICM_name="ICM_title_abstract",
        ICM_object=ICM_title_abstract,
        n_cases=35)

    try:
        runParameterSearch_Content(ItemKNNCBFRecommender,
                                   allow_weighting=False,
                                   **feature_search_kwargs)
    except Exception as e:
        print("On recommender {} Exception {}".format(ItemKNNCBFRecommender, str(e)))
        traceback.print_exc()

    try:
        runParameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                  allow_weighting=True,
                                  **feature_search_kwargs)
    except Exception as e:
        print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
        traceback.print_exc()

    ################################################################################################
    ###### CollaborativeVAE

    try:
        temp_file_folder = output_folder_path + "{}_log/".format(ALGORITHM_NAME)

        cvae_recommender_article_parameters = {
            "epochs": 200,
            "learning_rate_vae": 1e-2,
            "learning_rate_cvae": 1e-3,
            "num_factors": 50,
            "dimensions_vae": [200, 100],
            "epochs_vae": [50, 50],
            "batch_size": 128,
            "lambda_u": 0.1,
            "lambda_v": 10,
            "lambda_r": 1,
            "a": 1,
            "b": 0.01,
            "M": 300,
            "temp_file_folder": temp_file_folder,
        }

        cvae_earlystopping_parameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize,
        }

        parameterSearch = SearchSingleCase(
            CollaborativeVAE_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_title_abstract],
            FIT_KEYWORD_ARGS=cvae_earlystopping_parameters)

        parameterSearch.search(
            recommender_parameters,
            fit_parameters_values=cvae_recommender_article_parameters,
            output_folder_path=output_folder_path,
            output_file_name_root=CollaborativeVAE_RecommenderWrapper.RECOMMENDER_NAME)

    except Exception as e:
        print("On recommender {} Exception {}".format(CollaborativeVAE_RecommenderWrapper, str(e)))
        traceback.print_exc()

    ################################################################################################
    ###### Report

    # Number of users with at least one validation / test interaction
    n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
    n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

    ICM_names_to_report_list = ["ICM_title_abstract"]
    dataset_name = "{}_{}".format(dataset_variant, train_interactions)

    print_time_statistics_latex_table(
        result_folder_path=output_folder_path,
        dataset_name=dataset_name,
        results_file_prefix_name=ALGORITHM_NAME,
        other_algorithm_list=[CollaborativeVAE_RecommenderWrapper],
        ICM_names_to_report_list=ICM_names_to_report_list,
        n_validation_users=n_validation_users,
        n_test_users=n_test_users,
        n_decimals=2)

    print_results_latex_table(
        result_folder_path=output_folder_path,
        results_file_prefix_name=ALGORITHM_NAME,
        dataset_name=dataset_name,
        metrics_to_report_list=["RECALL"],
        cutoffs_to_report_list=list(range(50, 301, 50)),
        ICM_names_to_report_list=ICM_names_to_report_list,
        other_algorithm_list=[CollaborativeVAE_RecommenderWrapper])
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     URM_train_last_test=None,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=5,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):
    """
    Run the hyperparameter search for one collaborative recommender class.

    The function selects a search strategy and a hyperparameter space based on
    ``recommender_class`` and then launches the search. Any exception raised
    while doing so is caught, printed together with its traceback and appended
    to ``ErrorLog.txt`` inside ``output_folder_path``; nothing is re-raised.

    :param recommender_class: recommender class to tune; its ``RECOMMENDER_NAME``
        attribute is used as root of the output file names
    :param URM_train: training matrix; it is copied and passed as the first
        positional constructor argument of the recommender
    :param URM_train_last_test: if not None, it is copied and replaces
        ``URM_train`` in the constructor arguments handed to the search as
        ``recommender_input_args_last_test``
    :param metric_to_optimize: forwarded to the search and used as
        ``validation_metric`` in the early-stopping fit arguments
    :param evaluator_validation: evaluator given to the search object
    :param evaluator_test: evaluator given to the search object
    :param evaluator_validation_earlystopping: used as ``evaluator_object`` in
        the early-stopping fit arguments
    :param output_folder_path: folder created if missing; file names are built
        by string concatenation, so it is expected to end with a path separator
    :param parallelizeKNN: if True the KNN similarity types are processed by a
        ``multiprocessing.Pool``, otherwise sequentially
    :param n_cases: forwarded to the search
    :param n_random_starts: forwarded to the search
    :param resume_from_saved: forwarded to the search
    :param save_model: forwarded to the search
    :param allow_weighting: forwarded to ``run_KNNRecommender_on_similarity_type``
    :param similarity_type_list: similarity names used for the KNN recommenders;
        defaults to cosine, jaccard, asymmetric, dice and tversky
    :return: None
    """

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(output_folder_path, exist_ok=True)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    def _build_input_args(fit_keyword_args):
        # Every supported recommender is constructed from URM_train alone
        return SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS=fit_keyword_args)

    def _build_last_test_args(input_args):
        # Same arguments, but with the last-test matrix as training data
        if URM_train_last_test is None:
            return None

        last_test_args = input_args.copy()
        last_test_args.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        return last_test_args

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        ##########################################################################################################

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters,
            # therefore only one evaluation is needed
            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = _build_input_args({})
            recommender_input_args_last_test = _build_last_test_args(
                recommender_input_args)

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_input_args = _build_input_args({})
            recommender_input_args_last_test = _build_last_test_args(
                recommender_input_args)

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(),
                    maxtasksperchild=1)
                try:
                    pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                             similarity_type_list)
                finally:
                    # Release the worker processes even if a task failed
                    pool.close()
                    pool.join()

            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################
        # Search space and fit keyword arguments of each remaining algorithm

        if recommender_class is P3alphaRecommender:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }
            fit_keyword_args = {}

        elif recommender_class is RP3betaRecommender:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "beta": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }
            fit_keyword_args = {}

        elif recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical(
                    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5,
                                                    prior='uniform'),
            }
            fit_keyword_args = earlystopping_keywargs

        elif recommender_class is MatrixFactorization_AsySVD_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical([1]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5,
                                                    prior='uniform'),
            }
            fit_keyword_args = earlystopping_keywargs

        elif recommender_class is MatrixFactorization_BPR_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([1500]),
                "num_factors": Integer(1, 200),
                "batch_size": Categorical(
                    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            }
            fit_keyword_args = {
                **earlystopping_keywargs,
                "positive_threshold_BPR": None
            }

        elif recommender_class is IALSRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 200),
                "confidence_scaling": Categorical(["linear", "log"]),
                "alpha": Real(low=1e-3, high=50.0, prior='log-uniform'),
                "epsilon": Real(low=1e-3, high=10.0, prior='log-uniform'),
                "reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            }
            fit_keyword_args = earlystopping_keywargs

        elif recommender_class is PureSVDRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
            }
            fit_keyword_args = {}

        elif recommender_class is NMFRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
                "solver": Categorical(
                    ["coordinate_descent", "multiplicative_update"]),
                "init_type": Categorical(["random", "nndsvda"]),
                "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
            }
            fit_keyword_args = {}

        elif recommender_class is SLIM_BPR_Cython:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "epochs": Categorical([1500]),
                "symmetric": Categorical([True, False]),
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "lambda_i": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "lambda_j": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            }
            fit_keyword_args = {
                **earlystopping_keywargs,
                "positive_threshold_BPR": None,
                'train_with_sparse_weights': None
            }

        elif recommender_class is SLIMElasticNetRecommender:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "l1_ratio": Real(low=1e-5, high=1.0, prior='log-uniform'),
                "alpha": Real(low=1e-3, high=1.0, prior='uniform'),
            }
            fit_keyword_args = {}

        else:
            # Fail with a meaningful message instead of an UnboundLocalError on
            # the variables the branches above would have defined. The error is
            # still handled by the except clause below.
            raise ValueError(
                "No hyperparameter search space is defined for recommender class {}"
                .format(recommender_class))

        #########################################################################################################

        recommender_input_args = _build_input_args(fit_keyword_args)
        recommender_input_args_last_test = _build_last_test_args(
            recommender_input_args)

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        error_message = "On recommender {} Exception {}".format(
            recommender_class, str(e))

        print(error_message)
        traceback.print_exc()

        # The context manager closes the log even if the write itself fails
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write(error_message + "\n")
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Load one of the HetRec2011 data splits and run the selected experiment steps.

    Depending on the flags the function fits HERSWrapper with the article
    hyperparameters, tunes the collaborative, content and hybrid baselines and
    writes the LaTeX result tables. All outputs go to
    ``result_experiments/<CONFERENCE_NAME>/<ALGORITHM_NAME>_<dataset_name>/``.

    :param dataset_name: one of "delicious-hetrec2011", "lastfm-hetrec2011" or
        their "-cold-users" / "-cold-items" variants; for any other value a
        message is printed and the function returns immediately
    :param flag_baselines_tune: if True, run the hyperparameter search of the baselines
    :param flag_DL_article_default: if True, fit HERSWrapper with the article hyperparameters
    :param flag_DL_tune: accepted but not used by this function
    :param flag_print_results: if True, generate the LaTeX tables
    :return: None
    """

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "delicious-hetrec2011":
        dataset = DeliciousHetrec2011Reader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-users":
        dataset = DeliciousHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-items":
        dataset = DeliciousHetrec2011ColdItemsReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011":
        dataset = LastFMHetrec2011Reader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-users":
        dataset = LastFMHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-items":
        dataset = LastFMHetrec2011ColdItemsReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_negative = dataset.URM_DICT["URM_negative"].copy()
    UCM_train = dataset.UCM_DICT["UCM"].copy()
    ICM_train = dataset.ICM_DICT["ICM"].copy()

    # Only the two non-cold datasets merge the validation data into the
    # training data for the final evaluation
    train_and_validation_are_merged = dataset_name in ("delicious-hetrec2011",
                                                       "lastfm-hetrec2011")

    if train_and_validation_are_merged:
        URM_train_last_test = URM_train + URM_validation

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    else:
        URM_train_last_test = URM_train

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_test])
        assert_disjoint_matrices([URM_train, URM_test])

    # If directory does not exist, create
    os.makedirs(result_folder_path, exist_ok=True)

    metric_to_optimize = "MAP"
    cutoff_list_validation = [5, 10, 20]
    cutoff_list_test = [5, 10, 20]

    n_cases = 50
    n_random_starts = 15

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_negative, cutoff_list=cutoff_list_test)

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        article_hyperparameters = {
            "pretrain_samples": 3,
            "pretrain_batch_size": 200,
            "pretrain_iterations": 5,
            "embed_len": 128,
            "topK": 10,
            "fliter_theta": 16,
            "aggre_theta": 64,
            "batch_size": 400,
            "samples": 3,
            "margin": 20,
            "epochs": 30,
            "iter_without_att": 5,
            "directed": False,
        }

        # Do not modify earlystopping
        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": False,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
        }

        # This is a simple version of the tuning code that is reported below and uses SearchSingleCase
        # You may use this for a simpler testing
        # recommender_instance = HERSWrapper(URM_train, UCM_train, ICM_train)
        #
        # recommender_instance.fit(**article_hyperparameters,
        #                          **earlystopping_hyperparameters)
        #
        # evaluator_test.evaluateRecommender(recommender_instance)

        # Fit the DL model, select the optimal number of epochs and save the result
        parameterSearch = SearchSingleCase(
            HERSWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_train, ICM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        search_keyword_args = {
            "fit_hyperparameters_values": article_hyperparameters,
            "output_folder_path": result_folder_path,
            "output_file_name_root": HERSWrapper.RECOMMENDER_NAME,
        }

        # The last-test arguments are passed only when train and validation
        # data are merged; otherwise the keyword is left out of the call
        if train_and_validation_are_merged:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test

            search_keyword_args[
                "recommender_input_args_last_test"] = recommender_input_args_last_test

        parameterSearch.search(recommender_input_args, **search_keyword_args)

    ################################################################################################
    ######
    ######      BASELINE ALGORITHMS - Nothing should be modified below this point
    ######

    if flag_baselines_tune:

        ################################################################################################
        ###### Collaborative Baselines

        collaborative_algorithm_list = [
            Random,
            TopPop,
            ItemKNNCFRecommender,
            PureSVDRecommender,
            SLIM_BPR_Cython,
        ]

        # Running hyperparameter tuning of baselines
        # See if the results are reasonable and comparable to baselines reported in the paper
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            URM_train_last_test=URM_train_last_test,
            metric_to_optimize=metric_to_optimize,
            evaluator_validation_earlystopping=evaluator_validation,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            parallelizeKNN=False,
            allow_weighting=True,
            n_cases=n_cases,
            n_random_starts=n_random_starts)

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:
                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Rows of the sparse matrix with at least one stored interaction.
        # The validation count must come from URM_validation, not URM_test.
        n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        print_time_statistics_latex_table(
            result_folder_path=result_folder_path,
            dataset_name=dataset_name,
            algorithm_name=ALGORITHM_NAME,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list,
            n_validation_users=n_validation_users,
            n_test_users=n_test_users,
            n_decimals=2)

        print_results_latex_table(
            result_folder_path=result_folder_path,
            algorithm_name=ALGORITHM_NAME,
            file_name_suffix="article_metrics_",
            dataset_name=dataset_name,
            metrics_to_report_list=["HIT_RATE", "NDCG"],
            cutoffs_to_report_list=cutoff_list_test,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list)

        print_results_latex_table(
            result_folder_path=result_folder_path,
            algorithm_name=ALGORITHM_NAME,
            file_name_suffix="all_metrics_",
            dataset_name=dataset_name,
            metrics_to_report_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_to_report_list=cutoff_list_validation,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Load the Movielens100K data split and run the selected experiment steps.

    Depending on the flags the function tunes the collaborative, content and
    hybrid baselines, fits MCRecML100k_RecommenderWrapper with the article
    hyperparameters and writes the LaTeX result files. The popularity plot and
    statistics are always produced once the data is loaded.

    :param dataset_name: only "movielens100k" is supported; for any other value
        a message is printed and the function returns immediately
    :param flag_baselines_tune: if True, run the hyperparameter search of the baselines
    :param flag_DL_article_default: if True, fit the MCRec wrapper with the article hyperparameters
    :param flag_DL_tune: accepted but not used by this function
    :param flag_print_results: if True, generate the LaTeX result files
    :return: None
    """

    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens100k":
        dataset = Movielens100KReader(result_folder_path)
    else:
        # Without this guard `dataset` would be unbound below and the function
        # would fail with an UnboundLocalError unrelated to the real problem
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # If directory does not exist, create
    os.makedirs(result_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=[10])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "PRECISION"
    n_cases = 50
    n_random_starts = 15

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        ################################################################################################
        ###### Collaborative Baselines

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:
                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        if dataset_name == "movielens100k":
            # The code provided by the original authors of MCRec can be used only for the original data.
            # Here I am passing to the Wrapper the URM_train matrix that is only required for its shape,
            # the train will be done using the preprocessed data the original authors provided
            from Conferences.KDD.MCRec_github.code.Dataset import Dataset

            original_dataset_reader = Dataset(
                'Conferences/KDD/MCRec_github/data/' + 'ml-100k')

            MCRec_article_hyperparameters = {
                "epochs": 200,
                "latent_dim": 128,
                "reg_latent": 0,
                "layers": [512, 256, 128, 64],
                "reg_layes": [0, 0, 0, 0],
                "learning_rate": 1e-3,
                "batch_size": 256,
                "num_negatives": 4,
            }

            MCRec_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            parameterSearch = SearchSingleCase(
                MCRecML100k_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, original_dataset_reader
                ],
                FIT_KEYWORD_ARGS=MCRec_earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=MCRec_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Rows of the test matrix with at least one stored interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        # The "..//" segment places the output files in the parent of result_folder_path
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[MCRecML100k_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "NDCG"],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def runParameterSearch_Collaborative(recommender_class, URM_train, URM_train_last_test=None,
                                     n_cases=35, n_random_starts=5, resume_from_saved=False,
                                     save_model="best", evaluate_on_test="best",
                                     evaluator_validation=None, evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     metric_to_optimize="PRECISION",
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True, allow_weighting=True,
                                     similarity_type_list=None):
    """
    This function performs the hyperparameter optimization for a collaborative recommender

    Any exception raised while setting up or running the search is caught, printed together
    with its traceback and appended to "ErrorLog.txt" inside output_folder_path; it is NOT
    propagated to the caller. The function always returns None.

    :param recommender_class:   Class of the recommender object to optimize, it must be a BaseRecommender type
    :param URM_train:           Sparse matrix containing the URM training data
    :param URM_train_last_test: Sparse matrix containing the union of URM training and validation data to be used in the last evaluation
    :param n_cases:             Number of hyperparameter sets to explore
    :param n_random_starts:     Number of the initial random hyperparameter values to explore, usually set at 30% of n_cases
    :param resume_from_saved:   Boolean value, if True the optimization is resumed from the saved files, if False a new one is done
    :param save_model:          ["no", "best", "last"] which of the models to save, see ParameterTuning/SearchAbstractClass for details
    :param evaluate_on_test:    ["all", "best", "last", "no"] when to evaluate the model on the test data, see ParameterTuning/SearchAbstractClass for details
    :param evaluator_validation:    Evaluator object to be used for the validation of each hyperparameter set
    :param evaluator_validation_earlystopping:   Evaluator object to be used for the earlystopping of ML algorithms, can be the same of evaluator_validation
    :param evaluator_test:          Evaluator object to be used for the test results, the output will only be saved but not used
    :param metric_to_optimize:  String with the name of the metric to be optimized as contained in the output of the evaluator objects
    :param output_folder_path:  Folder in which to save the output files. It is concatenated as-is with the file names,
                                so it is expected to end with a path separator
    :param parallelizeKNN:      Boolean value, if True the various heuristics of the KNNs will be computed in parallel, if False sequentially
    :param allow_weighting:     Boolean value, if True it enables the use of TF-IDF and BM25 to weight features, users and items in KNNs
    :param similarity_type_list: List of strings with the similarity heuristics to be used for the KNNs
    """

    # If directory does not exist, create.
    # exist_ok guards against another process creating it between the check and the call.
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path, exist_ok=True)

    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_validation_earlystopping,
                              "lower_validations_allowed": 5,
                              "validation_metric": metric_to_optimize,
                              }

    # Work on private copies so the caller's matrices are never shared with the search objects
    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(recommender_class,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters therefore only one evaluation is needed
            parameterSearch = SearchSingleCase(recommender_class,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={}
            )

            recommender_input_args_last_test = _build_last_test_input_args(recommender_input_args,
                                                                           URM_train_last_test)

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test=recommender_input_args_last_test,
                                   fit_hyperparameters_values={},
                                   output_folder_path=output_folder_path,
                                   output_file_name_root=output_file_name_root,
                                   resume_from_saved=resume_from_saved,
                                   save_model=save_model,
                                   evaluate_on_test=evaluate_on_test,
                                   )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={}
            )

            recommender_input_args_last_test = _build_last_test_input_args(recommender_input_args,
                                                                           URM_train_last_test)

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                evaluate_on_test=evaluate_on_test,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                try:
                    pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
                finally:
                    # Release the worker processes even when one of the searches raised
                    pool.close()
                    pool.join()

            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        # Raises ValueError for a recommender class without a defined search space;
        # the error is reported by the handler below like any other failure.
        hyperparameters_range_dictionary, fit_keyword_args = _collaborative_search_space(
            recommender_class, earlystopping_keywargs)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS=fit_keyword_args
        )

        recommender_input_args_last_test = _build_last_test_input_args(recommender_input_args,
                                                                       URM_train_last_test)

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_input_args,
                               parameter_search_space=hyperparameters_range_dictionary,
                               n_cases=n_cases,
                               n_random_starts=n_random_starts,
                               resume_from_saved=resume_from_saved,
                               save_model=save_model,
                               evaluate_on_test=evaluate_on_test,
                               output_folder_path=output_folder_path,
                               output_file_name_root=output_file_name_root,
                               metric_to_optimize=metric_to_optimize,
                               recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        # Deliberate best-effort: a failing algorithm must not stop the caller's experiment loop,
        # so the failure is reported on screen and persisted in the error log instead of re-raised.
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))


def _build_last_test_input_args(recommender_input_args, URM_train_last_test):
    """
    Return a copy of recommender_input_args whose first constructor argument is
    URM_train_last_test, or None when no last-test URM is provided.
    """
    if URM_train_last_test is None:
        return None

    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

    return recommender_input_args_last_test


def _collaborative_search_space(recommender_class, earlystopping_keywargs):
    """
    Return the tuple (hyperparameters_range_dictionary, fit_keyword_args) for recommender_class.

    :param recommender_class:       Class of the recommender whose search space is requested
    :param earlystopping_keywargs:  Dictionary of earlystopping fit arguments, used as (or merged into)
                                    the fit keyword arguments of the algorithms trained with earlystopping
    :raises ValueError:             If no search space is defined for recommender_class
    """

    if recommender_class is P3alphaRecommender:
        hyperparameters_range_dictionary = {
            "topK": Integer(5, 1000),
            "alpha": Real(low = 0, high = 2, prior = 'uniform'),
            "normalize_similarity": Categorical([True, False]),
        }
        return hyperparameters_range_dictionary, {}

    if recommender_class is RP3betaRecommender:
        hyperparameters_range_dictionary = {
            "topK": Integer(5, 1000),
            "alpha": Real(low = 0, high = 2, prior = 'uniform'),
            "beta": Real(low = 0, high = 2, prior = 'uniform'),
            "normalize_similarity": Categorical([True, False]),
        }
        return hyperparameters_range_dictionary, {}

    if recommender_class is MatrixFactorization_FunkSVD_Cython:
        hyperparameters_range_dictionary = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([500]),
            "use_bias": Categorical([True, False]),
            "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
            "num_factors": Integer(1, 200),
            "item_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "user_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
            "negative_interactions_quota": Real(low = 0.0, high = 0.5, prior = 'uniform'),
        }
        return hyperparameters_range_dictionary, earlystopping_keywargs

    if recommender_class is MatrixFactorization_AsySVD_Cython:
        hyperparameters_range_dictionary = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([500]),
            "use_bias": Categorical([True, False]),
            "batch_size": Categorical([1]),
            "num_factors": Integer(1, 200),
            "item_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "user_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
            "negative_interactions_quota": Real(low = 0.0, high = 0.5, prior = 'uniform'),
        }
        return hyperparameters_range_dictionary, earlystopping_keywargs

    if recommender_class is MatrixFactorization_BPR_Cython:
        hyperparameters_range_dictionary = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([1500]),
            "num_factors": Integer(1, 200),
            "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
            "positive_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "negative_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
        }
        fit_keyword_args = {**earlystopping_keywargs,
                            "positive_threshold_BPR": None}
        return hyperparameters_range_dictionary, fit_keyword_args

    if recommender_class is IALSRecommender:
        hyperparameters_range_dictionary = {
            "num_factors": Integer(1, 200),
            "epochs": Categorical([300]),
            "confidence_scaling": Categorical(["linear", "log"]),
            "alpha": Real(low = 1e-3, high = 50.0, prior = 'log-uniform'),
            "epsilon": Real(low = 1e-3, high = 10.0, prior = 'log-uniform'),
            "reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
        }
        return hyperparameters_range_dictionary, earlystopping_keywargs

    if recommender_class is PureSVDRecommender:
        hyperparameters_range_dictionary = {
            "num_factors": Integer(1, 350),
        }
        return hyperparameters_range_dictionary, {}

    if recommender_class is NMFRecommender:
        hyperparameters_range_dictionary = {
            "num_factors": Integer(1, 350),
            "solver": Categorical(["coordinate_descent", "multiplicative_update"]),
            "init_type": Categorical(["random", "nndsvda"]),
            "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
        }
        return hyperparameters_range_dictionary, {}

    if recommender_class is SLIM_BPR_Cython:
        hyperparameters_range_dictionary = {
            "topK": Integer(5, 1000),
            "epochs": Categorical([1500]),
            "symmetric": Categorical([True, False]),
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "lambda_i": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "lambda_j": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
        }
        fit_keyword_args = {**earlystopping_keywargs,
                            "positive_threshold_BPR": None,
                            'train_with_sparse_weights': None}
        return hyperparameters_range_dictionary, fit_keyword_args

    if recommender_class is SLIMElasticNetRecommender:
        hyperparameters_range_dictionary = {
            "topK": Integer(5, 1000),
            "l1_ratio": Real(low = 1e-5, high = 1.0, prior = 'log-uniform'),
            "alpha": Real(low = 1e-3, high = 1.0, prior = 'uniform'),
        }
        return hyperparameters_range_dictionary, {}

    if recommender_class is EASE_R_Recommender:
        hyperparameters_range_dictionary = {
            "topK": Categorical([None]),
            "normalize_matrix": Categorical([False]),
            "l2_norm": Real(low = 1e0, high = 1e7, prior = 'log-uniform'),
        }
        return hyperparameters_range_dictionary, {}

    raise ValueError("Hyperparameter search space not defined for recommender class {}".format(recommender_class))