def print_results(urm_test_split: csr_matrix):
    """Export the LaTeX result tables and the timing table for the experiments.

    Results are loaded from ``EXPERIMENTS_FOLDER_PATH`` and every table is
    written as a text file inside ``RESULTS_EXPORT_FOLDER_PATH``.

    Args:
        urm_test_split: CSR test matrix. Only its ``indptr`` is read, to count
            the rows holding at least one stored entry; that count is passed
            as ``n_evaluation_users`` to the time statistics.
    """
    test_matrix = urm_test_split.copy()

    # A row contributes when its indptr span contains at least one entry
    entries_per_row = np.ediff1d(test_matrix.indptr)
    n_test_users = np.sum(entries_per_row >= 1)

    loader = ResultFolderLoader(
        EXPERIMENTS_FOLDER_PATH,
        base_algorithm_list=None,
        other_algorithm_list=None,
        KNN_similarity_list=KNN_SIMILARITY_LIST,
        ICM_names_list=None,
        UCM_names_list=None,
    )

    # (output file, metrics to report, cutoffs to report), in export order
    table_specs = (
        (
            "article_metrics_latex_results.txt",
            ["RECALL", "MAP"],
            METRICS_CUTOFF_TO_REPORT_LIST,
        ),
        (
            "beyond_accuracy_metrics_latex_results.txt",
            ["DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
             "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
            OTHERS_CUTOFF_TO_REPORT_LIST,
        ),
        (
            "all_metrics_latex_results.txt",
            ["PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
             "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
             "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
             "SHANNON_ENTROPY"],
            OTHERS_CUTOFF_TO_REPORT_LIST,
        ),
    )

    for table_file, metrics, cutoffs in table_specs:
        loader.generate_latex_results(
            os.path.join(RESULTS_EXPORT_FOLDER_PATH, table_file),
            metrics_list=metrics,
            cutoffs_list=cutoffs,
            table_title=None,
            highlight_best=True,
        )

    loader.generate_latex_time_statistics(
        os.path.join(RESULTS_EXPORT_FOLDER_PATH, "time_latex_results.txt"),
        n_evaluation_users=n_test_users,
        table_title=None,
    )
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Load the MCRec data split, optionally tune baselines, run MCRec and export results.

    Args:
        dataset_name: Name of the dataset; only ``"movielens100k"`` is supported.
        flag_baselines_tune: When true, run the hyperparameter search for the
            collaborative, content-based and hybrid baselines.
        flag_DL_article_default: When true, fit and evaluate
            ``MCRecML100k_RecommenderWrapper`` with the hyperparameters
            hard-coded below.
        flag_DL_tune: Accepted for interface compatibility; not read here.
        flag_print_results: When true, write the LaTeX result and timing tables.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
    """
    # Fail fast with a clear message: previously an unknown name left `dataset`
    # unbound and the function crashed later with an opaque NameError.
    if dataset_name != "movielens100k":
        raise ValueError(
            "Dataset name not supported, current is {}".format(dataset_name))

    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    dataset = Movielens100KReader(result_folder_path)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # Create the output directory; exist_ok avoids the check-then-create race
    os.makedirs(result_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [URM_train + URM_validation, URM_test],
        ["URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=[10])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "PRECISION"
    n_cases = 50
    n_random_starts = 15

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        # A failure on one baseline is reported and must not stop the others
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        # The code provided by the original authors of MCRec can be used only
        # for the original data. The wrapper receives URM_train only because
        # its shape is required; training uses the preprocessed data the
        # original authors provided.
        from Conferences.KDD.MCRec_github.code.Dataset import Dataset

        original_dataset_reader = Dataset(
            'Conferences/KDD/MCRec_github/data/' + 'ml-100k')

        # NOTE: "reg_layes" is the spelling used as hyperparameter key; it is
        # runtime data and is intentionally left untouched.
        MCRec_article_hyperparameters = {
            "epochs": 200,
            "latent_dim": 128,
            "reg_latent": 0,
            "layers": [512, 256, 128, 64],
            "reg_layes": [0, 0, 0, 0],
            "learning_rate": 1e-3,
            "batch_size": 256,
            "num_negatives": 4,
        }

        MCRec_earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "evaluator_object": evaluator_validation,
            "lower_validations_allowed": 5,
            "validation_metric": metric_to_optimize
        }

        parameterSearch = SearchSingleCase(
            MCRecML100k_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, original_dataset_reader],
            FIT_KEYWORD_ARGS=MCRec_earlystopping_hyperparameters)

        # The last-test arguments differ only in the URM (train + validation)
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = \
            URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=MCRec_article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows of URM_test holding at least one stored entry
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[MCRecML100k_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "NDCG"],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune = False,
                               flag_DL_article_default = False,
                               flag_MF_baselines_tune = False,
                               flag_DL_tune = False,
                               flag_print_results = False):
    """Load the Mult-VAE cold-user split, optionally tune baselines, run Mult-VAE and export results.

    Args:
        dataset_name: Either ``"movielens20m"`` or ``"netflixPrize"``.
        flag_baselines_tune: When true, run the hyperparameter search for the
            collaborative baselines.
        flag_DL_article_default: When true, fit and evaluate
            ``Mult_VAE_RecommenderWrapper`` with the hyperparameters
            hard-coded below.
        flag_MF_baselines_tune: When true, run the cold-user hyperparameter
            search for the matrix factorization baselines.
        flag_DL_tune: Accepted for interface compatibility; not read here.
        flag_print_results: When true, write the LaTeX result and timing tables.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
    """
    # Training epochs per supported dataset; also the list of valid names.
    article_epochs_by_dataset = {
        "movielens20m": 100,
        "netflixPrize": 200,
    }

    # Fail fast with a clear message: previously an unknown name left `dataset`
    # (and later `epochs`) unbound and surfaced as an opaque NameError.
    if dataset_name not in article_epochs_by_dataset:
        raise ValueError(
            "Dataset name not supported, current is {}".format(dataset_name))

    from Conferences.WWW.MultiVAE_our_interface.Movielens20M.Movielens20MReader import Movielens20MReader
    from Conferences.WWW.MultiVAE_our_interface.NetflixPrize.NetflixPrizeReader import NetflixPrizeReader

    split_type = "cold_user"

    result_folder_path = "result_experiments/{}/{}_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name, split_type)

    if dataset_name == "movielens20m":
        dataset = Movielens20MReader(result_folder_path, split_type = split_type)
    else:
        # Only "netflixPrize" can reach this branch after the check above
        dataset = NetflixPrizeReader(result_folder_path)

    # Create the output directory; exist_ok avoids the check-then-create race
    os.makedirs(result_folder_path, exist_ok=True)

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    if split_type == "cold_user":

        # UserKNNCFRecommender is left out of this list; the matrix
        # factorization models are tuned separately further down through
        # runParameterSearch_cold_user_MF.
        collaborative_algorithm_list = [
            Random,
            TopPop,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
            EASE_R_Recommender,
            SLIM_BPR_Cython,
            SLIMElasticNetRecommender,
        ]

        URM_train = dataset.URM_DICT["URM_train"].copy()
        URM_train_all = dataset.URM_DICT["URM_train_all"].copy()
        URM_validation = dataset.URM_DICT["URM_validation"].copy()
        URM_test = dataset.URM_DICT["URM_test"].copy()

        # Ensure IMPLICIT data and DISJOINT sets
        assert_implicit_data([URM_train, URM_train_all, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train_all, URM_validation, URM_test])

        from Base.Evaluation.Evaluator import EvaluatorHoldout

        evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[100])
        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[20, 50, 100])

        # Both evaluators are wrapped together with URM_train_all
        evaluator_validation = EvaluatorUserSubsetWrapper(evaluator_validation, URM_train_all)
        evaluator_test = EvaluatorUserSubsetWrapper(evaluator_test, URM_train_all)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train = URM_train,
        URM_train_last_test = URM_train + URM_validation,
        metric_to_optimize = metric_to_optimize,
        evaluator_validation_earlystopping = evaluator_validation,
        evaluator_validation = evaluator_validation,
        evaluator_test = evaluator_test,
        output_folder_path = result_folder_path,
        parallelizeKNN = False,
        allow_weighting = True,
        resume_from_saved = True,
        n_cases = n_cases,
        n_random_starts = n_random_starts)

    if flag_baselines_tune:

        # A failure on one baseline is reported and must not stop the others
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ###### Matrix Factorization Cold users

    collaborative_MF_algorithm_list = [
        PureSVDRecommender,
        IALSRecommender,
        NMFRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
    ]

    runParameterSearch_cold_user_MF_partial = partial(
        runParameterSearch_cold_user_MF,
        URM_train = URM_train,
        URM_train_last_test = URM_train + URM_validation,
        metric_to_optimize = metric_to_optimize,
        evaluator_validation_earlystopping = evaluator_validation,
        evaluator_validation = evaluator_validation,
        evaluator_test = evaluator_test,
        output_folder_path = result_folder_path,
        resume_from_saved = True,
        n_cases = n_cases,
        n_random_starts = n_random_starts)

    if flag_MF_baselines_tune:

        for recommender_class in collaborative_MF_algorithm_list:
            try:
                runParameterSearch_cold_user_MF_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:
            epochs = article_epochs_by_dataset[dataset_name]

            multiVAE_article_hyperparameters = {
                "epochs": epochs,
                "batch_size": 500,
                "total_anneal_steps": 200000,
                "p_dims": None,
            }

            multiVAE_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }

            parameterSearch = SearchSingleCase(
                Mult_VAE_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                FIT_KEYWORD_ARGS = multiVAE_earlystopping_hyperparameters)

            # The last-test arguments differ only in the URM (train + validation)
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test = recommender_input_args_last_test,
                fit_hyperparameters_values=multiVAE_article_hyperparameters,
                output_folder_path = result_folder_path,
                resume_from_saved = True,
                output_file_name_root = Mult_VAE_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(Mult_VAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows of URM_test holding at least one stored entry
        n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list = None,
            other_algorithm_list = [Mult_VAE_RecommenderWrapper],
            KNN_similarity_list = KNN_similarity_to_report_list,
            ICM_names_list = None,
            UCM_names_list = None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list = ["RECALL", "NDCG"],
            cutoffs_list = [20, 50, 100],
            table_title = None,
            highlight_best = True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list = ["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR_ALL_HITS",
                            "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
            cutoffs_list = [50],
            table_title = None,
            highlight_best = True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title = None)
def read_data_split_and_search(dataset_variant,
                               train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Load a Citeulike split, run CollaborativeDL with fixed hyperparameters and export results.

    Args:
        dataset_variant: Forwarded to ``CiteulikeReader`` and used in the
            result folder names.
        train_interactions: Forwarded to ``CiteulikeReader``. The value ``1``
            selects a setup in which validation data is kept inside the train
            data and seen items are not excluded during validation.
        flag_baselines_tune: Accepted for interface compatibility; not read here.
        flag_DL_article_default: When true, fit and evaluate
            ``CollaborativeDL_Matlab_RecommenderWrapper``.
        flag_DL_tune: Accepted for interface compatibility; not read here.
        flag_print_results: When true, write the LaTeX result and timing tables.
    """
    # The data reader of CollaborativeVAE_our_interface is reused because both
    # algorithms use the same data in the same way
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    folder_template = "result_experiments/{}/{}_citeulike_{}_{}/"

    result_folder_path = folder_template.format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    # The split is read from (or stored in) the CollaborativeVAE result folder
    result_folder_path_CollaborativeVAE = folder_template.format(
        CONFERENCE_NAME, "CollaborativeVAE", dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path_CollaborativeVAE,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, the validation data is chosen as a
    # subset of the train data while keeping validation interactions in the
    # train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation,
        cutoff_list=[150],
        exclude_seen=exclude_seen_validation)

    evaluator_test = EvaluatorHoldout(
        URM_test,
        cutoff_list=[50, 100, 150, 200, 250, 300])

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        # Any failure is reported and swallowed so result printing can still run
        try:
            collaborativeDL_article_hyperparameters = {
                "para_lv": 10,
                "para_lu": 1,
                "para_ln": 1e3,
                "batch_size": 128,
                "epoch_sdae": 200,
                "epoch_dae": 200,
            }

            parameterSearch = SearchSingleCase(
                CollaborativeDL_Matlab_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            constructor_args = [URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]]
            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=constructor_args,
                FIT_KEYWORD_ARGS={})

            # The last-test arguments differ only in the URM
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=collaborativeDL_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeDL_Matlab_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                CollaborativeDL_Matlab_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows of URM_test holding at least one stored entry
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[CollaborativeDL_Matlab_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        # (file tag, metrics to report, cutoffs to report), in export order
        latex_tables = (
            ("article_metrics",
             ["RECALL"],
             [50, 100, 150, 200, 250, 300]),
            ("all_metrics",
             ["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
              "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
              "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
              "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
             [150]),
        )

        for table_tag, table_metrics, table_cutoffs in latex_tables:
            result_loader.generate_latex_results(
                file_name + "{}_latex_results.txt".format(table_tag),
                metrics_list=table_metrics,
                cutoffs_list=table_cutoffs,
                table_title=None,
                highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
# Export the LaTeX result tables for the baselines and CoupledCF.
# NOTE(review): the enclosing definition is outside this view; the statements
# are laid out here at the column the fragment starts at.

# Number of rows of URM_test holding at least one stored entry
n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)

file_name = "{}..//{}_{}_".format(
    result_baselines_folder_path,
    ALGORITHM_NAME,
    input_flags.dataset_name,
)

KNN_similarity_to_report_list = [
    "cosine",
    "dice",
    "jaccard",
    "asymmetric",
    "tversky",
]

# Put results for the CNN algorithm in the baseline folder for it to be
# subsequently loaded: read the saved search metadata from the ablation
# folder, then write it unchanged under the same name in the baseline folder.
dataIO = DataIO(folder_path = output_folder_path + "fit_ablation_all_map/all_map_0/" )
search_metadata = dataIO.load_data(
    CoupledCF_RecommenderWrapper.RECOMMENDER_NAME + "_metadata"
)

dataIO = DataIO(folder_path = result_baselines_folder_path)
dataIO.save_data(
    CoupledCF_RecommenderWrapper.RECOMMENDER_NAME + "_metadata",
    search_metadata,
)

result_loader = ResultFolderLoader(
    result_baselines_folder_path,
    base_algorithm_list = None,
    other_algorithm_list = [CoupledCF_RecommenderWrapper],
    KNN_similarity_list = KNN_similarity_to_report_list,
    ICM_names_list = None,
    UCM_names_list = None,
)

result_loader.generate_latex_results(
    file_name + "{}_latex_results.txt".format("article_metrics"),
    metrics_list = ["HIT_RATE", "NDCG"],
    cutoffs_list = [1, 5, 10],
    table_title = None,
    highlight_best = True,
)

result_loader.generate_latex_time_statistics(
    file_name + "{}_latex_results.txt".format("time"),
    n_evaluation_users=n_test_users,
    table_title = None,
)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Load a CoupledCF data split, optionally tune baselines, run DeepCF and CoupledCF, export results.

    Args:
        dataset_name: Must start with ``"movielens1m"`` or ``"tafeng"`` and end
            with ``"_original"`` or ``"_ours"``. For any other name a message
            is printed and the function returns without doing anything else.
        flag_baselines_tune: When true, run the hyperparameter search for the
            collaborative baselines and for the item/user content-based and
            hybrid baselines.
        flag_DL_article_default: When true, fit and evaluate
            ``DeepCF_RecommenderWrapper`` and ``CoupledCF_RecommenderWrapper``
            with the hyperparameters hard-coded below.
        flag_DL_tune: Accepted for interface compatibility; not read here.
        flag_print_results: When true, write the LaTeX result and timing tables.
    """
    result_folder_path = "result_experiments/IJCAI/CoupledCF_{}/".format(
        dataset_name)

    # The name prefix selects the reader and the item feature matrix to use
    if dataset_name.startswith("movielens1m"):
        reader_class = Movielens1MReader
        item_features_key = "ICM_all"
    elif dataset_name.startswith("tafeng"):
        reader_class = TafengReader
        item_features_key = "ICM_original"
    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    # The name suffix selects which variant of the split the reader provides
    if dataset_name.endswith("_original"):
        reader_type = 'original'
    elif dataset_name.endswith("_ours"):
        reader_type = 'ours'
    else:
        print("Dataset name not supported, current is {}".format(
            dataset_name))
        return

    dataset = reader_class(result_folder_path, type=reader_type)

    UCM_to_report = ["UCM_all"]
    ICM_to_report = [item_features_key]

    # User feature matrices are stored in ICM_DICT next to the item ones
    UCM_CoupledCF = dataset.ICM_DICT["UCM_all"]
    ICM_CoupledCF = dataset.ICM_DICT[item_features_key]

    print('Current dataset is: {}'.format(dataset_name))

    # Split the feature matrices by kind according to their name
    UCM_dict = {}
    ICM_dict = {}
    for feature_name, feature_matrix in dataset.ICM_DICT.items():
        if "UCM" in feature_name:
            UCM_dict[feature_name] = feature_matrix
        if "ICM" in feature_name:
            ICM_dict[feature_name] = feature_matrix

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Matrices are 1-indexed, so remove first row
    print_negative_items_stats(URM_train[1:], URM_validation[1:],
                               URM_test[1:], URM_test_negative[1:])

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [5]
    cutoff_list_test = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        def _tune_content_then_hybrid(content_class, hybrid_class,
                                      feature_name, feature_matrix):
            # Runs the content-based search and then the hybrid search on one
            # feature matrix; each search gets its own copy of the matrix.
            runParameterSearch_Content(
                content_class,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=result_folder_path,
                parallelizeKNN=False,
                allow_weighting=True,
                resume_from_saved=True,
                ICM_name=feature_name,
                ICM_object=feature_matrix.copy(),
                n_cases=n_cases,
                n_random_starts=n_random_starts)

            runParameterSearch_Hybrid(
                hybrid_class,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=result_folder_path,
                parallelizeKNN=False,
                allow_weighting=True,
                resume_from_saved=True,
                ICM_name=feature_name,
                ICM_object=feature_matrix.copy(),
                n_cases=n_cases,
                n_random_starts=n_random_starts)

        # A failure on one baseline is reported and must not stop the others
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ###############################################################################################
        ##### Item Content Baselines

        for ICM_name, ICM_object in ICM_dict.items():
            try:
                _tune_content_then_hybrid(ItemKNNCBFRecommender,
                                          ItemKNN_CFCBF_Hybrid_Recommender,
                                          ICM_name, ICM_object)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### User Content Baselines

        for UCM_name, UCM_object in UCM_dict.items():
            try:
                _tune_content_then_hybrid(UserKNNCBFRecommender,
                                          UserKNN_CFCBF_Hybrid_Recommender,
                                          UCM_name, UCM_object)
            except Exception as e:
                print("On CBF recommender for UCM {} Exception {}".format(
                    UCM_name, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        model_name = dataset.DATASET_NAME

        # The two datasets differ only in the learning rate and, for
        # CoupledCF, in the dataset label handed to the wrapper
        is_tafeng = 'tafeng' in dataset_name
        article_learning_rate = 0.005 if is_tafeng else 0.001

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize
        }

        ###### DeepCF
        model_number = 3
        article_hyperparameters = {
            'learning_rate': article_learning_rate,
            'epochs': 100,
            'n_negative_sample': 4,
            'temp_file_folder': None,
            'dataset_name': model_name,
            'number_model': model_number,
            'verbose': 0,
            'plot_model': False,
        }

        parameterSearch = SearchSingleCase(
            DeepCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        # The last-test arguments differ only in the URM (train + validation)
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = \
            URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=DeepCF_RecommenderWrapper.RECOMMENDER_NAME)

        ###### CoupledCF (uses model number 2 for both datasets)
        model_number = 2
        article_hyperparameters = {
            'learning_rate': article_learning_rate,
            'epochs': 100,
            'n_negative_sample': 4,
            'temp_file_folder': None,
            'dataset_name': "Tafeng" if is_tafeng else "Movielens1M",
            'number_model': model_number,
            'verbose': 0,
            'plot_model': False,
        }

        parameterSearch = SearchSingleCase(
            CoupledCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_CoupledCF, ICM_CoupledCF],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = \
            URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=CoupledCF_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Number of rows of URM_test holding at least one stored entry
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DeepCF_RecommenderWrapper, CoupledCF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_to_report,
            UCM_names_list=UCM_to_report)

        # (file tag, metrics to report, cutoffs to report), in export order
        latex_tables = (
            ("article_metrics",
             ["HIT_RATE", "NDCG"],
             [1, 5, 10]),
            ("beyond_accuracy_metrics",
             ["DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
              "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
             [5]),
            ("all_metrics",
             ["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
              "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
              "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
              "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
             [5]),
        )

        for table_tag, table_metrics, table_cutoffs in latex_tables:
            result_loader.generate_latex_results(
                file_name + "{}_latex_results.txt".format(table_tag),
                metrics_list=table_metrics,
                cutoffs_list=table_cutoffs,
                table_title=None,
                highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the NeuMF experiment on one dataset: baselines, NeuMF, reports.

    Args:
        dataset_name: Either "movielens1m" or "pinterest". Any other value
            is reported on stdout and the function returns without doing
            any further work.
        flag_baselines_tune: When true, run the hyperparameter search for
            every recommender class in the collaborative baseline list.
        flag_DL_article_default: When true, fit and evaluate
            NeuMF_RecommenderWrapper with the fixed hyperparameters
            defined below.
        flag_DL_tune: Accepted for signature parity; not used by this
            function.
        flag_print_results: When true, export the LaTeX result tables.

    Returns:
        None.
    """
    from Conferences.WWW.NeuMF_our_interface.Movielens1M.Movielens1MReader import Movielens1MReader
    from Conferences.WWW.NeuMF_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)
    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader(result_folder_path)
    else:
        # Without this branch `dataset` stays unbound and the function
        # fails later with an UnboundLocalError that hides the real cause.
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])

    # Create the output directory; exist_ok avoids the check-then-create race
    os.makedirs(result_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["Training data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [
            URM_train + URM_validation + URM_test,
            URM_train + URM_validation,
            URM_test,
        ],
        ["Full data", "Training data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "HIT_RATE"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    # Validation uses a single cutoff; the test evaluator reports 1..10.
    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=list(range(1, 11)))

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for recommender_class in collaborative_algorithm_list:
            # One failing baseline must not abort the remaining ones.
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        try:
            if dataset_name == "movielens1m":
                num_factors = 64
            elif dataset_name == "pinterest":
                num_factors = 16

            neuMF_article_hyperparameters = {
                "epochs": 100,
                "epochs_gmf": 100,
                "epochs_mlp": 100,
                "batch_size": 256,
                "num_factors": num_factors,
                "layers": [num_factors * 4, num_factors * 2, num_factors],
                "reg_mf": 0.0,
                "reg_layers": [0, 0, 0],
                "num_negatives": 4,
                "learning_rate": 1e-3,
                "learning_rate_pretrain": 1e-3,
                "learner": "sgd",
                "learner_pretrain": "adam",
                "pretrain": True,
            }

            neuMF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }

            parameterSearch = SearchSingleCase(
                NeuMF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=neuMF_earlystopping_hyperparameters)

            # The last-test arguments replace the train matrix with
            # train + validation.
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=neuMF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=NeuMF_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                NeuMF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Number of users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[NeuMF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=[1, 5, 10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the ConvNCF experiment on one dataset: baselines, ConvNCF, reports.

    Args:
        dataset_name: Either "gowalla" or "yelp"; any other value is
            reported on stdout and the function returns immediately.
        flag_baselines_tune: When true, run the hyperparameter search for
            every recommender class in the collaborative baseline list.
        flag_DL_article_default: When true, fit and evaluate
            ConvNCF_RecommenderWrapper with the fixed hyperparameters
            defined below, then close the ConvNCF session.
        flag_DL_tune: Accepted for signature parity; not used by this
            function.
        flag_print_results: When true, export the LaTeX result tables.

    Returns:
        None.
    """
    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "gowalla":
        dataset = GowallaReader(result_folder_path)
    elif dataset_name == "yelp":
        dataset = YelpReader(result_folder_path)
    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    # Work on private copies of the four split matrices
    URM_train, URM_validation, URM_test, URM_test_negative = (
        dataset.URM_DICT[split_name].copy()
        for split_name in ("URM_train", "URM_validation", "URM_test",
                           "URM_test_negative"))

    print_negative_items_stats(URM_train, URM_validation, URM_test,
                               URM_test_negative)

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    # URM_test_negative contains duplicates in both train and test,
    # so it is left out of the disjointness check
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test, URM_test_negative, cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for baseline_class in collaborative_algorithm_list:
            # A failing baseline is logged and the loop moves on
            try:
                runParameterSearch_Collaborative_partial(baseline_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    baseline_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        # Providing an empty matrix to URM_negative for the train samples
        article_hyperparameters = dict(
            batch_size=512,
            epochs=1500,
            epochs_MFBPR=500,
            embedding_size=64,
            hidden_size=128,
            negative_sample_per_positive=1,
            negative_instances_per_positive=4,
            regularization_users_items=0.01,
            regularization_weights=10,
            regularization_filter_weights=1,
            learning_rate_embeddings=0.05,
            learning_rate_CNN=0.05,
            channel_size=[32, 32, 32, 32, 32, 32],
            dropout=0.0,
            epoch_verbose=1,
        )

        earlystopping_hyperparameters = dict(
            validation_every_n=5,
            stop_on_validation=True,
            lower_validations_allowed=5,
            evaluator_object=evaluator_validation,
            validation_metric=metric_to_optimize,
            epochs_min=150,
        )

        parameterSearch = SearchSingleCase(
            ConvNCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        # The last-test arguments use train + validation as the train matrix
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME)

        # Remember to close the global session, since global variables are used
        ConvNCF.close_session(verbose=True)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Number of users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[ConvNCF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        # (file tag, metrics, cutoffs) for each LaTeX table, in output order
        latex_tables = [
            ("article_metrics", ["HIT_RATE", "NDCG"], cutoff_list_test),
            ("all_metrics", [
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ], cutoff_list_validation),
        ]

        for table_tag, table_metrics, table_cutoffs in latex_tables:
            result_loader.generate_latex_results(
                file_name + "{}_latex_results.txt".format(table_tag),
                metrics_list=table_metrics,
                cutoffs_list=table_cutoffs,
                table_title=None,
                highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the DELF experiment on one dataset: baselines, DELF models, reports.

    Args:
        dataset_name: Must contain 'amazon_music', 'movielens1m_ours' or
            'movielens1m_original'; otherwise a message is printed and the
            function returns. When it also contains "_remove_cold_items",
            cold items are passed to the evaluators as items to ignore.
        flag_baselines_tune: When true, run the hyperparameter search for
            every recommender class in the collaborative baseline list.
        flag_DL_article_default: When true, fit and evaluate
            DELF_MLP_RecommenderWrapper and then DELF_EF_RecommenderWrapper
            with the fixed hyperparameters defined below.
        flag_DL_tune: Accepted for signature parity; not used by this
            function.
        flag_print_results: When true, export the LaTeX result tables.

    Returns:
        None.
    """
    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    # Ensure both experiments use the same data: the dataset folder name
    # drops the "_remove_cold_items" suffix
    dataset_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME,
        dataset_name.replace("_remove_cold_items", ""))

    if not os.path.exists(dataset_folder_path):
        os.makedirs(dataset_folder_path)

    if 'amazon_music' in dataset_name:
        dataset = AmazonMusicReader(dataset_folder_path)
    elif 'movielens1m_ours' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="ours")
    elif 'movielens1m_original' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="original")
    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    # Work on private copies of the four split matrices
    URM_train, URM_validation, URM_test, URM_test_negative = (
        dataset.URM_DICT[split_name].copy()
        for split_name in ("URM_train", "URM_validation", "URM_test",
                           "URM_test_negative"))

    # Ensure IMPLICIT data and DISJOINT matrices
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    cold_items_statistics(URM_train, URM_validation, URM_test,
                          URM_test_negative)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["Training data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [
            URM_train + URM_validation + URM_test,
            URM_train + URM_validation,
            URM_test,
        ],
        ["Full data", "Training data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    # Cold items are ignored only for the "_remove_cold_items" variant
    remove_cold_items = "_remove_cold_items" in dataset_name
    ignore_items_validation = (get_cold_items(URM_train)
                               if remove_cold_items else None)
    ignore_items_test = (get_cold_items(URM_train + URM_validation)
                         if remove_cold_items else None)

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation,
        URM_test_negative,
        cutoff_list=cutoff_list_validation,
        ignore_items=ignore_items_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test,
        URM_test_negative,
        cutoff_list=cutoff_list_test,
        ignore_items=ignore_items_test)

    # The Evaluator automatically skips users with no test interactions.
    # In this case we need the evaluation done with and without cold items
    # to be comparable, so we ensure the users that are included in the
    # evaluation are the same in both cases.
    evaluator_validation.users_to_evaluate = np.arange(URM_train.shape[0])
    evaluator_test.users_to_evaluate = np.arange(URM_train.shape[0])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for baseline_class in collaborative_algorithm_list:
            # A failing baseline is logged and the loop moves on
            try:
                runParameterSearch_Collaborative_partial(baseline_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    baseline_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
        }

        num_factors = 64

        article_hyperparameters = {
            'epochs': 500,
            'learning_rate': 0.001,
            'batch_size': 256,
            'num_negatives': 4,
            'layers': (num_factors * 4, num_factors * 2, num_factors),
            'regularization_layers': (0, 0, 0),
            'learner': 'adam',
            'verbose': False,
        }

        # Both DELF variants share the same hyperparameters; MLP runs first
        for delf_wrapper_class in (DELF_MLP_RecommenderWrapper,
                                   DELF_EF_RecommenderWrapper):
            parameterSearch = SearchSingleCase(
                delf_wrapper_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

            # The last-test arguments use train + validation as train matrix
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=delf_wrapper_class.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Number of users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DELF_MLP_RecommenderWrapper, DELF_EF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        # (file tag, metrics, cutoffs) for each LaTeX table, in output order
        latex_tables = [
            ("article_metrics", ["HIT_RATE", "NDCG"], cutoff_list_test),
            ("all_metrics", [
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ], [10]),
        ]

        for table_tag, table_metrics, table_cutoffs in latex_tables:
            result_loader.generate_latex_results(
                file_name + "{}_latex_results.txt".format(table_tag),
                metrics_list=table_metrics,
                cutoffs_list=table_cutoffs,
                table_title=None,
                highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               cold_start=False,
                               cold_items=None,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the SpectralCF experiment on one dataset: baselines, SpectralCF, reports.

    Args:
        dataset_name: One of "movielens1m_original", "movielens1m_ours",
            "hetrec" or "amazon_instant_video". Any other value is reported
            on stdout and the function returns without further work.
        cold_start: When true, results go to a "cold" result folder and
            shorter cutoff lists are used. Only "movielens1m_ours" accepts
            it; the other datasets assert that it is not True.
        cold_items: Forwarded to the "movielens1m_ours" reader and used in
            the cold-start folder and file names.
        flag_baselines_tune: When true, run the hyperparameter search for
            every recommender class in the collaborative baseline list.
        flag_DL_article_default: When true, fit and evaluate
            SpectralCF_RecommenderWrapper with the fixed hyperparameters
            defined below.
        flag_DL_tune: When true, run runParameterSearch_SpectralCF.
        flag_print_results: When true, export the LaTeX result tables.

    Returns:
        None.
    """
    if not cold_start:
        result_folder_path = "result_experiments/{}/{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)
    else:
        result_folder_path = "result_experiments/{}/{}_cold_{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, cold_items, dataset_name)

    if dataset_name == "movielens1m_original":
        assert (cold_start is not True)
        dataset = Movielens1MReader(result_folder_path, type="original")
    elif dataset_name == "movielens1m_ours":
        dataset = Movielens1MReader(result_folder_path,
                                    type="ours",
                                    cold_start=cold_start,
                                    cold_items=cold_items)
    elif dataset_name == "hetrec":
        assert (cold_start is not True)
        dataset = MovielensHetrec2011Reader(result_folder_path)
    elif dataset_name == "amazon_instant_video":
        assert (cold_start is not True)
        dataset = AmazonInstantVideoReader(result_folder_path)
    else:
        # Without this branch `dataset` stays unbound and the function
        # fails later with an UnboundLocalError that hides the real cause.
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # Create the output directory; exist_ok avoids the check-then-create race
    os.makedirs(result_folder_path, exist_ok=True)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias(
        [URM_train + URM_validation, URM_test],
        ["Train data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics(
        [
            URM_train + URM_validation + URM_test,
            URM_train + URM_validation,
            URM_test,
        ],
        ["URM_all", "URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    if not cold_start:
        cutoff_list_validation = [50]
        cutoff_list_test = [20, 30, 40, 50, 60, 70, 80, 90, 100]
    else:
        cutoff_list_validation = [20]
        cutoff_list_test = [20]

    evaluator_validation = EvaluatorHoldout(
        URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    ################################################################################################
    ###### KNN CF

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for recommender_class in collaborative_algorithm_list:
            # One failing baseline must not abort the remaining ones.
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        try:
            spectralCF_article_hyperparameters = {
                "epochs": 1000,
                "batch_size": 1024,
                "embedding_size": 16,
                "decay": 0.001,
                "k": 3,
                "learning_rate": 1e-3,
            }

            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
            }

            parameterSearch = SearchSingleCase(
                SpectralCF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=spectralCF_earlystopping_hyperparameters)

            # The last-test arguments replace the train matrix with
            # train + validation.
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            # The "_article_default" suffix keeps these output files apart
            # from the ones written by the tuning branch below.
            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=spectralCF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=SpectralCF_RecommenderWrapper.RECOMMENDER_NAME
                + "_article_default")

        except Exception as e:
            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    if flag_DL_tune:
        try:
            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
                "epochs": 2000,
            }

            runParameterSearch_SpectralCF(
                SpectralCF_RecommenderWrapper,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                earlystopping_hyperparameters=spectralCF_earlystopping_hyperparameters,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=result_folder_path,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                output_file_name_root=SpectralCF_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Number of users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(
            result_folder_path,
            ALGORITHM_NAME if not cold_start else "{}_cold_{}".format(
                ALGORITHM_NAME, cold_items),
            dataset_name)

        if cold_start:
            cutoffs_to_report_list = [20]
        else:
            cutoffs_to_report_list = [20, 40, 60, 80, 100]

        # NOTE(review): other_algorithm_list is not assigned anywhere in this
        # function; presumably it is a module-level name. Confirm it exists,
        # otherwise this call raises NameError.
        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=other_algorithm_list,
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL", "MAP"],
            cutoffs_list=cutoffs_to_report_list,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name +
            "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_variant,
                               train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the CollaborativeVAE experiment on one Citeulike split.

    Args:
        dataset_variant: Forwarded to CiteulikeReader and used in the
            result folder and file names.
        train_interactions: Forwarded to CiteulikeReader. When equal to 1,
            validation is not required to be disjoint from train, seen
            items are not excluded during validation, and the final model
            is trained on URM_train alone.
        flag_baselines_tune: When true, run the hyperparameter search for
            the collaborative baselines, then the content-based and hybrid
            baselines for every ICM left in the dataset.
        flag_DL_article_default: When true, fit and evaluate
            CollaborativeVAE_RecommenderWrapper with the fixed
            hyperparameters defined below.
        flag_DL_tune: Accepted for signature parity; not used by this
            function.
        flag_print_results: When true, export the LaTeX result tables.

    Returns:
        None.
    """
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    # Work on private copies of the three split matrices
    URM_train, URM_validation, URM_test = (
        dataset.URM_DICT[split_name].copy()
        for split_name in ("URM_train", "URM_validation", "URM_test"))

    # Drop this ICM so the content/hybrid loops and the report skip it
    del dataset.ICM_DICT["ICM_tokens_bool"]

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, choosing an evaluation as subset
    # of the train, while keeping validation interactions in the train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to
        # avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])

        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])

        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation,
        cutoff_list=[150],
        exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(
        URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train_last_test,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for baseline_class in collaborative_algorithm_list:
            # A failing baseline is logged and the loop moves on
            try:
                runParameterSearch_Collaborative_partial(baseline_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    baseline_class, str(e)))
                traceback.print_exc()

        # Keyword arguments shared by the content-based and hybrid searches
        icm_search_kwargs = dict(
            URM_train=URM_train,
            URM_train_last_test=URM_train_last_test,
            metric_to_optimize=metric_to_optimize,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test,
            output_folder_path=result_folder_path,
            parallelizeKNN=False,
            allow_weighting=True,
            resume_from_saved=True,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
        )

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(ItemKNNCBFRecommender,
                                           ICM_name=ICM_name,
                                           ICM_object=ICM_object.copy(),
                                           **icm_search_kwargs)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                          ICM_name=ICM_name,
                                          ICM_object=ICM_object.copy(),
                                          **icm_search_kwargs)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        try:
            cvae_recommender_article_hyperparameters = {
                "epochs": 200,
                "learning_rate_vae": 1e-2,
                "learning_rate_cvae": 1e-3,
                "num_factors": 50,
                "dimensions_vae": [200, 100],
                "epochs_vae": [50, 50],
                "batch_size": 128,
                "lambda_u": 0.1,
                "lambda_v": 10,
                "lambda_r": 1,
                "a": 1,
                "b": 0.01,
                "M": 300,
            }

            cvae_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }

            parameterSearch = SearchSingleCase(
                CollaborativeVAE_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]
                ],
                FIT_KEYWORD_ARGS=cvae_earlystopping_hyperparameters)

            # The last-test arguments swap in the final training matrix
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=cvae_recommender_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeVAE_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                CollaborativeVAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Number of users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        # NOTE(review): other_algorithm_list is not assigned anywhere in this
        # function; presumably it is a module-level name. Confirm it exists.
        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=other_algorithm_list,
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        # (file tag, metrics, cutoffs) for each LaTeX table, in output order
        latex_tables = [
            ("article_metrics", ["RECALL"], [50, 100, 150, 200, 250, 300]),
            ("all_metrics", [
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ], [150]),
        ]

        for table_tag, table_metrics, table_cutoffs in latex_tables:
            result_loader.generate_latex_results(
                file_name + "{}_latex_results.txt".format(table_tag),
                metrics_list=table_metrics,
                cutoffs_list=table_cutoffs,
                table_title=None,
                highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Run the NeuRec experiment pipeline for a single dataset.

    Depending on the flags, the function tunes the collaborative baselines,
    fits UNeuRec and INeuRec with the article hyperparameters and exports the
    LaTeX result tables. All artifacts are written under
    "result_experiments/<CONFERENCE_NAME>/<ALGORITHM_NAME>_<dataset_name>/".

    :param dataset_name: one of "movielens1m", "hetrec", "filmtrust", "frappe".
        Any other value prints a message and returns without doing anything.
    :param flag_baselines_tune: if True, run runParameterSearch_Collaborative
        on every baseline in the collaborative algorithm list.
    :param flag_DL_article_default: if True, run a SearchSingleCase for
        UNeuRec_RecommenderWrapper and INeuRec_RecommenderWrapper using the
        article hyperparameters.
    :param flag_DL_tune: accepted for interface compatibility; it is not read
        by this function.
    :param flag_print_results: if True, generate the LaTeX result files via
        ResultFolderLoader.
    :return: None
    """

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    # Hyperparameters shared by movielens1m and hetrec; the other datasets
    # override only the entries that differ.
    article_hyperparameters = {
        'num_neurons': 300,
        'num_factors': 50,
        'dropout_percentage': 0.03,
        'learning_rate': 1e-4,
        'regularization_rate': 0.1,
        'epochs': 1500,
        'batch_size': 1024,
        'display_epoch': None,
        'display_step': None,
        'verbose': True,
    }
    early_stopping_epochs_min = 800

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)

    elif dataset_name == "hetrec":
        dataset = MovielensHetrec2011Reader(result_folder_path)

    elif dataset_name == "filmtrust":
        dataset = FilmTrustReader(result_folder_path)
        article_hyperparameters.update({
            'num_neurons': 150,
            'num_factors': 40,
            'dropout_percentage': 0.00,
            'learning_rate': 5e-5,
            'epochs': 100,
        })
        early_stopping_epochs_min = 0

    elif dataset_name == "frappe":
        dataset = FrappeReader(result_folder_path)
        article_hyperparameters.update({
            'regularization_rate': 0.01,
            'epochs': 100,
        })
        early_stopping_epochs_min = 0

    else:
        # Without this branch `dataset` would be unbound and the function
        # would crash with UnboundLocalError a few lines below.
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # Create the output directory if needed (no check-then-create race)
    os.makedirs(result_folder_path, exist_ok=True)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # use max cutoff to compute full MAP and NDCG
    max_cutoff = URM_train.shape[1] - 1

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 50, max_cutoff]

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # A failure on one baseline is reported but must not stop the others
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 20,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
            'epochs_min': early_stopping_epochs_min,
        }

        # Same single-case search for both NeuRec variants, UNeuRec first
        for recommender_class in [UNeuRec_RecommenderWrapper, INeuRec_RecommenderWrapper]:

            try:
                parameterSearch = SearchSingleCase(
                    recommender_class,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test)

                recommender_input_args = SearchInputRecommenderArgs(
                    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                    FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

                # The final model is built on train + validation
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

                parameterSearch.search(
                    recommender_input_args,
                    recommender_input_args_last_test=recommender_input_args_last_test,
                    fit_hyperparameters_values=article_hyperparameters,
                    output_folder_path=result_folder_path,
                    resume_from_saved=True,
                    output_file_name_root=recommender_class.RECOMMENDER_NAME)

            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    # NOTE: hyperparameter tuning of UNeuRec/INeuRec is not performed here;
    # the former calls to runParameterSearch_NeuRec were commented out and
    # flag_DL_tune is therefore unused.

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Rows of URM_test with a non-empty indptr span
        # (assumes URM_test is a CSR matrix with one row per user -- TODO confirm)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[INeuRec_RecommenderWrapper, UNeuRec_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP", "NDCG", "MRR"],
            cutoffs_list=[5, 10, 50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=["DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM",
                          "DIVERSITY_GINI", "SHANNON_ENTROPY"],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR",
                          "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                          "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune = False,
                               flag_DL_article_default = False,
                               flag_DL_tune = False,
                               flag_print_results = False):
    """Run the DMF experiment pipeline for a single dataset.

    Loads the requested dataset, checks that the data splits are disjoint,
    saves popularity plots/statistics and then, depending on the flags, tunes
    the collaborative baselines and exports the LaTeX result tables. Output is
    written under "result_experiments/<CONFERENCE_NAME>/<ALGORITHM_NAME>_<dataset_name>/".

    :param dataset_name: one of 'amazon_music_original', 'amazon_music_ours',
        'amazon_movie', 'movielens100k', 'movielens1m'. Any other value prints
        a message and returns immediately.
    :param flag_baselines_tune: if True, run runParameterSearch_Collaborative
        on every baseline in the collaborative algorithm list.
    :param flag_DL_article_default: if True, only the article hyperparameter
        dictionaries are built; the DMF fitting code itself is not present.
    :param flag_DL_tune: not read by this function.
    :param flag_print_results: if True, generate the LaTeX result files via
        ResultFolderLoader.
    :return: None
    """

    result_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    # Lazy builders: only the reader of the requested dataset is instantiated
    dataset_builders = {
        'amazon_music_original': lambda: AmazonMusicReader(result_folder_path, original = True),
        'amazon_music_ours': lambda: AmazonMusicReader(result_folder_path, original = False),
        'amazon_movie': lambda: AmazonMovieReader(result_folder_path),
        'movielens100k': lambda: Movielens100KReader(result_folder_path),
        'movielens1m': lambda: Movielens1MReader(result_folder_path),
    }

    build_dataset = dataset_builders.get(dataset_name)

    if build_dataset is None:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    dataset = build_dataset()

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Only disjointness is verified: the implicit-data check is skipped on
    # purpose because the algorithm needs explicit data
    assert_disjoint_matrices([URM_train, URM_validation, URM_test, URM_test_negative])

    cold_items_statistics(URM_train, URM_validation, URM_test, URM_test_negative)

    # Make sure the output directory is there before anything is written
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)
    output_file_prefix = result_folder_path + algorithm_dataset_string

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Training data", "Test data"],
                         output_file_prefix + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation + URM_test, URM_train + URM_validation, URM_test],
                               ["Full data", "Training data", "Test data"],
                               output_file_prefix + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative,
                                                       cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative,
                                                 cutoff_list=cutoff_list_test)

    # Arguments common to every baseline search; only the recommender class varies
    baseline_search_kwargs = {
        "URM_train": URM_train,
        "URM_train_last_test": URM_train + URM_validation,
        "metric_to_optimize": metric_to_optimize,
        "evaluator_validation_earlystopping": evaluator_validation,
        "evaluator_validation": evaluator_validation,
        "evaluator_test": evaluator_test,
        "output_folder_path": result_folder_path,
        "parallelizeKNN": False,
        "allow_weighting": True,
        "resume_from_saved": True,
        "n_cases": n_cases,
        "n_random_starts": n_random_starts,
    }

    runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                       **baseline_search_kwargs)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # Report the failure of a single baseline and move on to the next one
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    # NOTICE: We did not upload the source code of DMF as it was not publicly available
    # and the original authors did not respond to our request to add it to this repository

    if flag_DL_article_default:

        is_amazon_music = dataset_name in ('amazon_music_original', 'amazon_music_ours')
        last_layer_size = 128 if is_amazon_music else 64

        article_hyperparameters = {
            'epochs': 300,
            'learning_rate': 0.0001,
            'batch_size': 256,
            'num_negatives': 7,  # As reported in the "Detailed implementation" section of the original paper
            'last_layer_size': last_layer_size,
        }

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
        }

        # The SearchSingleCase runs for DMF_NCE_RecommenderWrapper and
        # DMF_BCE_RecommenderWrapper (constructor args [URM_train], last test on
        # URM_train + URM_validation, fitted with the two dictionaries above) were
        # commented out in the original script, so nothing is trained here.

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Rows of URM_test with a non-empty indptr span
        # (assumes URM_test is a CSR matrix with one row per user -- TODO confirm)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        # NOTE(review): the DMF wrappers are referenced here although the notice above
        # says their source was not uploaded -- confirm they are importable.
        loader_kwargs = {
            "base_algorithm_list": None,
            "other_algorithm_list": [DMF_NCE_RecommenderWrapper, DMF_BCE_RecommenderWrapper],
            "KNN_similarity_list": KNN_similarity_to_report_list,
            "ICM_names_list": None,
            "UCM_names_list": None,
        }
        result_loader = ResultFolderLoader(result_folder_path, **loader_kwargs)

        article_metrics_file = file_name + "{}_latex_results.txt".format("article_metrics")
        result_loader.generate_latex_results(article_metrics_file,
                                             metrics_list = ["HIT_RATE", "NDCG"],
                                             cutoffs_list = cutoff_list_validation,
                                             table_title = None,
                                             highlight_best = True)

        all_metrics_file = file_name + "{}_latex_results.txt".format("all_metrics")
        result_loader.generate_latex_results(all_metrics_file,
                                             metrics_list = ["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                                                             "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                                                             "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                                                             "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                             cutoffs_list = [10],
                                             table_title = None,
                                             highlight_best = True)

        time_statistics_file = file_name + "{}_latex_results.txt".format("time")
        result_loader.generate_latex_time_statistics(time_statistics_file,
                                                     n_evaluation_users=n_test_users,
                                                     table_title = None)