def runParameterSearch_NeuRec(recommender_class, URM_train, earlystopping_hyperparameters, output_file_name_root, URM_train_last_test = None, n_cases = 35, n_random_starts = 5, evaluator_validation= None, evaluator_test=None, metric_to_optimize = "RECALL", output_folder_path ="result_experiments/"):
    """Run a Bayesian (skopt) hyperparameter search for a NeuRec-style recommender.

    The explored space covers network width, latent factors, dropout, learning
    rate, regularization and batch size. Results are written under
    ``output_folder_path`` using ``output_file_name_root`` as file prefix.
    """

    # Create the output directory on first use
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    bayesian_search = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    # Hyperparameter ranges explored by the optimizer
    search_space = {
        "epochs": Categorical([1500]),
        "num_neurons": Integer(100, 400),
        "num_factors": Integer(20, 70),
        "dropout_percentage": Real(low=0.0, high=0.1, prior='uniform'),
        "learning_rate": Real(low=1e-5, high=1e-3, prior='log-uniform'),
        "regularization_rate": Real(low=0.0, high=0.2, prior='uniform'),
        "batch_size": Categorical([128, 256, 512, 1024, 2048]),
    }

    # Fixed fit arguments: early-stopping settings plus training flags
    fit_keyword_args = dict(earlystopping_hyperparameters)
    fit_keyword_args.update({"use_gpu": False,
                             "epochs_min": 200,
                             "display_epoch": None,
                             "display_step": None,
                             "verbose": False})

    base_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=fit_keyword_args)

    # When a last-test URM is given, the best configuration is re-fit on it
    if URM_train_last_test is None:
        last_test_input_args = None
    else:
        last_test_input_args = base_input_args.copy()
        last_test_input_args.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

    bayesian_search.search(base_input_args,
                           parameter_search_space=search_space,
                           n_cases=n_cases,
                           n_random_starts=n_random_starts,
                           output_folder_path=output_folder_path,
                           output_file_name_root=output_file_name_root,
                           metric_to_optimize=metric_to_optimize,
                           resume_from_saved=True,
                           recommender_input_args_last_test=last_test_input_args)
def run_train_with_early_stopping(dataset_name, URM_train, URM_validation, UCM_CoupledCF, ICM_CoupledCF, evaluator_validation, evaluator_test, metric_to_optimize, result_folder_path, map_mode):
    """Fit CoupledCF with the article hyperparameters under early stopping,
    re-fit on train+validation, and return the saved search metadata."""

    # Create the output directory on first use
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    # Fixed hyperparameters from the original article, plus the ablation map mode
    article_hyperparameters = get_hyperparameters_for_dataset(dataset_name)
    article_hyperparameters["map_mode"] = map_mode

    # Early stopping: validate every 5 epochs, allow 5 non-improving checks
    earlystopping_hyperparameters = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize,
    }

    single_case_search = SearchSingleCase(CoupledCF_RecommenderWrapper,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_CoupledCF, ICM_CoupledCF],
        FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

    # Same arguments, but trained on the union of train and validation data
    last_test_args = input_args.copy()
    last_test_args.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

    single_case_search.search(input_args,
                              recommender_input_args_last_test=last_test_args,
                              fit_hyperparameters_values=article_hyperparameters,
                              output_folder_path=result_folder_path,
                              output_file_name_root=CoupledCF_RecommenderWrapper.RECOMMENDER_NAME,
                              save_model="last",
                              resume_from_saved=True,
                              evaluate_on_test="last")

    # Reload and return the metadata the search just produced
    metadata_io = DataIO(result_folder_path)
    return metadata_io.load_data(file_name=CoupledCF_RecommenderWrapper.RECOMMENDER_NAME + "_metadata.zip")
def run_permutation_BPRMF(output_folder_path, permutation_index, USER_factors_perm, ITEM_factors_perm):
    """Evaluate a BPRMF model rebuilt from a permutation of its latent factors.

    Parameters
    ----------
    output_folder_path : base folder; results go to ``<base>/BPRMF/BPRMF_<index>/``
    permutation_index : integer identifying this permutation
    USER_factors_perm : permuted user latent-factor matrix, shape (n_users, n_factors)
    ITEM_factors_perm : permuted item latent-factor matrix, shape (n_items, n_factors)

    NOTE(review): this function also reads ``n_users``, ``n_factors``, ``n_items``,
    ``URM_train``, ``URM_validation`` and ``evaluator_test`` from the enclosing
    module scope — TODO confirm they are defined where this is called.
    """
    output_folder_path_permutation = output_folder_path + "{}/{}_{}/".format("BPRMF", "BPRMF", permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    # Sanity-check the permuted factors against the expected global dimensions
    assert USER_factors_perm.shape == (n_users, n_factors)
    assert ITEM_factors_perm.shape == (n_items, n_factors)

    parameterSearch = SearchSingleCase(MatrixFactorizationCustomFactorsRecommender,
                                       evaluator_validation=None,
                                       evaluator_test=evaluator_test)

    # BUGFIX: previously this passed the module-level USER_factors/ITEM_factors,
    # silently ignoring the permuted factors received as arguments (which are the
    # ones the asserts above validate). Pass the permuted matrices instead.
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train + URM_validation],
        FIT_KEYWORD_ARGS={
            "USER_factors": USER_factors_perm,
            "ITEM_factors": ITEM_factors_perm,
        })

    parameterSearch.search(recommender_input_args,
                           save_model="no",
                           resume_from_saved=True,
                           fit_hyperparameters_values={},
                           output_folder_path=output_folder_path_permutation,
                           output_file_name_root=MatrixFactorizationCustomFactorsRecommender.RECOMMENDER_NAME)
def pretrain_MFBPR( URM_train, URM_train_full, evaluator_validation, evaluator_test, result_folder_path, metric_to_optimize, ):
    """Pretrain MF-BPR with early stopping on the validation split, then re-fit
    on the full training data and evaluate on the test evaluator."""

    # Training configuration reported in the original article
    article_hyperparameters = {
        "batch_size": 512,
        "epochs": 500,
        "embed_size": 64,
        "negative_sample_per_positive": 1,
        "learning_rate": 0.05,
        "path_partial_results": result_folder_path,
    }

    # Early stopping driven by the validation evaluator
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize,
    }

    single_case = SearchSingleCase(MFBPR_Wrapper,
                                   evaluator_validation=evaluator_validation,
                                   evaluator_test=evaluator_test)

    tuning_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        FIT_KEYWORD_ARGS=earlystopping_keywargs)

    # Final fit uses the full training data, without early-stopping arguments
    final_fit_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full])

    single_case.search(tuning_args,
                       recommender_input_args_last_test=final_fit_args,
                       fit_hyperparameters_values=article_hyperparameters,
                       output_folder_path=result_folder_path,
                       output_file_name_root=MFBPR_Wrapper.RECOMMENDER_NAME,
                       save_model="last",
                       resume_from_saved=True,
                       evaluate_on_test="last")
def run_parameter_search_VNN(recommender_class, URM_train, metric_to_optimize="PRECISION", evaluator_validation=None, evaluator_test=None, output_folder_path="result_experiments/", parallelizeKNN=True, n_cases=35, n_random_starts=5, resume_from_saved=False, save_model="best", allow_weighting=True, similarity_type_list=None):
    """Tune a similarity-based VNN recommender, launching one Bayesian search
    per similarity type, optionally in parallel across worker processes."""

    # Create the output directory on first use
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_class.RECOMMENDER_NAME

    search_engine = SearchBayesianSkopt(recommender_class,
                                        evaluator_validation=evaluator_validation,
                                        evaluator_test=evaluator_test)

    # Default set of similarities to explore
    if similarity_type_list is None:
        similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

    base_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    # Bind every shared argument; the similarity type is the only free parameter
    search_single_similarity = partial(run_VKNNRecommender_on_similarity_type,
                                       recommender_input_args=base_input_args,
                                       parameter_search_space={},
                                       parameterSearch=search_engine,
                                       n_cases=n_cases,
                                       n_random_starts=n_random_starts,
                                       resume_from_saved=resume_from_saved,
                                       save_model=save_model,
                                       output_folder_path=output_folder_path,
                                       output_file_name_root=output_file_name_root,
                                       metric_to_optimize=metric_to_optimize,
                                       allow_weighting=allow_weighting,
                                       recommender_input_args_last_test=None)

    if parallelizeKNN:
        # One worker per core; recycle each worker after a single task
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
        pool.map(search_single_similarity, similarity_type_list)
        pool.close()
        pool.join()
    else:
        for similarity_type in similarity_type_list:
            search_single_similarity(similarity_type)

    return
def pretrain_FMwrapper(URM_train_tuning_only,
                       URM_train_full,
                       evaluator_validation,
                       evaluator_test,
                       CFM_data_class_validation,
                       CFM_data_class_full,
                       result_folder_path: str,
                       metric_to_optimize: str,
                       dataset_name):
    """Select the best number of epochs for the FM wrapper via early stopping
    on the validation split, then re-fit on the full training data and
    evaluate the "last" model on the test evaluator."""

    # Hyperparameters published for this dataset
    article_hyperparameters = get_FM_hyperparameters_for_dataset(dataset_name)

    # Early stopping on the validation evaluator
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize,
    }

    single_case = SearchSingleCase(FM_Wrapper,
                                   evaluator_validation=evaluator_validation,
                                   evaluator_test=evaluator_test)

    tuning_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_tuning_only, CFM_data_class_validation],
        FIT_KEYWORD_ARGS=earlystopping_keywargs)

    # Final fit on the full data with the corresponding CFM data class
    final_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

    single_case.search(tuning_args,
                       recommender_input_args_last_test=final_args,
                       fit_hyperparameters_values=article_hyperparameters,
                       output_folder_path=result_folder_path,
                       output_file_name_root=FM_Wrapper.RECOMMENDER_NAME,
                       save_model="last",
                       resume_from_saved=True,
                       evaluate_on_test="last")
def run_train_with_early_stopping(output_folder_path, permutation_index, USER_factors_perm, ITEM_factors_perm, map_mode, metric_to_optimize, evaluator_validation, evaluator_test, URM_train, URM_validation):
    """Fit ConvNCF on one permutation of pretrained MF latent factors.

    Saves the permuted factors to disk, fits the ConvNCF wrapper with early
    stopping on the validation evaluator, then re-fits on train+validation and
    evaluates the "last" model on the test evaluator. Output goes to
    ``<output_folder_path>/fit_ablation_<map_mode>/<map_mode>_<permutation_index>/``.
    """
    output_folder_path_permutation = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    # NOTE(review): n_users, n_factors and n_items are not parameters —
    # presumably module-level globals of the enclosing script; TODO confirm.
    assert USER_factors_perm.shape == (n_users, n_factors)
    assert ITEM_factors_perm.shape == (n_items, n_factors)

    # Persist the permuted factors; the wrapper is configured below to load them
    # ("load_pretrained_MFBPR_if_available" with "MF_latent_factors_folder"
    # pointing at this folder).
    np.save(output_folder_path_permutation + "best_model_latent_factors", [USER_factors_perm, ITEM_factors_perm])

    # Fixed training configuration for ConvNCF
    optimal_hyperparameters = {
        "batch_size": 512,
        "epochs": 1500,
        "load_pretrained_MFBPR_if_available": True,
        "MF_latent_factors_folder": output_folder_path_permutation,
        "embedding_size": 64,
        "hidden_size": 128,
        "negative_sample_per_positive": 1,
        "negative_instances_per_positive": 4,
        "regularization_users_items": 0.01,
        "regularization_weights": 10,
        "regularization_filter_weights": 1,
        "learning_rate_embeddings": 0.05,
        "learning_rate_CNN": 0.05,
        "channel_size": [32, 32, 32, 32, 32, 32],
        "dropout": 0.0,
        "epoch_verbose": 1,
        "temp_file_folder": None,
    }

    # Interaction-map mode under ablation
    optimal_hyperparameters["map_mode"] = map_mode

    # Early stopping: validate every 5 epochs, allow 5 non-improving checks
    earlystopping_hyperparameters = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    parameterSearch = SearchSingleCase(ConvNCF_RecommenderWrapper,
                                       evaluator_validation=evaluator_validation,
                                       evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                                                        FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

    # Same arguments, but trained on the union of train and validation data
    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

    parameterSearch.search(recommender_input_args,
                           recommender_input_args_last_test=recommender_input_args_last_test,
                           fit_hyperparameters_values=optimal_hyperparameters,
                           output_folder_path=output_folder_path_permutation,
                           output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME,
                           save_model = "last",
                           resume_from_saved=True,
                           evaluate_on_test = "last")
# Script fragment: set up a Bayesian search for a hybrid recommender.
# NOTE(review): recommender_class, evaluator_valid, evaluator_test, urm_train,
# icm_asset and W_sparse_CF are defined earlier in the enclosing script —
# confirm against the full file.
parameterSearch = SearchBayesianSkopt(recommender_class, evaluator_validation=evaluator_valid, evaluator_test=evaluator_test)

# Hyperparameter ranges explored by the optimizer
hyperparameters_range_dictionary = {}
hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
hyperparameters_range_dictionary["add_zeros_quota"] = Real(low=0, high=1, prior='uniform')
hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

# Constructor gets the train URM, an asset ICM and a precomputed CF similarity
recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train, icm_asset, W_sparse_CF],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={})

output_folder_path = "result_experiments/"

import os

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_cases = 50
metric_to_optimize = "MAP"

# Clone data structure to perform the fitting with the best hyperparameters on train + validation data
def run_train_with_early_stopping(URM_train_tuning_only, URM_train_full, evaluator_validation, evaluator_test, CFM_data_class_validation, CFM_data_class_full, pretrained_FM_folder_path, output_folder_path, permutation_index, map_mode, metric_to_optimize):
    """Fit CFM for one permutation of the fit-ablation study.

    For permutation 0 the number of epochs is selected with early stopping; all
    later permutations reuse the epoch count saved in permutation 0's metadata
    and train directly on the full data. Skips entirely if this permutation's
    metadata file already exists.
    """
    output_folder_path_permutation = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    # Already computed for this permutation: nothing to do
    if os.path.isfile(output_folder_path_permutation + CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip"):
        return

    # Fixed CFM configuration from the original article
    article_hyperparameters = {
        'pretrain_flag': 1,
        'pretrained_FM_folder_path': pretrained_FM_folder_path,
        'hidden_factor': 64,
        'epochs': 300,
        'batch_size': 256,
        'learning_rate': 0.01,
        'lamda_bilinear': 0,
        'keep': 0.8,
        'optimizer_type': 'AdagradOptimizer',
        'batch_norm': 0,
        'verbose': False,
        'regs': '[10,1]',
        'attention_size': 32,
        'attentive_pooling': False,
        'net_channel': '[32,32,32,32,32,32]',
        'num_field': 4,
        # NOTE(review): ``permutation`` is not a parameter of this function —
        # presumably a global permutation sequence in the enclosing script;
        # confirm before relying on it.
        'permutation': list(permutation),
        'map_mode': map_mode
    }

    # Early stopping: at least half the article epochs, validate every 5 epochs
    earlystopping_hyperparameters = {
        "epochs_min": int(article_hyperparameters["epochs"]/2),
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    # Due to the extremely long evaluation time it is computationally too expensive to run earlystopping on all
    # permutations (estimated >60 days on high end GPU)
    # So, select the epochs only at permutation 0 independently for each of the three modes: "all_map", "main_diagonal", "off_diagonal"
    # try to load selected number of epochs, if not present run earlystopping again
    folder_permutation_0 = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, 0)

    if permutation_index == 0:
        # Permutation 0: run early stopping to pick the number of epochs
        parameterSearch = SearchSingleCase(CFM_wrapper,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_tuning_only, CFM_data_class_validation],
                                                            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

        parameterSearch.search(recommender_input_args,
                               recommender_input_args_last_test=recommender_input_args_last_test,
                               fit_hyperparameters_values=article_hyperparameters,
                               output_folder_path=output_folder_path_permutation,
                               output_file_name_root=CFM_wrapper.RECOMMENDER_NAME,
                               save_model = "last",
                               resume_from_saved=True,
                               evaluate_on_test = "last")

    else:
        # Later permutations: reuse the epoch count selected at permutation 0
        dataIO = DataIO(folder_path = folder_permutation_0)
        data_dict = dataIO.load_data(file_name = CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip")

        selected_epochs = data_dict["hyperparameters_best"]["epochs"]
        article_hyperparameters["epochs"] = selected_epochs

        # No validation split here: train once on the full data
        parameterSearch = SearchSingleCase(CFM_wrapper,
                                           evaluator_validation=evaluator_test,
                                           evaluator_test=evaluator_test)

        recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

        parameterSearch.search(recommender_input_args_last_test,
                               recommender_input_args_last_test=None,
                               fit_hyperparameters_values=article_hyperparameters,
                               output_folder_path=output_folder_path_permutation,
                               output_file_name_root=CFM_wrapper.RECOMMENDER_NAME,
                               save_model = "best",
                               resume_from_saved=True,
                               evaluate_on_test = "best")

        # Get the data in the correct format to be readable for the data parsing script
        # Put the results in the "result_on_last" field of the metadata file
        # Change the final model file name into the _best_model_last suffix
        metadata_file_name = CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip"

        dataIO = DataIO(folder_path = output_folder_path_permutation)
        search_metadata = dataIO.load_data(file_name = metadata_file_name)

        search_metadata["result_on_last"] = search_metadata["result_on_test_best"]

        dataIO.save_data(file_name = metadata_file_name,
                         data_dict_to_save = search_metadata)

        # Re-save the best model under the "_best_model_last" name expected downstream
        recommender_object = CFM_wrapper(URM_train_full, CFM_data_class_full)
        recommender_object.load_model(output_folder_path_permutation, file_name=CFM_wrapper.RECOMMENDER_NAME + "_best_model")
        recommender_object.save_model(output_folder_path_permutation, file_name=CFM_wrapper.RECOMMENDER_NAME + "_best_model_last")
def runParameterSearch_Collaborative(recommender_class, URM_train, URM_train_last_test=None, metric_to_optimize="PRECISION", evaluator_validation=None, evaluator_test=None, evaluator_validation_earlystopping=None, output_folder_path="result_experiments/", parallelizeKNN=True, n_cases=35, n_random_starts=5, resume_from_saved=False, save_model="best", allow_weighting=True, similarity_type_list=None):
    """Run the hyperparameter search appropriate for a collaborative recommender class.

    Dispatches on ``recommender_class``: non-parametric baselines get a single
    evaluation, KNN models get one search per similarity type, and every other
    supported class gets a Bayesian (skopt) search over its own range dictionary.
    Any exception is caught, printed, and appended to ``ErrorLog.txt`` in the
    output folder.
    """

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Early stopping settings shared by all iterative models below
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    # Copy to avoid mutating the caller's matrices
    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters therefore only one evaluation is needed
            """

            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            # When given, the final fit runs on the last-test URM
            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            # KNN models run one full search per similarity type
            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                # One worker per core; recycle each worker after a single task
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)

                pool.close()
                pool.join()

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################
        # Each branch below only defines the search space and input args; the
        # shared search call at the end of the try block executes it.

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical([True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["negative_interactions_quota"] = Real(low=0.0, high=0.5, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical([True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["negative_interactions_quota"] = Real(low=0.0, high=0.5, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["batch_size"] = Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["positive_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["negative_reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs,
                    "positive_threshold_BPR": None
                })

        ##########################################################################################################

        if recommender_class is IALSRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["confidence_scaling"] = Categorical(["linear", "log"])
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3, high=50.0, prior='log-uniform')
            hyperparameters_range_dictionary["epsilon"] = Real(low=1e-3, high=10.0, prior='log-uniform')
            hyperparameters_range_dictionary["reg"] = Real(low=1e-5, high=1e-2, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is NMFRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)
            hyperparameters_range_dictionary["solver"] = Categorical(["coordinate_descent", "multiplicative_update"])
            hyperparameters_range_dictionary["init_type"] = Categorical(["random", "nndsvda"])
            hyperparameters_range_dictionary["beta_loss"] = Categorical(["frobenius", "kullback-leibler"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["symmetric"] = Categorical([True, False])
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["lambda_i"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["lambda_j"] = Real(low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs,
                    "positive_threshold_BPR": None,
                    'train_with_sparse_weights': None
                })

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["l1_ratio"] = Real(low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3, high=1.0, prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        # Log the failure and keep going: the caller iterates over many classes
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
        error_file.close()
# Script fragment: build evaluators for one fold and run the CoupledCF fit
# ablation for every interaction-map mode.
# NOTE(review): dataSplitter_fold, URM_test_negative, cutoff_list_validation,
# cutoff_list_test, output_folder_path, fold_index, input_flags and
# metric_to_optimize come from the enclosing script — confirm against the full file.
URM_train, URM_validation, URM_test = dataSplitter_fold.get_holdout_split()
UCM_CoupledCF = dataSplitter_fold.get_UCM_from_name("UCM_all")
ICM_CoupledCF = dataSplitter_fold.get_ICM_from_name("ICM_all")

# Ensure negative items are consistent with positive items, accounting for removed cold users
URM_test_negative_fold = get_URM_negatives_without_cold_users(dataSplitter_fold.removed_cold_users, URM_test_negative)

# ensure IMPLICIT data
assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative_fold])
assert_disjoint_matrices([URM_train, URM_validation, URM_test])

evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative_fold, cutoff_list=cutoff_list_validation)
evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative_fold, cutoff_list=cutoff_list_test)

recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_CoupledCF, ICM_CoupledCF])

# Ablation with training on selected mode
for map_mode in ["all_map", "main_diagonal", "off_diagonal"]:
    # One result folder per (map mode, fold) pair
    result_folder_path = os.path.join(output_folder_path, "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, fold_index))

    search_metadata = run_train_with_early_stopping(input_flags.dataset_name,
                                                    URM_train,
                                                    URM_validation,
                                                    UCM_CoupledCF,
                                                    ICM_CoupledCF,
                                                    evaluator_validation,
                                                    evaluator_test,
                                                    metric_to_optimize,
                                                    result_folder_path,
                                                    map_mode = map_mode)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Run the NeuMF reproducibility pipeline for one dataset.

    Loads the pre-split data, verifies its integrity, then optionally:
    tunes the collaborative baselines (flag_baselines_tune), evaluates the
    NeuMF wrapper with the article's hyperparameters (flag_DL_article_default),
    and renders the LaTeX result tables (flag_print_results).

    :param dataset_name: "movielens1m" or "pinterest"; other values are rejected.
    :param flag_baselines_tune: run Bayesian search on all collaborative baselines.
    :param flag_DL_article_default: evaluate NeuMF with the published hyperparameters.
    :param flag_DL_tune: accepted for interface symmetry with sibling scripts; unused here.
    :param flag_print_results: generate LaTeX tables from saved search metadata.
    """
    from Conferences.WWW.NeuMF_our_interface.Movielens1M.Movielens1MReader import Movielens1MReader
    from Conferences.WWW.NeuMF_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)
    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader(result_folder_path)
    else:
        # FIX: previously an unsupported name fell through and left `dataset`
        # (and later `num_factors`) unbound, raising NameError. Reject early,
        # consistent with the other experiment scripts in this file.
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation + URM_test,
                                URM_train + URM_validation,
                                URM_test],
                               ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "HIT_RATE"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    # Validation uses a single cutoff; test reports all cutoffs 1..10 as in the article.
    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative,
                                                 cutoff_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                # Best-effort: a failing baseline must not abort the whole experiment.
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        try:
            # Embedding size per the original article (dataset-dependent).
            if dataset_name == "movielens1m":
                num_factors = 64
            elif dataset_name == "pinterest":
                num_factors = 16

            neuMF_article_hyperparameters = {
                "epochs": 100,
                "epochs_gmf": 100,
                "epochs_mlp": 100,
                "batch_size": 256,
                "num_factors": num_factors,
                "layers": [num_factors * 4, num_factors * 2, num_factors],
                "reg_mf": 0.0,
                "reg_layers": [0, 0, 0],
                "num_negatives": 4,
                "learning_rate": 1e-3,
                "learning_rate_pretrain": 1e-3,
                "learner": "sgd",
                "learner_pretrain": "adam",
                "pretrain": True
            }

            neuMF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            # SearchSingleCase evaluates exactly one hyperparameter configuration.
            parameterSearch = SearchSingleCase(NeuMF_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=neuMF_earlystopping_hyperparameters)

            # Final model is retrained on train + validation before test evaluation.
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=neuMF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=NeuMF_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(NeuMF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Count users with at least one test interaction (CSR row-pointer diff).
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list=None,
                                           other_algorithm_list=[NeuMF_RecommenderWrapper],
                                           KNN_similarity_list=KNN_similarity_to_report_list,
                                           ICM_names_list=None,
                                           UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=[1, 5, 10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR",
                          "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                          "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_variant,
                               train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Run the CollaborativeVAE reproducibility pipeline on a CiteULike variant.

    :param dataset_variant: CiteULike variant identifier passed to CiteulikeReader.
    :param train_interactions: number of train interactions per user; 1 selects the
        "extremely sparse" protocol where validation data stays in the train set.
    :param flag_baselines_tune: tune collaborative, content (CBF) and hybrid baselines.
    :param flag_DL_article_default: evaluate CollaborativeVAE with the article hyperparameters.
    :param flag_DL_tune: accepted for interface symmetry with sibling scripts; unused here.
    :param flag_print_results: generate LaTeX tables from saved search metadata.
    """
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME,
                                                                            dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # The boolean token ICM is not used in this experiment; remove it so the
    # baseline loops below only iterate over the ICMs that should be tuned.
    del dataset.ICM_DICT["ICM_tokens_bool"]

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, choosing an evaluation as subset of the train
    # While keeping validation interaction in the train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # Validation at the article's single cutoff; exclude_seen depends on the protocol above.
    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[150],
                                            exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train_last_test,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    # NOTE(review): the content and hybrid tuning loops below are assumed to run
    # only when flag_baselines_tune is set, matching the sibling scripts — the
    # original indentation was lost; confirm against the upstream repository.
    if flag_baselines_tune:
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                # Best-effort: a failing baseline must not abort the whole experiment.
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ######      Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(ItemKNNCBFRecommender,
                                           URM_train=URM_train,
                                           URM_train_last_test=URM_train_last_test,
                                           metric_to_optimize=metric_to_optimize,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test,
                                           output_folder_path=result_folder_path,
                                           parallelizeKNN=False,
                                           allow_weighting=True,
                                           resume_from_saved=True,
                                           ICM_name=ICM_name,
                                           ICM_object=ICM_object.copy(),
                                           n_cases=n_cases,
                                           n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ######      Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                          URM_train=URM_train,
                                          URM_train_last_test=URM_train_last_test,
                                          metric_to_optimize=metric_to_optimize,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test,
                                          output_folder_path=result_folder_path,
                                          parallelizeKNN=False,
                                          allow_weighting=True,
                                          resume_from_saved=True,
                                          ICM_name=ICM_name,
                                          ICM_object=ICM_object.copy(),
                                          n_cases=n_cases,
                                          n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        try:
            # Hyperparameters as published in the CollaborativeVAE article.
            cvae_recommender_article_hyperparameters = {
                "epochs": 200,
                "learning_rate_vae": 1e-2,
                "learning_rate_cvae": 1e-3,
                "num_factors": 50,
                "dimensions_vae": [200, 100],
                "epochs_vae": [50, 50],
                "batch_size": 128,
                "lambda_u": 0.1,
                "lambda_v": 10,
                "lambda_r": 1,
                "a": 1,
                "b": 0.01,
                "M": 300,
            }

            cvae_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            # SearchSingleCase evaluates exactly one hyperparameter configuration.
            parameterSearch = SearchSingleCase(CollaborativeVAE_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]],
                FIT_KEYWORD_ARGS=cvae_earlystopping_hyperparameters)

            # Final model is retrained on the protocol-dependent last-test URM.
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=cvae_recommender_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeVAE_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(CollaborativeVAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Count users with at least one test interaction (CSR row-pointer diff).
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)

        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list=None,
                                           other_algorithm_list=other_algorithm_list,
                                           KNN_similarity_list=KNN_similarity_to_report_list,
                                           ICM_names_list=ICM_names_to_report_list,
                                           UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL"],
            cutoffs_list=[50, 100, 150, 200, 250, 300],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE",
                          "ARHR_ALL_HITS", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                          "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                          "SHANNON_ENTROPY"],
            cutoffs_list=[150],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
hyperparameters_range_dictionary = {} hyperparameters_range_dictionary["alpha"] = Categorical(weight_list) hyperparameters_range_dictionary["beta"] = Categorical(weight_list) hyperparameters_range_dictionary["gamma"] = Categorical(weight_list) hyperparameters_range_dictionary["delta"] = Categorical(weight_list) hyperparameters_range_dictionary["epsilon"] = Categorical([ 0.1, 0.2, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.8, 0.9, 1, 2 ]) hyperparameters_range_dictionary["zeta"] = Categorical(weight_list) recommender_input_args = SearchInputRecommenderArgs( CONSTRUCTOR_POSITIONAL_ARGS=[ URM_train, itemKNNCF, recommenderELASTIC, recommenderCB, recommenderBetaGRAPH, recommederUserKNN, recommenderCYTHON ], CONSTRUCTOR_KEYWORD_ARGS={}, FIT_POSITIONAL_ARGS=[], FIT_KEYWORD_ARGS={}) output_folder_path = "result_experiments/" # If directory does not exist, create if not os.path.exists(output_folder_path): os.makedirs(output_folder_path) # RUN n_cases = 45 metric_to_optimize = "MAP" parameterSearch.search( recommender_input_args, parameter_search_space=hyperparameters_range_dictionary,
def runParameterSearch_Content(recommender_class, URM_train, ICM_object, ICM_name,
                               URM_train_last_test=None, n_cases=30, n_random_starts=5,
                               evaluator_validation=None, evaluator_test=None,
                               metric_to_optimize="PRECISION",
                               output_folder_path="result_experiments/",
                               parallelizeKNN=False, allow_weighting=True,
                               similarity_type_list=None, allow_bias_ICM=False):
    """
    Tune a content-based KNN recommender, repeating the Bayesian search once
    per similarity type, either sequentially or on a process pool.

    The recommender is constructed with (ICM_object, URM_train); when
    URM_train_last_test is given, the best configuration is retrained on it
    before the final test evaluation.
    """
    # Make sure the destination folder exists before any metadata is written.
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Default set of similarity types to explore when the caller gives none.
    if similarity_type_list is None:
        similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

    # Result files are keyed by recommender name plus the ICM used.
    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(ICM_name)

    bayesian_search = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    constructor_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[ICM_object, URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    # For the last-test retraining only the URM (positional slot 1) changes.
    constructor_args_last_test = None
    if URM_train_last_test is not None:
        constructor_args_last_test = constructor_args.copy()
        constructor_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[1] = URM_train_last_test

    # Bind everything except the similarity type, which varies per run.
    search_one_similarity = partial(
        run_KNNRecommender_on_similarity_type,
        recommender_input_args=constructor_args,
        parameter_search_space={},
        parameterSearch=bayesian_search,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        allow_weighting=allow_weighting,
        allow_bias_ICM=allow_bias_ICM,
        recommender_input_args_last_test=constructor_args_last_test)

    if parallelizeKNN:
        # One similarity type per worker; maxtasksperchild=1 recycles workers
        # so memory from a finished search is released.
        worker_pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                           maxtasksperchild=1)
        worker_pool.map(search_one_similarity, similarity_type_list)
        worker_pool.close()
        worker_pool.join()
    else:
        for single_similarity in similarity_type_list:
            search_one_similarity(single_similarity)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Run the NeuRec reproducibility pipeline for one dataset.

    Loads the pre-split data, verifies its integrity, then optionally:
    tunes the collaborative baselines (flag_baselines_tune), evaluates both
    the user-based and item-based NeuRec wrappers with the article
    hyperparameters (flag_DL_article_default), and renders the LaTeX result
    tables (flag_print_results).

    :param dataset_name: one of "movielens1m", "hetrec", "filmtrust", "frappe".
    :param flag_DL_tune: accepted for interface symmetry with sibling scripts; unused here.
    """
    result_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    # Article hyperparameters are dataset-dependent (per the NeuRec paper).
    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 300,
            'num_factors': 50,
            'dropout_percentage': 0.03,
            'learning_rate': 1e-4,
            'regularization_rate': 0.1,
            'epochs': 1500,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 800

    elif dataset_name == "hetrec":
        dataset = MovielensHetrec2011Reader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 300,
            'num_factors': 50,
            'dropout_percentage': 0.03,
            'learning_rate': 1e-4,
            'regularization_rate': 0.1,
            'epochs': 1500,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 800

    elif dataset_name == "filmtrust":
        dataset = FilmTrustReader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 150,
            'num_factors': 40,
            'dropout_percentage': 0.00,
            'learning_rate': 5e-5,
            'regularization_rate': 0.1,
            'epochs': 100,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 0

    elif dataset_name == "frappe":
        dataset = FrappeReader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 300,
            'num_factors': 50,
            'dropout_percentage': 0.03,
            'learning_rate': 1e-4,
            'regularization_rate': 0.01,
            'epochs': 100,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 0

    else:
        # FIX: previously an unknown name left `dataset`, `article_hyperparameters`
        # and `early_stopping_epochs_min` unbound, raising NameError later.
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # use max cutoff to compute full MAP and NDCG
    max_cutoff = URM_train.shape[1] - 1

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 50, max_cutoff]

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:
        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                # Best-effort: a failing baseline must not abort the whole experiment.
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 20,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
            'epochs_min': early_stopping_epochs_min
        }

        def _search_article_configuration(recommender_wrapper_class):
            # Evaluate one NeuRec variant with the fixed article hyperparameters;
            # identical logic previously duplicated for the U and I wrappers.
            try:
                parameterSearch = SearchSingleCase(recommender_wrapper_class,
                                                   evaluator_validation=evaluator_validation,
                                                   evaluator_test=evaluator_test)

                recommender_input_args = SearchInputRecommenderArgs(
                    CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                    FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

                # Final model is retrained on train + validation before test evaluation.
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

                parameterSearch.search(
                    recommender_input_args,
                    recommender_input_args_last_test=recommender_input_args_last_test,
                    fit_hyperparameters_values=article_hyperparameters,
                    output_folder_path=result_folder_path,
                    resume_from_saved=True,
                    output_file_name_root=recommender_wrapper_class.RECOMMENDER_NAME)

            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_wrapper_class, str(e)))
                traceback.print_exc()

        _search_article_configuration(UNeuRec_RecommenderWrapper)
        _search_article_configuration(INeuRec_RecommenderWrapper)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        # Count users with at least one test interaction (CSR row-pointer diff).
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list=None,
                                           other_algorithm_list=[INeuRec_RecommenderWrapper,
                                                                 UNeuRec_RecommenderWrapper],
                                           KNN_similarity_list=KNN_similarity_to_report_list,
                                           ICM_names_list=None,
                                           UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP", "NDCG", "MRR"],
            cutoffs_list=[5, 10, 50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=["DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                          "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR",
                          "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                          "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def runParameterSearch_Hybrid(recommender_class, URM_train, ICM_train, W_sparse_CF=None,
                              URM_train_last_test=None, metric_to_optimize="MAP",
                              evaluator_validation=None, evaluator_test=None,
                              evaluator_validation_earlystopping=None,
                              output_folder_path="result_experiments/",
                              n_cases=35, n_random_starts=5, resume_from_saved=False,
                              save_model="best", allow_weighting=True,
                              similarity_type_list=None):
    """
    Bayesian hyperparameter search for the hybrid recommender classes.

    The search space is selected by recommender_class; each branch below
    fills hyperparameters_range_dictionary and recommender_input_args.
    If recommender_class matches none of the branches, the resulting
    NameError is caught by the except handler and logged to ErrorLog.txt.

    :param W_sparse_CF: precomputed CF similarity, used only by CFW_D_Similarity_Linalg.
    :param URM_train_last_test: if given, the best configuration is retrained
        on it before the final test evaluation.
    :param evaluator_validation_earlystopping: accepted for interface symmetry
        with the other search functions; unused here.
    :param similarity_type_list: accepted for interface symmetry; unused here.
    """
    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Work on copies so the search cannot mutate the caller's matrices.
    URM_train = URM_train.copy()
    ICM_train = ICM_train.copy()
    # W_sparse_CF is not copied: it may be None (only one branch uses it).

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(recommender_class,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        ##########################################################################################################
        # Score-blending hybrids of a graph/KNN CF model and an ItemKNN CBF model.
        if recommender_class in [ScoresHybridP3alphaKNNCBF, ScoresHybridRP3betaKNNCBF,
                                 ScoresHybridSpecialized, ScoresHybridSpecializedCold,
                                 ScoresHybridSpecializedV2Cold, ScoresHybridSpecializedV2Mid,
                                 ScoresHybridSpecializedV2Warm, ScoresHybridSpecializedV2Mid12,
                                 ScoresHybridSpecializedV2Warm12, ScoresHybridSpecializedV3Cold,
                                 ScoresHybridSpecializedV3Warm]:

            hyperparameters_range_dictionary = {}
            # *_P parameters control the P3alpha/RP3beta component.
            hyperparameters_range_dictionary["topK_P"] = Integer(5, 3000)
            hyperparameters_range_dictionary["alpha_P"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity_P"] = Categorical([False])
            # Plain parameters control the ItemKNN CBF component.
            hyperparameters_range_dictionary["topK"] = Integer(5, 3000)
            hyperparameters_range_dictionary["shrink"] = Integer(0, 5000)
            hyperparameters_range_dictionary["similarity"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize"] = Categorical([True, False])
            # Blending weight between the two score vectors.
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=2, prior='uniform')

            if recommender_class is ScoresHybridRP3betaKNNCBF:
                hyperparameters_range_dictionary["beta_P"] = Real(low=0, high=2, prior='uniform')

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting"] = Categorical(
                    ["none", "BM25", "TF-IDF"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################
        # Hybrids of a KNN CF model and an ItemKNN CBF model.
        if recommender_class in [ScoresHybridKNNCFKNNCBF, ScoresHybridUserKNNCFKNNCBF]:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK_CF"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink_CF"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity_CF"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize_CF"] = Categorical([True, False])
            hyperparameters_range_dictionary["topK"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize"] = Categorical([True, False])
            hyperparameters_range_dictionary["alpha"] = Real(low=0, high=1, prior='uniform')

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting"] = Categorical(
                    ["none", "BM25", "TF-IDF"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################
        # Adaptive hybrid with separate configurations for cold (_C) and warm users.
        if recommender_class is ScoresHybridSpecializedAdaptive:

            hyperparameters_range_dictionary = {}
            # Cold users hybrid
            hyperparameters_range_dictionary["topK_P_C"] = Integer(5, 1500)
            hyperparameters_range_dictionary["alpha_P_C"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta_P_C"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity_P_C"] = Categorical([False])
            hyperparameters_range_dictionary["topK_C"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink_C"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity_C"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize_C"] = Categorical([True, False])
            # hyperparameters_range_dictionary["alpha_C"] = Real(low=0, high=1, prior='uniform')

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting_C"] = Categorical(
                    ["none", "BM25", "TF-IDF"])

            # Warm users hybrid
            hyperparameters_range_dictionary["topK_P"] = Integer(5, 1500)
            hyperparameters_range_dictionary["alpha_P"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta_P"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity_P"] = Categorical([False])
            hyperparameters_range_dictionary["topK"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize"] = Categorical([True, False])
            # hyperparameters_range_dictionary["alpha"] = Real(low=0, high=1, prior='uniform')

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting"] = Categorical(
                    ["none", "BM25", "TF-IDF"])

            # Profile-length threshold separating cold from warm users.
            hyperparameters_range_dictionary["threshold"] = Integer(1, 30)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################
        # Hybrid of P3alpha and PureSVD scores; requires only the URM.
        if recommender_class is ScoresHybridP3alphaPureSVD:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK_P"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha_P"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity_P"] = Categorical([False])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 500)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################
        # Feature weighting on top of a precomputed CF similarity (uses W_sparse_CF).
        if recommender_class is CFW_D_Similarity_Linalg:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["add_zeros_quota"] = Real(low=0, high=1, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train, W_sparse_CF],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_input_args,
                               parameter_search_space=hyperparameters_range_dictionary,
                               n_cases=n_cases,
                               n_random_starts=n_random_starts,
                               resume_from_saved=resume_from_saved,
                               save_model=save_model,
                               output_folder_path=output_folder_path,
                               output_file_name_root=output_file_name_root,
                               metric_to_optimize=metric_to_optimize,
                               recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        # FIX: use a context manager so the log file is closed even if write() raises.
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Load a dataset split and run the DELF experiment pipeline on it.

    Depending on the flags this: tunes the collaborative baselines with a
    Bayesian search, evaluates the DELF MLP and EF wrappers with the
    article's default hyperparameters, and/or generates the LaTeX result
    tables from previously saved search output.

    :param dataset_name: one of 'amazon_music', 'movielens1m_ours',
        'movielens1m_original', optionally with the '_remove_cold_items'
        suffix (which triggers cold-item filtering in the evaluators).
    :param flag_baselines_tune: run hyperparameter search for all baselines.
    :param flag_DL_article_default: fit the DELF wrappers with the
        article-default hyperparameters.
    :param flag_DL_tune: accepted for interface uniformity with the other
        run scripts; not used in this function body.
    :param flag_print_results: generate LaTeX tables from saved results.

    NOTE(review): CONFERENCE_NAME, ALGORITHM_NAME, the recommender classes,
    the reader classes and KNN_similarity_to_report_list are module-level
    names defined elsewhere in the file — confirm before reuse.
    """
    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    # Ensure both experiments (with and without cold items) use the same data:
    # the reader folder is derived from the dataset name WITHOUT the suffix.
    dataset_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME,
        dataset_name.replace("_remove_cold_items", ""))

    if not os.path.exists(dataset_folder_path):
        os.makedirs(dataset_folder_path)

    # Substring matching, so '_remove_cold_items' variants hit the same reader
    if 'amazon_music' in dataset_name:
        dataset = AmazonMusicReader(dataset_folder_path)

    elif 'movielens1m_ours' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="ours")

    elif 'movielens1m_original' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="original")

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT matrices
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    cold_items_statistics(URM_train, URM_validation, URM_test,
                          URM_test_negative)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Training data", "Test data"],
                         result_folder_path + algorithm_dataset_string +
                         "popularity_plot")

    save_popularity_statistics([
        URM_train + URM_validation + URM_test, URM_train + URM_validation,
        URM_test
    ], ["Full data", "Training data", "Test data"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    # Cold items are removed from the recommendable set only for the
    # '_remove_cold_items' experiment variant.
    if "_remove_cold_items" in dataset_name:
        ignore_items_validation = get_cold_items(URM_train)
        ignore_items_test = get_cold_items(URM_train + URM_validation)
    else:
        ignore_items_validation = None
        ignore_items_test = None

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation,
        URM_test_negative,
        cutoff_list=cutoff_list_validation,
        ignore_items=ignore_items_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test,
        URM_test_negative,
        cutoff_list=cutoff_list_test,
        ignore_items=ignore_items_test)

    # The Evaluator automatically skips users with no test interactions.
    # In this case we need the evaluation done with and without cold items
    # to be comparable, so we ensure the users that are included in the
    # evaluation are the same in both cases (all users).
    evaluator_validation.users_to_evaluate = np.arange(URM_train.shape[0])
    evaluator_test.users_to_evaluate = np.arange(URM_train.shape[0])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # Best-effort: a failing baseline must not abort the whole run
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
        }

        # Architecture/training defaults taken from the original article
        num_factors = 64
        article_hyperparameters = {
            'epochs': 500,
            'learning_rate': 0.001,
            'batch_size': 256,
            'num_negatives': 4,
            'layers': (num_factors * 4, num_factors * 2, num_factors),
            'regularization_layers': (0, 0, 0),
            'learner': 'adam',
            'verbose': False,
        }

        # DELF MLP variant
        parameterSearch = SearchSingleCase(
            DELF_MLP_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        # Same args, but the final fit uses train+validation data
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=DELF_MLP_RecommenderWrapper.RECOMMENDER_NAME)

        # DELF EF variant, same hyperparameters
        parameterSearch = SearchSingleCase(
            DELF_EF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=DELF_EF_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DELF_MLP_RecommenderWrapper, DELF_EF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=cutoff_list_test,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Load a Gowalla/Yelp split and run the ConvNCF experiment pipeline.

    Depending on the flags this: tunes the collaborative baselines, fits the
    ConvNCF wrapper with the article's default hyperparameters, and/or
    generates the LaTeX result tables from previously saved search output.

    :param dataset_name: 'gowalla' or 'yelp'; anything else aborts.
    :param flag_baselines_tune: run hyperparameter search for all baselines.
    :param flag_DL_article_default: fit ConvNCF with article defaults.
    :param flag_DL_tune: accepted for interface uniformity; not used here.
    :param flag_print_results: generate LaTeX tables from saved results.

    NOTE(review): CONFERENCE_NAME, ALGORITHM_NAME, ConvNCF and the wrapper /
    reader classes are module-level names defined elsewhere in the file.
    """
    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "gowalla":
        dataset = GowallaReader(result_folder_path)

    elif dataset_name == "yelp":
        dataset = YelpReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    print_negative_items_stats(URM_train, URM_validation, URM_test,
                               URM_test_negative)

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # URM_test_negative contains duplicates in both train and test,
    # so it is deliberately left out of the disjointness check.
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative,
                                                 cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # Best-effort: a failing baseline must not abort the whole run
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        # Providing an empty matrix to URM_negative for the train samples
        article_hyperparameters = {
            "batch_size": 512,
            "epochs": 1500,
            "epochs_MFBPR": 500,
            "embedding_size": 64,
            "hidden_size": 128,
            "negative_sample_per_positive": 1,
            "negative_instances_per_positive": 4,
            "regularization_users_items": 0.01,
            "regularization_weights": 10,
            "regularization_filter_weights": 1,
            "learning_rate_embeddings": 0.05,
            "learning_rate_CNN": 0.05,
            "channel_size": [32, 32, 32, 32, 32, 32],
            "dropout": 0.0,
            "epoch_verbose": 1,
        }

        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
            "epochs_min": 150
        }

        parameterSearch = SearchSingleCase(
            ConvNCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        # Same args, but the final fit uses train+validation data
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME)

        # Remember to close the global session since ConvNCF uses
        # global variables.
        ConvNCF.close_session(verbose=True)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[ConvNCF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=cutoff_list_test,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=cutoff_list_validation,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               cold_start=False,
                               cold_items=None,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Load a dataset split and run the SpectralCF experiment pipeline.

    Supports an optional cold-start variant (only for 'movielens1m_ours'),
    which changes the result folder name and the evaluation cutoffs.

    :param dataset_name: one of 'movielens1m_original', 'movielens1m_ours',
        'hetrec', 'amazon_instant_video'. NOTE(review): an unsupported name
        falls through the if/elif chain and raises NameError on `dataset` —
        there is no else branch here, unlike the sibling functions.
    :param cold_start: enable the cold-start experiment variant.
    :param cold_items: number/spec of cold items, used in the folder name
        and passed to the reader when cold_start is True.
    :param flag_baselines_tune: run hyperparameter search for all baselines.
    :param flag_DL_article_default: fit SpectralCF with article defaults.
    :param flag_DL_tune: run the Bayesian search over SpectralCF itself.
    :param flag_print_results: generate LaTeX tables from saved results.

    NOTE(review): other_algorithm_list used in the print-results section is
    a module-level name defined elsewhere in the file — confirm.
    """
    if not cold_start:
        result_folder_path = "result_experiments/{}/{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)
    else:
        result_folder_path = "result_experiments/{}/{}_cold_{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, cold_items, dataset_name)

    # Cold-start is only implemented for the 'ours' Movielens1M split
    if dataset_name == "movielens1m_original":
        assert (cold_start is not True)
        dataset = Movielens1MReader(result_folder_path, type="original")

    elif dataset_name == "movielens1m_ours":
        dataset = Movielens1MReader(result_folder_path,
                                    type="ours",
                                    cold_start=cold_start,
                                    cold_items=cold_items)

    elif dataset_name == "hetrec":
        assert (cold_start is not True)
        dataset = MovielensHetrec2011Reader(result_folder_path)

    elif dataset_name == "amazon_instant_video":
        assert (cold_start is not True)
        dataset = AmazonInstantVideoReader(result_folder_path)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Train data", "Test data"],
                         result_folder_path + algorithm_dataset_string +
                         "popularity_plot")

    save_popularity_statistics([
        URM_train + URM_validation + URM_test, URM_train + URM_validation,
        URM_test
    ], ["URM_all", "URM train", "URM test"],
        result_folder_path + algorithm_dataset_string + "popularity_statistics")

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # Cold-start experiments use a single cutoff; the full experiment
    # validates at 50 and reports a range of test cutoffs.
    if not cold_start:
        cutoff_list_validation = [50]
        cutoff_list_test = [20, 30, 40, 50, 60, 70, 80, 90, 100]
    else:
        cutoff_list_validation = [20]
        cutoff_list_test = [20]

    evaluator_validation = EvaluatorHoldout(URM_validation,
                                            cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    ################################################################################################
    ###### KNN CF

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # Best-effort: a failing baseline must not abort the whole run
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:
            # Training defaults taken from the original article
            spectralCF_article_hyperparameters = {
                "epochs": 1000,
                "batch_size": 1024,
                "embedding_size": 16,
                "decay": 0.001,
                "k": 3,
                "learning_rate": 1e-3,
            }

            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
            }

            parameterSearch = SearchSingleCase(
                SpectralCF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=spectralCF_earlystopping_hyperparameters)

            # Same args, but the final fit uses train+validation data
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=spectralCF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=SpectralCF_RecommenderWrapper.
                RECOMMENDER_NAME + "_article_default")

        except Exception as e:
            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    if flag_DL_tune:

        try:
            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
                "epochs": 2000
            }

            runParameterSearch_SpectralCF(
                SpectralCF_RecommenderWrapper,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                earlystopping_hyperparameters=
                spectralCF_earlystopping_hyperparameters,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=result_folder_path,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                output_file_name_root=SpectralCF_RecommenderWrapper.
                RECOMMENDER_NAME)

        except Exception as e:
            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(
            result_folder_path,
            ALGORITHM_NAME if not cold_start else "{}_cold_{}".format(
                ALGORITHM_NAME, cold_items), dataset_name)

        if cold_start:
            cutoffs_to_report_list = [20]
        else:
            cutoffs_to_report_list = [20, 40, 60, 80, 100]

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=other_algorithm_list,
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL", "MAP"],
            cutoffs_list=cutoffs_to_report_list,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name +
            "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """Load a dataset split and run the CoupledCF/DeepCF experiment pipeline.

    Depending on the flags this: tunes the collaborative, content (ICM/UCM)
    and hybrid baselines, fits the DeepCF and CoupledCF wrappers with the
    article's default hyperparameters, and/or generates the LaTeX result
    tables from previously saved search output.

    :param dataset_name: 'movielens1m_original'/'movielens1m_ours' or
        'tafeng_original'/'tafeng_ours'; anything else aborts.
    :param flag_baselines_tune: run hyperparameter search for all baselines.
    :param flag_DL_article_default: fit DeepCF and CoupledCF with article
        defaults.
    :param flag_DL_tune: accepted for interface uniformity; not used here.
    :param flag_print_results: generate LaTeX tables from saved results.

    NOTE(review): ALGORITHM_NAME, KNN_similarity_to_report_list and the
    wrapper/reader classes are module-level names defined elsewhere.
    """
    result_folder_path = "result_experiments/IJCAI/CoupledCF_{}/".format(
        dataset_name)

    #Logger(path=result_folder_path, name_file='CoupledCF_' + dataset_name)

    if dataset_name.startswith("movielens1m"):
        if dataset_name.endswith("_original"):
            dataset = Movielens1MReader(result_folder_path, type='original')
        elif dataset_name.endswith("_ours"):
            dataset = Movielens1MReader(result_folder_path, type='ours')
        else:
            print("Dataset name not supported, current is {}".format(
                dataset_name))
            return

        UCM_to_report = ["UCM_all"]
        ICM_to_report = ["ICM_all"]

        # Side-information matrices fed to the CoupledCF constructor
        UCM_CoupledCF = dataset.ICM_DICT["UCM_all"]
        ICM_CoupledCF = dataset.ICM_DICT["ICM_all"]

    elif dataset_name.startswith("tafeng"):
        if dataset_name.endswith("_original"):
            dataset = TafengReader(result_folder_path, type='original')
        elif dataset_name.endswith("_ours"):
            dataset = TafengReader(result_folder_path, type='ours')
        else:
            print("Dataset name not supported, current is {}".format(
                dataset_name))
            return

        UCM_to_report = ["UCM_all"]
        ICM_to_report = ["ICM_original"]

        UCM_CoupledCF = dataset.ICM_DICT["UCM_all"]
        ICM_CoupledCF = dataset.ICM_DICT["ICM_original"]

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    # ICM_DICT holds both user (UCM_*) and item (ICM_*) content matrices;
    # split them by name prefix.
    UCM_dict = {
        UCM_name: UCM_object
        for (UCM_name, UCM_object) in dataset.ICM_DICT.items()
        if "UCM" in UCM_name
    }
    ICM_dict = {
        UCM_name: UCM_object
        for (UCM_name, UCM_object) in dataset.ICM_DICT.items()
        if "ICM" in UCM_name
    }

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Matrices are 1-indexed, so remove first row for the statistics
    print_negative_items_stats(URM_train[1:], URM_validation[1:], URM_test[1:],
                               URM_test_negative[1:])

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [5]
    cutoff_list_test = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative,
                                                 cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            # Best-effort: a failing baseline must not abort the whole run
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ###############################################################################################
        ##### Item Content Baselines

        for ICM_name, ICM_object in ICM_dict.items():
            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### User Content Baselines

        # The user-content searches reuse the ICM_* keyword parameters
        # of the search functions, passing UCM data through them.
        for UCM_name, UCM_object in UCM_dict.items():
            try:
                runParameterSearch_Content(
                    UserKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=UCM_name,
                    ICM_object=UCM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

                runParameterSearch_Hybrid(
                    UserKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=UCM_name,
                    ICM_object=UCM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:
                print("On CBF recommender for UCM {} Exception {}".format(
                    UCM_name, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        model_name = dataset.DATASET_NAME

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize
        }

        # DeepCF: model 3, learning rate differs per dataset
        if 'tafeng' in dataset_name:
            model_number = 3
            article_hyperparameters = {
                'learning_rate': 0.005,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': model_name,
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }
        else:
            # movielens1m and other dataset
            model_number = 3
            article_hyperparameters = {
                'learning_rate': 0.001,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': model_name,
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }

        parameterSearch = SearchSingleCase(
            DeepCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        # Same args, but the final fit uses train+validation data
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=DeepCF_RecommenderWrapper.RECOMMENDER_NAME)

        # CoupledCF: model 2, the tafeng model has a different structure
        if 'tafeng' in dataset_name:
            model_number = 2
            article_hyperparameters = {
                'learning_rate': 0.005,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': "Tafeng",
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }
        else:
            # movielens1m uses this structure with model 2
            model_number = 2
            article_hyperparameters = {
                'learning_rate': 0.001,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': "Movielens1M",
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }

        parameterSearch = SearchSingleCase(
            CoupledCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[
                URM_train, UCM_CoupledCF, ICM_CoupledCF
            ],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=CoupledCF_RecommenderWrapper.RECOMMENDER_NAME
        )

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one test interaction
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DeepCF_RecommenderWrapper, CoupledCF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_to_report,
            UCM_names_list=UCM_to_report)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=[1, 5, 10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name +
            "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[5],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[5],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def runParameterSearch_SpectralCF(recommender_class, URM_train, earlystopping_hyperparameters, output_file_name_root,
                                  URM_train_last_test=None, n_cases=35, n_random_starts=5,
                                  evaluator_validation=None, evaluator_test=None,
                                  metric_to_optimize="RECALL", output_folder_path="result_experiments/"):
    """Run a Bayesian (skopt) hyperparameter search for the SpectralCF wrapper.

    :param recommender_class: must be SpectralCF_RecommenderWrapper (the only supported class)
    :param URM_train: training user-rating matrix used to fit each candidate model
    :param earlystopping_hyperparameters: dict passed as fit keyword args (early-stopping config)
    :param output_file_name_root: root name for the search's output files
    :param URM_train_last_test: optional URM (train + validation) used to refit the best model
    :param n_cases: total number of hyperparameter configurations to evaluate
    :param n_random_starts: number of initial random configurations before the Bayesian model kicks in
    :param evaluator_validation: evaluator used to select the best configuration
    :param evaluator_test: evaluator used to report final results
    :param metric_to_optimize: metric name the search maximizes
    :param output_folder_path: folder where search metadata and results are stored
    :raises ValueError: if recommender_class is not SpectralCF_RecommenderWrapper
    """

    # Fail fast: the original code only populated the search space for SpectralCF and
    # would otherwise crash later with a NameError on hyperparameters_range_dictionary.
    if recommender_class is not SpectralCF_RecommenderWrapper:
        raise ValueError("runParameterSearch_SpectralCF: unsupported recommender_class {}".format(recommender_class))

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    ##########################################################################################################

    # Search space taken from the article's ranges; learning_rate/decay explored log-uniformly.
    hyperparameters_range_dictionary = {
        "batch_size": Categorical([128, 256, 512, 1024, 2048]),
        "embedding_size": Categorical([4, 8, 16, 32]),
        "decay": Real(low=1e-5, high=1e-1, prior='log-uniform'),
        "learning_rate": Real(low=1e-5, high=1e-2, prior='log-uniform'),
        "k": Integer(low=1, high=6),
    }

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

    #########################################################################################################

    if URM_train_last_test is not None:
        # Refit the best configuration on the larger URM (train + validation) for the final test evaluation
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    parameterSearch.search(recommender_input_args,
                           parameter_search_space=hyperparameters_range_dictionary,
                           n_cases=n_cases,
                           n_random_starts=n_random_starts,
                           resume_from_saved=True,
                           output_folder_path=output_folder_path,
                           output_file_name_root=output_file_name_root,
                           metric_to_optimize=metric_to_optimize,
                           recommender_input_args_last_test=recommender_input_args_last_test)
def read_data_split_and_search(dataset_variant, train_interactions, flag_baselines_tune=False, flag_DL_article_default=False, flag_DL_tune=False, flag_print_results=False):
    """Load the CiteULike split, optionally run the CollaborativeDL article configuration, and print LaTeX results.

    :param dataset_variant: CiteULike variant identifier, used to build the result folder name
    :param train_interactions: number of train interactions per user; value 1 triggers the cold-user-safe split
    :param flag_baselines_tune: accepted for interface symmetry; not read in this function's body
    :param flag_DL_article_default: if True, fit CollaborativeDL with the article's fixed hyperparameters
    :param flag_DL_tune: accepted for interface symmetry; not read in this function's body
    :param flag_print_results: if True, generate the LaTeX result tables from the result folder
    """

    # Using dataReader from CollaborativeVAE_our_interface as they use the same data in the same way
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    # The reader caches its split under the CollaborativeVAE folder, shared between the two algorithms
    result_folder_path_CollaborativeVAE = "result_experiments/{}/{}_citeulike_{}_{}/".format(CONFERENCE_NAME, "CollaborativeVAE", dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path_CollaborativeVAE, dataset_variant=dataset_variant, train_interactions=train_interactions)

    # Copies protect the reader's cached matrices from in-place modification
    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, choosing an evaluation as subset of the train
    # While keeping validation interactions in the train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to avoid cold users;
        # only train/test and validation/test disjointness can be asserted
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # Validation at a single cutoff; exclude_seen depends on whether validation data is inside the train set
    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[150], exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:
            # Fixed hyperparameters reproducing the article's configuration (no tuning)
            collaborativeDL_article_hyperparameters = {
                "para_lv": 10,
                "para_lu": 1,
                "para_ln": 1e3,
                "batch_size": 128,
                "epoch_sdae": 200,
                "epoch_dae": 200,
            }

            parameterSearch = SearchSingleCase(CollaborativeDL_Matlab_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]],
                FIT_KEYWORD_ARGS={})

            # Same args but with the train+validation URM for the final refit before test evaluation
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test=recommender_input_args_last_test,
                                   fit_hyperparameters_values=collaborativeDL_article_hyperparameters,
                                   output_folder_path=result_folder_path,
                                   resume_from_saved=True,
                                   output_file_name_root=CollaborativeDL_Matlab_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(CollaborativeDL_Matlab_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one test interaction (CSR indptr differences give per-user counts)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())

        dataset_name = "{}_{}".format(dataset_variant, train_interactions)

        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list=None,
                                           other_algorithm_list=[CollaborativeDL_Matlab_RecommenderWrapper],
                                           KNN_similarity_list=KNN_similarity_to_report_list,
                                           ICM_names_list=ICM_names_to_report_list,
                                           UCM_names_list=None)

        # Article-metric table: RECALL at the article's cutoffs
        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("article_metrics"),
                                             metrics_list=["RECALL"],
                                             cutoffs_list=[50, 100, 150, 200, 250, 300],
                                             table_title=None,
                                             highlight_best=True)

        # Full metric table at the validation cutoff
        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("all_metrics"),
                                             metrics_list=["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR_ALL_HITS",
                                                           "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                             cutoffs_list=[150],
                                             table_title=None,
                                             highlight_best=True)

        result_loader.generate_latex_time_statistics(file_name + "{}_latex_results.txt".format("time"),
                                                     n_evaluation_users=n_test_users,
                                                     table_title=None)
# hyperparameters_range_dictionary["normalize"] = Categorical([True, False]) # hyperparameters_range_dictionary["tversky_alpha"] = Real(0, 1) # hyperparameters_range_dictionary["tversky_beta"] = Real(0, 1) # hyperparameters_range_dictionary = {} # hyperparameters_range_dictionary["topK"] = Integer(5, 2000) # hyperparameters_range_dictionary["add_zeros_quota"] = Real(low = 0, high = 1, prior = 'uniform') # hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False]) # ucm_w = sps.load_npz('Data/ucm_weighted.npz') # ucm_age, ucm_region, ucm_all = Data.get_ucm() recommender_input_args = SearchInputRecommenderArgs( CONSTRUCTOR_POSITIONAL_ARGS=[urm_train], CONSTRUCTOR_KEYWORD_ARGS={}, FIT_POSITIONAL_ARGS=[], FIT_KEYWORD_ARGS={} ) output_folder_path = "result_experiments/" import os # If directory does not exist, create if not os.path.exists(output_folder_path): os.makedirs(output_folder_path) n_cases = 300 metric_to_optimize = "MAP" parameterSearch.search(recommender_input_args, parameter_search_space = hyperparameters_range_dictionary,
def runParameterSearch_cold_user_MF(recommender_class, URM_train, URM_train_last_test=None, metric_to_optimize="PRECISION",
                                    evaluator_validation=None, evaluator_test=None, evaluator_validation_earlystopping=None,
                                    output_folder_path="result_experiments/", n_cases=35, n_random_starts=5, resume_from_saved=True):
    """Run a Bayesian hyperparameter search for a matrix-factorization model wrapped for cold users.

    The selected MF recommender_class is wrapped in MF_cold_user_wrapper, whose own
    cold-user hyperparameters (estimate_model_for_cold_users, ..._topK) are added to
    the search space on top of the class-specific ranges.

    :param recommender_class: one of MatrixFactorization_FunkSVD_Cython, MatrixFactorization_AsySVD_Cython,
        MatrixFactorization_BPR_Cython, IALSRecommender, PureSVDRecommender, NMFRecommender
    :param URM_train: training user-rating matrix (copied, never modified)
    :param URM_train_last_test: optional URM (train + validation) used to refit the best model
    :param metric_to_optimize: metric maximized by the search and used for early stopping
    :param evaluator_validation: evaluator used to select the best configuration
    :param evaluator_test: evaluator used to report final results
    :param evaluator_validation_earlystopping: evaluator used by early stopping during fit
    :param output_folder_path: folder where search metadata, results and the error log are stored
    :param n_cases: total number of hyperparameter configurations to evaluate
    :param n_random_starts: number of initial random configurations
    :param resume_from_saved: whether to resume a previously interrupted search

    Exceptions raised during the search are caught, printed and appended to ErrorLog.txt.
    """

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Early-stopping configuration shared by all iterative models
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_validation_earlystopping,
                              "lower_validations_allowed": 5,
                              "validation_metric": metric_to_optimize,
                              }

    # Defensive copies: the search mutates CONSTRUCTOR_POSITIONAL_ARGS in place
    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        ##########################################################################################################
        # Class-specific search spaces. The classes are mutually exclusive, so an
        # elif chain replaces the original independent if tests; the final else
        # turns the previous latent NameError into an explicit error.

        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5, prior='uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        elif recommender_class is MatrixFactorization_AsySVD_Cython:

            # AsySVD only supports batch_size 1
            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical([1]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5, prior='uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        elif recommender_class is MatrixFactorization_BPR_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([1500]),
                "num_factors": Integer(1, 200),
                "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                # BPR treats all interactions as positive (no rating threshold)
                FIT_KEYWORD_ARGS={**earlystopping_keywargs, "positive_threshold_BPR": None})

        ##########################################################################################################

        elif recommender_class is IALSRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 200),
                "confidence_scaling": Categorical(["linear", "log"]),
                "alpha": Real(low=1e-3, high=50.0, prior='log-uniform'),
                "epsilon": Real(low=1e-3, high=10.0, prior='log-uniform'),
                "reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        elif recommender_class is PureSVDRecommender:

            # Closed-form model: no early stopping needed
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        elif recommender_class is NMFRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
                "solver": Categorical(["coordinate_descent", "multiplicative_update"]),
                "init_type": Categorical(["random", "nndsvda"]),
                "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        else:
            # Previously this fell through to a NameError at search time
            raise ValueError("runParameterSearch_cold_user_MF: unsupported recommender_class {}".format(recommender_class))

        #########################################################################################################

        if URM_train_last_test is not None:
            # Index 1 holds the URM (index 0 is the recommender class to wrap)
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[1] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        # The search is run on the cold-user wrapper, not on the MF class directly
        parameterSearch = SearchBayesianSkopt(MF_cold_user_wrapper,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        # Wrapper-level hyperparameters shared by every MF class
        hyperparameters_range_dictionary["estimate_model_for_cold_users"] = Categorical(["itemKNN", "mean_item_factors"])
        hyperparameters_range_dictionary["estimate_model_for_cold_users_topK"] = Integer(5, 1000)

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_input_args,
                               parameter_search_space=hyperparameters_range_dictionary,
                               n_cases=n_cases,
                               n_random_starts=n_random_starts,
                               output_folder_path=output_folder_path,
                               output_file_name_root=output_file_name_root,
                               metric_to_optimize=metric_to_optimize,
                               resume_from_saved=resume_from_saved,
                               recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        # Context manager guarantees the log file is closed even if the write fails
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
############################### ############################### EVALUATION ABLATION EXPERIMENT ############################### ################################################################################################################################################ if input_flags.run_eval_ablation: for permutation_index in range(input_flags.n_permutations): # Run evaluation of the full map fitted model with the different interaction map modes for map_mode in ["all_map", "main_diagonal", "off_diagonal"]: input_folder_path = os.path.join(output_folder_path, "fit_ablation_{}/{}_{}/".format("all_map", "all_map", permutation_index)) result_folder_path = os.path.join(output_folder_path, "evaluation_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)) recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, dataset.CFM_data_class_full]) run_evaluation_ablation(recommender_class = CFM_wrapper, recommender_input_args = recommender_input_args_last_test, evaluator_test = evaluator_test, input_folder_path = input_folder_path, result_folder_path = result_folder_path, map_mode = map_mode) read_permutation_results(output_folder_path, input_flags.n_permutations, 10, ["NDCG", "HIT_RATE"], file_result_name_root = "latex_evaluation_ablation_results", convolution_model_name = CFM_wrapper.RECOMMENDER_NAME, pretrained_model_name = 'FM', pretrained_model_class = FM_Wrapper,
def read_data_split_and_search(dataset_name, flag_baselines_tune=False, flag_DL_article_default=False, flag_MF_baselines_tune=False, flag_DL_tune=False, flag_print_results=False):
    """Load a cold-user split, tune baselines and cold-user MF models, run MultiVAE, and print LaTeX results.

    :param dataset_name: "movielens20m" or "netflixPrize" (other values leave `dataset` unbound)
    :param flag_baselines_tune: if True, tune the collaborative baseline algorithms
    :param flag_DL_article_default: if True, fit Mult-VAE with the article's fixed hyperparameters
    :param flag_MF_baselines_tune: if True, tune the MF models through the cold-user wrapper
    :param flag_DL_tune: accepted for interface symmetry; not read in this function's body
    :param flag_print_results: if True, generate the LaTeX result tables from the result folder
    """

    from Conferences.WWW.MultiVAE_our_interface.Movielens20M.Movielens20MReader import Movielens20MReader
    from Conferences.WWW.MultiVAE_our_interface.NetflixPrize.NetflixPrizeReader import NetflixPrizeReader

    split_type = "cold_user"

    result_folder_path = "result_experiments/{}/{}_{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name, split_type)

    if dataset_name == "movielens20m":
        dataset = Movielens20MReader(result_folder_path, split_type=split_type)

    elif dataset_name == "netflixPrize":
        dataset = NetflixPrizeReader(result_folder_path)

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    if split_type == "cold_user":

        # Commented-out entries are intentionally excluded from the plain baseline run
        # (the MF models are tuned separately below through the cold-user wrapper)
        collaborative_algorithm_list = [
            Random,
            TopPop,
            # UserKNNCFRecommender,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
            # PureSVDRecommender,
            # IALSRecommender,
            # NMFRecommender,
            # MatrixFactorization_BPR_Cython,
            # MatrixFactorization_FunkSVD_Cython,
            EASE_R_Recommender,
            SLIM_BPR_Cython,
            SLIMElasticNetRecommender,
        ]

        # Copies protect the reader's cached matrices from in-place modification
        URM_train = dataset.URM_DICT["URM_train"].copy()
        URM_train_all = dataset.URM_DICT["URM_train_all"].copy()
        URM_validation = dataset.URM_DICT["URM_validation"].copy()
        URM_test = dataset.URM_DICT["URM_test"].copy()

        # Ensure IMPLICIT data and DISJOINT sets
        assert_implicit_data([URM_train, URM_train_all, URM_validation, URM_test])

        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train_all, URM_validation, URM_test])

        from Base.Evaluation.Evaluator import EvaluatorHoldout

        evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[100])
        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[20, 50, 100])

        # Wrap the evaluators to restrict evaluation to the cold-user subset defined by URM_train_all
        evaluator_validation = EvaluatorUserSubsetWrapper(evaluator_validation, URM_train_all)
        evaluator_test = EvaluatorUserSubsetWrapper(evaluator_test, URM_test_all) if False else EvaluatorUserSubsetWrapper(evaluator_test, URM_train_all)

        runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                           URM_train=URM_train,
                                                           URM_train_last_test=URM_train + URM_validation,
                                                           metric_to_optimize=metric_to_optimize,
                                                           evaluator_validation_earlystopping=evaluator_validation,
                                                           evaluator_validation=evaluator_validation,
                                                           evaluator_test=evaluator_test,
                                                           output_folder_path=result_folder_path,
                                                           parallelizeKNN=False,
                                                           allow_weighting=True,
                                                           resume_from_saved=True,
                                                           n_cases=n_cases,
                                                           n_random_starts=n_random_starts)

        if flag_baselines_tune:

            for recommender_class in collaborative_algorithm_list:
                try:
                    runParameterSearch_Collaborative_partial(recommender_class)
                except Exception as e:
                    print("On recommender {} Exception {}".format(recommender_class, str(e)))
                    traceback.print_exc()

        ################################################################################################
        ######      Matrix Factorization Cold users

        collaborative_MF_algorithm_list = [
            PureSVDRecommender,
            IALSRecommender,
            NMFRecommender,
            MatrixFactorization_BPR_Cython,
            MatrixFactorization_FunkSVD_Cython,
        ]

        runParameterSearch_cold_user_MF_partial = partial(runParameterSearch_cold_user_MF,
                                                          URM_train=URM_train,
                                                          URM_train_last_test=URM_train + URM_validation,
                                                          metric_to_optimize=metric_to_optimize,
                                                          evaluator_validation_earlystopping=evaluator_validation,
                                                          evaluator_validation=evaluator_validation,
                                                          evaluator_test=evaluator_test,
                                                          output_folder_path=result_folder_path,
                                                          resume_from_saved=True,
                                                          n_cases=n_cases,
                                                          n_random_starts=n_random_starts)

        if flag_MF_baselines_tune:

            for recommender_class in collaborative_MF_algorithm_list:
                try:
                    runParameterSearch_cold_user_MF_partial(recommender_class)
                except Exception as e:
                    print("On recommender {} Exception {}".format(recommender_class, str(e)))
                    traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:
            # Epoch budget depends on the dataset, per the article
            if dataset_name == "movielens20m":
                epochs = 100

            elif dataset_name == "netflixPrize":
                epochs = 200

            multiVAE_article_hyperparameters = {
                "epochs": epochs,
                "batch_size": 500,
                "total_anneal_steps": 200000,
                "p_dims": None,
            }

            multiVAE_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }

            parameterSearch = SearchSingleCase(Mult_VAE_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=multiVAE_earlystopping_hyperparameters)

            # Same args but with train+validation for the final refit before test evaluation
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test=recommender_input_args_last_test,
                                   fit_hyperparameters_values=multiVAE_article_hyperparameters,
                                   output_folder_path=result_folder_path,
                                   resume_from_saved=True,
                                   output_file_name_root=Mult_VAE_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(Mult_VAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one test interaction (CSR indptr differences give per-user counts)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                           base_algorithm_list=None,
                                           other_algorithm_list=[Mult_VAE_RecommenderWrapper],
                                           KNN_similarity_list=KNN_similarity_to_report_list,
                                           ICM_names_list=None,
                                           UCM_names_list=None)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("article_metrics"),
                                             metrics_list=["RECALL", "NDCG"],
                                             cutoffs_list=[20, 50, 100],
                                             table_title=None,
                                             highlight_best=True)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("all_metrics"),
                                             metrics_list=["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR_ALL_HITS",
                                                           "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                             cutoffs_list=[50],
                                             table_title=None,
                                             highlight_best=True)

        result_loader.generate_latex_time_statistics(file_name + "{}_latex_results.txt".format("time"),
                                                     n_evaluation_users=n_test_users,
                                                     table_title=None)
def read_data_split_and_search(dataset_name, flag_baselines_tune=False, flag_DL_article_default=False, flag_DL_tune=False, flag_print_results=False):
    """Load a HetRec2011 split, run HERS with the article configuration, tune baselines, and print LaTeX tables.

    :param dataset_name: one of the supported delicious/lastfm HetRec2011 variants
        ("-cold-users"/"-cold-items" suffixes select the cold-start splits); any other
        value prints a message and returns
    :param flag_baselines_tune: if True, tune collaborative, content and hybrid baselines
    :param flag_DL_article_default: if True, fit HERS with the article's fixed hyperparameters
    :param flag_DL_tune: accepted for interface symmetry; not read in this function's body
    :param flag_print_results: if True, generate the LaTeX result tables from the result folder
    """

    result_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    # Select the dataset reader matching the requested variant
    if dataset_name == "delicious-hetrec2011":
        dataset = DeliciousHetrec2011Reader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-users":
        dataset = DeliciousHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-items":
        dataset = DeliciousHetrec2011ColdItemsReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011":
        dataset = LastFMHetrec2011Reader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-users":
        dataset = LastFMHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-items":
        dataset = LastFMHetrec2011ColdItemsReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    # Copies protect the reader's cached matrices from in-place modification
    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_negative = dataset.URM_DICT["URM_negative"].copy()
    UCM_train = dataset.UCM_DICT["UCM"].copy()
    ICM_train = dataset.ICM_DICT["ICM"].copy()

    if dataset_name == "delicious-hetrec2011" or dataset_name == "lastfm-hetrec2011":
        # Warm split: the final model is refit on train + validation
        URM_train_last_test = URM_train + URM_validation

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    else:
        # Cold splits: no separate refit matrix, validation is not merged into train
        URM_train_last_test = URM_train

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_test])
        assert_disjoint_matrices([URM_train, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    metric_to_optimize = "MAP"
    cutoff_list_validation = [5, 10, 20]
    cutoff_list_test = [5, 10, 20]

    n_cases = 50
    n_random_starts = 15

    # Evaluation uses the article's negative-item sampling protocol
    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_negative, cutoff_list=cutoff_list_test)

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        # Fixed hyperparameters reproducing the article's configuration (no tuning)
        article_hyperparameters = {
            "pretrain_samples": 3,
            "pretrain_batch_size": 200,
            "pretrain_iterations": 5,
            "embed_len": 128,
            "topK": 10,
            "fliter_theta": 16,
            "aggre_theta": 64,
            "batch_size": 400,
            "samples": 3,
            "margin": 20,
            "epochs": 30,
            "iter_without_att": 5,
            "directed": False,
        }

        # Do not modify earlystopping
        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": False,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
        }

        # This is a simple version of the tuning code that is reported below and uses SearchSingleCase
        # You may use this for a simpler testing
        # recommender_instance = HERSWrapper(URM_train, UCM_train, ICM_train)
        #
        # recommender_instance.fit(**article_hyperparameters,
        #                          **earlystopping_hyperparameters)
        #
        # evaluator_test.evaluateRecommender(recommender_instance)

        # Fit the DL model, select the optimal number of epochs and save the result
        parameterSearch = SearchSingleCase(HERSWrapper,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_train, ICM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        if dataset_name == "delicious-hetrec2011" or dataset_name == "lastfm-hetrec2011":
            # Warm splits refit on train + validation for the final test evaluation
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test=recommender_input_args_last_test,
                                   fit_hyperparameters_values=article_hyperparameters,
                                   output_folder_path=result_folder_path,
                                   output_file_name_root=HERSWrapper.RECOMMENDER_NAME)

        else:
            parameterSearch.search(recommender_input_args,
                                   fit_hyperparameters_values=article_hyperparameters,
                                   output_folder_path=result_folder_path,
                                   output_file_name_root=HERSWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      BASELINE ALGORITHMS - Nothing should be modified below this point
    ######

    if flag_baselines_tune:

        ################################################################################################
        ######      Collaborative Baselines

        collaborative_algorithm_list = [
            Random,
            TopPop,
            ItemKNNCFRecommender,
            PureSVDRecommender,
            SLIM_BPR_Cython,
        ]

        # Running hyperparameter tuning of baslines
        # See if the results are reasonable and comparable to baselines reported in the paper
        runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                           URM_train=URM_train,
                                                           URM_train_last_test=URM_train_last_test,
                                                           metric_to_optimize=metric_to_optimize,
                                                           evaluator_validation_earlystopping=evaluator_validation,
                                                           evaluator_validation=evaluator_validation,
                                                           evaluator_test=evaluator_test,
                                                           output_folder_path=result_folder_path,
                                                           resume_from_saved=True,
                                                           parallelizeKNN=False,
                                                           allow_weighting=True,
                                                           n_cases=n_cases,
                                                           n_random_starts=n_random_starts)

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ######      Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:
                runParameterSearch_Content(ItemKNNCBFRecommender,
                                           URM_train=URM_train,
                                           URM_train_last_test=URM_train_last_test,
                                           metric_to_optimize=metric_to_optimize,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test,
                                           output_folder_path=result_folder_path,
                                           parallelizeKNN=False,
                                           allow_weighting=True,
                                           ICM_name=ICM_name,
                                           ICM_object=ICM_object.copy(),
                                           n_cases=n_cases,
                                           n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ######      Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:
                runParameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                          URM_train=URM_train,
                                          URM_train_last_test=URM_train_last_test,
                                          metric_to_optimize=metric_to_optimize,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test,
                                          output_folder_path=result_folder_path,
                                          parallelizeKNN=False,
                                          allow_weighting=True,
                                          ICM_name=ICM_name,
                                          ICM_object=ICM_object.copy(),
                                          n_cases=n_cases,
                                          n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one interaction (CSR indptr differences give per-user counts).
        # BUGFIX: n_validation_users was previously computed from URM_test.indptr (copy-paste);
        # it now correctly counts users in the validation URM.
        n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        print_time_statistics_latex_table(result_folder_path=result_folder_path,
                                          dataset_name=dataset_name,
                                          algorithm_name=ALGORITHM_NAME,
                                          other_algorithm_list=[HERSWrapper],
                                          KNN_similarity_to_report_list=KNN_similarity_to_report_list,
                                          n_validation_users=n_validation_users,
                                          n_test_users=n_test_users,
                                          n_decimals=2)

        print_results_latex_table(result_folder_path=result_folder_path,
                                  algorithm_name=ALGORITHM_NAME,
                                  file_name_suffix="article_metrics_",
                                  dataset_name=dataset_name,
                                  metrics_to_report_list=["HIT_RATE", "NDCG"],
                                  cutoffs_to_report_list=cutoff_list_test,
                                  other_algorithm_list=[HERSWrapper],
                                  KNN_similarity_to_report_list=KNN_similarity_to_report_list)

        print_results_latex_table(result_folder_path=result_folder_path,
                                  algorithm_name=ALGORITHM_NAME,
                                  file_name_suffix="all_metrics_",
                                  dataset_name=dataset_name,
                                  metrics_to_report_list=["PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR",
                                                          "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                  cutoffs_to_report_list=cutoff_list_validation,
                                  other_algorithm_list=[HERSWrapper],
                                  KNN_similarity_to_report_list=KNN_similarity_to_report_list)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    """
    Load the requested dataset split and run the MCRec experiment pipeline:
    optionally tune the baseline recommenders, optionally fit MCRec with the
    hyperparameters reported in the original article, and optionally export
    the result tables in latex format.

    :param dataset_name: name of the dataset split to load; currently only
        "movielens100k" is supported
    :param flag_baselines_tune: if True, run the hyperparameter search on all
        collaborative, content and hybrid baseline recommenders
    :param flag_DL_article_default: if True, fit MCRec with the article's
        published hyperparameters (single-case search)
    :param flag_DL_tune: currently unused; kept for interface consistency with
        the other experiment runners in this repository
    :param flag_print_results: if True, generate the latex result tables and
        time statistics from the saved search results
    :raises ValueError: if dataset_name is not a supported dataset
    """

    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens100k":
        dataset = Movielens100KReader(result_folder_path)
    else:
        # Fix: previously an unsupported dataset_name left "dataset" unbound and
        # the function crashed later with a confusing NameError. Fail fast instead.
        raise ValueError("Dataset name not supported: '{}'".format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT train/validation/test/negative sets
    assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test, URM_test_negative])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         result_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               result_folder_path + algorithm_dataset_string + "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    # Evaluation on positive items plus the provided negative item samples,
    # as prescribed by the article's protocol
    evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative, cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative, cutoff_list=[10])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "PRECISION"
    n_cases = 50
    n_random_starts = 15

    # URM_train_last_test: once the search is over, the chosen model is
    # re-fitted on train + validation and evaluated on the test set
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                # Best-effort loop: one failing baseline must not stop the others
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():
            try:
                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)
            except Exception as e:
                print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ###### DL ALGORITHM
    ######

    if flag_DL_article_default:

        if dataset_name == "movielens100k":
            # The code provided by the original authors of MCRec can be used only
            # for the original data. Here we pass to the Wrapper the URM_train
            # matrix that is only required for its shape; the training will use
            # the preprocessed data the original authors provided.

            from Conferences.KDD.MCRec_github.code.Dataset import Dataset

            original_dataset_reader = Dataset('Conferences/KDD/MCRec_github/data/' + 'ml-100k')

            # Hyperparameters as reported in the original article
            MCRec_article_hyperparameters = {
                "epochs": 200,
                "latent_dim": 128,
                "reg_latent": 0,
                "layers": [512, 256, 128, 64],
                # NOTE(review): "reg_layes" (sic) is presumably the exact key the
                # wrapper expects — do not rename without checking the wrapper's fit()
                "reg_layes": [0, 0, 0, 0],
                "learning_rate": 1e-3,
                "batch_size": 256,
                "num_negatives": 4,
            }

            MCRec_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            parameterSearch = SearchSingleCase(
                MCRecML100k_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, original_dataset_reader],
                FIT_KEYWORD_ARGS=MCRec_earlystopping_hyperparameters)

            # The final model is re-fitted on train + validation before test evaluation
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=MCRec_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ###### PRINT RESULTS
    ######

    if flag_print_results:

        # Users with at least one test interaction are the ones actually evaluated
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[MCRecML100k_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "NDCG"],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR",
                "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
############################### ############################### EVALUATION ABLATION EXPERIMENT ############################### ################################################################################################################################################ if input_flags.run_eval_ablation: for permutation_index in range(input_flags.n_permutations): # Run evaluation of the full map fitted model with the different interaction map modes for map_mode in ["all_map", "main_diagonal", "off_diagonal"]: input_folder_path = os.path.join(output_folder_path, "fit_ablation_{}/{}_{}/".format("all_map", "all_map", permutation_index)) result_folder_path = os.path.join(output_folder_path, "evaluation_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)) recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train]) run_evaluation_ablation(recommender_class=ConvNCF_RecommenderWrapper, recommender_input_args = recommender_input_args, evaluator_test = evaluator_test, input_folder_path = input_folder_path, result_folder_path = result_folder_path, map_mode = map_mode) read_permutation_results(output_folder_path, input_flags.n_permutations, 10, ["PRECISION", "MAP_MIN_DEN", "NDCG", "F1", "HIT_RATE"], file_result_name_root = "latex_evaluation_ablation_results", convolution_model_name = ConvNCF_RecommenderWrapper.RECOMMENDER_NAME, pretrained_model_name = 'BPRMF', pretrained_model_class = MatrixFactorizationCustomFactorsRecommender,