def runParameterSearch_NeuRec(recommender_class, URM_train, earlystopping_hyperparameters, output_file_name_root,
                              URM_train_last_test=None, n_cases=35, n_random_starts=5,
                              evaluator_validation=None, evaluator_test=None, metric_to_optimize="RECALL",
                              output_folder_path="result_experiments/"):
    """
    Run a SearchBayesianSkopt hyperparameter search for a NeuRec-style recommender.

    :param recommender_class: recommender class handed to SearchBayesianSkopt
    :param URM_train: training matrix, passed as the only positional constructor argument
    :param earlystopping_hyperparameters: dict merged into the fit keyword arguments
    :param output_file_name_root: forwarded to the search as the output file name root
    :param URM_train_last_test: optional matrix that replaces URM_train in a copy of the
        input args, forwarded as recommender_input_args_last_test
    :param n_cases: forwarded to the search
    :param n_random_starts: forwarded to the search
    :param evaluator_validation: forwarded to SearchBayesianSkopt
    :param evaluator_test: forwarded to SearchBayesianSkopt
    :param metric_to_optimize: forwarded to the search
    :param output_folder_path: folder created if missing and forwarded to the search
    :return: None
    """

    # exist_ok avoids the check-then-create race when several searches start concurrently
    os.makedirs(output_folder_path, exist_ok=True)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    ##########################################################################################################

    # Key order is kept identical to the original insertion order
    hyperparameters_range_dictionary = {
        "epochs": Categorical([1500]),
        "num_neurons": Integer(100, 400),
        "num_factors": Integer(20, 70),
        "dropout_percentage": Real(low=0.0, high=0.1, prior='uniform'),
        "learning_rate": Real(low=1e-5, high=1e-3, prior='log-uniform'),
        "regularization_rate": Real(low=0.0, high=0.2, prior='uniform'),
        "batch_size": Categorical([128, 256, 512, 1024, 2048]),
    }

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        # The fixed entries come last, so they override same-named early-stopping keys
        FIT_KEYWORD_ARGS={**earlystopping_hyperparameters,
                          "use_gpu": False,
                          "epochs_min": 200,
                          "display_epoch": None,
                          "display_step": None,
                          "verbose": False}
    )

    #########################################################################################################

    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    # NOTE: resume_from_saved is hard-coded to True in this function
    parameterSearch.search(recommender_input_args,
                           parameter_search_space=hyperparameters_range_dictionary,
                           n_cases=n_cases,
                           n_random_starts=n_random_starts,
                           output_folder_path=output_folder_path,
                           output_file_name_root=output_file_name_root,
                           metric_to_optimize=metric_to_optimize,
                           resume_from_saved=True,
                           recommender_input_args_last_test=recommender_input_args_last_test)
def runParameterSearch_SpectralCF(recommender_class, URM_train, earlystopping_parameters, output_file_name_root,
                                  n_cases=35, evaluator_validation=None, evaluator_test=None,
                                  metric_to_optimize="RECALL", output_folder_path="result_experiments/"):
    """
    Run a SearchBayesianSkopt hyperparameter search for SpectralCF_RecommenderWrapper.

    :param recommender_class: must be SpectralCF_RecommenderWrapper, the only class with a search space here
    :param URM_train: training matrix, passed as the only positional constructor argument
    :param earlystopping_parameters: passed unchanged as the fit keyword arguments
    :param output_file_name_root: forwarded to the search as the output file name root
    :param n_cases: forwarded to the search
    :param evaluator_validation: forwarded to SearchBayesianSkopt
    :param evaluator_test: forwarded to SearchBayesianSkopt
    :param metric_to_optimize: forwarded to the search
    :param output_folder_path: folder created if missing and forwarded to the search
    :return: None
    :raises ValueError: if recommender_class is not SpectralCF_RecommenderWrapper
    """

    # exist_ok avoids the check-then-create race when several searches start concurrently
    os.makedirs(output_folder_path, exist_ok=True)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    ##########################################################################################################

    # Previously any other class reached the search call with unbound locals and died
    # with an opaque UnboundLocalError; fail with an explicit message instead.
    if recommender_class is not SpectralCF_RecommenderWrapper:
        raise ValueError("runParameterSearch_SpectralCF: no hyperparameter search space defined for {}".format(recommender_class))

    # Key order is kept identical to the original insertion order
    hyperparameters_range_dictionary = {
        "batch_size": Categorical([1024]),
        "embedding_size": Categorical([4, 8, 16, 32]),
        "decay": Real(low=1e-5, high=1e-1, prior='log-uniform'),
        "learning_rate": Real(low=1e-5, high=1e-2, prior='log-uniform'),
        "k": Integer(low=1, high=6),
    }

    # NOTE(review): the other functions in this file use SearchInputRecommenderArgs;
    # confirm that SearchInputRecommenderParameters is still the intended container.
    recommender_parameters = SearchInputRecommenderParameters(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=earlystopping_parameters
    )

    #########################################################################################################

    parameterSearch.search(recommender_parameters,
                           parameter_search_space=hyperparameters_range_dictionary,
                           n_cases=n_cases,
                           output_folder_path=output_folder_path,
                           output_file_name_root=output_file_name_root,
                           metric_to_optimize=metric_to_optimize)
def _cold_user_MF_search_space(recommender_class, earlystopping_keywargs):
    """
    Return (hyperparameter ranges, fit keyword arguments) for a supported matrix factorization class.

    :raises ValueError: if no search space is defined for recommender_class
    """

    is_funk_svd = recommender_class is MatrixFactorization_FunkSVD_Cython

    if is_funk_svd or recommender_class is MatrixFactorization_AsySVD_Cython:
        # FunkSVD and AsySVD share the same space; only the batch sizes differ
        batch_size_values = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] if is_funk_svd else [1]

        search_space = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([500]),
            "use_bias": Categorical([True, False]),
            "batch_size": Categorical(batch_size_values),
            "num_factors": Integer(1, 200),
            "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            "negative_interactions_quota": Real(low=0.0, high=0.5, prior='uniform'),
        }
        return search_space, earlystopping_keywargs

    if recommender_class is MatrixFactorization_BPR_Cython:
        search_space = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([1500]),
            "num_factors": Integer(1, 200),
            "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
            "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
        }
        return search_space, {**earlystopping_keywargs, "positive_threshold_BPR": None}

    if recommender_class is IALSRecommender:
        search_space = {
            "num_factors": Integer(1, 200),
            "confidence_scaling": Categorical(["linear", "log"]),
            "alpha": Real(low=1e-3, high=50.0, prior='log-uniform'),
            "epsilon": Real(low=1e-3, high=10.0, prior='log-uniform'),
            "reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
        }
        return search_space, earlystopping_keywargs

    if recommender_class is PureSVDRecommender:
        return {"num_factors": Integer(1, 350)}, {}

    if recommender_class is NMFRecommender:
        search_space = {
            "num_factors": Integer(1, 350),
            "solver": Categorical(["coordinate_descent", "multiplicative_update"]),
            "init_type": Categorical(["random", "nndsvda"]),
            "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
        }
        return search_space, {}

    raise ValueError("No cold-user MF hyperparameter search space defined for {}".format(recommender_class))


def runParameterSearch_cold_user_MF(recommender_class, URM_train, URM_train_last_test=None,
                                    metric_to_optimize="PRECISION", evaluator_validation=None,
                                    evaluator_test=None, evaluator_validation_earlystopping=None,
                                    output_folder_path="result_experiments/", n_cases=35,
                                    n_random_starts=5, resume_from_saved=True):
    """
    Run a SearchBayesianSkopt search over MF_cold_user_wrapper around a matrix factorization class.

    The wrapped class and URM_train are passed as the two positional constructor arguments.
    Any exception raised while preparing or running the search is printed and appended to
    "ErrorLog.txt" inside output_folder_path; it is not re-raised.

    :param recommender_class: matrix factorization class to wrap; it also provides RECOMMENDER_NAME
    :param URM_train: training matrix (copied before use)
    :param URM_train_last_test: optional matrix (copied) that replaces URM_train in a copy of the
        input args, forwarded as recommender_input_args_last_test
    :param metric_to_optimize: forwarded to the search and used as the early-stopping "validation_metric"
    :param evaluator_validation: forwarded to SearchBayesianSkopt
    :param evaluator_test: forwarded to SearchBayesianSkopt
    :param evaluator_validation_earlystopping: used as the early-stopping "evaluator_object"
    :param output_folder_path: folder created if missing; receives the results and the error log
    :param n_cases: forwarded to the search
    :param n_random_starts: forwarded to the search
    :param resume_from_saved: forwarded to the search
    :return: None
    """

    # exist_ok avoids the check-then-create race when several searches start concurrently
    os.makedirs(output_folder_path, exist_ok=True)

    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_validation_earlystopping,
                              "lower_validations_allowed": 5,
                              "validation_metric": metric_to_optimize,
                              }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        hyperparameters_range_dictionary, fit_keyword_args = _cold_user_MF_search_space(
            recommender_class, earlystopping_keywargs)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[recommender_class, URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS=fit_keyword_args
        )

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            # Index 1 holds the URM; index 0 is the wrapped recommender class
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[1] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        parameterSearch = SearchBayesianSkopt(MF_cold_user_wrapper,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        # Two extra ranges are added on top of the wrapped model's own search space
        hyperparameters_range_dictionary["estimate_model_for_cold_users"] = Categorical(["itemKNN", "mean_item_factors"])
        hyperparameters_range_dictionary["estimate_model_for_cold_users_topK"] = Integer(5, 1000)

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_input_args,
                               parameter_search_space=hyperparameters_range_dictionary,
                               n_cases=n_cases,
                               n_random_starts=n_random_starts,
                               output_folder_path=output_folder_path,
                               output_file_name_root=output_file_name_root,
                               metric_to_optimize=metric_to_optimize,
                               resume_from_saved=resume_from_saved,
                               recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        # Deliberate best-effort: report and log the failure so batch runs over many
        # recommenders keep going.
        error_message = "On recommender {} Exception {}".format(recommender_class, str(e))

        print(error_message)
        traceback.print_exc()

        # os.path.join keeps the log inside the folder even without a trailing separator;
        # the context manager closes the file even if the write fails.
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write(error_message + "\n")
# Module-level experiment setup. It sits outside any function, so it presumably runs
# whenever this module is executed or imported (NOTE(review): confirm that is intended).
Data = DataManager()

# The full URM is split twice: first into train/test, then the remaining train part
# is split again into train/validation.
# NOTE(review): the meaning of `threshold` and `temperature` is defined by
# split_train_leave_k_out_user_wise, which is not visible here.
urm_train, urm_test = split_train_leave_k_out_user_wise(Data.get_urm(), threshold=10, temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train, threshold=10, temperature='valid')

# Both evaluators use the single cutoff 10
evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

# The class itself (not an instance) is handed to the search below
recommender = HybridNorm1Recommender

# Disabled: pre-fitting a UserKNNCF model to obtain its similarity matrix
# recommender_3 = UserKNNCFRecommender(urm_train)
# recommender_3.fit(shrink=2, topK=600, normalize=True)
# w_sparse = recommender_3.W_sparse

parameterSearch = SearchBayesianSkopt(recommender, evaluator_validation=evaluator_valid, evaluator_test=evaluator_test)

# Disabled: early-stopping fit arguments
# earlystopping_keywargs = {"validation_every_n": 5,
#                           "stop_on_validation": True,
#                           "evaluator_object": evaluator_valid,
#                           "lower_validations_allowed": 2,
#                           "validation_metric": "MAP"
#                           }

# Only "beta" is currently active in the search space; the other ranges are disabled
hyperparameters_range_dictionary = {}
hyperparameters_range_dictionary["beta"] = Real(0, 1)
# hyperparameters_range_dictionary["gamma"] = Real(0, 1)
# hyperparameters_range_dictionary["phi"] = Real(0, 1)
# hyperparameters_range_dictionary["topK"] = Integer(5, 100)
def _collaborative_last_test_args(recommender_input_args, URM_train_last_test):
    """Return a copy of the input args whose first positional argument is URM_train_last_test, or None if it is None."""

    if URM_train_last_test is None:
        return None

    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    return recommender_input_args_last_test


def _collaborative_search_space(recommender_class, earlystopping_keywargs):
    """
    Return (hyperparameter ranges, fit keyword arguments) for a supported collaborative class.

    :raises ValueError: if no search space is defined for recommender_class
    """

    if recommender_class is P3alphaRecommender:
        search_space = {
            "topK": Integer(5, 1000),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False]),
        }
        return search_space, {}

    if recommender_class is RP3betaRecommender:
        search_space = {
            "topK": Integer(5, 1000),
            "alpha": Real(low=0, high=2, prior='uniform'),
            "beta": Real(low=0, high=2, prior='uniform'),
            "normalize_similarity": Categorical([True, False]),
        }
        return search_space, {}

    is_funk_svd = recommender_class is MatrixFactorization_FunkSVD_Cython

    if is_funk_svd or recommender_class is MatrixFactorization_AsySVD_Cython:
        # FunkSVD and AsySVD share the same space; only the batch sizes differ
        batch_size_values = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024] if is_funk_svd else [1]

        search_space = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([500]),
            "use_bias": Categorical([True, False]),
            "batch_size": Categorical(batch_size_values),
            "num_factors": Integer(1, 200),
            "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            "negative_interactions_quota": Real(low=0.0, high=0.5, prior='uniform'),
        }
        return search_space, earlystopping_keywargs

    if recommender_class is MatrixFactorization_BPR_Cython:
        search_space = {
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "epochs": Categorical([1500]),
            "num_factors": Integer(1, 200),
            "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
            "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
        }
        return search_space, {**earlystopping_keywargs, "positive_threshold_BPR": None}

    if recommender_class is IALSRecommender:
        search_space = {
            "num_factors": Integer(1, 200),
            "confidence_scaling": Categorical(["linear", "log"]),
            "alpha": Real(low=1e-3, high=50.0, prior='log-uniform'),
            "epsilon": Real(low=1e-3, high=10.0, prior='log-uniform'),
            "reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
        }
        return search_space, earlystopping_keywargs

    if recommender_class is PureSVDRecommender:
        return {"num_factors": Integer(1, 350)}, {}

    if recommender_class is NMFRecommender:
        search_space = {
            "num_factors": Integer(1, 350),
            "solver": Categorical(["coordinate_descent", "multiplicative_update"]),
            "init_type": Categorical(["random", "nndsvda"]),
            "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
        }
        return search_space, {}

    if recommender_class is SLIM_BPR_Cython:
        search_space = {
            "topK": Integer(5, 1000),
            "epochs": Categorical([1500]),
            "symmetric": Categorical([True, False]),
            "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
            "lambda_i": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "lambda_j": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
        }
        fit_keyword_args = {**earlystopping_keywargs,
                            "positive_threshold_BPR": None,
                            'train_with_sparse_weights': None}
        return search_space, fit_keyword_args

    if recommender_class is SLIMElasticNetRecommender:
        search_space = {
            "topK": Integer(5, 1000),
            "l1_ratio": Real(low=1e-5, high=1.0, prior='log-uniform'),
            "alpha": Real(low=1e-3, high=1.0, prior='uniform'),
        }
        return search_space, {}

    raise ValueError("No collaborative hyperparameter search space defined for {}".format(recommender_class))


def runParameterSearch_Collaborative(recommender_class, URM_train, URM_train_last_test=None,
                                     metric_to_optimize="PRECISION", evaluator_validation=None,
                                     evaluator_test=None, evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/", parallelizeKNN=True,
                                     n_cases=35, n_random_starts=5, resume_from_saved=False,
                                     save_model="best", allow_weighting=True, similarity_type_list=None):
    """
    Run the hyperparameter search for a collaborative recommender class.

    Three paths are taken depending on recommender_class:
      - TopPop, GlobalEffects, Random: a single evaluation through SearchSingleCase;
      - ItemKNNCFRecommender, UserKNNCFRecommender: one run of
        run_KNNRecommender_on_similarity_type per similarity type, optionally in a process pool;
      - every other supported class: a SearchBayesianSkopt search over its own search space.

    Any exception raised while preparing or running the search is printed and appended to
    "ErrorLog.txt" inside output_folder_path; it is not re-raised.

    :param recommender_class: recommender class to tune; it also provides RECOMMENDER_NAME
    :param URM_train: training matrix (copied before use)
    :param URM_train_last_test: optional matrix (copied) that replaces URM_train in a copy of the
        input args, forwarded as recommender_input_args_last_test
    :param metric_to_optimize: forwarded to the search and used as the early-stopping "validation_metric"
    :param evaluator_validation: forwarded to the search object
    :param evaluator_test: forwarded to the search object
    :param evaluator_validation_earlystopping: used as the early-stopping "evaluator_object"
    :param output_folder_path: folder created if missing; receives the results and the error log
    :param parallelizeKNN: if True, the KNN similarity types are mapped over a multiprocessing.Pool
    :param n_cases: forwarded to the search
    :param n_random_starts: forwarded to the search
    :param resume_from_saved: forwarded to the search
    :param save_model: forwarded to the search
    :param allow_weighting: forwarded to run_KNNRecommender_on_similarity_type
    :param similarity_type_list: similarity types for the KNN path; defaults to
        cosine, jaccard, asymmetric, dice and tversky
    :return: None
    """

    # exist_ok avoids the check-then-create race when several searches start concurrently
    os.makedirs(output_folder_path, exist_ok=True)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(recommender_class,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        ##########################################################################################################

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters,
            # therefore only one evaluation is needed
            parameterSearch = SearchSingleCase(recommender_class,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            recommender_input_args_last_test = _collaborative_last_test_args(
                recommender_input_args, URM_train_last_test)

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            recommender_input_args_last_test = _collaborative_last_test_args(
                recommender_input_args, URM_train_last_test)

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                try:
                    pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
                finally:
                    # Release the worker processes even if one of the runs raised
                    pool.close()
                    pool.join()
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        # Raises ValueError for an unsupported class, which is logged by the handler below
        hyperparameters_range_dictionary, fit_keyword_args = _collaborative_search_space(
            recommender_class, earlystopping_keywargs)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS=fit_keyword_args)

        recommender_input_args_last_test = _collaborative_last_test_args(
            recommender_input_args, URM_train_last_test)

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        # Deliberate best-effort: report and log the failure so batch runs over many
        # recommenders keep going.
        error_message = "On recommender {} Exception {}".format(recommender_class, str(e))

        print(error_message)
        traceback.print_exc()

        # os.path.join keeps the log inside the folder even without a trailing separator;
        # the context manager closes the file even if the write fails.
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write(error_message + "\n")
def runParameterSearch_Content(recommender_class, URM_train, ICM_object, ICM_name, URM_train_last_test=None,
                               n_cases=30, n_random_starts=5, resume_from_saved=False, save_model="best",
                               evaluator_validation=None, evaluator_test=None, metric_to_optimize="PRECISION",
                               output_folder_path="result_experiments/", parallelizeKNN=False,
                               allow_weighting=True, similarity_type_list=None):
    """
    Run run_KNNRecommender_on_similarity_type once per similarity type for a content-based recommender.

    The output file name root is recommender_class.RECOMMENDER_NAME followed by "_" and ICM_name.
    Unlike the sibling collaborative search, exceptions are not caught here and propagate to the caller.

    :param recommender_class: recommender class to tune; it also provides RECOMMENDER_NAME
    :param URM_train: training matrix (copied), first positional constructor argument
    :param ICM_object: item content matrix (copied), second positional constructor argument
    :param ICM_name: label appended to the output file name root
    :param URM_train_last_test: optional matrix (copied) that replaces URM_train in a copy of the
        input args, forwarded as recommender_input_args_last_test
    :param n_cases: forwarded to each run
    :param n_random_starts: forwarded to each run
    :param resume_from_saved: forwarded to each run
    :param save_model: forwarded to each run
    :param evaluator_validation: forwarded to SearchBayesianSkopt
    :param evaluator_test: forwarded to SearchBayesianSkopt
    :param metric_to_optimize: forwarded to each run
    :param output_folder_path: folder created if missing and forwarded to each run
    :param parallelizeKNN: if True, the similarity types are mapped over a multiprocessing.Pool
    :param allow_weighting: forwarded to each run
    :param similarity_type_list: similarity types to run; defaults to
        cosine, jaccard, asymmetric, dice and tversky
    :return: None
    """

    # exist_ok avoids the check-then-create race when several searches start concurrently
    os.makedirs(output_folder_path, exist_ok=True)

    URM_train = URM_train.copy()
    ICM_object = ICM_object.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    ##########################################################################################################

    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(ICM_name)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    if similarity_type_list is None:
        similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_object],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    run_KNNCBFRecommender_on_similarity_type_partial = partial(
        run_KNNRecommender_on_similarity_type,
        recommender_input_args=recommender_input_args,
        parameter_search_space={},
        parameterSearch=parameterSearch,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=resume_from_saved,
        save_model=save_model,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        allow_weighting=allow_weighting,
        recommender_input_args_last_test=recommender_input_args_last_test)

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
        try:
            pool.map(run_KNNCBFRecommender_on_similarity_type_partial, similarity_type_list)
        finally:
            # Release the worker processes even if one of the runs raised
            pool.close()
            pool.join()
    else:
        for similarity_type in similarity_type_list:
            run_KNNCBFRecommender_on_similarity_type_partial(similarity_type)
def runParameterSearch_Hybrid(recommender_class, URM_train, ICM_train, W_sparse_CF=None,
                              URM_train_last_test=None, metric_to_optimize="MAP",
                              evaluator_validation=None, evaluator_test=None,
                              evaluator_validation_earlystopping=None,
                              output_folder_path="result_experiments/",
                              n_cases=35, n_random_starts=5, resume_from_saved=False,
                              save_model="best", allow_weighting=True,
                              similarity_type_list=None):
    """
    Run the Bayesian hyperparameter search for one of the score-hybrid recommenders.

    Any exception raised while setting up or running the search is printed and appended to
    "ErrorLog.txt" in output_folder_path, it is not propagated to the caller.

    :param recommender_class: Class of the recommender to optimize, it selects the search space
    :param URM_train: Training URM, it is copied and used as first positional argument of the recommender constructor
    :param ICM_train: ICM, it is copied and given to the constructor of the recommenders that require it
    :param W_sparse_CF: Given as third constructor argument to CFW_D_Similarity_Linalg, not used by the other recommenders
    :param URM_train_last_test: If not None, it is copied and replaces URM_train in the constructor arguments used for the last evaluation
    :param metric_to_optimize: Forwarded to the search
    :param evaluator_validation: Evaluator object given to SearchBayesianSkopt for the validation
    :param evaluator_test: Evaluator object given to SearchBayesianSkopt for the test
    :param evaluator_validation_earlystopping: Not used by this function
    :param output_folder_path: Folder for the output files, created if it does not exist
    :param n_cases: Forwarded to the search
    :param n_random_starts: Forwarded to the search
    :param resume_from_saved: Forwarded to the search
    :param save_model: Forwarded to the search
    :param allow_weighting: If True the "feature_weighting" hyperparameters are added to the search space of the KNN based hybrids
    :param similarity_type_list: Not used by this function
    """

    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(output_folder_path, exist_ok=True)

    URM_train = URM_train.copy()
    ICM_train = ICM_train.copy()
    # W_sparse_CF is not copied, it may be None

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(recommender_class,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        # Both stay None when recommender_class is not handled by any of the cases below
        hyperparameters_range_dictionary = None
        constructor_positional_args = None

        ##########################################################################################################

        if recommender_class in [ScoresHybridP3alphaKNNCBF,
                                 ScoresHybridRP3betaKNNCBF,
                                 ScoresHybridSpecialized,
                                 ScoresHybridSpecializedCold,
                                 ScoresHybridSpecializedV2Cold,
                                 ScoresHybridSpecializedV2Mid,
                                 ScoresHybridSpecializedV2Warm,
                                 ScoresHybridSpecializedV2Mid12,
                                 ScoresHybridSpecializedV2Warm12,
                                 ScoresHybridSpecializedV3Cold,
                                 ScoresHybridSpecializedV3Warm]:

            hyperparameters_range_dictionary = {
                "topK_P": Integer(5, 3000),
                "alpha_P": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity_P": Categorical([False]),
                "topK": Integer(5, 3000),
                "shrink": Integer(0, 5000),
                "similarity": Categorical(["tversky", "tanimoto", 'cosine', 'asymmetric']),
                "normalize": Categorical([True, False]),
                "alpha": Real(low=0, high=2, prior='uniform'),
            }

            if recommender_class is ScoresHybridRP3betaKNNCBF:
                hyperparameters_range_dictionary["beta_P"] = Real(low=0, high=2, prior='uniform')

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting"] = Categorical(["none", "BM25", "TF-IDF"])

            constructor_positional_args = [URM_train, ICM_train]

        ##########################################################################################################

        if recommender_class in [ScoresHybridKNNCFKNNCBF, ScoresHybridUserKNNCFKNNCBF]:

            hyperparameters_range_dictionary = {
                "topK_CF": Integer(5, 1500),
                "shrink_CF": Integer(0, 1500),
                "similarity_CF": Categorical(["tversky", "tanimoto", 'cosine', 'asymmetric']),
                "normalize_CF": Categorical([True, False]),
                "topK": Integer(5, 1500),
                "shrink": Integer(0, 1500),
                "similarity": Categorical(["tversky", "tanimoto", 'cosine', 'asymmetric']),
                "normalize": Categorical([True, False]),
                "alpha": Real(low=0, high=1, prior='uniform'),
            }

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting"] = Categorical(["none", "BM25", "TF-IDF"])

            constructor_positional_args = [URM_train, ICM_train]

        ##########################################################################################################

        if recommender_class is ScoresHybridSpecializedAdaptive:

            # Cold users hybrid
            hyperparameters_range_dictionary = {
                "topK_P_C": Integer(5, 1500),
                "alpha_P_C": Real(low=0, high=2, prior='uniform'),
                "beta_P_C": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity_P_C": Categorical([False]),
                "topK_C": Integer(5, 1500),
                "shrink_C": Integer(0, 1500),
                "similarity_C": Categorical(["tversky", "tanimoto", 'cosine', 'asymmetric']),
                "normalize_C": Categorical([True, False]),
            }

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting_C"] = Categorical(["none", "BM25", "TF-IDF"])

            # Warm users hybrid
            hyperparameters_range_dictionary["topK_P"] = Integer(5, 1500)
            hyperparameters_range_dictionary["alpha_P"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta_P"] = Real(low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["normalize_similarity_P"] = Categorical([False])
            hyperparameters_range_dictionary["topK"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize"] = Categorical([True, False])

            if allow_weighting:
                hyperparameters_range_dictionary["feature_weighting"] = Categorical(["none", "BM25", "TF-IDF"])

            hyperparameters_range_dictionary["threshold"] = Integer(1, 30)

            constructor_positional_args = [URM_train, ICM_train]

        ##########################################################################################################

        if recommender_class is ScoresHybridP3alphaPureSVD:

            hyperparameters_range_dictionary = {
                "topK_P": Integer(5, 1000),
                "alpha_P": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity_P": Categorical([False]),
                "num_factors": Integer(1, 500),
            }

            constructor_positional_args = [URM_train]

        ##########################################################################################################

        if recommender_class is CFW_D_Similarity_Linalg:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "add_zeros_quota": Real(low=0, high=1, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

            constructor_positional_args = [URM_train, ICM_train, W_sparse_CF]

        #########################################################################################################

        if hyperparameters_range_dictionary is None:
            # Explicit error instead of an unbound local variable further down, it is logged by the handler below
            raise ValueError("Hyperparameter search space not defined for recommender class {}".format(recommender_class))

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=constructor_positional_args,
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_input_args,
                               parameter_search_space=hyperparameters_range_dictionary,
                               n_cases=n_cases,
                               n_random_starts=n_random_starts,
                               resume_from_saved=resume_from_saved,
                               save_model=save_model,
                               output_folder_path=output_folder_path,
                               output_file_name_root=output_file_name_root,
                               metric_to_optimize=metric_to_optimize,
                               recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        # os.path.join keeps the log inside the output folder even without a trailing separator,
        # the context manager closes the file also when the write fails
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
def runParameterSearch_Hybrid(recommender_class, URM_train, ICM_object, ICM_name, n_cases=30,
                              evaluator_validation=None, evaluator_test=None,
                              metric_to_optimize="PRECISION",
                              output_folder_path="result_experiments/",
                              parallelizeKNN=False, allow_weighting=True,
                              similarity_type_list=None):
    """
    Run the hyperparameter search of ItemKNN_CFCBF_Hybrid_Recommender, once for each similarity heuristic.

    If recommender_class is any other class only the output folder and the search object are
    created, no search is run.

    :param recommender_class: Class of the recommender to optimize, its RECOMMENDER_NAME is used in the output file names
    :param URM_train: Training URM, used as second positional argument of the recommender constructor
    :param ICM_object: ICM, used as first positional argument of the recommender constructor
    :param ICM_name: String appended to RECOMMENDER_NAME to build the root of the output file names
    :param n_cases: Forwarded to run_KNNRecommender_on_similarity_type
    :param evaluator_validation: Evaluator object given to SearchBayesianSkopt for the validation
    :param evaluator_test: Evaluator object given to SearchBayesianSkopt for the test
    :param metric_to_optimize: Forwarded to run_KNNRecommender_on_similarity_type
    :param output_folder_path: Folder for the output files, created if it does not exist
    :param parallelizeKNN: If True the similarity heuristics are processed by a multiprocessing pool, if False sequentially
    :param allow_weighting: Forwarded to run_KNNRecommender_on_similarity_type
    :param similarity_type_list: List of similarity names, if None 'cosine', 'jaccard', 'asymmetric', 'dice' and 'tversky' are used
    """

    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(output_folder_path, exist_ok=True)

    ##########################################################################################################

    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(ICM_name)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    if recommender_class is ItemKNN_CFCBF_Hybrid_Recommender:

        if similarity_type_list is None:
            similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

        hyperparameters_range_dictionary = {
            "ICM_weight": Real(low=1e-2, high=1e2, prior='log-uniform'),
        }

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[ICM_object, URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})

        run_KNNCFRecommender_on_similarity_type_partial = partial(
            run_KNNRecommender_on_similarity_type,
            parameter_search_space=hyperparameters_range_dictionary,
            recommender_parameters=recommender_parameters,
            parameterSearch=parameterSearch,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            allow_weighting=allow_weighting)

        if parallelizeKNN:
            pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
            try:
                pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
            finally:
                # Always release the worker processes, even when one of the searches fails
                pool.close()
                pool.join()
        else:
            for similarity_type in similarity_type_list:
                run_KNNCFRecommender_on_similarity_type_partial(similarity_type)
def runParameterSearch_Collaborative(recommender_class, URM_train, metric_to_optimize="PRECISION",
                                     evaluator_validation=None, evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True, n_cases=35, allow_weighting=True,
                                     similarity_type_list=None):
    """
    Run the hyperparameter search for a collaborative recommender.

    Any exception raised while setting up or running the search is printed and appended to
    "ErrorLog.txt" in output_folder_path, it is not propagated to the caller.

    :param recommender_class: Class of the recommender to optimize, it selects the search strategy and the search space
    :param URM_train: Training URM, used as the only positional argument of the recommender constructor
    :param metric_to_optimize: Forwarded to the search
    :param evaluator_validation: Evaluator object given to the search object for the validation
    :param evaluator_test: Evaluator object given to the search object for the test
    :param evaluator_validation_earlystopping: Not used by this function
    :param output_folder_path: Folder for the output files, created if it does not exist
    :param parallelizeKNN: If True the similarity heuristics of the KNNs are processed by a multiprocessing pool, if False sequentially
    :param n_cases: Forwarded to the search
    :param allow_weighting: Forwarded to run_KNNRecommender_on_similarity_type
    :param similarity_type_list: List of similarity names for the KNNs, if None 'cosine', 'jaccard', 'asymmetric', 'dice' and 'tversky' are used
    """

    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(output_folder_path, exist_ok=True)

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(recommender_class,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters therefore only one evaluation is needed

            parameterSearch = SearchSingleCase(recommender_class,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            parameterSearch.search(recommender_parameters,
                                   fit_parameters_values={},
                                   output_folder_path=output_folder_path,
                                   output_file_name_root=output_file_name_root)

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

            recommender_parameters = SearchInputRecommenderParameters(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_parameters=recommender_parameters,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting)

            if parallelizeKNN:
                pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                try:
                    pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
                finally:
                    # Always release the worker processes, even when one of the searches fails
                    pool.close()
                    pool.join()
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        # Stays None when recommender_class is not handled by any of the cases below
        hyperparameters_range_dictionary = None

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 800),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        if recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 800),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "beta": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        if recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 250),
            }

        if recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 800),
                "l1_ratio": Real(low=1e-5, high=1.0, prior='log-uniform'),
                "alpha": Real(low=1e-3, high=1.0, prior='uniform'),
            }

        #########################################################################################################

        if hyperparameters_range_dictionary is None:
            # Explicit error instead of an unbound local variable further down, it is logged by the handler below
            raise ValueError("Hyperparameter search space not defined for recommender class {}".format(recommender_class))

        recommender_parameters = SearchInputRecommenderParameters(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_parameters,
                               parameter_search_space=hyperparameters_range_dictionary,
                               n_cases=n_cases,
                               output_folder_path=output_folder_path,
                               output_file_name_root=output_file_name_root,
                               metric_to_optimize=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        # os.path.join keeps the log inside the output folder even without a trailing separator,
        # the context manager closes the file also when the write fails
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
# Fit the RP3beta model, its W_sparse matrix is the third constructor argument of CFW_D_Similarity_Linalg
recommender_4 = RP3betaRecommender(urm_train)
recommender_4.fit(
    topK=16,
    alpha=0.03374950051351756,
    beta=0.24087176329409027,
    normalize_similarity=True,
)

# Alternative model, currently disabled:
# recommender_3 = UserKNNCFRecommender(urm_train)
# recommender_3.fit(shrink=2, topK=600, normalize=True)

W_sparse_CF = recommender_4.W_sparse

recommender_class = CFW_D_Similarity_Linalg

parameterSearch = SearchBayesianSkopt(
    recommender_class,
    evaluator_validation=evaluator_valid,
    evaluator_test=evaluator_test,
)

# Search space of CFW_D_Similarity_Linalg
hyperparameters_range_dictionary = {
    "topK": Integer(5, 1000),
    "add_zeros_quota": Real(low=0, high=1, prior='uniform'),
    "normalize_similarity": Categorical([True, False]),
}

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train, icm_asset, W_sparse_CF],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={},
)
def runParameterSearch_Hybrid(recommender_class, URM_train, ICM_object, ICM_name, URM_train_last_test = None,
                              n_cases = 30, n_random_starts = 5, resume_from_saved = False,
                              save_model = "best", evaluate_on_test = "best",
                              evaluator_validation= None, evaluator_test=None, metric_to_optimize = "PRECISION",
                              output_folder_path ="result_experiments/", parallelizeKNN = False,
                              allow_weighting = True, similarity_type_list = None):
    """
    This function performs the hyperparameter optimization for a hybrid collaborative and content-based recommender

    Only ItemKNN_CFCBF_Hybrid_Recommender and UserKNN_CFCBF_Hybrid_Recommender are handled, for any
    other class no search is run.

    :param recommender_class:   Class of the recommender object to optimize, it must be a BaseRecommender type
    :param URM_train:           Sparse matrix containing the URM training data
    :param ICM_object:          Sparse matrix containing the ICM training data
    :param ICM_name:            String containing the name of the ICM, will be used for the name of the output files
    :param URM_train_last_test: Sparse matrix containing the union of URM training and validation data to be used in the last evaluation
    :param n_cases:             Number of hyperparameter sets to explore
    :param n_random_starts:     Number of the initial random hyperparameter values to explore, usually set at 30% of n_cases
    :param resume_from_saved:   Boolean value, if True the optimization is resumed from the saved files, if False a new one is done
    :param save_model:          ["no", "best", "last"] which of the models to save, see ParameterTuning/SearchAbstractClass for details
    :param evaluate_on_test:    ["all", "best", "last", "no"] when to evaluate the model on the test data, see ParameterTuning/SearchAbstractClass for details
    :param evaluator_validation:    Evaluator object to be used for the validation of each hyperparameter set
    :param evaluator_test:          Evaluator object to be used for the test results, the output will only be saved but not used
    :param metric_to_optimize:  String with the name of the metric to be optimized as contained in the output of the evaluator objects
    :param output_folder_path:  Folder in which to save the output files
    :param parallelizeKNN:      Boolean value, if True the various heuristics of the KNNs will be computed in parallel, if False sequentially
    :param allow_weighting:     Boolean value, if True it enables the use of TF-IDF and BM25 to weight features, users and items in KNNs
    :param similarity_type_list: List of strings with the similarity heuristics to be used for the KNNs
    """

    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(output_folder_path, exist_ok = True)

    URM_train = URM_train.copy()
    ICM_object = ICM_object.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    ##########################################################################################################

    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(ICM_name)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    if recommender_class in [ItemKNN_CFCBF_Hybrid_Recommender, UserKNN_CFCBF_Hybrid_Recommender]:

        if similarity_type_list is None:
            similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

        hyperparameters_range_dictionary = {}

        # The weight hyperparameter has a different name for the item-based and the user-based hybrid
        if recommender_class is ItemKNN_CFCBF_Hybrid_Recommender:
            hyperparameters_range_dictionary["ICM_weight"] = Real(low = 1e-2, high = 1e2, prior = 'log-uniform')

        elif recommender_class is UserKNN_CFCBF_Hybrid_Recommender:
            hyperparameters_range_dictionary["UCM_weight"] = Real(low = 1e-2, high = 1e2, prior = 'log-uniform')

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS = [URM_train, ICM_object],
            CONSTRUCTOR_KEYWORD_ARGS = {},
            FIT_POSITIONAL_ARGS = [],
            FIT_KEYWORD_ARGS = {}
        )

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        run_KNNCFRecommender_on_similarity_type_partial = partial(run_KNNRecommender_on_similarity_type,
                                                                  parameter_search_space = hyperparameters_range_dictionary,
                                                                  recommender_input_args = recommender_input_args,
                                                                  parameterSearch = parameterSearch,
                                                                  resume_from_saved = resume_from_saved,
                                                                  save_model = save_model,
                                                                  evaluate_on_test = evaluate_on_test,
                                                                  n_cases = n_cases,
                                                                  n_random_starts = n_random_starts,
                                                                  output_folder_path = output_folder_path,
                                                                  output_file_name_root = output_file_name_root,
                                                                  metric_to_optimize = metric_to_optimize,
                                                                  allow_weighting = allow_weighting,
                                                                  recommender_input_args_last_test = recommender_input_args_last_test)

        if parallelizeKNN:
            pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
            try:
                pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
            finally:
                # Always release the worker processes, even when one of the searches fails
                pool.close()
                pool.join()

        else:
            for similarity_type in similarity_type_list:
                run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

        return
def runParameterSearch_Collaborative(recommender_class, URM_train, URM_train_last_test = None,
                                     n_cases = 35, n_random_starts = 5, resume_from_saved = False,
                                     save_model = "best", evaluate_on_test = "best",
                                     evaluator_validation = None, evaluator_test = None,
                                     evaluator_validation_earlystopping = None,
                                     metric_to_optimize = "PRECISION",
                                     output_folder_path ="result_experiments/", parallelizeKNN = True,
                                     allow_weighting = True,similarity_type_list = None):
    """
    This function performs the hyperparameter optimization for a collaborative recommender

    Any exception raised while setting up or running the search is printed and appended to
    "ErrorLog.txt" in output_folder_path, it is not propagated to the caller.

    :param recommender_class:   Class of the recommender object to optimize, it must be a BaseRecommender type
    :param URM_train:           Sparse matrix containing the URM training data
    :param URM_train_last_test: Sparse matrix containing the union of URM training and validation data to be used in the last evaluation
    :param n_cases:             Number of hyperparameter sets to explore
    :param n_random_starts:     Number of the initial random hyperparameter values to explore, usually set at 30% of n_cases
    :param resume_from_saved:   Boolean value, if True the optimization is resumed from the saved files, if False a new one is done
    :param save_model:          ["no", "best", "last"] which of the models to save, see ParameterTuning/SearchAbstractClass for details
    :param evaluate_on_test:    ["all", "best", "last", "no"] when to evaluate the model on the test data, see ParameterTuning/SearchAbstractClass for details
    :param evaluator_validation:    Evaluator object to be used for the validation of each hyperparameter set
    :param evaluator_validation_earlystopping:   Evaluator object to be used for the earlystopping of ML algorithms, can be the same of evaluator_validation
    :param evaluator_test:          Evaluator object to be used for the test results, the output will only be saved but not used
    :param metric_to_optimize:  String with the name of the metric to be optimized as contained in the output of the evaluator objects
    :param output_folder_path:  Folder in which to save the output files
    :param parallelizeKNN:      Boolean value, if True the various heuristics of the KNNs will be computed in parallel, if False sequentially
    :param allow_weighting:     Boolean value, if True it enables the use of TF-IDF and BM25 to weight features, users and items in KNNs
    :param similarity_type_list: List of strings with the similarity heuristics to be used for the KNNs
    """

    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(output_folder_path, exist_ok = True)

    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_validation_earlystopping,
                              "lower_validations_allowed": 5,
                              "validation_metric": metric_to_optimize,
                              }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(recommender_class,
                                              evaluator_validation=evaluator_validation,
                                              evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # TopPop, GlobalEffects and Random have no parameters therefore only one evaluation is needed

            parameterSearch = SearchSingleCase(recommender_class,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = {}
            )

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test = recommender_input_args_last_test,
                                   fit_hyperparameters_values={},
                                   output_folder_path = output_folder_path,
                                   output_file_name_root = output_file_name_root,
                                   resume_from_saved = resume_from_saved,
                                   save_model = save_model,
                                   evaluate_on_test = evaluate_on_test,
                                   )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = {}
            )

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            run_KNNCFRecommender_on_similarity_type_partial = partial(run_KNNRecommender_on_similarity_type,
                                                                      recommender_input_args = recommender_input_args,
                                                                      parameter_search_space = {},
                                                                      parameterSearch = parameterSearch,
                                                                      n_cases = n_cases,
                                                                      n_random_starts = n_random_starts,
                                                                      resume_from_saved = resume_from_saved,
                                                                      save_model = save_model,
                                                                      evaluate_on_test = evaluate_on_test,
                                                                      output_folder_path = output_folder_path,
                                                                      output_file_name_root = output_file_name_root,
                                                                      metric_to_optimize = metric_to_optimize,
                                                                      allow_weighting = allow_weighting,
                                                                      recommender_input_args_last_test = recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                try:
                    pool.map(run_KNNCFRecommender_on_similarity_type_partial, similarity_type_list)
                finally:
                    # Always release the worker processes, even when one of the searches fails
                    pool.close()
                    pool.join()

            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################

        # Each case below selects the search space and, when needed, the keyword arguments of fit.
        # hyperparameters_range_dictionary stays None when recommender_class is not handled.
        hyperparameters_range_dictionary = None
        fit_keyword_args = {}

        if recommender_class is P3alphaRecommender:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low = 0, high = 2, prior = 'uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low = 0, high = 2, prior = 'uniform'),
                "beta": Real(low = 0, high = 2, prior = 'uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "user_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
                "negative_interactions_quota": Real(low = 0.0, high = 0.5, prior = 'uniform'),
            }

            fit_keyword_args = earlystopping_keywargs

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical([1]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "user_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
                "negative_interactions_quota": Real(low = 0.0, high = 0.5, prior = 'uniform'),
            }

            fit_keyword_args = earlystopping_keywargs

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:

            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([1500]),
                "num_factors": Integer(1, 200),
                "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "positive_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "negative_reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
            }

            fit_keyword_args = {**earlystopping_keywargs,
                                "positive_threshold_BPR": None}

        ##########################################################################################################

        if recommender_class is IALSRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 200),
                "epochs": Categorical([300]),
                "confidence_scaling": Categorical(["linear", "log"]),
                "alpha": Real(low = 1e-3, high = 50.0, prior = 'log-uniform'),
                "epsilon": Real(low = 1e-3, high = 10.0, prior = 'log-uniform'),
                "reg": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
            }

            fit_keyword_args = earlystopping_keywargs

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
            }

        ##########################################################################################################

        if recommender_class is NMFRecommender:

            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
                "solver": Categorical(["coordinate_descent", "multiplicative_update"]),
                "init_type": Categorical(["random", "nndsvda"]),
                "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "epochs": Categorical([1500]),
                "symmetric": Categorical([True, False]),
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "lambda_i": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "lambda_j": Real(low = 1e-5, high = 1e-2, prior = 'log-uniform'),
                "learning_rate": Real(low = 1e-4, high = 1e-1, prior = 'log-uniform'),
            }

            fit_keyword_args = {**earlystopping_keywargs,
                                "positive_threshold_BPR": None,
                                'train_with_sparse_weights': None}

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:

            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "l1_ratio": Real(low = 1e-5, high = 1.0, prior = 'log-uniform'),
                "alpha": Real(low = 1e-3, high = 1.0, prior = 'uniform'),
            }

        #########################################################################################################

        if recommender_class is EASE_R_Recommender:

            hyperparameters_range_dictionary = {
                "topK": Categorical([None]),
                "normalize_matrix": Categorical([False]),
                "l2_norm": Real(low = 1e0, high = 1e7, prior = 'log-uniform'),
            }

        #########################################################################################################

        if hyperparameters_range_dictionary is None:
            # Explicit error instead of an unbound local variable further down, it is logged by the handler below
            raise ValueError("Hyperparameter search space not defined for recommender class {}".format(recommender_class))

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
            CONSTRUCTOR_KEYWORD_ARGS = {},
            FIT_POSITIONAL_ARGS = [],
            FIT_KEYWORD_ARGS = fit_keyword_args
        )

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_input_args,
                               parameter_search_space = hyperparameters_range_dictionary,
                               n_cases = n_cases,
                               n_random_starts = n_random_starts,
                               resume_from_saved = resume_from_saved,
                               save_model = save_model,
                               evaluate_on_test = evaluate_on_test,
                               output_folder_path = output_folder_path,
                               output_file_name_root = output_file_name_root,
                               metric_to_optimize = metric_to_optimize,
                               recommender_input_args_last_test = recommender_input_args_last_test)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        # os.path.join keeps the log inside the output folder even without a trailing separator,
        # the context manager closes the file also when the write fails
        with open(os.path.join(output_folder_path, "ErrorLog.txt"), "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
def run_matrix_factorization_bpr_cython(
        parameter_search: SearchBayesianSkopt,
        URM_train: csr_matrix,
        URM_train_last_test: csr_matrix,
        URM_impressions: csr_matrix,
        num_cases: int,
        num_random_starts: int,
        resume_from_saved: bool,
        save_model: str,
        output_folder_path: str,
        output_file_name_root: str,
        metric_to_optimize: str,
        early_stopping_kwargs: dict):
    """Run one hyper-parameter search per impression sampling mode.

    The search is launched three times, once for each of the modes
    "none", "inside" and "outside", always over the same search space.

    * For "none" the fit keyword arguments do not contain
      ``impression_sampling_mode`` and results are written under
      ``output_file_name_root`` unchanged.
    * For every other mode, ``impression_sampling_mode`` is added to the fit
      keyword arguments and results are written under
      ``f"{output_file_name_root}_{mode}"``.

    :param parameter_search: object whose ``search`` method is invoked once
        per sampling mode.
    :param URM_train: first positional constructor argument of the recommender.
    :param URM_train_last_test: when not None, a copy of the recommender input
        arguments with this matrix as first positional constructor argument
        is forwarded as ``recommender_input_args_last_test``; otherwise None
        is forwarded.
    :param URM_impressions: passed to the recommender constructor as the
        ``URM_impressions`` keyword argument.
    :param num_cases: forwarded to ``search`` as ``n_cases``.
    :param num_random_starts: forwarded to ``search`` as ``n_random_starts``.
    :param resume_from_saved: forwarded to ``search`` unchanged.
    :param save_model: forwarded to ``search`` unchanged.
    :param output_folder_path: forwarded to ``search`` unchanged.
    :param output_file_name_root: base name for the output files; a
        ``_<mode>`` suffix is appended for every mode other than "none".
    :param metric_to_optimize: forwarded to ``search`` unchanged.
    :param early_stopping_kwargs: keyword arguments merged into the fit
        keyword arguments of every run.
    """
    impression_sampling_mode_list = [
        "none",
        "inside",
        "outside",
    ]

    hyper_parameters_range_dictionary = {
        "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
        "epochs": Categorical([1500]),
        "num_factors": Integer(1, 200),
        "batch_size": Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
        "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
        "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
        "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform')
    }

    for impression_sampling_mode in impression_sampling_mode_list:
        if impression_sampling_mode == "none":
            mode_output_file_name_root = output_file_name_root
            fit_kwargs = {
                **early_stopping_kwargs,
                "positive_threshold_BPR": None
            }
        else:
            # Build the name from the untouched base root on every iteration.
            # Reassigning `output_file_name_root` itself would make suffixes
            # pile up across iterations ("<root>_inside_outside").
            mode_output_file_name_root = (
                f"{output_file_name_root}_{impression_sampling_mode}"
            )
            fit_kwargs = {
                **early_stopping_kwargs,
                "impression_sampling_mode": impression_sampling_mode,
                "positive_threshold_BPR": None
            }

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={"URM_impressions": URM_impressions},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS=fit_kwargs)

        if URM_train_last_test is not None:
            # Same arguments, but with the training matrix swapped for the
            # one passed as URM_train_last_test.
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        parameter_search.search(
            recommender_input_args,
            parameter_search_space=hyper_parameters_range_dictionary,
            n_cases=num_cases,
            n_random_starts=num_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=mode_output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)