def run_parameter_search_p3alpha_wsparse(recommender_class,
                                         URM_train,
                                         item_W_sparse,
                                         user_W_sparse,
                                         evaluator_validation,
                                         metric_to_optimize="MAP",
                                         n_cases=60,
                                         n_random_starts=20,
                                         output_folder_path="result_experiments/"):
    """Bayesian hyperparameter search for a P3alpha recommender built on
    precomputed sparse user/item similarity matrices.

    Tunes topK, alpha and normalize_similarity; search logs and results are
    written under output_folder_path.
    """
    search_engine = SearchBayesianSkopt(recommender_class,
                                        evaluator_validation=evaluator_validation)

    # Same ranges the plain P3alpha collaborative search uses.
    search_space = {
        "topK": Integer(5, 1000),
        "alpha": Real(low=0, high=2, prior='uniform'),
        "normalize_similarity": Categorical([True, False]),
    }

    # Note: the constructor receives the user similarity before the item one.
    constructor_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, user_W_sparse, item_W_sparse],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    search_engine.search(constructor_args,
                         recommender_input_args_last_test=None,
                         parameter_search_space=search_space,
                         n_cases=n_cases,
                         n_random_starts=n_random_starts,
                         save_model="no",
                         output_folder_path=output_folder_path,
                         output_file_name_root=recommender_class.RECOMMENDER_NAME,
                         metric_to_optimize=metric_to_optimize)
def run_parameter_search(URM_train,
                         ICM_all,
                         W_sparse_CF,
                         evaluator_test,
                         metric_to_optimize="MAP",
                         n_cases=10,
                         n_random_starts=3,
                         output_folder_path="result_experiments/"):
    """Bayesian hyperparameter search for CFW_D_Similarity_Linalg.

    Tunes topK, add_zeros_quota and normalize_similarity; results are written
    under output_folder_path.

    NOTE(review): evaluator_test is used as the validation evaluator, and the
    "last test" input args are rebuilt with the same URM_train used during the
    search, so the final fit runs on identical data — confirm this is intended.
    """
    recommender_class = CFW_D_Similarity_Linalg

    search_engine = SearchBayesianSkopt(recommender_class,
                                        evaluator_validation=evaluator_test)

    search_space = {
        "topK": Integer(1, 2000),
        "add_zeros_quota": Real(low=0, high=0.1, prior='uniform'),
        "normalize_similarity": Categorical([True, False]),
    }

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_all, W_sparse_CF],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Clone data structure to perform the fitting with the best hyper
    # parameters on train + validation data
    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train

    search_engine.search(
        recommender_input_args,
        recommender_input_args_last_test=recommender_input_args_last_test,
        parameter_search_space=search_space,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        save_model="no",
        output_folder_path=output_folder_path,
        output_file_name_root=recommender_class.RECOMMENDER_NAME,
        metric_to_optimize=metric_to_optimize)
def run_parameter_search_field_ICM_weight(
        URM_train,
        ICM_train,
        base_recommender_class,
        base_recommender_parameter,
        item_feature_to_range_mapper,
        output_folder_path="result_experiments/",
        evaluator_validation=None,
        evaluator_test=None,
        n_cases=35,
        n_random_starts=5,
        metric_to_optimize="MAP"):
    """Bayesian search over per-feature ICM weights for a fixed base recommender.

    One uniform [0, 2] weight is tuned for every feature listed in
    item_feature_to_range_mapper.
    """
    recommender_class = SearchFieldWeightICMRecommender

    # Make sure the experiment folder exists.
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_class.RECOMMENDER_NAME

    # One search dimension per item feature in the mapper.
    search_space = {
        feature_name: Real(low=0, high=2, prior="uniform")
        for feature_name in item_feature_to_range_mapper
    }

    # Set args for recommender
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[
            URM_train, ICM_train, base_recommender_class,
            base_recommender_parameter, item_feature_to_range_mapper
        ],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    search_engine = SearchBayesianSkopt(recommender_class,
                                        evaluator_validation=evaluator_validation)
    search_engine.search(recommender_input_args,
                         parameter_search_space=search_space,
                         n_cases=n_cases,
                         n_random_starts=n_random_starts,
                         output_folder_path=output_folder_path,
                         output_file_name_root=output_file_name_root,
                         metric_to_optimize=metric_to_optimize,
                         save_model="no")
def run_parameter_search_mixed_similarity_item(
        recommender_object: HybridMixedSimilarityRecommender,
        URM_train,
        output_folder_path="result_experiments/",
        evaluator_validation=None,
        evaluator_test=None,
        n_cases=35,
        n_random_starts=5,
        metric_to_optimize="MAP"):
    """Tune the mixing coefficients (alpha1..alpha4) and topK of an already
    constructed item-similarity hybrid recommender.
    """
    print("Start search")

    # Make sure the experiment folder exists.
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_object.RECOMMENDER_NAME

    # topK plus one mixing coefficient per underlying similarity matrix.
    search_space = {
        "topK": Integer(1, 2000),
        "alpha1": Real(0, 1),
        "alpha2": Real(0, 1),
        "alpha3": Real(0, 1),
        "alpha4": Real(0, 1),
    }

    # The recommender instance already holds its data: no constructor args.
    empty_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    search_engine = SearchBayesianSkoptObject(
        recommender_object, evaluator_validation=evaluator_validation)
    search_engine.search(empty_input_args,
                         parameter_search_space=search_space,
                         n_cases=n_cases,
                         n_random_starts=n_random_starts,
                         output_folder_path=output_folder_path,
                         output_file_name_root=output_file_name_root,
                         metric_to_optimize=metric_to_optimize,
                         save_model="no")
def run_parameter_search_bagging(recommender_class,
                                 URM_train,
                                 constructor_kwargs,
                                 fit_kwargs,
                                 URM_train_last_test=None,
                                 n_cases=30,
                                 n_random_starts=5,
                                 resume_from_saved=False,
                                 save_model="no",
                                 evaluator_validation=None,
                                 evaluator_test=None,
                                 metric_to_optimize="PRECISION",
                                 output_folder_path="result_experiments/",
                                 parallelizeKNN=False):
    """Bayesian search for a bagging ensemble wrapped around a base recommender.

    num_models is always tuned; the merge-based bagging variants additionally
    tune topK.

    NOTE(review): URM_train_last_test and parallelizeKNN are accepted but
    currently unused.
    """
    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Result files are named after both the ensemble and its base model.
    base_model_name = constructor_kwargs['recommender_class'].RECOMMENDER_NAME
    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_" + base_model_name

    search_engine = SearchBayesianSkopt(recommender_class,
                                        evaluator_validation=evaluator_validation,
                                        evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS=constructor_kwargs,
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS=fit_kwargs)

    search_space = {'num_models': Integer(10, 100)}
    if recommender_class in [BaggingMergeItemSimilarityRecommender,
                             BaggingMergeUserSimilarityRecommender]:
        search_space['topK'] = Integer(low=1, high=3000)

    search_engine.search(recommender_input_args,
                         parameter_search_space=search_space,
                         n_cases=n_cases,
                         n_random_starts=n_random_starts,
                         output_folder_path=output_folder_path,
                         output_file_name_root=output_file_name_root,
                         metric_to_optimize=metric_to_optimize,
                         save_model=save_model,
                         resume_from_saved=resume_from_saved)
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     URM_train_last_test=None,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=5,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):
    """Hyperparameter optimization dispatcher for collaborative recommenders.

    Dispatch by recommender_class:
      * TopPop / GlobalEffects / Random have no hyperparameters, so a single
        evaluation (SearchSingleCase) is performed.
      * ItemKNNCF / UserKNNCF are tuned once per similarity type, optionally
        in parallel across processes.
      * Every other supported class gets a Bayesian search
        (SearchBayesianSkopt) over a model-specific hyperparameter space.

    Exceptions are caught, printed, and appended to
    <output_folder_path>/ErrorLog.txt instead of being propagated.
    """
    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Early-stopping configuration shared by all iterative models.
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    # Work on copies so the caller's matrices are never mutated.
    URM_train = URM_train.copy()
    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            # These models have no parameters, therefore only one evaluation
            # is needed.
            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )
            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:
            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy()
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            # Fix every argument except the similarity type itself, then run
            # one search per similarity type.
            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)
                pool.close()
                pool.join()
            else:
                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(similarity_type)

            return

        ##########################################################################################################
        # The remaining classes are mutually exclusive, hence the elif chain.
        # If recommender_class matches none of them, the NameError on
        # hyperparameters_range_dictionary below is caught and logged.

        if recommender_class is P3alphaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        elif recommender_class is RP3betaRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "alpha": Real(low=0, high=2, prior='uniform'),
                "beta": Real(low=0, high=2, prior='uniform'),
                "normalize_similarity": Categorical([True, False]),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        elif recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical(
                    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "num_factors": Categorical([200]),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5,
                                                    prior='uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        elif recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([500]),
                "use_bias": Categorical([True, False]),
                "batch_size": Categorical([1]),
                "num_factors": Integer(1, 200),
                "item_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "user_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
                "negative_interactions_quota": Real(low=0.0, high=0.5,
                                                    prior='uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        elif recommender_class is MatrixFactorization_BPR_Cython:
            hyperparameters_range_dictionary = {
                "sgd_mode": Categorical(["sgd", "adagrad", "adam"]),
                "epochs": Categorical([1500]),
                "num_factors": Integer(1, 200),
                "batch_size": Categorical(
                    [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024]),
                "positive_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "negative_reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
                "learning_rate": Real(low=1e-4, high=1e-1, prior='log-uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs,
                    "positive_threshold_BPR": None
                })

        elif recommender_class is IALSRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 200),
                "confidence_scaling": Categorical(["linear", "log"]),
                "alpha": Real(low=1e-3, high=50.0, prior='log-uniform'),
                "epsilon": Real(low=1e-3, high=10.0, prior='log-uniform'),
                "reg": Real(low=1e-5, high=1e-2, prior='log-uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        elif recommender_class is PureSVDRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 1000),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        elif recommender_class is NMFRecommender:
            hyperparameters_range_dictionary = {
                "num_factors": Integer(1, 350),
                "solver": Categorical(
                    ["coordinate_descent", "multiplicative_update"]),
                "init_type": Categorical(["random", "nndsvda"]),
                "beta_loss": Categorical(["frobenius", "kullback-leibler"]),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        elif recommender_class is SLIM_BPR_Cython:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 2000),
                "epochs": Categorical([1200, 1500, 1700]),
                "symmetric": Categorical([True, False]),
                "sgd_mode": Categorical(["adagrad", "adam"]),
                "lambda_i": Real(low=1e-7, high=1e1, prior='log-uniform'),
                "lambda_j": Real(low=1e-7, high=1e1, prior='log-uniform'),
                "learning_rate": Real(low=1e-6, high=1e-3, prior='log-uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs,
                    "positive_threshold_BPR": None,
                    'train_with_sparse_weights': None
                })

        elif recommender_class is SLIMElasticNetRecommender:
            hyperparameters_range_dictionary = {
                "topK": Integer(5, 1000),
                "l1_ratio": Real(low=1e-5, high=1.0, prior='log-uniform'),
                "alpha": Real(low=1e-3, high=1.0, prior='uniform'),
            }

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        # Final step, after the hyperparameter range has been defined for
        # each type of algorithm.
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        # FIX: open the error log with a context manager so the handle is
        # released even if the write itself fails.
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
def runParameterSearch_Content(recommender_class,
                               URM_train,
                               ICM_object,
                               ICM_name,
                               URM_train_last_test=None,
                               n_cases=30,
                               n_random_starts=5,
                               resume_from_saved=False,
                               save_model="best",
                               evaluator_validation=None,
                               evaluator_test=None,
                               metric_to_optimize="PRECISION",
                               output_folder_path="result_experiments/",
                               parallelizeKNN=False,
                               allow_weighting=True,
                               similarity_type_list=None):
    """Tune a content-based KNN recommender, once per similarity type.

    Each similarity type ('cosine', 'jaccard', ...) is searched
    independently, either sequentially or in a process pool when
    parallelizeKNN is True.
    """
    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Work on copies so the caller's matrices are never mutated.
    URM_train = URM_train.copy()
    ICM_object = ICM_object.copy()
    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    ##########################################################################################################

    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(ICM_name)

    parameterSearch = SearchBayesianSkopt(recommender_class,
                                          evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    if similarity_type_list is None:
        similarity_type_list = [
            'cosine', 'jaccard', "asymmetric", "dice", "tversky"
        ]

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_object],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    # Fix every argument except the similarity type itself.
    search_one_similarity = partial(
        run_KNNRecommender_on_similarity_type,
        recommender_input_args=recommender_input_args,
        parameter_search_space={},
        parameterSearch=parameterSearch,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=resume_from_saved,
        save_model=save_model,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        allow_weighting=allow_weighting,
        recommender_input_args_last_test=recommender_input_args_last_test)

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                    maxtasksperchild=1)
        pool.map(search_one_similarity, similarity_type_list)
        pool.close()
        pool.join()
    else:
        for similarity_type in similarity_type_list:
            search_one_similarity(similarity_type)
def run_parameter_search_hybrid(recommender_object: AbstractHybridRecommender,
                                metric_to_optimize="PRECISION",
                                evaluator_validation=None,
                                output_folder_path="result_experiments/",
                                parallelizeKNN=True,
                                n_cases=35,
                                n_random_starts=5):
    """Tune the per-model mixing weights of an already built hybrid recommender.

    Rank-based and reranking hybrids are tuned once per merge strategy
    (optionally in parallel); any other hybrid gets a single Bayesian search
    over one [0, 1] weight per wrapped model.
    """
    # Create folder if it does not exist
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_object.RECOMMENDER_NAME

    # One [0, 1] weight per wrapped model.
    hyperparameters_range_dictionary = {
        model_name: Real(0, 1)
        for model_name in recommender_object.get_recommender_names()
    }

    # The recommender instance already holds its data: no constructor args.
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    if recommender_object.RECOMMENDER_NAME == "HybridRankBasedRecommender":
        strategies = HybridRankBasedRecommender.get_possible_strategies()
        search_one_strategy = partial(
            run_hybrid_rank_based_rs_on_strategy,
            recommender_input_args=recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            recommender_object=recommender_object,
            evaluator_validation=evaluator_validation,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

        if parallelizeKNN:
            pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                        maxtasksperchild=1)
            pool.map(search_one_strategy, strategies)
            pool.close()
            pool.join()
        else:
            for strategy in strategies:
                search_one_strategy(strategy)
        return

    if recommender_object.RECOMMENDER_NAME == "HybridRerankingRecommender":
        strategies = HybridRerankingRecommender.get_possible_strategies()
        search_one_strategy = partial(
            run_hybrid_reranking_on_strategy,
            recommender_input_args=recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            recommender_object=recommender_object,
            evaluator_validation=evaluator_validation,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

        if parallelizeKNN:
            pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                        maxtasksperchild=1)
            pool.map(search_one_strategy, strategies)
            pool.close()
            pool.join()
        else:
            for strategy in strategies:
                search_one_strategy(strategy)
        return

    # Generic hybrid: plain Bayesian search over the weight space.
    parameter_search = SearchBayesianSkoptObject(recommender_object,
                                                 evaluator_validation)
    parameter_search.search(
        recommender_input_args,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        save_model="no")
def run_parameter_search_mf_collaborative(
        recommender_class,
        URM_train,
        UCM_train=None,
        UCM_name="NO_UCM",
        ICM_train=None,
        ICM_name="NO_ICM",
        URM_train_last_test=None,
        metric_to_optimize="PRECISION",
        evaluator_validation=None,
        evaluator_test=None,
        evaluator_validation_earlystopping=None,
        output_folder_path="result_experiments/",
        parallelize_search=True,
        n_cases=35,
        n_random_starts=5,
        resume_from_saved=False,
        save_model="best",
        approximate_recommender=None):
    """Hyperparameter optimization for matrix-factorization recommenders.

    Builds a model-specific search space for the given recommender_class and
    runs a Bayesian search. LightFM models are delegated entirely to
    run_light_fm_search; FieldAwareFM additionally requires
    approximate_recommender and an SVM-format training file.

    Exceptions are caught, printed, and appended to
    <output_folder_path>/ErrorLog.txt instead of being propagated.
    """
    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Early-stopping configuration shared by iterative models.
    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    # Work on copies so the caller's matrices are never mutated.
    URM_train = URM_train.copy()
    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:
        output_file_name_root = recommender_class.RECOMMENDER_NAME + "_" + ICM_name + "_" + UCM_name

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS={})

        hyperparameters_range_dictionary = {}

        # The class checks are mutually exclusive, hence the elif chain.
        if recommender_class is ImplicitALSRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(300, 550)
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-2, high=200, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([50])
            hyperparameters_range_dictionary["confidence_scaling"] = Categorical(["linear"])
            hyperparameters_range_dictionary["alpha"] = Real(
                low=1e-2, high=1e2, prior='log-uniform')

        elif recommender_class is MF_BPR_Recommender:
            hyperparameters_range_dictionary["num_factors"] = Categorical([600])
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-2, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        elif recommender_class is FunkSVDRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(50, 400)
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-8, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-6, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        elif recommender_class is LogisticMFRecommender:
            hyperparameters_range_dictionary["num_factors"] = Integer(20, 400)
            hyperparameters_range_dictionary["regularization"] = Real(
                low=1e-5, high=1e1, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-2, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary["epochs"] = Categorical([300])

        elif recommender_class is LightFMRecommender:
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['UCM_train'] = UCM_train
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['ICM_train'] = ICM_train
            hyperparameters_range_dictionary['no_components'] = Categorical([100])
            hyperparameters_range_dictionary['epochs'] = Categorical([100])

            run_light_fm_search(parameterSearch,
                                recommender_input_args,
                                hyperparameters_range_dictionary,
                                URM_train_last_test=URM_train_last_test,
                                parallelize_search=parallelize_search,
                                n_cases=n_cases,
                                n_random_starts=n_random_starts,
                                output_folder_path=output_folder_path,
                                output_file_name_root=output_file_name_root,
                                metric_to_optimize=metric_to_optimize,
                                save_model=save_model)
            # FIX: the dedicated LightFM search handles everything; without
            # this return the generic search below would redundantly re-run
            # n_cases evaluations of the same fixed configuration.
            return

        elif recommender_class is FieldAwareFMRecommender:
            if approximate_recommender is None:
                raise ValueError("approximate_recommender has to be set")
            root_path = get_project_root_path()
            train_svm_file_path = os.path.join(root_path, "resources",
                                               "fm_data",
                                               "URM_ICM_UCM_uncompressed.txt")
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['train_svm_file_path'] = train_svm_file_path
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['approximate_recommender'] = approximate_recommender
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['UCM_train'] = UCM_train
            recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS['ICM_train'] = ICM_train

            hyperparameters_range_dictionary['epochs'] = Categorical([200])
            hyperparameters_range_dictionary['latent_factors'] = Integer(low=20, high=500)
            hyperparameters_range_dictionary['regularization'] = Real(
                low=10e-7, high=10e-1, prior="log-uniform")
            hyperparameters_range_dictionary['learning_rate'] = Real(
                low=10e-3, high=10e-1, prior="log-uniform")

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        # Final step, after the hyperparameter range has been defined for
        # each type of algorithm.
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        # FIX: open the error log with a context manager so the handle is
        # released even if the write itself fails.
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
def run_cv_parameter_search(recommender_class,
                            URM_train_list,
                            ICM_train_list=None,
                            UCM_train_list=None,
                            ICM_name=None,
                            UCM_name=None,
                            metric_to_optimize="MAP",
                            evaluator_validation_list=None,
                            output_folder_path="result_experiments/",
                            parallelize_search=True,
                            n_cases=60,
                            n_random_starts=20,
                            resume_from_saved=False,
                            n_jobs=multiprocessing.cpu_count()):
    """Run a cross-validated Bayesian hyper-parameter search for a recommender class.

    One SearchInputRecommenderArgs is built per fold (per URM in URM_train_list,
    with the matching ICM/UCM when provided). KNN recommenders are tuned once per
    similarity type (optionally in parallel); all other classes go through a single
    CrossSearchBayesianSkopt.search call.

    :param recommender_class: class to tune; its RECOMMENDER_NAME names the output files
    :param URM_train_list: one training URM per fold
    :param ICM_train_list: optional, one ICM per fold (passed as "ICM_train" kwarg)
    :param UCM_train_list: optional, one UCM per fold (passed as "UCM_train" kwarg)
    :param ICM_name: appended to the output file name root if not None
    :param UCM_name: appended to the output file name root if not None
    :param evaluator_validation_list: one validation evaluator per fold (required)
    :param parallelize_search: if True, KNN similarity types are searched in a process pool
    :param n_jobs: pool size for the parallel KNN search
    :raises ValueError: if evaluator_validation_list is missing or its length
        does not match URM_train_list

    Any exception raised during the search itself is caught, printed and appended
    to ErrorLog.txt in the output folder (best-effort error reporting, preserved
    from the original design).
    """
    # Explicit None check: previously a missing evaluator list crashed with an
    # opaque TypeError from len(None) instead of this meaningful error.
    if evaluator_validation_list is None or \
            len(evaluator_validation_list) != len(URM_train_list):
        raise ValueError(
            "Number of evaluators does not coincide with the number of URM_train"
        )

    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_class.RECOMMENDER_NAME
    if ICM_name is not None:
        output_file_name_root = output_file_name_root + "_{}".format(ICM_name)
    if UCM_name is not None:
        output_file_name_root = output_file_name_root + "_{}".format(UCM_name)

    try:
        parameter_search = CrossSearchBayesianSkopt(
            recommender_class,
            evaluator_validation_list=evaluator_validation_list)

        # Set recommender_input_args for each fold
        recommender_input_args_list = []
        for i in range(len(URM_train_list)):
            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_list[i]],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})
            if ICM_train_list is not None:
                recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                    "ICM_train"] = ICM_train_list[i]
            if UCM_train_list is not None:
                recommender_input_args.CONSTRUCTOR_KEYWORD_ARGS[
                    "UCM_train"] = UCM_train_list[i]
            recommender_input_args_list.append(recommender_input_args)

        # Get hyper parameters range dictionary by recommender_class
        hyperparameters_range_dictionary = get_hyper_parameters_dictionary(
            recommender_class)

        # -------------------- KNN RECOMMENDERS -------------------- #
        # KNN models are searched once per similarity type; each run shares the
        # same folds and hyper-parameter ranges.
        if recommender_class in KNN_RECOMMENDERS:
            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

            run_tuning_on_similarity_type_partial = partial(
                run_tuning_on_similarity_type,
                recommender_input_args_list=recommender_input_args_list,
                hyper_parameters_dictionary=hyperparameters_range_dictionary,
                recommender_class=recommender_class,
                parameter_search=parameter_search,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=True)

            if parallelize_search:
                # maxtasksperchild=1 recycles workers so each search starts
                # from a fresh process (avoids memory build-up across runs)
                pool = multiprocessing.Pool(processes=n_jobs,
                                            maxtasksperchild=1)
                pool.map(run_tuning_on_similarity_type_partial,
                         similarity_type_list)
                pool.close()
                pool.join()
            else:
                for similarity_type in similarity_type_list:
                    run_tuning_on_similarity_type_partial(similarity_type)
            return

        parameter_search.search(
            recommender_input_args_list,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        # Context manager guarantees the log file is closed even if write fails
        # (the original leaked the handle on a failed write).
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
def run_parameter_search_lightgbm(URM_train,
                                  X_train,
                                  y_train,
                                  X_test,
                                  y_test,
                                  cutoff_test,
                                  categorical_features=None,
                                  num_iteration=10000,
                                  early_stopping_iteration=150,
                                  objective="lambdarank",
                                  verbose=True,
                                  output_folder_path="result_experiments/",
                                  evaluator_validation=None,
                                  n_cases=35,
                                  n_random_starts=5,
                                  metric_to_optimize="MAP"):
    """Tune a LightGBMRecommender with Bayesian optimization.

    The search space covers the usual LightGBM knobs (learning rate, tree
    shape, L1/L2 regularization, bagging/feature sub-sampling). The training
    matrices and test split are forwarded to the recommender constructor; the
    boosting budget and objective are forwarded to fit(). Results are written
    under output_folder_path using the recommender's name as file root, and
    the best model is saved ("best").
    """
    recommender_class = LightGBMRecommender

    # Create the output directory on first use
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)
    output_file_name_root = recommender_class.RECOMMENDER_NAME

    # Search space declared as a single literal for readability
    hyperparameters_range_dictionary = {
        "learning_rate": Real(low=1e-6, high=1e-1, prior="log-uniform"),
        "min_gain_to_split": Real(low=1e-4, high=1e-1, prior="log-uniform"),
        "reg_l1": Real(low=1e-7, high=1e1, prior="log-uniform"),
        "reg_l2": Real(low=1e-7, high=1e1, prior="log-uniform"),
        "max_depth": Integer(low=4, high=100),
        "min_data_in_leaf": Integer(low=5, high=100),
        "bagging_freq": Integer(low=2, high=100),
        "num_leaves": Integer(low=16, high=400),
        "bagging_fraction": Real(low=0.1, high=0.9, prior="log-uniform"),
        "feature_fraction": Real(low=0.1, high=0.9, prior="log-uniform"),
    }

    # Constructor gets the data; fit() gets the boosting configuration
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[
            URM_train, X_train, y_train, X_test, y_test, cutoff_test,
            categorical_features
        ],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={
            "num_iteration": num_iteration,
            "early_stopping_round": early_stopping_iteration,
            "verbose": verbose,
            "objective": objective
        })

    search = SearchBayesianSkopt(recommender_class,
                                 evaluator_validation=evaluator_validation)
    search.search(recommender_input_args,
                  parameter_search_space=hyperparameters_range_dictionary,
                  n_cases=n_cases,
                  n_random_starts=n_random_starts,
                  output_folder_path=output_folder_path,
                  output_file_name_root=output_file_name_root,
                  metric_to_optimize=metric_to_optimize,
                  save_model="best")
def runParameterSearch_QSLIM(URM_train,
                             solver,
                             n_reads=50,
                             filter_items_n=100,
                             URM_train_last_test=None,
                             metric_to_optimize="MAP",
                             evaluator_validation=None,
                             evaluator_test=None,
                             output_folder_path="result_experiments/",
                             parallelizeKNN=True,
                             n_cases=35,
                             n_random_starts=5,
                             resume_from_saved=False,
                             save_model="best",
                             item_selection_list=None):
    """Tune QuantumSLIM_MSE once per item-selection method.

    Each entry of item_selection_list (defaulting to every implemented
    filter-item method except "NONE") triggers its own Bayesian search with a
    fixed fit configuration (filter_items_n, n_reads). Runs are dispatched to
    a process pool when parallelizeKNN is True, sequentially otherwise.
    """
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = QuantumSLIM_MSE.RECOMMENDER_NAME

    # Defensive copies: the search must not mutate the caller's matrices
    URM_train = URM_train.copy()
    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    search = SearchBayesianSkopt(QuantumSLIM_MSE,
                                 evaluator_validation=evaluator_validation,
                                 evaluator_test=evaluator_test)

    if item_selection_list is None:
        item_selection_list = [
            method
            for method in QuantumSLIM_MSE.get_implemented_filter_item_methods()
            if method != "NONE"
        ]

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, solver, "NORM_MSE"],
        CONSTRUCTOR_KEYWORD_ARGS={"verbose": False},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={
            "filter_items_n": filter_items_n,
            "num_reads": n_reads
        })

    # When a train+validation URM is supplied, the best configuration is
    # refit on it: clone the args and swap in the larger matrix.
    recommender_input_args_last_test = None
    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train_last_test

    run_qslim_partial = partial(
        run_QSLIM_on_item_selection,
        recommender_input_args=recommender_input_args,
        parameter_search_space={},
        parameterSearch=search,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=resume_from_saved,
        save_model=save_model,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        recommender_input_args_last_test=recommender_input_args_last_test)

    if parallelizeKNN:
        # One task per worker process to keep memory usage of each run isolated
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        pool.map(run_qslim_partial, item_selection_list)
        pool.close()
        pool.join()
    else:
        for item_selection in item_selection_list:
            run_qslim_partial(item_selection)

    return
def run_cv_parameter_search_hybrid_avg(recommender_object_list,
                                       URM_train_list,
                                       ICM_train_list=None,
                                       UCM_train_list=None,
                                       ICM_name=None,
                                       UCM_name=None,
                                       metric_to_optimize="MAP",
                                       evaluator_validation_list=None,
                                       output_folder_path="result_experiments/",
                                       parallelize_search=True,
                                       n_cases=60,
                                       n_random_starts=20,
                                       resume_from_saved=False,
                                       n_jobs=multiprocessing.cpu_count(),
                                       map_max=0):
    """Tune the mixing weights of an averaging hybrid over pre-built recommenders.

    One weight hyper-parameter is created per sub-recommender name reported by
    recommender_object_list[0].get_recommender_names(): a Real in [0, 1] when
    map_max == 0, otherwise Integer ranges derived from map_max (with a special
    range for "ItemAvg"). The search runs once over all folds via
    CrossSearchBayesianSkoptObject.

    :param recommender_object_list: already-fitted hybrid objects, one per fold
    :param URM_train_list: one training URM per fold (used only for fold count)
    :param evaluator_validation_list: one validation evaluator per fold (required)
    :param map_max: 0 for continuous weights in [0, 1]; otherwise the upper
        bound used to build integer weight ranges
    :raises ValueError: if evaluator_validation_list is missing or its length
        does not match URM_train_list

    NOTE(review): ICM_train_list, UCM_train_list, parallelize_search and n_jobs
    are accepted for signature parity with the sibling search functions but are
    not used by this search.
    """
    # Explicit None check: previously a missing evaluator list crashed with an
    # opaque TypeError from len(None) instead of this meaningful error.
    if evaluator_validation_list is None or \
            len(evaluator_validation_list) != len(URM_train_list):
        raise ValueError(
            "Number of evaluators does not coincide with the number of URM_train")

    # If directory does not exist, create it
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_object_list[0].RECOMMENDER_NAME
    if ICM_name is not None:
        output_file_name_root = output_file_name_root + "_{}".format(ICM_name)
    if UCM_name is not None:
        output_file_name_root = output_file_name_root + "_{}".format(UCM_name)

    try:
        parameter_search = CrossSearchBayesianSkoptObject(
            recommender_object_list,
            evaluator_validation_list=evaluator_validation_list)

        # The hybrid objects are pre-built, so each fold only needs empty args
        recommender_input_args_list = []
        for i in range(len(URM_train_list)):
            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})
            recommender_input_args_list.append(recommender_input_args)

        # One weight range per sub-recommender of the hybrid
        hyperparameters_range_dictionary = {}
        for model_name in recommender_object_list[0].get_recommender_names():
            if map_max == 0:
                hyperparameters_range_dictionary[model_name] = Real(0, 1)
            else:
                if model_name == "ItemAvg":
                    # ItemAvg gets a narrower, higher range than the others
                    hyperparameters_range_dictionary[model_name] = Integer(
                        (map_max // 2) - 5, map_max)
                else:
                    hyperparameters_range_dictionary[model_name] = Integer(
                        0, map_max - 3)

        parameter_search.search(
            recommender_input_args_list,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:
        print("On recommender {} Exception {}".format(recommender_object_list,
                                                      str(e)))
        traceback.print_exc()

        # Context manager guarantees the log file is closed even if write fails
        # (the original leaked the handle on a failed write).
        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_object_list, str(e)))
def run_parameter_search_user_item_all(
        recommender_class, URM_train, UCM_train, ICM_train, UCM_name, ICM_name,
        metric_to_optimize="PRECISION", evaluator_validation=None,
        output_folder_path="result_experiments/", parallelizeKNN=True,
        n_cases=60, n_random_starts=10, similarity_type_list=None):
    """Tune a combined user+item KNN recommender over similarity-type pairs.

    For every (user_similarity, item_similarity) pair from
    similarity_type_list (default: jaccard, asymmetric, cosine) a Bayesian
    search over user/item topK and shrink is launched, in a process pool when
    parallelizeKNN is True.

    Bug fix: the sequential branch used to call the worker with TWO positional
    arguments while pool.map delivers ONE (user, item) tuple per call — the two
    branches could not both match the worker's signature. The sequential branch
    now passes the same tuple the parallel branch does.
    """
    # Create folder if it does not exist
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(
        UCM_name) + "_{}".format(ICM_name)

    parameterSearch = SearchBayesianSkopt(
        recommender_class, evaluator_validation=evaluator_validation)

    if similarity_type_list is None:
        similarity_type_list = ['jaccard', 'asymmetric', "cosine"]

    # Set hyperparameters shared by all similarity-type combinations
    hyperparameters_range_dictionary = {
        "user_topK": Integer(5, 2000),
        "user_shrink": Integer(0, 2000),
        "item_topK": Integer(5, 2000),
        "item_shrink": Integer(0, 2000)
    }

    # Set args for recommender
    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_train, ICM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    run_user_item_all_on_combination_similarity_type_partial = partial(
        run_user_item_all_on_combination_similarity_type,
        recommender_input_args=recommender_input_args,
        parameter_search_space=hyperparameters_range_dictionary,
        parameterSearch=parameterSearch,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        allow_user_weighting=True,
        allow_item_weighting=True)

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        # pool.map hands each (user, item) pair to the worker as one tuple
        pool.map(
            run_user_item_all_on_combination_similarity_type_partial,
            list(itertools.product(similarity_type_list,
                                   similarity_type_list)))
        pool.close()
        pool.join()
    else:
        for user_similarity_type in similarity_type_list:
            for item_similarity_type in similarity_type_list:
                # Pass a single tuple, matching the pool.map call convention
                run_user_item_all_on_combination_similarity_type_partial(
                    (user_similarity_type, item_similarity_type))