def runParameterSearch_Content(recommender_class, URM_train, ICM_object, ICM_name, n_cases=30,
                               evaluator_validation=None, evaluator_test=None, metric_to_optimize="PRECISION",
                               output_root_path="result_experiments/", parallelizeKNN=False):
    """Run a Bayesian hyperparameter search for a content-based KNN recommender.

    One search is launched per similarity type ('cosine', 'jaccard',
    'asymmetric', 'dice', 'tversky'), optionally in parallel processes.

    :param recommender_class: recommender class exposing RECOMMENDER_NAME
    :param URM_train: user-rating matrix used for training
    :param ICM_object: item-content matrix forwarded as ICM_train
    :param ICM_name: label appended to the output file name
    :param n_cases: number of hyperparameter configurations to evaluate
    :param evaluator_validation: evaluator driving the search
    :param evaluator_test: evaluator for the final test evaluation
    :param metric_to_optimize: metric name the search maximizes
    :param output_root_path: directory where result files are written
    :param parallelizeKNN: if True, run similarity types in a process pool
    """
    # Create the output directory if needed; exist_ok avoids the
    # check-then-create race of the previous exists()/makedirs() pair.
    os.makedirs(output_root_path, exist_ok=True)

    ##########################################################################################################

    this_output_root_path = output_root_path + recommender_class.RECOMMENDER_NAME + "_{}".format(ICM_name)

    parameterSearch = BayesianSearch(recommender_class, evaluator_validation=evaluator_validation,
                                     evaluator_test=evaluator_test)

    similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

    run_KNNCBFRecommender_on_similarity_type_partial = partial(run_KNNCBFRecommender_on_similarity_type,
                                                               parameterSearch=parameterSearch,
                                                               URM_train=URM_train,
                                                               ICM_train=ICM_object,
                                                               n_cases=n_cases,
                                                               output_root_path=this_output_root_path,
                                                               metric_to_optimize=metric_to_optimize)

    if parallelizeKNN:
        pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
        try:
            pool.map(run_KNNCBFRecommender_on_similarity_type_partial, similarity_type_list)
        finally:
            # BUG FIX: the pool was never closed/joined, leaking worker
            # processes (every other pool use in this file does close+join).
            pool.close()
            pool.join()

    else:

        for similarity_type in similarity_type_list:
            run_KNNCBFRecommender_on_similarity_type_partial(similarity_type)
# Ejemplo n.º 2  (scraped-sample separator, kept as a comment so the file parses)
# 0
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=30):
    """Run a Bayesian (scikit-optimize) hyperparameter search for a
    collaborative recommender.

    The search space is selected by the concrete ``recommender_class``:

    * ``TopPop`` / ``Random`` — no hyperparameters; fit once, write the
      validation/test results to disk, and return.
    * ``UserKNNCFRecommender`` / ``ItemKNNCFRecommender`` — one search per
      similarity type, optionally in parallel processes, then return.
    * The remaining models (P3alpha, RP3beta, FunkSVD, MF-BPR, PureSVD,
      SLIM-BPR, SLIM-ElasticNet) build a skopt ``Integer``/``Real``/
      ``Categorical`` range dictionary which is handed to
      ``parameterSearch.search`` at the end.

    :param recommender_class: class object of the recommender to tune
    :param URM_train: user-rating matrix, passed as constructor argument
    :param metric_to_optimize: metric name the search maximizes
    :param evaluator_validation: evaluator driving the search
    :param evaluator_test: evaluator for the final test evaluation
    :param evaluator_validation_earlystopping: evaluator used by models with
        early stopping (Cython MF / SLIM-BPR)
    :param output_folder_path: directory where result files are written
    :param parallelizeKNN: if True, KNN similarity types run in a process pool
    :param n_cases: number of hyperparameter configurations to evaluate

    Any exception is caught, printed, and appended to ``ErrorLog.txt``;
    the function itself never raises.
    """

    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSkoptSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        # Non-personalized baselines: nothing to search, just evaluate once
        # and persist the validation/test result dictionaries with pickle.
        if recommender_class in [TopPop, Random]:

            recommender = recommender_class(URM_train)

            recommender.fit()

            output_file = open(
                output_folder_path + output_file_name_root +
                "_BayesianSearch.txt", "a")
            result_dict, result_baseline = evaluator_validation.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(
                            output_folder_path + output_file_name_root +
                            "_best_result_validation", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(
                            output_folder_path + output_file_name_root +
                            "_best_result_test", "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        # User-based KNN: run a separate search per similarity measure.
        if recommender_class is UserKNNCFRecommender:

            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                # NOTE: pool is not closed/joined here; worker processes may
                # outlive the call — TODO confirm whether this is intended.
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        # Item-based KNN: same structure as the user-based branch above.
        if recommender_class is ItemKNNCFRecommender:

            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

    ##########################################################################################################

        # Graph-based P3alpha: search topK, alpha and similarity normalization.
        if recommender_class is P3alphaRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["alpha"] = Real(low=0,
                                                              high=2,
                                                              prior='uniform')
            hyperparamethers_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        # RP3beta: like P3alpha with an additional beta exponent.
        if recommender_class is RP3betaRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["alpha"] = Real(low=0,
                                                              high=2,
                                                              prior='uniform')
            hyperparamethers_range_dictionary["beta"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparamethers_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        # FunkSVD (Cython): regularization / learning rate on log scale,
        # epochs controlled by early stopping via FIT_KEYWORD_ARGS.
        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 150)
            hyperparamethers_range_dictionary["reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["learning_rate"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        # MF-BPR (Cython): pairwise ranking loss, separate positive/negative
        # regularization; batch_size fixed to 1.
        if recommender_class is MatrixFactorization_BPR_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 150)
            hyperparamethers_range_dictionary["batch_size"] = Categorical([1])
            hyperparamethers_range_dictionary["positive_reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["negative_reg"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["learning_rate"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        # PureSVD: only the number of latent factors is searched.
        if recommender_class is PureSVDRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = Integer(1, 250)

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        # SLIM-BPR (Cython): early stopping allows fewer failed validations
        # (10) than the MF models (20).
        if recommender_class is SLIM_BPR_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            #hyperparamethers_range_dictionary["epochs"] = Integer(1, 150)
            hyperparamethers_range_dictionary["sgd_mode"] = Categorical(
                ["adagrad", "adam"])
            hyperparamethers_range_dictionary["lambda_i"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')
            hyperparamethers_range_dictionary["lambda_j"] = Real(
                low=1e-12, high=1e-3, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': False,
                    'symmetric': False,
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 10,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        # SLIM-ElasticNet: topK plus the L1/L2 mixing ratio.
        if recommender_class is SLIMElasticNetRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = Integer(5, 800)
            hyperparamethers_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

    #########################################################################################################

    ## Final step, after the hyperparameter range has been defined for each type of algorithm
        # NOTE(review): if recommender_class matched none of the branches
        # above, recommenderDictionary is unbound and this raises NameError,
        # which is swallowed by the except clause below and only logged.
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize)

    except Exception as e:

        # Never propagate: print, dump the traceback, and append to a shared
        # error log so a batch of searches can continue past one failure.
        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
    def fit(self,
            topK_P_C=991,
            alpha_P_C=0.4705816992313091,
            beta_P_C=0,
            normalize_similarity_P_C=False,
            alpha_C=0.5,
            topK_C=700,
            shrink_C=200,
            similarity_C='jaccard',
            normalize_C=True,
            feature_weighting_C="TF-IDF",
            norm_scores_C=True,
            topK_P=991,
            alpha_P=0.4705816992313091,
            beta_P=0,
            normalize_similarity_P=False,
            alpha=0.5,
            topK=700,
            shrink=200,
            similarity='jaccard',
            normalize=True,
            feature_weighting="TF-IDF",
            norm_scores=True,
            threshold=5.9):
        """Fit the cold-user and warm-user sub-recommenders in parallel.

        Parameters with the ``_C`` suffix configure ``self.Recommender_cold``;
        the unsuffixed ones configure ``self.Recommender_warm``. Both are
        fitted concurrently via ``self.fitRec`` in a two-task process pool,
        and the fitted instances returned by the workers replace the
        originals (multiprocessing cannot mutate them in place).

        ``threshold`` is stored on the instance; presumably it separates
        cold from warm users at recommendation time — TODO confirm against
        the class's recommend method (not visible here).
        """
        # Keyword arguments for the cold-user sub-recommender.
        cold_args = {
            "topK_P": topK_P_C,
            "alpha_P": alpha_P_C,
            "beta_P": beta_P_C,
            "normalize_similarity_P": normalize_similarity_P_C,
            "alpha": alpha_C,
            "topK": topK_C,
            "shrink": shrink_C,
            "similarity": similarity_C,
            "normalize": normalize_C,
            "feature_weighting": feature_weighting_C,
            "norm_scores": norm_scores_C
        }
        # Keyword arguments for the warm-user sub-recommender.
        warm_args = {
            "topK_P": topK_P,
            "alpha_P": alpha_P,
            "beta_P": beta_P,
            "normalize_similarity_P": normalize_similarity_P,
            "alpha": alpha,
            "topK": topK,
            "shrink": shrink,
            "similarity": similarity,
            "normalize": normalize,
            "feature_weighting": feature_weighting,
            "norm_scores": norm_scores
        }
        # Each task is (recommender, fit kwargs, tag); the tag identifies
        # which sub-recommender a worker's result belongs to.
        tot_args = zip([self.Recommender_cold, self.Recommender_warm],
                       [cold_args, warm_args], ["Cold", "Warm"])
        pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count() -
                                                1),
                                  maxtasksperchild=1)
        resultList = pool.map(self.fitRec, tot_args)
        pool.close()
        pool.join()

        # Re-bind the fitted copies returned by the worker processes.
        for el in resultList:
            if el[1] == "Cold":
                self.Recommender_cold = el[0]
            elif el[1] == "Warm":
                self.Recommender_warm = el[0]

        self.threshold = threshold
# Ejemplo n.º 4  (scraped-sample separator, kept as a comment so the file parses)
# 0
    arg_list.append(hyb_warmV2_args)
    name_list.append("hyb_warmV2")
    rec_list.append(hyb_midV2)
    arg_list.append(hyb_midV2_args)
    name_list.append("hyb_midV2")

    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_train)
    hyb5_args = {"topK_P": 903, "alpha_P": 0.4108657561671193, "normalize_similarity_P": False, "topK": 448,
                 "shrink": 20,
                 "similarity": "tversky", "normalize": True, "alpha": 0.6290871066510789, "feature_weighting": "TF-IDF"}
    rec_list.append(hyb5)
    arg_list.append(hyb5_args)
    name_list.append("hyb5")

    tot_args = zip(rec_list, arg_list, name_list)
    pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count()-1), maxtasksperchild=1)
    resultList = pool.map(fitRec, tot_args)
    pool.close()
    pool.join()

    for el in resultList:
        if el[1] == "hyb_cold":
            hyb_cold = el[0]
        elif el[1] == "hyb_warm":
            hyb_warm = el[0]
        elif el[1] == "hyb_coldV2":
            hyb_coldV2 = el[0]
        elif el[1] == "hyb_midV2":
            hyb_midV2 = el[0]
        elif el[1] == "hyb_warmV2":
            hyb_warmV2 = el[0]
# Ejemplo n.º 5  (scraped-sample separator, kept as a comment so the file parses)
# 0
                    .replace("feature_weighting", "f_w").replace("normalize", "nor").replace("similarity", "sim")\
                    .replace("alpha", "a").replace("beta", "b").replace(" ", "").replace("False", "F")\
                    .replace("True", "T").replace("topK", "K").replace("shrink", "sh").replace("tversky", "tv")


if __name__ == '__main__':

    # Load interaction data, item content, and the users to recommend for.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    # seed = 12341
    k_fold = 7

    # Run the k folds in parallel; each worker returns one score per model.
    cross_partial = partial(crossval, URM_all, ICM_all, target_ids)
    ks = list(range(k_fold))

    pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count() - 1),
                              maxtasksperchild=1)
    resultList = pool.map(cross_partial, ks)
    pool.close()
    pool.join()

    # Average across folds ONCE instead of recomputing np.mean five times.
    mean_scores = np.mean(resultList, axis=0)
    model_names = ["Hyb", "Hyb2", "Hyb3", "Hyb5 P3_CBF_tuned1", "Hyb6"]
    for name, score in zip(model_names, mean_scores):
        print(name + ": " + str(score))
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_root_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=30):
    """runParameterSearch_Collaborative use the BayesianSearch class to fin best parameters
    :param recommender_class: The name of the class on which you want to optimize parameters
    :param URM_train:
    :param metric_to_optimize: The metric you want to optimize; "MAP", "Precision"...
    :param evaluator_validation:
    :param evaluator_test:
    :param evaluator_validation_earlystopping:
    :param output_root_path:
    :param parallelizeKNN:
    :param n_cases:
    :return:
    """

    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    try:

        output_root_path_rec_name = output_root_path + recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, Random]:

            recommender = recommender_class(URM_train)

            recommender.fit()

            output_file = open(
                output_root_path_rec_name + "_BayesianSearch.txt", "a")
            result_dict, result_baseline = evaluator_validation.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))

            pickle.dump(
                result_dict.copy(),
                open(output_root_path_rec_name + "_best_result_validation",
                     "wb"),
                protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(output_root_path_rec_name + "_best_result_test",
                             "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:

            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:

            similarity_type_list = [
                'cosine', 'jaccard', "asymmetric", "dice", "tversky"
            ]

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        # if recommender_class is MultiThreadSLIM_RMSE:
        #
        #     hyperparamethers_range_dictionary = {}
        #     hyperparamethers_range_dictionary["topK"] = [50, 100]
        #     hyperparamethers_range_dictionary["l1_penalty"] = [1e-2, 1e-3, 1e-4]
        #     hyperparamethers_range_dictionary["l2_penalty"] = [1e-2, 1e-3, 1e-4]
        #
        #
        #     recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        #                              DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        #                              DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        #                              DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        #                              DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}
        #
        #

    ##########################################################################################################

        if recommender_class is P3alphaRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [
                True, False
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["beta"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [
                True, False
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            #hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = [
                1, 5, 10, 20, 30, 50, 70, 90, 110
            ]
            hyperparamethers_range_dictionary["reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            #hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = [
                1, 5, 10, 20, 30, 50, 70, 90, 110
            ]
            hyperparamethers_range_dictionary["batch_size"] = [1]
            hyperparamethers_range_dictionary["positive_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["negative_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = list(
                range(0, 250, 5))

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            #hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            hyperparamethers_range_dictionary["lambda_i"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["lambda_j"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': True,
                    'symmetric': True,
                    'positive_threshold': 0
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 10,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["l1_penalty"] = [
                1.0, 0.0, 1e-2, 1e-4, 1e-6
            ]
            hyperparamethers_range_dictionary["l2_penalty"] = [
                100.0, 1.0, 0.0, 1e-2, 1e-4, 1e-6
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is HybridRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["l1_penalty"] = [
                1.0, 0.0, 1e-2, 1e-4, 1e-6
            ]
            hyperparamethers_range_dictionary["l2_penalty"] = [
                100.0, 1.0, 0.0, 1e-2, 1e-4, 1e-6
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'Similarities_list': True,
                    'weights_list': True,
                    'sparse_weights': True
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

    #########################################################################################################

    ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_root_path=output_root_path_rec_name,
            metric=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_root_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
def read_data_split_and_search():
    """
    Example of hyperparameter tuning for the hybrid recommender list below.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    from Data_manager.RecSys2020 import RecSys2020Reader
    from datetime import datetime
    from scipy import sparse as sps

    # Load interactions and item features together with the id mappings.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()

    # 95/5 train-test split followed by a 90/10 train-validation split,
    # applied independently to the interaction and the item-content matrices.
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.95)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.90)
    ICM_train, ICM_test = split_train_in_two_percentage_global_sample(
        ICM_all, train_percentage=0.95)
    ICM_train, ICM_validation = split_train_in_two_percentage_global_sample(
        ICM_train, train_percentage=0.90)

    # Stack the (transposed) item features below the train interactions so
    # that content information contributes to the collaborative model.
    URM_ICM_train = sps.vstack([URM_train, ICM_all.T]).tocsr()

    # Timestamped output folder so repeated runs never collide.
    output_folder_path = (
        "ParamResultsExperiments/SKOPT_ScoresHybridP3alphaKNNCBF_specialized_extend_param"
        + datetime.now().strftime('%b%d_%H-%M-%S/'))

    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Only the RP3beta-based hybrid is tuned in this run; the other hybrid
    # variants from earlier experiments are left out of the list.
    hybrid_algorithm_list = [
        ScoresHybridRP3betaKNNCBF,
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[5, 10])

    # Bind every fixed argument of the search; only the recommender class
    # varies across the pool workers.
    runParameterSearch_Hybrid_partial = partial(
        runParameterSearch_Hybrid,
        URM_train=URM_ICM_train,
        ICM_train=URM_ICM_train.T,
        metric_to_optimize="MAP",
        n_cases=100,
        n_random_starts=20,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path)

    from Utils.PoolWithSubprocess import PoolWithSubprocess

    # Fan the searches out over all-but-one core; maxtasksperchild=1 gives
    # each search a fresh subprocess.
    worker_pool = PoolWithSubprocess(
        processes=int(multiprocessing.cpu_count() - 1), maxtasksperchild=1)
    async_result = worker_pool.map_async(runParameterSearch_Hybrid_partial,
                                         hybrid_algorithm_list)
    worker_pool.close()
    worker_pool.join()

    # NOTE(review): the same searches run again sequentially below, after the
    # parallel pass — presumably a leftover fallback; confirm before removing.
    for rec_class in hybrid_algorithm_list:

        try:

            runParameterSearch_Hybrid_partial(rec_class)

        except Exception as exc:

            print("On recommender {} Exception {}".format(
                rec_class, str(exc)))
            traceback.print_exc()
# Ejemplo n.º 8
def gethyb():
    """
    Build, fit and evaluate a collection of hybrid recommenders, plot their
    MAP per user-profile-length group, and return the ``hyb2`` hybrid.

    Relies on module-level imports (RecSys2020Reader, the ScoresHybrid*
    classes, PoolWithSubprocess, fitRec, compute_group_MAP, ...) and has
    heavy side effects: multiprocessing pools, matplotlib windows and
    console output.
    """
    start_time = time.time()

    # Load interactions, item features and the target users for submission.
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    target_ids = RecSys2020Reader.load_target()

    # Fixed seed so the 80/20 holdout split is reproducible.
    np.random.seed(12341288)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
    # ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.995)
    evaluator_validation = EvaluatorHoldout(URM_test,
                                            cutoff_list=[10],
                                            exclude_seen=True)
    #URM_train = URM_all
    ICM_train = ICM_all

    # Stack the (transposed) item features below the train interactions so
    # content information contributes to the collaborative similarities.
    URM_ICM_train = sps.vstack([URM_train, ICM_all.T])
    URM_ICM_train = URM_ICM_train.tocsr()

    # Partition users into `groups` blocks of equal size, ordered by profile
    # length (number of train interactions), for the per-group MAP analysis.
    l_list = []
    profile_length = np.ediff1d(URM_train.indptr)
    block_size = int(len(profile_length) * 0.2)
    sorted_users = np.argsort(profile_length)
    groups = 5
    rec_list = []
    arg_list = []
    name_list = []

    for group_id in range(0, groups):
        start_pos = group_id * block_size
        end_pos = min((group_id + 1) * block_size, len(profile_length))

        users_in_group = sorted_users[start_pos:end_pos]

        users_in_group_p_len = profile_length[users_in_group]
        l_list.append(len(users_in_group))

        print("Group {}, average p.len {:.2f}, min {}, max {}".format(
            group_id, users_in_group_p_len.mean(), users_in_group_p_len.min(),
            users_in_group_p_len.max()))

    # Hybrids specialised for warm users; the *_args dicts below hold the
    # best hyperparameters found by earlier Bayesian searches.
    hyb_warm = ScoresHybridRP3betaKNNCBF.ScoresHybridRP3betaKNNCBF(
        URM_ICM_train, URM_ICM_train.T)
    hyb_warmV2 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
        URM_ICM_train, URM_ICM_train.T)
    # Warm of Kaggle MAP 0.09466
    '''hyb_warm_args = {"topK_P": 127, "alpha_P": 0.35309465855346317, "normalize_similarity_P": False, "topK": 805,
                     "shrink": 307, "similarity": "tversky", "normalize": False, "alpha": 0.486665735781842, "feature_weighting": "TF-IDF"}
    hyb_warmV2_args = {"topK_P": 1496, "alpha_P": 0.4384309705759645, "normalize_similarity_P": False, "topK": 1023,
                       "shrink": 261, "similarity": "asymmetric", "normalize": False, "alpha": 0.7211670365702352, "feature_weighting": "TF-IDF"}'''
    hyb_warm_args = {
        "topK_P": 2000,
        "alpha_P": 0.5202318972174075,
        "normalize_similarity_P": False,
        "topK": 2000,
        "shrink": 2000,
        "similarity": "tversky",
        "normalize": True,
        "alpha": 1.0,
        "beta_P": 0.33040913500424834,
        "feature_weighting": "none"
    }
    hyb_warmV2_args = {
        "topK_P": 1238,
        "alpha_P": 0.580501466821829,
        "normalize_similarity_P": False,
        "topK": 1043,
        "shrink": 163,
        "similarity": "asymmetric",
        "normalize": False,
        "alpha": 0.25081946305309705,
        "feature_weighting": "BM25"
    }
    #{"topK_P": 2000, "alpha_P": 0.5292482627931302, "normalize_similarity_P": False, "topK": 2000, "shrink": 0,
    #"similarity": "tanimoto", "normalize": True, "alpha": 0.7963434906265208, "beta_P": 0.2692980157925566, "feature_weighting": "BM25"}

    # Hybrids specialised for cold users.
    hyb_cold = ScoresHybridRP3betaKNNCBF.ScoresHybridRP3betaKNNCBF(
        URM_ICM_train, URM_ICM_train.T)
    # Cold of Kaggle MAP 0.09466
    hyb_coldV2 = ScoresHybridRP3betaKNNCBF.ScoresHybridRP3betaKNNCBF(
        URM_ICM_train, URM_ICM_train.T)
    '''hyb_cold_args = {"topK_P": 482, "alpha_P": 0.4999498678468517, "normalize_similarity_P": False, "topK": 1500,
                     "shrink": 212, "similarity": "cosine", "normalize": False, "alpha": 0.6841610038073574,
                     "feature_weighting": "BM25"}
    # Cold of Kaggle MAP 0.09466
    hyb_coldV2_args = {"topK_P": 326, "alpha_P": 0.5120656418370607, "normalize_similarity_P": False, "topK": 151,
                       "shrink": 183, "similarity": "tversky", "normalize": True, "alpha": 0.6290067931193662, "feature_weighting": "BM25"}'''
    hyb_cold_args = {
        "topK_P": 2093,
        "alpha_P": 0.8263868403373367,
        "normalize_similarity_P": False,
        "topK": 298,
        "shrink": 1954,
        "similarity": "tanimoto",
        "normalize": False,
        "alpha": 0.608862998163905,
        "beta_P": 0.34975586706651757,
        "feature_weighting": "TF-IDF"
    }
    # Cold of Kaggle MAP 0.09466
    hyb_coldV2_args = {
        "topK_P": 1490,
        "alpha_P": 0.5832972099071866,
        "normalize_similarity_P": False,
        "topK": 1533,
        "shrink": 1100,
        "similarity": "tanimoto",
        "normalize": False,
        "alpha": 0.15358895478386428,
        "beta_P": 0.002234792201790459,
        "feature_weighting": "BM25"
    }
    '''hyb_midV2 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_ICM_train, URM_ICM_train.T)
    # Cold of Kaggle MAP 0.09466
    hyb_midV2_args = {"topK_P": 2064, "alpha_P": 1.9131180703120496, "normalize_similarity_P": False, "topK": 154, "shrink": 620,
                      "similarity": "asymmetric", "normalize": True, "alpha": 0.013221786654690208, "feature_weighting": "TF-IDF"}
    #{"topK_P": 1577, "alpha_P": 0.1835912052126545, "normalize_similarity_P": false, "topK": 1439, "shrink": 3626,
    #"similarity": "cosine", "normalize": false, "alpha": 0.1507714323088927, "feature_weighting": "BM25"}'''

    # Queue (recommender, fit-args, name) triples; they are fitted in
    # parallel below and re-bound by name from the pool results.
    rec_list.append(hyb_cold)
    arg_list.append(hyb_cold_args)
    name_list.append("hyb_cold")
    rec_list.append(hyb_warm)
    arg_list.append(hyb_warm_args)
    name_list.append("hyb_warm")
    rec_list.append(hyb_warmV2)
    arg_list.append(hyb_warmV2_args)
    name_list.append("hyb_warmV2")
    rec_list.append(hyb_coldV2)
    arg_list.append(hyb_coldV2_args)
    name_list.append("hyb_coldV2")
    '''rec_list.append(hyb_midV2)
    arg_list.append(hyb_midV2_args)
    name_list.append("hyb_midV2")'''

    # hyb5 is trained on the plain URM/ICM, not on the stacked matrix.
    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
        URM_train, ICM_train)
    hyb5_args = {
        "topK_P": 903,
        "alpha_P": 0.4108657561671193,
        "normalize_similarity_P": False,
        "topK": 448,
        "shrink": 5,
        "similarity": "tversky",
        "normalize": True,
        "alpha": 0.6290871066510789,
        "feature_weighting": "TF-IDF"
    }
    rec_list.append(hyb5)
    arg_list.append(hyb5_args)
    name_list.append("hyb5")

    # Fit all five recommenders in parallel, one subprocess each.
    tot_args = zip(rec_list, arg_list, name_list)
    pool = PoolWithSubprocess(processes=5, maxtasksperchild=1)
    resultList = pool.map(fitRec, tot_args)
    pool.close()
    pool.join()

    # Re-bind the fitted models (fitRec returns (model, name) pairs).
    # NOTE(review): "hyb6x" is never added to name_list, so that branch
    # looks unreachable — confirm before relying on hyb6x.
    for el in resultList:
        if el[1] == "hyb_cold":
            hyb_cold = el[0]
        elif el[1] == "hyb_warm":
            hyb_warm = el[0]
        elif el[1] == "hyb_coldV2":
            hyb_coldV2 = el[0]
        elif el[1] == "hyb_midV2":
            hyb_midV2 = el[0]
        elif el[1] == "hyb_warmV2":
            hyb_warmV2 = el[0]
        elif el[1] == "hyb5":
            hyb5 = el[0]
        elif el[1] == "hyb6x":
            hyb6x = el[0]

    # cold, coldV2 and mid are the new ones

    #hyb = hyb_warm

    #hyb2 = hyb_cold

    # Score-level combinations of the fitted models; each fit(alpha=...)
    # sets the mixing weight between the two wrapped recommenders.
    hyb3 = ScoresHybridKNNCFKNNCBF.ScoresHybridKNNCFKNNCBF(
        URM_ICM_train, URM_ICM_train.T)
    hyb3.fit(
        **{
            "topK_CF": 488,
            "shrink_CF": 1500,
            "similarity_CF": "tversky",
            "normalize_CF": True,
            "topK": 1500,
            "shrink": 1500,
            "similarity": "asymmetric",
            "normalize": False,
            "alpha": 0.23233349150222427,
            "feature_weighting": "BM25"
        })
    hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb_warm, hyb5)
    hyb2.fit(alpha=0.5)

    hyb6 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb_warmV2, hyb5)
    hyb6.fit(alpha=0.5)

    hyb7 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb6, hyb2)
    hyb7.fit(alpha=0.5)

    #hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, hyb3, hyb7)
    #hyb.fit(alpha=0.5)

    # NOTE(review): key spelling "lower_validations_allowed" differs from the
    # "lower_validatons_allowed" used by the search dictionaries elsewhere in
    # this file — confirm IALSRecommender.fit expects this spelling.
    earlystopping_keywargs = {
        "validation_every_n": 1,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation,
        "lower_validations_allowed": 3,
        "validation_metric": "MAP",
    }

    ials = IALSRecommender.IALSRecommender(URM_ICM_train)
    ials.fit(**earlystopping_keywargs, num_factors=100, alpha=50)

    hyb = ials

    # hyb7 and hyb3 are deliberately overwritten here: from this point on
    # both mix hyb2 with the IALS model (only the alpha weight differs).
    hyb7 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb2, ials)
    hyb7.fit(alpha=0.5)

    hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb2, ials)
    hyb3.fit(alpha=0.85)

    # Accumulators for per-group MAP (only the hyb* ones are filled below).
    MAP_p3alpha_per_group = []
    MAP_itemKNNCF_per_group = []
    MAP_itemKNNCBF_per_group = []
    MAP_pureSVD_per_group = []
    MAP_hyb_per_group = []
    MAP_hyb2_per_group = []
    MAP_hyb3_per_group = []
    MAP_hyb5_per_group = []
    MAP_hyb6_per_group = []
    MAP_hyb7_per_group = []
    cutoff = 10
    args = {
        "block_size": block_size,
        "profile_length": profile_length,
        "sorted_users": sorted_users,
        "cutoff": cutoff,
        "URM_test": URM_test,
        "hyb": hyb,
        "hyb2": hyb2,
        "hyb3": hyb3,
        "hyb5": hyb5,
        "hyb6": hyb6,
        "hyb7": hyb7
    }

    # Evaluate every user group in parallel; compute_group_MAP returns one
    # MAP value per hybrid, in the order unpacked below.
    pool = PoolWithSubprocess(processes=multiprocessing.cpu_count() - 1,
                              maxtasksperchild=1)
    compute_group_MAP_partial = partial(compute_group_MAP, args)
    resultList = pool.map(compute_group_MAP_partial, range(0, groups))
    pool.close()
    pool.join()
    for el in resultList:
        MAP_hyb_per_group.append(el[0])
        MAP_hyb2_per_group.append(el[1])
        MAP_hyb3_per_group.append(el[2])
        MAP_hyb5_per_group.append(el[3])
        MAP_hyb6_per_group.append(el[4])
        if hyb7 is not None:
            MAP_hyb7_per_group.append(el[5])

    # Needed because of memory error
    '''for group_id in range(0, groups):
        start_pos = group_id * block_size
        end_pos = min((group_id + 1) * block_size, len(profile_length))

        users_in_group = sorted_users[start_pos:end_pos]

        users_in_group_p_len = profile_length[users_in_group]

        print("Group {}, average p.len {:.2f}, min {}, max {}".format(group_id,
                                                                      users_in_group_p_len.mean(),
                                                                      users_in_group_p_len.min(),
                                                                      users_in_group_p_len.max()))

        users_not_in_group_flag = np.isin(sorted_users, users_in_group, invert=True)
        users_not_in_group = sorted_users[users_not_in_group_flag]

        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff], ignore_users=users_not_in_group)

        results, _ = evaluator_test.evaluateRecommender(hyb7)
        MAP_hyb7_per_group.append(results[cutoff]["MAP"])'''

    # Plot per-group MAP curves (blocks until the window is closed).
    import matplotlib.pyplot as pyplot
    '''pyplot.plot(MAP_p3alpha_per_group, label="p3alpha")
    pyplot.plot(MAP_itemKNNCF_per_group, label="itemKNNCF")
    pyplot.plot(MAP_itemKNNCBF_per_group, label="itemKNNCBF")
    pyplot.plot(MAP_pureSVD_per_group, label="pureSVD")'''
    pyplot.plot(MAP_hyb_per_group, label="hyb")
    pyplot.plot(MAP_hyb2_per_group, label="hyb2")
    pyplot.plot(MAP_hyb3_per_group, label="hyb3")
    pyplot.plot(MAP_hyb5_per_group, label="hyb5")
    pyplot.plot(MAP_hyb6_per_group, label="hyb6")
    if hyb7 is not None:
        pyplot.plot(MAP_hyb7_per_group, label="hyb7")
    pyplot.ylabel('MAP')
    pyplot.xlabel('User Group')
    pyplot.legend()
    pyplot.show()

    print(l_list)
    # Global (all-user) evaluation of every hybrid, again in parallel.
    evaluator_validation = EvaluatorHoldout(URM_test,
                                            cutoff_list=[10],
                                            exclude_seen=True)
    pool = PoolWithSubprocess(processes=multiprocessing.cpu_count() - 1,
                              maxtasksperchild=1)
    if hyb7 is not None:
        hyb_list = [hyb, hyb2, hyb3, hyb5, hyb6, hyb7]
    else:
        hyb_list = [hyb, hyb2, hyb3, hyb5, hyb6]
    resultList = pool.map(evaluator_validation.evaluateRecommender, hyb_list)
    pool.close()
    pool.join()
    for el in resultList:
        print(el)
    '''item_list = hyb7.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb_URM_ICM_cold_warm_V2_more_mix_mid')
    item_list = hyb2.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb2')
    item_list = hyb6.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb_URM_ICM')'''

    print("--- Execution time: %s seconds ---" % (time.time() - start_time))
    return hyb2
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     ICM_1,
                                     ICM_2,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_root_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=100):
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    try:

        output_root_path_rec_name = output_root_path + recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, Random]:
            recommender = recommender_class(URM_train)

            recommender.fit()

            output_file = open(
                output_root_path_rec_name + "_BayesianSearch.txt", "a")
            result_dict, result_baseline = evaluator_validation.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))

            pickle.dump(
                result_dict.copy(),
                open(output_root_path_rec_name + "_best_result_validation",
                     "wb"),
                protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(output_root_path_rec_name + "_best_result_test",
                             "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        # if recommender_class is MultiThreadSLIM_RMSE:
        #
        #     hyperparamethers_range_dictionary = {}
        #     hyperparamethers_range_dictionary["topK"] = [50, 100]
        #     hyperparamethers_range_dictionary["l1_penalty"] = [1e-2, 1e-3, 1e-4]
        #     hyperparamethers_range_dictionary["l2_penalty"] = [1e-2, 1e-3, 1e-4]
        #
        #
        #     recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        #                              DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        #                              DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        #                              DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        #                              DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}
        #
        #

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [
                True, False
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is HybridRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["w_itemcf"] = [
                x * 0.1 + 1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_usercf"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbart"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbalb"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_slim"] = [
                x * 0.1 for x in range(0, 10)
            ]
            #hyperparamethers_range_dictionary["w_svd"] = [x * 0.05 for x in range(0, 20)]
            #hyperparamethers_range_dictionary["w_rp3"] = [x * 0.05 for x in range(0, 20)]

            item = ItemKNNCFRecommender(URM_train)

            user = UserKNNCFRecommender(URM_train)

            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)

            item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)

            user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)

            SLIM.fit(l1_penalty=1e-05,
                     l2_penalty=0,
                     positive_only=True,
                     topK=150,
                     alpha=0.00415637376180466)

            CBArt = ItemKNNCBFRecommender(ICM=ICM_1, URM_train=URM_train)
            CBArt.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            CBAlb = ItemKNNCBFRecommender(ICM=ICM_2, URM_train=URM_train)
            CBAlb.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "ICM_Art": ICM_1,
                    "ICM_Alb": ICM_2,
                    "item": item,
                    "user": user,
                    "SLIM": SLIM,
                    "CBArt": CBArt,
                    "CBAlb": CBAlb,
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["beta"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [True]
            hyperparamethers_range_dictionary["implicit"] = [True]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is FunkSVD:
            hyperparamethers_range_dictionary = {}

            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [
                0.0, 1e-03, 1e-06, 1e-09
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [1e-02, 1e-03]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 500, 10)
            hyperparamethers_range_dictionary["batch_size"] = [
                100, 200, 300, 400
            ]
            hyperparamethers_range_dictionary["positive_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["negative_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]
            hyperparamethers_range_dictionary["user_reg"] = [
                1e-3, 1e-4, 1e-5, 1e-6
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = list(
                range(0, 250, 5))

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [800, 900, 1000, 1200]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad"]
            hyperparamethers_range_dictionary["lambda_i"] = [1e-6]
            hyperparamethers_range_dictionary["lambda_j"] = [1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                0.01, 0.001, 1e-4, 1e-5, 0.1
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': True,
                    'symmetric': True,
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 10,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 3,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MultiThreadSLIM_ElasticNet:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                3300, 4300, 5300, 6300, 7300
            ]
            hyperparamethers_range_dictionary["l1_penalty"] = [
                1e-5, 1e-6, 1e-4, 1e-3
            ]
            hyperparamethers_range_dictionary["l2_penalty"] = [1e-4]
            hyperparamethers_range_dictionary["alpha"] = range(0, 1)
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_root_path=output_root_path_rec_name,
            metric=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_root_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
# Ejemplo n.º 10
def gethyb():
    """Build, fit and evaluate a family of layered hybrid recommenders.

    Loads the RecSys2020 interaction (URM) and content (ICM) matrices, holds
    out 20% of interactions for testing, fits several pre-tuned
    ``ScoresHybridP3alphaKNNCBF`` recommenders in parallel, fuses them into
    cold/warm specialized hybrids, plots MAP per user-activity group and
    prints the overall evaluation of each hybrid.

    Returns:
        The fitted ``hyb2`` recommender (fusion of ``hyb3`` and ``hyb6``).
    """
    start_time = time.time()

    # --- Data loading and train/test split ---------------------------------
    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    # Only consumed by the (currently disabled) CSV-export block at the bottom.
    target_ids = RecSys2020Reader.load_target()

    #np.random.seed(12341288)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
    # ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.995)
    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10], exclude_seen=True)
    #URM_train = URM_all
    ICM_train = ICM_all

    # NOTE(review): the original also built
    #   URM_ICM_train = sps.vstack([URM_train, ICM_all.T]).tocsr()
    # here, but the result was never used — removed as dead code.

    # --- Partition users into `groups` blocks by profile length ------------
    l_list = []                                    # users per group, printed at the end
    profile_length = np.ediff1d(URM_train.indptr)  # interactions per user (CSR row lengths)
    block_size = int(len(profile_length) * 0.2)
    sorted_users = np.argsort(profile_length)
    groups = 5
    rec_list = []
    arg_list = []
    name_list = []

    for group_id in range(0, groups):
        start_pos = group_id * block_size
        end_pos = min((group_id + 1) * block_size, len(profile_length))

        users_in_group = sorted_users[start_pos:end_pos]

        users_in_group_p_len = profile_length[users_in_group]
        l_list.append(len(users_in_group))

        print("Group {}, average p.len {:.2f}, min {}, max {}".format(group_id,
                                                                      users_in_group_p_len.mean(),
                                                                      users_in_group_p_len.min(),
                                                                      users_in_group_p_len.max()))

    # --- Base hybrids with pre-tuned hyperparameters ------------------------
    # hyb_warm passes URM_train.T in place of an ICM (users-as-item-features).
    hyb_warm = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, URM_train.T)
    hyb_warmV2 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_train)
    # Warm of Kaggle MAP 0.09466
    '''hyb_warm_args = {"topK_P": 127, "alpha_P": 0.35309465855346317, "normalize_similarity_P": False, "topK": 805,
                     "shrink": 307, "similarity": "tversky", "normalize": False, "alpha": 0.486665735781842, "feature_weighting": "TF-IDF"}
    hyb_warmV2_args = {"topK_P": 1496, "alpha_P": 0.4384309705759645, "normalize_similarity_P": False, "topK": 1023,
                       "shrink": 261, "similarity": "asymmetric", "normalize": False, "alpha": 0.7211670365702352, "feature_weighting": "TF-IDF"}'''
    hyb_warm_args = {"topK_P": 1500, "alpha_P": 0.499386187332916, "normalize_similarity_P": False, "topK": 1500,
                     "shrink": 0, "similarity": "cosine", "normalize": False, "alpha": 0.6783844599810798, "feature_weighting": "BM25"}
    hyb_warmV2_args = {"topK_P": 1407, "alpha_P": 0.5102184063631549, "normalize_similarity_P": False, "topK": 62,
                       "shrink": 104, "similarity": "tanimoto", "normalize": False, "alpha": 0.7722938163027667, "feature_weighting": "none"}

    hyb_cold = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_train)
    # Cold of Kaggle MAP 0.09466
    hyb_coldV2 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_train)
    '''hyb_cold_args = {"topK_P": 482, "alpha_P": 0.4999498678468517, "normalize_similarity_P": False, "topK": 1500,
                     "shrink": 212, "similarity": "cosine", "normalize": False, "alpha": 0.6841610038073574,
                     "feature_weighting": "BM25"}
    # Cold of Kaggle MAP 0.09466
    hyb_coldV2_args = {"topK_P": 326, "alpha_P": 0.5120656418370607, "normalize_similarity_P": False, "topK": 151,
                       "shrink": 183, "similarity": "tversky", "normalize": True, "alpha": 0.6290067931193662, "feature_weighting": "BM25"}'''
    hyb_cold_args = {"topK_P": 510, "alpha_P": 0.2857363628982497, "normalize_similarity_P": False, "topK": 483,
                     "shrink": 1491, "similarity": "asymmetric", "normalize": True, "alpha": 0.7682805033640728, "feature_weighting": "TF-IDF"}
    # Cold of Kaggle MAP 0.09466
    hyb_coldV2_args = {"topK_P": 1095, "alpha_P": 0.4546298466859472, "normalize_similarity_P": False, "topK": 866,
                       "shrink": 182, "similarity": "tanimoto", "normalize": False, "alpha": 0.5837079437871213, "feature_weighting": "BM25"}
    hyb_midV2 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_train)
    # Cold of Kaggle MAP 0.09466
    hyb_midV2_args = {"topK_P": 482, "alpha_P": 0.4999498678468517, "normalize_similarity_P": False, "topK": 1500,
                       "shrink": 212, "similarity": "cosine", "normalize": False, "alpha": 0.6841610038073574, "feature_weighting": "BM25"}

    rec_list.append(hyb_cold)
    arg_list.append(hyb_cold_args)
    name_list.append("hyb_cold")
    rec_list.append(hyb_warm)
    arg_list.append(hyb_warm_args)
    name_list.append("hyb_warm")
    rec_list.append(hyb_warmV2)
    arg_list.append(hyb_warmV2_args)
    name_list.append("hyb_warmV2")
    rec_list.append(hyb_coldV2)
    arg_list.append(hyb_coldV2_args)
    name_list.append("hyb_coldV2")
    rec_list.append(hyb_midV2)
    arg_list.append(hyb_midV2_args)
    name_list.append("hyb_midV2")

    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(URM_train, ICM_train)
    hyb5_args = {"topK_P": 903, "alpha_P": 0.4108657561671193, "normalize_similarity_P": False, "topK": 448,
                 "shrink": 5,
                 "similarity": "tversky", "normalize": True, "alpha": 0.6290871066510789, "feature_weighting": "TF-IDF"}
    rec_list.append(hyb5)
    arg_list.append(hyb5_args)
    name_list.append("hyb5")

    # --- Fit every (recommender, args, name) triple in a worker pool --------
    tot_args = zip(rec_list, arg_list, name_list)
    pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count()-1), maxtasksperchild=1)
    resultList = pool.map(fitRec, tot_args)
    pool.close()
    pool.join()

    # Map each fitted recommender back to its local variable by name.
    # NOTE(review): the original also matched "hyb6x", but that name is never
    # appended to name_list — dead branch removed.
    for el in resultList:
        if el[1] == "hyb_cold":
            hyb_cold = el[0]
        elif el[1] == "hyb_warm":
            hyb_warm = el[0]
        elif el[1] == "hyb_coldV2":
            hyb_coldV2 = el[0]
        elif el[1] == "hyb_midV2":
            hyb_midV2 = el[0]
        elif el[1] == "hyb_warmV2":
            hyb_warmV2 = el[0]
        elif el[1] == "hyb5":
            hyb5 = el[0]

    # --- Fuse the base hybrids into layered recommenders --------------------
    hybuc = ScoresHybridSpecializedV3Warm.ScoresHybridSpecializedV3Warm(URM_train, ICM_all)
    hybuc.fit(**{"topK_P": 509, "alpha_P": 1.045671409326966, "normalize_similarity_P": False, "topK": 1291, "shrink": 430,
             "similarity": "asymmetric", "normalize": False, "alpha": 0.864672904054673, "feature_weighting": "TF-IDF"})

    # NOTE(review): a dead `hyb2 = hyb_warmV2` assignment stood here in the
    # original; hyb2 was reassigned below before any use — removed.

    hyb3 = ScoresHybridSpecializedFusion.ScoresHybridSpecializedFusion(URM_train, hyb_cold, hyb_warm, 5.9)

    hyb7 = ScoresHybridSpecializedFusion.ScoresHybridSpecializedFusion(URM_train, hyb_coldV2, hyb_warmV2, 5.9)

    hyb6 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, hyb7, hyb5)
    hyb6.fit(alpha=0.5)

    hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, hyb3, hyb6)
    hyb2.fit(alpha=0.5)

    hyb = ScoresHybridSpecializedFusion.ScoresHybridSpecializedFusion(URM_train, hyb2, hybuc, 300)

    # --- Per-group MAP, computed in parallel --------------------------------
    MAP_hyb_per_group = []
    MAP_hyb2_per_group = []
    MAP_hyb3_per_group = []
    MAP_hyb5_per_group = []
    MAP_hyb6_per_group = []
    MAP_hyb7_per_group = []
    cutoff = 10
    args = {"block_size": block_size, "profile_length": profile_length, "sorted_users": sorted_users, "cutoff": cutoff,
            "URM_test": URM_test, "hyb": hyb, "hyb2": hyb2, "hyb3": hyb3, "hyb5": hyb5, "hyb6": hyb6, "hyb7": hyb7}

    pool = PoolWithSubprocess(processes=multiprocessing.cpu_count()-1, maxtasksperchild=1)
    compute_group_MAP_partial = partial(compute_group_MAP, args)
    resultList = pool.map(compute_group_MAP_partial, range(0, groups))
    pool.close()
    pool.join()
    for el in resultList:
        MAP_hyb_per_group.append(el[0])
        MAP_hyb2_per_group.append(el[1])
        MAP_hyb3_per_group.append(el[2])
        MAP_hyb5_per_group.append(el[3])
        MAP_hyb6_per_group.append(el[4])
        if hyb7 is not None:
            MAP_hyb7_per_group.append(el[5])

    # Needed because of memory error
    '''for group_id in range(0, groups):
        start_pos = group_id * block_size
        end_pos = min((group_id + 1) * block_size, len(profile_length))

        users_in_group = sorted_users[start_pos:end_pos]

        users_in_group_p_len = profile_length[users_in_group]

        print("Group {}, average p.len {:.2f}, min {}, max {}".format(group_id,
                                                                      users_in_group_p_len.mean(),
                                                                      users_in_group_p_len.min(),
                                                                      users_in_group_p_len.max()))

        users_not_in_group_flag = np.isin(sorted_users, users_in_group, invert=True)
        users_not_in_group = sorted_users[users_not_in_group_flag]

        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff], ignore_users=users_not_in_group)

        results, _ = evaluator_test.evaluateRecommender(hyb7)
        MAP_hyb7_per_group.append(results[cutoff]["MAP"])'''

    # --- Plot MAP per user group --------------------------------------------
    import matplotlib.pyplot as pyplot

    '''pyplot.plot(MAP_p3alpha_per_group, label="p3alpha")
    pyplot.plot(MAP_itemKNNCF_per_group, label="itemKNNCF")
    pyplot.plot(MAP_itemKNNCBF_per_group, label="itemKNNCBF")
    pyplot.plot(MAP_pureSVD_per_group, label="pureSVD")'''
    pyplot.plot(MAP_hyb_per_group, label="hyb")
    pyplot.plot(MAP_hyb2_per_group, label="hyb2")
    pyplot.plot(MAP_hyb3_per_group, label="hyb3")
    pyplot.plot(MAP_hyb5_per_group, label="hyb5")
    pyplot.plot(MAP_hyb6_per_group, label="hyb6")
    if hyb7 is not None:
        pyplot.plot(MAP_hyb7_per_group, label="hyb7")
    pyplot.ylabel('MAP')
    pyplot.xlabel('User Group')
    pyplot.legend()
    pyplot.show()

    # --- Overall evaluation of every hybrid ---------------------------------
    print(l_list)
    # Reuse the evaluator built right after the holdout split (the original
    # recreated an identical EvaluatorHoldout here).
    pool = PoolWithSubprocess(processes=multiprocessing.cpu_count()-1, maxtasksperchild=1)
    if hyb7 is not None:
        hyb_list = [hyb, hyb2, hyb3, hyb5, hyb6, hyb7]
    else:
        hyb_list = [hyb, hyb2, hyb3, hyb5, hyb6]
    resultList = pool.map(evaluator_validation.evaluateRecommender, hyb_list)
    pool.close()
    pool.join()
    for el in resultList:
        print(el)
    '''item_list = hyb7.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb_URM_ICM_cold_warm_V2_more_mix_mid')
    item_list = hyb2.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb2')
    item_list = hyb6.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb_URM_ICM')'''

    print("--- Execution time: %s seconds ---" % (time.time() - start_time))
    return hyb2