def runParameterSearch_NeuRec(recommender_class, URM_train, earlystopping_hyperparameters,
                              output_file_name_root, URM_train_last_test=None,
                              n_cases=35, n_random_starts=5,
                              evaluator_validation=None, evaluator_test=None,
                              metric_to_optimize="RECALL",
                              output_folder_path="result_experiments/"):



    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)


    parameterSearch = SearchBayesianSkopt(recommender_class, evaluator_validation=evaluator_validation, evaluator_test=evaluator_test)


    ##########################################################################################################

    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["epochs"] = Categorical([1500])
    hyperparameters_range_dictionary["num_neurons"] = Integer(100, 400)
    hyperparameters_range_dictionary["num_factors"] = Integer(20, 70)
    hyperparameters_range_dictionary["dropout_percentage"] = Real(low=0.0, high=0.1, prior='uniform')
    hyperparameters_range_dictionary["learning_rate"] = Real(low=1e-5, high=1e-3, prior='log-uniform')
    hyperparameters_range_dictionary["regularization_rate"] = Real(low=0.0, high=0.2, prior='uniform')
    hyperparameters_range_dictionary["batch_size"] = Categorical([128, 256, 512, 1024, 2048])

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
        CONSTRUCTOR_KEYWORD_ARGS = {},
        FIT_POSITIONAL_ARGS = [],
        FIT_KEYWORD_ARGS = {**earlystopping_hyperparameters,
                            "use_gpu": False,
                            "epochs_min": 200,
                            "display_epoch": None,
                            "display_step": None,
                            "verbose": False}
    )


    #########################################################################################################

    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    parameterSearch.search(recommender_input_args,
                           parameter_search_space = hyperparameters_range_dictionary,
                           n_cases = n_cases,
                           n_random_starts = n_random_starts,
                           output_folder_path = output_folder_path,
                           output_file_name_root = output_file_name_root,
                           metric_to_optimize = metric_to_optimize,
                           resume_from_saved = True,
                           recommender_input_args_last_test = recommender_input_args_last_test)
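
# A minimal usage sketch for the search above. Assumptions (not defined in this
# file): a NeuRec wrapper class named `NeuRec_RecommenderWrapper` and the
# URM/evaluator objects built by the caller; all names here are illustrative.
def _example_runParameterSearch_NeuRec():
    earlystopping_hyperparameters = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": "RECALL",
    }

    runParameterSearch_NeuRec(NeuRec_RecommenderWrapper, URM_train,
                              earlystopping_hyperparameters,
                              output_file_name_root=NeuRec_RecommenderWrapper.RECOMMENDER_NAME,
                              URM_train_last_test=URM_train + URM_validation,
                              evaluator_validation=evaluator_validation,
                              evaluator_test=evaluator_test,
                              metric_to_optimize="RECALL")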
def run_train_with_early_stopping(dataset_name, URM_train, URM_validation,
                                  UCM_CoupledCF, ICM_CoupledCF,
                                  evaluator_validation, evaluator_test,
                                  metric_to_optimize, result_folder_path,
                                  map_mode):

    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    article_hyperparameters = get_hyperparameters_for_dataset(dataset_name)
    article_hyperparameters["map_mode"] = map_mode

    earlystopping_hyperparameters = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    parameterSearch = SearchSingleCase(
        CoupledCF_RecommenderWrapper,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_CoupledCF, ICM_CoupledCF],
        FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
        0] = URM_train + URM_validation

    parameterSearch.search(
        recommender_input_args,
        recommender_input_args_last_test=recommender_input_args_last_test,
        fit_hyperparameters_values=article_hyperparameters,
        output_folder_path=result_folder_path,
        output_file_name_root=CoupledCF_RecommenderWrapper.RECOMMENDER_NAME,
        save_model="last",
        resume_from_saved=True,
        evaluate_on_test="last")

    dataIO = DataIO(result_folder_path)
    search_metadata = dataIO.load_data(
        file_name=CoupledCF_RecommenderWrapper.RECOMMENDER_NAME +
        "_metadata.zip")

    return search_metadata
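
# The returned search_metadata is the dict saved by DataIO during the search.
# A hedged sketch of reading the fields this file uses elsewhere
# ("hyperparameters_best", "result_on_last"); exact keys depend on the search
# configuration, so .get() is used defensively:
def _example_read_search_metadata(search_metadata):
    best_hyperparameters = search_metadata.get("hyperparameters_best")
    result_on_last = search_metadata.get("result_on_last")
    print("Best hyperparameters: {}".format(best_hyperparameters))
    print("Result on last (train + validation) model: {}".format(result_on_last))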
def run_permutation_BPRMF(output_folder_path, permutation_index, USER_factors_perm, ITEM_factors_perm):

    output_folder_path_permutation = output_folder_path + "{}/{}_{}/".format("BPRMF", "BPRMF", permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    # n_users, n_items and n_factors are assumed to be module-level globals,
    # as are URM_train, URM_validation and evaluator_test used below
    assert USER_factors_perm.shape == (n_users, n_factors)
    assert ITEM_factors_perm.shape == (n_items, n_factors)


    parameterSearch = SearchSingleCase(MatrixFactorizationCustomFactorsRecommender,
                                       evaluator_validation = None,
                                       evaluator_test = evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(
                                        CONSTRUCTOR_POSITIONAL_ARGS = [URM_train + URM_validation],
                                        FIT_KEYWORD_ARGS = {
                                            "USER_factors": USER_factors_perm,
                                            "ITEM_factors": ITEM_factors_perm
                                        })

    parameterSearch.search(recommender_input_args,
                           save_model = "no",
                           resume_from_saved=True,
                           fit_hyperparameters_values = {},
                           output_folder_path = output_folder_path_permutation,
                           output_file_name_root = MatrixFactorizationCustomFactorsRecommender.RECOMMENDER_NAME)
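
# Illustrative call with an identity column permutation of the pretrained factors.
# Assumptions: `USER_factors`, `ITEM_factors` and `n_factors` are the module-level
# globals this function relies on, and numpy is imported as np:
def _example_run_permutation_BPRMF():
    permutation = np.arange(n_factors)
    run_permutation_BPRMF("result_experiments/",
                          permutation_index=0,
                          USER_factors_perm=USER_factors[:, permutation],
                          ITEM_factors_perm=ITEM_factors[:, permutation])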
def pretrain_MFBPR(
    URM_train,
    URM_train_full,
    evaluator_validation,
    evaluator_test,
    result_folder_path,
    metric_to_optimize,
):

    article_hyperparameters = {
        "batch_size": 512,
        "epochs": 500,
        "embed_size": 64,
        "negative_sample_per_positive": 1,
        "learning_rate": 0.05,
        "path_partial_results": result_folder_path,
    }

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    parameterSearch = SearchSingleCase(
        MFBPR_Wrapper,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        FIT_KEYWORD_ARGS=earlystopping_keywargs)

    recommender_input_args_last_test = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full])

    parameterSearch.search(
        recommender_input_args,
        recommender_input_args_last_test=recommender_input_args_last_test,
        fit_hyperparameters_values=article_hyperparameters,
        output_folder_path=result_folder_path,
        output_file_name_root=MFBPR_Wrapper.RECOMMENDER_NAME,
        save_model="last",
        resume_from_saved=True,
        evaluate_on_test="last")
def run_parameter_search_VNN(recommender_class, URM_train,
                             metric_to_optimize="PRECISION",
                             evaluator_validation=None, evaluator_test=None,
                             output_folder_path="result_experiments/", parallelizeKNN=True,
                             n_cases=35, n_random_starts=5, resume_from_saved=False, save_model="best",
                             allow_weighting=True,
                             similarity_type_list=None):
    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    output_file_name_root = recommender_class.RECOMMENDER_NAME

    parameterSearch = SearchBayesianSkopt(recommender_class, evaluator_validation=evaluator_validation,
                                          evaluator_test=evaluator_test)

    if similarity_type_list is None:
        similarity_type_list = ['cosine', 'jaccard', "asymmetric", "dice", "tversky"]

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={}
    )

    recommender_input_args_last_test = None
    run_VKNNRecommender_on_similarity_type_partial = partial(run_VKNNRecommender_on_similarity_type,
                                                             recommender_input_args=recommender_input_args,
                                                             parameter_search_space={},
                                                             parameterSearch=parameterSearch,
                                                             n_cases=n_cases,
                                                             n_random_starts=n_random_starts,
                                                             resume_from_saved=resume_from_saved,
                                                             save_model=save_model,
                                                             output_folder_path=output_folder_path,
                                                             output_file_name_root=output_file_name_root,
                                                             metric_to_optimize=metric_to_optimize,
                                                             allow_weighting=allow_weighting,
                                                             recommender_input_args_last_test=recommender_input_args_last_test)

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
        pool.map(run_VKNNRecommender_on_similarity_type_partial, similarity_type_list)

        pool.close()
        pool.join()

    else:

        for similarity_type in similarity_type_list:
            run_VKNNRecommender_on_similarity_type_partial(similarity_type)

    return
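
# Hedged usage sketch (assumes a KNN-style recommender class and prebuilt
# evaluators; parallelizeKNN=False keeps the runs in-process, which is easier
# to debug than the multiprocessing pool):
def _example_run_parameter_search_VNN():
    run_parameter_search_VNN(ItemKNNCFRecommender, URM_train,
                             metric_to_optimize="MAP",
                             evaluator_validation=evaluator_validation,
                             evaluator_test=evaluator_test,
                             parallelizeKNN=False,
                             similarity_type_list=["cosine", "jaccard"])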
def pretrain_FMwrapper(URM_train_tuning_only,
                       URM_train_full,
                       evaluator_validation,
                       evaluator_test,
                       CFM_data_class_validation,
                       CFM_data_class_full,
                       result_folder_path:str,
                       # hidden_factors:int,
                       metric_to_optimize:str,
                       dataset_name):


    # search best epoch
    article_hyperparameters = get_FM_hyperparameters_for_dataset(dataset_name)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    parameterSearch = SearchSingleCase(FM_Wrapper,
                                       evaluator_validation=evaluator_validation,
                                       evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_tuning_only, CFM_data_class_validation],
                                                        FIT_KEYWORD_ARGS=earlystopping_keywargs)

    recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

    parameterSearch.search(recommender_input_args,
                           recommender_input_args_last_test=recommender_input_args_last_test,
                           fit_hyperparameters_values=article_hyperparameters,
                           output_folder_path=result_folder_path,
                           output_file_name_root=FM_Wrapper.RECOMMENDER_NAME,
                           save_model = "last",
                           resume_from_saved=True,
                           evaluate_on_test = "last")
def run_train_with_early_stopping(output_folder_path, permutation_index,
                                  USER_factors_perm, ITEM_factors_perm,
                                  map_mode, metric_to_optimize,
                                  evaluator_validation, evaluator_test,
                                  URM_train, URM_validation):

    output_folder_path_permutation = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)


    assert USER_factors_perm.shape == (n_users, n_factors)
    assert ITEM_factors_perm.shape == (n_items, n_factors)

    np.save(output_folder_path_permutation + "best_model_latent_factors", [USER_factors_perm, ITEM_factors_perm])

    optimal_hyperparameters = {
        "batch_size": 512,
        "epochs": 1500,
        "load_pretrained_MFBPR_if_available": True,
        "MF_latent_factors_folder": output_folder_path_permutation,
        "embedding_size": 64,
        "hidden_size": 128,
        "negative_sample_per_positive": 1,
        "negative_instances_per_positive": 4,
        "regularization_users_items": 0.01,
        "regularization_weights": 10,
        "regularization_filter_weights": 1,
        "learning_rate_embeddings": 0.05,
        "learning_rate_CNN": 0.05,
        "channel_size": [32, 32, 32, 32, 32, 32],
        "dropout": 0.0,
        "epoch_verbose": 1,
        "temp_file_folder": None,
        }


    optimal_hyperparameters["map_mode"] = map_mode



    earlystopping_hyperparameters = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    parameterSearch = SearchSingleCase(ConvNCF_RecommenderWrapper,
                                       evaluator_validation=evaluator_validation,
                                       evaluator_test=evaluator_test)

    recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                                                        FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

    recommender_input_args_last_test = recommender_input_args.copy()
    recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

    parameterSearch.search(recommender_input_args,
                            recommender_input_args_last_test=recommender_input_args_last_test,
                            fit_hyperparameters_values=optimal_hyperparameters,
                            output_folder_path=output_folder_path_permutation,
                            output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME,
                            save_model = "last",
                            resume_from_saved=True,
                            evaluate_on_test = "last")
parameterSearch = SearchBayesianSkopt(recommender_class,
                                      evaluator_validation=evaluator_valid,
                                      evaluator_test=evaluator_test)

hyperparameters_range_dictionary = {}
hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
hyperparameters_range_dictionary["add_zeros_quota"] = Real(low=0,
                                                           high=1,
                                                           prior='uniform')
hyperparameters_range_dictionary["normalize_similarity"] = Categorical(
    [True, False])

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train, icm_asset, W_sparse_CF],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={})

output_folder_path = "result_experiments/"

import os

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_cases = 50
metric_to_optimize = "MAP"

# Clone data structure to perform the fitting with the best hyperparameters on train + validation data
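# Hedged completion of the snippet above, following the clone-and-search pattern
# used elsewhere in this file. `urm_train_validation` is an assumed name for
# urm_train plus the validation split, built by the caller:
recommender_input_args_last_test = recommender_input_args.copy()
recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = urm_train_validation

parameterSearch.search(recommender_input_args,
                       parameter_search_space=hyperparameters_range_dictionary,
                       n_cases=n_cases,
                       n_random_starts=int(n_cases / 3),
                       output_folder_path=output_folder_path,
                       output_file_name_root=recommender_class.RECOMMENDER_NAME,
                       metric_to_optimize=metric_to_optimize,
                       resume_from_saved=True,
                       recommender_input_args_last_test=recommender_input_args_last_test)
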
def run_train_with_early_stopping(URM_train_tuning_only,
                               URM_train_full,
                               evaluator_validation,
                               evaluator_test,
                               CFM_data_class_validation,
                               CFM_data_class_full,
                               pretrained_FM_folder_path,
                               output_folder_path,
                               permutation_index,
                               map_mode,
                               metric_to_optimize):

    output_folder_path_permutation = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index)

    # If directory does not exist, create
    if not os.path.exists(output_folder_path_permutation):
        os.makedirs(output_folder_path_permutation)

    if os.path.isfile(output_folder_path_permutation + CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip"):
        return

    article_hyperparameters = {
        'pretrain_flag': 1,
        'pretrained_FM_folder_path': pretrained_FM_folder_path,
        'hidden_factor': 64,
        'epochs': 300,
        'batch_size': 256,
        'learning_rate': 0.01,
        'lamda_bilinear': 0,
        'keep': 0.8,
        'optimizer_type': 'AdagradOptimizer',
        'batch_norm': 0,
        'verbose': False,
        'regs': '[10,1]',
        'attention_size': 32,
        'attentive_pooling': False,
        'net_channel': '[32,32,32,32,32,32]',
        'num_field': 4,
        'permutation': list(permutation),  # `permutation` is assumed to be defined at module level
        'map_mode': map_mode
    }



    earlystopping_hyperparameters = {
        "epochs_min": int(article_hyperparameters["epochs"]/2),
        "validation_every_n": 5,
        "stop_on_validation": True,
        "lower_validations_allowed": 5,
        "evaluator_object": evaluator_validation,
        "validation_metric": metric_to_optimize
    }

    # Due to the extremely long evaluation time it is computationally too expensive to run
    # early stopping on all permutations (estimated >60 days on a high-end GPU).
    # Therefore, the number of epochs is selected only at permutation 0, independently for
    # each of the three map modes: "all_map", "main_diagonal", "off_diagonal".

    # Try to load the selected number of epochs; if not available, run early stopping again
    folder_permutation_0 = output_folder_path + "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, 0)

    if permutation_index == 0:

        parameterSearch = SearchSingleCase(CFM_wrapper,
                                           evaluator_validation=evaluator_validation,
                                           evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_tuning_only, CFM_data_class_validation],
                                                            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

        parameterSearch.search(recommender_input_args,
                               recommender_input_args_last_test=recommender_input_args_last_test,
                               fit_hyperparameters_values=article_hyperparameters,
                               output_folder_path=output_folder_path_permutation,
                               output_file_name_root=CFM_wrapper.RECOMMENDER_NAME,
                               save_model = "last",
                               resume_from_saved=True,
                               evaluate_on_test = "last")



    else:


        dataIO = DataIO(folder_path = folder_permutation_0)
        data_dict = dataIO.load_data(file_name = CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip")

        selected_epochs = data_dict["hyperparameters_best"]["epochs"]

        article_hyperparameters["epochs"] = selected_epochs

        parameterSearch = SearchSingleCase(CFM_wrapper,
                                           evaluator_validation=evaluator_test,
                                           evaluator_test=evaluator_test)

        recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, CFM_data_class_full])

        parameterSearch.search(recommender_input_args_last_test,
                               recommender_input_args_last_test=None,
                               fit_hyperparameters_values=article_hyperparameters,
                               output_folder_path=output_folder_path_permutation,
                               output_file_name_root=CFM_wrapper.RECOMMENDER_NAME,
                               save_model = "best",
                               resume_from_saved=True,
                               evaluate_on_test = "best")



        # Get the data in the correct format to be readable for the data parsing script
        # Put the results in the "result_on_last" field of the metadata file
        # Change the final model file name into the _best_model_last suffix

        metadata_file_name = CFM_wrapper.RECOMMENDER_NAME + "_metadata.zip"

        dataIO = DataIO(folder_path = output_folder_path_permutation)
        search_metadata = dataIO.load_data(file_name = metadata_file_name)

        search_metadata["result_on_last"] = search_metadata["result_on_test_best"]
        dataIO.save_data(file_name = metadata_file_name, data_dict_to_save = search_metadata)

        recommender_object = CFM_wrapper(URM_train_full, CFM_data_class_full)
        recommender_object.load_model(output_folder_path_permutation,
                                      file_name=CFM_wrapper.RECOMMENDER_NAME + "_best_model")

        recommender_object.save_model(output_folder_path_permutation,
                                      file_name=CFM_wrapper.RECOMMENDER_NAME + "_best_model_last")
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     URM_train_last_test=None,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_folder_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=35,
                                     n_random_starts=5,
                                     resume_from_saved=False,
                                     save_model="best",
                                     allow_weighting=True,
                                     similarity_type_list=None):

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    earlystopping_keywargs = {
        "validation_every_n": 5,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation_earlystopping,
        "lower_validations_allowed": 5,
        "validation_metric": metric_to_optimize,
    }

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, GlobalEffects, Random]:
            """
            TopPop, GlobalEffects and Random have no parameters therefore only one evaluation is needed
            """

            parameterSearch = SearchSingleCase(
                recommender_class,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy(
                )
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                    0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values={},
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
            )

            return

        ##########################################################################################################

        if recommender_class in [ItemKNNCFRecommender, UserKNNCFRecommender]:

            if similarity_type_list is None:
                similarity_type_list = [
                    'cosine', 'jaccard', "asymmetric", "dice", "tversky"
                ]

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

            if URM_train_last_test is not None:
                recommender_input_args_last_test = recommender_input_args.copy(
                )
                recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                    0] = URM_train_last_test
            else:
                recommender_input_args_last_test = None

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNRecommender_on_similarity_type,
                recommender_input_args=recommender_input_args,
                parameter_search_space={},
                parameterSearch=parameterSearch,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                resume_from_saved=resume_from_saved,
                save_model=save_model,
                output_folder_path=output_folder_path,
                output_file_name_root=output_file_name_root,
                metric_to_optimize=metric_to_optimize,
                allow_weighting=allow_weighting,
                recommender_input_args_last_test=
                recommender_input_args_last_test)

            if parallelizeKNN:
                pool = multiprocessing.Pool(
                    processes=multiprocessing.cpu_count(), maxtasksperchild=1)
                pool.map(run_KNNCFRecommender_on_similarity_type_partial,
                         similarity_type_list)

                pool.close()
                pool.join()

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            hyperparameters_range_dictionary["beta"] = Real(low=0,
                                                            high=2,
                                                            prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical(
                [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary[
                "negative_interactions_quota"] = Real(low=0.0,
                                                      high=0.5,
                                                      prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')
            hyperparameters_range_dictionary[
                "negative_interactions_quota"] = Real(low=0.0,
                                                      high=0.5,
                                                      prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["batch_size"] = Categorical(
                [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["positive_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["negative_reg"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs, "positive_threshold_BPR": None
                })

        ##########################################################################################################

        if recommender_class is IALSRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary[
                "confidence_scaling"] = Categorical(["linear", "log"])
            hyperparameters_range_dictionary["alpha"] = Real(
                low=1e-3, high=50.0, prior='log-uniform')
            hyperparameters_range_dictionary["epsilon"] = Real(
                low=1e-3, high=10.0, prior='log-uniform')
            hyperparameters_range_dictionary["reg"] = Real(low=1e-5,
                                                           high=1e-2,
                                                           prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS=earlystopping_keywargs)

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is NMFRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)
            hyperparameters_range_dictionary["solver"] = Categorical(
                ["coordinate_descent", "multiplicative_update"])
            hyperparameters_range_dictionary["init_type"] = Categorical(
                ["random", "nndsvda"])
            hyperparameters_range_dictionary["beta_loss"] = Categorical(
                ["frobenius", "kullback-leibler"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["symmetric"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(
                ["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["lambda_i"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["lambda_j"] = Real(
                low=1e-5, high=1e-2, prior='log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(
                low=1e-4, high=1e-1, prior='log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={
                    **earlystopping_keywargs, "positive_threshold_BPR": None,
                    'train_with_sparse_weights': None
                })

        ##########################################################################################################

        if recommender_class is SLIMElasticNetRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["l1_ratio"] = Real(
                low=1e-5, high=1.0, prior='log-uniform')
            hyperparameters_range_dictionary["alpha"] = Real(low=1e-3,
                                                             high=1.0,
                                                             prior='uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        with open(output_folder_path + "ErrorLog.txt", "a") as error_file:
            error_file.write("On recommender {} Exception {}\n".format(
                recommender_class, str(e)))
            URM_train, URM_validation, URM_test = dataSplitter_fold.get_holdout_split()
            UCM_CoupledCF = dataSplitter_fold.get_UCM_from_name("UCM_all")
            ICM_CoupledCF = dataSplitter_fold.get_ICM_from_name("ICM_all")

            # Ensure negative items are consistent with positive items, accounting for removed cold users
            URM_test_negative_fold = get_URM_negatives_without_cold_users(dataSplitter_fold.removed_cold_users, URM_test_negative)

            # Ensure IMPLICIT data
            assert_implicit_data([URM_train, URM_validation, URM_test, URM_test_negative_fold])
            assert_disjoint_matrices([URM_train, URM_validation, URM_test])

            evaluator_validation = EvaluatorNegativeItemSample(URM_validation, URM_test_negative_fold, cutoff_list=cutoff_list_validation)
            evaluator_test = EvaluatorNegativeItemSample(URM_test, URM_test_negative_fold, cutoff_list=cutoff_list_test)
            
            recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_CoupledCF, ICM_CoupledCF])


            # Ablation with training on selected mode
            for map_mode in ["all_map", "main_diagonal", "off_diagonal"]:

                result_folder_path = os.path.join(output_folder_path, "fit_ablation_{}/{}_{}/".format(map_mode, map_mode, fold_index))

                search_metadata = run_train_with_early_stopping(input_flags.dataset_name,
                                                                URM_train, URM_validation,
                                                                UCM_CoupledCF, ICM_CoupledCF,
                                                                evaluator_validation,
                                                                evaluator_test,
                                                                metric_to_optimize,
                                                                result_folder_path,
                                                                map_mode = map_mode)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    from Conferences.WWW.NeuMF_our_interface.Movielens1M.Movielens1MReader import Movielens1MReader
    from Conferences.WWW.NeuMF_our_interface.Pinterest.PinterestICCVReader import PinterestICCVReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)

    elif dataset_name == "pinterest":
        dataset = PinterestICCVReader(result_folder_path)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test_negative])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Training data", "Test data"], result_folder_path +
                         algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([
        URM_train + URM_validation + URM_test, URM_train + URM_validation,
        URM_test
    ], ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "HIT_RATE"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation,
                                                       URM_test_negative,
                                                       cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test,
        URM_test_negative,
        cutoff_list=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            if dataset_name == "movielens1m":
                num_factors = 64
            elif dataset_name == "pinterest":
                num_factors = 16

            neuMF_article_hyperparameters = {
                "epochs": 100,
                "epochs_gmf": 100,
                "epochs_mlp": 100,
                "batch_size": 256,
                "num_factors": num_factors,
                "layers": [num_factors * 4, num_factors * 2, num_factors],
                "reg_mf": 0.0,
                "reg_layers": [0, 0, 0],
                "num_negatives": 4,
                "learning_rate": 1e-3,
                "learning_rate_pretrain": 1e-3,
                "learner": "sgd",
                "learner_pretrain": "adam",
                "pretrain": True
            }

            neuMF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            parameterSearch = SearchSingleCase(
                NeuMF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=neuMF_earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=neuMF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=NeuMF_RecommenderWrapper.RECOMMENDER_NAME
            )

        except Exception as e:

            print("On recommender {} Exception {}".format(
                NeuMF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[NeuMF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=[1, 5, 10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
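
# Hedged usage sketch, mirroring how these experiment scripts are typically
# driven from a __main__ entry point; flags as defined in the signature above:
def _example_read_data_split_and_search():
    for dataset_name in ["movielens1m", "pinterest"]:
        read_data_split_and_search(dataset_name,
                                   flag_baselines_tune=True,
                                   flag_DL_article_default=True,
                                   flag_print_results=True)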
def read_data_split_and_search(dataset_variant,
                               train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)

    dataset = CiteulikeReader(result_folder_path,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    del dataset.ICM_DICT["ICM_tokens_bool"]

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, the validation data is chosen as a subset of the
    # train data, i.e., the validation interactions remain inside the train set
    if train_interactions == 1:
        # In this case the train data will contain the validation data, to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation,
        cutoff_list=[150],
        exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(
        URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train_last_test,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:

                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:

                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            # Hyperparameters from the article. An assumption on their meaning,
            # based on the Collaborative Variational Autoencoder formulation:
            # lambda_u, lambda_v, lambda_r are regularization weights; a and b
            # are the confidence values for observed/unobserved interactions.
            cvae_recommender_article_hyperparameters = {
                "epochs": 200,
                "learning_rate_vae": 1e-2,
                "learning_rate_cvae": 1e-3,
                "num_factors": 50,
                "dimensions_vae": [200, 100],
                "epochs_vae": [50, 50],
                "batch_size": 128,
                "lambda_u": 0.1,
                "lambda_v": 10,
                "lambda_r": 1,
                "a": 1,
                "b": 0.01,
                "M": 300,
            }

            cvae_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            parameterSearch = SearchSingleCase(
                CollaborativeVAE_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]
                ],
                FIT_KEYWORD_ARGS=cvae_earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=
                cvae_recommender_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeVAE_RecommenderWrapper.
                RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                CollaborativeVAE_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        # Count users with at least one test interaction: np.ediff1d on the CSR
        # indptr gives the number of stored entries in each user's row
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=other_algorithm_list,
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL"],
            cutoffs_list=[50, 100, 150, 200, 250, 300],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[150],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
Example #14
    # Note: this example starts mid-function in the source; weight_list,
    # parameterSearch and the recommender objects passed to the constructor
    # below are defined earlier in the original script.
    hyperparameters_range_dictionary = {}
    hyperparameters_range_dictionary["alpha"] = Categorical(weight_list)
    hyperparameters_range_dictionary["beta"] = Categorical(weight_list)
    hyperparameters_range_dictionary["gamma"] = Categorical(weight_list)
    hyperparameters_range_dictionary["delta"] = Categorical(weight_list)
    hyperparameters_range_dictionary["epsilon"] = Categorical([
        0.1, 0.2, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.8, 0.9, 1,
        2
    ])
    hyperparameters_range_dictionary["zeta"] = Categorical(weight_list)

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[
            URM_train, itemKNNCF, recommenderELASTIC, recommenderCB,
            recommenderBetaGRAPH, recommederUserKNN, recommenderCYTHON
        ],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})
    output_folder_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # RUN
    n_cases = 45
    metric_to_optimize = "MAP"
    # The original snippet is truncated at this call; the remaining keyword
    # arguments are an assumed completion, consistent with the other search
    # calls on this page.
    parameterSearch.search(
        recommender_input_args,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        metric_to_optimize=metric_to_optimize,
        output_folder_path=output_folder_path,
        output_file_name_root="ScoresHybrid")
def runParameterSearch_Content(recommender_class,
                               URM_train,
                               ICM_object,
                               ICM_name,
                               URM_train_last_test=None,
                               n_cases=30,
                               n_random_starts=5,
                               evaluator_validation=None,
                               evaluator_test=None,
                               metric_to_optimize="PRECISION",
                               output_folder_path="result_experiments/",
                               parallelizeKNN=False,
                               allow_weighting=True,
                               similarity_type_list=None,
                               allow_bias_ICM=False):

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    ##########################################################################################################

    output_file_name_root = recommender_class.RECOMMENDER_NAME + "_{}".format(
        ICM_name)

    parameterSearch = SearchBayesianSkopt(
        recommender_class,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    if similarity_type_list is None:
        similarity_type_list = [
            'cosine', 'jaccard', "asymmetric", "dice", "tversky"
        ]

    recommender_input_args = SearchInputRecommenderArgs(
        CONSTRUCTOR_POSITIONAL_ARGS=[ICM_object, URM_train],
        CONSTRUCTOR_KEYWORD_ARGS={},
        FIT_POSITIONAL_ARGS=[],
        FIT_KEYWORD_ARGS={})

    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            1] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    run_KNNCBFRecommender_on_similarity_type_partial = partial(
        run_KNNRecommender_on_similarity_type,
        recommender_input_args=recommender_input_args,
        parameter_search_space={},
        parameterSearch=parameterSearch,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        allow_weighting=allow_weighting,
        allow_bias_ICM=allow_bias_ICM,
        recommender_input_args_last_test=recommender_input_args_last_test)

    if parallelizeKNN:
        pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                    maxtasksperchild=1)
        pool.map(run_KNNCBFRecommender_on_similarity_type_partial,
                 similarity_type_list)

        pool.close()
        pool.join()

    else:

        for similarity_type in similarity_type_list:
            run_KNNCBFRecommender_on_similarity_type_partial(similarity_type)
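
# Usage sketch for runParameterSearch_Content, mirroring the calls shown earlier
# on this page. An assumption: URM_train, URM_validation, dataset and the two
# evaluators have been prepared as in the read_data_split_and_search examples.
for ICM_name, ICM_object in dataset.ICM_DICT.items():
    runParameterSearch_Content(ItemKNNCBFRecommender,
                               URM_train=URM_train,
                               URM_train_last_test=URM_train + URM_validation,
                               ICM_name=ICM_name,
                               ICM_object=ICM_object.copy(),
                               evaluator_validation=evaluator_validation,
                               evaluator_test=evaluator_test,
                               metric_to_optimize="PRECISION",
                               n_cases=30,
                               n_random_starts=5)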
Example #16
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens1m":
        dataset = Movielens1MReader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 300,
            'num_factors': 50,
            'dropout_percentage': 0.03,
            'learning_rate': 1e-4,
            'regularization_rate': 0.1,
            'epochs': 1500,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 800

    elif dataset_name == "hetrec":
        dataset = MovielensHetrec2011Reader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 300,
            'num_factors': 50,
            'dropout_percentage': 0.03,
            'learning_rate': 1e-4,
            'regularization_rate': 0.1,
            'epochs': 1500,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 800

    elif dataset_name == "filmtrust":
        dataset = FilmTrustReader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 150,
            'num_factors': 40,
            'dropout_percentage': 0.00,
            'learning_rate': 5e-5,
            'regularization_rate': 0.1,
            'epochs': 100,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 0

    elif dataset_name == "frappe":
        dataset = FrappeReader(result_folder_path)
        article_hyperparameters = {
            'num_neurons': 300,
            'num_factors': 50,
            'dropout_percentage': 0.03,
            'learning_rate': 1e-4,
            'regularization_rate': 0.01,
            'epochs': 100,
            'batch_size': 1024,
            'display_epoch': None,
            'display_step': None,
            'verbose': True
        }
        early_stopping_epochs_min = 0

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # use max cutoff to compute full MAP and NDCG
    max_cutoff = URM_train.shape[1] - 1

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 50, max_cutoff]

    evaluator_validation = EvaluatorHoldout(URM_validation,
                                            cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 20,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
            'epochs_min': early_stopping_epochs_min
        }

        try:

            parameterSearch = SearchSingleCase(
                UNeuRec_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=UNeuRec_RecommenderWrapper.
                RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                UNeuRec_RecommenderWrapper, str(e)))
            traceback.print_exc()

        try:

            parameterSearch = SearchSingleCase(
                INeuRec_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=INeuRec_RecommenderWrapper.
                RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                INeuRec_RecommenderWrapper, str(e)))
            traceback.print_exc()

    # if isUNeuRec_tune:
    #
    #     try:
    #
    #         runParameterSearch_NeuRec(UNeuRec_RecommenderWrapper,
    #                                  URM_train = URM_train,
    #                                  URM_train_last_test = URM_train + URM_validation,
    #                                  earlystopping_hyperparameters = earlystopping_hyperparameters,
    #                                  metric_to_optimize = metric_to_optimize,
    #                                  evaluator_validation = evaluator_validation,
    #                                  evaluator_test = evaluator_test,
    #                                  result_folder_path = result_folder_path,
    #                                  n_cases = n_cases,
    #                                  n_random_starts = n_random_starts,
    #                                  output_file_name_root = UNeuRec_RecommenderWrapper.RECOMMENDER_NAME)
    #
    #
    #     except Exception as e:
    #
    #         print("On recommender {} Exception {}".format(UNeuRec_RecommenderWrapper, str(e)))
    #         traceback.print_exc()
    #
    # if isINeuRec_tune:
    #
    #     try:
    #
    #         runParameterSearch_NeuRec(INeuRec_RecommenderWrapper,
    #                                  URM_train = URM_train,
    #                                  URM_train_last_test = URM_train + URM_validation,
    #                                  earlystopping_hyperparameters = earlystopping_hyperparameters,
    #                                  metric_to_optimize = metric_to_optimize,
    #                                  evaluator_validation = evaluator_validation,
    #                                  evaluator_test = evaluator_test,
    #                                  result_folder_path = result_folder_path,
    #                                  n_cases = n_cases,
    #                                  n_random_starts = n_random_starts,
    #                                  output_file_name_root = INeuRec_RecommenderWrapper.RECOMMENDER_NAME)
    #
    #
    #     except Exception as e:
    #
    #         print("On recommender {} Exception {}".format(INeuRec_RecommenderWrapper, str(e)))
    #         traceback.print_exc()
    #

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                INeuRec_RecommenderWrapper, UNeuRec_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "MAP", "NDCG", "MRR"],
            cutoffs_list=[5, 10, 50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name +
            "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
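
# Driver sketch for the function above. An assumption: the dataset names are
# the four handled by the reader selection at the top of this example.
if __name__ == '__main__':
    for dataset_name in ["movielens1m", "hetrec", "filmtrust", "frappe"]:
        read_data_split_and_search(dataset_name,
                                   flag_baselines_tune=True,
                                   flag_DL_article_default=True,
                                   flag_print_results=True)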
def runParameterSearch_Hybrid(recommender_class,
                              URM_train,
                              ICM_train,
                              W_sparse_CF=None,
                              URM_train_last_test=None,
                              metric_to_optimize="MAP",
                              evaluator_validation=None,
                              evaluator_test=None,
                              evaluator_validation_earlystopping=None,
                              output_folder_path="result_experiments/",
                              n_cases=35,
                              n_random_starts=5,
                              resume_from_saved=False,
                              save_model="best",
                              allow_weighting=True,
                              similarity_type_list=None):

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    URM_train = URM_train.copy()
    ICM_train = ICM_train.copy()
    # W_sparse_CF = W_sparse_CF.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME

        parameterSearch = SearchBayesianSkopt(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        ##########################################################################################################

        if recommender_class in [
                ScoresHybridP3alphaKNNCBF, ScoresHybridRP3betaKNNCBF,
                ScoresHybridSpecialized, ScoresHybridSpecializedCold,
                ScoresHybridSpecializedV2Cold, ScoresHybridSpecializedV2Mid,
                ScoresHybridSpecializedV2Warm, ScoresHybridSpecializedV2Mid12,
                ScoresHybridSpecializedV2Warm12, ScoresHybridSpecializedV3Cold,
                ScoresHybridSpecializedV3Warm
        ]:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK_P"] = Integer(5, 3000)
            hyperparameters_range_dictionary["alpha_P"] = Real(low=0,
                                                               high=2,
                                                               prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity_P"] = Categorical([False])
            hyperparameters_range_dictionary["topK"] = Integer(5, 3000)
            hyperparameters_range_dictionary["shrink"] = Integer(0, 5000)
            hyperparameters_range_dictionary["similarity"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=2,
                                                             prior='uniform')
            if recommender_class is ScoresHybridRP3betaKNNCBF:
                hyperparameters_range_dictionary["beta_P"] = Real(
                    low=0, high=2, prior='uniform')

            if allow_weighting:
                hyperparameters_range_dictionary[
                    "feature_weighting"] = Categorical(
                        ["none", "BM25", "TF-IDF"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class in [
                ScoresHybridKNNCFKNNCBF, ScoresHybridUserKNNCFKNNCBF
        ]:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK_CF"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink_CF"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity_CF"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize_CF"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["topK"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize"] = Categorical(
                [True, False])
            hyperparameters_range_dictionary["alpha"] = Real(low=0,
                                                             high=1,
                                                             prior='uniform')

            if allow_weighting:
                hyperparameters_range_dictionary[
                    "feature_weighting"] = Categorical(
                        ["none", "BM25", "TF-IDF"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})
        ##########################################################################################################

        if recommender_class is ScoresHybridSpecializedAdaptive:

            hyperparameters_range_dictionary = {}
            # Cold users hybrid
            hyperparameters_range_dictionary["topK_P_C"] = Integer(5, 1500)
            hyperparameters_range_dictionary["alpha_P_C"] = Real(
                low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary["beta_P_C"] = Real(
                low=0, high=2, prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity_P_C"] = Categorical([False])
            hyperparameters_range_dictionary["topK_C"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink_C"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity_C"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize_C"] = Categorical(
                [True, False])
            # hyperparameters_range_dictionary["alpha_C"] = Real(low=0, high=1, prior='uniform')
            if allow_weighting:
                hyperparameters_range_dictionary[
                    "feature_weighting_C"] = Categorical(
                        ["none", "BM25", "TF-IDF"])

            # Warm users hybrid
            hyperparameters_range_dictionary["topK_P"] = Integer(5, 1500)
            hyperparameters_range_dictionary["alpha_P"] = Real(low=0,
                                                               high=2,
                                                               prior='uniform')
            hyperparameters_range_dictionary["beta_P"] = Real(low=0,
                                                              high=2,
                                                              prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity_P"] = Categorical([False])
            hyperparameters_range_dictionary["topK"] = Integer(5, 1500)
            hyperparameters_range_dictionary["shrink"] = Integer(0, 1500)
            hyperparameters_range_dictionary["similarity"] = Categorical(
                ["tversky", "tanimoto", 'cosine', 'asymmetric'])
            hyperparameters_range_dictionary["normalize"] = Categorical(
                [True, False])
            # hyperparameters_range_dictionary["alpha"] = Real(low=0, high=1, prior='uniform')
            if allow_weighting:
                hyperparameters_range_dictionary[
                    "feature_weighting"] = Categorical(
                        ["none", "BM25", "TF-IDF"])

            hyperparameters_range_dictionary["threshold"] = Integer(1, 30)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, ICM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is ScoresHybridP3alphaPureSVD:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK_P"] = Integer(5, 1000)
            hyperparameters_range_dictionary["alpha_P"] = Real(low=0,
                                                               high=2,
                                                               prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity_P"] = Categorical([False])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 500)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

        ##########################################################################################################

        if recommender_class is CFW_D_Similarity_Linalg:
            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["topK"] = Integer(5, 1000)
            hyperparameters_range_dictionary["add_zeros_quota"] = Real(
                low=0, high=1, prior='uniform')
            hyperparameters_range_dictionary[
                "normalize_similarity"] = Categorical([True, False])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, ICM_train, W_sparse_CF
                ],
                CONSTRUCTOR_KEYWORD_ARGS={},
                FIT_POSITIONAL_ARGS=[],
                FIT_KEYWORD_ARGS={})

    #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test
        else:
            recommender_input_args_last_test = None

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(
            recommender_input_args,
            parameter_search_space=hyperparameters_range_dictionary,
            n_cases=n_cases,
            n_random_starts=n_random_starts,
            resume_from_saved=resume_from_saved,
            save_model=save_model,
            output_folder_path=output_folder_path,
            output_file_name_root=output_file_name_root,
            metric_to_optimize=metric_to_optimize,
            recommender_input_args_last_test=recommender_input_args_last_test)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
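
# Usage sketch for runParameterSearch_Hybrid. An assumption: URM_train,
# ICM_train and the evaluators are prepared beforehand, and
# ScoresHybridP3alphaKNNCBF is one of the classes the function dispatches on.
runParameterSearch_Hybrid(ScoresHybridP3alphaKNNCBF,
                          URM_train=URM_train,
                          ICM_train=ICM_train,
                          URM_train_last_test=URM_train + URM_validation,
                          metric_to_optimize="MAP",
                          evaluator_validation=evaluator_validation,
                          evaluator_test=evaluator_test,
                          n_cases=35,
                          n_random_starts=5)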
Example #18
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    # Ensure both experiments use the same data
    dataset_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME,
        dataset_name.replace("_remove_cold_items", ""))

    if not os.path.exists(dataset_folder_path):
        os.makedirs(dataset_folder_path)

    if 'amazon_music' in dataset_name:
        dataset = AmazonMusicReader(dataset_folder_path)

    elif 'movielens1m_ours' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="ours")

    elif 'movielens1m_original' in dataset_name:
        dataset = Movielens1MReader(dataset_folder_path, type="original")

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT matrices
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    cold_items_statistics(URM_train, URM_validation, URM_test,
                          URM_test_negative)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Training data", "Test data"], result_folder_path +
                         algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([
        URM_train + URM_validation + URM_test, URM_train + URM_validation,
        URM_test
    ], ["Full data", "Training data", "Test data"],
                               result_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    if "_remove_cold_items" in dataset_name:
        ignore_items_validation = get_cold_items(URM_train)
        ignore_items_test = get_cold_items(URM_train + URM_validation)
    else:
        ignore_items_validation = None
        ignore_items_test = None

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation,
        URM_test_negative,
        cutoff_list=cutoff_list_validation,
        ignore_items=ignore_items_validation)
    evaluator_test = EvaluatorNegativeItemSample(
        URM_test,
        URM_test_negative,
        cutoff_list=cutoff_list_test,
        ignore_items=ignore_items_test)

    # The Evaluator automatically skips users with no test interactions.
    # In this case the evaluations done with and without cold items must be
    # comparable, so we ensure the same users are included in both cases.
    evaluator_validation.users_to_evaluate = np.arange(URM_train.shape[0])
    evaluator_test.users_to_evaluate = np.arange(URM_train.shape[0])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize,
        }

        num_factors = 64

        article_hyperparameters = {
            'epochs': 500,
            'learning_rate': 0.001,
            'batch_size': 256,
            'num_negatives': 4,
            'layers': (num_factors * 4, num_factors * 2, num_factors),
            'regularization_layers': (0, 0, 0),
            'learner': 'adam',
            'verbose': False,
        }

        parameterSearch = SearchSingleCase(
            DELF_MLP_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=DELF_MLP_RecommenderWrapper.RECOMMENDER_NAME)

        parameterSearch = SearchSingleCase(
            DELF_EF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=DELF_EF_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DELF_MLP_RecommenderWrapper, DELF_EF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=cutoff_list_test,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
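
# get_cold_items is called in the example above but not defined on this page.
# A minimal sketch of a compatible helper, assuming "cold" items are those with
# no interactions in the given URM:
import numpy as np
import scipy.sparse as sps

def get_cold_items(URM):
    # nnz per column of the CSC matrix is the interaction count of each item
    item_interactions = np.ediff1d(sps.csc_matrix(URM).indptr)
    return np.arange(URM.shape[1])[item_interactions == 0]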
Example #19
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "gowalla":
        dataset = GowallaReader(result_folder_path)

    elif dataset_name == "yelp":
        dataset = YelpReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    print_negative_items_stats(URM_train, URM_validation, URM_test,
                               URM_test_negative)

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])

    # URM_test_negative overlaps with both train and test,
    # so it is excluded from the disjointness check
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [10]
    cutoff_list_test = [5, 10, 20]

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                 URM_test_negative,
                                                 cutoff_list=cutoff_list_test)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        # An empty matrix is provided as URM_negative for the training samples
        article_hyperparameters = {
            "batch_size": 512,
            "epochs": 1500,
            "epochs_MFBPR": 500,
            "embedding_size": 64,
            "hidden_size": 128,
            "negative_sample_per_positive": 1,
            "negative_instances_per_positive": 4,
            "regularization_users_items": 0.01,
            "regularization_weights": 10,
            "regularization_filter_weights": 1,
            "learning_rate_embeddings": 0.05,
            "learning_rate_CNN": 0.05,
            "channel_size": [32, 32, 32, 32, 32, 32],
            "dropout": 0.0,
            "epoch_verbose": 1,
        }

        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": True,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
            "epochs_min": 150
        }

        parameterSearch = SearchSingleCase(
            ConvNCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
            0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=ConvNCF_RecommenderWrapper.RECOMMENDER_NAME)

        # Remember to close the global session, since ConvNCF uses global variables
        ConvNCF.close_session(verbose=True)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[ConvNCF_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=cutoff_list_test,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=cutoff_list_validation,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
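
# Driver sketch for the ConvNCF experiment above. An assumption: "gowalla" and
# "yelp" are the only dataset names the reader selection accepts.
if __name__ == '__main__':
    for dataset_name in ["gowalla", "yelp"]:
        read_data_split_and_search(dataset_name,
                                   flag_DL_article_default=True,
                                   flag_print_results=True)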
Example #20
def read_data_split_and_search(dataset_name,
                               cold_start=False,
                               cold_items=None,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    if not cold_start:
        result_folder_path = "result_experiments/{}/{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)
    else:
        result_folder_path = "result_experiments/{}/{}_cold_{}_{}/".format(
            CONFERENCE_NAME, ALGORITHM_NAME, cold_items, dataset_name)

    if dataset_name == "movielens1m_original":
        assert (cold_start is not True)
        dataset = Movielens1MReader(result_folder_path, type="original")

    elif dataset_name == "movielens1m_ours":
        dataset = Movielens1MReader(result_folder_path,
                                    type="ours",
                                    cold_start=cold_start,
                                    cold_items=cold_items)

    elif dataset_name == "hetrec":
        assert (cold_start is not True)
        dataset = MovielensHetrec2011Reader(result_folder_path)

    elif dataset_name == "amazon_instant_video":
        assert (cold_start is not True)
        dataset = AmazonInstantVideoReader(result_folder_path)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["Train data", "Test data"], result_folder_path +
                         algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([
        URM_train + URM_validation + URM_test, URM_train + URM_validation,
        URM_test
    ], ["URM_all", "URM train", "URM test"],
                               result_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    metric_to_optimize = "RECALL"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    if not cold_start:
        cutoff_list_validation = [50]
        cutoff_list_test = [20, 30, 40, 50, 60, 70, 80, 90, 100]
    else:
        cutoff_list_validation = [20]
        cutoff_list_test = [20]

    evaluator_validation = EvaluatorHoldout(URM_validation,
                                            cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    ################################################################################################
    ###### KNN CF

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            spectralCF_article_hyperparameters = {
                "epochs": 1000,
                "batch_size": 1024,
                "embedding_size": 16,
                "decay": 0.001,
                "k": 3,
                "learning_rate": 1e-3,
            }

            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
            }

            parameterSearch = SearchSingleCase(
                SpectralCF_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
                FIT_KEYWORD_ARGS=spectralCF_earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=
                recommender_input_args_last_test,
                fit_hyperparameters_values=spectralCF_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=SpectralCF_RecommenderWrapper.
                RECOMMENDER_NAME + "_article_default")

        except Exception as e:

            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    if flag_DL_tune:

        try:

            spectralCF_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 5,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
                "epochs": 2000
            }

            runParameterSearch_SpectralCF(
                SpectralCF_RecommenderWrapper,
                URM_train=URM_train,
                URM_train_last_test=URM_train + URM_validation,
                earlystopping_hyperparameters=spectralCF_earlystopping_hyperparameters,
                metric_to_optimize=metric_to_optimize,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=result_folder_path,
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                output_file_name_root=SpectralCF_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
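        # Note: URM_test is a CSR matrix, so np.ediff1d(URM_test.indptr) is the
        # per-user interaction count and the sum above counts the users that have
        # at least one test interaction.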

        file_name = "{}..//{}_{}_".format(
            result_folder_path,
            ALGORITHM_NAME if not cold_start else "{}_cold_{}".format(
                ALGORITHM_NAME, cold_items), dataset_name)

        if cold_start:
            cutoffs_to_report_list = [20]
        else:
            cutoffs_to_report_list = [20, 40, 60, 80, 100]

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=other_algorithm_list,
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=None,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL", "MAP"],
            cutoffs_list=cutoffs_to_report_list,
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name +
            "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[50],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    result_folder_path = "result_experiments/IJCAI/CoupledCF_{}/".format(
        dataset_name)

    #Logger(path=result_folder_path, name_file='CoupledCF_' + dataset_name)

    if dataset_name.startswith("movielens1m"):

        if dataset_name.endswith("_original"):
            dataset = Movielens1MReader(result_folder_path, type='original')
        elif dataset_name.endswith("_ours"):
            dataset = Movielens1MReader(result_folder_path, type='ours')
        else:
            print("Dataset name not supported, current is {}".format(
                dataset_name))
            return

        UCM_to_report = ["UCM_all"]
        ICM_to_report = ["ICM_all"]

        UCM_CoupledCF = dataset.ICM_DICT["UCM_all"]
        ICM_CoupledCF = dataset.ICM_DICT["ICM_all"]

    elif dataset_name.startswith("tafeng"):

        if dataset_name.endswith("_original"):
            dataset = TafengReader(result_folder_path, type='original')
        elif dataset_name.endswith("_ours"):
            dataset = TafengReader(result_folder_path, type='ours')
        else:
            print("Dataset name not supported, current is {}".format(
                dataset_name))
            return

        UCM_to_report = ["UCM_all"]
        ICM_to_report = ["ICM_original"]

        UCM_CoupledCF = dataset.ICM_DICT["UCM_all"]
        ICM_CoupledCF = dataset.ICM_DICT["ICM_original"]

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    UCM_dict = {
        UCM_name: UCM_object
        for (UCM_name, UCM_object) in dataset.ICM_DICT.items()
        if "UCM" in UCM_name
    }
    ICM_dict = {
        ICM_name: ICM_object
        for (ICM_name, ICM_object) in dataset.ICM_DICT.items()
        if "ICM" in ICM_name
    }

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Matrices are 1-indexed, so remove first row
    print_negative_items_stats(URM_train[1:], URM_validation[1:], URM_test[1:],
                               URM_test_negative[1:])

    # Ensure IMPLICIT data
    from Utils.assertions_on_data_for_experiments import assert_implicit_data, assert_disjoint_matrices

    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])
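    # A sketch of what these assertions verify (the actual implementations live in
    # Utils.assertions_on_data_for_experiments): implicit data means every stored
    # value is 1, disjoint means no (user, item) cell is nonzero in two splits.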

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    cutoff_list_validation = [5]
    cutoff_list_test = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_test_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                 URM_test_negative,
                                                 cutoff_list=cutoff_list_test)
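    # Hedged usage sketch: EvaluatorNegativeItemSample ranks each user's held-out
    # positives against that user's fixed negative samples. Assuming a fitted
    # `recommender` instance, a single evaluation would look like:
    #   results, results_run_string = evaluator_test.evaluateRecommender(recommender)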

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ###############################################################################################
        ##### Item Content Baselines

        for ICM_name, ICM_object in ICM_dict.items():

            try:

                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### User Content Baselines

        for UCM_name, UCM_object in UCM_dict.items():

            try:

                runParameterSearch_Content(
                    UserKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=UCM_name,
                    ICM_object=UCM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

                runParameterSearch_Hybrid(
                    UserKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=UCM_name,
                    ICM_object=UCM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On CBF recommender for UCM {} Exception {}".format(
                    UCM_name, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        model_name = dataset.DATASET_NAME

        earlystopping_hyperparameters = {
            'validation_every_n': 5,
            'stop_on_validation': True,
            'lower_validations_allowed': 5,
            'evaluator_object': evaluator_validation,
            'validation_metric': metric_to_optimize
        }

        if 'tafeng' in dataset_name:
            model_number = 3
            article_hyperparameters = {
                'learning_rate': 0.005,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': model_name,
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }
        else:
            # movielens1m and any other dataset
            model_number = 3
            article_hyperparameters = {
                'learning_rate': 0.001,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': model_name,
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }

        parameterSearch = SearchSingleCase(
            DeepCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=DeepCF_RecommenderWrapper.RECOMMENDER_NAME)

        if 'tafeng' in dataset_name:
            # The tafeng model has a different structure
            model_number = 2
            article_hyperparameters = {
                'learning_rate': 0.005,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': "Tafeng",
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }
        else:
            # movielens1m uses this structure with model 2
            model_number = 2
            article_hyperparameters = {
                'learning_rate': 0.001,
                'epochs': 100,
                'n_negative_sample': 4,
                'temp_file_folder': None,
                'dataset_name': "Movielens1M",
                'number_model': model_number,
                'verbose': 0,
                'plot_model': False,
            }

        parameterSearch = SearchSingleCase(
            CoupledCF_RecommenderWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[
                URM_train, UCM_CoupledCF, ICM_CoupledCF
            ],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

        parameterSearch.search(
            recommender_input_args,
            recommender_input_args_last_test=recommender_input_args_last_test,
            fit_hyperparameters_values=article_hyperparameters,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            output_file_name_root=CoupledCF_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[
                DeepCF_RecommenderWrapper, CoupledCF_RecommenderWrapper
            ],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_to_report,
            UCM_names_list=UCM_to_report)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["HIT_RATE", "NDCG"],
            cutoffs_list=[1, 5, 10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name +
            "{}_latex_results.txt".format("beyond_accuracy_metrics"),
            metrics_list=[
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[5],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[5],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
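

# A minimal driver sketch for the function above. This entry point is hypothetical
# (the actual experiment scripts parse these flags from the command line), but the
# dataset names follow the startswith/endswith checks inside the function.
if __name__ == '__main__':

    for dataset_name in ["movielens1m_ours", "tafeng_ours"]:
        read_data_split_and_search(dataset_name,
                                   flag_baselines_tune=True,
                                   flag_DL_article_default=True,
                                   flag_print_results=True)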
# Example #22
def runParameterSearch_SpectralCF(recommender_class,
                                  URM_train,
                                  earlystopping_hyperparameters,
                                  output_file_name_root,
                                  URM_train_last_test=None,
                                  n_cases=35,
                                  n_random_starts=5,
                                  evaluator_validation=None,
                                  evaluator_test=None,
                                  metric_to_optimize="RECALL",
                                  output_folder_path="result_experiments/"):

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    parameterSearch = SearchBayesianSkopt(
        recommender_class,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test)

    ##########################################################################################################

    if recommender_class is SpectralCF_RecommenderWrapper:

        hyperparameters_range_dictionary = {}
        hyperparameters_range_dictionary["batch_size"] = Categorical(
            [128, 256, 512, 1024, 2048])
        hyperparameters_range_dictionary["embedding_size"] = Categorical(
            [4, 8, 16, 32])
        hyperparameters_range_dictionary["decay"] = Real(low=1e-5,
                                                         high=1e-1,
                                                         prior='log-uniform')
        hyperparameters_range_dictionary["learning_rate"] = Real(
            low=1e-5, high=1e-2, prior='log-uniform')
        hyperparameters_range_dictionary["k"] = Integer(low=1, high=6)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train],
            CONSTRUCTOR_KEYWORD_ARGS={},
            FIT_POSITIONAL_ARGS=[],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

    else:
        # Guard: without it, recommender_input_args would be undefined and the
        # search call below would fail with a NameError for unsupported classes.
        raise ValueError("runParameterSearch_SpectralCF only supports SpectralCF_RecommenderWrapper, got {}".format(recommender_class))

    #########################################################################################################

    if URM_train_last_test is not None:
        recommender_input_args_last_test = recommender_input_args.copy()
        recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test
    else:
        recommender_input_args_last_test = None

    parameterSearch.search(
        recommender_input_args,
        parameter_search_space=hyperparameters_range_dictionary,
        n_cases=n_cases,
        n_random_starts=n_random_starts,
        resume_from_saved=True,
        output_folder_path=output_folder_path,
        output_file_name_root=output_file_name_root,
        metric_to_optimize=metric_to_optimize,
        recommender_input_args_last_test=recommender_input_args_last_test)
def read_data_split_and_search(dataset_variant,
                               train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    # Using the dataReader from CollaborativeVAE_our_interface, since both papers use the same data in the same way
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)
    result_folder_path_CollaborativeVAE = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, "CollaborativeVAE", dataset_variant,
        train_interactions)

    dataset = CiteulikeReader(result_folder_path_CollaborativeVAE,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, the validation split is chosen as a subset
    # of the train data, keeping the validation interactions in the train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation,
        cutoff_list=[150],
        exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(
        URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])
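    # exclude_seen_validation is False only when the validation interactions are
    # also part of the train set (train_interactions == 1): otherwise the evaluator
    # would filter every validation item out as already seen.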

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            collaborativeDL_article_hyperparameters = {
                "para_lv": 10,
                "para_lu": 1,
                "para_ln": 1e3,
                "batch_size": 128,
                "epoch_sdae": 200,
                "epoch_dae": 200,
            }
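            # Presumably para_lv, para_lu and para_ln are the lambda_v, lambda_u and
            # lambda_n regularization weights of the Collaborative Deep Learning
            # article, while epoch_sdae and epoch_dae control the autoencoder training.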

            parameterSearch = SearchSingleCase(
                CollaborativeDL_Matlab_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]
                ],
                FIT_KEYWORD_ARGS={})

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=collaborativeDL_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeDL_Matlab_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                CollaborativeDL_Matlab_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[CollaborativeDL_Matlab_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL"],
            cutoffs_list=[50, 100, 150, 200, 250, 300],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[150],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
# Example #24
# hyperparameters_range_dictionary["normalize"] = Categorical([True, False])
# hyperparameters_range_dictionary["tversky_alpha"] = Real(0, 1)
# hyperparameters_range_dictionary["tversky_beta"] = Real(0, 1)

# hyperparameters_range_dictionary = {}
# hyperparameters_range_dictionary["topK"] = Integer(5, 2000)
# hyperparameters_range_dictionary["add_zeros_quota"] = Real(low = 0, high = 1, prior = 'uniform')
# hyperparameters_range_dictionary["normalize_similarity"] = Categorical([True, False])


# ucm_w = sps.load_npz('Data/ucm_weighted.npz')
# ucm_age, ucm_region, ucm_all = Data.get_ucm()

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train],
    CONSTRUCTOR_KEYWORD_ARGS={},
    FIT_POSITIONAL_ARGS=[],
    FIT_KEYWORD_ARGS={}
)

output_folder_path = "result_experiments/"

import os

# If directory does not exist, create
if not os.path.exists(output_folder_path):
    os.makedirs(output_folder_path)

n_cases = 300
metric_to_optimize = "MAP"
parameterSearch.search(recommender_input_args,
                       parameter_search_space = hyperparameters_range_dictionary,
                       n_cases = n_cases,
                       metric_to_optimize = metric_to_optimize,
                       output_folder_path = output_folder_path)
def runParameterSearch_cold_user_MF(recommender_class, URM_train, URM_train_last_test = None, metric_to_optimize = "PRECISION",
                                     evaluator_validation = None, evaluator_test = None, evaluator_validation_earlystopping = None,
                                     output_folder_path ="result_experiments/",
                                     n_cases = 35, n_random_starts = 5, resume_from_saved = True):




    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_validation_earlystopping,
                              "lower_validations_allowed": 5,
                              "validation_metric": metric_to_optimize,
                              }
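    # Early-stopping semantics, as used throughout these examples: evaluate on the
    # validation data every 5 epochs and stop after 5 consecutive evaluations with
    # no improvement of the validation metric.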

    URM_train = URM_train.copy()

    if URM_train_last_test is not None:
        URM_train_last_test = URM_train_last_test.copy()

    try:

        output_file_name_root = recommender_class.RECOMMENDER_NAME


        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical([True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low = 1e-4, high = 1e-1, prior = 'log-uniform')
            hyperparameters_range_dictionary["negative_interactions_quota"] = Real(low = 0.0, high = 0.5, prior = 'uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = earlystopping_keywargs
            )

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([500])
            hyperparameters_range_dictionary["use_bias"] = Categorical([True, False])
            hyperparameters_range_dictionary["batch_size"] = Categorical([1])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["item_reg"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')
            hyperparameters_range_dictionary["user_reg"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low = 1e-4, high = 1e-1, prior = 'log-uniform')
            hyperparameters_range_dictionary["negative_interactions_quota"] = Real(low = 0.0, high = 0.5, prior = 'uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = earlystopping_keywargs
            )

        ##########################################################################################################

        if recommender_class is MatrixFactorization_BPR_Cython:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["sgd_mode"] = Categorical(["sgd", "adagrad", "adam"])
            hyperparameters_range_dictionary["epochs"] = Categorical([1500])
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["batch_size"] = Categorical([1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024])
            hyperparameters_range_dictionary["positive_reg"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')
            hyperparameters_range_dictionary["negative_reg"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')
            hyperparameters_range_dictionary["learning_rate"] = Real(low = 1e-4, high = 1e-1, prior = 'log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = {**earlystopping_keywargs,
                                    "positive_threshold_BPR": None}
            )

        ##########################################################################################################

        if recommender_class is IALSRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 200)
            hyperparameters_range_dictionary["confidence_scaling"] = Categorical(["linear", "log"])
            hyperparameters_range_dictionary["alpha"] = Real(low = 1e-3, high = 50.0, prior = 'log-uniform')
            hyperparameters_range_dictionary["epsilon"] = Real(low = 1e-3, high = 10.0, prior = 'log-uniform')
            hyperparameters_range_dictionary["reg"] = Real(low = 1e-5, high = 1e-2, prior = 'log-uniform')

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = earlystopping_keywargs
            )


        ##########################################################################################################

        if recommender_class is PureSVDRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = {}
            )


        ##########################################################################################################

        if recommender_class is NMFRecommender:

            hyperparameters_range_dictionary = {}
            hyperparameters_range_dictionary["num_factors"] = Integer(1, 350)
            hyperparameters_range_dictionary["solver"] = Categorical(["coordinate_descent", "multiplicative_update"])
            hyperparameters_range_dictionary["init_type"] = Categorical(["random", "nndsvda"])
            hyperparameters_range_dictionary["beta_loss"] = Categorical(["frobenius", "kullback-leibler"])

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS = [recommender_class, URM_train],
                CONSTRUCTOR_KEYWORD_ARGS = {},
                FIT_POSITIONAL_ARGS = [],
                FIT_KEYWORD_ARGS = {}
            )


        #########################################################################################################

        if URM_train_last_test is not None:
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[1] = URM_train_last_test
        else:
            recommender_input_args_last_test = None


        parameterSearch = SearchBayesianSkopt(MF_cold_user_wrapper, evaluator_validation=evaluator_validation, evaluator_test=evaluator_test)

        hyperparameters_range_dictionary["estimate_model_for_cold_users"] = Categorical(["itemKNN", "mean_item_factors"])
        hyperparameters_range_dictionary["estimate_model_for_cold_users_topK"] = Integer(5, 1000)
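        # These two hyperparameters are added on top of every MF search space above:
        # MF_cold_user_wrapper estimates factors for users with no train interactions,
        # either via an itemKNN model or as the mean of the item factors.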

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        parameterSearch.search(recommender_input_args,
                               parameter_search_space = hyperparameters_range_dictionary,
                               n_cases = n_cases,
                               n_random_starts = n_random_starts,
                               output_folder_path = output_folder_path,
                               output_file_name_root = output_file_name_root,
                               metric_to_optimize = metric_to_optimize,
                               resume_from_saved = resume_from_saved,
                               recommender_input_args_last_test = recommender_input_args_last_test)




    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class, str(e)))
        traceback.print_exc()

        error_file = open(output_folder_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(recommender_class, str(e)))
        error_file.close()
    ###############################
    ###############################         EVALUATION ABLATION EXPERIMENT
    ###############################
    ################################################################################################################################################

    if input_flags.run_eval_ablation:

        for permutation_index in range(input_flags.n_permutations):

            # Run evaluation of the full map fitted model with the different interaction map modes
            for map_mode in ["all_map", "main_diagonal", "off_diagonal"]:

                input_folder_path = os.path.join(output_folder_path, "fit_ablation_{}/{}_{}/".format("all_map", "all_map", permutation_index))
                result_folder_path = os.path.join(output_folder_path, "evaluation_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index))

                recommender_input_args_last_test = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train_full, dataset.CFM_data_class_full])

                run_evaluation_ablation(recommender_class = CFM_wrapper,
                                        recommender_input_args = recommender_input_args_last_test,
                                        evaluator_test = evaluator_test,
                                        input_folder_path = input_folder_path,
                                        result_folder_path = result_folder_path,
                                        map_mode = map_mode)


        read_permutation_results(output_folder_path, input_flags.n_permutations, 10,
                                 ["NDCG", "HIT_RATE"],
                                 file_result_name_root = "latex_evaluation_ablation_results",
                                 convolution_model_name = CFM_wrapper.RECOMMENDER_NAME,
                                 pretrained_model_name = 'FM',
                                 pretrained_model_class = FM_Wrapper)
def read_data_split_and_search(dataset_name,
                                   flag_baselines_tune = False,
                                   flag_DL_article_default = False, flag_MF_baselines_tune = False, flag_DL_tune = False,
                                   flag_print_results = False):


    from Conferences.WWW.MultiVAE_our_interface.Movielens20M.Movielens20MReader import Movielens20MReader
    from Conferences.WWW.MultiVAE_our_interface.NetflixPrize.NetflixPrizeReader import NetflixPrizeReader

    split_type = "cold_user"

    result_folder_path = "result_experiments/{}/{}_{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name, split_type)


    if dataset_name == "movielens20m":
        dataset = Movielens20MReader(result_folder_path, split_type = split_type)

    elif dataset_name == "netflixPrize":
        dataset = NetflixPrizeReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)


    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15


    if split_type == "cold_user":


        collaborative_algorithm_list = [
            Random,
            TopPop,
            # UserKNNCFRecommender,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
            # PureSVDRecommender,
            # IALSRecommender,
            # NMFRecommender,
            # MatrixFactorization_BPR_Cython,
            # MatrixFactorization_FunkSVD_Cython,
            EASE_R_Recommender,
            SLIM_BPR_Cython,
            SLIMElasticNetRecommender,
        ]


        URM_train = dataset.URM_DICT["URM_train"].copy()
        URM_train_all = dataset.URM_DICT["URM_train_all"].copy()
        URM_validation = dataset.URM_DICT["URM_validation"].copy()
        URM_test = dataset.URM_DICT["URM_test"].copy()


        # Ensure IMPLICIT data and DISJOINT sets
        assert_implicit_data([URM_train, URM_train_all, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train_all, URM_validation, URM_test])


        from Base.Evaluation.Evaluator import EvaluatorHoldout

        evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[100])
        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[20, 50, 100])

        evaluator_validation = EvaluatorUserSubsetWrapper(evaluator_validation, URM_train_all)
        evaluator_test = EvaluatorUserSubsetWrapper(evaluator_test, URM_train_all)



    runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                       URM_train = URM_train,
                                                       URM_train_last_test = URM_train + URM_validation,
                                                       metric_to_optimize = metric_to_optimize,
                                                       evaluator_validation_earlystopping = evaluator_validation,
                                                       evaluator_validation = evaluator_validation,
                                                       evaluator_test = evaluator_test,
                                                       output_folder_path = result_folder_path,
                                                       parallelizeKNN = False,
                                                       allow_weighting = True,
                                                       resume_from_saved = True,
                                                       n_cases = n_cases,
                                                       n_random_starts = n_random_starts)



    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()



    ################################################################################################
    ###### Matrix Factorization Cold users

    collaborative_MF_algorithm_list = [
        PureSVDRecommender,
        IALSRecommender,
        NMFRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
    ]


    runParameterSearch_cold_user_MF_partial = partial(runParameterSearch_cold_user_MF,
                                                       URM_train = URM_train,
                                                       URM_train_last_test = URM_train + URM_validation,
                                                       metric_to_optimize = metric_to_optimize,
                                                       evaluator_validation_earlystopping = evaluator_validation,
                                                       evaluator_validation = evaluator_validation,
                                                       evaluator_test = evaluator_test,
                                                       output_folder_path = result_folder_path,
                                                       resume_from_saved = True,
                                                       n_cases = n_cases,
                                                       n_random_starts = n_random_starts)


    if flag_MF_baselines_tune:

        for recommender_class in collaborative_MF_algorithm_list:

            try:
                runParameterSearch_cold_user_MF_partial(recommender_class)

            except Exception as e:

                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()



    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:


            if dataset_name == "movielens20m":
                epochs = 100

            elif dataset_name == "netflixPrize":
                epochs = 200

            else:
                raise ValueError("Dataset name not supported, current is {}".format(dataset_name))


            multiVAE_article_hyperparameters = {
                "epochs": epochs,
                "batch_size": 500,
                "total_anneal_steps": 200000,
                "p_dims": None,
            }

            multiVAE_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }


            parameterSearch = SearchSingleCase(Mult_VAE_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                                                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                                                FIT_KEYWORD_ARGS = multiVAE_earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test = recommender_input_args_last_test,
                                   fit_hyperparameters_values=multiVAE_article_hyperparameters,
                                   output_folder_path = result_folder_path,
                                   resume_from_saved = True,
                                   output_file_name_root = Mult_VAE_RecommenderWrapper.RECOMMENDER_NAME)



        except Exception as e:

            print("On recommender {} Exception {}".format(Mult_VAE_RecommenderWrapper, str(e)))
            traceback.print_exc()


    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                         base_algorithm_list = None,
                                         other_algorithm_list = [Mult_VAE_RecommenderWrapper],
                                         KNN_similarity_list = KNN_similarity_to_report_list,
                                         ICM_names_list = None,
                                         UCM_names_list = None)


        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("article_metrics"),
                                           metrics_list = ["RECALL", "NDCG"],
                                           cutoffs_list = [20, 50, 100],
                                           table_title = None,
                                           highlight_best = True)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("all_metrics"),
                                           metrics_list = ["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR_ALL_HITS",
                                                           "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                           cutoffs_list = [50],
                                           table_title = None,
                                           highlight_best = True)

        result_loader.generate_latex_time_statistics(file_name + "{}_latex_results.txt".format("time"),
                                           n_evaluation_users=n_test_users,
                                           table_title = None)
# Example #28
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):
    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "delicious-hetrec2011":
        dataset = DeliciousHetrec2011Reader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-users":
        dataset = DeliciousHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "delicious-hetrec2011-cold-items":
        dataset = DeliciousHetrec2011ColdItemsReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011":
        dataset = LastFMHetrec2011Reader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-users":
        dataset = LastFMHetrec2011ColdUsersReader(result_folder_path)

    elif dataset_name == "lastfm-hetrec2011-cold-items":
        dataset = LastFMHetrec2011ColdItemsReader(result_folder_path)

    else:
        print("Dataset name not supported, current is {}".format(dataset_name))
        return

    print('Current dataset is: {}'.format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_negative = dataset.URM_DICT["URM_negative"].copy()
    UCM_train = dataset.UCM_DICT["UCM"].copy()
    ICM_train = dataset.ICM_DICT["ICM"].copy()

    if dataset_name == "delicious-hetrec2011" or dataset_name == "lastfm-hetrec2011":
        URM_train_last_test = URM_train + URM_validation

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
    else:
        URM_train_last_test = URM_train

        # Ensure IMPLICIT data and disjoint test-train split
        assert_implicit_data([URM_train, URM_test])
        assert_disjoint_matrices([URM_train, URM_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    metric_to_optimize = "MAP"
    cutoff_list_validation = [5, 10, 20]
    cutoff_list_test = [5, 10, 20]

    n_cases = 50
    n_random_starts = 15

    evaluator_validation = EvaluatorNegativeItemSample(
        URM_validation, URM_negative, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                 URM_negative,
                                                 cutoff_list=cutoff_list_test)

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:
        article_hyperparameters = {
            "pretrain_samples": 3,
            "pretrain_batch_size": 200,
            "pretrain_iterations": 5,
            "embed_len": 128,
            "topK": 10,
            "fliter_theta": 16,
            "aggre_theta": 64,
            "batch_size": 400,
            "samples": 3,
            "margin": 20,
            "epochs": 30,
            "iter_without_att": 5,
            "directed": False,
        }
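        # Note: these keys (including the "fliter_theta" spelling) must match the
        # HERSWrapper fit() signature exactly, since the dictionary is expanded as
        # fit() keyword arguments.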

        # Do not modify earlystopping
        earlystopping_hyperparameters = {
            "validation_every_n": 5,
            "stop_on_validation": False,
            "lower_validations_allowed": 5,
            "evaluator_object": evaluator_validation,
            "validation_metric": metric_to_optimize,
        }

        # This is a simpler version of the tuning code reported below, which uses SearchSingleCase.
        # You may use it for quick testing:
        # recommender_instance = HERSWrapper(URM_train, UCM_train, ICM_train)
        #
        # recommender_instance.fit(**article_hyperparameters,
        #                          **earlystopping_hyperparameters)
        #
        # evaluator_test.evaluateRecommender(recommender_instance)

        # Fit the DL model, select the optimal number of epochs and save the result
        parameterSearch = SearchSingleCase(
            HERSWrapper,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        recommender_input_args = SearchInputRecommenderArgs(
            CONSTRUCTOR_POSITIONAL_ARGS=[URM_train, UCM_train, ICM_train],
            FIT_KEYWORD_ARGS=earlystopping_hyperparameters)

        if dataset_name == "delicious-hetrec2011" or dataset_name == "lastfm-hetrec2011":
            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=article_hyperparameters,
                output_folder_path=result_folder_path,
                output_file_name_root=HERSWrapper.RECOMMENDER_NAME)
        else:
            parameterSearch.search(
                recommender_input_args,
                fit_hyperparameters_values=article_hyperparameters,
                output_folder_path=result_folder_path,
                output_file_name_root=HERSWrapper.RECOMMENDER_NAME)
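        # For the cold-start dataset variants URM_train_last_test equals URM_train
        # (see the split handling above), so the last-test arguments are omitted and
        # no retraining on train + validation is performed.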

    ################################################################################################
    ######
    ######      BASELINE ALGORITHMS - Nothing should be modified below this point
    ######

    if flag_baselines_tune:

        ################################################################################################
        ###### Collaborative Baselines

        collaborative_algorithm_list = [
            Random,
            TopPop,
            ItemKNNCFRecommender,
            PureSVDRecommender,
            SLIM_BPR_Cython,
        ]

        # Run the hyperparameter tuning of the baselines
        # and check that the results are reasonable and comparable to those reported in the paper
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            URM_train_last_test=URM_train_last_test,
            metric_to_optimize=metric_to_optimize,
            evaluator_validation_earlystopping=evaluator_validation,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test,
            output_folder_path=result_folder_path,
            resume_from_saved=True,
            parallelizeKNN=False,
            allow_weighting=True,
            n_cases=n_cases,
            n_random_starts=n_random_starts)
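
        # functools.partial pre-binds the shared keyword arguments, so the loop below only has to
        # supply the recommender class. A minimal illustration of the mechanism:
        #
        # from functools import partial
        # def power(base, exponent): return base ** exponent
        # square = partial(power, exponent=2)
        # square(5)   # -> 25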

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:

                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:

                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train_last_test,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:
        n_validation_users = np.sum(np.ediff1d(URM_validation.indptr) >= 1)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)

        print_time_statistics_latex_table(
            result_folder_path=result_folder_path,
            dataset_name=dataset_name,
            algorithm_name=ALGORITHM_NAME,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list,
            n_validation_users=n_validation_users,
            n_test_users=n_test_users,
            n_decimals=2)

        print_results_latex_table(
            result_folder_path=result_folder_path,
            algorithm_name=ALGORITHM_NAME,
            file_name_suffix="article_metrics_",
            dataset_name=dataset_name,
            metrics_to_report_list=["HIT_RATE", "NDCG"],
            cutoffs_to_report_list=cutoff_list_test,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list)

        print_results_latex_table(
            result_folder_path=result_folder_path,
            algorithm_name=ALGORITHM_NAME,
            file_name_suffix="all_metrics_",
            dataset_name=dataset_name,
            metrics_to_report_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_to_report_list=cutoff_list_validation,
            other_algorithm_list=[HERSWrapper],
            KNN_similarity_to_report_list=KNN_similarity_to_report_list)
# Example #29
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    from Conferences.KDD.MCRec_our_interface.Movielens100K.Movielens100KReader import Movielens100KReader

    result_folder_path = "result_experiments/{}/{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)

    if dataset_name == "movielens100k":
        dataset = Movielens100KReader(result_folder_path)
    else:
        raise ValueError("Unknown dataset name: {}".format(dataset_name))

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()
    URM_test_negative = dataset.URM_DICT["URM_test_negative"].copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data(
        [URM_train, URM_validation, URM_test, URM_test_negative])
    assert_disjoint_matrices(
        [URM_train, URM_validation, URM_test, URM_test_negative])
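
    # A minimal sketch of what these checks verify (hypothetical helpers, not the framework's code):
    # implicit means every stored value is 1, disjoint means no (user, item) pair occurs in two splits.
    #
    # def check_implicit(urm):
    #     return np.all(urm.data == 1.0)
    #
    # def check_disjoint(urm_a, urm_b):
    #     return urm_a.multiply(urm_b).nnz == 0   # elementwise product is nonzero only on shared entries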

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"], result_folder_path +
                         algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               result_folder_path + algorithm_dataset_string +
                               "popularity_statistics")

    from Base.Evaluation.Evaluator import EvaluatorNegativeItemSample

    evaluator_validation = EvaluatorNegativeItemSample(URM_validation,
                                                       URM_test_negative,
                                                       cutoff_list=[10])
    evaluator_test = EvaluatorNegativeItemSample(URM_test,
                                                 URM_test_negative,
                                                 cutoff_list=[10])
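
    # With negative item sampling, each test positive is ranked only against that user's fixed
    # sample of negative items rather than the full catalogue. A minimal HR@10 sketch
    # (score_fn and the item lists are hypothetical placeholders):
    #
    # def hit_rate_at_10(positive_item, negative_items, score_fn):
    #     candidates = [positive_item, *negative_items]
    #     ranked = sorted(candidates, key=score_fn, reverse=True)
    #     return int(positive_item in ranked[:10])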

    collaborative_algorithm_list = [
        Random,
        TopPop,
        UserKNNCFRecommender,
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        PureSVDRecommender,
        NMFRecommender,
        IALSRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        EASE_R_Recommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
    ]

    metric_to_optimize = "PRECISION"
    n_cases = 50
    n_random_starts = 15
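
    # The search is Bayesian optimization: the first n_random_starts cases are sampled at random,
    # the remaining ones are proposed by a surrogate model. Roughly equivalent, in plain
    # scikit-optimize (objective_function and search_space are hypothetical placeholders):
    #
    # from skopt import gp_minimize
    # result = gp_minimize(objective_function, search_space,
    #                      n_calls=n_cases, n_random_starts=n_random_starts)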

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=metric_to_optimize,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=result_folder_path,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=n_cases,
        n_random_starts=n_random_starts)

    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(
                    recommender_class, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Content Baselines

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:

                runParameterSearch_Content(
                    ItemKNNCBFRecommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On CBF recommender for ICM {} Exception {}".format(
                    ICM_name, str(e)))
                traceback.print_exc()

        ################################################################################################
        ###### Hybrid

        for ICM_name, ICM_object in dataset.ICM_DICT.items():

            try:

                runParameterSearch_Hybrid(
                    ItemKNN_CFCBF_Hybrid_Recommender,
                    URM_train=URM_train,
                    URM_train_last_test=URM_train + URM_validation,
                    metric_to_optimize=metric_to_optimize,
                    evaluator_validation=evaluator_validation,
                    evaluator_test=evaluator_test,
                    output_folder_path=result_folder_path,
                    parallelizeKNN=False,
                    allow_weighting=True,
                    resume_from_saved=True,
                    ICM_name=ICM_name,
                    ICM_object=ICM_object.copy(),
                    n_cases=n_cases,
                    n_random_starts=n_random_starts)

            except Exception as e:

                print("On recommender {} Exception {}".format(
                    ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
                traceback.print_exc()

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        if dataset_name == "movielens100k":
            """
            The code provided by the original authors of MCRec can be used only for the original data.
            Here I am passing to the Wrapper the URM_train matrix that is only required for its shape,
            the train will be done using the preprocessed data the original authors provided
            """
            from Conferences.KDD.MCRec_github.code.Dataset import Dataset

            original_dataset_reader = Dataset(
                'Conferences/KDD/MCRec_github/data/' + 'ml-100k')

            MCRec_article_hyperparameters = {
                "epochs": 200,
                "latent_dim": 128,
                "reg_latent": 0,
                "layers": [512, 256, 128, 64],
                "reg_layes": [0, 0, 0, 0],
                "learning_rate": 1e-3,
                "batch_size": 256,
                "num_negatives": 4,
            }

            MCRec_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize
            }

            parameterSearch = SearchSingleCase(
                MCRecML100k_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, original_dataset_reader
                ],
                FIT_KEYWORD_ARGS=MCRec_earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=MCRec_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=MCRecML100k_RecommenderWrapper.RECOMMENDER_NAME)

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[MCRecML100k_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["PRECISION", "RECALL", "NDCG"],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP", "MRR", "NDCG", "F1", "HIT_RATE",
                "ARHR", "NOVELTY", "DIVERSITY_MEAN_INTER_LIST",
                "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI",
                "SHANNON_ENTROPY"
            ],
            cutoffs_list=[10],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
# Example #30
    ###############################
    ###############################         EVALUATION ABLATION EXPERIMENT
    ###############################
    ################################################################################################################################################

    if input_flags.run_eval_ablation:

        for permutation_index in range(input_flags.n_permutations):

            # Run evaluation of the full map fitted model with the different interaction map modes
            for map_mode in ["all_map", "main_diagonal", "off_diagonal"]:

                input_folder_path = os.path.join(output_folder_path, "fit_ablation_{}/{}_{}/".format("all_map", "all_map", permutation_index))
                result_folder_path = os.path.join(output_folder_path, "evaluation_ablation_{}/{}_{}/".format(map_mode, map_mode, permutation_index))

                recommender_input_args = SearchInputRecommenderArgs(CONSTRUCTOR_POSITIONAL_ARGS=[URM_train])

                run_evaluation_ablation(recommender_class=ConvNCF_RecommenderWrapper,
                                        recommender_input_args = recommender_input_args,
                                        evaluator_test = evaluator_test,
                                        input_folder_path = input_folder_path,
                                        result_folder_path = result_folder_path,
                                        map_mode = map_mode)
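
                # map_mode controls which entries of the user-item interaction map the CNN may use.
                # A rough sketch of the masking idea (hypothetical, not the wrapper's actual code):
                #
                # interaction_map = np.outer(user_factors, item_factors)       # (K, K) input map
                # if map_mode == "main_diagonal":
                #     interaction_map = np.diag(np.diag(interaction_map))      # keep only the diagonal
                # elif map_mode == "off_diagonal":
                #     interaction_map[np.diag_indices_from(interaction_map)] = 0.0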


        read_permutation_results(output_folder_path, input_flags.n_permutations, 10,
                                 ["PRECISION", "MAP_MIN_DEN", "NDCG", "F1", "HIT_RATE"],
                                 file_result_name_root = "latex_evaluation_ablation_results",
                                 convolution_model_name = ConvNCF_RecommenderWrapper.RECOMMENDER_NAME,
                                 pretrained_model_name = 'BPRMF',
                                 pretrained_model_class = MatrixFactorizationCustomFactorsRecommender,