def objective(latent_factors, regularization,
              alpha):  # the parameters must match those defined in the search space above
    average_map = 0.0
    n_tests = 3  # number of tests, each on a different data split
    seed = [1234, 12, 34]  # seeds defining the splits

    for i in range(n_tests):
        URM_train, URM_test = splitter.split_train_test(urm,
                                                        testing=0.15,
                                                        seed=seed[i])
        URM_test = n_interaction_interval(
            URM_test, 0, 5
        )  # keep only users whose interaction count lies strictly between 0 and 5

        evaluator_test = EvaluatorHoldout(URM_test, [10])

        rec = ALS(URM_train)  # other recommenders can be used here as well
        rec.fit(latent_factors=latent_factors,
                regularization=regularization,
                iterations=100,
                alpha=alpha)  # pass the parameters being tuned

        results_run_dict, results_run_string = evaluator_test.evaluateRecommender(
            rec)

        cumulative_MAP = results_run_dict[10]['MAP']

        average_map += cumulative_MAP

    print(
        f"\nlatent_factors: {latent_factors}, regularization: {regularization}\navg MAP: {average_map/n_tests}\n\n"
    )
    return -average_map / n_tests  # negative average MAP across splits (negated for minimizers; averaging avoids overfitting a single split)
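# A minimal usage sketch (not from the original snippet): since `objective`
# returns the negative average MAP, it fits any minimizer. Assuming
# scikit-optimize is available, the search could look like this; the bounds
# below are illustrative placeholders, not tuned values.
from skopt import gp_minimize
from skopt.space import Integer, Real

search_space = [Integer(10, 500, name="latent_factors"),
                Real(1e-4, 1e-1, prior="log-uniform", name="regularization"),
                Real(1.0, 50.0, name="alpha")]

opt_result = gp_minimize(lambda p: objective(*p), search_space, n_calls=30)
print("Best avg MAP: {}, params: {}".format(-opt_result.fun, opt_result.x))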
def single_test(urm_train, urm_test, urm_valid, x_tick):
    evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

    recommender = HybridNorm3Recommender(urm_train)
    recommender.fit(beta=best_alpha)  # best_alpha is a global set by a previous search

    result, str_result = evaluator_test.evaluateRecommender(recommender)
    return result[10]['MAP']
def search_hyperparameter_to_recommenders(urm_train_split: csr_matrix,
                                          urm_validation_split: csr_matrix,
                                          urm_test_split: csr_matrix,
                                          urm_impressions: csr_matrix,
                                          recommender: Type[BaseRecommender]):
    URM_train = urm_train_split.copy()
    URM_validation = urm_validation_split.copy()
    URM_test = urm_test_split.copy()
    URM_impressions = urm_impressions.copy()

    if any(not isspmatrix_csr(split) for split in
           [URM_train, URM_validation, URM_test, URM_impressions]):
        raise ValueError("The matrices are not all CSR matrices.")

    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])

    if recommender.RECOMMENDER_NAME == Random.RECOMMENDER_NAME:
        evaluator_validation = EvaluatorHoldout(URM_validation,
                                                cutoff_list=[10],
                                                parallel=False)

        evaluator_test = EvaluatorHoldout(URM_test,
                                          cutoff_list=[5, 10, 20],
                                          parallel=False)
    else:
        evaluator_validation = EvaluatorHoldout(URM_validation,
                                                cutoff_list=[10],
                                                parallel=True,
                                                num_workers=NUM_WORKERS)

        evaluator_test = EvaluatorHoldout(URM_test,
                                          cutoff_list=[5, 10, 20],
                                          parallel=True,
                                          num_workers=NUM_WORKERS)

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        URM_train_last_test=URM_train + URM_validation,
        metric_to_optimize=METRIC_TO_OPTIMIZE,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=EXPERIMENTS_FOLDER_PATH,
        parallelizeKNN=False,
        allow_weighting=True,
        resume_from_saved=True,
        n_cases=NUM_CASES,
        n_random_starts=NUM_RANDOM_STARTS,
        URM_impressions=URM_impressions)

    try:
        runParameterSearch_Collaborative_partial(recommender)
    except Exception as e:
        logging.exception(f"On recommender {recommender} Exception {e}")
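# Hypothetical driver (not in the original snippet): the split matrices and
# the impressions matrix are assumed to have been computed beforehand.
for recommender_class in [ItemKNNCFRecommender, P3alphaRecommender]:
    search_hyperparameter_to_recommenders(urm_train_split=urm_train,
                                          urm_validation_split=urm_validation,
                                          urm_test_split=urm_test,
                                          urm_impressions=urm_impressions,
                                          recommender=recommender_class)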
Example #4
def ablation_study(arguments):
    study_path = 'ablation_study'
    if not os.path.exists(study_path):
        os.makedirs(study_path, exist_ok=False)

    exp_path = 'experiments'
    datasets = []
    modes = ['user', 'item']
    run_all = False

    if '--run-all' in arguments:
        datasets = all_datasets
        run_all = True

    for arg in arguments:
        if arg in name_datasets and not run_all:
            datasets.append(all_datasets[name_datasets.index(arg)])
        if arg in modes:
            modes = [arg]

    cutoffs = [5, 10, 20, 50]

    marker = itertools.cycle(['o', '^', 's', 'p', '1', 'D', 'P', '*'])

    for m in modes:
        for d in datasets:
            plotting_data = {c: {met: [] for met in metrics} for c in cutoffs}
            best_params = load_best_params(exp_path, d if isinstance(d, str) else d.DATASET_NAME, 'GANMF', m)
            range_coeff = np.arange(0, 1.1, 0.2)
            for coeff in range_coeff:
                best_params['recon_coefficient'] = coeff
                URM_train, URM_test, _, _, _ = load_URMs(d, dataset_kwargs)
                set_seed(seed)
                test_evaluator = EvaluatorHoldout(URM_test, cutoffs, exclude_seen=True)
                model = GANMF(URM_train, mode=m, seed=seed, is_experiment=True)
                model.fit(validation_set=None, sample_every=None, validation_evaluator=None, **best_params)
                result_dict, result_str = test_evaluator.evaluateRecommender(model)
                for c in cutoffs:
                    for met in metrics:
                        plotting_data[c][met].append(result_dict[c][met])

            dname = d if isinstance(d, str) else d.DATASET_NAME
            substudy_path = os.path.join(study_path, dname + '_GANMF_' + m)
            if not os.path.exists(substudy_path):
                os.makedirs(substudy_path, exist_ok=False)

            for c in cutoffs:
                fig, ax = plt.subplots(figsize=(20, 10))
                ax.set_xlabel('Feature Matching Coefficient')
                for met in metrics:
                    ax.plot(range_coeff, plotting_data[c][met], label=met, marker=next(marker))
                ax.legend(loc='best', fontsize='x-large')
                fig.savefig(os.path.join(substudy_path, str(c) + '_feature_matching_effect.png'), bbox_inches='tight')
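# Hypothetical entry point (not in the original): forwards CLI arguments such
# as '--run-all', a dataset name, or a train mode ('user'/'item') to the study.
if __name__ == '__main__':
    import sys
    ablation_study(sys.argv[1:])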
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters; it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    dataReader = Movielens10MReader()
    dataset = dataReader.load_data()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        dataset.get_URM_all(), train_percentage=0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.80)

    output_folder_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random, TopPop, P3alphaRecommender, RP3betaRecommender,
        ItemKNNCFRecommender, UserKNNCFRecommender,
        MatrixFactorization_BPR_Cython, MatrixFactorization_FunkSVD_Cython,
        PureSVDRecommender, SLIM_BPR_Cython, SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[5])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[5, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        n_cases=10,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        similarity_type_list=["cosine"],
        parallelizeKNN=False)

    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                maxtasksperchild=1)
    pool.map(runParameterSearch_Collaborative_partial,
             collaborative_algorithm_list)
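    # Hedged follow-up sketch (not in the original): reusing the saved
    # artifacts listed in the docstring. The file name below is a hypothetical
    # placeholder and the pickle format is an assumption; the framework's
    # actual serialization may differ between versions.
    import pickle

    best_parameter_file = output_folder_path + "ItemKNNCFRecommender_best_parameters"  # placeholder name
    with open(best_parameter_file, "rb") as f:
        best_parameters = pickle.load(f)

    recommender = ItemKNNCFRecommender(URM_train)
    recommender.fit(**best_parameters)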
Example #6
def get_precision(learning_rate, num_epoch, URM_train, URM_test):
    recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False)

    recommender.fit(epochs=num_epoch,
                    batch_size=1,
                    sgd_mode='sgd',
                    learning_rate=learning_rate,
                    positive_threshold_BPR=1)

    evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10])
    results_dict, results_run_string = evaluator_validation.evaluateRecommender(
        recommender)
    return results_dict[10]['PRECISION']
def single_test(urm_train, urm_test, urm_valid):
    evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

    recommender = UserKNNCBFRecommender(urm_train, ucm_all)
    recommender.fit(shrink=1777,
                    topK=1998,
                    similarity='tversky',
                    feature_weighting='BM25',
                    tversky_alpha=0.1604953616,
                    tversky_beta=0.9862348646)

    result, str_result = evaluator_test.evaluateRecommender(recommender)
    # result, str_result = evaluator_valid.evaluateRecommender(recommender)
    # res[num_test] = result[10]['MAP']
    return result[10]['MAP']
def single_test(urm_train, urm_test, urm_valid, x_tick):
    evaluator_valid = EvaluatorHoldout(urm_valid,
                                       cutoff_list=[10],
                                       verbose=False)

    MAP_per_k_valid = []

    recommender = HybridNorm3Recommender(urm_train)

    for alpha in tqdm(x_tick):
        recommender.fit(beta=alpha)

        result_dict, res_str = evaluator_valid.evaluateRecommender(recommender)
        MAP_per_k_valid.append(result_dict[10]["MAP"])

    return MAP_per_k_valid
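# A follow-up sketch (assumed, not from the original): sweep the weight on one
# split and plot the validation MAP curve; the split variables are assumed to
# exist already.
import numpy as np
import matplotlib.pyplot as plt

x_tick = np.linspace(0.0, 1.0, 21)
MAP_per_k_valid = single_test(urm_train, urm_test, urm_valid, x_tick)

plt.plot(x_tick, MAP_per_k_valid, marker='o')
plt.xlabel('beta')
plt.ylabel('MAP@10 on validation')
plt.show()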
    def __init__(self,
                 recommender_class,
                 URM_train,
                 k=5,
                 seed=1666,
                 level=None,
                 evaluator_test=None,
                 verbose=True):

        super(SearchAbstractClass, self).__init__()

        self.recommender_class = recommender_class
        self.URM_train = URM_train.copy()
        self.k = k
        self.seed = seed
        self.verbose = verbose
        self.log_file = None
        self.level = level

        self.results_test_best = {}
        self.parameter_dictionary_best = {}

        self.URM_list = []
        self.URM_test_list = []  # kept as an attribute only because the objects may need to stay referenced somewhere
        self.evaluator_list = []

        # k-fold
        kf = KFold(n_splits=k, shuffle=True, random_state=self.seed)

        shape = URM_train.shape
        indptr = URM_train.indptr
        indices = URM_train.indices
        data = URM_train.data

        for train_index, test_index in kf.split(data):
            data_train = np.ones(data.shape)
            data_test = np.ones(data.shape)
            data_train[test_index] = 0
            data_test[train_index] = 0
            kf_train = sps.csr_matrix((data_train, indices, indptr),
                                      shape=shape).copy()
            kf_test = sps.csr_matrix((data_test, indices, indptr),
                                     shape=shape).copy()
            kf_train.eliminate_zeros()
            kf_test.eliminate_zeros()
            self.URM_list.append(kf_train)
            self.URM_test_list.append(kf_test)
            self.evaluator_list.append(
                EvaluatorHoldout(kf_test, cutoff_list=[10]))

        self.evaluator_test = evaluator_test
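# Standalone sketch (added for clarity, not part of the original class) showing
# why the masking above yields disjoint folds: train and test reuse the same
# indptr/indices, zero out complementary entries in `data`, and
# eliminate_zeros() drops the zeroed entries from the sparse structure.
import numpy as np
import scipy.sparse as sps

data = np.ones(4)
indices = np.array([0, 2, 1, 2])
indptr = np.array([0, 2, 4])

mask_train = np.array([1.0, 0.0, 1.0, 0.0])  # entries kept in the train fold
train = sps.csr_matrix((data * mask_train, indices, indptr), shape=(2, 3))
train.eliminate_zeros()
test = sps.csr_matrix((data * (1 - mask_train), indices, indptr), shape=(2, 3))
test.eliminate_zeros()
assert train.nnz + test.nnz == 4  # the folds partition the interactions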
Example #10
def parallel_fit_and_eval_job(recommender, data: DataObject, epochs,
                              num_factors, learning_rate, sgd,
                              negative_interactions_quota, init_mean,
                              init_std_dev, user_reg, item_reg, bias_reg,
                              positive_reg, negative_reg):
    ev = EvaluatorHoldout(data.urm_test, [10],
                          minRatingsPerUser=1,
                          exclude_seen=True,
                          verbose=True)

    # Fit
    recommender.fit(epochs=epochs,
                    batch_size=1000,
                    num_factors=num_factors,
                    positive_threshold_BPR=None,
                    learning_rate=learning_rate,
                    use_bias=True,
                    sgd_mode=sgd,
                    negative_interactions_quota=negative_interactions_quota,
                    init_mean=init_mean,
                    init_std_dev=init_std_dev,
                    user_reg=user_reg,
                    item_reg=item_reg,
                    bias_reg=bias_reg,
                    positive_reg=positive_reg,
                    negative_reg=negative_reg,
                    validation_every_n=3,
                    epochs_min=1,
                    stop_on_validation=True,
                    validation_metric="MAP",
                    lower_validations_allowed=2,
                    evaluator_object=ev,
                    random_seed=None)

    # Eval
    _result = []
    for n, users, description in data.urm_train_users_by_type:
        _eval, _map = MyEvaluator.evaluate_algorithm(data.urm_test,
                                                     users,
                                                     recommender,
                                                     at=10,
                                                     remove_top=0)
        _result.append(_map)
    users = data.ids_target_users
    _eval, _map = MyEvaluator.evaluate_algorithm(data.urm_test,
                                                 users,
                                                 recommender,
                                                 at=10,
                                                 remove_top=0)
    _result.append(_map)
    return _result
Example #11
def evaluate(urm, ICM):
    URM_train, URM_val, URM_test = splitter.split(urm, testing=0.1, validation=0.2)
    
    evaluator_validation = EvaluatorHoldout(URM_val, [10])
    evaluator_test = EvaluatorHoldout(URM_test, [10])

    recommender = Hybrid(URM_train, ICM)
    recommender.fit()    

    results_run_dict, results_run_string = evaluator_validation.evaluateRecommender(recommender)
    print(results_run_string)
    results_run_dict, results_run_string = evaluator_test.evaluateRecommender(recommender)
    print(results_run_string)
Example #12
def search_param(alpha, beta, topK):
    res = []
    for current in my_input:
        recommender = current[1]
        urm_valid = current[0]
        evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])

        #recommender.fit(alpha=alpha, beta=beta, gamma=gamma, phi=phi, psi=psi, li=li, mi=mi)
        recommender.fit(alpha=alpha, beta=beta, topK=int(topK))
        result_valid, str_result = evaluator_valid.evaluateRecommender(recommender)

        res.append(result_valid[10]['MAP'])
    print('Best valid so far: run n. {} with: {}'.format(vec['n_valid'], optimizer.max))
    print('Best test so far: run n. {} with test MAP: {}'.format(vec['n_test'], vec['max_test']))
    res = np.array(res)
    print('Current mean MAP: {}'.format(res.mean()))

    if res.mean() > vec['max_valid']:
        vec['n_valid'] = vec['n']
        vec['max_valid'] = res.mean()
        print('new max valid found')
        res_test = []
        for current in my_input:
            recommender = current[1]
            urm_test = current[2]
            evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

            #recommender.fit(alpha=alpha, beta=beta, gamma=gamma, phi=phi, psi=psi, li=li, mi=mi)
            recommender.fit(alpha=alpha, beta=beta, topK=int(topK))
            result_test, str_result = evaluator_test.evaluateRecommender(recommender)

            res_test.append(result_test[10]['MAP'])
        res_test = np.array(res_test)
        if res_test.mean() > vec['max_test']:
            print('a new test max was found')
            vec['max_test'] = res_test.mean()
            vec['n_test'] = vec['n']
    vec['n'] += 1
    return res.mean()
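# Hedged wiring sketch (not in the original): the `optimizer.max` reference
# above suggests the bayes_opt package; `vec` and `my_input` are globals
# prepared elsewhere, and the bounds below are illustrative placeholders.
from bayes_opt import BayesianOptimization

pbounds = {'alpha': (0.0, 1.0), 'beta': (0.0, 1.0), 'topK': (10, 1000)}
optimizer = BayesianOptimization(f=search_param, pbounds=pbounds, random_state=1234)
optimizer.maximize(init_points=5, n_iter=30)
print(optimizer.max)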
def read_data_split_and_search(dataset_variant,
                               train_interactions,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_DL_tune=False,
                               flag_print_results=False):

    # Using dataReader from CollaborativeVAE_our_interface as they use the same data in the same way
    from Conferences.KDD.CollaborativeVAE_our_interface.Citeulike.CiteulikeReader import CiteulikeReader

    result_folder_path = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, ALGORITHM_NAME, dataset_variant, train_interactions)
    result_folder_path_CollaborativeVAE = "result_experiments/{}/{}_citeulike_{}_{}/".format(
        CONFERENCE_NAME, "CollaborativeVAE", dataset_variant,
        train_interactions)

    dataset = CiteulikeReader(result_folder_path_CollaborativeVAE,
                              dataset_variant=dataset_variant,
                              train_interactions=train_interactions)

    URM_train = dataset.URM_DICT["URM_train"].copy()
    URM_validation = dataset.URM_DICT["URM_validation"].copy()
    URM_test = dataset.URM_DICT["URM_test"].copy()

    # Ensure IMPLICIT data
    assert_implicit_data([URM_train, URM_validation, URM_test])

    # Due to the sparsity of the dataset, the evaluation is done on a subset of the train data,
    # while keeping the validation interactions in the train set
    if train_interactions == 1:
        # In this case the train data will contain validation data to avoid cold users
        assert_disjoint_matrices([URM_train, URM_test])
        assert_disjoint_matrices([URM_validation, URM_test])
        exclude_seen_validation = False
        URM_train_last_test = URM_train
    else:
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        exclude_seen_validation = True
        URM_train_last_test = URM_train + URM_validation

    assert_implicit_data([URM_train_last_test])

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(
        URM_validation,
        cutoff_list=[150],
        exclude_seen=exclude_seen_validation)
    evaluator_test = EvaluatorHoldout(
        URM_test, cutoff_list=[50, 100, 150, 200, 250, 300])

    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:

            collaborativeDL_article_hyperparameters = {
                "para_lv": 10,
                "para_lu": 1,
                "para_ln": 1e3,
                "batch_size": 128,
                "epoch_sdae": 200,
                "epoch_dae": 200,
            }

            parameterSearch = SearchSingleCase(
                CollaborativeDL_Matlab_RecommenderWrapper,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                CONSTRUCTOR_POSITIONAL_ARGS=[
                    URM_train, dataset.ICM_DICT["ICM_tokens_TFIDF"]
                ],
                FIT_KEYWORD_ARGS={})

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[
                0] = URM_train_last_test

            parameterSearch.search(
                recommender_input_args,
                recommender_input_args_last_test=recommender_input_args_last_test,
                fit_hyperparameters_values=collaborativeDL_article_hyperparameters,
                output_folder_path=result_folder_path,
                resume_from_saved=True,
                output_file_name_root=CollaborativeDL_Matlab_RecommenderWrapper.RECOMMENDER_NAME)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                CollaborativeDL_Matlab_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr) >= 1)
        ICM_names_to_report_list = list(dataset.ICM_DICT.keys())
        dataset_name = "{}_{}".format(dataset_variant, train_interactions)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME,
                                          dataset_name)

        result_loader = ResultFolderLoader(
            result_folder_path,
            base_algorithm_list=None,
            other_algorithm_list=[CollaborativeDL_Matlab_RecommenderWrapper],
            KNN_similarity_list=KNN_similarity_to_report_list,
            ICM_names_list=ICM_names_to_report_list,
            UCM_names_list=None)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("article_metrics"),
            metrics_list=["RECALL"],
            cutoffs_list=[50, 100, 150, 200, 250, 300],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_results(
            file_name + "{}_latex_results.txt".format("all_metrics"),
            metrics_list=[
                "PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1",
                "HIT_RATE", "ARHR_ALL_HITS", "NOVELTY",
                "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL",
                "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"
            ],
            cutoffs_list=[150],
            table_title=None,
            highlight_best=True)

        result_loader.generate_latex_time_statistics(
            file_name + "{}_latex_results.txt".format("time"),
            n_evaluation_users=n_test_users,
            table_title=None)
Example #14
    # idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]

    recommended_items = np.argsort(-pred_val, axis=1).ravel()[:k]

    is_relevant = np.in1d(recommended_items, pos_items_array, assume_unique=True)

    # his_recall = Recall_at_k_batch(pred_val, pos_items_sparse, k=20)[0]
    # my_recall = recall(is_relevant, pos_items_array)

    his_ndcg = NDCG_binary_at_k_batch(pred_val, pos_items_sparse, k=100)[0]
    my_ndcg = ndcg(recommended_items, pos_items_array)

    if not np.allclose(my_ndcg, his_ndcg, atol=0.0001):
        pass  # no-op placeholder; useful as a breakpoint to inspect mismatches

n100_list = np.concatenate(n100_list)
r20_list = np.concatenate(r20_list)
r50_list = np.concatenate(r50_list)

print("Test NDCG@100=%.5f (%.5f)" % (np.mean(n100_list), np.std(n100_list) / np.sqrt(len(n100_list))))
print("Test Recall@20=%.5f (%.5f)" % (np.mean(r20_list), np.std(r20_list) / np.sqrt(len(r20_list))))
print("Test Recall@50=%.5f (%.5f)" % (np.mean(r50_list), np.std(r50_list) / np.sqrt(len(r50_list))))

from Base.Evaluation.Evaluator import EvaluatorHoldout

evaluator = EvaluatorHoldout(test_data_te, cutoff_list=[20, 50, 100])

results_dict, results_run_string = evaluator.evaluateRecommender(recommender)

print(results_run_string)
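# For reference, a minimal binary NDCG@k in the spirit of the comparison above
# (an independent sketch, not the framework's implementation):
import numpy as np

def ndcg_binary_at_k(recommended_items, pos_items, k):
    hits = np.in1d(recommended_items[:k], pos_items)
    # DCG with binary relevance: 1/log2(rank + 1) for each hit, ranks are 1-based
    dcg = np.sum(hits / np.log2(np.arange(2, hits.size + 2)))
    # IDCG: all min(|pos_items|, k) relevant items placed at the top
    idcg = np.sum(1.0 / np.log2(np.arange(2, min(len(pos_items), k) + 2)))
    return dcg / idcg if idcg > 0 else 0.0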
def run_recommender(recommender_class):

    temp_save_file_folder = "./result_experiments/__temp_model/"

    if not os.path.isdir(temp_save_file_folder):
        os.makedirs(temp_save_file_folder)

    try:
        dataset_object = Movielens1MReader()

        dataSplitter = DataSplitter_leave_k_out(dataset_object, k_out_value=2)

        dataSplitter.load_data()
        URM_train, URM_validation, URM_test = dataSplitter.get_holdout_split()

        write_log_string(log_file, "On Recommender {}\n".format(recommender_class))

        recommender_object = recommender_class(URM_train)

        if isinstance(recommender_object, Incremental_Training_Early_Stopping):
            fit_params = {"epochs": 15}
        else:
            fit_params = {}

        recommender_object.fit(**fit_params)

        write_log_string(log_file, "Fit OK, ")

        evaluator = EvaluatorHoldout(URM_test, [5], exclude_seen=True)
        _, results_run_string = evaluator.evaluateRecommender(recommender_object)

        write_log_string(log_file, "EvaluatorHoldout OK, ")

        evaluator = EvaluatorNegativeItemSample(URM_test, URM_train, [5], exclude_seen=True)
        _, _ = evaluator.evaluateRecommender(recommender_object)

        write_log_string(log_file, "EvaluatorNegativeItemSample OK, ")

        recommender_object.saveModel(temp_save_file_folder, file_name="temp_model")

        write_log_string(log_file, "saveModel OK, ")

        recommender_object = recommender_class(URM_train)
        recommender_object.loadModel(temp_save_file_folder, file_name="temp_model")

        evaluator = EvaluatorHoldout(URM_test, [5], exclude_seen=True)
        _, results_run_string_2 = evaluator.evaluateRecommender(recommender_object)

        write_log_string(log_file, "loadModel OK, ")

        shutil.rmtree(temp_save_file_folder, ignore_errors=True)

        write_log_string(log_file, " PASS\n")
        write_log_string(log_file, results_run_string + "\n\n")

    except Exception as e:

        print("On Recommender {} Exception {}".format(recommender_class, str(e)))
        log_file.write("On Recommender {} Exception {}\n\n\n".format(recommender_class, str(e)))
        log_file.flush()

        traceback.print_exc()
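# Hypothetical driver (not part of the original snippet): exercise the smoke
# test above over a list of recommender classes; `log_file` is assumed to be
# a module-level file handle, as the function's writes imply, and the file
# name and class list below are illustrative.
if __name__ == '__main__':
    log_file = open("./result_experiments/run_test_recommenders.txt", "w")
    for recommender_class in [TopPop, ItemKNNCFRecommender, P3alphaRecommender]:
        run_recommender(recommender_class)
    log_file.close()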
Example #16
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters; it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    from Data_manager.Movielens1M.Movielens1MReader import Movielens1MReader
    from Data_manager.DataSplitter_k_fold_stratified import DataSplitter_Warm_k_fold

    dataset_object = Movielens1MReader()

    dataSplitter = DataSplitter_Warm_k_fold(dataset_object)

    dataSplitter.load_data()

    URM_train, URM_validation, URM_test = dataSplitter.get_holdout_split()

    output_folder_path = "result_experiments/SKOPT_prova/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        P3alphaRecommender,
        RP3betaRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[5])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[5, 10])

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        n_cases=8,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path)

    from Utils.PoolWithSubprocess import PoolWithSubprocess

    # pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
    # pool.close()
    # pool.join()

    for recommender_class in collaborative_algorithm_list:

        try:

            runParameterSearch_Collaborative_partial(recommender_class)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()
Example #17
def read_data_split_and_search():
    """
    This function provides a simple example of how to tune the parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters; it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation set
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85)
    """
    26-10-2020
    > OPTIMIZATION ON THE RANGE [200, +INF)
    
    Already done optimizations:
    >
    
    RECOMMENDER I'AM CONSIDERING (the fastest up to now)
    > PureSVD
    > ItemKNNCBF
    > ItemKNNCF
    > UserKNNCF
    > P3A
    > RP3beta
    """
    f_range = (200, -1)  # -1 means no upper bound, i.e. the range [200, +inf)

    URM_validation = parser.filter_URM_test_by_range(URM_train, URM_validation,
                                                     f_range)
    URM_test = parser.filter_URM_test_by_range(URM_train, URM_test, f_range)
    output_folder_path = "result_experiments_v2/" + "range_" + str(
        f_range[0]) + "-" + str(f_range[1]) + "/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)
    """
    collaborative_algorithm_list = [
        #EASE_R_Recommender
        PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
    ]

    content_algorithm_list= [
        #ItemKNNCBFRecommender
    ]
    """

    algorithm_in_sequence = [(ItemKNNCFRecommender, 'CF'),
                             (UserKNNCFRecommender, 'CF'),
                             (P3alphaRecommender, 'CF'),
                             (RP3betaRecommender, 'CF'),
                             (PureSVDRecommender, 'CF'),
                             (ItemKNNCBFRecommender, 'CBF')]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    for algo, algo_type in algorithm_in_sequence:
        print(f"OPTIMIZING {algo.RECOMMENDER_NAME} - {algo_type}")
        if algo_type == 'CF':
            collaborative_algorithm_list = []
            collaborative_algorithm_list.append(algo)

            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=int(50 * 0.3),
                evaluator_validation_earlystopping=evaluator_validation,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,  # NOTE: feature weighting intentionally disabled here
                parallelizeKNN=False)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)

        elif algo_type == 'CBF':
            content_algorithm_list = []
            content_algorithm_list.append(algo)
            runParameterSearch_Content_partial = partial(
                runParameterSearch_Content,
                URM_train=URM_train,
                ICM_object=ICM_obj,
                ICM_name='BookFeatures',
                n_cases=50,
                n_random_starts=int(50 * 0.3),
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                metric_to_optimize="MAP",
                parallelizeKNN=False,
                allow_weighting=True,
                #similarity_type_list=['cosine']
            )
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Content_partial,
                     content_algorithm_list)
Example #18
    def __init__(self, recommender_class, dataset, fit_param_names=[], metric='MAP',
                 method='bayesian', at=5, verbose=True, seed=1234):

        # Seed for reproducibility of results and consistent initialization of weights/splitting of dataset
        set_seed(seed)

        self.recommender_class = recommender_class
        self.dataset = dataset
        self.dataset_name = self.dataset if isinstance(self.dataset, str) else self.dataset.DATASET_NAME
        self.fit_param_names = fit_param_names
        self.metric = metric
        self.method = method
        self.at = at
        self.verbose = verbose
        self.seed = seed
        self.isGAN = False

        # if isinstance(self.dataset, str) and self.dataset in Movielens.urls.keys():
        #     self.reader = Movielens(version=self.dataset, **dataset_kwargs)
        # else:
        #     self.reader = self.dataset(**dataset_kwargs)

        # self.logsdir = os.path.join('experiments', self.recommender_class.RECOMMENDER_NAME + '_' + self.reader.DATASET_NAME)
        self.logsdir = os.path.join('experiments',
                self.recommender_class.RECOMMENDER_NAME + '_' + train_mode + '_' + self.dataset_name)

        if not os.path.exists(self.logsdir):
            os.makedirs(self.logsdir, exist_ok=False)

        # with open(os.path.join(self.logsdir, 'dataset_config.txt'), 'w') as f:
        #     json.dump(self.reader.config, f, indent=4)

        codesdir = os.path.join(self.logsdir, 'code')
        os.makedirs(codesdir, exist_ok=True)
        shutil.copy(os.path.abspath(sys.modules[self.__module__].__file__), codesdir)
        shutil.copy(os.path.abspath(sys.modules[self.recommender_class.__module__].__file__), codesdir)

        # self.URM_train, self.URM_test, self.URM_validation = self.reader.split_urm(split_ratio=[0.6, 0.2, 0.2], save_local=False, verbose=False)
        # self.URM_train = self.reader.get_URM_train()
        # self.URM_test = self.reader.get_URM_test()
        # self.URM_for_train, _, self.URM_validation = self.reader.split_urm(
        #         self.URM_train.tocoo(), split_ratio=[0.75, 0, 0.25], save_local=False, verbose=False)
        # self.URM_train_small, _, self.URM_early_stop = self.reader.split_urm(self.URM_for_train.tocoo(), split_ratio=[0.85, 0, 0.15], save_local=False, verbose=False)

        # del self.URM_for_train

        self.URM_train, self.URM_test, self.URM_validation, self.URM_train_small, self.URM_early_stop = load_URMs(
            dataset, dataset_kwargs)

        self.evaluator_validation = EvaluatorHoldout(self.URM_validation, [self.at], exclude_seen=True)
        self.evaluator_earlystop = EvaluatorHoldout(self.URM_early_stop, [self.at], exclude_seen=True)
        self.evaluatorTest = EvaluatorHoldout(self.URM_test, [self.at, 10, 20, 50], exclude_seen=True, minRatingsPerUser=2)

        self.fit_params = {}

        modules = getattr(self.recommender_class, '__module__', None)
        if modules and modules.split('.')[0] == gans.__name__:
            self.isGAN = True

        # EARLY STOPPING from Maurizio's framework, used for the baselines
        self.early_stopping_parameters = {
            'epochs_min': 0,
            'validation_every_n': 5,
            'stop_on_validation': True,
            'validation_metric': self.metric,
            'lower_validations_allowed': 5,
            'evaluator_object': self.evaluator_earlystop
        }

        # EARLY STOPPING for GAN-based recommenders
        self.my_early_stopping = {
            'allow_worse': 5,
            'freq': 5,
            'validation_evaluator': self.evaluator_earlystop,
            'validation_set': None,
            'sample_every': None,
        }
Example #19
    pyplot.xlabel('Sorted Item')
    pyplot.show()

    user_activity = np.ediff1d(URM_all.indptr)
    user_activity = np.sort(user_activity)

    pyplot.plot(user_activity, 'ro')
    pyplot.ylabel('Num Interactions ')
    pyplot.xlabel('Sorted User')
    pyplot.show()'''

    #np.random.seed(1234)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.90)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.9)
    evaluator_validation = EvaluatorHoldout(URM_test,
                                            cutoff_list=[10],
                                            exclude_seen=True)

    URM_ICM_train = sps.vstack([URM_train, ICM_all.T])
    URM_ICM_train = URM_ICM_train.tocsr()
    URM_ICM_train2 = sps.hstack([ICM_all, URM_train.T])
    URM_ICM_train2 = URM_ICM_train2.tocsr()

    earlystopping_keywargs = {
        "validation_every_n": 10,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation,
        "lower_validations_allowed": 5,
        "validation_metric": "MAP",
    }
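
    # Hedged follow-up (not in the original snippet): the kwargs above are
    # meant to be unpacked into the fit() of an early-stopping-capable
    # recommender; the class and the epoch budget here are illustrative.
    recommender = SLIM_BPR_Cython(URM_ICM_train)
    recommender.fit(epochs=1000, **earlystopping_keywargs)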
Example #20
    pyplot.xlabel('Sorted Item')
    pyplot.show()

    user_activity = np.ediff1d(URM_all.indptr)
    user_activity = np.sort(user_activity)

    pyplot.plot(user_activity, 'ro')
    pyplot.ylabel('Num Interactions ')
    pyplot.xlabel('Sorted User')
    pyplot.show()

    np.random.seed(1234)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.8)
    evaluator_validation = EvaluatorHoldout(URM_test,
                                            cutoff_list=[10],
                                            exclude_seen=True)

    earlystopping_keywargs = {
        "validation_every_n": 10,
        "stop_on_validation": True,
        "evaluator_object": evaluator_validation,
        "lower_validations_allowed": 5,
        "validation_metric": "MAP",
    }

    # MAP 0.057, kaggle MAP 0.054
    recommender1 = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    recommender1.load_model('SavedModels', 'SLIM_BPR_Cyrhon')
    #recommender1.fit(**{"topK": 865, "epochs": 1000, "symmetric": False, "sgd_mode": "adagrad", "lambda_i": 0.01,
    #                  "lambda_j": 1e-05, "learning_rate": 0.0001})
Example #21
def single_test(i):
    evaluator_test = EvaluatorHoldout(n_urm_test[i], cutoff_list=[10])
    #n_recommender[i].fit(alpha=alpha, beta=beta, gamma=gamma, phi=phi, psi=psi, li=li)
    n_recommender[i].fit(alpha=alpha, beta=beta, topK=int(topK))
    result, str_result = evaluator_test.evaluateRecommender(n_recommender[i])
    return result[10]['MAP']
Example #22
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridNorm3Recommender import HybridNorm3Recommender
from MatrixFactorization.ALSRecommender import ALSRecommender
from MatrixFactorization.BPRRecommender import BPRRecommender
import similaripy as sim

data = DataManager()
urm_train = data.get_urm()

urm_train, urm_test = split_train_leave_k_out_user_wise(data.get_urm(),
                                                        temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train,
                                                         temperature='valid2')

urm_train_warm = data.create_test_warm_users(urm_train, threshold=10)
urm_test_warm = data.create_test_warm_users(urm_test, threshold=10)

evaluator_test_warm = EvaluatorHoldout(urm_test_warm, cutoff_list=[10])

recommender = UserKNNCFRecommender(urm_train)
recommender.fit(shrink=2, topK=600, normalize=True)

recommender_warm = UserKNNCFRecommender(urm_train_warm)
recommender_warm.fit(shrink=2, topK=500, normalize=True)

result, str_result = evaluator_test_warm.evaluateRecommender(recommender)
print('MAP on the warm test set, recommender trained on the full URM: {}'.format(result[10]['MAP']))

result, str_result = evaluator_test_warm.evaluateRecommender(recommender_warm)
print('MAP on the warm test set, recommender trained on the warm URM: {}'.format(result[10]['MAP']))
Example #23
from KNN.UserKNNCBFRecommender import UserKNNCBFRecommender
from KNN.UserKNNCFRecommender import UserKNNCFRecommender
import numpy as np
import scipy.sparse as sps
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridGen2Recommender import HybridGen2Recommender
from Hybrid.HybridNormRecommender import HybridNormRecommender
from Hybrid.HybridNorm1Recommender import HybridNorm1Recommender
from Hybrid.HybridNorm2Recommender import HybridNorm2Recommender

Data = DataManager()


urm_train, urm_test = split_train_leave_k_out_user_wise(Data.get_urm(), threshold=10, temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train, threshold=10, temperature='valid')
evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

recommender = HybridNorm1Recommender

# recommender_3 = UserKNNCFRecommender(urm_train)
# recommender_3.fit(shrink=2, topK=600, normalize=True)
# w_sparse = recommender_3.W_sparse

parameterSearch = SearchBayesianSkopt(recommender,
                                      evaluator_validation=evaluator_valid,
                                      evaluator_test=evaluator_test)

# earlystopping_keywargs = {"validation_every_n": 5,
#                               "stop_on_validation": True,
#                               "evaluator_object": evaluator_valid,
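# Hedged sketch of how the search above would typically be launched, following
# the SearchInputRecommenderArgs pattern used elsewhere on this page. The
# hyperparameter ranges and case counts are illustrative placeholders, and the
# exact keyword names of search() may differ between framework versions.
from skopt.space import Real

hyperparameters_range = {'alpha': Real(low=0.0, high=1.0, prior='uniform'),
                         'beta': Real(low=0.0, high=1.0, prior='uniform')}

recommender_input_args = SearchInputRecommenderArgs(
    CONSTRUCTOR_POSITIONAL_ARGS=[urm_train],
    FIT_KEYWORD_ARGS={})

parameterSearch.search(recommender_input_args,
                       parameter_search_space=hyperparameters_range,
                       n_cases=50,
                       n_random_starts=15,
                       output_folder_path='result_experiments/',
                       output_file_name_root=recommender.RECOMMENDER_NAME,
                       metric_to_optimize='MAP')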
Example #24
def main(arguments):
    test_results_path = 'test_results'
    if not os.path.exists(test_results_path):
        os.makedirs(test_results_path, exist_ok=False)

    exp_path = 'experiments'
    datasets = []
    run_all = False
    train_mode = ['user', 'item']
    cutoffs = [5, 10, 20, 50]
    recommender = None

    dict_rec_classes = {}
    dict_rec_classes['TopPop'] = TopPop
    dict_rec_classes['Random'] = Random
    dict_rec_classes['PureSVD'] = PureSVDRecommender
    dict_rec_classes['BPR'] = MatrixFactorization_BPR_Cython
    dict_rec_classes['ALS'] = IALSRecommender
    dict_rec_classes['NMF'] = NMFRecommender
    dict_rec_classes['GANMF'] = GANMF
    dict_rec_classes['CFGAN'] = CFGAN
    dict_rec_classes['DisGANMF'] = DisGANMF
    dict_rec_classes['SLIMBPR'] = SLIM_BPR_Cython
    dict_rec_classes['fullGANMF'] = fullGANMF
    dict_rec_classes['DeepGANMF'] = DeepGANMF

    if '--run-all' in arguments:
        datasets = all_datasets
        run_all = True

    for arg in arguments:
        if arg in name_datasets and not run_all:
            datasets.append(all_datasets[name_datasets.index(arg)])
        if arg in ['user', 'item']:
            train_mode = [arg]
        if arg in all_recommenders and recommender is None:
            recommender = arg

    if recommender not in ['GANMF', 'DisGANMF', 'CFGAN', 'fullGANMF', 'DeepGANMF']:
        train_mode = ['']

    for d in datasets:
        dname = d if isinstance(d, str) else d.DATASET_NAME
        for mode in train_mode:
            if recommender == 'fullGANMF':
                best_params = load_best_params(exp_path, dname, 'GANMF', mode)
            else:
                best_params = load_best_params(exp_path, dname, dict_rec_classes[recommender].RECOMMENDER_NAME, mode)
            set_seed(seed)
            URM_train, URM_test, _, _, _ = load_URMs(d, dataset_kwargs)
            test_evaluator = EvaluatorHoldout(URM_test, cutoffs, exclude_seen=True)
            if recommender in ['GANMF', 'DisGANMF', 'CFGAN', 'fullGANMF', 'DeepGANMF']:
                model = dict_rec_classes[recommender](URM_train, mode=mode, seed=seed, is_experiment=True)
                model.fit(validation_set=None, sample_every=None, validation_evaluator=None, **best_params)
            else:
                model = dict_rec_classes[recommender](URM_train)
                model.fit(**best_params)
            results_dict, results_str = test_evaluator.evaluateRecommender(model)

            save_path = os.path.join(test_results_path, model.RECOMMENDER_NAME + '_' + mode + '_' + dname)
            if not os.path.exists(save_path):
                os.makedirs(save_path, exist_ok=False)
                with open(os.path.join(save_path, 'test_results.txt'), 'a') as f:
                    f.write(results_str)
            else:
                results_filename = os.path.join(save_path, 'test_results.txt')
                if not os.path.exists(results_filename):
                    with open(results_filename, 'a') as f:
                        f.write(results_str)
def read_data_split_and_search(dataset_name,
                               flag_baselines_tune=False,
                               flag_DL_article_default=False,
                               flag_MF_baselines_tune=False,
                               flag_DL_tune=False,
                               flag_print_results=False):


    from Conferences.WWW.MultiVAE_our_interface.Movielens20M.Movielens20MReader import Movielens20MReader
    from Conferences.WWW.MultiVAE_our_interface.NetflixPrize.NetflixPrizeReader import NetflixPrizeReader

    split_type = "cold_user"

    result_folder_path = "result_experiments/{}/{}_{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name, split_type)


    if dataset_name == "movielens20m":
        dataset = Movielens20MReader(result_folder_path, split_type = split_type)

    elif dataset_name == "netflixPrize":
        dataset = NetflixPrizeReader(result_folder_path)

    # If directory does not exist, create
    if not os.path.exists(result_folder_path):
        os.makedirs(result_folder_path)


    metric_to_optimize = "NDCG"
    n_cases = 50
    n_random_starts = 15


    if split_type == "cold_user":


        collaborative_algorithm_list = [
            Random,
            TopPop,
            # UserKNNCFRecommender,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
            # PureSVDRecommender,
            # IALSRecommender,
            # NMFRecommender,
            # MatrixFactorization_BPR_Cython,
            # MatrixFactorization_FunkSVD_Cython,
            EASE_R_Recommender,
            SLIM_BPR_Cython,
            SLIMElasticNetRecommender,
        ]


        URM_train = dataset.URM_DICT["URM_train"].copy()
        URM_train_all = dataset.URM_DICT["URM_train_all"].copy()
        URM_validation = dataset.URM_DICT["URM_validation"].copy()
        URM_test = dataset.URM_DICT["URM_test"].copy()


        # Ensure IMPLICIT data and DISJOINT sets
        assert_implicit_data([URM_train, URM_train_all, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train, URM_validation, URM_test])
        assert_disjoint_matrices([URM_train_all, URM_validation, URM_test])


        from Base.Evaluation.Evaluator import EvaluatorHoldout

        evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[100])
        evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[20, 50, 100])

        evaluator_validation = EvaluatorUserSubsetWrapper(evaluator_validation, URM_train_all)
        evaluator_test = EvaluatorUserSubsetWrapper(evaluator_test, URM_train_all)



    runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                       URM_train = URM_train,
                                                       URM_train_last_test = URM_train + URM_validation,
                                                       metric_to_optimize = metric_to_optimize,
                                                       evaluator_validation_earlystopping = evaluator_validation,
                                                       evaluator_validation = evaluator_validation,
                                                       evaluator_test = evaluator_test,
                                                       output_folder_path = result_folder_path,
                                                       parallelizeKNN = False,
                                                       allow_weighting = True,
                                                       resume_from_saved = True,
                                                       n_cases = n_cases,
                                                       n_random_starts = n_random_starts)



    if flag_baselines_tune:

        for recommender_class in collaborative_algorithm_list:
            try:
                runParameterSearch_Collaborative_partial(recommender_class)
            except Exception as e:
                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()



    ################################################################################################
    ###### Matrix Factorization Cold users

    collaborative_MF_algorithm_list = [
        PureSVDRecommender,
        IALSRecommender,
        NMFRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
    ]


    runParameterSearch_cold_user_MF_partial = partial(runParameterSearch_cold_user_MF,
                                                       URM_train = URM_train,
                                                       URM_train_last_test = URM_train + URM_validation,
                                                       metric_to_optimize = metric_to_optimize,
                                                       evaluator_validation_earlystopping = evaluator_validation,
                                                       evaluator_validation = evaluator_validation,
                                                       evaluator_test = evaluator_test,
                                                       output_folder_path = result_folder_path,
                                                       resume_from_saved = True,
                                                       n_cases = n_cases,
                                                       n_random_starts = n_random_starts)


    if flag_MF_baselines_tune:

        for recommender_class in collaborative_MF_algorithm_list:

            try:
                runParameterSearch_cold_user_MF_partial(recommender_class)

            except Exception as e:

                print("On recommender {} Exception {}".format(recommender_class, str(e)))
                traceback.print_exc()



    ################################################################################################
    ######
    ######      DL ALGORITHM
    ######

    if flag_DL_article_default:

        try:


            if dataset_name == "movielens20m":
                epochs = 100

            elif dataset_name == "netflixPrize":
                epochs = 200


            multiVAE_article_hyperparameters = {
                "epochs": epochs,
                "batch_size": 500,
                "total_anneal_steps": 200000,
                "p_dims": None,
            }

            multiVAE_earlystopping_hyperparameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "evaluator_object": evaluator_validation,
                "lower_validations_allowed": 5,
                "validation_metric": metric_to_optimize,
            }


            parameterSearch = SearchSingleCase(Mult_VAE_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_input_args = SearchInputRecommenderArgs(
                                                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                                                FIT_KEYWORD_ARGS = multiVAE_earlystopping_hyperparameters)

            recommender_input_args_last_test = recommender_input_args.copy()
            recommender_input_args_last_test.CONSTRUCTOR_POSITIONAL_ARGS[0] = URM_train + URM_validation

            parameterSearch.search(recommender_input_args,
                                   recommender_input_args_last_test = recommender_input_args_last_test,
                                   fit_hyperparameters_values=multiVAE_article_hyperparameters,
                                   output_folder_path = result_folder_path,
                                   resume_from_saved = True,
                                   output_file_name_root = Mult_VAE_RecommenderWrapper.RECOMMENDER_NAME)



        except Exception as e:

            print("On recommender {} Exception {}".format(Mult_VAE_RecommenderWrapper, str(e)))
            traceback.print_exc()


    ################################################################################################
    ######
    ######      PRINT RESULTS
    ######

    if flag_print_results:

        n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)
        file_name = "{}..//{}_{}_".format(result_folder_path, ALGORITHM_NAME, dataset_name)

        result_loader = ResultFolderLoader(result_folder_path,
                                         base_algorithm_list = None,
                                         other_algorithm_list = [Mult_VAE_RecommenderWrapper],
                                         KNN_similarity_list = KNN_similarity_to_report_list,
                                         ICM_names_list = None,
                                         UCM_names_list = None)


        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("article_metrics"),
                                           metrics_list = ["RECALL", "NDCG"],
                                           cutoffs_list = [20, 50, 100],
                                           table_title = None,
                                           highlight_best = True)

        result_loader.generate_latex_results(file_name + "{}_latex_results.txt".format("all_metrics"),
                                           metrics_list = ["PRECISION", "RECALL", "MAP_MIN_DEN", "MRR", "NDCG", "F1", "HIT_RATE", "ARHR_ALL_HITS",
                                                           "NOVELTY", "DIVERSITY_MEAN_INTER_LIST", "DIVERSITY_HERFINDAHL", "COVERAGE_ITEM", "DIVERSITY_GINI", "SHANNON_ENTROPY"],
                                           cutoffs_list = [50],
                                           table_title = None,
                                           highlight_best = True)

        result_loader.generate_latex_time_statistics(file_name + "{}_latex_results.txt".format("time"),
                                           n_evaluation_users=n_test_users,
                                           table_title = None)
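
# A minimal sketch (plain numpy/scipy, no framework dependencies) of the
# user-counting idiom used above: in a CSR matrix, np.ediff1d(indptr) yields
# the number of stored interactions per row, so comparing it against >= 1
# counts the users that actually appear in the split.
import numpy as np
import scipy.sparse as sps

URM_example = sps.csr_matrix(np.array([[1, 0, 1],
                                       [0, 0, 0],
                                       [0, 1, 0]]))
interactions_per_user = np.ediff1d(URM_example.indptr)  # array([2, 0, 1])
n_users_with_data = np.sum(interactions_per_user >= 1)  # 2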
Example #26
# The load_data function will split the data and save it in the desired folder.
# Once the split is saved, further calls to load_data will load the existing split, ensuring you always use the same one
dataSplitter.load_data(
    save_folder_path="result_experiments/usage_example/data/")

# We can access the three URMs with this function, as well as the ICMs (if present in the data reader)
URM_train, URM_validation, URM_test = dataSplitter.get_holdout_split()

ICM_dict = dataSplitter.get_loaded_ICM_dict()

# Now that we have the split, we can create the evaluators.
# The constructor of the evaluator allows you to specify the evaluation conditions (data, recommendation list length,
# excluding already seen items). Whenever you want to evaluate a model, use the evaluateRecommender function of the evaluator object
evaluator_validation = EvaluatorHoldout(URM_validation,
                                        cutoff_list=[5],
                                        exclude_seen=False)
evaluator_test = EvaluatorHoldout(URM_test,
                                  cutoff_list=[5, 10, 20],
                                  exclude_seen=False)

# We now fit and evaluate a non-personalized algorithm
recommender = TopPop(URM_train)
recommender.fit()

results_dict, results_run_string = evaluator_validation.evaluateRecommender(
    recommender)
print("Result of TopPop is:\n" + results_run_string)

# We now fit and evaluate a personalized algorithm, passing some hyperparameters to the fit function
recommender = P3alphaRecommender(URM_train)
recommender.fit(topK=50, alpha=0.5)  # illustrative hyperparameter values

results_dict, results_run_string = evaluator_validation.evaluateRecommender(
    recommender)
print("Result of P3alpha is:\n" + results_run_string)
Example #27
class RecSysExp:
    def __init__(self, recommender_class, dataset, fit_param_names=None, metric='MAP',
                 method='bayesian', at=5, verbose=True, seed=1234):

        # Seed for reproducibility of results and consistent initialization of weights/splitting of dataset
        set_seed(seed)

        self.recommender_class = recommender_class
        self.dataset = dataset
        self.dataset_name = self.dataset if isinstance(self.dataset, str) else self.dataset.DATASET_NAME
        # Copy into a fresh list: tune() appends to it, so a mutable default argument would leak state across instances
        self.fit_param_names = list(fit_param_names) if fit_param_names is not None else []
        self.metric = metric
        self.method = method
        self.at = at
        self.verbose = verbose
        self.seed = seed
        self.isGAN = False

        # if isinstance(self.dataset, str) and self.dataset in Movielens.urls.keys():
        #     self.reader = Movielens(version=self.dataset, **dataset_kwargs)
        # else:
        #     self.reader = self.dataset(**dataset_kwargs)

        # self.logsdir = os.path.join('experiments', self.recommender_class.RECOMMENDER_NAME + '_' + self.reader.DATASET_NAME)
        self.logsdir = os.path.join('experiments',
                self.recommender_class.RECOMMENDER_NAME + '_' + train_mode + '_' + self.dataset_name)

        if not os.path.exists(self.logsdir):
            os.makedirs(self.logsdir, exist_ok=False)

        # with open(os.path.join(self.logsdir, 'dataset_config.txt'), 'w') as f:
        #     json.dump(self.reader.config, f, indent=4)

        codesdir = os.path.join(self.logsdir, 'code')
        os.makedirs(codesdir, exist_ok=True)
        shutil.copy(os.path.abspath(sys.modules[self.__module__].__file__), codesdir)
        shutil.copy(os.path.abspath(sys.modules[self.recommender_class.__module__].__file__), codesdir)

        # self.URM_train, self.URM_test, self.URM_validation = self.reader.split_urm(split_ratio=[0.6, 0.2, 0.2], save_local=False, verbose=False)
        # self.URM_train = self.reader.get_URM_train()
        # self.URM_test = self.reader.get_URM_test()
        # self.URM_for_train, _, self.URM_validation = self.reader.split_urm(
        #         self.URM_train.tocoo(), split_ratio=[0.75, 0, 0.25], save_local=False, verbose=False)
        # self.URM_train_small, _, self.URM_early_stop = self.reader.split_urm(self.URM_for_train.tocoo(), split_ratio=[0.85, 0, 0.15], save_local=False, verbose=False)

        # del self.URM_for_train

        self.URM_train, self.URM_test, self.URM_validation, self.URM_train_small, self.URM_early_stop = load_URMs(
            dataset, dataset_kwargs)

        self.evaluator_validation = EvaluatorHoldout(self.URM_validation, [self.at], exclude_seen=True)
        self.evaluator_earlystop = EvaluatorHoldout(self.URM_early_stop, [self.at], exclude_seen=True)
        self.evaluatorTest = EvaluatorHoldout(self.URM_test, [self.at, 10, 20, 50], exclude_seen=True, minRatingsPerUser=2)

        self.fit_params = {}

        modules = getattr(self.recommender_class, '__module__', None)
        if modules and modules.split('.')[0] == gans.__name__:
            self.isGAN = True

        # EARLY STOPPING parameters from Maurizio's framework, used for the baseline recommenders
        self.early_stopping_parameters = {
            'epochs_min': 0,
            'validation_every_n': 5,
            'stop_on_validation': True,
            'validation_metric': self.metric,
            'lower_validations_allowed': 5,
            'evaluator_object': self.evaluator_earlystop
        }

        # EARLY STOPPING parameters for the GAN-based recommenders
        self.my_early_stopping = {
            'allow_worse': 5,
            'freq': 5,
            'validation_evaluator': self.evaluator_earlystop,
            'validation_set': None,
            'sample_every': None,
        }

    def build_fit_params(self, params):
        for i, val in enumerate(params):
            param_name = self.dimension_names[i]
            if param_name in self.fit_param_names:
                self.fit_params[param_name] = val
            elif param_name == 'epochs' and self.recommender_class in early_stopping_algos:
                self.fit_params[param_name] = val

    def save_best_params(self, additional_params=None):
        d = dict(self.fit_params)
        if additional_params is not None:
            d.update(additional_params)
        with open(os.path.join(self.logsdir, 'best_params.pkl'), 'wb') as f:
            pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)

    def load_best_params(self):
        with open(os.path.join(self.logsdir, 'best_params.pkl'), 'rb') as f:
            return pickle.load(f)

    def obj_func(self, params):
        """
        Black-box objective function.

        Parameters
        ----------
        params: list
            Hyperparameter values sampled by the optimizer, one per search-space
            dimension, in the order of `self.dimension_names`.

        Returns
        -------
        obj_func_value: float
            Value of the objective function as denoted by the experiment metric.
        """

        # print('Optimizing for', self.reader.DATASET_NAME)
        print('Optimizing', self.recommender_class.RECOMMENDER_NAME, 'for', self.dataset_name)

        # Split the parameters into build_params and fit_params
        self.build_fit_params(params)

        # Create the model and fit it.
        try:
            if self.isGAN:
                model = self.recommender_class(self.URM_train_small, mode=train_mode, seed=self.seed, is_experiment=True)
                model.logsdir = self.logsdir
                fit_early_params = dict(self.fit_params)
                fit_early_params.update(self.my_early_stopping)
                last_epoch = model.fit(**fit_early_params)

                # Save the number of epochs that produced the best model: training continued for
                # `allow_worse` validations, each `freq` epochs apart, past the best point
                # (e.g. last_epoch=60 with allow_worse=5 and freq=5 gives best epochs = 35)
                if last_epoch != self.fit_params['epochs']:
                    self.fit_params['epochs'] = last_epoch - \
                                                self.my_early_stopping['allow_worse'] * self.my_early_stopping['freq']

            else:
                model = self.recommender_class(self.URM_train_small)
                if self.recommender_class in early_stopping_algos:
                    fit_early_params = dict(self.fit_params)
                    fit_early_params.update(self.early_stopping_parameters)
                    model.fit(**fit_early_params)
                else:
                    model.fit(**self.fit_params)

            results_dic, results_run_string = self.evaluator_validation.evaluateRecommender(model)
            fitness = -results_dic[self.at][self.metric]
        except tf.errors.ResourceExhaustedError:
            # Out-of-memory configurations get the worst possible fitness (0 >= -metric)
            return 0

        try:
            if fitness < self.best_res:
                self.best_res = fitness
                self.save_best_params(additional_params=dict(epochs=model.epochs_best) if self.recommender_class in early_stopping_algos else None)
        except AttributeError:
            self.best_res = fitness
            self.save_best_params(additional_params=model.get_early_stopping_final_epochs_dict() if self.recommender_class in early_stopping_algos else None)

        with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
            d = self.fit_params
            if self.recommender_class in early_stopping_algos:
                d.update(model.get_early_stopping_final_epochs_dict()) 
            d_str = json.dumps(d)
            f.write(d_str)
            f.write('\n')
            f.write(results_run_string)
            f.write('\n\n')

        return fitness

    def tune(self, params, evals=10, init_config=None, seed=None):
        """
        Runs the hyperparameter search using Gaussian Process as surrogate model or Random Search,
        saves the results of the trials and print the best found parameters.
        使用 高斯过程 作为 替代模型 进行 超参数 搜索 或 随机搜索
        保存 并 打印 训练 得到的 最佳 参数
        Parameters
        ----------
        params: list
            List of skopt.space.space.Dimensions to be searched.
        参数为 scikit-learn Base class for search space dimensions
        evals: int
            Number of evaluations to perform.

        init_config: list, default None
            An initial parameter configuration for seeding the Gaussian Process

        seed: int, default None
            Seed for random_state of `gp_minimize` or `dummy_minimize`.
            Set to a fixed integer for reproducibility.
        """

        # Optional progress notification (requires the `telegram-send` CLI to be configured)
        msg = 'Started ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])

        # Shape of the test URM: number of users U and items I
        U, I = self.URM_test.shape

        if self.recommender_class == GANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='emb_dim', dtype=int))
            self.fit_param_names.append('emb_dim')

        if self.recommender_class == CFGAN or self.recommender_class == DeepGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='g_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')
            self.fit_param_names.append('g_nodes')

        if self.recommender_class == DisGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')

        self.dimension_names = [p.name for p in params]

        # Make sure the upper bound of the `num_factors` dimension
        # does not exceed min(U, I)
        try:
            idx = self.dimension_names.index('num_factors')
            maxval = params[idx].bounds[1]
            if maxval > min(U, I):
                params[idx] = Integer(1, min(U, I), name='num_factors', dtype=int)
        except ValueError:
            pass

        if len(params) > 0:

            # Check if there is already a checkpoint for this experiment
            checkpoint_path = os.path.join(self.logsdir, 'checkpoint.pkl')
            checkpoint_exists = os.path.exists(checkpoint_path)
            checkpoint_saver = CheckpointSaver(checkpoint_path, compress=3)

            if seed is None:
                seed = self.seed

            t_start = int(time.time())

            if checkpoint_exists:
                previous_run = skopt.load(checkpoint_path)
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params, n_calls=evals - len(previous_run.func_vals),
                                          x0=previous_run.x_iters, y0=previous_run.func_vals, n_random_starts=0,
                                          random_state=seed, verbose=True, callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params, n_calls=evals - len(previous_run.func_vals),
                                             x0=previous_run.x_iters, y0=previous_run.func_vals, random_state=seed,
                                             verbose=True, callback=[checkpoint_saver])
            else:
                # No checkpoint found: start the hyperparameter optimization from scratch
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params, n_calls=evals, random_state=seed, verbose=True,
                                          callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params, n_calls=evals, random_state=seed, verbose=True,
                                             callback=[checkpoint_saver])

            t_end = int(time.time())

            # Reload the best hyperparameters saved incrementally during the search
            best_params = self.load_best_params()

            with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
                f.write('Experiment ran for {}\n'.format(str(datetime.timedelta(seconds=t_end - t_start))))
                f.write('Best {} score: {}. Best result found at: {}\n'.format(self.metric, results.fun, best_params))

            if self.recommender_class in [IALSRecommender, MatrixFactorization_BPR_Cython]:
                self.dimension_names.append('epochs')
            self.build_fit_params(best_params.values())

        # Retrain with all training data
        set_seed(seed)
        if self.isGAN:
            model = self.recommender_class(self.URM_train, mode=train_mode, is_experiment=True)
            model.logsdir = self.logsdir
            model.fit(**self.fit_params)
            # load_models(model, save_dir='best_model', all_in_folder=True)

        else:
            model = self.recommender_class(self.URM_train)
            model.fit(**self.fit_params)
            # model.loadModel(os.path.join(self.logsdir, 'best_model'))

        _, results_run_string = self.evaluatorTest.evaluateRecommender(model)

        print('\n\nResults on test set:')
        print(results_run_string)
        print('\n\n')

        with open(os.path.join(self.logsdir, 'result_test.txt'), 'w') as f:
            f.write(results_run_string)

        msg = 'Finished ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])
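
# A hedged usage sketch for the RecSysExp class above. The recommender class,
# parameter names and ranges are illustrative assumptions, not taken from the
# original experiments; it also assumes the module-level `train_mode` and
# `dataset_kwargs` globals read inside __init__ are set.
from skopt.space import Integer, Categorical

train_mode = 'user'   # hypothetical value for the module-level setting
dataset_kwargs = {}   # hypothetical value for the module-level setting

exp = RecSysExp(ItemKNNCFRecommender, dataset='movielens1m',
                fit_param_names=['topK', 'shrink', 'similarity'],
                metric='MAP', method='bayesian', at=5, seed=1234)
search_space = [
    Integer(5, 800, name='topK', dtype=int),
    Integer(0, 1000, name='shrink', dtype=int),
    Categorical(['cosine', 'jaccard'], name='similarity'),
]
# Runs the Bayesian search, then retrains on the full URM_train and
# evaluates on the test split as implemented in tune() above.
exp.tune(search_space, evals=50)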
Example #28
def read_data_split_and_search_SpectralCF(dataset_name, cold_start=False,
                                          cold_items=None, isKNN_multiprocess=True, isKNN_tune=True,
                                          isSpectralCF_train_default=True, isSpectralCF_tune=True, print_results=True):


    if dataset_name == "movielens1m_original":
        assert(cold_start is not True)
        dataset = Movielens1MReader(type="original")

    elif dataset_name == "movielens1m_ours":
        dataset = Movielens1MReader(type="ours", cold_start=cold_start, cold_items=cold_items)

    elif dataset_name == "hetrec":
        assert (cold_start is not True)
        dataset = MovielensHetrec2011Reader()

    elif dataset_name == "amazon_instant_video":
        assert (cold_start is not True)
        dataset = AmazonInstantVideoReader()


    if not cold_start:
        output_folder_path = "result_experiments/{}/{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, dataset_name)
    else:
        output_folder_path = "result_experiments/{}/{}_cold_{}_{}/".format(CONFERENCE_NAME, ALGORITHM_NAME, cold_items, dataset_name)


    URM_train = dataset.URM_train.copy()
    URM_validation = dataset.URM_validation.copy()
    URM_test = dataset.URM_test.copy()

    # Ensure IMPLICIT data and DISJOINT sets
    assert_implicit_data([URM_train, URM_validation, URM_test])
    assert_disjoint_matrices([URM_train, URM_validation, URM_test])


    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    algorithm_dataset_string = "{}_{}_".format(ALGORITHM_NAME, dataset_name)

    plot_popularity_bias([URM_train + URM_validation, URM_test],
                         ["URM train", "URM test"],
                         output_folder_path + algorithm_dataset_string + "popularity_plot")

    save_popularity_statistics([URM_train + URM_validation, URM_test],
                               ["URM train", "URM test"],
                               output_folder_path + algorithm_dataset_string + "popularity_statistics")


    metric_to_optimize = "RECALL"

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    if not cold_start:
        cutoff_list_validation = [50]
        cutoff_list_test = [20, 30, 40, 50, 60, 70, 80, 90, 100]
    else:
        cutoff_list_validation = [20]
        cutoff_list_test = [20]

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=cutoff_list_validation)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=cutoff_list_test)

    ################################################################################################
    ###### KNN CF

    if isKNN_tune:
        collaborative_algorithm_list = [
            Random,
            TopPop,
            UserKNNCFRecommender,
            ItemKNNCFRecommender,
            P3alphaRecommender,
            RP3betaRecommender,
        ]

        runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                           URM_train = URM_train,
                                                           metric_to_optimize = metric_to_optimize,
                                                           evaluator_validation_earlystopping = evaluator_validation,
                                                           evaluator_validation = evaluator_validation,
                                                           evaluator_test = evaluator_test,
                                                           output_folder_path = output_folder_path,
                                                           parallelizeKNN = False,
                                                           allow_weighting = True,
                                                           n_cases = 35)
        if isKNN_multiprocess:
            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            resultList = pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)

            pool.close()
            pool.join()

        else:
            for recommender_class in collaborative_algorithm_list:
                try:
                    runParameterSearch_Collaborative_partial(recommender_class)
                except Exception as e:
                    print("On recommender {} Exception {}".format(recommender_class, str(e)))
                    traceback.print_exc()



    ################################################################################################
    ###### SpectralCF

    if isSpectralCF_train_default:
        try:

            spectralCF_article_parameters = {
                "epochs": 1000,
                "batch_size": 1024,
                "embedding_size": 16,
                "decay": 0.001,
                "k": 3,
                "learning_rate": 1e-3,
            }

            spectralCF_earlystopping_parameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 20,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
            }

            parameterSearch = SearchSingleCase(SpectralCF_RecommenderWrapper,
                                               evaluator_validation=evaluator_validation,
                                               evaluator_test=evaluator_test)

            recommender_parameters = SearchInputRecommenderParameters(
                                                CONSTRUCTOR_POSITIONAL_ARGS = [URM_train],
                                                FIT_KEYWORD_ARGS = spectralCF_earlystopping_parameters)

            parameterSearch.search(recommender_parameters,
                                   fit_parameters_values = spectralCF_article_parameters,
                                   output_folder_path = output_folder_path,
                                   output_file_name_root = SpectralCF_RecommenderWrapper.RECOMMENDER_NAME + "_article_default")

        except Exception as e:

            print("On recommender {} Exception {}".format(SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()


    # Note: tuning only runs when article-default training is disabled, since the branches are mutually exclusive
    elif isSpectralCF_tune:

        try:

            spectralCF_earlystopping_parameters = {
                "validation_every_n": 5,
                "stop_on_validation": True,
                "lower_validations_allowed": 20,
                "evaluator_object": evaluator_validation,
                "validation_metric": metric_to_optimize,
                "epochs_min": 400,
                "epochs": 2000
            }

            runParameterSearch_SpectralCF(SpectralCF_RecommenderWrapper,
                                             URM_train = URM_train,
                                             earlystopping_parameters = spectralCF_earlystopping_parameters,
                                             metric_to_optimize = metric_to_optimize,
                                             evaluator_validation = evaluator_validation,
                                             evaluator_test = evaluator_test,
                                             output_folder_path = output_folder_path,
                                             n_cases = 35,
                                             output_file_name_root = SpectralCF_RecommenderWrapper.RECOMMENDER_NAME)



        except Exception as e:

            print("On recommender {} Exception {}".format(SpectralCF_RecommenderWrapper, str(e)))
            traceback.print_exc()

    ################################################################################################
    ###### print results

    if print_results:

        n_validation_users = np.sum(np.ediff1d(URM_validation.indptr)>=1)
        n_test_users = np.sum(np.ediff1d(URM_test.indptr)>=1)

        if not cold_start:
            results_file_root_name = ALGORITHM_NAME
        else:
            results_file_root_name = "{}_cold_{}".format(ALGORITHM_NAME, cold_items)



        print_time_statistics_latex_table(result_folder_path = output_folder_path,
                                          dataset_name = dataset_name,
                                          results_file_prefix_name = results_file_root_name,
                                          other_algorithm_list = [SpectralCF_RecommenderWrapper],
                                          n_validation_users = n_validation_users,
                                          n_test_users = n_test_users,
                                          n_decimals = 2)

        if cold_start:
            cutoffs_to_report_list = [20]
        else:
            cutoffs_to_report_list = [20, 40, 60, 80, 100]

        print_results_latex_table(result_folder_path = output_folder_path,
                                  results_file_prefix_name = results_file_root_name,
                                  dataset_name = dataset_name,
                                  metrics_to_report_list = ["RECALL", "MAP"],
                                  cutoffs_to_report_list = cutoffs_to_report_list,
                                  other_algorithm_list = [SpectralCF_RecommenderWrapper])
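
# A minimal, hypothetical driver for the function above; the dataset names
# mirror the branches handled inside read_data_split_and_search_SpectralCF.
if __name__ == '__main__':
    for dataset_name in ["movielens1m_ours", "hetrec", "amazon_instant_video"]:
        read_data_split_and_search_SpectralCF(dataset_name,
                                              cold_start=False,
                                              isKNN_multiprocess=False,
                                              isKNN_tune=True,
                                              isSpectralCF_train_default=True,
                                              print_results=True)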
Example #29
# itemCFParam, slimParam, p3Param and the alpha1/alpha2/alpha3 weights below are
# assumed to be defined earlier in the original script (e.g. from a tuning step)
URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
itemCF_recommender = ItemKNNCFRecommender(URM_train)
itemCF_recommender.fit(**itemCFParam)
slim_recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False)
slim_recommender.fit(**slimParam)
p3_recommender = P3alphaRecommender(URM_train)
p3_recommender.fit(**p3Param)

recommender1 = SimilarityHybridRecommender(URM_train,
                                           itemCF_recommender.W_sparse,
                                           slim_recommender.W_sparse,
                                           p3_recommender.W_sparse)
recommender1.fit(topK=100, alpha1=alpha1, alpha2=alpha2, alpha3=alpha3)

evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10])
eval_res = evaluator_validation.evaluateRecommender(recommender1)
MAP = eval_res[0][10]['MAP']
print("The MAP in one test is: ", MAP)

itemCF_recommender = ItemKNNCFRecommender(URM_all)
itemCF_recommender.fit(**itemCFParam)
slim_recommender = SLIM_BPR_Cython(URM_all, recompile_cython=False)
slim_recommender.fit(**slimParam)
p3_recommender = P3alphaRecommender(URM_all)
p3_recommender.fit(**p3Param)
recommender1 = SimilarityHybridRecommender(URM_all,
                                           itemCF_recommender.W_sparse,
                                           slim_recommender.W_sparse,
                                           p3_recommender.W_sparse)
recommender1.fit(topK=100, alpha1=alpha1, alpha2=alpha2, alpha3=alpha3)
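
# After the final refit on URM_all, the hybrid can produce the top-K lists for
# every user; a hedged sketch assuming the framework's standard
# recommend(user_id_array, cutoff=...) API of BaseRecommender.
n_users = URM_all.shape[0]
recommended_items = recommender1.recommend(list(range(n_users)), cutoff=10)
for user_id in range(min(5, n_users)):
    print("User {}: {}".format(user_id, recommended_items[user_id]))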
Example #30
    # NOTE: this snippet starts mid-list in the source; the assignment below is reconstructed
    recommender_list = [
        ItemKNNCFRecommender,
        P3alphaRecommender,
        RP3betaRecommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        MatrixFactorization_AsySVD_Cython,
        PureSVDRecommender,
        IALSRecommender,
    ]


    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator = EvaluatorHoldout(URM_test, [5, 20], exclude_seen=True)


    output_root_path = "./result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)


    logFile = open(output_root_path + "result_all_algorithms.txt", "a")


    for recommender_class in recommender_list:

        try: