Beispiel #1
0
def evaluate(urm, ICM):
    """Split the interactions, fit the hybrid recommender, and print the
    validation and test evaluation reports at cutoff 10."""
    # Hold out 10% of interactions for testing and 20% for validation.
    URM_train, URM_val, URM_test = splitter.split(urm, testing=0.1, validation=0.2)

    val_evaluator = EvaluatorHoldout(URM_val, [10])
    test_evaluator = EvaluatorHoldout(URM_test, [10])

    model = Hybrid(URM_train, ICM)
    model.fit()

    # Evaluate on validation first, then on test, printing each report.
    for evaluator in (val_evaluator, test_evaluator):
        _, report = evaluator.evaluateRecommender(model)
        print(report)
def objective(latent_factors, regularization,
              alpha):  # parameters must be the same defined above
    """Objective for the hyperparameter optimizer: negative average MAP@10.

    The model is trained and evaluated on ``n_tests`` different train/test
    splits (fixed seeds) and the MAP is averaged, so a single lucky split
    cannot dominate the score.

    Returns the *negative* average MAP because the optimizer minimizes.
    """
    average_map = 0.0
    n_tests = 3  # number of tests (on different data splits)
    seeds = [1234, 12, 34]  # one split seed per test

    for i in range(n_tests):
        URM_train, URM_test = splitter.split_train_test(urm,
                                                        testing=0.15,
                                                        seed=seeds[i])
        # Keep only users with a number of interactions between 0 and 5
        # (5 excluded), i.e. evaluate on cold users only.
        URM_test = n_interaction_interval(URM_test, 0, 5)

        evaluator_test = EvaluatorHoldout(URM_test, [10])

        rec = ALS(URM_train)  # can be used also with other recommenders
        rec.fit(latent_factors=latent_factors,
                regularization=regularization,
                iterations=100,
                alpha=alpha)  # pass the parameters we are tuning

        results_run_dict, _ = evaluator_test.evaluateRecommender(rec)
        average_map += results_run_dict[10]['MAP']

    # Bug fix: the summary previously omitted `alpha` even though it is one
    # of the tuned parameters.
    print(
        f"\nlatent_factors: {latent_factors}, regularization: {regularization}, "
        f"alpha: {alpha}\navg MAP: {average_map / n_tests}\n\n"
    )
    # Negative because the optimizer minimizes; averaging over splits avoids
    # overfitting to a specific data split.
    return -average_map / n_tests
def single_test(urm_train, urm_test, urm_valid, x_tick):
    """Fit HybridNorm3Recommender with the globally tuned `best_alpha` and
    return its MAP@10 on the test split (urm_valid and x_tick are unused)."""
    test_evaluator = EvaluatorHoldout(urm_test, cutoff_list=[10])

    model = HybridNorm3Recommender(urm_train)
    model.fit(beta=best_alpha)

    metrics, _ = test_evaluator.evaluateRecommender(model)
    return metrics[10]['MAP']
Beispiel #4
0
def ablation_study(arguments):
    """Study the effect of GANMF's feature-matching coefficient.

    For every selected dataset/mode pair, retrains GANMF with its best saved
    hyperparameters while sweeping ``recon_coefficient`` over [0, 1], then
    plots each metric against the coefficient (one figure per cutoff).

    ``arguments`` is a list of CLI tokens selecting datasets ('--run-all' or
    dataset names) and optionally a single train mode ('user' or 'item').
    """
    study_path = 'ablation_study'
    if not os.path.exists(study_path):
        os.makedirs(study_path, exist_ok=False)

    exp_path = 'experiments'
    datasets = []
    modes = ['user', 'item']
    run_all = False

    if '--run-all' in arguments:
        datasets = all_datasets
        run_all = True

    for arg in arguments:
        if arg in name_datasets and not run_all:
            datasets.append(all_datasets[name_datasets.index(arg)])
        if arg in modes:
            modes = [arg]

    cutoffs = [5, 10, 20, 50]

    marker = itertools.cycle(['o', '^', 's', 'p', '1', 'D', 'P', '*'])

    for m in modes:
        for d in datasets:
            # plotting_data[cutoff][metric] -> one value per coefficient tried.
            plotting_data = {c: {met: [] for met in metrics} for c in cutoffs}
            best_params = load_best_params(exp_path, d if isinstance(d, str) else d.DATASET_NAME, 'GANMF', m)
            range_coeff = np.arange(0, 1.1, 0.2)
            for coeff in range_coeff:
                best_params['recon_coefficient'] = coeff
                URM_train, URM_test, _, _, _ = load_URMs(d, dataset_kwargs)
                set_seed(seed)
                test_evaluator = EvaluatorHoldout(URM_test, cutoffs, exclude_seen=True)
                model = GANMF(URM_train, mode=m, seed=seed, is_experiment=True)
                model.fit(validation_set=None, sample_every=None, validation_evaluator=None, **best_params)
                result_dict, result_str = test_evaluator.evaluateRecommender(model)
                # Bug fix: removed dead `plotting_data[coeff] = {}`, which only
                # polluted the dict with unused per-coefficient keys.
                for c in cutoffs:
                    for met in metrics:
                        plotting_data[c][met].append(result_dict[c][met])

            dname = d if isinstance(d, str) else d.DATASET_NAME
            substudy_path = os.path.join(study_path, dname + '_GANMF_' + m)
            if not os.path.exists(substudy_path):
                os.makedirs(substudy_path, exist_ok=False)

            for c in cutoffs:
                fig, ax = plt.subplots(figsize=(20, 10))
                ax.set_xlabel('Feature Matching Coefficient')
                for met in metrics:
                    ax.plot(range_coeff, plotting_data[c][met], label=met, marker=next(marker))
                ax.legend(loc='best', fontsize='x-large')
                fig.savefig(os.path.join(substudy_path, str(c) + '_feature_matching_effect.png'), bbox_inches='tight')
                # Bug fix: release figure memory; the sweep creates many figures.
                plt.close(fig)
Beispiel #5
0
def get_precision(learning_rate, num_epoch, URM_train, URM_test):
    """Train SLIM-BPR with the given learning rate and epoch budget and
    return its PRECISION@10 on the test split."""
    model = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    model.fit(epochs=num_epoch,
              batch_size=1,
              sgd_mode='sgd',
              learning_rate=learning_rate,
              positive_threshold_BPR=1)

    evaluator = EvaluatorHoldout(URM_test, cutoff_list=[10])
    metrics, _ = evaluator.evaluateRecommender(model)
    return metrics[10]['PRECISION']
Beispiel #6
0
def search_param(alpha, beta, topK):
    """Score (alpha, beta, topK) on every validation fold in `my_input`;
    on a new validation best, also score the test folds and track the test
    best in the global bookkeeping dict `vec`. Returns the mean valid MAP@10."""

    def _mean_map(fold_index):
        # Fit and evaluate the fold's recommender on the URM at `fold_index`
        # (0 = validation split, 2 = test split); return per-fold MAP@10.
        maps = []
        for fold in my_input:
            model = fold[1]
            evaluator = EvaluatorHoldout(fold[fold_index], cutoff_list=[10])
            model.fit(alpha=alpha, beta=beta, topK=int(topK))
            fold_result, _ = evaluator.evaluateRecommender(model)
            maps.append(fold_result[10]['MAP'])
        return np.array(maps)

    valid_maps = _mean_map(0)

    print('Il max valid è il n: {}  con : {}'.format(vec['n_valid'], optimizer.max))
    print('Il max test è il n : {} con test : {}'.format(vec['n_test'], vec['max_test']))
    print('Il Map corrente è : {}'.format(valid_maps.mean()))

    if valid_maps.mean() > vec['max_valid']:
        vec['n_valid'] = vec['n']
        vec['max_valid'] = valid_maps.mean()
        print('new max valid found')
        # Only when validation improves do we spend time scoring the test folds.
        test_maps = _mean_map(2)
        if test_maps.mean() > vec['max_test']:
            print('un nuovo max è stato trovato')
            vec['max_test'] = test_maps.mean()
            vec['n_test'] = vec['n']

    vec['n'] += 1
    return valid_maps.mean()
def single_test(urm_train, urm_test, urm_valid, x_tick):
    """Sweep the hybrid's `beta` over the values in x_tick and return the
    list of validation MAP@10 scores, one per tried value."""
    validator = EvaluatorHoldout(urm_valid,
                                 cutoff_list=[10],
                                 verbose=False)

    model = HybridNorm3Recommender(urm_train)

    maps = []
    for candidate in tqdm(x_tick):
        model.fit(beta=candidate)
        metrics, _ = validator.evaluateRecommender(model)
        maps.append(metrics[10]["MAP"])

    return maps
    def single_test(urm_train, urm_test, urm_valid):
        """Fit a tuned user-content KNN recommender and return test MAP@10."""
        # The validation evaluator is constructed (as in the original) but
        # only the test evaluator is actually used below.
        evaluator_valid = EvaluatorHoldout(urm_valid, cutoff_list=[10])
        evaluator_test = EvaluatorHoldout(urm_test, cutoff_list=[10])

        model = UserKNNCBFRecommender(urm_train, ucm_all)
        model.fit(shrink=1777,
                  topK=1998,
                  similarity='tversky',
                  feature_weighting='BM25',
                  tversky_alpha=0.1604953616,
                  tversky_beta=0.9862348646)

        metrics, _ = evaluator_test.evaluateRecommender(model)
        return metrics[10]['MAP']
Beispiel #9
0
# Now that we have the split, we can create the evaluators.
# The constructor of the evaluator allows you to specify the evaluation conditions (data, recommendation list length,
# excluding already seen items). Whenever you want to evaluate a model, use the evaluateRecommender function of the evaluator object
evaluator_validation = EvaluatorHoldout(URM_validation,
                                        cutoff_list=[5],
                                        exclude_seen=False)
evaluator_test = EvaluatorHoldout(URM_test,
                                  cutoff_list=[5, 10, 20],
                                  exclude_seen=False)

# We now fit and evaluate a non personalized algorithm
recommender = TopPop(URM_train)
recommender.fit()

results_dict, results_run_string = evaluator_validation.evaluateRecommender(
    recommender)
print("Result of TopPop is:\n" + results_run_string)

# We now fit and evaluate a personalized algorithm passing some hyperparameters to the fit functions
recommender = P3alphaRecommender(URM_train)
recommender.fit(topK=100, alpha=0.5)

results_dict, results_run_string = evaluator_validation.evaluateRecommender(
    recommender)
print("Result of P3alpha is:\n" + results_run_string)

# We now use a content-based algorithm and a hybrid content-collaborative algorithm
ICM_genres = ICM_dict["ICM_genres"]  # item-content matrix with genre features
recommender = ItemKNNCBFRecommender(URM_train, ICM_genres)
recommender.fit(topK=100, similarity="cosine")
Beispiel #10
0
def main(arguments):
    """Evaluate one recommender on the selected datasets with its best saved
    hyperparameters and store the test metrics on disk.

    ``arguments`` is a list of CLI tokens selecting datasets ('--run-all' or
    dataset names), optionally a train mode ('user'/'item', GAN models only)
    and a recommender name from `all_recommenders`.
    """
    test_results_path = 'test_results'
    os.makedirs(test_results_path, exist_ok=True)

    exp_path = 'experiments'
    datasets = []
    run_all = False
    train_mode = ['user', 'item']
    cutoffs = [5, 10, 20, 50]
    recommender = None

    # Registry of supported recommender classes, keyed by CLI name.
    dict_rec_classes = {
        'TopPop': TopPop,
        'Random': Random,
        'PureSVD': PureSVDRecommender,
        'BPR': MatrixFactorization_BPR_Cython,
        'ALS': IALSRecommender,
        'NMF': NMFRecommender,
        'GANMF': GANMF,
        'CFGAN': CFGAN,
        'DisGANMF': DisGANMF,
        'SLIMBPR': SLIM_BPR_Cython,
        'fullGANMF': fullGANMF,
        'DeepGANMF': DeepGANMF,
    }

    # Models that take a train mode and GAN-style fit arguments.
    gan_models = ['GANMF', 'DisGANMF', 'CFGAN', 'fullGANMF', 'DeepGANMF']

    if '--run-all' in arguments:
        datasets = all_datasets
        run_all = True

    for arg in arguments:
        if arg in name_datasets and not run_all:
            datasets.append(all_datasets[name_datasets.index(arg)])
        if arg in ['user', 'item']:
            train_mode = [arg]
        if arg in all_recommenders and recommender is None:
            recommender = arg

    # Train mode is only meaningful for the GAN-based models.
    if recommender not in gan_models:
        train_mode = ['']

    for d in datasets:
        dname = d if isinstance(d, str) else d.DATASET_NAME
        for mode in train_mode:
            # fullGANMF reuses the hyperparameters tuned for GANMF.
            if recommender == 'fullGANMF':
                best_params = load_best_params(exp_path, dname, 'GANMF', mode)
            else:
                best_params = load_best_params(exp_path, dname, dict_rec_classes[recommender].RECOMMENDER_NAME, mode)
            set_seed(seed)
            URM_train, URM_test, _, _, _ = load_URMs(d, dataset_kwargs)
            test_evaluator = EvaluatorHoldout(URM_test, cutoffs, exclude_seen=True)
            if recommender in gan_models:
                model = dict_rec_classes[recommender](URM_train, mode=mode, seed=seed, is_experiment=True)
                model.fit(validation_set=None, sample_every=None, validation_evaluator=None, **best_params)
            else:
                model = dict_rec_classes[recommender](URM_train)
                model.fit(**best_params)
            results_dict, results_str = test_evaluator.evaluateRecommender(model)

            # Write results only once per (model, mode, dataset) combination;
            # the original's two-branch directory/file dance collapses to this.
            save_path = os.path.join(test_results_path, model.RECOMMENDER_NAME + '_' + mode + '_' + dname)
            os.makedirs(save_path, exist_ok=True)
            results_filename = os.path.join(save_path, 'test_results.txt')
            if not os.path.exists(results_filename):
                with open(results_filename, 'a') as f:
                    f.write(results_str)
Beispiel #11
0
from FeatureWeighting.User_CFW_D_Similarity_Linalg import User_CFW_D_Similarity_Linalg
from Hybrid.HybridNorm3Recommender import HybridNorm3Recommender
from MatrixFactorization.ALSRecommender import ALSRecommender
from MatrixFactorization.BPRRecommender import BPRRecommender
import similaripy as sim

data = DataManager()
urm_train = data.get_urm()  # NOTE(review): immediately overwritten by the split below

# Leave-k-out splits: first hold out the test set, then carve a validation
# set out of what remains of the training data.
urm_train, urm_test = split_train_leave_k_out_user_wise(data.get_urm(),
                                                        temperature='normal')
urm_train, urm_valid = split_train_leave_k_out_user_wise(urm_train,
                                                         temperature='valid2')

# Restrict to "warm" users (threshold=10) — presumably users with at least
# that many interactions; confirm against DataManager.create_test_warm_users.
urm_train_warm = data.create_test_warm_users(urm_train, threshold=10)
urm_test_warm = data.create_test_warm_users(urm_test, threshold=10)

evaluator_test_warm = EvaluatorHoldout(urm_test_warm, cutoff_list=[10])

# Same model class trained on the full train set vs. the warm-users-only set,
# both evaluated on the warm test users.
recommender = UserKNNCFRecommender(urm_train)
recommender.fit(shrink=2, topK=600, normalize=True)

recommender_warm = UserKNNCFRecommender(urm_train_warm)
recommender_warm.fit(shrink=2, topK=500, normalize=True)

result, str_result = evaluator_test_warm.evaluateRecommender(recommender)
print('The Map of test of urm normal is : {}'.format(result[10]['MAP']))

result, str_result = evaluator_test_warm.evaluateRecommender(recommender_warm)
print('The Map of test of urm warm is : {}'.format(result[10]['MAP']))
        # Persist this fold's trained model for later reuse.
        rec.save_model(
            f'stored_recommenders/seed_{str(seed)}_hybrid_search/{rec.RECOMMENDER_NAME}/',
            f'{str(seed)}_fold-{str(i)}')

        # r, _ =evaluator_list[i].evaluateRecommender(rec)
        # result.append(r[10]['MAP'])

    # print(result)
    # print(np.average(result))

    exit(0)  # NOTE(review): everything below is dead code while this exit stays
    print("Making a submission... ")
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_all = parser.get_ICM_all()

    dict_1 = {'ICM_train': ICM_all}
    # NOTE(review): `*dict_1` unpacks the dict's KEYS, so the string
    # 'ICM_train' is passed as the second argument — looks like a bug, but
    # `rec` is unused afterwards so it is harmless here. Confirm intent.
    rec = ItemKNNCBFRecommender(URM_all, *dict_1)
    # rec_sub = UserWiseHybrid009(URM_all, ICM_all, submission=True)
    # rec_sub.fit()
    # create_csv(parser, rec_sub, 'UserWiseHybrid009')

    # Hold out 10% of interactions for a final offline evaluation.
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=1205)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    recommender = UserWiseHybrid009(URM_train, ICM_all, submission=True)
    recommender.fit()
    result, _ = evaluator_test.evaluateRecommender(recommender)
    print(result[10])
Beispiel #13
0
gan = GANMF(URM_train, mode='user')

# Train GANMF; allow_worse/freq and the validation evaluator drive early
# stopping (presumably patience-style — confirm against GANMF.fit).
gan.fit(num_factors=10,
        emb_dim=128,
        d_reg=1e-4,
        g_reg=0,
        epochs=300,
        batch_size=128,
        g_lr=1e-3,
        d_lr=1e-3,
        d_steps=1,
        g_steps=1,
        recon_coefficient=0.05,
        m=3,
        allow_worse=5,
        freq=5,
        validation_evaluator=evaluatorValidation,
        sample_every=10,
        validation_set=URM_validation)

if not only_build:
    results_dic, results_run_string = evaluator.evaluateRecommender(gan)
    print(results_run_string)

    # Archive the training logs into a folder named after the achieved MAP@5,
    # replacing any previous archive with the same score prefix.
    map_folder = os.path.join('plots', gan.RECOMMENDER_NAME,
                              'MAP_' + str(results_dic[5]['MAP'])[:7])
    if os.path.exists(map_folder):
        shutil.rmtree(map_folder)
    shutil.move(src=gan.logsdir, dst=map_folder)
Beispiel #14
0
    # User-based collaborative KNN with tuned hyperparameters.
    userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    userKNNCF.fit(**{
        "topK": 131,
        "shrink": 2,
        "similarity": "cosine",
        "normalize": True
    })

    # Content-based item KNN built on the item-content matrix.
    itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_train)
    itemKNNCBF.fit(topK=700,
                   shrink=200,
                   similarity='jaccard',
                   normalize=True,
                   feature_weighting="TF-IDF")

    # Hybrid of the two models — presumably a weighted blend of their scores
    # controlled by alpha; confirm against ItemKNNScoresHybridRecommender.
    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, itemKNNCBF, userKNNCF)
    hyb.fit(alpha=0.5)

    # Kaggle MAP 0.081
    hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb, itemKNNCF)
    hyb2.fit(alpha=0.5)

    print(evaluator_validation.evaluateRecommender(userKNNCF))
    print(evaluator_validation.evaluateRecommender(hyb))
    print(evaluator_validation.evaluateRecommender(hyb2))

    # Export top-10 recommendations for the target users as a submission CSV.
    item_list = hyb.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'Hyb_User_Item_KNNCF')
def run_recommender(recommender_class):
    """Smoke-test one recommender class end to end: fit, evaluate with two
    evaluators, save and reload the model, and log the PASS / exception
    outcome to the module-level `log_file`."""

    temp_save_file_folder = "./result_experiments/__temp_model/"

    if not os.path.isdir(temp_save_file_folder):
        os.makedirs(temp_save_file_folder)

    try:
        dataset_object = Movielens1MReader()

        # Leave-2-out split of the dataset into train/validation/test.
        dataSplitter = DataSplitter_leave_k_out(dataset_object, k_out_value=2)

        dataSplitter.load_data()
        URM_train, URM_validation, URM_test = dataSplitter.get_holdout_split()

        write_log_string(log_file, "On Recommender {}\n".format(recommender_class))

        recommender_object = recommender_class(URM_train)

        # Early-stopping algorithms need an epoch budget; others take no args.
        if isinstance(recommender_object, Incremental_Training_Early_Stopping):
            fit_params = {"epochs": 15}
        else:
            fit_params = {}

        recommender_object.fit(**fit_params)

        write_log_string(log_file, "Fit OK, ")

        evaluator = EvaluatorHoldout(URM_test, [5], exclude_seen=True)
        _, results_run_string = evaluator.evaluateRecommender(recommender_object)

        write_log_string(log_file, "EvaluatorHoldout OK, ")

        evaluator = EvaluatorNegativeItemSample(URM_test, URM_train, [5], exclude_seen=True)
        _, _ = evaluator.evaluateRecommender(recommender_object)

        write_log_string(log_file, "EvaluatorNegativeItemSample OK, ")

        recommender_object.saveModel(temp_save_file_folder, file_name="temp_model")

        write_log_string(log_file, "saveModel OK, ")

        # Reload into a fresh instance to verify persistence round-trips.
        recommender_object = recommender_class(URM_train)
        recommender_object.loadModel(temp_save_file_folder, file_name="temp_model")

        evaluator = EvaluatorHoldout(URM_test, [5], exclude_seen=True)
        _, results_run_string_2 = evaluator.evaluateRecommender(recommender_object)

        write_log_string(log_file, "loadModel OK, ")

        shutil.rmtree(temp_save_file_folder, ignore_errors=True)

        write_log_string(log_file, " PASS\n")
        write_log_string(log_file, results_run_string + "\n\n")

    except Exception as e:
        # Best-effort harness: record the failure and let the caller move on
        # to the next recommender instead of aborting the whole run.
        print("On Recommender {} Exception {}".format(recommender_class, str(e)))
        log_file.write("On Recommender {} Exception {}\n\n\n".format(recommender_class, str(e)))
        log_file.flush()

        traceback.print_exc()
Beispiel #16
0
class RecSysExp:
    """A single hyperparameter-tuning experiment: one recommender, one dataset.

    Handles data loading/splitting, evaluator construction, the black-box
    objective minimized by scikit-optimize, checkpoint resume, and the final
    retraining + test evaluation.
    """

    def __init__(self, recommender_class, dataset, fit_param_names=None, metric='MAP',
                 method='bayesian', at=5, verbose=True, seed=1234):
        """
        Parameters
        ----------
        recommender_class:
            Class of the recommender to tune.
        dataset:
            Dataset name (str) or dataset reader class exposing DATASET_NAME.
        fit_param_names: list of str, default None
            Names of the hyperparameters forwarded to the recommender's fit().
        metric: str
            Metric optimized on the validation split.
        method: str
            'bayesian' for Gaussian Process search, anything else for random search.
        at: int
            Cutoff used for the validation metric.
        verbose: bool
            Verbosity flag.
        seed: int
            Seed for reproducibility of weight init and dataset splitting.
        """
        # Seed for reproducibility of results and consistent initialization
        # of weights / splitting of the dataset.
        set_seed(seed)

        self.recommender_class = recommender_class
        self.dataset = dataset
        self.dataset_name = self.dataset if isinstance(self.dataset, str) else self.dataset.DATASET_NAME
        # Bug fix: the original signature used a mutable default ([]) which is
        # appended to in tune(), leaking tuning state across instances and
        # mutating the caller's list. Copy defensively instead.
        self.fit_param_names = list(fit_param_names) if fit_param_names is not None else []
        self.metric = metric
        self.method = method
        self.at = at
        self.verbose = verbose
        self.seed = seed
        self.isGAN = False

        self.logsdir = os.path.join('experiments',
                self.recommender_class.RECOMMENDER_NAME + '_' + train_mode + '_' + self.dataset_name)

        if not os.path.exists(self.logsdir):
            os.makedirs(self.logsdir, exist_ok=False)

        # Snapshot the experiment and recommender source files for traceability.
        codesdir = os.path.join(self.logsdir, 'code')
        os.makedirs(codesdir, exist_ok=True)
        shutil.copy(os.path.abspath(sys.modules[self.__module__].__file__), codesdir)
        shutil.copy(os.path.abspath(sys.modules[self.recommender_class.__module__].__file__), codesdir)

        self.URM_train, self.URM_test, self.URM_validation, self.URM_train_small, self.URM_early_stop = load_URMs(
            dataset, dataset_kwargs)

        self.evaluator_validation = EvaluatorHoldout(self.URM_validation, [self.at], exclude_seen=True)
        self.evaluator_earlystop = EvaluatorHoldout(self.URM_early_stop, [self.at], exclude_seen=True)
        self.evaluatorTest = EvaluatorHoldout(self.URM_test, [self.at, 10, 20, 50], exclude_seen=True, minRatingsPerUser=2)

        self.fit_params = {}

        # A recommender defined inside the `gans` package is GAN-based and
        # uses its own early-stopping protocol (my_early_stopping below).
        modules = getattr(self.recommender_class, '__module__', None)
        if modules and modules.split('.')[0] == gans.__name__:
            self.isGAN = True

        # Early stopping from Maurizio's framework, used for the baselines.
        self.early_stopping_parameters = {
            'epochs_min': 0,
            'validation_every_n': 5,
            'stop_on_validation': True,
            'validation_metric': self.metric,
            'lower_validations_allowed': 5,
            'evaluator_object': self.evaluator_earlystop
        }

        # Early stopping for the GAN-based recommenders.
        self.my_early_stopping = {
            'allow_worse': 5,
            'freq': 5,
            'validation_evaluator': self.evaluator_earlystop,
            'validation_set': None,
            'sample_every': None,
        }

    def build_fit_params(self, params):
        """Map an ordered list of sampled values onto self.fit_params using
        self.dimension_names; 'epochs' is kept only for early-stopping algos."""
        for i, val in enumerate(params):
            param_name = self.dimension_names[i]
            if param_name in self.fit_param_names:
                self.fit_params[param_name] = val
            elif param_name == 'epochs' and self.recommender_class in early_stopping_algos:
                self.fit_params[param_name] = val

    def save_best_params(self, additional_params=None):
        """Pickle the current best fit parameters (optionally merged with
        `additional_params`) into the experiment's log directory."""
        d = dict(self.fit_params)
        if additional_params is not None:
            d.update(additional_params)
        with open(os.path.join(self.logsdir, 'best_params.pkl'), 'wb') as f:
            pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)

    def load_best_params(self):
        """Return the pickled best parameters of this experiment."""
        with open(os.path.join(self.logsdir, 'best_params.pkl'), 'rb') as f:
            return pickle.load(f)

    def obj_func(self, params):
        """
        Black-box objective function minimized by the optimizer.

        Parameters
        ----------
        params: list
            One sampled value per dimension in self.dimension_names.

        Returns
        -------
        obj_func_value: float
            Negative validation metric (lower is better for the minimizer).
        """
        print('Optimizing', self.recommender_class.RECOMMENDER_NAME, 'for', self.dataset_name)

        # Split the sampled values into the fit parameters.
        self.build_fit_params(params)

        # Create the model and fit it.
        try:
            if self.isGAN:
                model = self.recommender_class(self.URM_train_small, mode=train_mode, seed=seed, is_experiment=True)
                model.logsdir = self.logsdir
                fit_early_params = dict(self.fit_params)
                fit_early_params.update(self.my_early_stopping)
                last_epoch = model.fit(**fit_early_params)

                # Save the right number of epochs that produces the current model.
                if last_epoch != self.fit_params['epochs']:
                    self.fit_params['epochs'] = last_epoch - \
                                                self.my_early_stopping['allow_worse'] * self.my_early_stopping['freq']

            else:
                model = self.recommender_class(self.URM_train_small)
                if self.recommender_class in early_stopping_algos:
                    fit_early_params = dict(self.fit_params)
                    fit_early_params.update(self.early_stopping_parameters)
                    model.fit(**fit_early_params)
                else:
                    model.fit(**self.fit_params)

            results_dic, results_run_string = self.evaluator_validation.evaluateRecommender(model)
            fitness = -results_dic[self.at][self.metric]
        except tf.errors.ResourceExhaustedError:
            # An OOM configuration gets the worst possible score (0 >= any -metric).
            return 0

        # Track the best configuration seen so far; the AttributeError branch
        # initializes self.best_res on the very first call.
        try:
            if fitness < self.best_res:
                self.best_res = fitness
                self.save_best_params(additional_params=dict(epochs=model.epochs_best) if self.recommender_class in early_stopping_algos else None)
        except AttributeError:
            self.best_res = fitness
            self.save_best_params(additional_params=model.get_early_stopping_final_epochs_dict() if self.recommender_class in early_stopping_algos else None)

        # Append this trial's parameters and results to the running log.
        # NOTE(review): `d` aliases self.fit_params, so the update below also
        # mutates the stored fit parameters — behavior preserved from the original.
        with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
            d = self.fit_params
            if self.recommender_class in early_stopping_algos:
                d.update(model.get_early_stopping_final_epochs_dict())
            d_str = json.dumps(d)
            f.write(d_str)
            f.write('\n')
            f.write(results_run_string)
            f.write('\n\n')

        return fitness

    def tune(self, params, evals=10, init_config=None, seed=None):
        """
        Run the hyperparameter search using Gaussian Process as surrogate model
        or Random Search, save the results of the trials, and retrain on the
        full training data with the best parameters found.

        Parameters
        ----------
        params: list
            List of skopt.space.space.Dimension to be searched.
        evals: int
            Number of evaluations to perform.
        init_config: list, default None
            An initial parameter configuration for seeding the Gaussian Process.
        seed: int, default None
            Seed for random_state of `gp_minimize` or `dummy_minimize`.
            Set to a fixed integer for reproducibility.
        """
        msg = 'Started ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])

        # Shape of the test URM: U users x I items.
        U, I = self.URM_test.shape

        # Architecture-specific dimensions, upper-bounded by the item count.
        if self.recommender_class == GANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='emb_dim', dtype=int))
            self.fit_param_names.append('emb_dim')

        if self.recommender_class == CFGAN or self.recommender_class == DeepGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='g_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')
            self.fit_param_names.append('g_nodes')

        if self.recommender_class == DisGANMF:
            params.append(Integer(4, int(I * 0.75) if I <= 1024 else 1024, name='d_nodes', dtype=int))
            self.fit_param_names.append('d_nodes')

        self.dimension_names = [p.name for p in params]

        # `num_factors` cannot exceed min(U, I); clamp its upper bound.
        try:
            idx = self.dimension_names.index('num_factors')
            maxval = params[idx].bounds[1]
            if maxval > min(U, I):
                params[idx] = Integer(1, min(U, I), name='num_factors', dtype=int)
        except ValueError:
            pass

        if len(params) > 0:

            # Resume from a checkpoint if one exists for this experiment.
            checkpoint_path = os.path.join(self.logsdir, 'checkpoint.pkl')
            checkpoint_exists = True if os.path.exists(checkpoint_path) else False
            checkpoint_saver = CheckpointSaver(os.path.join(self.logsdir, 'checkpoint.pkl'), compress=3)

            if seed is None:
                seed = self.seed

            t_start = int(time.time())

            if checkpoint_exists:
                # Continue the previous run for the remaining budget only.
                previous_run = skopt.load(checkpoint_path)
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params, n_calls=evals - len(previous_run.func_vals),
                                          x0=previous_run.x_iters, y0=previous_run.func_vals, n_random_starts=0,
                                          random_state=seed, verbose=True, callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params, n_calls=evals - len(previous_run.func_vals),
                                             x0=previous_run.x_iters, y0=previous_run.func_vals, random_state=seed,
                                             verbose=True, callback=[checkpoint_saver])
            else:
                # Fresh hyperparameter optimization.
                if self.method == 'bayesian':
                    results = gp_minimize(self.obj_func, params, n_calls=evals, random_state=seed, verbose=True,
                                          callback=[checkpoint_saver])
                else:
                    results = dummy_minimize(self.obj_func, params, n_calls=evals, random_state=seed, verbose=True,
                                          callback=[checkpoint_saver])

            t_end = int(time.time())

            best_params = self.load_best_params()

            with open(os.path.join(self.logsdir, 'results.txt'), 'a') as f:
                f.write('Experiment ran for {}\n'.format(str(datetime.timedelta(seconds=t_end - t_start))))
                f.write('Best {} score: {}. Best result found at: {}\n'.format(self.metric, results.fun, best_params))

            if self.recommender_class in [IALSRecommender, MatrixFactorization_BPR_Cython]:
                self.dimension_names.append('epochs')
            self.build_fit_params(best_params.values())

        # Retrain with all training data using the best configuration.
        set_seed(seed)
        if self.isGAN:
            model = self.recommender_class(self.URM_train, mode=train_mode, is_experiment=True)
            model.logsdir = self.logsdir
            model.fit(**self.fit_params)
        else:
            model = self.recommender_class(self.URM_train)
            model.fit(**self.fit_params)

        _, results_run_string = self.evaluatorTest.evaluateRecommender(model)

        print('\n\nResults on test set:')
        print(results_run_string)
        print('\n\n')

        with open(os.path.join(self.logsdir, 'result_test.txt'), 'w') as f:
            f.write(results_run_string)

        msg = 'Finished ' + self.recommender_class.RECOMMENDER_NAME + ' ' + self.dataset_name
        subprocess.run(['telegram-send', msg])
Beispiel #17
0
    # Tuned user-based collaborative KNN.
    recommender4 = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    recommender4.fit(
        **{
            "topK": 305,
            "shrink": 0,
            "similarity": "cosine",
            "normalize": True,
            "feature_weighting": "TF-IDF"
        })

    # MAP 0.049 (topK=100, l2_norm = 1e3, normalize_matrix = False, verbose = True)
    # recommender = EASE_R_Recommender.EASE_R_Recommender(URM_train)
    # recommender.fit(topK=None, l2_norm = 3 * 1e3, normalize_matrix = False, verbose = True)

    # MAP 0.053
    #recommender = ItemKNNSimilarityHybridRecommender.ItemKNNSimilarityHybridRecommender(URM_train, w1, w2)
    #recommender.fit(topK=300, alpha = 0.7)

    # Chain two hybrids: first (recommender3 + recommender4), then blend that
    # with recommender1 — presumably score blends weighted by alpha; confirm
    # against ItemKNNScoresHybridRecommender.
    recommendert = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, recommender3, recommender4)
    recommendert.fit(alpha=0.6)

    recommender = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, recommender1, recommendert)
    recommender.fit(alpha=0.6)

    print(evaluator_validation.evaluateRecommender(recommender))

    # Export top-10 recommendations for the target users as a submission CSV.
    item_list = recommender.recommend(target_ids, cutoff=10)
    CreateCSV.create_csv(target_ids, item_list, 'MyRec')
        return scores


# recommenderSLIM = SLIM_BPR_Recommender(URM_all)
# recommenderSLIM.fit(learning_rate=0.001, epochs=5)

# Load the pre-computed train/test splits from disk and convert to CSR.
URM_train = sps.load_npz('myFiles/train_set.npz')
URM_test = sps.load_npz('myFiles/test_set.npz')

URM_train = URM_train.tocsr()
URM_test = URM_test.tocsr()

recommenderCYTHON = SLIM_BPR_Cython(URM_train, recompile_cython=False)

# Bug fix: sgd_mode was misspelled 'sdg'; the framework expects 'sgd'
# (same spelling used elsewhere in this codebase).
recommenderCYTHON.fit(epochs=3000, batch_size=100, sgd_mode='sgd', learning_rate=1e-6, topK=10)


evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[5])


result, result_string = evaluator_validation.evaluateRecommender(recommenderCYTHON)

print(result_string)
'''
filename = 'out' + '19'
# REMEMBER TO CHANGE THE FILE NAME!!!
print_to_csv_age_and_region(recommenderCYTHON, recommenderTP, filename)
'''

def compute_group_MAP(args, group_id):
    """Evaluate the hybrid recommenders on one block of users grouped by profile length.

    Parameters
    ----------
    args : dict
        Carries "block_size", "profile_length", "sorted_users", "cutoff",
        "URM_test" and the recommenders "hyb", "hyb2", "hyb3", "hyb5",
        "hyb6", "hyb7" (the last one may be None).
    group_id : int
        Index of the user block (users sorted by profile length).

    Returns
    -------
    list of single-element lists, one per recommender, each holding its MAP
    at ``cutoff`` on this group; hyb7's entry is appended last only when it
    is not None (same order/shape as the original copy-pasted version).
    """
    block_size = args["block_size"]
    profile_length = args["profile_length"]
    sorted_users = args["sorted_users"]
    cutoff = args["cutoff"]
    URM_test = args["URM_test"]

    # Fixed evaluation order; hyb7 is optional and always goes last.
    recommenders = [args["hyb"], args["hyb2"], args["hyb3"],
                    args["hyb5"], args["hyb6"]]
    hyb7 = args["hyb7"]
    if hyb7 is not None:
        recommenders.append(hyb7)

    start_pos = group_id * block_size
    end_pos = min((group_id + 1) * block_size, len(profile_length))

    users_in_group = sorted_users[start_pos:end_pos]

    users_in_group_p_len = profile_length[users_in_group]

    print("Group {}, average p.len {:.2f}, min {}, max {}".format(group_id,
                                                                  users_in_group_p_len.mean(),
                                                                  users_in_group_p_len.min(),
                                                                  users_in_group_p_len.max()))

    # Restrict the evaluator to this group by ignoring everyone else.
    users_not_in_group_flag = np.isin(sorted_users, users_in_group, invert=True)
    users_not_in_group = sorted_users[users_not_in_group_flag]

    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[cutoff], ignore_users=users_not_in_group)

    # One loop replaces six copy-pasted evaluate/append stanzas and the
    # duplicated `hyb7 is not None` checks of the original.
    group_maps = []
    for recommender in recommenders:
        results, _ = evaluator_test.evaluateRecommender(recommender)
        group_maps.append([results[cutoff]["MAP"]])

    return group_maps
Beispiel #20
0
    users_in_group_p_len = profile_length[users_in_group]

    print("Group {}, average p.len {:.2f}, min {}, max {}".format(
        group_id, users_in_group_p_len.mean(), users_in_group_p_len.min(),
        users_in_group_p_len.max()))

    users_not_in_group_flag = np.isin(sorted_users,
                                      users_in_group,
                                      invert=True)
    users_not_in_group = sorted_users[users_not_in_group_flag]

    evaluator_test = EvaluatorHoldout(URM_test,
                                      cutoff_list=[cutoff],
                                      ignore_users=users_not_in_group)

    results, _ = evaluator_test.evaluateRecommender(slim_model)
    MAP_slim_per_group.append(results[cutoff]["MAP"])

    results, _ = evaluator_test.evaluateRecommender(rp3_model)
    MAP_rp3_per_group.append(results[cutoff]["MAP"])

    results, _ = evaluator_test.evaluateRecommender(userCF_model)
    MAP_userCF_per_group.append(results[cutoff]["MAP"])

# Persist the three fitted base models so the ensemble can be rebuilt later.
slim_model.save_model("./result_experiments/results_ensemble/", "slim_1")
rp3_model.save_model("./result_experiments/results_ensemble/", "rp3_1")
userCF_model.save_model("./result_experiments/results_ensemble/", "userCF_1")

# NOTE(review): mid-file import kept where the original author placed it.
import matplotlib.pyplot as pyplot

# MAP_slim_per_group is presumably accumulated by the per-group loop above -- TODO confirm
pyplot.plot(MAP_slim_per_group, label="slim")
Beispiel #21
0
def _load_or_fit(recommender, args, seed, tp):
    """Load a cached fitted model for *recommender* if one exists, else fit and cache it.

    The cache file name encodes the recommender name, its hyper-parameters
    (via toFileName), the split seed and the train percentage, so a cached
    model is only reused for the exact same experimental configuration.
    """
    model_name = (recommender.RECOMMENDER_NAME + toFileName(args) +
                  ",s=" + str(seed) + ",tp=" + str(tp))
    try:
        recommender.load_model('SavedModels\\', model_name + ".zip")
    except Exception:
        # Narrowed from the original bare `except:` so that KeyboardInterrupt
        # and SystemExit are no longer silently swallowed.
        print("Saved model not found. Fitting a new one...")
        recommender.fit(**args)
        recommender.save_model('SavedModels\\', model_name)


def crossval(URM_all, ICM_all, target_ids, k):
    """Run one cross-validation fold: fit/load base recommenders and hybrids,
    then return the MAP@10 of the five hybrids as [v0, v1, v2, v3, v4].

    Parameters
    ----------
    URM_all : user-rating matrix with all interactions
    ICM_all : item-content matrix
    target_ids : users for the (commented-out) submission code
    k : fold index, used to derive the split seed
    """
    seed = 1234 + k  #+ int(time.time())
    np.random.seed(seed)
    tp = 0.75
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=tp)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.95)
    evaluator_validation = EvaluatorHoldout(URM_test,
                                            cutoff_list=[10],
                                            exclude_seen=True)

    # Base recommenders: each one is loaded from the on-disk cache when
    # available, otherwise fitted with the tuned hyper-parameters and saved.
    p3alpha = P3alphaRecommender.P3alphaRecommender(URM_train)
    _load_or_fit(p3alpha, {
        "topK": 991,
        "alpha": 0.4705816992313091,
        "normalize_similarity": False
    }, seed, tp)

    rp3beta = RP3betaRecommender.RP3betaRecommender(URM_train)
    _load_or_fit(rp3beta, {
        "topK": 991,
        "alpha": 0.4705816992313091,
        "beta": 0.37,
        "normalize_similarity": False
    }, seed, tp)

    itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    _load_or_fit(itemKNNCF, {
        "topK": 1000,
        "shrink": 732,
        "similarity": "cosine",
        "normalize": True,
        "feature_weighting": "TF-IDF"
    }, seed, tp)

    userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    _load_or_fit(userKNNCF, {
        "topK": 131,
        "shrink": 2,
        "similarity": "cosine",
        "normalize": True
    }, seed, tp)

    itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_all)
    _load_or_fit(itemKNNCBF, {
        "topK": 700,
        "shrink": 100,
        "similarity": 'jaccard',
        "normalize": True,
        "feature_weighting": "TF-IDF"
    }, seed, tp)

    #cfw = CFW_D_Similarity_Linalg.CFW_D_Similarity_Linalg(URM_train, ICM_train, itemKNNCF.W_sparse)
    #cfw.fit(show_max_performance=False, logFile=None, loss_tolerance=1e-6,
    #        iteration_limit=500000, damp_coeff=0.5, topK=900, add_zeros_quota=0.5, normalize_similarity=True)

    # Need to change bpr code to avoid memory error, useless since it's bad
    # bpr = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    # bpr.fit(**{"topK": 1000, "epochs": 130, "symmetric": False, "sgd_mode": "adagrad", "lambda_i": 1e-05,
    #          "lambda_j": 0.01, "learning_rate": 0.0001})

    # PureSVD is cheap to fit, so it is never cached.
    pureSVD = PureSVDRecommender.PureSVDRecommender(URM_train)
    pureSVD.fit(num_factors=1000)

    # Score hybrids: pairwise blends of the base recommenders' scores.
    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, p3alpha, itemKNNCBF)
    hyb.fit(alpha=0.5)

    # Kaggle MAP 0.084 rp3beta, itemKNNCBF
    hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, pureSVD, itemKNNCBF)
    hyb2.fit(alpha=0.5)

    # Kaggle MAP 0.08667
    hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb, hyb2)
    hyb3.fit(alpha=0.5)

    #hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, p3alpha, userKNNCF)
    #hyb3.fit(alpha=0.5)

    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
        URM_train, ICM_all)
    # Kaggle MAP 0.08856
    # Full values: "alpha_P": 0.4108657561671193, "alpha": 0.6290871066510789
    _load_or_fit(hyb5, {
        "topK_P": 903,
        "alpha_P": 0.41086575,
        "normalize_similarity_P": False,
        "topK": 448,
        "shrink": 20,
        "similarity": "tversky",
        "normalize": True,
        "alpha": 0.6290871,
        "feature_weighting": "TF-IDF"
    }, seed, tp)

    # hyb5.fit(**{"topK_P": 1000, "alpha_P": 0.5432601071314623, "normalize_similarity_P": True, "topK": 620, "shrink": 0,
    #             "similarity": "tversky", "normalize": False, "alpha": 0.5707347522847057, "feature_weighting": "BM25"})

    # Kaggle MAP 0.086 :(
    #hyb6 = ScoresHybrid3Recommender.ScoresHybrid3Recommender(URM_train, rp3beta, itemKNNCBF, p3alpha)
    #hyb6.fit()

    hyb6 = ScoresHybridRP3betaKNNCBF.ScoresHybridRP3betaKNNCBF(
        URM_train, ICM_all)
    # Full values: "alpha_P": 0.5081918012150626, "alpha": 0.44740093610861603
    _load_or_fit(hyb6, {
        "topK_P": 623,
        "alpha_P": 0.5081918,
        "normalize_similarity_P": False,
        "topK": 1000,
        "shrink": 1000,
        "similarity": "tversky",
        "normalize": True,
        "alpha": 0.4474009,
        "beta_P": 0.0,
        "feature_weighting": "TF-IDF"
    }, seed, tp)

    # MAP@10 of each hybrid on the held-out split.
    v0 = evaluator_validation.evaluateRecommender(hyb)[0][10]["MAP"]
    v1 = evaluator_validation.evaluateRecommender(hyb2)[0][10]["MAP"]
    v2 = evaluator_validation.evaluateRecommender(hyb3)[0][10]["MAP"]
    v3 = evaluator_validation.evaluateRecommender(hyb5)[0][10]["MAP"]
    v4 = evaluator_validation.evaluateRecommender(hyb6)[0][10]["MAP"]

    #item_list = hyb3.recommend(target_ids, cutoff=10)
    #CreateCSV.create_csv(target_ids, item_list, 'ItemKNNCBF__RP3beta')

    return [v0, v1, v2, v3, v4]
Beispiel #22
0
    evaluator_validation, ICM_target=ICM, model_to_use="last")

# We compute the similarity matrix resulting from a RP3beta recommender
# Note that we have not included the code for parameter tuning, which should be done

# Hyper-parameters for the RP3beta graph-based collaborative recommender.
cf_parameters = {
    'topK': 500,
    'alpha': 0.9,
    'beta': 0.7,
    'normalize_similarity': True
}

recommender_collaborative = RP3betaRecommender(URM_train)
recommender_collaborative.fit(**cf_parameters)

# Sanity-check the collaborative model on the held-out test split.
result_dict, result_string = evaluator_test.evaluateRecommender(
    recommender_collaborative)
print("CF recommendation quality is: {}".format(result_string))

# We get the similarity matrix
# The similarity is a scipy.sparse matrix of shape |items|x|items|
# .copy() so that downstream feature weighting cannot mutate the recommender's own matrix.
similarity_collaborative = recommender_collaborative.W_sparse.copy()

# We instance and fit the feature weighting algorithm, it takes as input:
# - The train URM
# - The ICM
# - The collaborative similarity matrix
# Note that we have not included the code for parameter tuning, which should be done as those are just default parameters

fw_parameters = {
    'epochs': 200,
    'learning_rate': 0.0001,
Beispiel #23
0
    # idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]

    recommended_items = np.argsort(-pred_val, axis=1).ravel()[:k]

    is_relevant = np.in1d(recommended_items, pos_items_array, assume_unique=True)

    # his_recall = Recall_at_k_batch(pred_val, pos_items_sparse, k=20)[0]
    # my_recall = recall(is_relevant, pos_items_array)

    his_ndcg = NDCG_binary_at_k_batch(pred_val, pos_items_sparse, k=100)[0]
    my_ndcg = ndcg(recommended_items, pos_items_array)

    if not np.allclose(my_ndcg, his_ndcg, atol=0.0001):
        pass

# Flatten the per-batch metric lists accumulated during evaluation.
n100_list = np.concatenate(n100_list)
r20_list = np.concatenate(r20_list)
r50_list = np.concatenate(r50_list)

# Report mean and standard error (std / sqrt(n)) for each metric.
print("Test NDCG@100=%.5f (%.5f)" % (np.mean(n100_list), np.std(n100_list) / np.sqrt(len(n100_list))))
print("Test Recall@20=%.5f (%.5f)" % (np.mean(r20_list), np.std(r20_list) / np.sqrt(len(r20_list))))
print("Test Recall@50=%.5f (%.5f)" % (np.mean(r50_list), np.std(r50_list) / np.sqrt(len(r50_list))))

# NOTE(review): mid-file import kept where the original author placed it.
from Base.Evaluation.Evaluator import EvaluatorHoldout

# Cross-check the hand-computed metrics with the framework's holdout evaluator.
evaluator = EvaluatorHoldout(test_data_te, cutoff_list=[20, 50, 100])

results_dict, results_run_string = evaluator.evaluateRecommender(recommender)

print(results_run_string)
def crossval(URM_all, ICM_all, target_ids, k):
    """Run one cross-validation fold on a fresh random 90/10 split.

    Fits a pool of base recommenders plus several score/similarity hybrids
    and returns their MAP@10 on the held-out split as [v0, v1, v2, v3, v4, v5].

    Parameters
    ----------
    URM_all : user-rating matrix with all interactions
    ICM_all : item-content matrix
    target_ids : users for the (commented-out) submission code
    k : fold index, used to derive the split seed
    """
    seed = 1234 + k  #+ int(time.time())
    # BUGFIX: the per-fold seed was computed but never passed to the RNG --
    # np.random.seed() with no argument re-seeds from the OS, making the
    # split (and hence the whole fold) non-reproducible.
    np.random.seed(seed)
    URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.90)
    ICM_train, ICM_test = train_test_holdout(ICM_all, train_perc=0.95)
    evaluator_validation = EvaluatorHoldout(URM_test,
                                            cutoff_list=[10],
                                            exclude_seen=True)
    args = {}

    # Base recommenders, each fitted with previously tuned hyper-parameters.
    p3alpha = P3alphaRecommender.P3alphaRecommender(URM_train)
    args = {
        "topK": 991,
        "alpha": 0.4705816992313091,
        "normalize_similarity": False
    }
    p3alpha.fit(**args)

    #p3alpha2 = P3alphaRecommender.P3alphaRecommender(URM_train)
    #args = {"topK": 400, "alpha": 0.5305816992313091, "normalize_similarity": False}
    #p3alpha2.fit(**args)

    #rp3beta = RP3betaRecommender.RP3betaRecommender(URM_train)
    #args = {"topK": 991, "alpha": 0.4705816992313091, "beta": 0.15, "normalize_similarity": False}
    #rp3beta.fit(**args)

    itemKNNCF = ItemKNNCFRecommender.ItemKNNCFRecommender(URM_train)
    args = {
        "topK": 1000,
        "shrink": 732,
        "similarity": "cosine",
        "normalize": True,
        "feature_weighting": "TF-IDF"
    }
    itemKNNCF.fit(**args)

    userKNNCF = UserKNNCFRecommender.UserKNNCFRecommender(URM_train)
    args = {
        "topK": 131,
        "shrink": 2,
        "similarity": "cosine",
        "normalize": True
    }
    userKNNCF.fit(**args)

    itemKNNCBF = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_all)
    args = {
        "topK": 700,
        "shrink": 100,
        "similarity": 'jaccard',
        "normalize": True,
        "feature_weighting": "TF-IDF"
    }
    itemKNNCBF.fit(**args)

    itemKNNCBF2 = ItemKNNCBFRecommender.ItemKNNCBFRecommender(
        URM_train, ICM_all)
    args = {
        "topK": 200,
        "shrink": 15,
        "similarity": 'jaccard',
        "normalize": True,
        "feature_weighting": "TF-IDF"
    }
    itemKNNCBF2.fit(**args)

    #cfw = CFW_D_Similarity_Linalg.CFW_D_Similarity_Linalg(URM_train, ICM_train, itemKNNCF.W_sparse)
    #cfw.fit(show_max_performance=False, logFile=None, loss_tolerance=1e-6,
    #        iteration_limit=500000, damp_coeff=0.5, topK=900, add_zeros_quota=0.5, normalize_similarity=True)

    # Need to change bpr code to avoid memory error, useless since it's bad
    #bpr = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    #bpr.fit(**{"topK": 1000, "epochs": 130, "symmetric": False, "sgd_mode": "adagrad", "lambda_i": 1e-05,
    #          "lambda_j": 0.01, "learning_rate": 0.0001})

    pureSVD = PureSVDRecommender.PureSVDRecommender(URM_train)
    pureSVD.fit(num_factors=340)

    # Score hybrid of content-based KNN and PureSVD.
    #hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, p3alpha, itemKNNCBF)
    #hyb.fit(alpha=0.5)
    hyb = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, itemKNNCBF, pureSVD)
    hyb.fit(alpha=0.5)

    # Kaggle MAP 0.084 rp3beta, itemKNNCBF
    #hyb2 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, p3alpha, itemKNNCBF)
    #hyb2.fit(alpha=0.5)
    # Similarity-matrix hybrid of content-based and collaborative item KNN.
    hyb2 = ItemKNNSimilarityHybridRecommender.ItemKNNSimilarityHybridRecommender(
        URM_train, itemKNNCBF.W_sparse, itemKNNCF.W_sparse)
    hyb2.fit(topK=1600)

    # Kaggle MAP 0.08667
    hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(
        URM_train, hyb, hyb2)
    hyb3.fit(alpha=0.5)
    #hyb3 = RankingHybrid.RankingHybrid(URM_train, hyb, hyb2)

    #hyb3 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, p3alpha, userKNNCF)
    #hyb3.fit(alpha=0.5)

    hyb5 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
        URM_train, ICM_all)
    # Kaggle MAP 0.08856
    args = {
        "topK_P": 903,
        "alpha_P": 0.4108657561671193,
        "normalize_similarity_P": False,
        "topK": 448,
        "shrink": 20,
        "similarity": "tversky",
        "normalize": True,
        "alpha": 0.6290871066510789,
        "feature_weighting": "TF-IDF"
    }
    hyb5.fit(**args)

    # hyb5.fit(**{"topK_P": 1000, "alpha_P": 0.5432601071314623, "normalize_similarity_P": True, "topK": 620, "shrink": 0,
    #             "similarity": "tversky", "normalize": False, "alpha": 0.5707347522847057, "feature_weighting": "BM25"})

    # Kaggle MAP 0.086 :(
    #hyb6 = ItemKNNScoresHybridRecommender.ItemKNNScoresHybridRecommender(URM_train, hyb3, hyb5)
    #hyb6.fit()
    hyb6 = ScoresHybridP3alphaKNNCBF.ScoresHybridP3alphaKNNCBF(
        URM_train, ICM_all)
    args = {
        "topK_P": 756,
        "alpha_P": 0.5292654015790155,
        "normalize_similarity_P": False,
        "topK": 1000,
        "shrink": 47,
        "similarity": "tversky",
        "normalize": False,
        "alpha": 0.5207647439152092,
        "feature_weighting": "none"
    }
    hyb6.fit(**args)
    '''hyb6 = ScoresHybridRP3betaKNNCBF.ScoresHybridRP3betaKNNCBF(URM_train, ICM_all)
    args = {"topK_P": 623, "alpha_P": 0.5081918012150626, "normalize_similarity_P": False, "topK": 1000,
            "shrink": 1000, "similarity": "tversky", "normalize": True, "alpha": 0.44740093610861603, "beta_P": 0.0,
            "feature_weighting": "TF-IDF"}
    hyb6.fit(**args)'''

    hyb7 = RankingHybrid.RankingHybrid(URM_train, hyb6, hyb3)

    # MAP@10 of every hybrid on the held-out split.
    v0 = evaluator_validation.evaluateRecommender(hyb)[0][10]["MAP"]
    v1 = evaluator_validation.evaluateRecommender(hyb2)[0][10]["MAP"]
    v2 = evaluator_validation.evaluateRecommender(hyb3)[0][10]["MAP"]
    #v2 = 0
    v3 = evaluator_validation.evaluateRecommender(hyb5)[0][10]["MAP"]
    v4 = evaluator_validation.evaluateRecommender(hyb6)[0][10]["MAP"]
    #v4 = 0
    v5 = evaluator_validation.evaluateRecommender(hyb7)[0][10]["MAP"]

    #item_list = hyb6.recommend(target_ids, cutoff=10)
    #CreateCSV.create_csv(target_ids, item_list, 'HybPureSVD')

    return [v0, v1, v2, v3, v4, v5]
Beispiel #25
0
 def single_test(i):
     """Fit the i-th recommender and return its MAP@10 on the i-th test split.

     Reads the enclosing scope's n_urm_test / n_recommender lists and the
     hyper-parameters alpha, beta and topK being tuned.
     """
     holdout_evaluator = EvaluatorHoldout(n_urm_test[i], cutoff_list=[10])
     n_recommender[i].fit(alpha=alpha, beta=beta, topK=int(topK))
     metrics, _ = holdout_evaluator.evaluateRecommender(n_recommender[i])
     return metrics[10]['MAP']
Beispiel #26
0
if __name__ == '__main__':
    seed = 1205

    # Load the full interaction and content matrices.
    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_all = parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.85, seed=seed)

    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    rec1 = ItemKNNCBFRecommender(URM_train, ICM_all)
    rec2 = SLIMElasticNetRecommender(URM_train)

    # 'topK': 40, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'
    cbf_params = {"topK": 40,
                  "shrink": 1000,
                  "similarity": "cosine",
                  "feature_weighting": "BM25"}
    rec1.fit(**cbf_params)

    # topK': 140, 'l1_ratio': 1e-05, 'alpha': 0.386
    slim_params = {"topK": 140, "l1_ratio": 1e-5, "alpha": 0.386}
    rec2.fit(**slim_params)
    print("recomenders are ready")

    merged_recommender = MergedHybrid000(URM_train,
                                         content_recommender=rec1,
                                         collaborative_recommender=rec2)

    # Grid-search the mixing weight and report MAP@10 for each value.
    for alpha in np.arange(0, 1, 0.1):
        merged_recommender.fit(alpha)
        result, _ = evaluator_test.evaluateRecommender(merged_recommender)
        print(alpha, result[10]['MAP'])
Beispiel #27
0
    #cf.fit(**{"topK": 259, "shrink": 24, "similarity": "cosine", "normalize": True})
    #W_sparse_CF = cf.W_sparse
    #hyb7 = CFW_D_Similarity_Linalg.CFW_D_Similarity_Linalg(URM_train, ICM_all, W_sparse_CF)
    #hyb7.fit(**{"topK": 575, "add_zeros_quota": 0.6070346405411541, "normalize_similarity": False})

    hyb7 = ScoresHybridSpecializedV2Mid.ScoresHybridSpecializedV2Mid(
        URM_ICM_train, URM_ICM_train.T)
    hyb7.fit(
        **{
            "topK_P": 516,
            "alpha_P": 0.4753488773601332,
            "normalize_similarity_P": False,
            "topK": 258,
            "shrink": 136,
            "similarity": "asymmetric",
            "normalize": False,
            "alpha": 0.48907705969537585,
            "feature_weighting": "BM25"
        })

    print(evaluator_validation.evaluateRecommender(svd))
    print(evaluator_validation.evaluateRecommender(itemKNNCBF))
    print(evaluator_validation.evaluateRecommender(itemKNNCBF2))
    print(evaluator_validation.evaluateRecommender(itemKNNCBF3))
    print(evaluator_validation.evaluateRecommender(hyb7))
    print(evaluator_validation.evaluateRecommender(hyb5))
    print(evaluator_validation.evaluateRecommender(hyb6))

    #item_list = recommender.recommend(target_ids, cutoff=10)
    #CreateCSV.create_csv(target_ids, item_list, 'MyRec')
Beispiel #28
0
# recommenderBetaGRAPH = RP3betaRecommender(URM_train)
# recommenderBetaGRAPH.fit(topK=54, implicit=True, normalize_similarity=True, alpha=1e-6, beta=0.2, min_rating=0)

# recommenderSLIMELASTIC = SLIMElasticNetRecommender(URM_all)
# recommenderSLIMELASTIC.fit(topK=10, alpha=1e-4)
# recommenderSLIMELASTIC.save_model('model/', file_name='SLIM_ElasticNet')

# recommenderCYTHON = SLIM_BPR_Cython(URM_train, recompile_cython=False)
# recommenderCYTHON.fit(epochs=350, batch_size=200, sgd_mode='adagrad', learning_rate=0.001, topK=10)

# URM_validation = sps.load_npz('URM/URM_validation.npz')

evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

result, result_string = evaluator_test.evaluateRecommender(recommenderCB)
print(result_string)

# evaluate_algorithm(URM_test, recommenderGRAPH, recommenderTP, at=10)
'''
x_tick = [x for x in range(15, 28, 3)]
MAP_per_k = []
trains = []
tests = []

for i in range(4):
    URM_train, URM_test = splitURM(URM_all)
    trains.append(URM_train)
    tests.append(URM_test)

i = 0
Beispiel #29
0
# Hold out 20% of interactions to validate the similarity-blend weights.
URM_train, URM_test = train_test_holdout(URM_all, train_perc=0.8)
itemCF_recommender = ItemKNNCFRecommender(URM_train)
itemCF_recommender.fit(**itemCFParam)
slim_recommender = SLIM_BPR_Cython(URM_train, recompile_cython=False)
slim_recommender.fit(**slimParam)
p3_recommender = P3alphaRecommender(URM_train)
p3_recommender.fit(**p3Param)

# Blend the three item-item similarity matrices into a single hybrid model.
recommender1 = SimilarityHybridRecommender(URM_train,
                                           itemCF_recommender.W_sparse,
                                           slim_recommender.W_sparse,
                                           p3_recommender.W_sparse)
recommender1.fit(topK=100, alpha1=alpha1, alpha2=alpha2, alpha3=alpha3)

evaluator_validation = EvaluatorHoldout(URM_test, cutoff_list=[10])
eval_res = evaluator_validation.evaluateRecommender(recommender1)
MAP = eval_res[0][10]['MAP']
print("The MAP in one test is: ", MAP)

# Refit every component on the FULL matrix before saving, so the persisted
# hybrid has seen all available interactions (not just the train split).
itemCF_recommender = ItemKNNCFRecommender(URM_all)
itemCF_recommender.fit(**itemCFParam)
slim_recommender = SLIM_BPR_Cython(URM_all, recompile_cython=False)
slim_recommender.fit(**slimParam)
p3_recommender = P3alphaRecommender(URM_all)
p3_recommender.fit(**p3Param)
recommender1 = SimilarityHybridRecommender(URM_all,
                                           itemCF_recommender.W_sparse,
                                           slim_recommender.W_sparse,
                                           p3_recommender.W_sparse)
recommender1.fit(topK=100, alpha1=alpha1, alpha2=alpha2, alpha3=alpha3)
recommender1.save_model("model/", "hybrid_item_slim_basic")
        # recommender = HybridGenRecommender(urm_train, eurm=True)
        # recommender.fit()

        # recommender = ItemKNNCBFRecommender(urm_train, icm_all)
        # recommender.fit(shrink=40, topK=20, feature_weighting='BM25')

        recommender = HybridNorm3Recommender(urm_train)
        recommender.fit()

    normal_recommender = recommender

    if test:

        if temperature == 'cold':
            result, str_result = evaluator_test.evaluateRecommender(
                cold_recommender)
            print('The Map is : {}'.format(result[10]['MAP']))

        if temperature == 'zero':
            result, str_result = evaluator_test.evaluateRecommender(
                zero_recommender)
            print('The Map of test is : {}'.format(result[10]['MAP']))
            # if valid:
            #     result, str_result = evaluator_valid.evaluateRecommender(zero_recommender)
            #     print('The Map of valid is : {}'.format(result[10]['MAP']))
        if temperature == 'warm':
            result, str_result = evaluator_test.evaluateRecommender(
                warm_recommender)
            print('The Map is : {}'.format(result[10]['MAP']))
            # if valid:
            #     result, str_result = evaluator_valid.evaluateRecommender(warm_recommender)