Python split_train_in_two_percentage_global_sample Examples

Programming Language: Python

Namespace/Package Name: Data_manager.split_functions.split_train_validation_random_holdout

Method/Function: split_train_in_two_percentage_global_sample

Examples at hotexamples.com: 15

Python split_train_in_two_percentage_global_sample - 15 examples found. These are the top-rated real-world Python examples of Data_manager.split_functions.split_train_validation_random_holdout.split_train_in_two_percentage_global_sample, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: run_parameter_search.py Project: matteogambella/Recommender

def read_data_split_and_search():
    """
    Tune every collaborative recommender in the list on the Movielens10M data.

    The full URM is split into train/test, and the train part is split again
    into train/validation, both times with train_percentage=0.80. One
    parameter search per algorithm is then dispatched to a process pool.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    dataReader = Movielens10MReader()
    dataset = dataReader.load_data()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        dataset.get_URM_all(), train_percentage=0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.80)

    output_folder_path = "result_experiments/"

    # exist_ok=True replaces the racy "check, then create" sequence
    os.makedirs(output_folder_path, exist_ok=True)

    collaborative_algorithm_list = [
        Random, TopPop, P3alphaRecommender, RP3betaRecommender,
        ItemKNNCFRecommender, UserKNNCFRecommender,
        MatrixFactorization_BPR_Cython, MatrixFactorization_FunkSVD_Cython,
        PureSVDRecommender, SLIM_BPR_Cython, SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[5])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[5, 10])

    # Everything except the recommender class is fixed, so the pool can map
    # the partial directly over the list of classes.
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        n_cases=10,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        similarity_type_list=["cosine"],
        parallelizeKNN=False)

    # The context manager shuts the workers down once map() has returned;
    # previously the pool was never closed and its processes leaked.
    with multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                              maxtasksperchild=1) as pool:
        pool.map(runParameterSearch_Collaborative_partial,
                 collaborative_algorithm_list)

Example #2

Show file

File: cython_example_FunkSVD.py Project: exotol/RecSys_Course_AT_PoliMi

"""
Created on 09/11/2020

@author: Maurizio Ferrari Dacrema
"""

from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
from Data_manager.Movielens.Movielens10MReader import Movielens10MReader

# Load the dataset through the Movielens10M reader.
data_reader = Movielens10MReader()
data_loaded = data_reader.load_data()

# Full URM exposed by the loaded dataset
# (presumably the user-item interaction matrix -- TODO confirm).
URM_all = data_loaded.get_URM_all()

# Split URM_all into two matrices; train_percentage=0.8 is handed to the splitter.
URM_train, URM_test = split_train_in_two_percentage_global_sample(
    URM_all, train_percentage=0.8)

# Imported here, after the data has been prepared, rather than at the top of the file.
from Cython_examples.FunkSVD_fastest import train_multiple_epochs

# NOTE(review): the positional 1e-3 and 10 look like a learning rate and an
# epoch count -- confirm against the signature of train_multiple_epochs.
train_multiple_epochs(URM_train, 1e-3, 10)

Example #3

Show file

def read_data_split_and_search():
    """
    Tune a fixed sequence of recommenders, one after the other.

    The URM is split into train/test (train_percentage=0.80) and the train part
    into train/validation (train_percentage=0.85). Validation and test are then
    filtered with parser.filter_URM_test_by_range using f_range. Each entry of
    algorithm_in_sequence is tagged 'CF' or 'CBF' and is sent to
    runParameterSearch_Collaborative or runParameterSearch_Content accordingly.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85)

    # 26-10-2020
    # > Optimization on the range [200, +inf)
    #
    # Recommenders under consideration (the fastest so far):
    # > PureSVD
    # > ItemKNNCBF
    # > ItemKNNCF
    # > UserKNNCF
    # > P3A
    # > RP3beta
    f_range = (200, -1)

    URM_validation = parser.filter_URM_test_by_range(URM_train, URM_validation,
                                                     f_range)
    URM_test = parser.filter_URM_test_by_range(URM_train, URM_test, f_range)
    output_folder_path = "result_experiments_v2/" + "range_" + str(
        f_range[0]) + "-" + str(f_range[1]) + "/"

    # exist_ok=True replaces the racy "check, then create" sequence
    os.makedirs(output_folder_path, exist_ok=True)

    algorithm_in_sequence = [(ItemKNNCFRecommender, 'CF'),
                             (UserKNNCFRecommender, 'CF'),
                             (P3alphaRecommender, 'CF'),
                             (RP3betaRecommender, 'CF'),
                             (PureSVDRecommender, 'CF'),
                             (ItemKNNCBFRecommender, 'CBF')]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    n_cases = 50
    # 30% of the cases; the original passed the float 15.0, an integer count
    # is passed instead.
    n_random_starts = int(n_cases * 0.3)
    n_processes = int(multiprocessing.cpu_count())

    # "algo_type" rather than "type", which shadowed the builtin
    for algo, algo_type in algorithm_in_sequence:
        print(f"OPTIMIZING {algo.RECOMMENDER_NAME} - {algo_type}")

        if algo_type == 'CF':
            collaborative_algorithm_list = [algo]

            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                evaluator_validation_earlystopping=evaluator_validation,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                # NOTE(review): the original author flagged this setting for attention
                allow_weighting=False,
                parallelizeKNN=False)

            # A fresh pool per algorithm, released as soon as its search ends;
            # previously every iteration leaked a pool.
            with multiprocessing.Pool(processes=n_processes,
                                      maxtasksperchild=1) as pool:
                pool.map(runParameterSearch_Collaborative_partial,
                         collaborative_algorithm_list)

        elif algo_type == 'CBF':
            content_algorithm_list = [algo]

            # NOTE(review): output_folder_path is not passed here, unlike in
            # the 'CF' branch -- confirm that the callee's default is intended.
            runParameterSearch_Content_partial = partial(
                runParameterSearch_Content,
                URM_train=URM_train,
                ICM_object=ICM_obj,
                ICM_name='BookFeatures',
                n_cases=n_cases,
                n_random_starts=n_random_starts,
                evaluator_validation=evaluator_validation,
                evaluator_test=evaluator_test,
                metric_to_optimize="MAP",
                parallelizeKNN=False,
                allow_weighting=True,
            )

            with multiprocessing.Pool(processes=n_processes,
                                      maxtasksperchild=1) as pool:
                pool.map(runParameterSearch_Content_partial,
                         content_algorithm_list)

Example #4

Show file

def read_data_split_and_search():
    """
    Tune the collaborative recommenders, then the content-based ones.

    The URM is split into train/test and the train part again into
    train/validation, both times with train_percentage=0.8. The ICM named
    "ICM_all" is used for the content-based search. Both searches optimise
    "MAP" and run their algorithm list on a process pool.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    dataReader = BookDataReader()
    dataset = dataReader.load_data()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        dataset.get_URM_all(), train_percentage=0.8)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.8)

    ICM = dataset.get_ICM_from_name("ICM_all")

    output_folder_path = "Result_experiments/"

    # exist_ok=True replaces the racy "check, then create" sequence
    os.makedirs(output_folder_path, exist_ok=True)

    collaborative_algorithm_list = [
        P3alphaRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        RP3betaRecommender,
    ]

    content_algorithm_list = [
        ItemKNNCBFRecommender,
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    n_processes = int(multiprocessing.cpu_count())

    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        metric_to_optimize="MAP",
        n_cases=1024,
        n_random_starts=32,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        save_model="no",
    )

    # Each pool is released before the next one is created; previously both
    # pools were left open and their worker processes leaked.
    with multiprocessing.Pool(processes=n_processes,
                              maxtasksperchild=1) as pool:
        pool.map(runParameterSearch_Collaborative_partial,
                 collaborative_algorithm_list)

    runParameterSearch_Content_partial = partial(
        runParameterSearch_Content,
        URM_train=URM_train,
        ICM_object=ICM,
        ICM_name="ICM_all",
        metric_to_optimize="MAP",
        n_cases=1024,
        n_random_starts=32,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path,
        parallelizeKNN=False,
        save_model="no",
    )

    with multiprocessing.Pool(processes=n_processes,
                              maxtasksperchild=1) as pool:
        pool.map(runParameterSearch_Content_partial, content_algorithm_list)

Example #5

Show file

File: run_parameter_search_hyb.py Project: Lodz97/RecSys-PoliMi-2020

def read_data_split_and_search():
    """
    Tune the hybrid recommenders in hybrid_algorithm_list on the RecSys2020 data.

    The URM is split into train/test (train_percentage=0.95) and the train part
    into train/validation (train_percentage=0.90). The training matrix handed
    to the search is URM_train stacked vertically on top of ICM_all.T, and its
    transpose is passed as ICM_train. Results go to a time-stamped folder.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """
    from Data_manager.RecSys2020 import RecSys2020Reader
    from datetime import datetime
    from scipy import sparse as sps

    URM_all, user_id_unique, item_id_unique = RecSys2020Reader.load_urm()
    ICM_all = RecSys2020Reader.load_icm_asset()
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.95)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.90)

    # NOTE(review): the ICM splits below are never read afterwards (the hybrid
    # matrix stacks the full ICM_all); confirm whether they can be dropped.
    ICM_train, ICM_test = split_train_in_two_percentage_global_sample(
        ICM_all, train_percentage=0.95)
    ICM_train, ICM_validation = split_train_in_two_percentage_global_sample(
        ICM_train, train_percentage=0.90)

    URM_ICM_train = sps.vstack([URM_train, ICM_all.T])
    URM_ICM_train = URM_ICM_train.tocsr()

    output_folder_path = "ParamResultsExperiments/SKOPT_ScoresHybridP3alphaKNNCBF_specialized_extend_param"
    output_folder_path += datetime.now().strftime('%b%d_%H-%M-%S/')

    # exist_ok=True replaces the racy "check, then create" sequence
    os.makedirs(output_folder_path, exist_ok=True)

    hybrid_algorithm_list = [
        ScoresHybridRP3betaKNNCBF,
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[5, 10])

    runParameterSearch_Hybrid_partial = partial(
        runParameterSearch_Hybrid,
        URM_train=URM_ICM_train,
        ICM_train=URM_ICM_train.T,
        metric_to_optimize="MAP",
        n_cases=100,
        n_random_starts=20,
        evaluator_validation_earlystopping=evaluator_validation,
        evaluator_validation=evaluator_validation,
        evaluator_test=evaluator_test,
        output_folder_path=output_folder_path)

    from Utils.PoolWithSubprocess import PoolWithSubprocess

    # Leave one core free, but never ask for zero workers on a single-core host
    n_processes = max(1, multiprocessing.cpu_count() - 1)

    pool = PoolWithSubprocess(processes=n_processes, maxtasksperchild=1)
    resultList = pool.map_async(runParameterSearch_Hybrid_partial,
                                hybrid_algorithm_list)
    pool.close()
    pool.join()

    # map_async keeps worker exceptions inside the result object; reading it
    # reports them instead of dropping them silently.
    # (assumes PoolWithSubprocess returns a standard AsyncResult -- TODO confirm)
    try:
        resultList.get()
    except Exception as e:
        print("Parallel parameter search raised Exception {}".format(str(e)))
        traceback.print_exc()

    # NOTE(review): this loop runs every search a second time, sequentially,
    # after the pool has already processed the same list. Kept to preserve the
    # original behaviour -- confirm whether one of the two passes should go.
    for recommender_class in hybrid_algorithm_list:

        try:

            runParameterSearch_Hybrid_partial(recommender_class)

        except Exception as e:

            print("On recommender {} Exception {}".format(
                recommender_class, str(e)))
            traceback.print_exc()

Example #6

Show file

    def _split_data_from_original_dataset(self, save_folder_path):
        """
        Load the dataset, split its URM into train/validation/test and save the split.

        The quotas in self.input_split_interaction_quota_list are percentages.
        The URM is first split into (train + validation) versus test, then the
        first part is split into train and validation. The splitter is the
        user-wise one when self.user_wise is true, the global-sample one
        otherwise. When self.allow_cold_users is false, users without any
        interaction in URM_train are removed from the three URMs, from the
        user mapper and from every UCM in self.SPLIT_UCM_DICT.

        :param save_folder_path: forwarded to self._save_split
        """

        self.loaded_dataset = self.dataReader_object.load_data()
        self._load_from_DataReader_ICM_and_mappers(self.loaded_dataset)

        URM_all = self.loaded_dataset.get_URM_all()

        # Percentages -> fractions
        train_quota, validation_quota, test_quota = self.input_split_interaction_quota_list
        train_quota /= 100
        validation_quota /= 100
        test_quota /= 100

        if self.user_wise:
            URM_train_validation, URM_test = split_train_in_two_percentage_user_wise(
                URM_all, train_percentage=train_quota + validation_quota)
        else:
            URM_train_validation, URM_test = split_train_in_two_percentage_global_sample(
                URM_all, train_percentage=train_quota + validation_quota)

        # Adjust train quota to account for the reduced size of the sample
        # URM_train_validation * adjusted_train_quota = URM_all * train quota

        adjusted_train_quota = URM_all.nnz * train_quota / URM_train_validation.nnz

        if self.user_wise:
            URM_train, URM_validation = split_train_in_two_percentage_user_wise(
                URM_train_validation, train_percentage=adjusted_train_quota)
        else:
            URM_train, URM_validation = split_train_in_two_percentage_global_sample(
                URM_train_validation, train_percentage=adjusted_train_quota)

        if not self.allow_cold_users:

            # Differences of consecutive indptr entries: stored entries per row
            # (assumes URM_train is in CSR format -- TODO confirm)
            user_interactions = np.ediff1d(URM_train.indptr)
            user_to_preserve = user_interactions >= 1
            user_to_remove = np.logical_not(user_to_preserve)

            n_users = URM_train.shape[0]

            if user_to_remove.sum() > 0:

                self._print(
                    "Removing {} ({:.2f} %) of {} users because they have no interactions in train data."
                    .format(user_to_remove.sum(),
                            user_to_remove.sum() / n_users * 100, n_users))

                URM_train = URM_train[user_to_preserve, :]
                URM_validation = URM_validation[user_to_preserve, :]
                URM_test = URM_test[user_to_preserve, :]

                # Indices of the removed users. The builtin int replaces the
                # np.int alias, which no longer exists in NumPy >= 1.24.
                removed_user_indices = np.arange(
                    0, len(user_to_remove), dtype=int)[user_to_remove]

                self.SPLIT_GLOBAL_MAPPER_DICT[
                    "user_original_ID_to_index"] = reconcile_mapper_with_removed_tokens(
                        self.
                        SPLIT_GLOBAL_MAPPER_DICT["user_original_ID_to_index"],
                        removed_user_indices)

                # Keep the UCMs row-aligned with the filtered URMs
                for UCM_name, UCM_object in self.SPLIT_UCM_DICT.items():
                    UCM_object = UCM_object[user_to_preserve, :]
                    self.SPLIT_UCM_DICT[UCM_name] = UCM_object

        self.SPLIT_URM_DICT = {
            "URM_train": URM_train,
            "URM_validation": URM_validation,
            "URM_test": URM_test,
        }

        self._compute_real_split_interaction_quota()

        self._save_split(save_folder_path)

        self._print("Split complete")

Example #7

Show file

File: my_run_parameter_search_combine_hybrid.py Project: yifanzhu314/RecSys-2020

def read_data_split_and_search():
    """
    Search hybrid parameters for every not-yet-seen triple of base recommenders.

    Three seeded splits are made: URM_all -> train/test (0.90), train ->
    train/hybrid-validation (0.85) and train -> train_bis/sub-validation
    (0.85). Seven base recommenders are fitted on URM_train with fixed
    hyperparameters. For each 3-combination of them that is not made up only of
    the recommenders in list_already_seen, runParameterSearch_Collaborative is
    run on HybridCombinationSearch with list_rec set to that combination, and
    results go to a per-combination output folder.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    def _train(label, recommender_class, fit_params, *init_args):
        # Announce, build (verbose=False), fit and return one base recommender.
        print(label + " training...")
        recommender = recommender_class(*init_args, verbose=False)
        recommender.fit(**fit_params)
        print("Done")
        return recommender

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    # NOTE(review): URM_train_bis and the sub-validation evaluator are never
    # used below; the base recommenders are fitted on URM_train.
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [HybridCombinationSearch]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    rp3b = _train("Rp3beta", RP3betaRecommender, {
        'topK': 1000,
        'alpha': 0.38192761611274967,
        'beta': 0.0,
        'normalize_similarity': False
    }, URM_train)
    p3a = _train("P3alpha", P3alphaRecommender, {
        'topK': 131,
        'alpha': 0.33660811631883863,
        'normalize_similarity': False
    }, URM_train)
    icf = _train("ItemKnnCF", ItemKNNCFRecommender, {
        'topK': 100,
        'shrink': 1000,
        'similarity': 'asymmetric',
        'normalize': True,
        'asymmetric_alpha': 0.0
    }, URM_train)
    ucf = _train("UserKnnCF", UserKNNCFRecommender, {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }, URM_train)
    icb = _train("ItemKnnCBF", ItemKNNCBFRecommender, {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }, URM_train, ICM_obj)
    sbpr = _train("SlimBPR", SLIM_BPR_Cython, {
        'topK': 979,
        'epochs': 130,
        'symmetric': False,
        'sgd_mode': 'adam',
        'lambda_i': 0.004947329669424629,
        'lambda_j': 1.1534760845071758e-05,
        'learning_rate': 0.0001
    }, URM_train)
    sen = _train("SlimElasticNet", SLIMElasticNetRecommender, {
        'topK': 992,
        'l1_ratio': 0.004065081925341167,
        'alpha': 0.003725005053334143
    }, URM_train)

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]

    # Built once instead of regenerating the combinations on every iteration
    already_seen_triples = list(combinations(list_already_seen, 3))
    n_processes = int(multiprocessing.cpu_count())

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm in already_seen_triples:
            continue

        recommender_names = '_'.join(
            [r.RECOMMENDER_NAME for r in rec_perm])
        output_folder_path = "result_experiments_v3/seed_" + str(
            seed) + '/' + recommender_names + '/'

        # exist_ok=True replaces the racy "check, then create" sequence
        os.makedirs(output_folder_path, exist_ok=True)

        # TODO: set the correct evaluators here
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            ICM_train=ICM_obj,
            metric_to_optimize="MAP",
            n_cases=50,
            n_random_starts=20,
            evaluator_validation_earlystopping=evaluator_valid_hybrid,
            evaluator_validation=evaluator_valid_hybrid,
            evaluator_test=evaluator_test,
            output_folder_path=output_folder_path,
            allow_weighting=False,
            parallelizeKNN=False,
            list_rec=rec_perm)

        # One pool per combination, released as soon as its search is done;
        # previously each iteration left its pool (and workers) open.
        with multiprocessing.Pool(processes=n_processes,
                                  maxtasksperchild=1) as pool:
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)

Example #8

Show file

File: run_parameter_search.py Project: yifanzhu314/RecSys-2020

def read_data_split_and_search():
    """
    Tune SLIMElasticNetRecommender on a seeded three-way split.

    Three seeded splits are made in sequence: URM_all -> train/test (0.90),
    train -> train/hybrid-validation (0.85), train -> train/sub-validation
    (0.85). The search validates on the sub-validation split and receives the
    hybrid-validation evaluator as its evaluator_test.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    output_folder_path = "result_experiments_v3/seed_" + str(seed) + '/'

    # exist_ok=True replaces the racy "check, then create" sequence
    os.makedirs(output_folder_path, exist_ok=True)

    collaborative_algorithm_list = [SLIMElasticNetRecommender]

    # The content-based search is currently disabled, so this list stays empty
    content_algorithm_list = []

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    # NOTE(review): built on URM_test but not passed to the search below
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # TODO: set the correct evaluators here
    runParameterSearch_Collaborative_partial = partial(
        runParameterSearch_Collaborative,
        URM_train=URM_train,
        ICM_train=ICM_obj,
        metric_to_optimize="MAP",
        n_cases=50,
        n_random_starts=20,
        evaluator_validation_earlystopping=evaluator_valid_sub,
        evaluator_validation=evaluator_valid_sub,
        evaluator_test=evaluator_valid_hybrid,
        output_folder_path=output_folder_path,
        allow_weighting=False,
        parallelizeKNN=False)

    # The context manager shuts the workers down once map() has returned;
    # previously the pool was never closed and its processes leaked.
    with multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                              maxtasksperchild=1) as pool:
        pool.map(runParameterSearch_Collaborative_partial,
                 collaborative_algorithm_list)

Example #9

Show file

File: my_run_parameter_search_combine_hybrid2.py Project: yifanzhu314/RecSys-2020

def read_data_split_and_search():
    """
    Run a parameter search for every 3-combination of five pre-fitted recommenders.

    Steps performed by this function:
        - split the full URM 90/10 into train/test, then split the train part
          85/15 into train/hybrid-validation (same fixed seed for both splits);
        - pass the validation split through ``parser.filter_URM_test_by_range``
          with the range ``(3, -1)``;
        - fit RP3beta, P3alpha, ItemKNNCF, UserKNNCF and ItemKNNCBF on the
          training split with hard-coded hyperparameters;
        - for each 3-combination of those recommenders, map
          ``runParameterSearch_Collaborative`` over
          ``collaborative_algorithm_list`` in a process pool, writing to an
          output folder named after the combination.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # Split to get the test partition (10% of the interactions).
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # Split to get the hybrid validation partition (15% of what is left).
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # NOTE(review): the meaning of the (3, -1) range is defined by DataParser;
    # it also appears as the '_3--1' suffix of the output folder below.
    URM_valid_hybrid = parser.filter_URM_test_by_range(URM_train,
                                                       URM_valid_hybrid,
                                                       (3, -1))

    # Classes handed to the parameter search. Only the uncommented entry is
    # searched; the others are kept as a menu of alternatives.
    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # Disabled base models (IALS with early stopping, PureSVD), kept for
    # reference together with their tuned hyperparameters:
    #
    # earlystopping_keywargs = {"validation_every_n": 5,
    #                           "stop_on_validation": True,
    #                           "evaluator_object": evaluator_valid_hybrid,
    #                           "lower_validations_allowed": 5,
    #                           "validation_metric": 'MAP',
    #                           }
    #
    # print('IALS training...')
    # ials = IALSRecommender(URM_train, verbose=False)
    # ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
    #                'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    # ials.fit(**ials_params, **earlystopping_keywargs)
    # print("Done")
    #
    # print("PureSVD training...")
    # psvd = PureSVDRecommender(URM_train, verbose=False)
    # psvd_params = {'num_factors': 711}
    # psvd.fit(**psvd_params)
    # print("Done")

    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 753,
        'alpha': 0.3873710051288722,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 438,
        'alpha': 0.41923120471415165,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 565,
        'shrink': 554,
        'similarity': 'tversky',
        'normalize': True,
        'tversky_alpha': 1.9109121434662428,
        'tversky_beta': 1.7823834698905734
    }
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")

    # Disabled base model (SLIM ElasticNet), kept for reference:
    #
    # print("SlimElasticNet training...")
    # sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    # sen_params = {'topK': 954, 'l1_ratio': 3.87446082207643e-05, 'alpha': 0.07562657698792305}
    # sen.fit(**sen_params)
    # print("Done")

    list_recommender = [icb, icf, ucf, p3a, rp3b]

    # Recommenders whose 3-combinations should be skipped (for instance because
    # a previous run already covered them). Empty means nothing is skipped.
    list_already_seen = []
    # Built once: the skip-list does not change while iterating.
    combinations_already_seen = list(combinations(list_already_seen, 3))

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm in combinations_already_seen:
            continue

        recommender_names = '_'.join(
            [r.RECOMMENDER_NAME for r in rec_perm])
        output_folder_path = "result_experiments_v3/seed_" + str(
            seed) + '_3--1' + '/' + recommender_names + '/'

        # If directory does not exist, create
        if not os.path.exists(output_folder_path):
            os.makedirs(output_folder_path)

        # TODO: set the RIGHT evaluators here!
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            ICM_train=ICM_obj,
            metric_to_optimize="MAP",
            n_cases=50,
            n_random_starts=20,
            evaluator_validation_earlystopping=evaluator_valid_hybrid,
            evaluator_validation=evaluator_valid_hybrid,
            evaluator_test=evaluator_test,
            output_folder_path=output_folder_path,
            allow_weighting=False,
            # similarity_type_list = ["cosine", 'jaccard'],
            parallelizeKNN=False,
            list_rec=rec_perm)

        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                    maxtasksperchild=1)
        try:
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
        finally:
            # A new pool is created for every combination: shut this one down
            # so its worker processes do not pile up across iterations.
            pool.close()
            pool.join()

Example #10

Show file

File: run_parameter_search_CV.py Project: yifanzhu314/RecSys-2020

def read_data_split_and_search():
    """
    Run a parameter search for either the content or the collaborative algorithms.

    The full URM is split 85/15 into train/test and the train part again
    85/15 into train/validation, both with the same fixed seed. Depending on
    the ``collaborative`` flag, either ``runParameterSearch_Collaborative`` or
    ``runParameterSearch_Content`` is mapped over the matching algorithm list
    in a process pool. Both receive ``k`` and ``seed``.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    parser = DataParser()
    seed = 1666
    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.85, seed=seed)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # NOTE(review): presumably the number of cross-validation folds used by
    # the search functions -- confirm against their signatures.
    k = 5

    output_folder_path = "result_experiments_CV/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Selects which family of algorithms is searched below.
    collaborative = True

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    collaborative_algorithm_list = [
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender,
        #IALSRecommender,
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    # NOTE(review): neither evaluator is passed to the search below (the
    # evaluator_test argument is disabled). They are still constructed so that
    # any side effect of the constructors is preserved.
    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[5])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    if collaborative:
        search_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            metric_to_optimize="MAP",
            n_cases=50,
            n_random_starts=20,
            #evaluator_test = evaluator_test,
            output_folder_path=output_folder_path,
            similarity_type_list=["cosine"],
            parallelizeKNN=False,
            allow_weighting=False,
            k=k,
            seed=seed)
        algorithm_list = collaborative_algorithm_list
    else:
        search_partial = partial(
            runParameterSearch_Content,
            URM_train=URM_train,
            ICM_object=ICM_obj,
            ICM_name='1BookFeatures',
            n_cases=50,
            n_random_starts=20,
            metric_to_optimize="MAP",
            output_folder_path=output_folder_path,
            parallelizeKNN=False,
            allow_weighting=True,
            #similarity_type_list = ['cosine']
            k=k,
            seed=seed)
        algorithm_list = content_algorithm_list

    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(),
                                maxtasksperchild=1)
    try:
        pool.map(search_partial, algorithm_list)
    finally:
        # Shut the pool down explicitly so no worker process is left behind.
        pool.close()
        pool.join()

Example #11

Show file

def read_data_split_and_search():
    """
    Sequentially run the collaborative parameter search on the book dataset reader's data.

    The URM is split 80/20 into train/test and the train part again 80/20
    into train/validation. Each recommender class in the candidate list is
    then searched one after the other; an exception raised for one class is
    printed with its traceback and the loop moves on to the next class.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    reader = BookDataReader()
    loaded_dataset = reader.load_data()
    full_urm = loaded_dataset.get_URM_all()

    # First split isolates the test set, second one the validation set.
    train_urm, test_urm = split_train_in_two_percentage_global_sample(
        full_urm, train_percentage=0.80)
    train_urm, validation_urm = split_train_in_two_percentage_global_sample(
        train_urm, train_percentage=0.80)

    results_dir = "Result_experiments/SKOPT_prova/"

    # Make sure the output directory is there before anything is written.
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # Uncomment an entry to include it in the search.
    candidate_recommenders = [
        Random,
        TopPop,
        P3alphaRecommender,
        RP3betaRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    validation_evaluator = EvaluatorHoldout(validation_urm, cutoff_list=[5])
    test_evaluator = EvaluatorHoldout(test_urm, cutoff_list=[5, 10])

    # Everything except the recommender class is fixed up front.
    search_settings = dict(
        URM_train=train_urm,
        metric_to_optimize="MAP",
        n_cases=10,
        evaluator_validation_earlystopping=validation_evaluator,
        evaluator_validation=validation_evaluator,
        evaluator_test=test_evaluator,
        output_folder_path=results_dir,
    )
    run_search = partial(runParameterSearch_Collaborative, **search_settings)

    from Utils.PoolWithSubprocess import PoolWithSubprocess

    # Parallel alternative to the sequential loop below (disabled):
    # pool = PoolWithSubprocess(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    # resultList = pool.map(run_search, candidate_recommenders)
    # pool.close()
    # pool.join()

    for recommender_class in candidate_recommenders:
        try:
            run_search(recommender_class)
        except Exception as error:
            # Report the failure and keep going with the remaining classes.
            failure_message = "On recommender {} Exception {}".format(
                recommender_class, str(error))
            print(failure_message)
            traceback.print_exc()

Example #12

Show file

File: my_run_parameter_search_combine_hybrid3.py Project: yifanzhu314/RecSys-2020

def _load_or_fit(recommender, fit_params, folder_path, file_name):
    """Load a stored model into ``recommender``; if that fails, fit it and store it.

    :param recommender: recommender instance exposing ``load_model``, ``fit``,
        ``save_model`` and ``RECOMMENDER_NAME``.
    :param fit_params: dict of keyword arguments for ``recommender.fit``.
    :param folder_path: folder the model is loaded from / saved to.
    :param file_name: file name of the stored model inside ``folder_path``.
    """
    try:
        recommender.load_model(folder_path, file_name)
        print(f"{recommender.RECOMMENDER_NAME} loaded.")
    except Exception:
        # Any failure to load is treated as "no usable stored model".
        # ``Exception`` (rather than a bare ``except``) keeps Ctrl-C and
        # SystemExit working while the model is being loaded.
        print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
        recommender.fit(**fit_params)
        print("done.")
        recommender.save_model(folder_path, file_name)


def read_data_split_and_search():
    """
    Run a parameter search for every 3-combination of six cached base recommenders.

    Steps performed by this function:
        - split the full URM 90/10 into train/test, then split the train part
          85/15 into train/hybrid-validation (same fixed seed for both splits);
        - for RP3beta, P3alpha, ItemKNNCF, UserKNNCF, ItemKNNCBF and
          SLIMElasticNet: load the stored model from
          ``stored_recommenders/seed_<seed>_hybrid_search/`` or, if loading
          fails, fit it with hard-coded hyperparameters and store it there;
        - for each 3-combination of those recommenders not listed in
          ``combinations_already_seen``, map
          ``runParameterSearch_Collaborative`` over
          ``collaborative_algorithm_list`` in a process pool.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # Split to get the test partition (10% of the interactions).
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # Split to get the hybrid validation partition (15% of what is left).
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85,
                                                                              seed=seed)

    # Classes handed to the parameter search. Only the uncommented entry is
    # searched; the others are kept as a menu of alternatives.
    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    # NOTE(review): not passed to the search below (evaluator_test is disabled
    # there); still constructed so any constructor side effect is preserved.
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # Disabled base models (IALS with early stopping, PureSVD), kept for
    # reference together with their tuned hyperparameters:
    #
    # earlystopping_keywargs = {"validation_every_n": 5,
    #                           "stop_on_validation": True,
    #                           "evaluator_object": evaluator_valid_hybrid,
    #                           "lower_validations_allowed": 5,
    #                           "validation_metric": 'MAP',
    #                           }
    #
    # print('IALS training...')
    # ials = IALSRecommender(URM_train, verbose=False)
    # ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
    #                'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    # ials.fit(**ials_params, **earlystopping_keywargs)
    # print("Done")
    #
    # print("PureSVD training...")
    # psvd = PureSVDRecommender(URM_train, verbose=False)
    # psvd_params = {'num_factors': 711}
    # psvd.fit(**psvd_params)
    # print("Done")

    # All base models share one cache folder and one file-name suffix.
    model_folder = f'stored_recommenders/seed_{seed}_hybrid_search/'

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    _load_or_fit(rp3b, rp3b_params, model_folder, f'{rp3b.RECOMMENDER_NAME}_for_second_search')

    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    _load_or_fit(p3a, p3a_params, model_folder, f'{p3a.RECOMMENDER_NAME}_for_second_search')

    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0}
    _load_or_fit(icf, icf_params, model_folder, f'{icf.RECOMMENDER_NAME}_for_second_search')

    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    _load_or_fit(ucf, ucf_params, model_folder, f'{ucf.RECOMMENDER_NAME}_for_second_search')

    # The original printed the ItemKNNCF name while fitting this content-based
    # model; the shared helper now always reports the model actually fitted.
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    _load_or_fit(icb, icb_params, model_folder, f'{icb.RECOMMENDER_NAME}_for_second_search')

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    _load_or_fit(sen, sen_params, model_folder, f'{sen.RECOMMENDER_NAME}_for_second_search')

    print("\nStart.")
    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]

    # Combinations to skip, e.g. because a previous run already searched them.
    # Example content, kept from the original file:
    #   (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b), (icb, p3a, sen),
    #   (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    combinations_already_seen = []

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm in combinations_already_seen:
            continue

        recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
        output_folder_path = f"result_experiments_v3/seed_{seed}/linear_combination/{recommender_names}/"
        print(F"\nTESTING THE COMBO {recommender_names}")

        # If directory does not exist, create
        if not os.path.exists(output_folder_path):
            os.makedirs(output_folder_path)

        # TODO: set the RIGHT evaluators here!
        runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                           URM_train=URM_train,
                                                           ICM_train=ICM_obj,
                                                           metric_to_optimize="MAP",
                                                           n_cases=50,
                                                           n_random_starts=20,
                                                           evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                           evaluator_validation=evaluator_valid_hybrid,
                                                           #evaluator_test=evaluator_test,
                                                           output_folder_path=output_folder_path,
                                                           allow_weighting=False,
                                                           # similarity_type_list = ["cosine", 'jaccard'],
                                                           parallelizeKNN=False,
                                                           list_rec=rec_perm)

        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
        try:
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
        finally:
            # A new pool is created for every combination: shut this one down
            # so its worker processes do not pile up across iterations.
            pool.close()
            pool.join()

Example #13

Show file

File: save_best_slim.py Project: yifanzhu314/RecSys-2020

import numpy as np
import os
import scipy.sparse as sps
from DataParser import DataParser
from Data_manager.split_functions.split_train_validation_random_holdout import \
    split_train_in_two_percentage_global_sample

from SLIM_ElasticNet.SLIMElasticNetRecommender import SLIMElasticNetRecommender

if __name__ == '__main__':
    # Fit SLIM ElasticNet on an 85% training split and store the fitted model.
    random_seed = 1205
    model_folder = 'stored_recommenders/slim_elastic_net/'
    model_file = f'best_{random_seed}_23_10_20'

    data_parser = DataParser()
    full_urm = data_parser.get_URM_all()

    # The held-out part of the split is not needed by this script.
    train_urm, _held_out_urm = split_train_in_two_percentage_global_sample(
        full_urm, train_percentage=0.85, seed=random_seed)

    slim_recommender = SLIMElasticNetRecommender(train_urm)
    slim_recommender.fit(topK=140, l1_ratio=1e-5, alpha=0.386)
    slim_recommender.save_model(model_folder, model_file)

Example #14

Show file

    urm_train = sp.csr_matrix((ratings_training, (user_ids_training, item_ids_training)),
                              shape=(num_users, num_items))

    urm_validation = sp.csr_matrix((ratings_validation, (user_ids_validation, item_ids_validation)),
                                   shape=(num_users, num_items))

    urm_test = sp.csr_matrix((ratings_test, (user_ids_test, item_ids_test)),
                             shape=(num_users, num_items))

    return urm_train, urm_validation, urm_test
"""

if __name__ == '__main__':
    # Script entry point: prepares data splits, evaluators and a search space
    # for tuning ItemKNNCFRecommender. NOTE(review): the snippet is truncated
    # inside `objective`, so the optimizer call itself is not visible here.
    parser = DataParser()
    URM_all = parser.get_URM_all()
    # First split holds out the test set (train_percentage=0.85), the second
    # one holds out the validation set from the remaining training data
    # (train_percentage=0.80). No seed is passed to either split.
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.85)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.80)

    # Both evaluators use a single cutoff of 10.
    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    # NOTE(review): `run` is not used in the visible part of the snippet --
    # presumably a counter of objective evaluations; confirm in the full file.
    run = 0
    # Candidate values 100, 150, ..., 850 for each of the two hyperparameters.
    topK_values = range(100,900, 50)
    shrink_values = range(100,900, 50)
    # Search space: two categorical dimensions named after the fit() keyword
    # arguments they are forwarded to by `objective`.
    space = [Categorical(categories=topK_values, name='topK'),
             Categorical(categories=shrink_values, name='shrink')]

    # `use_named_args(space)` wraps the objective; the dimension names above
    # ('topK', 'shrink') arrive here as keyword arguments in `params`.
    @use_named_args(space)
    def objective(**params):
        # Build a fresh recommender on the training split and fit it with the
        # sampled hyperparameters. The rest of the body (evaluation and return
        # value) is cut off in this snippet.
        recommender = ItemKNNCFRecommender(URM_train)
        recommender.fit(**params)

Example #15

Show file

def read_data_split_and_search():
    """
    Run hyperparameter searches for collaborative, content and hybrid baselines.

    Data is loaded through ``Movielens1MReader``. The URM is split 80/20 into
    train/test and the train part again 80/20 into train/validation. Then:
        - ``runHyperparameterSearch_Collaborative`` is mapped over
          ``collaborative_algorithm_list`` in a process pool;
        - for every ICM returned by ``dataset.get_loaded_ICM_dict()``,
          ``runHyperparameterSearch_Content`` (ItemKNNCBFRecommender) and
          ``runHyperparameterSearch_Hybrid`` (ItemKNN_CFCBF_Hybrid_Recommender)
          are run sequentially. An exception in either one is printed with its
          traceback and does not stop the remaining searches.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    dataReader = Movielens1MReader()
    dataset = dataReader.load_data()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(dataset.get_URM_all(), train_percentage = 0.80)
    URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_train, train_percentage = 0.80)

    output_folder_path = "result_experiments/"

    # If directory does not exist, create
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    collaborative_algorithm_list = [
        Random,
        TopPop,
        P3alphaRecommender,
        RP3betaRecommender,
        ItemKNNCFRecommender,
        UserKNNCFRecommender,
        MatrixFactorization_BPR_Cython,
        MatrixFactorization_FunkSVD_Cython,
        PureSVDRecommender,
        SLIM_BPR_Cython,
        SLIMElasticNetRecommender
    ]

    from Evaluation.Evaluator import EvaluatorHoldout

    cutoff_list = [5, 10, 20]
    metric_to_optimize = "MAP"
    cutoff_to_optimize = 10

    n_cases = 10
    # One third of the cases (rounded down) are random starts.
    n_random_starts = n_cases // 3

    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list = cutoff_list)
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list = cutoff_list)

    runParameterSearch_Collaborative_partial = partial(runHyperparameterSearch_Collaborative,
                                                       URM_train = URM_train,
                                                       metric_to_optimize = metric_to_optimize,
                                                       cutoff_to_optimize = cutoff_to_optimize,
                                                       n_cases = n_cases,
                                                       n_random_starts = n_random_starts,
                                                       evaluator_validation_earlystopping = evaluator_validation,
                                                       evaluator_validation = evaluator_validation,
                                                       evaluator_test = evaluator_test,
                                                       output_folder_path = output_folder_path,
                                                       resume_from_saved = True,
                                                       similarity_type_list = ["cosine"],
                                                       parallelizeKNN = False)

    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count(), maxtasksperchild=1)
    try:
        pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
    finally:
        # Shut the pool down before the sequential searches below start, so no
        # worker process is left behind.
        pool.close()
        pool.join()

    # Sequential alternative to the pool, useful when debugging (disabled):
    #
    # for recommender_class in collaborative_algorithm_list:
    #
    #     try:
    #
    #         runParameterSearch_Collaborative_partial(recommender_class)
    #
    #     except Exception as e:
    #
    #         print("On recommender {} Exception {}".format(recommender_class, str(e)))
    #         traceback.print_exc()
    #

    ################################################################################################
    ###### Content Baselines

    for ICM_name, ICM_object in dataset.get_loaded_ICM_dict().items():

        # Content-based search on this ICM. Each search gets its own copy of
        # the ICM and its own train+validation sum.
        try:

            runHyperparameterSearch_Content(ItemKNNCBFRecommender,
                                        URM_train = URM_train,
                                        URM_train_last_test = URM_train + URM_validation,
                                        metric_to_optimize = metric_to_optimize,
                                        cutoff_to_optimize = cutoff_to_optimize,
                                        evaluator_validation = evaluator_validation,
                                        evaluator_test = evaluator_test,
                                        output_folder_path = output_folder_path,
                                        parallelizeKNN = True,
                                        allow_weighting = True,
                                        resume_from_saved = True,
                                        similarity_type_list = ["cosine"],
                                        ICM_name = ICM_name,
                                        ICM_object = ICM_object.copy(),
                                        n_cases = n_cases,
                                        n_random_starts = n_random_starts)

        except Exception as e:

            # Best effort: report the failure and continue with the next search.
            print("On CBF recommender for ICM {} Exception {}".format(ICM_name, str(e)))
            traceback.print_exc()

        # Hybrid (collaborative + content) search on the same ICM.
        try:

            runHyperparameterSearch_Hybrid(ItemKNN_CFCBF_Hybrid_Recommender,
                                        URM_train = URM_train,
                                        URM_train_last_test = URM_train + URM_validation,
                                        metric_to_optimize = metric_to_optimize,
                                        cutoff_to_optimize = cutoff_to_optimize,
                                        evaluator_validation = evaluator_validation,
                                        evaluator_test = evaluator_test,
                                        output_folder_path = output_folder_path,
                                        parallelizeKNN = True,
                                        allow_weighting = True,
                                        resume_from_saved = True,
                                        similarity_type_list = ["cosine"],
                                        ICM_name = ICM_name,
                                        ICM_object = ICM_object.copy(),
                                        n_cases = n_cases,
                                        n_random_starts = n_random_starts)

        except Exception as e:

            # Best effort: report the failure and continue with the next ICM.
            print("On recommender {} Exception {}".format(ItemKNN_CFCBF_Hybrid_Recommender, str(e)))
            traceback.print_exc()