コード例 #1
0
from Utils.s_plus import dot_product
import scipy.sparse as sps
from Base.Evaluation.Evaluator import EvaluatorHoldout
from GraphBased.RP3betaRecommender import RP3betaRecommender
from ParameterTuning.SearchBayesianSkopt import SearchBayesianSkopt
from skopt.space import Real, Integer, Categorical
from ParameterTuning.SearchAbstractClass import SearchInputRecommenderArgs

data = DataManager()

ucm_age, ucm_region, ucm_all = data.get_ucm()

icm_price, icm_asset, icm_sub, icm_all = data.get_icm()

recommender_4 = UserKNNCFRecommender(data.get_urm())
recommender_4.fit(shrink=2, topK=600, normalize=True)

W_sparse_CF = recommender_4.W_sparse

cfw = User_CFW_D_Similarity_Linalg(URM_train=data.get_urm(),
                              UCM=ucm_all.copy(),
                              S_matrix_target=W_sparse_CF
                              )

cfw.fit(topK=1740, add_zeros_quota=0.3528735601555612, normalize_similarity=True)

weights = sps.diags(cfw.D_best)

ucm_weighted = ucm_all.dot(weights)

sps.save_npz("Data/ucm_weighted.npz", ucm_weighted.tocsr())
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        #EASE_R_Recommender
        #PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
        #LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
        # TODO: setta I GIUSTI EVALUATOR QUI!!!!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases = 50,
                                                 n_random_starts = 20,
                                                 evaluator_validation= evaluator_valid_sub,
                                                 evaluator_test = evaluator_valid_hybrid,
                                                 metric_to_optimize = "MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN = False,
                                                 allow_weighting = True,
                                                 #similarity_type_list = ['cosine']
                                                 )
    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """
    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 1000,
        'alpha': 0.38192761611274967,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 131,
        'alpha': 0.33660811631883863,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 100,
        'shrink': 1000,
        'similarity': 'asymmetric',
        'normalize': True,
        'asymmetric_alpha': 0.0
    }
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")
    print("SlimBPR training...")
    sbpr = SLIM_BPR_Cython(URM_train, verbose=False)
    sbpr_params = {
        'topK': 979,
        'epochs': 130,
        'symmetric': False,
        'sgd_mode': 'adam',
        'lambda_i': 0.004947329669424629,
        'lambda_j': 1.1534760845071758e-05,
        'learning_rate': 0.0001
    }
    sbpr.fit(**sbpr_params)
    print("Done")
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {
        'topK': 992,
        'l1_ratio': 0.004065081925341167,
        'alpha': 0.003725005053334143
    }
    sen.fit(**sen_params)
    print("Done")

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):

            recommender_names = '_'.join(
                [r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '/' + recommender_names + '/'

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: setta I GIUSTI EVALUATOR QUI!!!!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                #similarity_type_list = ["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    URM_valid_hybrid = parser.filter_URM_test_by_range(URM_train,
                                                       URM_valid_hybrid,
                                                       (3, -1))

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }
    
    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")
    
    
    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """
    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 753,
        'alpha': 0.3873710051288722,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 438,
        'alpha': 0.41923120471415165,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 565,
        'shrink': 554,
        'similarity': 'tversky',
        'normalize': True,
        'tversky_alpha': 1.9109121434662428,
        'tversky_beta': 1.7823834698905734
    }
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")
    """
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 954, 'l1_ratio': 3.87446082207643e-05, 'alpha': 0.07562657698792305}
    sen.fit(**sen_params)
    print("Done")
    """

    list_recommender = [icb, icf, ucf, p3a, rp3b]
    list_already_seen = []

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):

            recommender_names = '_'.join(
                [r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '_3--1' + '/' + recommender_names + '/'

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: setta I GIUSTI EVALUATOR QUI!!!!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                # similarity_type_list = ["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
コード例 #4
0
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     ICM_1,
                                     ICM_2,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_root_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=100):
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    try:

        output_root_path_rec_name = output_root_path + recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, Random]:
            recommender = recommender_class(URM_train)

            recommender.fit()

            output_file = open(
                output_root_path_rec_name + "_BayesianSearch.txt", "a")
            result_dict, result_baseline = evaluator_validation.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))

            pickle.dump(
                result_dict.copy(),
                open(output_root_path_rec_name + "_best_result_validation",
                     "wb"),
                protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(output_root_path_rec_name + "_best_result_test",
                             "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        # if recommender_class is MultiThreadSLIM_RMSE:
        #
        #     hyperparamethers_range_dictionary = {}
        #     hyperparamethers_range_dictionary["topK"] = [50, 100]
        #     hyperparamethers_range_dictionary["l1_penalty"] = [1e-2, 1e-3, 1e-4]
        #     hyperparamethers_range_dictionary["l2_penalty"] = [1e-2, 1e-3, 1e-4]
        #
        #
        #     recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        #                              DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        #                              DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        #                              DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        #                              DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}
        #
        #

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [
                True, False
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is HybridRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["w_itemcf"] = [
                x * 0.1 + 1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_usercf"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbart"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbalb"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_slim"] = [
                x * 0.1 for x in range(0, 10)
            ]
            #hyperparamethers_range_dictionary["w_svd"] = [x * 0.05 for x in range(0, 20)]
            #hyperparamethers_range_dictionary["w_rp3"] = [x * 0.05 for x in range(0, 20)]

            item = ItemKNNCFRecommender(URM_train)

            user = UserKNNCFRecommender(URM_train)

            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)

            item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)

            user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)

            SLIM.fit(l1_penalty=1e-05,
                     l2_penalty=0,
                     positive_only=True,
                     topK=150,
                     alpha=0.00415637376180466)

            CBArt = ItemKNNCBFRecommender(ICM=ICM_1, URM_train=URM_train)
            CBArt.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            CBAlb = ItemKNNCBFRecommender(ICM=ICM_2, URM_train=URM_train)
            CBAlb.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "ICM_Art": ICM_1,
                    "ICM_Alb": ICM_2,
                    "item": item,
                    "user": user,
                    "SLIM": SLIM,
                    "CBArt": CBArt,
                    "CBAlb": CBAlb,
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["beta"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [True]
            hyperparamethers_range_dictionary["implicit"] = [True]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is FunkSVD:
            hyperparamethers_range_dictionary = {}

            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [
                0.0, 1e-03, 1e-06, 1e-09
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [1e-02, 1e-03]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 500, 10)
            hyperparamethers_range_dictionary["batch_size"] = [
                100, 200, 300, 400
            ]
            hyperparamethers_range_dictionary["positive_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["negative_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]
            hyperparamethers_range_dictionary["user_reg"] = [
                1e-3, 1e-4, 1e-5, 1e-6
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = list(
                range(0, 250, 5))

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [800, 900, 1000, 1200]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad"]
            hyperparamethers_range_dictionary["lambda_i"] = [1e-6]
            hyperparamethers_range_dictionary["lambda_j"] = [1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                0.01, 0.001, 1e-4, 1e-5, 0.1
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': True,
                    'symmetric': True,
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 10,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 3,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MultiThreadSLIM_ElasticNet:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                3300, 4300, 5300, 6300, 7300
            ]
            hyperparamethers_range_dictionary["l1_penalty"] = [
                1e-5, 1e-6, 1e-4, 1e-3
            ]
            hyperparamethers_range_dictionary["l2_penalty"] = [1e-4]
            hyperparamethers_range_dictionary["alpha"] = range(0, 1)
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_root_path=output_root_path_rec_name,
            metric=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_root_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
コード例 #5
0
    base_recommenders = []
    tested_recommenders = []
    datas = [None] * anti_overfitting
    tested_users = []
    rec_model = []

    for j in range(anti_overfitting):
        data_reader = DataReader()
        datas[j] = DataObject(data_reader, 1, random_seed=(50 + j * 10))

        # TODO: Edit here
        # TODO: If you edit here, remember to edit at the end too
        # Insert the mix of recommender systems
        rec1 = UserKNNCFRecommender(datas[j].urm_train)
        rec1.fit(shrink=1000,
                 topK=1000,
                 similarity="cosine",
                 feature_weighting="TF-IDF")
        rec2 = RP3betaRecommender(datas[j].urm_train)
        rec2.fit(topK=5000, alpha=0.35, beta=0.025, implicit=True)
        rec3 = ItemKNNCFRecommender(datas[j].urm_train)
        rec3.fit(topK=200,
                 shrink=1000,
                 similarity="jaccard",
                 feature_weighting="TF-IDF")
        rec4 = UserKNNCBFRecommender(datas[j].ucm_all, datas[j].urm_train)
        rec4.fit(topK=5000,
                 shrink=5,
                 feature_weighting="TF-IDF",
                 similarity="euclidean")
        rec5 = ItemKNNCBFRecommender(datas[j].urm_train,
                                     datas[j].icm_all_augmented)
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85,
                                                                              seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")


    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    try:
        rp3b.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')
        print(f"{rp3b.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {rp3b.RECOMMENDER_NAME} ...")
        rp3b.fit(**rp3b_params)
        print(f"done.")
        rp3b.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')

    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    try:
        p3a.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')
        print(f"{p3a.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {p3a.RECOMMENDER_NAME} ...")
        p3a.fit(**p3a_params)
        print(f"done.")
        p3a.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')

    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0}
    try:
        icf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')
        print(f"{icf.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {icf.RECOMMENDER_NAME} ...")
        icf.fit(**icf_params)
        print(f"done.")
        icf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')

    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    try:
        ucf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')
        print(f"{ucf.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {ucf.RECOMMENDER_NAME} ...")
        ucf.fit(**ucf_params)
        print(f"done.")
        ucf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')

    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    try:
        icb.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')
        print(f"{icb.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {icf.RECOMMENDER_NAME} ...")
        icb.fit(**icb_params)
        print(f"done.")
        icb.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    try:
        sen.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')
        print(f"{sen.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {sen.RECOMMENDER_NAME} ...")
        sen.fit(**sen_params)
        print(f"done.")
        sen.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')

    print("\nStart.")
    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]
    list_already_seen = []
    combinations_already_seen = []
    """
    (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b), (icb, p3a, sen),
                                (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    """

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations_already_seen:

            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '/linear_combination/' + recommender_names + '/'
            print(F"\nTESTING THE COMBO {recommender_names}")

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: setta I GIUSTI EVALUATOR QUI!!!!
            runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                               URM_train=URM_train,
                                                               ICM_train=ICM_obj,
                                                               metric_to_optimize="MAP",
                                                               n_cases=50,
                                                               n_random_starts=20,
                                                               evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                               evaluator_validation=evaluator_valid_hybrid,
                                                               #evaluator_test=evaluator_test,
                                                               output_folder_path=output_folder_path,
                                                               allow_weighting=False,
                                                               # similarity_type_list = ["cosine", 'jaccard'],
                                                               parallelizeKNN=False,
                                                               list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
コード例 #7
0
class HybridLinear10Recommneder(BaseItemSimilarityMatrixRecommender):

    RECOMMENDER_NAME = "HybridLinear10Recommender"

    def __init__(self, URM_train, seed: int):
        super(HybridLinear10Recommneder, self).__init__(URM_train)
        self.slimBPR = SLIM_BPR_Cython(URM_train)
        self.userKnnCF = UserKNNCFRecommender(URM_train)
        #self.itemcf = ItemKNNCFRecommender(urm)

    def fit(self, alpha=1):
        self.slimBPR.fit(epochs=135,
                         topK=933,
                         symmetric=False,
                         sgd_mode='adagrad',
                         lambda_i=1.054e-05,
                         lambda_j=1.044e-05,
                         learning_rate=0.00029)
        self.userKnnCF.fit(topK=201,
                           shrink=998,
                           similarity='cosine',
                           normalize=True,
                           feature_weighting='TF-IDF')
        self.alpha = alpha
        self.beta = 1 - alpha
        #self.gamma = alpha_gamma_ratio

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        # ATTENTION!
        # THIS METHOD WORKS ONLY IF user_id_array IS A SCALAR AND NOT AN ARRAY
        # TODO

        scores_slimBPR = self.slimBPR._compute_item_score(
            user_id_array=user_id_array)
        scores_userKnnCF = self.userKnnCF._compute_item_score(
            user_id_array=user_id_array)

        # normalization
        #slim_max = scores_slim.max()
        #rp3_max = scores_rp3.max()
        #itemcf_max = scores_itemcf.max()

        #if not slim_max == 0:
        #   scores_slim /= slim_max
        #if not rp3_max == 0:
        #   scores_rp3 /= rp3_max
        #if not itemcf_max == 0:
        #   scores_itemcf /= itemcf_max

        scores_total = self.alpha * scores_slimBPR + self.beta * scores_userKnnCF  #+ self.gamma * scores_itemcf

        return scores_total

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))

        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})

        self._print("Saving complete")
コード例 #8
0
URM = data.get_URM()
ICM = data.get_ICM()

URM_train = data.get_URM_train()
URM_test = data.get_URM_test()

print("URM_train shape : {}".format(URM_train.shape))
print("URM_test shape : {}".format(URM_test.shape))
print("ICM shape : {}".format(ICM.shape))

evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[10])
evaluator_test = EvaluatorWrapper(evaluator_test)

UserBased = UserKNNCFRecommender(URM_train)
UserBased.fit(topK=500, shrink=10)

ContentBased = ItemKNNCBFRecommender(ICM, URM_train)
ContentBased.fit(topK=5, shrink=1000)

ItemKNNCF = ItemKNNCFRecommender(URM_train)
ItemKNNCF.fit(topK=400, shrink=50)

hybridRecommender_scores = ItemKNNScoresHybridRecommender_multiple(
    URM_train, ItemKNNCF, ContentBased, UserBased)
hybridRecommender_scores.fit(weight_1=HYBRID_ICF_CB_UCF_WEIGHTS[0],
                             weight_2=HYBRID_ICF_CB_UCF_WEIGHTS[1],
                             weight_3=HYBRID_ICF_CB_UCF_WEIGHTS[2])

dict, _ = evaluator_test.evaluateRecommender(hybridRecommender_scores)
コード例 #9
0
    def __init__(self, urm_train, eurm=False):
        super(HybridNormRecommender, self).__init__(urm_train)
        self.data_folder = Path(__file__).parent.parent.absolute()
        self.eurm = eurm
        self.num_users = urm_train.shape[0]

        urm_train = check_matrix(urm_train.copy(), 'csr')

        recommender_1 = HybridGen2Recommender(urm_train, eurm=self.eurm)
        recommender_1.fit()

        # recommender_2 = ItemKNNCFRecommender(urm_train)
        # recommender_2.fit(shrink=30, topK=20)

        recommender_2 = ItemKNNCFRecommender(urm_train)
        recommender_2.fit(topK=5,
                          shrink=500,
                          feature_weighting='BM25',
                          similarity='tversky',
                          normalize=False,
                          tversky_alpha=0.0,
                          tversky_beta=1.0)

        recommender_3 = UserKNNCFRecommender(urm_train)
        recommender_3.fit(shrink=2, topK=600, normalize=True)
        # recommender_3 = UserKNNCFRecommender(urm_train)
        # recommender_3.fit(topK=697, shrink=1000, feature_weighting='TF-IDF', similarity='tversky', normalize=False,
        #                   tversky_alpha=1.0, tversky_beta=1.0)

        recommender_4 = RP3betaRecommender(urm_train)
        recommender_4.fit(topK=16,
                          alpha=0.03374950051351756,
                          beta=0.24087176329409027,
                          normalize_similarity=True)

        recommender_5 = SLIM_BPR_Cython(urm_train)
        recommender_5.fit(lambda_i=0.0926694015,
                          lambda_j=0.001697250,
                          learning_rate=0.002391,
                          epochs=65,
                          topK=200)

        recommender_6 = ALSRecommender(urm_train)
        recommender_6.fit(alpha=5, iterations=40, reg=0.3)

        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        self.recommender_3 = recommender_3
        self.recommender_4 = recommender_4
        self.recommender_5 = recommender_5
        self.recommender_6 = recommender_6

        if self.eurm:

            if Path(self.data_folder / 'Data/uicm_sparse.npz').is_file():
                print("Previous uicm_sparse found")
                self.score_matrix_1 = sps.load_npz(self.data_folder /
                                                   'Data/uicm_sparse.npz')
            else:
                print("uicm_sparse not found, create new one...")
                self.score_matrix_1 = self.recommender_1._compute_item_matrix_score(
                    np.arange(self.num_users))
                sps.save_npz(self.data_folder / 'Data/uicm_sparse.npz',
                             self.score_matrix_1)

            self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_3 = self.recommender_3._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_4 = self.recommender_4._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_5 = self.recommender_5._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_6 = self.recommender_6._compute_item_score(
                np.arange(self.num_users))

            self.score_matrix_2 = normalize(self.score_matrix_2,
                                            norm='l2',
                                            axis=1)
            self.score_matrix_3 = normalize(self.score_matrix_3,
                                            norm='l2',
                                            axis=1)
            self.score_matrix_4 = normalize(self.score_matrix_4,
                                            norm='l2',
                                            axis=1)
            self.score_matrix_5 = normalize(self.score_matrix_5,
                                            norm='l2',
                                            axis=1)
            self.score_matrix_6 = normalize(self.score_matrix_6,
                                            norm='l2',
                                            axis=1)
コード例 #10
0
class LinearHybrid002(BaseItemSimilarityMatrixRecommender):
    RECOMMENDER_NAME = "LinearHybrid002"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self,
                 URM_train,
                 ICM_train,
                 submission=False,
                 verbose=True,
                 seed=1205):
        super(LinearHybrid002, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: 'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True
        self.__rec1 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'topK': 190,
            'shrink': 0,
            'similarity': 'cosine',
            'normalize': True
        }

        # seed 1205: 'topK': 100, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0
        self.__rec2 = ItemKNNCFRecommender(URM_train, verbose=False)
        self.__rec2_params = {
            'topK': 100,
            'shrink': 1000,
            'similarity': 'asymmetric',
            'normalize': True,
            'asymmetric_alpha': 0.0
        }

        # seed 1205: 'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'
        self.__rec3 = ItemKNNCBFRecommender(URM_train,
                                            ICM_train,
                                            verbose=False)
        self.__rec3_params = {
            'topK': 205,
            'shrink': 1000,
            'similarity': 'cosine',
            'normalize': True,
            'feature_weighting': 'BM25'
        }

        self.__a = self.__b = self.__c = None
        self.seed = seed
        self.__submission = submission

    def fit(self, alpha=0.5, l1_ratio=0.5):
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b
        if not self.__submission:
            try:
                self.__rec1.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    'best_for_LinearHybrid002')
                print(f"{self.__rec1.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec1.RECOMMENDER_NAME} ...")
                self.__rec1.fit(**self.__rec1_params)
                print(f"done.")
                self.__rec1.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    'best_for_LinearHybrid002')

            try:
                self.__rec2.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    'best_for_LinearHybrid002')
                print(f"{self.__rec2.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec2.RECOMMENDER_NAME} ...")
                self.__rec2.fit(**self.__rec2_params)
                print(f"done.")
                self.__rec2.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    'best_for_LinearHybrid002')

            try:
                self.__rec3.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    'best_for_LinearHybrid002')
                print(f"{self.__rec3.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec3.RECOMMENDER_NAME} ...")
                self.__rec3.fit(**self.__rec3_params)
                print(f"done.")
                self.__rec3.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    'best_for_LinearHybrid002')
        else:
            self.__rec1.fit(**self.__rec1_params)
            self.__rec2.fit(**self.__rec2_params)
            self.__rec3.fit(**self.__rec3_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):

        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
コード例 #11
0
    for recommender_class in recommender_list:

        try:

            print("Algorithm: {}".format(recommender_class))

            recommender = recommender_class(URM_train)
            item = ItemKNNCFRecommender(URM_train)

            user = UserKNNCFRecommender(URM_train)

            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)

            item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)

            user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)

            SLIM.fit(l1_penalty=1e-05,
                     l2_penalty=0,
                     positive_only=True,
                     topK=150,
                     alpha=0.00415637376180466)

            recommender.fit(ICM_Art,
                            ICM_Alb,
                            item=item,
                            user=user,
                            SLIM=SLIM,
                            w_itemcf=1.1,
                            w_usercf=0.6,
                            w_cbart=0.3,