예제 #1
0
class Hybrid3ScoreSubRecommender(BaseRecommender):
    """Hybrid recommender that adds up the normalized scores of three models.

    The sub-models are a SLIM ElasticNet trained on the training URM stacked
    on top of the transposed augmented ICM, and an RP3beta and an ItemKNN-CF
    model both trained on the plain training URM.
    """

    RECOMMENDER_NAME = "Hybrid3ScoreRecommender"

    def __init__(self, data: DataObject):
        """Create the (not yet fitted) sub-recommenders.

        :param data: object exposing ``urm_train``, ``icm_all_augmented`` and
            ``random_seed``.
        """
        super(Hybrid3ScoreSubRecommender, self).__init__(data.urm_train)
        # SLIM sees the item features as extra rows appended below the users.
        urm = sps.vstack([data.urm_train, data.icm_all_augmented.T])
        urm = urm.tocsr()
        self.slim = SLIMElasticNetRecommender(urm)
        self.rp3 = RP3betaRecommender(self.URM_train)
        self.itemcf = ItemKNNCFRecommender(self.URM_train)
        self.random_seed = data.random_seed

    def fit(self, alpha_beta_ratio=1, alpha_gamma_ratio=1):
        """Fit the three sub-models and set the blending weights.

        The SLIM model is loaded from the on-disk cache when possible; if
        loading fails for any reason it is trained and then stored.

        :param alpha_beta_ratio: weight of the RP3beta scores (``beta``).
        :param alpha_gamma_ratio: weight of the ItemKNN-CF scores (``gamma``).
            The SLIM weight ``alpha`` is fixed to 1.
        """
        slim_folder = 'stored_recommenders/slim_elastic_net/'
        slim_file = f'with_icm_{self.random_seed}_topK=100_l1_ratio=0.04705_alpha=0.00115_positive_only=True_max_iter=35'
        try:
            self.slim.load_model(slim_folder, slim_file)
        except Exception:
            # Cache miss (or unreadable cache): train from scratch and store.
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # interpreter shutdown are not mistaken for a cache miss.
            self.slim.fit(topK=100, l1_ratio=0.04705, alpha=0.00115, positive_only=True, max_iter=35)
            self.slim.save_model(slim_folder, slim_file)

        self.rp3.fit(topK=20, alpha=0.16, beta=0.24)
        self.itemcf.fit(topK=22, shrink=850, similarity='jaccard', feature_weighting='BM25')

        self.alpha = 1
        self.beta = alpha_beta_ratio
        self.gamma = alpha_gamma_ratio

    @staticmethod
    def _scale_by_user_max(scores):
        """Divide, in place, each user's scores by that user's maximum.

        The maximum is taken along the last axis, so every row of a batch is
        scaled independently; a single row gives the same result as scaling
        by the global maximum. Rows whose maximum is 0 are left unchanged.
        """
        row_max = scores.max(axis=-1, keepdims=True)
        row_max[row_max == 0] = 1
        scores /= row_max
        return scores

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the three normalized score arrays.

        ``items_to_compute`` is accepted for interface compatibility but is
        not forwarded to the sub-recommenders.

        Each user's scores are normalized by that user's own maximum, so the
        result no longer depends on which other users share the batch.
        Assumes the sub-recommenders return numpy arrays with items on the
        last axis -- TODO confirm.
        """
        scores_slim = self.slim._compute_item_score(user_id_array=user_id_array)
        scores_rp3 = self.rp3._compute_item_score(user_id_array=user_id_array)
        scores_itemcf = self.itemcf._compute_item_score(user_id_array=user_id_array)

        # normalization
        scores_slim = self._scale_by_user_max(scores_slim)
        scores_rp3 = self._scale_by_user_max(scores_rp3)
        scores_itemcf = self._scale_by_user_max(scores_itemcf)

        scores_total = self.alpha * scores_slim + self.beta * scores_rp3 + self.gamma * scores_itemcf

        return scores_total
    def fit(self, topK=160, shrink=22, normalize=True):
        """Fit all components and precompute the blended score matrix.

        Builds a SLIM model, item-item and user-user collaborative
        similarities and artist/album content similarities, re-weights the
        URM with a per-user IDF factor and stores the weighted sum of the
        resulting score matrices in ``self.URM_final_hybrid``.

        :param topK: neighbours kept by the artist and album similarities.
        :param shrink: not used by this method; every similarity below is
            built with its own hard-coded shrink term.
        :param normalize: forwarded to every ``Compute_Similarity_Python``.
        """

        SLIM = SLIMElasticNetRecommender(URM_train=self.URM)

        # NOTE(review): this relies on ``SLIM.fit`` returning the learned
        # item-item matrix -- confirm against the SLIM implementation in use.
        self.W_sparse_SLIM = SLIM.fit(l1_penalty=1e-5, l2_penalty=0, positive_only=True, topK=150, alpha=0.004156373761804666)

        similarity_object_CF = Compute_Similarity_Python(self.URM, shrink=10,
                                                         topK=800, normalize=normalize,
                                                         similarity="cosine")

        self.W_sparse_CF = similarity_object_CF.compute_similarity()

        similarity_object_CF_user = Compute_Similarity_Python(self.URM.T, shrink=0,
                                                              topK=400, normalize=normalize,
                                                              similarity="cosine")

        self.W_sparse_CF_user = similarity_object_CF_user.compute_similarity()
        # self.W_sparse_CF_user = normalize(self.W_sparse_CF_user)

        similarity_object_artist = Compute_Similarity_Python(self.ICM_art.T, shrink=5,
                                                             topK=topK, normalize=normalize,
                                                             similarity="cosine")

        self.W_sparse_art = similarity_object_artist.compute_similarity()

        similarity_object_album = Compute_Similarity_Python(self.ICM_Alb.T, shrink=5,
                                                            topK=topK, normalize=normalize,
                                                            similarity="cosine")

        self.W_sparse_alb = similarity_object_album.compute_similarity()

        # similarity_object_dur = Compute_Similarity_Python(self.ICM_Dur.T, shrink=shrink,
        #                                            topK=topK, normalize=normalize,
        #                                           similarity = similarity)

        #  self.W_sparse_dur = similarity_object_dur.compute_similarity()

        # Per-user IDF weight: log(n_items / sum of the user's row).
        # Computed for all users at once and applied as a diagonal scaling
        # instead of assigning lil_matrix rows one by one. Users whose row
        # sums to 0 get weight 0 rather than triggering a division by zero.
        n_items = self.URM.shape[1]
        row_totals = np.asarray(self.URM.sum(axis=1), dtype=np.float64).ravel()
        idf = np.zeros_like(row_totals)
        has_interactions = row_totals > 0
        idf[has_interactions] = np.log(n_items / row_totals[has_interactions])

        self.URM = sps.csr_matrix(sps.diags(idf).dot(self.URM))

        self.URM_SLIM = self.URM.dot(self.W_sparse_SLIM)
        self.URM_CF = self.URM.dot(self.W_sparse_CF)
        self.URM_art = self.URM.dot(self.W_sparse_art)
        self.URM_alb = self.URM.dot(self.W_sparse_alb)
        self.URM_CF_user = self.W_sparse_CF_user.dot(self.URM)

        # Fixed blending weights of the five score matrices.
        self.URM_final_hybrid = self.URM_CF *  1.25 + self.URM_art * 0.6 + self.URM_alb * 0.5 + self.URM_CF_user * 0.6 + self.URM_SLIM * 0.9

        # One entry per item, initialised to 1.
        self.pen_mask = np.ones(self.URM_final_hybrid.shape[1], dtype=int)
    def split_and_fit(self, random_seed):
        """Fit a SLIM ElasticNet model on a fresh split of the training URM.

        The first element returned by ``split_with_triple`` is stacked on top
        of the transposed augmented ICM and used for training; the fitted
        recommender then has its ``URM_train`` replaced with the full
        training URM before being returned.

        :param random_seed: only printed; it is not passed to the split.
        :return: the fitted ``SLIMElasticNetRecommender``.
        """

        print(random_seed)

        source = self.data

        # Split policy: keep only the first part of the split
        train_part = split_with_triple(source.urm_train, self.split_policy)[0]

        # Item features become extra rows below the user rows
        stacked_train = sps.vstack([train_part, source.icm_all_augmented.T])

        model = SLIMElasticNetRecommender(stacked_train)

        hyper_params = {
            'topK': self.topK,
            'l1_ratio': self.l1_ratio,
            'alpha': self.alpha,
            'positive_only': self.positive_only,
            'max_iter': self.max_iter,
        }
        model.fit(**hyper_params)

        model.URM_train = source.urm_train

        return model
예제 #4
0
class Hybrid3ScoreRecommender(BaseRecommender):
    """Hybrid recommender blending normalized SLIM ElasticNet and RP3beta scores.

    Both sub-models are trained on the training URM stacked on top of the
    transposed augmented ICM.
    """

    RECOMMENDER_NAME = "Hybrid3ScoreRecommender"

    def __init__(self, data: DataObject, random_seed: int, alpha=1):
        """Create the (not yet fitted) sub-recommenders.

        :param data: object exposing ``urm_train`` and ``icm_all_augmented``.
        :param random_seed: used in the file names of the cached models.
        :param alpha: weight of the SLIM scores.
        """
        super(Hybrid3ScoreRecommender, self).__init__(data.urm_train)
        self.random_seed = random_seed
        # Item features become extra rows appended below the users.
        urm = sps.vstack([data.urm_train, data.icm_all_augmented.T])
        urm = urm.tocsr()
        self.slim = SLIMElasticNetRecommender(urm)
        self.rp3 = RP3betaRecommender(urm)
        self.alpha = alpha

    def fit(self, alpha_beta_ratio=1, alpha_gamma_ratio=1):
        """Load or train both sub-models and derive the blending weights.

        Each model is loaded from the on-disk cache when possible; if loading
        fails for any reason it is trained and then stored.

        :param alpha_beta_ratio: ``beta`` is set to ``alpha * alpha_beta_ratio``.
        :param alpha_gamma_ratio: ``gamma`` is set to
            ``alpha * alpha_gamma_ratio``; it is stored but not used by
            ``_compute_item_score``.
        """
        slim_folder = 'stored_recommenders/slim_elastic_net/'
        slim_file = f'with_icm_{self.random_seed}_topK=191_l1_ratio=0.0458089_alpha=0.000707_positive_only=True_max_iter=100'
        try:
            self.slim.load_model(slim_folder, slim_file)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # interpreter shutdown are not mistaken for a cache miss.
            self.slim.fit(topK=191,
                          l1_ratio=0.0458089,
                          alpha=0.000707,
                          positive_only=True,
                          max_iter=100)
            self.slim.save_model(slim_folder, slim_file)

        rp3_folder = 'stored_recommenders/rp3_beta/'
        rp3_file = f'with_icm_{self.random_seed}_topK=40_alpha=0.4_beta=0.2'
        try:
            self.rp3.load_model(rp3_folder, rp3_file)
        except Exception:
            self.rp3.fit(topK=40, alpha=0.4, beta=0.2)
            self.rp3.save_model(rp3_folder, rp3_file)

        # self.alpha = 1
        self.beta = self.alpha * alpha_beta_ratio
        self.gamma = self.alpha * alpha_gamma_ratio

    @staticmethod
    def _scale_by_user_max(scores):
        """Divide, in place, each user's scores by that user's maximum.

        The maximum is taken along the last axis, so every row of a batch is
        scaled independently; a single row gives the same result as scaling
        by the global maximum. Rows whose maximum is 0 are left unchanged.
        """
        row_max = scores.max(axis=-1, keepdims=True)
        row_max[row_max == 0] = 1
        scores /= row_max
        return scores

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return ``alpha * slim + beta * rp3`` over the normalized scores.

        ``items_to_compute`` is accepted for interface compatibility but is
        not forwarded to the sub-recommenders.

        Each user's scores are normalized by that user's own maximum, so the
        result no longer depends on which other users share the batch.
        Assumes the sub-recommenders return numpy arrays with items on the
        last axis -- TODO confirm.
        """
        scores_slim = self.slim._compute_item_score(
            user_id_array=user_id_array)
        scores_rp3 = self.rp3._compute_item_score(user_id_array=user_id_array)

        # normalization
        scores_slim = self._scale_by_user_max(scores_slim)
        scores_rp3 = self._scale_by_user_max(scores_rp3)

        scores_total = self.alpha * scores_slim + self.beta * scores_rp3

        return scores_total
예제 #5
0
class LinearHybrid001(BaseItemSimilarityMatrixRecommender):
    """Linear combination of SLIM ElasticNet, ItemKNN-CF and ItemKNN-CBF scores."""

    RECOMMENDER_NAME = "LinearHybrid001"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        """Create the three sub-recommenders with their fixed hyper-parameters.

        :param URM_train: training user-rating matrix.
        :param ICM_train: item-content matrix for the content-based model.
        :param submission: when True, ``fit`` always trains from scratch and
            never touches the on-disk model cache.
        :param verbose: forwarded to the base class.
        :param seed: used in the file names of the cached models.
        """
        super(LinearHybrid001, self).__init__(URM_train, verbose = verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        self.__rec1 = SLIMElasticNetRecommender(URM_train, verbose=False)
        self.__rec1_params = {'topK': 120, 'l1_ratio': 1e-5, 'alpha': 0.066}

        # seed 1205: 'topK': 620, 'shrink': 121, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.5526988987666924
        self.__rec2 = ItemKNNCFRecommender(URM_train, verbose=False)
        self.__rec2_params = {'topK': 620, 'shrink': 121, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.5526988987666924}

        # seed 1205: 'topK': 115, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'
        self.__rec3 = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        self.__rec3_params = {'topK': 115, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'}

        self.__a = self.__b = self.__c = None
        self.seed=seed
        self.__submission=submission

    def _load_or_fit(self, recommender, fit_params):
        """Load ``recommender`` from the model cache, or train it and cache it."""
        folder = 'stored_recommenders/'+recommender.RECOMMENDER_NAME+'/'
        file_name = f'seed_{str(self.seed)}_best_for_LinearHybrid001'
        try:
            recommender.load_model(folder, file_name)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # interpreter shutdown are not mistaken for a cache miss.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print(f"done.")
            recommender.save_model(folder, file_name)
        else:
            print(f"{recommender.RECOMMENDER_NAME} loaded.")

    def fit(self, alpha=0.5, l1_ratio=0.5):
        """Set the three blending weights and make the sub-models ready.

        The weights are ``a = alpha * l1_ratio``, ``b = alpha - a`` and
        ``c = 1 - a - b``, applied to the SLIM, ItemKNN-CF and ItemKNN-CBF
        scores respectively.
        """
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b

        sub_models = (
            (self.__rec1, self.__rec1_params),
            (self.__rec2, self.__rec2_params),
            (self.__rec3, self.__rec3_params),
        )
        for recommender, fit_params in sub_models:
            if self.__submission:
                recommender.fit(**fit_params)
            else:
                self._load_or_fit(recommender, fit_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):
        """Return the weighted sum of the three sub-models' item scores.

        ``items_to_compute`` is accepted but not forwarded to the sub-models.
        """

        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name = None):
        """Write a model file containing an empty dictionary.

        :param folder_path: destination folder.
        :param file_name: defaults to ``RECOMMENDER_NAME``.
        """
        if file_name is None:
            file_name = self.RECOMMENDER_NAME

        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save = {})
        self._print("Saving complete")
예제 #6
0
    def __init__(self,
                 URM_train,
                 ICM_train,
                 submission=False,
                 verbose=True,
                 seed=1205):
        """Build the UserKNN-CF model and the merged (CBF + RP3beta + SLIM) model.

        The three recommenders feeding the merged model are made ready here:
        in submission mode they are always trained; otherwise each one is
        loaded from the on-disk cache, or trained and cached if loading fails.

        :param URM_train: training user-rating matrix.
        :param ICM_train: item-content matrix.
        :param submission: when True, skip the model cache and always train.
        :param verbose: forwarded to the base class.
        :param seed: used in the folder names of the cached models.
        """
        super(LinearOverMerged001, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train
        self.__submission = submission
        self.__rec1 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'topK': 190,
            'shrink': 0,
            'similarity': 'cosine',
            'normalize': True
        }
        self.seed = seed

        def make_ready(recommender, fit_params):
            """Train ``recommender``, or load it from the cache when allowed."""
            if submission:
                recommender.fit(**fit_params)
                return
            folder = f'stored_recommenders/seed_{str(self.seed)}_{recommender.RECOMMENDER_NAME}/'
            file_name = f'best_for_{self.RECOMMENDER_NAME}'
            try:
                recommender.load_model(folder, file_name)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so that Ctrl-C
                # and interpreter shutdown are not mistaken for a cache miss.
                print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
                recommender.fit(**fit_params)
                print(f"done.")
                recommender.save_model(folder, file_name)
            else:
                print(f"{recommender.RECOMMENDER_NAME} loaded.")

        icb = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        icb_params = {
            'topK': 65,
            'shrink': 0,
            'similarity': 'dice',
            'normalize': True
        }
        rp3b = RP3betaRecommender(URM_train, verbose=False)
        rp3b_params = {
            'topK': 1000,
            'alpha': 0.38192761611274967,
            'beta': 0.0,
            'normalize_similarity': False
        }
        sen = SLIMElasticNetRecommender(URM_train, verbose=False)
        sen_params = {
            'topK': 992,
            'l1_ratio': 0.004065081925341167,
            'alpha': 0.003725005053334143
        }

        make_ready(icb, icb_params)
        make_ready(rp3b, rp3b_params)
        make_ready(sen, sen_params)

        self.__rec2 = HiddenMergedRecommender(URM_train,
                                              ICM_train, [icb, rp3b, sen],
                                              verbose=False)
        self.__rec2_params = {
            'alpha': 0.6355738550417837,
            'l1_ratio': 0.6617849709204384,
            'topK': 538
        }

        self.__a = self.__b = None
예제 #7
0
if __name__ == '__main__':
    # Script entry point: fit a content-based and a collaborative recommender,
    # merge them, and print MAP@10 on the hold-out split for several alphas.
    seed = 1205

    data_parser = DataParser()
    URM_all = data_parser.get_URM_all()
    ICM_all = data_parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, seed=seed, train_percentage=0.85)

    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    content_rec = ItemKNNCBFRecommender(URM_train, ICM_all)
    collaborative_rec = SLIMElasticNetRecommender(URM_train)

    # Recorded best parameters:
    # 'topK': 40, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'
    content_params = {
        'topK': 40,
        'shrink': 1000,
        'similarity': 'cosine',
        'feature_weighting': 'BM25',
    }
    content_rec.fit(**content_params)

    # Recorded best parameters: 'topK': 140, 'l1_ratio': 1e-05, 'alpha': 0.386
    collaborative_params = {'topK': 140, 'l1_ratio': 1e-5, 'alpha': 0.386}
    collaborative_rec.fit(**collaborative_params)
    print("recomenders are ready")

    merged_recommender = MergedHybrid000(
        URM_train,
        content_recommender=content_rec,
        collaborative_recommender=collaborative_rec)

    # Sweep the merge weight over the values produced by np.arange(0, 1, 0.1).
    for alpha in np.arange(0, 1, 0.1):
        merged_recommender.fit(alpha)
        metrics, _ = evaluator_test.evaluateRecommender(merged_recommender)
        print(alpha, metrics[10]['MAP'])
def read_data_split_and_search():
    """
    Train a fixed set of base recommenders and run the hybrid parameter
    search on every not-yet-evaluated combination of three of them.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    # (URM_train_bis is not used below; the call is kept so the sequence of
    # split calls stays the same.)
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        #EASE_R_Recommender
        #PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
        #LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
        # TODO: setta I GIUSTI EVALUATOR QUI!!!!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases = 50,
                                                 n_random_starts = 20,
                                                 evaluator_validation= evaluator_valid_sub,
                                                 evaluator_test = evaluator_valid_hybrid,
                                                 metric_to_optimize = "MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN = False,
                                                 allow_weighting = True,
                                                 #similarity_type_list = ['cosine']
                                                 )
    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """

    def train(label, recommender_class, fit_params, *extra_args):
        """Build, fit and return one base recommender, printing progress."""
        print(f"{label} training...")
        recommender = recommender_class(URM_train, *extra_args, verbose=False)
        recommender.fit(**fit_params)
        print("Done")
        return recommender

    rp3b = train("Rp3beta", RP3betaRecommender, {
        'topK': 1000,
        'alpha': 0.38192761611274967,
        'beta': 0.0,
        'normalize_similarity': False
    })
    p3a = train("P3alpha", P3alphaRecommender, {
        'topK': 131,
        'alpha': 0.33660811631883863,
        'normalize_similarity': False
    })
    icf = train("ItemKnnCF", ItemKNNCFRecommender, {
        'topK': 100,
        'shrink': 1000,
        'similarity': 'asymmetric',
        'normalize': True,
        'asymmetric_alpha': 0.0
    })
    ucf = train("UserKnnCF", UserKNNCFRecommender, {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    })
    icb = train("ItemKnnCBF", ItemKNNCBFRecommender, {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }, ICM_obj)
    sbpr = train("SlimBPR", SLIM_BPR_Cython, {
        'topK': 979,
        'epochs': 130,
        'symmetric': False,
        'sgd_mode': 'adam',
        'lambda_i': 0.004947329669424629,
        'lambda_j': 1.1534760845071758e-05,
        'learning_rate': 0.0001
    })
    sen = train("SlimElasticNet", SLIMElasticNetRecommender, {
        'topK': 992,
        'l1_ratio': 0.004065081925341167,
        'alpha': 0.003725005053334143
    })

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]

    # Triples made only of already-seen recommenders are skipped. Built once
    # here instead of regenerating the combinations on every iteration.
    combinations_already_seen = list(combinations(list_already_seen, 3))

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm in combinations_already_seen:
            continue

        recommender_names = '_'.join(
            [r.RECOMMENDER_NAME for r in rec_perm])
        output_folder_path = "result_experiments_v3/seed_" + str(
            seed) + '/' + recommender_names + '/'

        # Create the output directory if it does not exist yet
        os.makedirs(output_folder_path, exist_ok=True)

        # TODO: set the RIGHT evaluators here!
        runParameterSearch_Collaborative_partial = partial(
            runParameterSearch_Collaborative,
            URM_train=URM_train,
            ICM_train=ICM_obj,
            metric_to_optimize="MAP",
            n_cases=50,
            n_random_starts=20,
            evaluator_validation_earlystopping=evaluator_valid_hybrid,
            evaluator_validation=evaluator_valid_hybrid,
            evaluator_test=evaluator_test,
            output_folder_path=output_folder_path,
            allow_weighting=False,
            #similarity_type_list = ["cosine", 'jaccard'],
            parallelizeKNN=False,
            list_rec=rec_perm)

        # The context manager shuts the worker processes down after each
        # combination; previously a new pool was created per iteration and
        # never closed.
        with multiprocessing.Pool(processes=int(multiprocessing.cpu_count()),
                                  maxtasksperchild=1) as pool:
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
def read_data_split_and_search():
    """
    Load (or train and cache) a fixed set of base recommenders and run the
    hybrid parameter search on every combination of three of them.

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85,
                                                                              seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    # Not passed to the search below (the evaluator_test argument is
    # commented out there).
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")


    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    def load_or_fit(recommender, fit_params):
        """Load ``recommender`` from the model cache, or train and cache it.

        Returns the same recommender so the call can be used inline.
        """
        folder = f'stored_recommenders/seed_{str(seed)}_hybrid_search/'
        file_name = f'{recommender.RECOMMENDER_NAME}_for_second_search'
        try:
            recommender.load_model(folder, file_name)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
            # interpreter shutdown are not mistaken for a cache miss.
            print(f"Fitting {recommender.RECOMMENDER_NAME} ...")
            recommender.fit(**fit_params)
            print(f"done.")
            recommender.save_model(folder, file_name)
        else:
            print(f"{recommender.RECOMMENDER_NAME} loaded.")
        return recommender

    rp3b = load_or_fit(
        RP3betaRecommender(URM_train, verbose=False),
        {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False})

    p3a = load_or_fit(
        P3alphaRecommender(URM_train, verbose=False),
        {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False})

    icf = load_or_fit(
        ItemKNNCFRecommender(URM_train, verbose=False),
        {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0})

    ucf = load_or_fit(
        UserKNNCFRecommender(URM_train, verbose=False),
        {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True})

    # The fallback message for this model used to print icf's name.
    icb = load_or_fit(
        ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False),
        {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True})

    sen = load_or_fit(
        SLIMElasticNetRecommender(URM_train, verbose=False),
        {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143})

    print("\nStart.")
    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]
    # Add triples here to skip them, e.g.:
    # (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b), (icb, p3a, sen),
    # (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    combinations_already_seen = []

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm in combinations_already_seen:
            continue

        recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
        output_folder_path = "result_experiments_v3/seed_" + str(
            seed) + '/linear_combination/' + recommender_names + '/'
        print(F"\nTESTING THE COMBO {recommender_names}")

        # Create the output directory if it does not exist yet
        os.makedirs(output_folder_path, exist_ok=True)

        # TODO: set the RIGHT evaluators here!
        runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                           URM_train=URM_train,
                                                           ICM_train=ICM_obj,
                                                           metric_to_optimize="MAP",
                                                           n_cases=50,
                                                           n_random_starts=20,
                                                           evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                           evaluator_validation=evaluator_valid_hybrid,
                                                           #evaluator_test=evaluator_test,
                                                           output_folder_path=output_folder_path,
                                                           allow_weighting=False,
                                                           # similarity_type_list = ["cosine", 'jaccard'],
                                                           parallelizeKNN=False,
                                                           list_rec=rec_perm)

        # The context manager shuts the worker processes down after each
        # combination; previously a new pool was created per iteration and
        # never closed.
        with multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1) as pool:
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
예제 #10
0
import numpy as np
import os
import scipy.sparse as sps
from DataParser import DataParser
from Data_manager.split_functions.split_train_validation_random_holdout import \
    split_train_in_two_percentage_global_sample

from SLIM_ElasticNet.SLIMElasticNetRecommender import SLIMElasticNetRecommender

if __name__ == '__main__':
    # Script entry point: train a SLIM ElasticNet model on an 85% training
    # split of the full URM and store it on disk.
    random_seed = 1205
    parser = DataParser()
    URM_all = parser.get_URM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, seed=random_seed, train_percentage=0.85)

    slim_params = {'topK': 140, 'l1_ratio': 1e-5, 'alpha': 0.386}
    slim = SLIMElasticNetRecommender(URM_train)
    slim.fit(**slim_params)

    # The seed is part of the stored file name.
    model_folder = 'stored_recommenders/slim_elastic_net/'
    model_file = f'best_{random_seed}_23_10_20'
    slim.save_model(model_folder, model_file)