def __init__(self, urm_train, eurm=False):
        super(HybridGenRecommender, self).__init__(urm_train)

        self.data_folder = Path(__file__).parent.parent.absolute()

        self.eurm = eurm

        self.num_users = urm_train.shape[0]
        data = DataManager()

        urm_train = check_matrix(urm_train.copy(), 'csr')
        icm_price, icm_asset, icm_sub, icm_all = data.get_icm()
        ucm_age, ucm_region, ucm_all = data.get_ucm()

        recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
        recommender_1.fit(shrink=40, topK=20, feature_weighting='BM25')

        # recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
        # recommender_2.fit(shrink=500, topK=1600, normalize=True)

        recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
        recommender_2.fit(shrink=1777,
                          topK=1998,
                          similarity='tversky',
                          feature_weighting='BM25',
                          tversky_alpha=0.1604953616,
                          tversky_beta=0.9862348646)

        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
    def __init__(self,
                 data: DataObject,
                 k: int,
                 leave_k_out=0,
                 threshold=0,
                 probability=0.2):
        super(Hybrid400AlphaRecommender, self).__init__(data.urm_train)
        self.data = data
        self.max_cutoff = 30

        rec = ItemKNNCBFRecommender(data.urm_train, data.icm_all_augmented)
        rec.fit(topK=10)

        print(f"Not augmented {data.augmented_urm.nnz}")

        data.augmented_urm = augment_with_item_similarity_best_scores(
            data.augmented_urm,
            rec.W_sparse,
            500,
            value=0.3,
            remove_seen=False)
        # print(f"After User CBF {data.augmented_urm.nnz}")
        # data.augmented_urm = augment_with_best_recommended_items(data.augmented_urm, rec,
        #                                                          data.urm_train_users_by_type[1][1], 1, value=0.2)
        #
        print(f"After Item CBF {data.augmented_urm.nnz}")

        rec = Hybrid100AlphaRecommender(data)
        rec.fit()

        data.augmented_urm = augment_with_best_recommended_items(
            data.augmented_urm,
            rec,
            data.urm_train_users_by_type[0][1],
            1,
            value=1)
        # data.augmented_urm = augment_with_best_recommended_items(data.augmented_urm, rec,
        #                                                          data.ids_warm_train_users, 2, value=0.1)
        print(f"After User CBF {data.augmented_urm.nnz}")
        # print(f"After User CBF {data.augmented_urm.nnz}")

        rec = None

        recs = Parallel(n_jobs=6)(delayed(
            par(data,
                leave_k_out=leave_k_out,
                threshold=threshold,
                probability=probability).split_and_fit)(i) for i in range(k))
        self.hybrid_rec = Hybrid1CXAlphaRecommender(
            data,
            recommenders=recs,
            recommended_users=data.ids_user,
            max_cutoff=self.max_cutoff)
        self.hybrid_rec.weights = np.array([
            np.sqrt(np.array(fib(30)[::-1])).astype(np.int).tolist()
            for _ in range(k)
        ])
Example #3
0
class ItemKNNCBFOnlyColdRecommender(BaseSimilarityMatrixRecommender):

    def __init__(self, data):
        super(ItemKNNCBFOnlyColdRecommender, self).__init__(data.urm_train)
        self.rec = ItemKNNCBFRecommender(data.urm_train, data.icm_all_augmented)
        self.cold_item = np.concatenate((data.ids_cold_train_items, data.ids_cold_item))
        self.data = data

    def _compute_item_score_postprocess_for_cold_items(self, item_scores):
        """
        In CBF no cold items are to be removed
        :param item_scores:
        :return:
        """

        return item_scores

    def fit(self, topK=50, shrink=100, similarity='cosine', normalize=True, feature_weighting="none",
            **similarity_args):
        self.rec.fit(topK=topK, shrink=shrink, similarity=similarity, normalize=normalize,
                     feature_weighting=feature_weighting, **similarity_args)

    # def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True, items_to_compute=None,
    #               remove_top_pop_flag=False, remove_CustomItems_flag=False, return_scores=False):
    #     recommended_items = self.rec.recommend(user_id_array=user_id_array, cutoff=cutoff, remove_seen_flag=False)
    #     recommended_items = np.array([x for x in recommended_items if
    #                                   x in self.cold_item and x not in self.data.urm_train[user_id_array].indices])
    #     recommended_items = np.pad(recommended_items, (0, cutoff - recommended_items.shape[0]), 'constant', constant_values=-1)
    #     return recommended_items

    # no pad
    def recommend(self, user_id_array, cutoff=None, remove_seen_flag=True, items_to_compute=None,
                         remove_top_pop_flag=False, remove_CustomItems_flag=False, return_scores=False):
        recommended_items = self.rec.recommend(user_id_array=user_id_array, cutoff=cutoff, remove_seen_flag=False)
        recommended_items = np.array([x for x in recommended_items if
                                      x in self.cold_item and x not in self.data.urm_train[user_id_array].indices])
        return recommended_items
Example #4
0
class PipeHybrid001(RP3betaRecommender):
    RECOMMENDER_NAME = "PipeHybrid001"

    def __init__(self, URM_train, ICM_train,verbose=True):
        super(PipeHybrid001, self).__init__(URM_train, verbose = verbose)
        self.URM_train_recommendation = URM_train
        self.ICM_train = ICM_train
        self.__content_recommender = ItemKNNCBFRecommender(URM_train, ICM_train)
        #print("fitting ItemKNNCBF...")
        try:
            self.__content_recommender.load_model('stored_recommenders/ItemKNNCBFRecommender/best_at_26_10_20')
        except:
            self.__content_recommender.fit(topK=140, shrink=1000, similarity='cosine', normalize=True,
                                           feature_weighting='BM25')  # best parameter up to now
            self.__content_recommender.save_model('stored_recommenders/ItemKNNCBFRecommender/best_at_26_10_20')

        #print("... done")

        #print(f"URM_train shape: {URM_train.shape}")
        #print(f"W_sparse knn shape: {self.__content_recommender.W_sparse.shape}")

        self.URM_train = URM_train.dot(self.__content_recommender.W_sparse)
        self._URM_train_format_checked = False
        self._W_sparse_format_checked = False

    def recommend(self, user_id_array, cutoff = None, remove_seen_flag=True, items_to_compute = None,
                  remove_top_pop_flag = False, remove_custom_items_flag = False, return_scores = False):
        """
        redefinition using self.URM_train_recommendation, not the new URM train of the RP3Beta algorithm
        """
        # If is a scalar transform it in a 1-cell array
        if np.isscalar(user_id_array):
            user_id_array = np.atleast_1d(user_id_array)
            single_user = True
        else:
            single_user = False

        if cutoff is None:
            cutoff = self.URM_train_recommendation.shape[1] - 1

        # Compute the scores using the model-specific function
        # Vectorize over all users in user_id_array
        scores_batch = self._compute_item_score(user_id_array, items_to_compute=items_to_compute)


        for user_index in range(len(user_id_array)):

            user_id = user_id_array[user_index]

            if remove_seen_flag:
                scores_batch[user_index,:] = self._remove_seen_on_scores(user_id, scores_batch[user_index, :])

            # Sorting is done in three steps. Faster then plain np.argsort for higher number of items
            # - Partition the data to extract the set of relevant items
            # - Sort only the relevant items
            # - Get the original item index
            # relevant_items_partition = (-scores_user).argpartition(cutoff)[0:cutoff]
            # relevant_items_partition_sorting = np.argsort(-scores_user[relevant_items_partition])
            # ranking = relevant_items_partition[relevant_items_partition_sorting]
            #
            # ranking_list.append(ranking)

        if remove_top_pop_flag:
            scores_batch = self._remove_TopPop_on_scores(scores_batch)

        if remove_custom_items_flag:
            scores_batch = self._remove_custom_items_on_scores(scores_batch)

        # relevant_items_partition is block_size x cutoff
        relevant_items_partition = (-scores_batch).argpartition(cutoff, axis=1)[:,0:cutoff]

        # Get original value and sort it
        # [:, None] adds 1 dimension to the array, from (block_size,) to (block_size,1)
        # This is done to correctly get scores_batch value as [row, relevant_items_partition[row,:]]
        relevant_items_partition_original_value = scores_batch[np.arange(scores_batch.shape[0])[:, None], relevant_items_partition]
        relevant_items_partition_sorting = np.argsort(-relevant_items_partition_original_value, axis=1)
        ranking = relevant_items_partition[np.arange(relevant_items_partition.shape[0])[:, None], relevant_items_partition_sorting]

        ranking_list = [None] * ranking.shape[0]

        # Remove from the recommendation list any item that has a -inf score
        # Since -inf is a flag to indicate an item to remove
        for user_index in range(len(user_id_array)):
            user_recommendation_list = ranking[user_index]
            user_item_scores = scores_batch[user_index, user_recommendation_list]

            not_inf_scores_mask = np.logical_not(np.isinf(user_item_scores))

            user_recommendation_list = user_recommendation_list[not_inf_scores_mask]
            ranking_list[user_index] = user_recommendation_list.tolist()

            #TEST
            """
            user_profile_array = self.URM_train[user_id_array[user_index]]
            if np.empty(user_profile_array):
                print(f"WARNING! {user_index} is a cold user!")
                rec = TopPop(URM_train)
                rec.fit()
                ranking_list[user_index]=rec.recommend([user_id_array[user_index]], cutoff=cutoff)
            """


        # Return single list for one user, instead of list of lists
        if single_user:
            ranking_list = ranking_list[0]

        if return_scores:
            return ranking_list, scores_batch

        else:
            return ranking_list
Example #5
0
        print(f"{ucf.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {ucf.RECOMMENDER_NAME} ...")
        ucf.fit(**ucf_params)
        print(f"done.")
        ucf.save_model(f'stored_recommenders/seed_{str(seed)}_{ucf.RECOMMENDER_NAME}/', 'for_notebook_analysis')

    icb = ItemKNNCBFRecommender(URM_train, ICM_all, verbose=False)

    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    try:
        icb.load_model(f'stored_recommenders/seed_{str(seed)}_{icb.RECOMMENDER_NAME}/', 'for_notebook_analysis')
        print(f"{icb.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {icb.RECOMMENDER_NAME} ...")
        icb.fit(**icb_params)
        print(f"done.")
        icb.save_model(f'stored_recommenders/seed_{str(seed)}_{icb.RECOMMENDER_NAME}/', 'for_notebook_analysis')

    list_recommender = [sslim, icb, ucf]
    best_recommender = HybridCombinationSearch(URM_train, ICM_all, list_recommender)
    params = {'alpha': 0.6461624491197696, 'l1_ratio': 0.7617220099582368}
    best_recommender.fit(**params)

    user_ids = parser.get_ratings().user_id.unique()
    cutoff = 20
    user_recommendations_items = []
    user_recommendations_user_id = []
    target = []

    for n_user in user_ids:
Example #6
0
def hybrid_repo(is_test):
    b = Builder()
    ev = Evaluator()
    ev.split()
    ICM = b.build_ICM()

    URM_train, URM_test = train_test_holdout(b.get_URM(), train_perc=0.8)
    URM_train, URM_validation = train_test_holdout(URM_train, train_perc=0.9)

    from ParameterTuning.AbstractClassSearch import EvaluatorWrapper
    from Base.Evaluation.Evaluator import SequentialEvaluator

    evaluator_validation = SequentialEvaluator(URM_validation, cutoff_list=[5])
    evaluator_test = SequentialEvaluator(URM_test, cutoff_list=[5, 10])

    evaluator_validation = EvaluatorWrapper(evaluator_validation)
    evaluator_test = EvaluatorWrapper(evaluator_test)

    from KNN.ItemKNNCFRecommender import ItemKNNCFRecommender
    from ParameterTuning.BayesianSearch import BayesianSearch

    recommender_class = ItemKNNCFRecommender

    parameterSearch = BayesianSearch(recommender_class,
                                     evaluator_validation=evaluator_validation,
                                     evaluator_test=evaluator_test)

    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    hyperparamethers_range_dictionary = {}
    hyperparamethers_range_dictionary["topK"] = [
        5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
    ]
    hyperparamethers_range_dictionary["shrink"] = [
        0, 10, 50, 100, 200, 300, 500, 1000
    ]
    hyperparamethers_range_dictionary["similarity"] = ["cosine"]
    hyperparamethers_range_dictionary["normalize"] = [True, False]

    recommenderDictionary = {
        DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
        hyperparamethers_range_dictionary
    }

    output_root_path = "result_experiments/"

    import os

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    output_root_path += recommender_class.RECOMMENDER_NAME

    n_cases = 2
    metric_to_optimize = "MAP"

    best_parameters = parameterSearch.search(recommenderDictionary,
                                             n_cases=n_cases,
                                             output_root_path=output_root_path,
                                             metric=metric_to_optimize)

    itemKNNCF = ItemKNNCFRecommender(URM_train)
    itemKNNCF.fit(**best_parameters)

    from FW_Similarity.CFW_D_Similarity_Linalg import CFW_D_Similarity_Linalg

    n_cases = 2
    metric_to_optimize = "MAP"

    best_parameters_ItemKNNCBF = parameterSearch.search(
        recommenderDictionary,
        n_cases=n_cases,
        output_root_path=output_root_path,
        metric=metric_to_optimize)

    itemKNNCBF = ItemKNNCBFRecommender(ICM, URM_train)
    itemKNNCBF.fit(**best_parameters_ItemKNNCBF)
    """
    #_____________________________________________________________________
    from ParameterTuning.BayesianSearch import BayesianSearch
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    from ParameterTuning.AbstractClassSearch import EvaluatorWrapper

    evaluator_validation_tuning = EvaluatorWrapper(evaluator_validation)
    evaluator_test_tuning = EvaluatorWrapper(evaluator_test)

    recommender_class = CFW_D_Similarity_Linalg

    parameterSearch = BayesianSearch(recommender_class,
                                     evaluator_validation=evaluator_validation_tuning,
                                     evaluator_test=evaluator_test_tuning)

    hyperparamethers_range_dictionary = {}
    hyperparamethers_range_dictionary["topK"] = [5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800]
    hyperparamethers_range_dictionary["add_zeros_quota"] = range(0, 1)
    hyperparamethers_range_dictionary["normalize_similarity"] = [True, False]

    recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train, ICM, itemKNNCF.W_sparse],
                             DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                             DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
                             DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
                             DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}

    output_root_path = "result_experiments/"

    import os

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    output_root_path += recommender_class.RECOMMENDER_NAME

    n_cases = 2
    metric_to_optimize = "MAP"

    best_parameters_CFW_D = parameterSearch.search(recommenderDictionary,
                                                   n_cases=n_cases,
                                                   output_root_path=output_root_path,
                                                   metric=metric_to_optimize)

    CFW_weithing = CFW_D_Similarity_Linalg(URM_train, ICM, itemKNNCF.W_sparse)
    CFW_weithing.fit(**best_parameters_CFW_D)
    #___________________________________________________________________________________________-

    """

    from GraphBased.P3alphaRecommender import P3alphaRecommender

    P3alpha = P3alphaRecommender(URM_train)
    P3alpha.fit()

    from MatrixFactorization.PureSVD import PureSVDRecommender

    #pureSVD = PureSVDRecommender(URM_train)
    #pureSVD.fit()

    rec = HybridRec.HybridRec()

    S_UCM = b.get_S_UCM_KNN(b.get_UCM(ev.get_URM_train()), 600)
    S_ICM = b.build_S_ICM_knn(b.build_ICM(), 250)

    rec.fit(ev.get_URM_train(),
            ev.get_target_playlists(),
            ev.get_target_tracks(),
            ev.num_playlists_to_test,
            itemKNNCBF.W_sparse,
            itemKNNCF.W_sparse,
            P3alpha.W_sparse,
            is_test=True,
            alfa=0.7,
            avg=0.3)

    train_df = rec.recommend()

    if is_test:
        map5 = ev.map5(train_df)
        print('Hybrid MAP@10:', map5)
        return map5
    else:
        print('Prediction saved!')
        train_df.to_csv(os.path.dirname(os.path.realpath(__file__))[:-19] +
                        "/all/sub.csv",
                        sep=',',
                        index=False)
        return 0

    #hybridrecommender = ItemKNNSimilarityHybridRecommender(URM_train, itemKNNCF.W_sparse, P3alpha.W_sparse)
    #hybridrecommender.fit(alpha=0.5)

    #print(evaluator_validation.evaluateRecommender(hybridrecommender))
    """
Example #7
0
# URM_train, URM_test = splitURM(URM_all)
ICM_all = sps.load_npz('myFiles/ICM_all.npz')
URM_train = sps.load_npz('myFiles/Train_for_main.npz')
URM_test = sps.load_npz('myFiles/Test_for_main.npz')
# sps.save_npz('myFiles/URM_all.npz', URM_all, compressed=True)
# sps.save_npz('MyFiles/Train_for_main.npz', URM_train, compressed=True)
# sps.save_npz('MyFiles/Test_for_main.npz', URM_test, compressed=True)
URM_train = URM_train.tocsr()
URM_test = URM_test.tocsr()

# recommenderICF = ItemKNNCFRecommender(URM_train)
# recommenderICF.fit(shrink=30, topK=10, similarity='jaccard', normalize=True)

recommenderCB = ItemKNNCBFRecommender(URM_train, ICM_all)
recommenderCB.fit(shrink=112.2, topK=5, normalize=True, similarity='jaccard')

# recommenderTP = TopPopRecommender()
# recommenderTP.fit(URM_train)

# recommenderGRAPH = P3alphaRecommender(URM_train)
# recommenderGRAPH.fit(topK=10, alpha=0.22, implicit=True, normalize_similarity=True)

# recommenderBetaGRAPH = RP3betaRecommender(URM_train)
# recommenderBetaGRAPH.fit(topK=54, implicit=True, normalize_similarity=True, alpha=1e-6, beta=0.2, min_rating=0)

# recommenderSLIMELASTIC = SLIMElasticNetRecommender(URM_all)
# recommenderSLIMELASTIC.fit(topK=10, alpha=1e-4)
# recommenderSLIMELASTIC.save_model('model/', file_name='SLIM_ElasticNet')

# recommenderCYTHON = SLIM_BPR_Cython(URM_train, recompile_cython=False)
Example #8
0
class LinearHybridC001(BaseItemSimilarityMatrixRecommender):
    RECOMMENDER_NAME = "LinearHybridC001"
    """
    This hybrid works for users who have a profile length shorter or equal to 2 interactions
    """

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self,
                 URM_train,
                 ICM_train,
                 submission=False,
                 verbose=True,
                 seed=1205):
        super(LinearHybridC001, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612, 'epsilon':
        # 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
        self.__rec1 = IALSRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'num_factors': 83,
            'confidence_scaling': 'linear',
            'alpha': 28.4278070726612,
            'epsilon': 1.0234211788885077,
            'reg': 0.0027328110246575004,
            'epochs': 15
        }  #### -5!!

        # seed 1205: {'topK': 225, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting':
        # 'BM25'}
        self.__rec2 = ItemKNNCBFRecommender(URM_train,
                                            ICM_train,
                                            verbose=False)
        self.__rec2_params = {
            'topK': 225,
            'shrink': 1000,
            'similarity': 'cosine',
            'normalize': True,
            'feature_weighting': 'BM25'
        }

        # seed 1205: {'topK': 220, 'shrink': 175, 'similarity': 'cosine', 'normalize': False}
        self.__rec3 = ItemKNNCFRecommender(URM_train, verbose=False)
        self.__rec3_params = {
            'topK': 220,
            'shrink': 175,
            'similarity': 'cosine',
            'normalize': False
        }

        self.__a = self.__b = self.__c = None
        self.seed = seed
        self.__submission = submission

    def fit(self, alpha=0.5, l1_ratio=0.5):
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b
        if not self.__submission:
            try:
                self.__rec1.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec1.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec1.RECOMMENDER_NAME} ...")
                self.__rec1.fit(**self.__rec1_params)
                print(f"done.")
                self.__rec1.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec2.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec2.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec2.RECOMMENDER_NAME} ...")
                self.__rec2.fit(**self.__rec2_params)
                print(f"done.")
                self.__rec2.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec3.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec3.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec3.RECOMMENDER_NAME} ...")
                self.__rec3.fit(**self.__rec3_params)
                print(f"done.")
                self.__rec3.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
        else:
            self.__rec1.fit(**self.__rec1_params)
            self.__rec2.fit(**self.__rec2_params)
            self.__rec3.fit(**self.__rec3_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):

        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
Example #9
0
    evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    itemKNNCF = ItemKNNCFRecommender(URM_train)
    itemKNNCF.fit(shrink=24, topK=10)

    recommenderCYTHON = SLIM_BPR_Cython(URM_train, recompile_cython=False)
    recommenderCYTHON.fit(epochs=2000,
                          batch_size=200,
                          sgd_mode='sdg',
                          learning_rate=1e-5,
                          topK=10)

    recommenderCB = ItemKNNCBFRecommender(URM_train, ICM_all)
    recommenderCB.fit(shrink=24, topK=10)

    recommenderELASTIC = SLIMElasticNetRecommender(URM_train)
    # recommenderELASTIC.fit(topK=10)
    # recommenderELASTIC.save_model('model/', file_name='SLIM_ElasticNet')
    recommenderELASTIC.load_model('model/', file_name='SLIM_ElasticNet_train')

    # recommenderAlphaGRAPH = P3alphaRecommender(URM_train)
    # recommenderAlphaGRAPH.fit(topK=10, alpha=0.22, implicit=True, normalize_similarity=True)

    recommenderBetaGRAPH = RP3betaRecommender(URM_train)
    recommenderBetaGRAPH.fit(topK=10,
                             implicit=True,
                             normalize_similarity=True,
                             alpha=0.41,
                             beta=0.049)
class LinearHybrid008(BaseItemSimilarityMatrixRecommender):
    RECOMMENDER_NAME = "LinearHybrid008"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self,
                 URM_train,
                 ICM_train,
                 submission=False,
                 verbose=True,
                 seed=1205):
        super(LinearHybrid008, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        self.__rec1 = SSLIMElasticNet(URM_train, ICM_train, verbose=False)
        self.__rec1_params = {
            'beta': 0.4849594591575789,
            'topK': 1000,
            'l1_ratio': 1e-05,
            'alpha': 0.001
        }

        self.__rec2 = ItemKNNCBFRecommender(URM_train,
                                            ICM_train,
                                            verbose=False)
        self.__rec2_params = {
            'topK': 65,
            'shrink': 0,
            'similarity': 'dice',
            'normalize': True
        }

        self.__rec3 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec3_params = {
            'topK': 190,
            'shrink': 0,
            'similarity': 'cosine',
            'normalize': True
        }

        self.__a = self.__b = self.__c = None
        self.seed = seed
        self.__submission = submission

    def fit(self, alpha=0.5, l1_ratio=0.5):
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b
        if not self.__submission:
            try:
                self.__rec1.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec1.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec1.RECOMMENDER_NAME} ...")
                self.__rec1.fit(**self.__rec1_params)
                print(f"done.")
                self.__rec1.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec2.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec2.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec2.RECOMMENDER_NAME} ...")
                self.__rec2.fit(**self.__rec2_params)
                print(f"done.")
                self.__rec2.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec3.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec3.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec3.RECOMMENDER_NAME} ...")
                self.__rec3.fit(**self.__rec3_params)
                print(f"done.")
                self.__rec3.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
        else:
            self.__rec1.fit(**self.__rec1_params)
            self.__rec2.fit(**self.__rec2_params)
            self.__rec3.fit(**self.__rec3_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):

        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        # normalization
        item_weights_1_max = item_weights_1.max()
        item_weights_2_max = item_weights_2.max()
        item_weights_3_max = item_weights_3.max()

        if not item_weights_1_max == 0:
            item_weights_1 /= item_weights_1_max
        if not item_weights_2_max == 0:
            item_weights_2 /= item_weights_2_max
        if not item_weights_3_max == 0:
            item_weights_3 /= item_weights_3_max

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
Example #11
0
    def __init__(self, urm_train, eurm=False):
        super(HybridNormOrigRecommender, self).__init__(urm_train)
        self.data_folder = Path(__file__).parent.parent.absolute()
        self.eurm = eurm
        self.num_users = urm_train.shape[0]

        data = DataManager()

        urm_train = check_matrix(urm_train.copy(), 'csr')
        icm_price, icm_asset, icm_sub, icm_all = data.get_icm()
        ucm_age, ucm_region, ucm_all = data.get_ucm()

        recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
        recommender_1.fit(shrink=40, topK=20, feature_weighting='BM25')

        recommender_7 = UserKNNCBFRecommender(urm_train, ucm_all)
        recommender_7.fit(shrink=1777,
                          topK=1998,
                          similarity='tversky',
                          feature_weighting='BM25',
                          tversky_alpha=0.1604953616,
                          tversky_beta=0.9862348646)

        # recommender_1 = HybridGenRecommender(urm_train, eurm=self.eurm)
        # recommender_1.fit()

        # recommender_2 = ItemKNNCFRecommender(urm_train)
        # recommender_2.fit(shrink=30, topK=20)

        recommender_2 = ItemKNNCFRecommender(urm_train)
        recommender_2.fit(topK=5,
                          shrink=500,
                          feature_weighting='BM25',
                          similarity='tversky',
                          normalize=False,
                          tversky_alpha=0.0,
                          tversky_beta=1.0)

        recommender_3 = UserKNNCFRecommender(urm_train)
        recommender_3.fit(shrink=2, topK=600, normalize=True)
        # recommender_3 = UserKNNCFRecommender(urm_train)
        # recommender_3.fit(topK=697, shrink=1000, feature_weighting='TF-IDF', similarity='tversky', normalize=False,
        #                   tversky_alpha=1.0, tversky_beta=1.0)

        recommender_4 = RP3betaRecommender(urm_train)
        recommender_4.fit(topK=16,
                          alpha=0.03374950051351756,
                          beta=0.24087176329409027,
                          normalize_similarity=False)

        recommender_5 = SLIM_BPR_Cython(urm_train)
        recommender_5.fit(lambda_i=0.0926694015,
                          lambda_j=0.001697250,
                          learning_rate=0.002391,
                          epochs=65,
                          topK=200)

        recommender_6 = ALSRecommender(urm_train)
        recommender_6.fit(alpha=5, iterations=40, reg=0.3)

        self.recommender_1 = recommender_1
        self.recommender_2 = recommender_2
        self.recommender_3 = recommender_3
        self.recommender_4 = recommender_4
        self.recommender_5 = recommender_5
        self.recommender_6 = recommender_6
        self.recommender_7 = recommender_7

        if self.eurm:

            if Path(self.data_folder / 'Data/uicm_orig_sparse.npz').is_file():
                print("Previous uicm_sparse found")
                self.score_matrix_1 = sps.load_npz(self.data_folder /
                                                   'Data/uicm_sparse.npz')
            else:
                print("uicm_sparse not found, create new one...")
                self.score_matrix_1 = self.recommender_1._compute_item_matrix_score(
                    np.arange(self.num_users))
                sps.save_npz(self.data_folder / 'Data/uicm_orig_sparse.npz',
                             self.score_matrix_1)

            self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_3 = self.recommender_3._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_4 = self.recommender_4._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_5 = self.recommender_5._compute_item_matrix_score(
                np.arange(self.num_users))
            self.score_matrix_6 = self.recommender_6._compute_item_score(
                np.arange(self.num_users))

            self.score_matrix_1 = normalize(self.score_matrix_2,
                                            norm='max',
                                            axis=1)
            self.score_matrix_2 = normalize(self.score_matrix_2,
                                            norm='max',
                                            axis=1)
            self.score_matrix_3 = normalize(self.score_matrix_3,
                                            norm='max',
                                            axis=1)
            self.score_matrix_4 = normalize(self.score_matrix_4,
                                            norm='max',
                                            axis=1)
            self.score_matrix_5 = normalize(self.score_matrix_5,
                                            norm='max',
                                            axis=1)
            self.score_matrix_6 = normalize(self.score_matrix_6,
                                            norm='max',
                                            axis=1)
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(URM_train, train_percentage=0.85,
                                                                              seed=seed)

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid, cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])

    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }

    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")


    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """

    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {'topK': 1000, 'alpha': 0.38192761611274967, 'beta': 0.0, 'normalize_similarity': False}
    try:
        rp3b.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')
        print(f"{rp3b.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {rp3b.RECOMMENDER_NAME} ...")
        rp3b.fit(**rp3b_params)
        print(f"done.")
        rp3b.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                        f'{rp3b.RECOMMENDER_NAME}_for_second_search')

    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {'topK': 131, 'alpha': 0.33660811631883863, 'normalize_similarity': False}
    try:
        p3a.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')
        print(f"{p3a.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {p3a.RECOMMENDER_NAME} ...")
        p3a.fit(**p3a_params)
        print(f"done.")
        p3a.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{p3a.RECOMMENDER_NAME}_for_second_search')

    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {'topK': 55, 'shrink': 1000, 'similarity': 'asymmetric', 'normalize': True, 'asymmetric_alpha': 0.0}
    try:
        icf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')
        print(f"{icf.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {icf.RECOMMENDER_NAME} ...")
        icf.fit(**icf_params)
        print(f"done.")
        icf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icf.RECOMMENDER_NAME}_for_second_search')

    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True}
    try:
        ucf.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')
        print(f"{ucf.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {ucf.RECOMMENDER_NAME} ...")
        ucf.fit(**ucf_params)
        print(f"done.")
        ucf.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{ucf.RECOMMENDER_NAME}_for_second_search')

    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {'topK': 65, 'shrink': 0, 'similarity': 'dice', 'normalize': True}
    try:
        icb.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')
        print(f"{icb.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {icf.RECOMMENDER_NAME} ...")
        icb.fit(**icb_params)
        print(f"done.")
        icb.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{icb.RECOMMENDER_NAME}_for_second_search')

    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 992, 'l1_ratio': 0.004065081925341167, 'alpha': 0.003725005053334143}
    try:
        sen.load_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')
        print(f"{sen.RECOMMENDER_NAME} loaded.")
    except:
        print(f"Fitting {sen.RECOMMENDER_NAME} ...")
        sen.fit(**sen_params)
        print(f"done.")
        sen.save_model(f'stored_recommenders/seed_{str(seed)}_hybrid_search/',
                       f'{sen.RECOMMENDER_NAME}_for_second_search')

    print("\nStart.")
    list_recommender = [icb, icf, ucf, p3a, rp3b, sen]
    list_already_seen = []
    combinations_already_seen = []
    """
    (icb, icf, p3a), (icb, icf, rp3b), (icb, icf, sen), (icb, p3a, rp3b), (icb, p3a, sen),
                                (icb, rp3b, sen), (icf, p3a, rp3b), (icf, p3a, sen)
    """

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations_already_seen:

            recommender_names = '_'.join([r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '/linear_combination/' + recommender_names + '/'
            print(F"\nTESTING THE COMBO {recommender_names}")

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: setta I GIUSTI EVALUATOR QUI!!!!
            runParameterSearch_Collaborative_partial = partial(runParameterSearch_Collaborative,
                                                               URM_train=URM_train,
                                                               ICM_train=ICM_obj,
                                                               metric_to_optimize="MAP",
                                                               n_cases=50,
                                                               n_random_starts=20,
                                                               evaluator_validation_earlystopping=evaluator_valid_hybrid,
                                                               evaluator_validation=evaluator_valid_hybrid,
                                                               #evaluator_test=evaluator_test,
                                                               output_folder_path=output_folder_path,
                                                               allow_weighting=False,
                                                               # similarity_type_list = ["cosine", 'jaccard'],
                                                               parallelizeKNN=False,
                                                               list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial, collaborative_algorithm_list)
Example #13
0
if __name__ == '__main__':
    seed = 1205

    parser = DataParser()
    URM_all = parser.get_URM_all()
    ICM_all = parser.get_ICM_all()

    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.85, seed=seed)

    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    rec1 = ItemKNNCBFRecommender(URM_train, ICM_all)
    rec2 = SLIMElasticNetRecommender(URM_train)

    # 'topK': 40, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'
    rec1.fit(topK=40,
             shrink=1000,
             similarity='cosine',
             feature_weighting='BM25')

    # topK': 140, 'l1_ratio': 1e-05, 'alpha': 0.386
    rec2.fit(topK=140, l1_ratio=1e-5, alpha=0.386)
    print("recomenders are ready")
    merged_recommender = MergedHybrid000(URM_train,
                                         content_recommender=rec1,
                                         collaborative_recommender=rec2)
    for alpha in np.arange(0, 1, 0.1):
        merged_recommender.fit(alpha)
        result, _ = evaluator_test.evaluateRecommender(merged_recommender)
        print(alpha, result[10]['MAP'])
class LinearHybrid003(BaseItemSimilarityMatrixRecommender):
    RECOMMENDER_NAME = "LinearHybrid003"

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self,
                 URM_train,
                 ICM_train,
                 submission=False,
                 verbose=True,
                 seed=1205):
        super(LinearHybrid003, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: 'topK': 190, 'shrink': 0, 'similarity': 'cosine', 'normalize': True
        self.__rec1 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'topK': 190,
            'shrink': 0,
            'similarity': 'cosine',
            'normalize': True
        }

        self.__rec2 = P3alphaRecommender(URM_train, verbose=False)
        self.__rec2_params = {
            'topK': 131,
            'alpha': 0.33660811631883863,
            'normalize_similarity': False
        }

        # seed 1205: 'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True, 'feature_weighting': 'BM25'
        self.__rec3 = ItemKNNCBFRecommender(URM_train,
                                            ICM_train,
                                            verbose=False)
        self.__rec3_params = {
            'topK': 205,
            'shrink': 1000,
            'similarity': 'cosine',
            'normalize': True,
            'feature_weighting': 'BM25'
        }

        self.__a = self.__b = self.__c = None
        self.seed = seed
        self.__submission = submission

    def fit(self, alpha=0.5, l1_ratio=0.5):
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b
        if not self.__submission:
            try:
                self.__rec1.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec1.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec1.RECOMMENDER_NAME} ...")
                self.__rec1.fit(**self.__rec1_params)
                print(f"done.")
                self.__rec1.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec2.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec2.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec2.RECOMMENDER_NAME} ...")
                self.__rec2.fit(**self.__rec2_params)
                print(f"done.")
                self.__rec2.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec3.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec3.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec3.RECOMMENDER_NAME} ...")
                self.__rec3.fit(**self.__rec3_params)
                print(f"done.")
                self.__rec3.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
        else:
            self.__rec1.fit(**self.__rec1_params)
            self.__rec2.fit(**self.__rec2_params)
            self.__rec3.fit(**self.__rec3_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):

        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path +
                                                       file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    # SPLIT TO GET THE sub_rec VALID PARTITION
    URM_train_bis, URM_valid_sub = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    collaborative_algorithm_list = [
        #EASE_R_Recommender
        #PipeHybrid001,
        #Random,
        #TopPop,
        #P3alphaRecommender,
        #RP3betaRecommender,
        #ItemKNNCFRecommender,
        #UserKNNCFRecommender,
        #MatrixFactorization_BPR_Cython,
        #MatrixFactorization_FunkSVD_Cython,
        #PureSVDRecommender,
        #NMFRecommender,
        #PureSVDItemRecommender
        #SLIM_BPR_Cython,
        #SLIMElasticNetRecommender
        #IALSRecommender
        #MF_MSE_PyTorch
        #MergedHybrid000
        #LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        #ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_sub = EvaluatorHoldout(URM_valid_sub, cutoff_list=[10])
    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
        # TODO: setta I GIUSTI EVALUATOR QUI!!!!
    runParameterSearch_Content_partial = partial(runParameterSearch_Content,
                                                 URM_train=URM_train,
                                                 ICM_object=ICM_obj,
                                                 ICM_name='1BookFeatures',
                                                 n_cases = 50,
                                                 n_random_starts = 20,
                                                 evaluator_validation= evaluator_valid_sub,
                                                 evaluator_test = evaluator_valid_hybrid,
                                                 metric_to_optimize = "MAP",
                                                 output_folder_path=output_folder_path,
                                                 parallelizeKNN = False,
                                                 allow_weighting = True,
                                                 #similarity_type_list = ['cosine']
                                                 )
    pool = multiprocessing.Pool(processes=int(multiprocessing.cpu_count()), maxtasksperchild=1)
    pool.map(runParameterSearch_Content_partial, content_algorithm_list)
    """
    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 1000,
        'alpha': 0.38192761611274967,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 131,
        'alpha': 0.33660811631883863,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 100,
        'shrink': 1000,
        'similarity': 'asymmetric',
        'normalize': True,
        'asymmetric_alpha': 0.0
    }
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")
    print("SlimBPR training...")
    sbpr = SLIM_BPR_Cython(URM_train, verbose=False)
    sbpr_params = {
        'topK': 979,
        'epochs': 130,
        'symmetric': False,
        'sgd_mode': 'adam',
        'lambda_i': 0.004947329669424629,
        'lambda_j': 1.1534760845071758e-05,
        'learning_rate': 0.0001
    }
    sbpr.fit(**sbpr_params)
    print("Done")
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {
        'topK': 992,
        'l1_ratio': 0.004065081925341167,
        'alpha': 0.003725005053334143
    }
    sen.fit(**sen_params)
    print("Done")

    list_recommender = [rp3b, p3a, icf, ucf, icb, sen, sbpr]
    list_already_seen = [rp3b, p3a, icf, ucf, icb]

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):

            recommender_names = '_'.join(
                [r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '/' + recommender_names + '/'

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: setta I GIUSTI EVALUATOR QUI!!!!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                #similarity_type_list = ["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
class LinearHybridW001(BaseItemSimilarityMatrixRecommender):
    RECOMMENDER_NAME = "LinearHybridW001"
    """
    This hybrid works for users who have a profile length greater than or equal to 3 interactions
    """

    # set the seed equal to the one of the parameter search!!!!
    def __init__(self, URM_train, ICM_train, submission=False, verbose=True, seed=1205):
        super(LinearHybridW001, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train

        # seed 1205: {'topK': 205, 'shrink': 1000, 'similarity': 'cosine',
        #             'normalize': True, 'feature_weighting': 'BM25'}
        self.__rec1 = ItemKNNCBFRecommender(URM_train,ICM_train, verbose=False)
        self.__rec1_params = {'topK': 205, 'shrink': 1000, 'similarity': 'cosine', 'normalize': True,
                              'feature_weighting': 'BM25'}

        # seed 1205: {'topK': 565, 'shrink': 554, 'similarity': 'tversky', 'normalize': True,
        #             'tversky_alpha': 1.9109121434662428, 'tversky_beta': 1.7823834698905734}
        self.__rec2 = ItemKNNCFRecommender(URM_train, verbose=False)
        self.__rec2_params = {'topK': 565, 'shrink': 554, 'similarity': 'tversky', 'normalize': True,
                              'tversky_alpha': 1.9109121434662428, 'tversky_beta': 1.7823834698905734}

        # seed 1205: {'topK': 753, 'alpha': 0.3873710051288722, 'beta': 0.0, 'normalize_similarity': False}
        self.__rec3 = RP3betaRecommender(URM_train, verbose=False)
        self.__rec3_params = {'topK': 753, 'alpha': 0.3873710051288722, 'beta': 0.0, 'normalize_similarity': False}

        self.__a = self.__b = self.__c = None
        self.seed = seed
        self.__submission = submission

    def fit(self, alpha=0.5, l1_ratio=0.5):
        self.__a = alpha * l1_ratio
        self.__b = alpha - self.__a
        self.__c = 1 - self.__a - self.__b
        if not self.__submission:
            try:
                self.__rec1.load_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                                       f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec1.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec1.RECOMMENDER_NAME} ...")
                self.__rec1.fit(**self.__rec1_params)
                print(f"done.")
                self.__rec1.save_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec1.RECOMMENDER_NAME}/',
                                       f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec2.load_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                                       f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec2.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec2.RECOMMENDER_NAME} ...")
                self.__rec2.fit(**self.__rec2_params)
                print(f"done.")
                self.__rec2.save_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec2.RECOMMENDER_NAME}/',
                                       f'best_for_{self.RECOMMENDER_NAME}')

            try:
                self.__rec3.load_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                                       f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{self.__rec3.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {self.__rec3.RECOMMENDER_NAME} ...")
                self.__rec3.fit(**self.__rec3_params)
                print(f"done.")
                self.__rec3.save_model(f'stored_recommenders/seed_{str(self.seed)}_{self.__rec3.RECOMMENDER_NAME}/',
                                       f'best_for_{self.RECOMMENDER_NAME}')
        else:
            self.__rec1.fit(**self.__rec1_params)
            self.__rec2.fit(**self.__rec2_params)
            self.__rec3.fit(**self.__rec3_params)

    def _compute_item_score(self, user_id_array, items_to_compute=None):

        item_weights_1 = self.__rec1._compute_item_score(user_id_array)
        item_weights_2 = self.__rec2._compute_item_score(user_id_array)
        item_weights_3 = self.__rec3._compute_item_score(user_id_array)

        item_weights = item_weights_1 * self.__a + item_weights_2 * self.__b + item_weights_3 * self.__c

        return item_weights

    def save_model(self, folder_path, file_name=None):
        if file_name is None:
            file_name = self.RECOMMENDER_NAME
        self._print("Saving model in file '{}'".format(folder_path + file_name))
        dataIO = DataIO(folder_path=folder_path)
        dataIO.save_data(file_name=file_name, data_dict_to_save={})
        self._print("Saving complete")
def runParameterSearch_Collaborative(recommender_class,
                                     URM_train,
                                     ICM_1,
                                     ICM_2,
                                     metric_to_optimize="PRECISION",
                                     evaluator_validation=None,
                                     evaluator_test=None,
                                     evaluator_validation_earlystopping=None,
                                     output_root_path="result_experiments/",
                                     parallelizeKNN=True,
                                     n_cases=100):
    from ParameterTuning.AbstractClassSearch import DictionaryKeys

    # If directory does not exist, create
    if not os.path.exists(output_root_path):
        os.makedirs(output_root_path)

    try:

        output_root_path_rec_name = output_root_path + recommender_class.RECOMMENDER_NAME

        parameterSearch = BayesianSearch(
            recommender_class,
            evaluator_validation=evaluator_validation,
            evaluator_test=evaluator_test)

        if recommender_class in [TopPop, Random]:
            recommender = recommender_class(URM_train)

            recommender.fit()

            output_file = open(
                output_root_path_rec_name + "_BayesianSearch.txt", "a")
            result_dict, result_baseline = evaluator_validation.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_validation. Results: {}"
                .format(result_baseline))

            pickle.dump(
                result_dict.copy(),
                open(output_root_path_rec_name + "_best_result_validation",
                     "wb"),
                protocol=pickle.HIGHEST_PROTOCOL)

            result_dict, result_baseline = evaluator_test.evaluateRecommender(
                recommender)
            output_file.write(
                "ParameterSearch: Best result evaluated on URM_test. Results: {}"
                .format(result_baseline))

            pickle.dump(result_dict.copy(),
                        open(output_root_path_rec_name + "_best_result_test",
                             "wb"),
                        protocol=pickle.HIGHEST_PROTOCOL)

            output_file.close()

            return

        ##########################################################################################################

        if recommender_class is UserKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        if recommender_class is ItemKNNCFRecommender:

            similarity_type_list = ['cosine']

            run_KNNCFRecommender_on_similarity_type_partial = partial(
                run_KNNCFRecommender_on_similarity_type,
                parameterSearch=parameterSearch,
                URM_train=URM_train,
                n_cases=n_cases,
                output_root_path=output_root_path_rec_name,
                metric_to_optimize=metric_to_optimize)

            if parallelizeKNN:
                pool = PoolWithSubprocess(processes=int(2), maxtasksperchild=1)
                resultList = pool.map(
                    run_KNNCFRecommender_on_similarity_type_partial,
                    similarity_type_list)

            else:

                for similarity_type in similarity_type_list:
                    run_KNNCFRecommender_on_similarity_type_partial(
                        similarity_type)

            return

        ##########################################################################################################

        # if recommender_class is MultiThreadSLIM_RMSE:
        #
        #     hyperparamethers_range_dictionary = {}
        #     hyperparamethers_range_dictionary["topK"] = [50, 100]
        #     hyperparamethers_range_dictionary["l1_penalty"] = [1e-2, 1e-3, 1e-4]
        #     hyperparamethers_range_dictionary["l2_penalty"] = [1e-2, 1e-3, 1e-4]
        #
        #
        #     recommenderDictionary = {DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
        #                              DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
        #                              DictionaryKeys.FIT_POSITIONAL_ARGS: dict(),
        #                              DictionaryKeys.FIT_KEYWORD_ARGS: dict(),
        #                              DictionaryKeys.FIT_RANGE_KEYWORD_ARGS: hyperparamethers_range_dictionary}
        #
        #

        ##########################################################################################################

        if recommender_class is P3alphaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [
                True, False
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is HybridRecommender:

            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["w_itemcf"] = [
                x * 0.1 + 1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_usercf"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbart"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_cbalb"] = [
                x * 0.1 for x in range(0, 10)
            ]
            hyperparamethers_range_dictionary["w_slim"] = [
                x * 0.1 for x in range(0, 10)
            ]
            #hyperparamethers_range_dictionary["w_svd"] = [x * 0.05 for x in range(0, 20)]
            #hyperparamethers_range_dictionary["w_rp3"] = [x * 0.05 for x in range(0, 20)]

            item = ItemKNNCFRecommender(URM_train)

            user = UserKNNCFRecommender(URM_train)

            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)

            item.fit(topK=800, shrink=10, similarity='cosine', normalize=True)

            user.fit(topK=70, shrink=22, similarity='cosine', normalize=True)

            SLIM.fit(l1_penalty=1e-05,
                     l2_penalty=0,
                     positive_only=True,
                     topK=150,
                     alpha=0.00415637376180466)

            CBArt = ItemKNNCBFRecommender(ICM=ICM_1, URM_train=URM_train)
            CBArt.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            CBAlb = ItemKNNCBFRecommender(ICM=ICM_2, URM_train=URM_train)
            CBAlb.fit(topK=160,
                      shrink=5,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "ICM_Art": ICM_1,
                    "ICM_Alb": ICM_2,
                    "item": item,
                    "user": user,
                    "SLIM": SLIM,
                    "CBArt": CBArt,
                    "CBAlb": CBAlb,
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is RP3betaRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                5, 10, 20, 50, 100, 150, 200, 300, 400, 500, 600, 700, 800
            ]
            hyperparamethers_range_dictionary["alpha"] = range(0, 2)
            hyperparamethers_range_dictionary["beta"] = range(0, 2)
            hyperparamethers_range_dictionary["normalize_similarity"] = [True]
            hyperparamethers_range_dictionary["implicit"] = [True]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_FunkSVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [0.0, 1e-3, 1e-6, 1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is FunkSVD:
            hyperparamethers_range_dictionary = {}

            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 1000, 20)
            hyperparamethers_range_dictionary["reg"] = [
                0.0, 1e-03, 1e-06, 1e-09
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [1e-02, 1e-03]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MatrixFactorization_AsySVD_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad", "adam"]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["num_factors"] = range(
                100, 500, 10)
            hyperparamethers_range_dictionary["batch_size"] = [
                100, 200, 300, 400
            ]
            hyperparamethers_range_dictionary["positive_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["negative_reg"] = [
                0.0, 1e-3, 1e-6, 1e-9
            ]
            hyperparamethers_range_dictionary["learning_rate"] = [
                1e-2, 1e-3, 1e-4, 1e-5
            ]
            hyperparamethers_range_dictionary["user_reg"] = [
                1e-3, 1e-4, 1e-5, 1e-6
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 5,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 20,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is PureSVDRecommender:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["num_factors"] = list(
                range(0, 250, 5))

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        if recommender_class is SLIM_BPR_Cython:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [800, 900, 1000, 1200]
            # hyperparamethers_range_dictionary["epochs"] = [1, 5, 10, 20, 30, 50, 70, 90, 110]
            hyperparamethers_range_dictionary["sgd_mode"] = ["adagrad"]
            hyperparamethers_range_dictionary["lambda_i"] = [1e-6]
            hyperparamethers_range_dictionary["lambda_j"] = [1e-9]
            hyperparamethers_range_dictionary["learning_rate"] = [
                0.01, 0.001, 1e-4, 1e-5, 0.1
            ]

            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {
                    'train_with_sparse_weights': True,
                    'symmetric': True,
                    'positive_threshold': 1
                },
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS: {
                    "validation_every_n": 10,
                    "stop_on_validation": True,
                    "evaluator_object": evaluator_validation_earlystopping,
                    "lower_validatons_allowed": 3,
                    "validation_metric": metric_to_optimize
                },
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        ##########################################################################################################

        if recommender_class is MultiThreadSLIM_ElasticNet:
            hyperparamethers_range_dictionary = {}
            hyperparamethers_range_dictionary["topK"] = [
                3300, 4300, 5300, 6300, 7300
            ]
            hyperparamethers_range_dictionary["l1_penalty"] = [
                1e-5, 1e-6, 1e-4, 1e-3
            ]
            hyperparamethers_range_dictionary["l2_penalty"] = [1e-4]
            hyperparamethers_range_dictionary["alpha"] = range(0, 1)
            recommenderDictionary = {
                DictionaryKeys.CONSTRUCTOR_POSITIONAL_ARGS: [URM_train],
                DictionaryKeys.CONSTRUCTOR_KEYWORD_ARGS: {},
                DictionaryKeys.FIT_POSITIONAL_ARGS:
                dict(),
                DictionaryKeys.FIT_KEYWORD_ARGS:
                dict(),
                DictionaryKeys.FIT_RANGE_KEYWORD_ARGS:
                hyperparamethers_range_dictionary
            }

        #########################################################################################################

        ## Final step, after the hyperparameter range has been defined for each type of algorithm
        best_parameters = parameterSearch.search(
            recommenderDictionary,
            n_cases=n_cases,
            output_root_path=output_root_path_rec_name,
            metric=metric_to_optimize)

    except Exception as e:

        print("On recommender {} Exception {}".format(recommender_class,
                                                      str(e)))
        traceback.print_exc()

        error_file = open(output_root_path + "ErrorLog.txt", "a")
        error_file.write("On recommender {} Exception {}\n".format(
            recommender_class, str(e)))
        error_file.close()
def read_data_split_and_search():
    """
    This function provides a simple example on how to tune parameters of a given algorithm

    The BayesianSearch object will save:
        - A .txt file with all the cases explored and the recommendation quality
        - A _best_model file which contains the trained model and can be loaded with recommender.load_model()
        - A _best_parameter file which contains a dictionary with all the fit parameters, it can be passed to recommender.fit(**_best_parameter)
        - A _best_result_validation file which contains a dictionary with the results of the best solution on the validation
        - A _best_result_test file which contains a dictionary with the results, on the test set, of the best solution chosen using the validation set
    """

    seed = 1205
    parser = DataParser()

    URM_all = parser.get_URM_all()
    ICM_obj = parser.get_ICM_all()

    # SPLIT TO GET TEST PARTITION
    URM_train, URM_test = split_train_in_two_percentage_global_sample(
        URM_all, train_percentage=0.90, seed=seed)

    # SPLIT TO GET THE HYBRID VALID PARTITION
    URM_train, URM_valid_hybrid = split_train_in_two_percentage_global_sample(
        URM_train, train_percentage=0.85, seed=seed)

    URM_valid_hybrid = parser.filter_URM_test_by_range(URM_train,
                                                       URM_valid_hybrid,
                                                       (3, -1))

    collaborative_algorithm_list = [
        # EASE_R_Recommender
        # PipeHybrid001,
        # Random,
        # TopPop,
        # P3alphaRecommender,
        # RP3betaRecommender,
        # ItemKNNCFRecommender,
        # UserKNNCFRecommender,
        # MatrixFactorization_BPR_Cython,
        # MatrixFactorization_FunkSVD_Cython,
        # PureSVDRecommender,
        # NMFRecommender,
        # PureSVDItemRecommender
        # SLIM_BPR_Cython,
        # SLIMElasticNetRecommender
        # IALSRecommender
        # MF_MSE_PyTorch
        # MergedHybrid000
        # LinearHybrid002ggg
        HybridCombinationSearch
    ]

    content_algorithm_list = [
        # ItemKNNCBFRecommender
    ]

    from Base.Evaluation.Evaluator import EvaluatorHoldout

    evaluator_valid_hybrid = EvaluatorHoldout(URM_valid_hybrid,
                                              cutoff_list=[10])
    evaluator_test = EvaluatorHoldout(URM_test, cutoff_list=[10])
    """
    earlystopping_keywargs = {"validation_every_n": 5,
                              "stop_on_validation": True,
                              "evaluator_object": evaluator_valid_hybrid,
                              "lower_validations_allowed": 5,
                              "validation_metric": 'MAP',
                              }
    
    print('IALS training...')
    ials = IALSRecommender(URM_train, verbose=False)
    ials_params = {'num_factors': 83, 'confidence_scaling': 'linear', 'alpha': 28.4278070726612,
                   'epsilon': 1.0234211788885077, 'reg': 0.0027328110246575004, 'epochs': 20}
    ials.fit(**ials_params, **earlystopping_keywargs)
    print("Done")
    
    
    print("PureSVD training...")
    psvd = PureSVDRecommender(URM_train, verbose=False)
    psvd_params = {'num_factors': 711}
    psvd.fit(**psvd_params)
    print("Done")
    """
    print("Rp3beta training...")
    rp3b = RP3betaRecommender(URM_train, verbose=False)
    rp3b_params = {
        'topK': 753,
        'alpha': 0.3873710051288722,
        'beta': 0.0,
        'normalize_similarity': False
    }
    rp3b.fit(**rp3b_params)
    print("Done")
    print("P3alpha training...")
    p3a = P3alphaRecommender(URM_train, verbose=False)
    p3a_params = {
        'topK': 438,
        'alpha': 0.41923120471415165,
        'normalize_similarity': False
    }
    p3a.fit(**p3a_params)
    print("Done")
    print("ItemKnnCF training...")
    icf = ItemKNNCFRecommender(URM_train, verbose=False)
    icf_params = {
        'topK': 565,
        'shrink': 554,
        'similarity': 'tversky',
        'normalize': True,
        'tversky_alpha': 1.9109121434662428,
        'tversky_beta': 1.7823834698905734
    }
    icf.fit(**icf_params)
    print("Done")
    print("UserKnnCF training...")
    ucf = UserKNNCFRecommender(URM_train, verbose=False)
    ucf_params = {
        'topK': 190,
        'shrink': 0,
        'similarity': 'cosine',
        'normalize': True
    }
    ucf.fit(**ucf_params)
    print("Done")
    print("ItemKnnCBF training...")
    icb = ItemKNNCBFRecommender(URM_train, ICM_obj, verbose=False)
    icb_params = {
        'topK': 205,
        'shrink': 1000,
        'similarity': 'cosine',
        'normalize': True,
        'feature_weighting': 'BM25'
    }
    icb.fit(**icb_params)
    print("Done")
    """
    print("SlimElasticNet training...")
    sen = SLIMElasticNetRecommender(URM_train, verbose=False)
    sen_params = {'topK': 954, 'l1_ratio': 3.87446082207643e-05, 'alpha': 0.07562657698792305}
    sen.fit(**sen_params)
    print("Done")
    """

    list_recommender = [icb, icf, ucf, p3a, rp3b]
    list_already_seen = []

    for rec_perm in combinations(list_recommender, 3):

        if rec_perm not in combinations(list_already_seen, 3):

            recommender_names = '_'.join(
                [r.RECOMMENDER_NAME for r in rec_perm])
            output_folder_path = "result_experiments_v3/seed_" + str(
                seed) + '_3--1' + '/' + recommender_names + '/'

            # If directory does not exist, create
            if not os.path.exists(output_folder_path):
                os.makedirs(output_folder_path)

            # TODO: setta I GIUSTI EVALUATOR QUI!!!!
            runParameterSearch_Collaborative_partial = partial(
                runParameterSearch_Collaborative,
                URM_train=URM_train,
                ICM_train=ICM_obj,
                metric_to_optimize="MAP",
                n_cases=50,
                n_random_starts=20,
                evaluator_validation_earlystopping=evaluator_valid_hybrid,
                evaluator_validation=evaluator_valid_hybrid,
                evaluator_test=evaluator_test,
                output_folder_path=output_folder_path,
                allow_weighting=False,
                # similarity_type_list = ["cosine", 'jaccard'],
                parallelizeKNN=False,
                list_rec=rec_perm)
            pool = multiprocessing.Pool(processes=int(
                multiprocessing.cpu_count()),
                                        maxtasksperchild=1)
            pool.map(runParameterSearch_Collaborative_partial,
                     collaborative_algorithm_list)
    def __init__(self, urm_train, eurm=False):
        super(HybridGen2Recommender, self).__init__(urm_train)
        self.eurm = eurm
        self.data_folder = Path(__file__).parent.parent.absolute()

        self.num_users = urm_train.shape[0]

        urm_train = check_matrix(urm_train.copy(), 'csr')

        data = DataManager()

        if Path(self.data_folder /
                'Data/icm_sparse.npz').is_file() and self.eurm:
            print("Previous icm_sparse found")
        else:
            _, _, _, icm_all = data.get_icm()

            args = {
                'topK': 6,
                'shrink': 5,
                'feature_weighting': 'TF-IDF',
                'similarity': 'cosine',
                'normalize': False
            }

            recommender_1 = ItemKNNCBFRecommender(urm_train, icm_all)
            recommender_1.fit(**args)
            self.recommender_1 = recommender_1

        if Path(self.data_folder /
                'Data/ucm_sparse.npz').is_file() and self.eurm:
            print("Previous ucm_sparse found")
        else:
            ucm_age, ucm_region, ucm_all = data.get_ucm()
            recommender_2 = UserKNNCBFRecommender(urm_train, ucm_all)
            recommender_2.fit(shrink=1777,
                              topK=1998,
                              similarity='tversky',
                              feature_weighting='BM25',
                              tversky_alpha=0.1604953616,
                              tversky_beta=0.9862348646)
            self.recommender_2 = recommender_2

        if self.eurm:
            beta = 0.6
            if Path(self.data_folder / 'Data/icm_sparse.npz').is_file():
                self.score_matrix_1 = sps.load_npz(self.data_folder /
                                                   'Data/icm_sparse.npz')
            else:
                print("icm_sparse not found, create new one...")
                self.score_matrix_1 = self.recommender_1._compute_item_matrix_score(
                    np.arange(self.num_users))
                user_score_matrix_1 = normalize(self.score_matrix_1,
                                                norm='max',
                                                axis=1)
                item_score_matrix_1 = normalize(self.score_matrix_1.tocsc(),
                                                norm='max',
                                                axis=0)
                self.score_matrix_1 = item_score_matrix_1 * beta + user_score_matrix_1.tocsr(
                ) * (1 - beta)
                sps.save_npz(self.data_folder / 'Data/icm_sparse.npz',
                             self.score_matrix_1)

            if Path(self.data_folder / 'Data/ucm_sparse.npz').is_file():
                self.score_matrix_2 = sps.load_npz(self.data_folder /
                                                   'Data/ucm_sparse.npz')
            else:
                print("ucm_sparse not found, create new one...")
                self.score_matrix_2 = self.recommender_2._compute_item_matrix_score(
                    np.arange(self.num_users))
                user_score_matrix_2 = normalize(self.score_matrix_2,
                                                norm='max',
                                                axis=1)
                item_score_matrix_2 = normalize(self.score_matrix_2.tocsc(),
                                                norm='max',
                                                axis=0)
                self.score_matrix_2 = item_score_matrix_2 * beta + user_score_matrix_2.tocsr(
                ) * (1 - beta)
                sps.save_npz(self.data_folder / 'Data/ucm_sparse.npz',
                             self.score_matrix_2)
Example #20
0
           + "hybrid_opt"

output_root_path += filename
output_file = open(output_root_path, "a")

P3alpha = P3alphaRecommender(URM_train)
P3alpha.fit(topK=100, alpha=0.7905462550621185, implicit=True, normalize_similarity=True)
# print("-------------------")
# print("--P3alpha fitted---")
# print("-------------------")

UserBased = UserKNNCFRecommender(URM_train)
UserBased.fit(topK=300, shrink=200)

ContentBased = ItemKNNCBFRecommender(ICM, URM_train)
ContentBased.fit(topK=50, shrink=100)
# print("-------------------")
# print("--KNNCBF fitted---")
# print("-------------------")
ItemKNNCF = ItemKNNCFRecommender(URM_train)
ItemKNNCF.fit(topK=300, shrink=100)
# print("-------------------")
# print("---KNNCF fitted----")
# print("-------------------")
PureSVD = PureSVDRecommender(URM_train)
PureSVD.fit(num_factors=240)
# print("-------------------")
# print("---PureSVD fitted--")
# print("-------------------")

#hybridRecommender = ItemKNNSimilarityHybridRecommender(URM_train, ItemKNNCF.W_sparse, ContentBased.W_sparse)
Example #21
0
    def __init__(self,
                 URM_train,
                 ICM_train,
                 submission=False,
                 verbose=True,
                 seed=1205):
        super(LinearOverMerged001, self).__init__(URM_train, verbose=verbose)
        self.URM_train = URM_train
        self.ICM_train = ICM_train
        self.__submission = submission
        self.__rec1 = UserKNNCFRecommender(URM_train, verbose=False)
        self.__rec1_params = {
            'topK': 190,
            'shrink': 0,
            'similarity': 'cosine',
            'normalize': True
        }
        self.seed = seed

        icb = ItemKNNCBFRecommender(URM_train, ICM_train, verbose=False)
        icb_params = {
            'topK': 65,
            'shrink': 0,
            'similarity': 'dice',
            'normalize': True
        }
        rp3b = RP3betaRecommender(URM_train, verbose=False)
        rp3b_params = {
            'topK': 1000,
            'alpha': 0.38192761611274967,
            'beta': 0.0,
            'normalize_similarity': False
        }
        sen = SLIMElasticNetRecommender(URM_train, verbose=False)
        sen_params = {
            'topK': 992,
            'l1_ratio': 0.004065081925341167,
            'alpha': 0.003725005053334143
        }

        if not self.__submission:
            try:
                icb.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{icb.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{icb.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {icb.RECOMMENDER_NAME} ...")
                icb.fit(**icb_params)
                print(f"done.")
                icb.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{icb.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
            try:
                rp3b.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{rp3b.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{rp3b.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {rp3b.RECOMMENDER_NAME} ...")
                rp3b.fit(**rp3b_params)
                print(f"done.")
                rp3b.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{rp3b.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
            try:
                sen.load_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{sen.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
                print(f"{sen.RECOMMENDER_NAME} loaded.")
            except:
                print(f"Fitting {sen.RECOMMENDER_NAME} ...")
                sen.fit(**sen_params)
                print(f"done.")
                sen.save_model(
                    f'stored_recommenders/seed_{str(self.seed)}_{sen.RECOMMENDER_NAME}/',
                    f'best_for_{self.RECOMMENDER_NAME}')
        else:
            icb.fit(**icb_params)
            rp3b.fit(**rp3b_params)
            sen.fit(**sen_params)

        self.__rec2 = HiddenMergedRecommender(URM_train,
                                              ICM_train, [icb, rp3b, sen],
                                              verbose=False)
        self.__rec2_params = {
            'alpha': 0.6355738550417837,
            'l1_ratio': 0.6617849709204384,
            'topK': 538
        }

        self.__a = self.__b = None
            SLIM = MultiThreadSLIM_ElasticNet(URM_train=URM_train)
            SLIM.fit(l1_penalty=1e-05,
                     l2_penalty=0,
                     positive_only=True,
                     topK=300,
                     alpha=4e-5)
            item = ItemKNNCFRecommender(URM_train)
            W_sparse_CF = item.fit(topK=800,
                                   shrink=8,
                                   similarity='cosine',
                                   normalize=True)
            CBAlb = ItemKNNCBFRecommender(ICM=ICM_Alb, URM_train=URM_train)
            CBAlb.fit(topK=10,
                      shrink=16,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")
            CBArt = ItemKNNCBFRecommender(ICM=ICM_Art, URM_train=URM_train)
            CBArt.fit(topK=12,
                      shrink=16,
                      similarity='cosine',
                      normalize=True,
                      feature_weighting="none")
            user = UserKNNCFRecommender(URM_train)
            user.fit(topK=300, shrink=2, similarity='cosine', normalize=True)
            p3 = RP3betaRecommender(URM_train=URM_train)
            p3.fit(alpha=0.94,
                   beta=0.3,
                   min_rating=0,
                   topK=300,
        rec2 = RP3betaRecommender(datas[j].urm_train)
        rec2.fit(topK=5000, alpha=0.35, beta=0.025, implicit=True)
        rec3 = ItemKNNCFRecommender(datas[j].urm_train)
        rec3.fit(topK=200,
                 shrink=1000,
                 similarity="jaccard",
                 feature_weighting="TF-IDF")
        rec4 = UserKNNCBFRecommender(datas[j].ucm_all, datas[j].urm_train)
        rec4.fit(topK=5000,
                 shrink=5,
                 feature_weighting="TF-IDF",
                 similarity="euclidean")
        rec5 = ItemKNNCBFRecommender(datas[j].urm_train,
                                     datas[j].icm_all_augmented)
        rec5.fit(topK=100,
                 shrink=1,
                 feature_weighting="TF-IDF",
                 similarity="cosine")
        base_recommenders.append([rec1, rec2, rec3, rec4, rec5])

        # TODO: Edit here
        # Insert the type of users to test the recommenders
        n, t_users, description = datas[j].urm_train_users_by_type[1]

        tested_users.append(t_users)

        # TODO: Edit here
        # This code auto assign the weights
        # It is possible to manually assign the weights
        # For example, we can assign the weights of a previously tuned recommender
        cached_hybrid_rec = Hybrid1CXAlphaRecommender(datas[j],
                                                      base_recommenders[j],