Ejemplo n.º 1
0
    def initialize_components(self):
        """Construct every base recommender of the ensemble and keep it on ``self``.

        Reads ``self.train``, ``self.icm`` and ``self.ensemble_weights["IALS"]``.
        """
        # Keyword arguments shared by the item-based and user-based cosine models.
        cf_cosine_args = dict(topK=200, shrink=15, normalize=True, mode='cosine')

        self.item_cosineCF_recommender = Cosine_Similarity(
            self.train, **cf_cosine_args)
        self.user_cosineCF_recommender = Cosine_Similarity(
            self.train.T, **cf_cosine_args)
        self.item_bpr_recommender = SLIM_BPR_Cython(
            self.train, positive_threshold=0)
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(
            self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(
            self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')

        # An IALS weight of exactly 0 builds the model with iters=0
        # (presumably to skip its training cost -- confirm in IALS_numpy).
        ials_args = {"iters": 0} if self.ensemble_weights["IALS"] == 0 else {}
        self.ials_recommender = IALS_numpy(**ials_args)
Ejemplo n.º 2
0
    def initialize_components(self):
        """Construct every model of the ensemble and keep it on ``self``.

        ``self.train`` is first replaced by the result of
        ``self.rescale_wrt_insertion_order``; all components are then built
        from that matrix and from ``self.icm``.
        """
        self.train = self.rescale_wrt_insertion_order(self.train)

        # Keyword arguments shared by the two collaborative cosine models.
        cf_cosine_args = dict(topK=200, shrink=15, normalize=True, mode='cosine')

        self.item_cosineCF_recommender = Cosine_Similarity(
            self.train, **cf_cosine_args)
        self.user_cosineCF_recommender = Cosine_Similarity(
            self.train.T, **cf_cosine_args)
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(
            self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(
            self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')
        self.item_rp3b_recommender = RP3betaRecommender(self.train)
        self.user_rp3b_recommender = RP3betaRecommender(self.train.T)
        self.bpr_mf = BPR_matrix_factorization(
            factors=800,
            regularization=0.01,
            learning_rate=0.01,
            iterations=300)
        self.ials_cg_mf = IALS_CG(
            iterations=15,
            calculate_training_loss=True,
            factors=500,
            use_cg=True,
            regularization=1e-3)
        self.lightfm = LightFM_Recommender(
            self.train, self.icm, no_components=200)
Ejemplo n.º 3
0
# print("-------------------")

# Fit the base recommenders that feed the score-level hybrid below.
UserBased = UserKNNCFRecommender(URM_train)
UserBased.fit(topK=300, shrink=200)

ContentBased = ItemKNNCBFRecommender(ICM, URM_train)
ContentBased.fit(topK=50, shrink=100)
# print("-------------------")
# print("--KNNCBF fitted---")
# print("-------------------")
ItemKNNCF = ItemKNNCFRecommender(URM_train)
ItemKNNCF.fit(topK=300, shrink=100)
# print("-------------------")
# print("---KNNCF fitted----")
# print("-------------------")
PureSVD = PureSVDRecommender(URM_train)
PureSVD.fit(num_factors=240)
# print("-------------------")
# print("---PureSVD fitted--")
# print("-------------------")

#hybridRecommender = ItemKNNSimilarityHybridRecommender(URM_train, ItemKNNCF.W_sparse, ContentBased.W_sparse)
hybridRecommender_scores = ItemKNNScoresHybridRecommender_multiple(URM_train, ItemKNNCF, ContentBased, UserBased)
#hybridRecommender_scores = ItemKNNScoresHybridRecommender(URM_train, ItemKNNCF, PureSVD)

# Candidate weights 0.1, 0.2, ..., 0.9 for each of the three components.
alpha_list = np.arange(0.1, 1.0, 0.1)
l1 = (alpha_list)
weight_list = list(itertools.product(l1, l1, l1))
# Keep only the triples whose weights add up to 1.  The values produced by
# np.arange are inexact binary floats (e.g. 0.30000000000000004), so an exact
# ``== 1`` test silently discards valid triples; compare with a tolerance.
only_sum_equal_1 = [list(weights) for weights in weight_list
                    if np.isclose(sum(weights), 1.0)]
Ejemplo n.º 4
0
    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
        """Build and fit every enabled component of the hybrid.

        Args:
            urm: interaction-matrix wrapper; ``getCSR()`` is called on it and
                the object itself is kept as ``self.urm``.
            urm_t: matrix handed to the user-based ``CollaborativeFiltering``
                model (presumably the transposed URM -- confirm with caller).
            icm, icm2: content matrices forwarded to ``IIHybridRecommender``.
            enable_dict: flags forwarded to ``self.setEnables``.
            urm_test: validation wrapper; required when SLIM is enabled
                because its ``getCSR()`` is passed as ``URM_validation``.

        Raises:
            ValueError: if SLIM is enabled but ``urm_test`` is ``None``.
        """
        self.urm = urm
        self.setEnables(enable_dict)

        if self.enableRP3B:
            self.rp3b = RP3betaRecommender(urm.getCSR())
            self.rp3b.fit(topK=100,
                          alpha=0.7,
                          beta=0.3,
                          normalize_similarity=True,
                          implicit=True)

        if self.enableSLIM:
            if urm_test is None:
                # Fail with a clear message instead of an AttributeError on None.
                raise ValueError(
                    "urm_test is required when enableSLIM is set")

            # The log file is only needed while SLIM trains; the context
            # manager guarantees the handle is closed afterwards.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                            recompile_cython=False,
                                            positive_threshold=0,
                                            URM_validation=urm_test.getCSR(),
                                            final_model_sparse_weights=True,
                                            train_with_sparse_weights=False)

                self.slim.fit(epochs=100,
                              validation_every_n=1,
                              logFile=logFile,
                              batch_size=5,
                              topK=200,
                              sgd_mode="adagrad",
                              learning_rate=0.075)

            self.slim_sim = self.slim.get_similarity()

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80,
                         alpha=1,
                         min_rating=0,
                         implicit=True,
                         normalize_similarity=True)

        # Second content-based model, currently disabled:
        # if self.enableCBF2:
        #     print("starting CBF2")
        #     self.cbf2 = ContentBasedFiltering(icm2, urm, k=25, shrinkage=0)
        #     self.cbf2.fit()
        #     print("CBF2 finished")

        if self.enableLFM:
            # LightFM
            print("starting LightFM")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("LightFM finished")

        if self.enableSVD:
            self.svd = PureSVDRecommender(urm.getCSR())
            self.svd.fit(num_factors=225)
            print("SVD finished")

        # User based collaborative filtering is always built.
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=0, mode='user')
        print("USER CF finished")

        # Item-item hybrid (collaborative + the two content matrices).
        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)
Ejemplo n.º 5
0
class UserItemHybridRecommender():
    """Hybrid recommender blending several base models for one user.

    The constructor builds and fits the enabled components, ``fit`` stores
    the blending weights and the combination strategy, and ``s_recommend`` /
    ``m_recommend`` produce the recommendations.

    NOTE(review): ``s_recommend`` reads ``self.cbi``, ``self.cbf`` and
    ``self.cbf2``, but the code creating them in ``__init__`` is commented
    out, so those paths raise ``AttributeError`` unless the attributes are set
    externally.  ``self.item_item`` is built but never queried in this class.
    Confirm the intended wiring.
    """

    def __init__(self, urm, urm_t, icm, icm2, enable_dict, urm_test=None):
        """Build and fit every enabled component of the hybrid.

        Args:
            urm: interaction-matrix wrapper; ``getCSR()`` is called on it and
                the object itself is kept as ``self.urm``.
            urm_t: matrix handed to the user-based ``CollaborativeFiltering``
                model (presumably the transposed URM -- confirm with caller).
            icm, icm2: content matrices forwarded to ``IIHybridRecommender``.
            enable_dict: mapping with the optional ``enable*`` flags read by
                ``setEnables``.
            urm_test: validation wrapper; required when SLIM is enabled
                because its ``getCSR()`` is passed as ``URM_validation``.

        Raises:
            ValueError: if SLIM is enabled but ``urm_test`` is ``None``.
        """
        self.urm = urm
        self.setEnables(enable_dict)

        if self.enableRP3B:
            self.rp3b = RP3betaRecommender(urm.getCSR())
            self.rp3b.fit(topK=100,
                          alpha=0.7,
                          beta=0.3,
                          normalize_similarity=True,
                          implicit=True)

        if self.enableSLIM:
            if urm_test is None:
                # Fail with a clear message instead of an AttributeError on None.
                raise ValueError(
                    "urm_test is required when enableSLIM is set")

            # The log file is only needed while SLIM trains; the context
            # manager guarantees the handle is closed afterwards.
            with open("SLIM_BPR_Cython.txt", "a") as logFile:
                self.slim = SLIM_BPR_Cython(urm.getCSR(),
                                            recompile_cython=False,
                                            positive_threshold=0,
                                            URM_validation=urm_test.getCSR(),
                                            final_model_sparse_weights=True,
                                            train_with_sparse_weights=False)

                self.slim.fit(epochs=100,
                              validation_every_n=1,
                              logFile=logFile,
                              batch_size=5,
                              topK=200,
                              sgd_mode="adagrad",
                              learning_rate=0.075)

            self.slim_sim = self.slim.get_similarity()

        if self.enableP3A:
            self.p3a = P3alpha(urm.getCSR())
            self.p3a.fit(topK=80,
                         alpha=1,
                         min_rating=0,
                         implicit=True,
                         normalize_similarity=True)

        # Second content-based model, currently disabled:
        # if self.enableCBF2:
        #     print("starting CBF2")
        #     self.cbf2 = ContentBasedFiltering(icm2, urm, k=25, shrinkage=0)
        #     self.cbf2.fit()
        #     print("CBF2 finished")

        if self.enableLFM:
            # LightFM
            print("starting LightFM")
            self.lfm = LightFMRecommender()
            self.lfm.fit(urm, epochs=100)
            print("LightFM finished")

        if self.enableSVD:
            self.svd = PureSVDRecommender(urm.getCSR())
            self.svd.fit(num_factors=225)
            print("SVD finished")

        # User based collaborative filtering is always built.
        print("starting USER CF")
        self.cbu = CollaborativeFiltering()
        self.cbu.fit(urm_t, k=100, h=0, mode='user')
        print("USER CF finished")

        # Item-item hybrid (collaborative + the two content matrices).
        self.item_item = IIHybridRecommender(urm, icm, icm2)
        self.item_item.fit(item_weight=0.4, cbf1_weight=0.25, cbf2_weight=0.1)

        # Stand-alone item-based and content-based models, currently disabled
        # (s_recommend still refers to them as self.cbi / self.cbf):
        # # Item based
        # print("starting ITEM CF")
        # self.cbi = CollaborativeFiltering()
        # self.cbi.fit(urm, k=125, h=0, mode='item')
        # print("ITEM CF finished")
        #
        # # Content based artist
        # print("starting CBF")
        # self.cbf = ContentBasedFiltering(icm, urm, k=25, shrinkage=0)
        # self.cbf.fit()
        # print("CBF finished")

    def fit(self, weights_dict, method='rating_weight', norm='max'):
        """Store the blending weights and the combination strategy.

        Args:
            weights_dict: mapping with optional ``*_weight`` entries; every
                missing weight defaults to 0.
            method: strategy used by ``s_recommend`` (``'item_weight'``,
                ``'rating_weight'``, ``'hybrid'`` or ``'switch'``).
            norm: normalisation passed to ``normalize_row`` in the
                ``'rating_weight'`` strategy.
        """
        self.svd_weight = weights_dict.get('svd_weight', 0)
        self.user_weight = weights_dict.get('user_weight', 0)
        self.item_weight = weights_dict.get('item_weight', 0)
        self.cbf_weight = weights_dict.get('cbf_weight', 0)
        self.cbf2_weight = weights_dict.get('cbf2_weight', 0)
        self.rp3b_weight = weights_dict.get('rp3b_weight', 0)
        self.slim_weight = weights_dict.get('slim_weight', 0)
        self.p3a_weight = weights_dict.get('p3a_weight', 0)
        self.lfm_weight = weights_dict.get('lfm_weight', 0)

        self.method = method
        self.norm = norm

    def s_recommend(self, user, nRec=10, switchTH="15"):
        """Recommend ``nRec`` items for a single user.

        The strategy is chosen by ``self.method`` (set in ``fit``):

        * ``'item_weight'``: merge the ranked lists of the components with
          position-based scores via ``item_weighter``.
        * ``'rating_weight'``: weighted sum of the components' score rows,
          ranked and filtered from already seen items.
        * ``'hybrid'``: computes normalised rows but currently combines an
          empty dictionary, so it returns an empty array.
        * ``'switch'``: user-based model when the user has fewer than
          ``switchTH`` tracks, item-based model otherwise.

        Args:
            user: user / playlist identifier understood by the components.
            nRec: number of recommendations to return.
            switchTH: profile-length threshold for ``'switch'``; accepted as
                int or numeric string (the default is the string ``"15"``).

        Raises:
            ValueError: if ``self.method`` is not one of the strategies above.
        """
        if self.method == 'item_weight':
            extra = 1
            depth = nRec + extra

            recommended_items_user = self.cbu.s_recommend(user, depth)
            recommended_items_item = self.cbi.s_recommend(user, depth)
            recommended_items_cbf = self.cbf.s_recommend(user, depth)

            weighting_dict = {
                'user': (recommended_items_user, self.user_weight),
                'item': (recommended_items_item, self.item_weight),
                'cbf': (recommended_items_cbf, self.cbf_weight)
            }

            # Optional components: (enabled flag, attribute name, key, weight).
            # The attribute is only looked up when the flag is set because
            # disabled components are never created.
            optional_components = (
                (self.enableCBF2, 'cbf2', 'cbf2', self.cbf2_weight),
                (self.enableLFM, 'lfm', 'lfm', self.lfm_weight),
                (self.enableSVD, 'svd', 'svd', self.svd_weight),
                (self.enableSLIM, 'slim', 'slim', self.slim_weight),
                (self.enableP3A, 'p3a', 'p3a', self.p3a_weight),
            )
            for enabled, attr_name, key, weight in optional_components:
                if enabled:
                    component = getattr(self, attr_name)
                    weighting_dict[key] = (
                        component.s_recommend(user, depth), weight)

            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'rating_weight':

            norm_method = self.norm

            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_item = self.normalize_row(
                self.cbi.get_pred_row(user), method=norm_method)
            recommended_items_cbf = self.normalize_row(
                self.cbf.get_pred_row(user), method=norm_method)

            pred_row_sparse = recommended_items_user * self.user_weight + recommended_items_item * self.item_weight \
                              + recommended_items_cbf * self.cbf_weight

            if self.enableSLIM:
                recommended_items_slim = self.normalize_row(
                    self.getSlimRow(user), method=norm_method)
                pred_row_sparse = pred_row_sparse + self.slim_weight * recommended_items_slim

            if self.enableCBF2:
                recommended_items_cbf2 = self.normalize_row(
                    self.cbf2.get_pred_row(user), method=norm_method)
                pred_row_sparse = pred_row_sparse + self.cbf2_weight * recommended_items_cbf2

            # P3alpha and RP3beta rows are added without normalisation.
            if self.enableP3A:
                row = self.p3a.get_pred_row(user)
                pred_row_sparse = pred_row_sparse + self.p3a_weight * row

            # NOTE(review): predWeightRatingRows treats the RP3beta row as
            # dense; if that holds here too, the sum below stops being sparse
            # and the ``todense()`` call may fail -- confirm.
            if self.enableRP3B:
                row = self.rp3b.get_pred_row(user)
                pred_row_sparse = pred_row_sparse + self.rp3b_weight * row

            pred_row = np.array(pred_row_sparse.todense()).squeeze()

            # LightFM and SVD rows are added after the conversion to dense.
            if self.enableLFM:
                recommended_items_lfm = self.normalize_row(
                    self.lfm.get_pred_row(user), method=norm_method)
                pred_row = pred_row + self.lfm_weight * recommended_items_lfm

            if self.enableSVD:
                recommended_items_svd = self.normalize_row(
                    self.svd.get_pred_row(user), method=norm_method)
                pred_row = pred_row + self.svd_weight * recommended_items_svd

            ranking = np.argsort(-pred_row)
            recommended_items = self._filter_seen(user, ranking)

            return recommended_items[0:nRec]

        elif self.method == "hybrid":
            # NOTE(review): this branch looks unfinished.  The rows computed
            # below are never used and an empty dictionary is combined, so
            # the result is always an empty array.  The rows match the
            # parameters of predWeightRatingRows, which may be the intended
            # consumer -- confirm before relying on this mode.
            norm_method = 'max'
            extra = 1

            recommended_items_user = self.normalize_row(
                self.cbu.get_pred_row(user), method=norm_method)
            recommended_items_item = self.normalize_row(
                self.cbi.get_pred_row(user), method=norm_method)
            recommended_items_cbf = self.normalize_row(
                self.cbf.get_pred_row(user), method=norm_method)

            recommended_items_cbf2 = None
            if (self.enableCBF2):
                recommended_items_cbf2 = self.normalize_row(
                    self.cbf2.get_pred_row(user), method=norm_method)

            recommended_items_rp3b = None
            if (self.enableRP3B):
                recommended_items_rp3b = self.normalize_row(
                    self.rp3b.get_pred_row(user), method=norm_method)

            recommended_items_slim = None
            if (self.enableSLIM):
                recommended_items_slim = self.normalize_row(
                    self.getSlimRow(user), method=norm_method)

            weighting_dict = {}

            return self.item_weighter(weighting_dict, nRec, extra)

        elif self.method == 'switch':
            # The default threshold is the string "15"; coerce it so the
            # comparison with the integer profile length is well defined.
            threshold = int(switchTH)

            if len(self.urm.extractTracksFromPlaylist(user)) < threshold:
                # short profile: use the user-based model
                return self.cbu.s_recommend(user, nRec=nRec)
            else:
                # long enough profile: use the item-based model
                return self.cbi.s_recommend(user, nRec=nRec)

        else:
            raise ValueError('Not a valid hybrid method')

    def m_recommend(self, user_ids, nRec=10):
        """Return the list of ``s_recommend`` results for every id in ``user_ids``."""
        return [self.s_recommend(uid, nRec) for uid in user_ids]

    def item_weighter(self, tupleDict, nRec, extra):
        """Merge ranked lists into one ranking using position-based scores.

        Args:
            tupleDict: mapping ``name -> (ranked_items, weight)``.
            nRec: number of items to return.
            extra: how many positions beyond ``nRec`` are taken into account.

        Returns:
            Integer numpy array with at most ``nRec`` item ids, best first.
            An item at position ``i`` of a list contributes
            ``(nRec + extra - i) * weight``; lists shorter than
            ``nRec + extra`` simply contribute fewer items.
        """
        depth = nRec + extra
        scores = {}

        for entry in tupleDict.values():
            items = entry[0]
            weight = entry[1]
            # Slicing keeps short lists from raising IndexError.
            for position, item in enumerate(items[:depth]):
                key = str(item)
                scores[key] = scores.get(key, 0) + (depth - position) * weight

        # Sort ascending by score, then flip to get decreasing order (this
        # keeps the tie ordering of an ascending stable sort, reversed).
        ranked = sorted(scores.items(), key=itemgetter(1))
        ranked.reverse()

        # return only the topN recommendations
        return np.array([key for key, _ in ranked[0:nRec]]).astype(int)

    def predWeightRatingRows(self, user, nRec, recommended_items_user,
                             recommended_items_item, recommended_items_cbf,
                             recommended_items_cbf2, recommended_items_rp3b,
                             recommended_items_slim):
        """Blend precomputed score rows and return the top ``nRec`` unseen items.

        The user, item and content rows are always combined; the SLIM row is
        added only when SLIM is enabled and the method is not ``"hybrid"``;
        the CBF2 and RP3beta rows are added when their flags are set.
        """
        pred_row_sparse = recommended_items_user * self.user_weight + recommended_items_item * self.item_weight \
                    + recommended_items_cbf * self.cbf_weight

        if self.enableSLIM and self.method != "hybrid":
            pred_row_sparse = pred_row_sparse + self.slim_weight * recommended_items_slim

        if self.enableCBF2:
            pred_row_sparse = pred_row_sparse + self.cbf2_weight * recommended_items_cbf2

        # needs to be before rp3b because rp3b output is dense
        pred_row = np.array(pred_row_sparse.todense()).squeeze()

        if self.enableRP3B:
            pred_row = pred_row + self.rp3b_weight * recommended_items_rp3b

        ranking = np.argsort(-pred_row)
        recommended_items = self._filter_seen(user, ranking)

        return recommended_items[0:nRec]

    def _filter_seen(self, user_id, ranking):
        """Drop from ``ranking`` the items already present in the user's profile."""
        seen = self.urm.extractTracksFromPlaylist(user_id)
        # np.isin replaces the deprecated np.in1d; identical for 1-D rankings.
        unseen_mask = np.isin(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

    def getSlimRow(self, user):
        """Return the user's URM row multiplied by the SLIM similarity matrix."""
        return self.urm.getCSR().getrow(user) * self.slim_sim

    def setEnables(self, enable_dict):
        """Copy the ``enable*`` flags from ``enable_dict``; missing keys become ``None``."""
        self.enableSVD = enable_dict.get('enableSVD')
        self.enableRP3B = enable_dict.get('enableRP3B')
        self.enableSLIM = enable_dict.get('enableSLIM')
        self.enableCBF2 = enable_dict.get('enableCBF2')
        self.enableP3A = enable_dict.get('enableP3A')
        self.enableLFM = enable_dict.get('enableLFM')

    def normalize_row(self, recommended_items, method):
        """Normalise a score row.

        Args:
            recommended_items: score row (anything supporting ``max`` /
                ``sum`` / division, or accepted by ``normalize``).
            method: ``'max'`` or ``'sum'`` divide by that statistic (a zero
                statistic leaves the row unchanged); ``'l1'`` / ``'l2'``
                delegate to ``normalize``.

        Raises:
            ValueError: for any other ``method``.
        """
        if method in ('max', 'sum'):
            if method == 'max':
                norm_factor = recommended_items.max()
            else:
                norm_factor = recommended_items.sum()
            # Avoid dividing by zero on all-zero rows.
            if norm_factor == 0:
                norm_factor = 1
            return recommended_items / norm_factor

        if method in ('l1', 'l2'):
            return normalize(recommended_items, norm=method)

        raise ValueError('Not a valid normalization method')
Ejemplo n.º 6
0
class NapoEnsemble:
    """Ensemble of collaborative, content-based and factorisation recommenders.

    Components are constructed in ``initialize_components``, trained in
    ``fit`` and blended per user in ``recommend`` through an external
    ``combiner`` object.
    """

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """Store the data in CSR format and construct the components.

        Args:
            urm_train: training interaction matrix (must offer ``tocsr()``).
            urm_test: test interaction matrix (must offer ``tocsr()``).
            icm: item content matrix (must offer ``tocsr()``).
            parameters: mapping component name -> ensemble weight; a default
                set of weights is used when ``None``.
        """
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "SVD": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1
            }

        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()

        self.initialize_components()

    def initialize_components(self):
        """Construct every base recommender and keep it on ``self``."""
        # Keyword arguments shared by the item-based and user-based cosine models.
        cf_cosine_args = dict(topK=200, shrink=15, normalize=True, mode='cosine')

        self.item_cosineCF_recommender = Cosine_Similarity(
            self.train, **cf_cosine_args)
        self.user_cosineCF_recommender = Cosine_Similarity(
            self.train.T, **cf_cosine_args)
        self.item_bpr_recommender = SLIM_BPR_Cython(self.train,
                                                    positive_threshold=0)
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T,
                                                   positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T,
                                                 topK=50,
                                                 shrink=10,
                                                 normalize=True,
                                                 mode='cosine')
        # An IALS weight of exactly 0 builds the model with iters=0
        # (presumably to skip its training cost -- confirm in IALS_numpy).
        if self.ensemble_weights["IALS"] == 0:
            self.ials_recommender = IALS_numpy(iters=0)
        else:
            self.ials_recommender = IALS_numpy()

    def fit(self):
        """Train every component and cache the matrices ``recommend`` needs.

        Stores the similarity / weight matrices (``*_w``), the SVD and IALS
        latent factors, and the minimum of each full factor product
        (``min_svd``, ``min_ials``).
        """
        self.item_bpr_w = self.item_bpr_recommender.fit(epochs=10,
                                                        topK=200,
                                                        batch_size=200,
                                                        sgd_mode='adagrad',
                                                        learning_rate=1e-2)
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(
            num_factors=500)
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10,
                                                      topK=200,
                                                      batch_size=200,
                                                      sgd_mode='adagrad',
                                                      learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity(
        )
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity(
        )
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.ials_latent_x, self.ials_latent_y = self.ials_recommender.fit(
            R=self.train)
        # Note the different orientation of the item factors: IALS needs a
        # transpose, SVD does not.
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()

    def recommend(self, user_id, combiner, at=10):
        """Score all items for ``user_id`` with every component and combine them.

        Args:
            user_id: row index of the user in ``self.train``.
            combiner: object whose ``combine(scores, at)`` merges the list of
                ``[score_row, weight, name]`` entries into the final result.
            at: number of recommendations requested from the combiner.

        Returns:
            Whatever ``combiner.combine`` returns.
        """
        user_profile = self.train[user_id, :]

        item_bpr_r = user_profile.dot(self.item_bpr_w).toarray().ravel()
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(
            self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(
            self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # NOTE(review): min_ials is added to the item factors *before* the
        # dot product here, whereas get_component_data adds it to the
        # finished product -- confirm which one is intended.
        ials_r = np.dot(self.ials_latent_x[user_id],
                        self.ials_latent_y.T + self.min_ials).ravel()

        scores = [
            [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR"],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF"],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF"],
            [ials_r, self.ensemble_weights["IALS"], "IALS"],
            [cbf_r, self.ensemble_weights["CBF"], "CBF"],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"]
        ]

        # Mask already seen items in place in every score row.
        for r in scores:
            self.filter_seen(user_id, r[0])

        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Set the scores of the user's training items to ``-inf`` in place.

        Returns the same ``scores`` array for convenience.
        """
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])

        # Column indices stored for this user's row of the CSR matrix.
        user_profile = self.train.indices[start_pos:end_pos]

        scores[user_profile] = -np.inf

        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Recommend for several users and stack ``[user_id, *items]`` rows.

        Returns:
            An empty array for an empty ``user_list``, a single 1-D row for
            one user, and a 2-D array with one row per user otherwise.
        """
        rows = [
            np.concatenate(([user_id],
                            self.recommend(user_id, combiner, at).T))
            for user_id in user_list
        ]
        if not rows:
            return np.array([])
        if len(rows) == 1:
            # Historical behaviour: a single user yields a 1-D array.
            return rows[0]
        # Stack once instead of re-allocating the result for every user.
        return np.vstack(rows)

    @staticmethod
    def _rating_stats(ratings):
        """Return the min / max / mean of a (sparse or dense) rating matrix."""
        return {
            "min": ratings.min(),
            "max": ratings.max(),
            "mean": ratings.mean(),
        }

    def get_component_data(self):
        """Return min / max / mean of the weighted ratings of each component.

        Returns:
            Mapping component name -> ``{"min", "max", "mean"}``.  The
            user-based CF statistics are reported under ``"USER_CF"`` and the
            SVD statistics under ``"SVD"``; ``"USER_BPR"`` is kept as a legacy
            alias of the SVD statistics for existing callers.
        """
        weights = self.ensemble_weights

        # Each full rating matrix is a temporary that is released as soon as
        # its statistics have been computed.
        item_cf = self._rating_stats(
            weights["ITEM_CF"] * self.train.dot(self.item_cosineCF_w))
        user_cf = self._rating_stats(
            weights["USER_CF"] * self.user_cosineCF_w.dot(self.train))
        svd = self._rating_stats(
            weights["SVD"] * (
                np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd))
        item_bpr = self._rating_stats(
            weights["ITEM_BPR"] * self.train.dot(self.item_bpr_w))
        ials = self._rating_stats(
            weights["IALS"] * (
                np.dot(self.ials_latent_x, self.ials_latent_y.T)
                + self.min_ials))
        cbf = self._rating_stats(
            weights["CBF"] * self.train.dot(self.cbf_w))
        cbf_bpr = self._rating_stats(
            weights["CBF_BPR"] * self.train.dot(self.cbf_bpr_w))

        return {
            "ITEM_CF": item_cf,
            "USER_CF": user_cf,
            "SVD": svd,
            "ITEM_BPR": item_bpr,
            "USER_BPR": svd,
            "IALS": ials,
            "CBF": cbf,
            "CBF_BPR": cbf_bpr
        }
Ejemplo n.º 7
0
    # Build the XGBoost-based recommender from the same inputs as the hybrid.
    xgb = XGBoostRecommender(urm, urm_t, icm, icm2, enable_dict, urm_test)
    #xgb.fit(urm, epochs=100)

    if submission:
        # Submission mode: recommend 10 items per target and write the output.
        recommended_items = xgb.m_recommend(targetList, nRec=10)
        generate_output(targetList, recommended_items)
    else:
        # Evaluation mode: score against urm_test and print the metrics.
        cumulative_precision, cumulative_recall, cumulative_MAP = evaluate_algorithm(
            urm_test, xgb)
        print(
            "Recommender, performance is: Precision = {:.4f}, Recall = {:.4f}, MAP = {:.6f}"
            .format(cumulative_precision, cumulative_recall, cumulative_MAP))

elif htype == "psvd":
    # Pure SVD recommender fitted on the CSR form of the URM.

    psvd = PureSVDRecommender(urm.getCSR())
    psvd.fit(num_factors=225, n_iters=10)
    if submission:
        # Submission mode: recommend 10 items per target and write the output.
        recommended_items = psvd.m_recommend(targetList, nRec=10)
        generate_output(targetList, recommended_items)
    else:
        # Evaluation mode: score against urm_test and print the metrics.
        cumulative_precision, cumulative_recall, cumulative_MAP = evaluate_algorithm(
            urm_test, psvd)
        print(
            "Recommender, performance is: Precision = {:.4f}, Recall = {:.4f}, MAP = {:.6f}"
            .format(cumulative_precision, cumulative_recall, cumulative_MAP))

elif htype == "bprmf":
    # BPR matrix factorisation fitted on the CSR form of the URM.
    mf = BPRMF(num_factors=50, lrate=0.01, iters=10)
    mf.fit(urm.getCSR())
    if submission:
Ejemplo n.º 8
0
class BMussoliniEnsemble:

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """Store the data matrices and build the ensemble components.

        Args:
            urm_train: training interaction matrix; stored as ``.tocsr()``.
            urm_test: held-out interaction matrix; stored as ``.tocsr()``.
            icm: item-content matrix; stored as ``.tocsr()``.
            parameters: mapping from component name to ensemble weight.
                When ``None`` the defaults below are used.  The mapping is
                kept by reference, not copied.
        """
        default_weights = {
            "USER_CF": 7,
            "SVD": 26,
            "ITEM_CF": 0,
            "ITEM_BPR": 16,
            "CBF": 7,
            "IALS": 26,
            "CBF_BPR": 64,
            "BPR_MF": 6,
            "ITEM_RP3B": 16,
            "USER_RP3B": 0,
            "FM": 10,
        }
        self.ensemble_weights = (
            default_weights if parameters is None else parameters
        )

        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()

        # The playlists must be in place before initialize_components(),
        # because that method rescales self.train using them.
        self.sequential_playlists = load_sequential.load_train_sequential()
        self.initialize_components()


    def initialize_components(self):
        """Rescale the training matrix and instantiate every component model.

        ``self.train`` is first replaced by the output of
        ``rescale_wrt_insertion_order``, so every component created here and
        every later read of ``self.train`` sees the rescaled values.  The
        ``fit`` method is what later calls each component's
        ``fit`` / ``compute_similarity``.
        """
        self.train = self.rescale_wrt_insertion_order(self.train)

        # Settings shared by the item-based and user-based cosine models.
        cf_settings = dict(topK=200, shrink=15, normalize=True, mode='cosine')

        # Similarity-based components built on the interaction matrix.
        self.item_cosineCF_recommender = Cosine_Similarity(
            self.train, **cf_settings)
        self.user_cosineCF_recommender = Cosine_Similarity(
            self.train.T, **cf_settings)
        self.svd_recommender = PureSVDRecommender(self.train)

        # Content-based components built on the transposed item-content matrix.
        self.cbf_bpr_recommender = SLIM_BPR_Cython(
            self.icm.T,
            positive_threshold=0,
        )
        self.cbf_recommender = Cosine_Similarity(
            self.icm.T,
            topK=50,
            shrink=10,
            normalize=True,
            mode='cosine',
        )

        # RP3beta components on the interaction matrix and its transpose.
        self.item_rp3b_recommender = RP3betaRecommender(self.train)
        self.user_rp3b_recommender = RP3betaRecommender(self.train.T)

        # Matrix-factorisation components.
        self.bpr_mf = BPR_matrix_factorization(
            factors=800,
            regularization=0.01,
            learning_rate=0.01,
            iterations=300,
        )
        self.ials_cg_mf = IALS_CG(
            iterations=15,
            calculate_training_loss=True,
            factors=500,
            use_cg=True,
            regularization=1e-3,
        )
        self.lightfm = LightFM_Recommender(
            self.train,
            self.icm,
            no_components=200,
        )

    def fit(self):
        """Train every component and cache what ``recommend`` reads.

        After this call the instance holds the similarity matrices (``*_w``),
        the latent factors (``*_latent_x`` / ``*_latent_y``) and the minima of
        the dense SVD and IALS score matrices (``min_svd``, ``min_ials``).
        Computing each minimum materialises the full dense factor product.
        """
        # PureSVD factors and the smallest predicted score.
        svd_x, svd_y = self.svd_recommender.fit(num_factors=500)
        self.svd_latent_x = svd_x
        self.svd_latent_y = svd_y
        self.min_svd = np.dot(svd_x, svd_y).min()

        # Similarity-based components.
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(
            epochs=10,
            topK=200,
            batch_size=20,
            sgd_mode='adagrad',
            learning_rate=1e-2,
        )
        self.item_cosineCF_w = (
            self.item_cosineCF_recommender.compute_similarity())
        self.user_cosineCF_w = (
            self.user_cosineCF_recommender.compute_similarity())
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.item_rp3b_w = self.item_rp3b_recommender.fit()
        self.user_rp3b_w = self.user_rp3b_recommender.fit()

        # IALS is trained on the transposed matrix scaled by 40.
        # NOTE(review): presumably 40 is a confidence weight and the model
        # expects an item x user layout - confirm against IALS_CG.
        self.ials_cg_mf.fit(40 * self.train.T)
        self.ials_latent_x = self.ials_cg_mf.user_factors.copy()
        self.ials_latent_y = self.ials_cg_mf.item_factors.copy()
        self.min_ials = np.dot(
            self.ials_latent_x, self.ials_latent_y.T).min()

        # BPR matrix factorisation, trained on the transposed COO matrix.
        self.bpr_mf.fit(self.train.T.tocoo())
        self.bpr_mf_latent_x = self.bpr_mf.user_factors.copy()
        self.bpr_mf_latent_y = self.bpr_mf.item_factors.copy()

        self.lightfm.fit(100)


    def recommend(self, user_id, combiner, at=10):
        user_profile = self.train[user_id, :]

        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        ials_r = np.dot(self.ials_latent_x[user_id], self.ials_latent_y.T + self.min_ials).ravel()
        bpr_mf_r = np.dot(self.bpr_mf_latent_x[user_id], self.bpr_mf_latent_y.T).ravel()
        item_rp3b_r = user_profile.dot(self.item_rp3b_w).toarray().ravel()
        user_rp3b_r = self.user_rp3b_w[user_id].dot(self.train).toarray().ravel()
        lightfm_r = self.lightfm.scores(user_id)

        scores = [
            # [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR" ],
            # [user_bpr_r, self.ensemble_weights["USER_BPR"], "USER_BPR" ],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF" ],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF" ],
            [ials_r, self.ensemble_weights["IALS"], "IALS" ],
            [cbf_r, self.ensemble_weights["CBF"], "CBF" ],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"],
            [bpr_mf_r, self.ensemble_weights["BPR_MF"], "BPR_MF"],
            [item_rp3b_r, self.ensemble_weights["ITEM_RP3B"], "ITEM_RP3B"],
            [user_rp3b_r, self.ensemble_weights["USER_RP3B"], "USER_RP3B"],
            [lightfm_r, self.ensemble_weights["FM"], "FM"]
            ]

        for r in scores:
            self.filter_seen(user_id, r[0])

        R = combiner.combine(scores, at)
        return R

    def rescale_wrt_insertion_order(self, R):
        R = R.copy()
        R = R.tolil()
        R = R*0.8
        for i in self.sequential_playlists:
            pl = i["id"]
            k = 1
            for j in i["songs"]:
                factor = 1/(k**POPULARITY_SCALING_EXP)
                R[pl, j] = factor*(R[pl,j] + 0.2)
                k += 1
        R = R.tocsr()
        return R
    def filter_seen(self, user_id, scores):

        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])

        user_profile = self.train.indices[start_pos:end_pos]

        scores[user_profile] = -1000000 #-np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        res = np.array([])
        n=0
        for i in user_list:
            recList = self.recommend(i, combiner, at).T
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        item_cf_rating = self.ensemble_weights["ITEM_CF"]*self.train.dot(self.item_cosineCF_w)

        item_cf = {

                "min" : item_cf_rating.min(),
                "max" : item_cf_rating.max(),
                "mean" : item_cf_rating.mean(),

            }
        del item_cf_rating

        user_cf_rating = self.ensemble_weights["USER_CF"]*self.user_cosineCF_w.dot(self.train)

        user_cf = {
                "min": user_cf_rating.min(),
                "max": user_cf_rating.max(),
                "mean": user_cf_rating.mean(),
            }
        del user_cf_rating
        ials_rating =  self.ensemble_weights["IALS"]*(np.dot(self.ials_latent_x, self.ials_latent_y.T)+self.min_ials)

        ials = {

                "min": ials_rating.min(),
                "max": ials_rating.max(),
                "mean": np.mean(ials_rating),
            }
        del ials_rating
        cbf_rating = self.ensemble_weights["CBF"]*self.train.dot(self.cbf_w)
        cbf = {

                "min": cbf_rating.min(),
                "max": cbf_rating.max(),
                "mean": cbf_rating.mean(),
            }
        del cbf_rating
        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"]*self.train.dot(self.cbf_bpr_w)
        cbf_bpr = {

                "min": cbf_bpr_rating.min(),
                "max": cbf_bpr_rating.max(),
                "mean": cbf_bpr_rating.mean(),
            }
        del cbf_bpr_rating
        svd_ratings = self.ensemble_weights["SVD"] * (np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)

        svd = {

            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings


        return {
            "ITEM_CF" : item_cf,
            "USER_CF": user_cf ,
            "SVD" : svd ,
            "IALS" : ials,
            "CBF" : cbf,
            "CBF_BPR" : cbf_bpr
        }