Beispiel #1
0
# NOTE(review): no seed is passed, so NumPy's global RNG is reseeded from
# fresh entropy; runs are presumably not reproducible. Pass a fixed seed if
# reproducibility matters.
np.random.seed()

# Train
user_item_train, user_item_test = train_test_split(csr_df_matrix, train_percentage=train_percent)
bpr = BayesianPersonalizedRanking(iterations=train_interactions)
# the model is fit on the transpose of the matrix that is indexed by user below
bpr.fit(user_item_train.T.tocsr())

print(user_item_train[user_id])
# column indices of the non-zero entries in this user's row
interacted_ids = user_item_train[user_id].nonzero()[1]
index2item = index2item.astype('int32')


def _indices_to_items(indices):
    """Translate matrix column indices into ``item_mapping`` values.

    Each index is first converted through ``index2item``; indices whose
    converted key is missing from ``item_mapping`` are skipped.
    """
    items = []
    for index in indices:
        item_key = index2item[index]  # looked up once, reused for test and fetch
        if item_key in item_mapping:
            items.append(item_mapping[item_key])
    return items


interacted_items = _indices_to_items(interacted_ids)

# it returns the recommended index and their corresponding score
reco = bpr.recommend(user_id, user_item_train, N=topn)
print(reco)

# map the index to Item
reco_items = _indices_to_items(index for index, _ in reco)
print(reco_items)


# genre distributions of the interacted and of the recommended items
# (each is expected to sum to 1; that is not verified here)
interacted_distr = compute_genre_distr(interacted_items)
reco_distr = compute_genre_distr(reco_items[:topn])



# ================================ Testing and showing Graphics ========================
# change the default figure style and font size
Beispiel #2
0
bpr_related_subreddits('dogs')

#%%
# category labels of the 'user' column as a NumPy array; a user's position in
# this array is the id passed to the model below
users = data['user'].cat.categories.array.to_numpy()

#%%
# flip to True to dump recommendations for every user to output_filename
write_bpr_recommendations = False

#%%
user_comments = comments.T.tocsr()
if write_bpr_recommendations:
    # one tab-separated "username, subreddit, score" line per recommendation,
    # with the progress bar advanced once per user
    with tqdm.tqdm_notebook(total=len(users)) as progress, \
            codecs.open(output_filename, "w", "utf8") as o:
        for userid, username in enumerate(users):
            for subredditid, score in model.recommend(userid, user_comments):
                line = "%s\t%s\t%s\n" % (
                    username, subreddits[subredditid], score)
                o.write(line)
            progress.update(1)

#%% [markdown]
# ### Sample user recommendations
#
# We went through the list of subreddits in which the user 'xkcd_transcriber' commented. Looking at the kinds of subreddits this user follows, the predictions look good. This is just one sample; we save the recommendations for all users to a file and will also write an AUC scoring function to measure the exact quality of the generated recommendations.


#%%
def recommend_for_user(username):
    """Return ``(subreddit, score)`` recommendations for the named user.

    The user's id is the position of ``username`` in the module-level
    ``users`` array, matching the ``enumerate(users)`` ids used when writing
    recommendations for all users.

    Args:
        username: user name to look up in ``users``.

    Returns:
        A list of ``(subreddit, score)`` tuples, one per entry yielded by
        ``model.recommend``.

    Raises:
        IndexError: if ``username`` does not occur in ``users``.
    """
    # Previously a fixed id (2293528) was passed to the model, so every
    # username produced the same user's recommendations.
    sample_user_id = int(np.where(users == username)[0][0])
    return [(subreddits[i], v)
            for i, v in model.recommend(sample_user_id, user_comments)]
class HHimmlerEnsemble:
    """Ensemble recommender built on a BPR-MF model and an IALS-CG model.

    Both component models are created in ``initialize_components`` and
    trained in ``fit``. Only the IALS-CG scores are currently handed to the
    combiner in ``recommend``; the BPR-MF model is trained and its factors
    are stored, but they are not part of the score list.
    """

    # Score written over items the user already interacted with so that they
    # rank last (a large negative number is used instead of -inf).
    SEEN_ITEM_SCORE = -1000000
    # Factor the transposed training matrix is multiplied by before fitting
    # IALS-CG (presumably a confidence weight; confirm against IALS_CG).
    IALS_CONFIDENCE_SCALE = 40

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """Store the matrices in CSR form and build the component models.

        Args:
            urm_train: training matrix; rows are indexed by user id in
                ``filter_seen``. Must provide ``tocsr()``.
            urm_test: test matrix. Must provide ``tocsr()``.
            icm: item content matrix. Must provide ``tocsr()``.
            parameters: optional dict of ensemble weights. When ``None`` a
                default weight dict is used. The weights are stored in
                ``ensemble_weights`` but are not read by any method of this
                class as written.
        """
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "USER_BPR": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1,
            }

        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()

        self.initialize_components()

    def initialize_components(self):
        """Instantiate the (untrained) BPR-MF and IALS-CG models."""
        self.bpr_mf = BPR_matrix_factorization(factors=200,
                                               regularization=0.00000,
                                               learning_rate=0.01,
                                               iterations=65)
        self.ials_cg_mf = IALS_CG(iterations=15,
                                  calculate_training_loss=True,
                                  factors=500,
                                  use_cg=True,
                                  regularization=1e-3)

    def fit(self):
        """Train both models and keep copies of their latent factors."""
        # Both models are fit on the transposed training matrix.
        self.bpr_mf.fit(self.train.T.tocoo())
        self.ials_cg_mf.fit(self.IALS_CONFIDENCE_SCALE * self.train.T)
        self.bpr_mf_latent_x = self.bpr_mf.user_factors.copy()
        self.bpr_mf_latent_y = self.bpr_mf.item_factors.copy()
        self.ials_cg_mf_latent_x = self.ials_cg_mf.user_factors.copy()
        self.ials_cg_mf_latent_y = self.ials_cg_mf.item_factors.copy()

    def recommend(self, user_id, combiner, at=10):
        """Score all items for one user and let ``combiner`` pick the top ones.

        Args:
            user_id: row index of the user in the training matrix and in the
                stored user factors.
            combiner: object exposing ``combine(scores, at)``.
            at: cut-off forwarded to ``combiner.combine``.

        Returns:
            Whatever ``combiner.combine`` returns for the score list
            ``[[scores_array, weight, name], ...]``.
        """
        ials_cg_mf_r = np.dot(self.ials_cg_mf_latent_x[user_id],
                              self.ials_cg_mf_latent_y.T).ravel()

        # NOTE: BPR-MF scores are deliberately left out of the ensemble.
        # Re-adding them needs a "BPR_MF" entry in ``ensemble_weights``,
        # which the default weights do not define.
        scores = [
            [ials_cg_mf_r, 1, "IALS_CG"],
        ]

        for component in scores:
            # filter_seen overwrites the score array in place
            self.filter_seen(user_id, component[0])

        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Overwrite the scores of items the user has in the training matrix.

        Args:
            user_id: row index into the CSR training matrix.
            scores: array of per-item scores; modified in place.

        Returns:
            The same ``scores`` array, for convenience.
        """
        # CSR layout: the column indices of row ``user_id`` live in
        # indices[indptr[user_id]:indptr[user_id + 1]].
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])

        user_profile = self.train.indices[start_pos:end_pos]

        scores[user_profile] = self.SEEN_ITEM_SCORE
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Collect IALS-CG recommendations for several users.

        Args:
            user_list: iterable of user ids.
            combiner: unused; kept for signature compatibility.
            at: number of recommendations requested per user.

        Returns:
            An empty array when ``user_list`` is empty; a 1-D array
            ``[user_id, item_1, ..., item_k]`` for a single user; otherwise a
            2-D array with one such row per user.
        """
        rows = []
        for user_id in user_list:
            # ``at`` is forwarded here; it used to be ignored in favour of a
            # fixed N=10.
            ials = self.ials_cg_mf.recommend(userid=user_id,
                                             user_items=self.train,
                                             N=at)
            item_ids = np.array([rec[0] for rec in ials])
            rows.append(np.concatenate(([user_id], item_ids)))

        if not rows:
            return np.array([])
        if len(rows) == 1:
            # single user: historically returned as a flat row, not (1, k)
            return rows[0]
        # stack once instead of re-stacking the growing result per user
        return np.vstack(rows)

    def get_component_data(self):
        """Placeholder: prints a fixed debug string and returns ``None``."""
        print('cyka')