# No explicit seed is passed, so the split and training below are not
# pinned to a reproducible random state.
np.random.seed()

# Train
user_item_train, user_item_test = train_test_split(
    csr_df_matrix,
    train_percentage=train_percent,
)
bpr = BayesianPersonalizedRanking(iterations=train_interactions)
# The model is fitted on the transposed training matrix.
bpr.fit(user_item_train.T.tocsr())

# Show the target user's training row, then collect the column indices
# that hold a non-zero entry in that row.
print(user_item_train[user_id])
interacted_ids = user_item_train[user_id].nonzero()[1]

index2item = index2item.astype('int32')
# Keep only the indices whose item id is known to item_mapping.
interacted_items = [
    item_mapping[index2item[col]]
    for col in interacted_ids
    if index2item[col] in item_mapping.keys()
]

# it returns the recommended index and their corresponding score
reco = bpr.recommend(user_id, user_item_train, N=topn)
print(reco)

# map the index to Item (unmapped indices are skipped)
reco_items = [
    item_mapping[index2item[col]]
    for col, _ in reco
    if index2item[col] in item_mapping.keys()
]
print(reco_items)

# we can check that the probability does in fact add up to 1
interacted_distr = compute_genre_distr(interacted_items)
reco_distr = compute_genre_distr(reco_items[:topn])

# ================================ Testing and showing Graphics ========================
# change default style figure and font size
bpr_related_subreddits('dogs')

#%%
users = data['user'].cat.categories.array.to_numpy()

#%%
write_bpr_recommendations = False

#%%
user_comments = comments.T.tocsr()

if write_bpr_recommendations:
    # generate recommendations for each user and write out to a file
    with tqdm.tqdm_notebook(total=len(users)) as progress, \
            codecs.open(output_filename, "w", "utf8") as o:
        for userid, username in enumerate(users):
            for subredditid, score in model.recommend(userid, user_comments):
                o.write("%s\t%s\t%s\n" % (username, subreddits[subredditid], score))
            # one progress tick per user (the bar total is len(users))
            progress.update(1)

#%% [markdown]
# ### Sample user recommendations
#
# We went through the list of subreddits where the user 'xkcd_transcriber' commented. Looking at the kind of subreddits this user follows, we see that the predictions are good. This is just one sample; we are saving the recommendations for all users in a file and will also write the AUC score function for getting the exact scores for the generated recommendations.

#%%
def recommend_for_user(username):
    """Return the model's recommendations for the user named *username*.

    Args:
        username: Name to look up in the module-level ``users`` array.

    Returns:
        A list of ``(subreddit_name, score)`` pairs, one per entry yielded
        by ``model.recommend`` for that user.

    Raises:
        IndexError: If *username* does not occur in ``users``.
    """
    matches = np.where(users == username)[0]
    if matches.size == 0:
        raise IndexError("unknown user: %r" % (username,))
    sample_user_id = matches[0]
    # Use the looked-up id; a hard-coded id here would make every
    # username return the same user's recommendations.
    return [(subreddits[i], v)
            for i, v in model.recommend(sample_user_id, user_comments)]
class HHimmlerEnsemble:
    """Ensemble wrapper around a BPR and an IALS matrix-factorization model.

    The constructor stores CSR copies of the train/test interaction
    matrices and of the item-content matrix, then instantiates both
    component models. ``fit`` trains them and caches copies of their
    latent factors; ``recommend`` and ``recommend_batch`` produce
    recommendations from those models.
    """

    # Score written over already-seen items so they sink to the bottom of
    # any ranking (a large finite value is used rather than -inf).
    SEEN_ITEM_SCORE = -1000000

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """Store the data matrices and build the component models.

        Args:
            urm_train: Training interaction matrix; converted with ``tocsr()``.
            urm_test: Test interaction matrix; converted with ``tocsr()``.
            icm: Item-content matrix; converted with ``tocsr()``.
            parameters: Optional mapping of component name to ensemble
                weight. When omitted, a built-in default mapping is used.
        """
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "USER_BPR": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1,
            }

        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()

        self.initialize_components()

    def initialize_components(self):
        """Instantiate the (unfitted) BPR and IALS component models."""
        self.bpr_mf = BPR_matrix_factorization(
            factors=200,
            regularization=0.00000,
            learning_rate=0.01,
            iterations=65,
        )
        self.ials_cg_mf = IALS_CG(
            iterations=15,
            calculate_training_loss=True,
            factors=500,
            use_cg=True,
            regularization=1e-3,
        )

    def fit(self):
        """Train both component models and cache copies of their factors.

        Both models are fitted on the transposed training matrix. The
        IALS input is additionally multiplied by 40.
        NOTE(review): the factor 40 looks like a confidence scaling for
        implicit feedback - confirm.
        """
        self.bpr_mf.fit(self.train.T.tocoo())
        self.ials_cg_mf.fit(40 * self.train.T)

        # Copies, so later changes to the models do not alter the caches.
        self.bpr_mf_latent_x = self.bpr_mf.user_factors.copy()
        self.bpr_mf_latent_y = self.bpr_mf.item_factors.copy()
        self.ials_cg_mf_latent_x = self.ials_cg_mf.user_factors.copy()
        self.ials_cg_mf_latent_y = self.ials_cg_mf.item_factors.copy()

    def recommend(self, user_id, combiner, at=10):
        """Score all items for one user and let *combiner* pick the result.

        Args:
            user_id: Row index of the user in the training matrix and in
                the cached latent-factor arrays.
            combiner: Object exposing ``combine(scores, at)``; ``scores``
                is a list of ``[score_vector, weight, name]`` entries.
            at: Passed through to ``combiner.combine``.

        Returns:
            Whatever ``combiner.combine`` returns.
        """
        ials_cg_mf_r = np.dot(
            self.ials_cg_mf_latent_x[user_id],
            self.ials_cg_mf_latent_y.T,
        ).ravel()

        # NOTE: only the IALS scores take part in the ensemble. BPR-MF
        # scores are not included, and the default weights carry no
        # "BPR_MF" entry.
        scores = [
            [ials_cg_mf_r, 1, "IALS_CG"],
        ]

        # filter_seen overwrites the score vectors in place.
        for entry in scores:
            self.filter_seen(user_id, entry[0])

        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Overwrite, in place, the scores of items the user already has.

        Args:
            user_id: Row index of the user in the CSR training matrix.
            scores: Array indexable by item index; modified in place.

        Returns:
            The same *scores* object that was passed in.
        """
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])

        # Column indices stored in the user's CSR row.
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = self.SEEN_ITEM_SCORE

        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Recommend *at* items for every user, using the IALS model.

        Args:
            user_list: Iterable of user ids.
            combiner: Unused; kept for interface compatibility.
            at: Number of recommendations requested per user.

        Returns:
            An array whose rows are ``[user_id, item_1, ..., item_k]``.
            For backward compatibility, an empty *user_list* yields an
            empty 1-D array and a single user yields a 1-D row; two or
            more users yield a 2-D array with one row per user.
        """
        rows = []
        for user in user_list:
            recs = self.ials_cg_mf.recommend(
                userid=user, user_items=self.train, N=at)
            # Each recommendation's first element is the item index.
            item_ids = np.array([rec[0] for rec in recs])
            rows.append(np.concatenate(([user], item_ids)))

        if not rows:
            return np.array([])
        if len(rows) == 1:
            return rows[0]
        # Stack once instead of re-stacking inside the loop.
        return np.vstack(rows)

    def get_component_data(self):
        """Placeholder: prints a fixed debug string and returns nothing."""
        print('cyka')