def test_explain(self):
    """Check that ``model.explain`` is consistent with ``model.recommend``.

    Fits a small ALS model with a fixed ``random_state``, then verifies that:

    * recommendations are unchanged when the user vector is recalculated,
    * the explained score matches the recommended score and equals the sum
      of the per-item contributions, which come back sorted descending,
    * passing the returned user weights back in with ``N=2`` yields the
      first two contributions of the full explanation.
    """
    counts = csr_matrix(
        [
            [1, 1, 0, 1, 0, 0],
            [0, 1, 1, 1, 0, 0],
            [1, 4, 1, 0, 7, 0],
            [1, 1, 0, 0, 0, 0],
            [9, 0, 4, 1, 0, 1],
            [0, 1, 0, 0, 0, 1],
            [0, 0, 2, 0, 1, 1],
        ],
        dtype=np.float64,
    )
    user_items = counts * 2
    item_users = user_items.T

    model = AlternatingLeastSquares(
        factors=4,
        regularization=20,
        use_native=False,
        use_cg=False,
        use_gpu=False,
        iterations=100,
        random_state=23,
    )
    model.fit(user_items, show_progress=False)

    userid = 0

    # Assert recommendation is the same if we recompute user vectors
    recs = model.recommend(userid, item_users, N=10)
    recalculated_recs = model.recommend(userid, item_users, N=10, recalculate_user=True)
    # zip() stops at the shorter input, so a missing or extra recommendation
    # would go unnoticed without an explicit length check.
    self.assertEqual(len(recs), len(recalculated_recs))
    for (item1, score1), (item2, score2) in zip(recs, recalculated_recs):
        self.assertEqual(item1, item2)
        self.assertAlmostEqual(score1, score2, 4)

    # Assert explanation makes sense
    top_rec, score = recalculated_recs[0]
    score_explained, contributions, W = model.explain(userid, item_users, itemid=top_rec)
    scores = [s for _, s in contributions]
    items = [i for i, _ in contributions]
    self.assertAlmostEqual(score, score_explained, 4)
    self.assertAlmostEqual(score, sum(scores), 4)
    self.assertEqual(scores, sorted(scores, reverse=True), "Scores not in order")
    self.assertEqual([0, 2, 3, 4], sorted(items), "Items not seen by user")

    # Assert explanation with precomputed user weights is correct
    # (the weights returned by this second call are not needed again)
    top_score_explained, top_contributions, _ = model.explain(
        userid, item_users, itemid=top_rec, user_weights=W, N=2
    )
    top_scores = [s for _, s in top_contributions]
    top_items = [i for i, _ in top_contributions]
    self.assertEqual(2, len(top_contributions))
    self.assertAlmostEqual(score, top_score_explained, 4)
    self.assertEqual(scores[:2], top_scores)
    self.assertEqual(items[:2], top_items)
def test_explain(self):
    """Check that ``model.explain`` is consistent with ``model.recommend``.

    Seeds NumPy's global random generator, fits a small ALS model, then
    verifies that:

    * recommendations are unchanged when the user vector is recalculated,
    * the explained score matches the recommended score and equals the sum
      of the per-item contributions, which come back sorted descending,
    * passing the returned user weights back in with ``N=2`` yields the
      first two contributions of the full explanation.
    """
    counts = csr_matrix(
        [
            [1, 1, 0, 1, 0, 0],
            [0, 1, 1, 1, 0, 0],
            [1, 4, 1, 0, 7, 0],
            [1, 1, 0, 0, 0, 0],
            [9, 0, 4, 1, 0, 1],
            [0, 1, 0, 0, 0, 1],
            [0, 0, 2, 0, 1, 1],
        ],
        dtype=np.float64,
    )
    user_items = counts * 2
    item_users = user_items.T

    model = AlternatingLeastSquares(
        factors=4, regularization=20, use_native=False, use_cg=False, iterations=100
    )
    # Seed the global NumPy generator before fitting so the run is repeatable.
    np.random.seed(23)
    model.fit(user_items, show_progress=False)

    userid = 0

    # Assert recommendation is the same if we recompute user vectors
    recs = model.recommend(userid, item_users, N=10)
    recalculated_recs = model.recommend(userid, item_users, N=10, recalculate_user=True)
    # zip() stops at the shorter input, so a missing or extra recommendation
    # would go unnoticed without an explicit length check.
    self.assertEqual(len(recs), len(recalculated_recs))
    for (item1, score1), (item2, score2) in zip(recs, recalculated_recs):
        self.assertEqual(item1, item2)
        self.assertAlmostEqual(score1, score2, 4)

    # Assert explanation makes sense
    top_rec, score = recalculated_recs[0]
    score_explained, contributions, W = model.explain(userid, item_users, itemid=top_rec)
    scores = [s for _, s in contributions]
    items = [i for i, _ in contributions]
    self.assertAlmostEqual(score, score_explained, 4)
    self.assertAlmostEqual(score, sum(scores), 4)
    self.assertEqual(scores, sorted(scores, reverse=True), "Scores not in order")
    self.assertEqual([0, 2, 3, 4], sorted(items), "Items not seen by user")

    # Assert explanation with precomputed user weights is correct
    # (the weights returned by this second call are not needed again)
    top_score_explained, top_contributions, _ = model.explain(
        userid, item_users, itemid=top_rec, user_weights=W, N=2
    )
    top_scores = [s for _, s in top_contributions]
    top_items = [i for i, _ in top_contributions]
    self.assertEqual(2, len(top_contributions))
    self.assertAlmostEqual(score, top_score_explained, 4)
    self.assertEqual(scores[:2], top_scores)
    self.assertEqual(items[:2], top_items)
class Recommender:
    """Wrapper around an ALS model that speaks external ids instead of codes.

    ``train`` converts the user and item ids in the training data to
    categorical codes, fits the model on them, and stores lookup tables in
    both directions. The other methods translate ids to codes, delegate to
    the model, and translate the resulting codes back to ids.

    All methods other than ``train`` rely on attributes that ``train`` sets
    (``t_matrix`` and the four id/code dictionaries).
    """

    def __init__(self, factors=50):
        """Create the underlying ``AlternatingLeastSquares`` model.

        Args:
            factors: Number of latent factors passed to the model.
        """
        self.model = AlternatingLeastSquares(
            factors=factors, regularization=0.01, dtype=np.float64, iterations=50
        )

    def train(self, data):
        """Fit the model and build the id <-> code lookup tables.

        Args:
            data: Object exposing ``userid``, ``itemid`` and ``confidence``
                columns that support ``astype`` (presumably a pandas
                DataFrame -- confirm against callers).
        """
        userids = data.userid.astype("category")
        itemids = data.itemid.astype("category")

        # Rows are item codes and columns are user codes.
        matrix = coo_matrix(
            (
                data.confidence.astype("float64"),
                (itemids.cat.codes.copy(), userids.cat.codes.copy()),
            )
        )
        self.model.fit(matrix)
        # Transposed copy (user rows, item columns) in CSR form, handed to
        # the model's recommend/explain calls.
        self.t_matrix = matrix.T.tocsr()

        user_categories = userids.cat.categories
        item_categories = itemids.cat.categories
        self.userid_to_code = {
            category: code for code, category in enumerate(user_categories)
        }
        self.itemid_to_code = {
            category: code for code, category in enumerate(item_categories)
        }
        self.usercode_to_id = dict(enumerate(user_categories))
        self.itemcode_to_id = dict(enumerate(item_categories))

    def similar_items(self, itemid, N=10):
        """Return the model's similar items for ``itemid`` as a DataFrame.

        Args:
            itemid: External item id; must have been seen by ``train``.
            N: Count forwarded to the model's ``similar_items``.

        Returns:
            DataFrame with columns ``itemid`` and ``similarity``.

        Raises:
            KeyError: If ``itemid`` is not in the trained id mapping.
        """
        item_code = self.itemid_to_code[itemid]
        similar_codes = self.model.similar_items(item_code, N)
        similar_ids = [(self.itemcode_to_id[code], s) for code, s in similar_codes]
        return pd.DataFrame(similar_ids, columns=["itemid", "similarity"])

    def recommendations(self, userid, N=10):
        """Return the model's recommendations for ``userid`` as a DataFrame.

        Args:
            userid: External user id; must have been seen by ``train``.
            N: Count forwarded to the model's ``recommend``.

        Returns:
            DataFrame with columns ``itemid`` and ``confidence``.

        Raises:
            KeyError: If ``userid`` is not in the trained id mapping.
        """
        user_code = self.userid_to_code[userid]
        user_item_codes = self.model.recommend(user_code, self.t_matrix, N)
        user_item_ids = [(self.itemcode_to_id[code], c) for code, c in user_item_codes]
        return pd.DataFrame(user_item_ids, columns=["itemid", "confidence"])

    def explain(self, userid, itemid):
        """Return the model's ``explain`` result for a user/item pair.

        The result is passed through untouched, so any item references in it
        are model codes, not external ids.

        Raises:
            KeyError: If either id is not in the trained id mappings.
        """
        user_code = self.userid_to_code[userid]
        item_code = self.itemid_to_code[itemid]
        return self.model.explain(user_code, self.t_matrix, item_code)

    def confidence(self, userid, itemid):
        """Return the dot product of the item's and the user's factor vectors.

        Raises:
            KeyError: If either id is not in the trained id mappings.
        """
        item_code = self.itemid_to_code[itemid]
        user_code = self.userid_to_code[userid]
        item_factor = self.model.item_factors[item_code]
        user_factor = self.model.user_factors[user_code]
        return item_factor.dot(user_factor)

    def user_factors(self):
        """Return the model's user factors as a DataFrame.

        Returns:
            DataFrame whose first column is ``userid`` followed by one
            ``f``-prefixed column per factor.
        """
        factors = pd.DataFrame(self.model.user_factors).add_prefix("f")
        # The frame's index is the user code; translate it back to the id.
        ids = factors.index.map(lambda code: self.usercode_to_id[code])
        factors.insert(0, "userid", ids)
        return factors

    def item_factors(self):
        """Return the model's item factors as a DataFrame.

        Returns:
            DataFrame whose first column is ``itemid`` followed by one
            ``f``-prefixed column per factor.
        """
        factors = pd.DataFrame(self.model.item_factors).add_prefix("f")
        # The frame's index is the item code; translate it back to the id.
        ids = factors.index.map(lambda code: self.itemcode_to_id[code])
        factors.insert(0, "itemid", ids)
        return factors

    def items_recommendations(self, itemids, N=10):
        """Recommend items for an ad-hoc set of items rather than a known user.

        Builds a one-row sparse matrix with a 1 at each given item's code and
        asks the model to recommend for row 0 with ``recalculate_user=True``.

        Args:
            itemids: Iterable of external item ids seen by ``train``.
            N: Count forwarded to the model's ``recommend``.

        Returns:
            DataFrame with columns ``itemid`` and ``confidence``.

        Raises:
            KeyError: If any id is not in the trained id mapping.
        """
        user_code = 0
        item_codes = [self.itemid_to_code[item_id] for item_id in itemids]
        data = [1] * len(item_codes)
        rows = [0] * len(item_codes)
        # One row, one column per item known to the model.
        shape = (1, self.model.item_factors.shape[0])
        user_items = coo_matrix((data, (rows, item_codes)), shape=shape).tocsr()
        user_item_codes = self.model.recommend(
            user_code, user_items, N, recalculate_user=True
        )
        user_item_ids = [(self.itemcode_to_id[code], c) for code, c in user_item_codes]
        return pd.DataFrame(user_item_ids, columns=["itemid", "confidence"])