Ejemplo n.º 1
0
    def test_explain(self):
        """Exercise ``recommend`` and ``explain`` on a small ALS model.

        Checks that recalculating the user vector leaves the recommendations
        unchanged, that the explained score matches the top recommendation and
        equals the sum of the per-item contributions, and that feeding the
        returned weights back in with ``N=2`` yields the first two contributions.
        """
        raw_counts = [
            [1, 1, 0, 1, 0, 0],
            [0, 1, 1, 1, 0, 0],
            [1, 4, 1, 0, 7, 0],
            [1, 1, 0, 0, 0, 0],
            [9, 0, 4, 1, 0, 1],
            [0, 1, 0, 0, 0, 1],
            [0, 0, 2, 0, 1, 1],
        ]
        # The matrix handed to fit() is the doubled counts; its transpose is
        # what recommend()/explain() receive below.
        training = csr_matrix(raw_counts, dtype=np.float64) * 2
        transposed = training.T

        als = AlternatingLeastSquares(
            factors=4,
            regularization=20,
            use_native=False,
            use_cg=False,
            use_gpu=False,
            iterations=100,
            random_state=23,
        )
        als.fit(training, show_progress=False)

        target_user = 0

        # Recommendations must not change when the user vector is recomputed.
        baseline = als.recommend(target_user, transposed, N=10)
        recomputed = als.recommend(target_user, transposed, N=10, recalculate_user=True)
        for stored, fresh in zip(baseline, recomputed):
            stored_item, stored_score = stored
            fresh_item, fresh_score = fresh
            self.assertEqual(stored_item, fresh_item)
            self.assertAlmostEqual(stored_score, fresh_score, places=4)

        # The explanation of the best recommendation must be self-consistent.
        best_item, best_score = recomputed[0]
        explained_score, contributions, weights = als.explain(
            target_user, transposed, itemid=best_item
        )
        contrib_scores = [value for _, value in contributions]
        contrib_items = [item for item, _ in contributions]
        self.assertAlmostEqual(best_score, explained_score, places=4)
        self.assertAlmostEqual(best_score, sum(contrib_scores), places=4)
        self.assertEqual(
            contrib_scores, sorted(contrib_scores, reverse=True), msg="Scores not in order"
        )
        self.assertEqual([0, 2, 3, 4], sorted(contrib_items), msg="Items not seen by user")

        # Reusing the precomputed weights and truncating to N=2 must give the
        # same score and the leading two contributions.
        truncated_score, truncated, weights = als.explain(
            target_user, transposed, itemid=best_item, user_weights=weights, N=2
        )
        truncated_scores = [value for _, value in truncated]
        truncated_items = [item for item, _ in truncated]
        self.assertEqual(2, len(truncated))
        self.assertAlmostEqual(best_score, truncated_score, places=4)
        self.assertEqual(contrib_scores[:2], truncated_scores)
        self.assertEqual(contrib_items[:2], truncated_items)
Ejemplo n.º 2
0
    def test_explain(self):
        """Check ``recommend``/``explain`` consistency on a tiny ALS model.

        The model is fitted after seeding NumPy's global random state. The test
        then asserts that recomputing the user vector does not change the
        recommendations, that the explanation's total score matches the top
        recommendation, and that a truncated explanation built from the
        returned weights reproduces the first two contributions.
        """
        rows = [[1, 1, 0, 1, 0, 0],
                [0, 1, 1, 1, 0, 0],
                [1, 4, 1, 0, 7, 0],
                [1, 1, 0, 0, 0, 0],
                [9, 0, 4, 1, 0, 1],
                [0, 1, 0, 0, 0, 1],
                [0, 0, 2, 0, 1, 1]]
        fit_matrix = csr_matrix(rows, dtype=np.float64) * 2
        lookup_matrix = fit_matrix.T

        als_options = dict(factors=4,
                           regularization=20,
                           use_native=False,
                           use_cg=False,
                           iterations=100)
        als = AlternatingLeastSquares(**als_options)
        # Seed after construction and immediately before fitting.
        np.random.seed(23)
        als.fit(fit_matrix, show_progress=False)

        uid = 0

        # Same recommendations whether or not the user vector is recomputed.
        cached = als.recommend(uid, lookup_matrix, N=10)
        refreshed = als.recommend(uid, lookup_matrix, N=10, recalculate_user=True)
        for cached_pair, refreshed_pair in zip(cached, refreshed):
            cached_item, cached_score = cached_pair
            refreshed_item, refreshed_score = refreshed_pair
            self.assertEqual(cached_item, refreshed_item)
            self.assertAlmostEqual(cached_score, refreshed_score, places=4)

        # Explanation for the highest-ranked recommendation.
        winner, winner_score = refreshed[0]
        total, parts, user_w = als.explain(uid, lookup_matrix, itemid=winner)
        part_scores = [contribution for _, contribution in parts]
        part_items = [source for source, _ in parts]
        self.assertAlmostEqual(winner_score, total, places=4)
        self.assertAlmostEqual(winner_score, sum(part_scores), places=4)
        self.assertEqual(part_scores, sorted(part_scores, reverse=True),
                         msg="Scores not in order")
        self.assertEqual([0, 2, 3, 4], sorted(part_items),
                         msg="Items not seen by user")

        # Explanation again, this time with precomputed weights and N=2.
        short_total, short_parts, user_w = als.explain(
            uid, lookup_matrix, itemid=winner, user_weights=user_w, N=2)
        short_scores = [contribution for _, contribution in short_parts]
        short_items = [source for source, _ in short_parts]
        self.assertEqual(2, len(short_parts))
        self.assertAlmostEqual(winner_score, short_total, places=4)
        self.assertEqual(part_scores[:2], short_scores)
        self.assertEqual(part_items[:2], short_items)
Ejemplo n.º 3
0
class Recommender:
    """Thin wrapper around ``AlternatingLeastSquares`` keyed by external ids.

    ``train`` converts the ``userid``/``itemid`` columns of a data frame into
    categorical integer codes, fits the model, and stores lookup tables in
    both directions so the remaining methods can accept and return the
    original ids instead of codes.
    """

    def __init__(self, factors=50):
        """Create the underlying ALS model.

        Args:
            factors: Value forwarded as the model's ``factors`` argument.
        """
        self.model = AlternatingLeastSquares(factors=factors,
                                             regularization=0.01,
                                             dtype=np.float64,
                                             iterations=50)

    def train(self, data):
        """Fit the model and build the id <-> code lookup tables.

        Args:
            data: Object exposing ``userid``, ``itemid`` and ``confidence``
                columns (accessed as attributes).
        """
        userids = data.userid.astype("category")
        itemids = data.itemid.astype("category")

        # Rows are item codes and columns are user codes.
        matrix = coo_matrix((data.confidence.astype('float64'),
                             (itemids.cat.codes.copy(),
                              userids.cat.codes.copy())))
        self.model.fit(matrix)
        # Transposed copy (user codes as rows) in CSR form, used by
        # recommend()/explain().
        self.t_matrix = matrix.T.tocsr()

        # Category position == integer code, so enumerate() gives code -> id.
        self.usercode_to_id = dict(enumerate(userids.cat.categories))
        self.itemcode_to_id = dict(enumerate(itemids.cat.categories))
        self.userid_to_code = {category: code
                               for code, category in self.usercode_to_id.items()}
        self.itemid_to_code = {category: code
                               for code, category in self.itemcode_to_id.items()}

    def _scored_items_frame(self, scored_codes, score_column):
        """Turn ``(item_code, score)`` pairs into an ``itemid``/score frame."""
        rows = [(self.itemcode_to_id[code], score)
                for code, score in scored_codes]
        return pd.DataFrame(rows, columns=["itemid", score_column])

    def similar_items(self, itemid, N=10):
        """Return a frame of items similar to ``itemid``.

        Args:
            itemid: External item id; must be known from ``train``.
            N: Forwarded to the model's ``similar_items``.

        Returns:
            DataFrame with columns ``itemid`` and ``similarity``.

        Raises:
            KeyError: If ``itemid`` was not seen during training.
        """
        item_code = self.itemid_to_code[itemid]
        similar_codes = self.model.similar_items(item_code, N)
        return self._scored_items_frame(similar_codes, "similarity")

    def recommendations(self, userid, N=10):
        """Return a frame of recommended items for ``userid``.

        Args:
            userid: External user id; must be known from ``train``.
            N: Forwarded to the model's ``recommend``.

        Returns:
            DataFrame with columns ``itemid`` and ``confidence``.

        Raises:
            KeyError: If ``userid`` was not seen during training.
        """
        user_code = self.userid_to_code[userid]
        user_item_codes = self.model.recommend(user_code, self.t_matrix, N)
        return self._scored_items_frame(user_item_codes, "confidence")

    def explain(self, userid, itemid):
        """Return the model's ``explain`` result for a user/item pair.

        The result is passed through unchanged, so any item references in it
        are model codes rather than external ids.

        Raises:
            KeyError: If either id was not seen during training.
        """
        user_code = self.userid_to_code[userid]
        item_code = self.itemid_to_code[itemid]
        return self.model.explain(user_code, self.t_matrix, item_code)

    def confidence(self, userid, itemid):
        """Return the dot product of the item's and the user's factor vectors.

        Raises:
            KeyError: If either id was not seen during training.
        """
        item_code = self.itemid_to_code[itemid]
        user_code = self.userid_to_code[userid]
        item_factor = self.model.item_factors[item_code]
        user_factor = self.model.user_factors[user_code]
        return item_factor.dot(user_factor)

    def user_factors(self):
        """Return the user factor matrix as a frame.

        The first column is ``userid``; factor columns are named ``f0``,
        ``f1``, ... .
        """
        factors = pd.DataFrame(self.model.user_factors).add_prefix("f")
        ids = factors.index.map(lambda code: self.usercode_to_id[code])
        factors.insert(0, "userid", ids)
        return factors

    def item_factors(self):
        """Return the item factor matrix as a frame.

        The first column is ``itemid``; factor columns are named ``f0``,
        ``f1``, ... .
        """
        factors = pd.DataFrame(self.model.item_factors).add_prefix("f")
        ids = factors.index.map(lambda code: self.itemcode_to_id[code])
        factors.insert(0, "itemid", ids)
        return factors

    def items_recommendations(self, itemids, N=10):
        """Recommend items for an ad-hoc user described by a set of items.

        A one-row matrix with a 1 for each given item is built and passed to
        the model with ``recalculate_user=True``.

        Args:
            itemids: Iterable of external item ids known from ``train``.
            N: Forwarded to the model's ``recommend``.

        Returns:
            DataFrame with columns ``itemid`` and ``confidence``.

        Raises:
            KeyError: If any id in ``itemids`` was not seen during training.
        """
        # The synthetic matrix has a single row, so the user code is 0.
        user_code = 0
        item_codes = [self.itemid_to_code[item_id] for item_id in itemids]

        data = [1] * len(item_codes)
        rows = [0] * len(item_codes)
        shape = (1, self.model.item_factors.shape[0])
        user_items = coo_matrix(
            (data, (rows, item_codes)), shape=shape).tocsr()

        user_item_codes = self.model.recommend(
            user_code, user_items, N, recalculate_user=True)
        return self._scored_items_frame(user_item_codes, "confidence")