Ejemplo n.º 1
0
    def test_cosine_similarity_dense_row_weighted(self):
        """Compare row-weighted dense similarities against an explicit matrix product.

        Every implementation receives the transposed data matrix together with
        ``row_weights``, ``topK=0`` and ``normalize=False``. The control value is
        ``data_matrix * diag(row_weights) * data_matrix.T`` with its diagonal
        set to zero.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel
        # NOTE(review): Compute_Similarity_Python is not imported in this method;
        # presumably it is available at module level -- confirm.

        TopK = 0

        data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [3, 0, 1, 0]])
        # np.float was only an alias of the builtin float and was removed in
        # NumPy 1.24; np.float64 is the explicit equivalent dtype.
        data_matrix = sps.csr_matrix(data_matrix, dtype=np.float64)

        # One weight per row of data_matrix.T (i.e. per column of data_matrix)
        row_weights = [2, 3, 0, 4]

        cosine_similarity = Cosine_Similarity_Cython(data_matrix.T, topK=TopK, normalize=False, row_weights=row_weights)
        W_dense_Cython = cosine_similarity.compute_similarity()

        cosine_similarity = Compute_Similarity_Python(data_matrix.T, topK=TopK, normalize=False,
                                                      row_weights=row_weights)
        W_dense_Python = cosine_similarity.compute_similarity()

        cosine_similarity = Cosine_Similarity_Parallel(data_matrix.T, topK=TopK, normalize=False,
                                                       row_weights=row_weights)
        W_dense_Parallel = cosine_similarity.compute_similarity()

        # Control: weighted product computed directly, self-similarity removed
        W_dense_mul = data_matrix.dot(sps.diags(row_weights)).dot(data_matrix.T).toarray()
        W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

        assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
Ejemplo n.º 2
0
    def test_cosine_similarity_dense(self):
        """Check the unnormalised dense similarity of each implementation.

        With ``topK=0`` and ``normalize=False`` the result of every
        implementation is compared for exact equality with
        ``data_matrix.T * data_matrix`` whose diagonal has been zeroed.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        TopK = 0

        data_matrix = sps.csr_matrix(
            np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

        # (implementation, failure message), evaluated in this order
        candidates = [
            (Cosine_Similarity_Cython, "W_dense_Cython not matching control"),
            (Compute_Similarity_Python, "W_dense_Python not matching control"),
            (Cosine_Similarity_Parallel, "W_dense_Parallel not matching control"),
        ]

        # Compute every similarity before building the control matrix
        outcomes = []
        for implementation, failure_message in candidates:
            similarity_object = implementation(data_matrix, topK=TopK, normalize=False)
            outcomes.append((similarity_object.compute_similarity(), failure_message))

        # Control: plain product with the self-similarity entries removed
        control = data_matrix.T.dot(data_matrix)
        diagonal = np.arange(control.shape[0])
        control[diagonal, np.arange(control.shape[0])] = 0.0

        for computed, failure_message in outcomes:
            assert np.all(computed == control), failure_message
Ejemplo n.º 3
0
class ItemKNNCFRecommender(Recommender, Similarity_Matrix_Recommender):
    """Item-based KNN recommender built on a Cosine_Similarity matrix."""

    def __init__(self, URM_train, sparse_weights=True):
        """Store the training matrix in CSR format and the weight-storage mode.

        :param URM_train: training matrix, converted through ``check_matrix(..., 'csr')``
        :param sparse_weights: if true ``fit`` stores ``W_sparse``, otherwise a dense ``W``
        """
        super(ItemKNNCFRecommender, self).__init__()

        self.sparse_weights = sparse_weights
        self.dataset = None

        # CSR is faster during evaluation
        self.URM_train = check_matrix(URM_train, 'csr')

    def fit(self, k=50, shrink=100, similarity='cosine', normalize=True):
        """Build the similarity object and compute the weight matrix.

        :param k: forwarded to ``Cosine_Similarity`` as ``topK``
        :param shrink: forwarded to ``Cosine_Similarity`` as ``shrink``
        :param similarity: forwarded to ``Cosine_Similarity`` as ``mode``
        :param normalize: forwarded to ``Cosine_Similarity`` as ``normalize``
        """
        self.k, self.shrink = k, shrink

        self.similarity = Cosine_Similarity(
            self.URM_train,
            shrink=shrink,
            topK=k,
            normalize=normalize,
            mode=similarity,
        )

        weights = self.similarity.compute_similarity()

        if not self.sparse_weights:
            # Dense storage was requested: materialise the computed matrix
            self.W = weights.toarray()
        else:
            self.W_sparse = weights
Ejemplo n.º 4
0
    def test_cosine_similarity_TopK_big(self):
        """Check the top-K similarity on a random 1000 x 500 sparse matrix.

        ``TopK`` equals the number of items. The control value is
        ``data_matrix.T * data_matrix`` with a zeroed diagonal, passed
        through ``similarityMatrixTopK`` with the same ``k``.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        n_items = 500
        n_users = 1000
        TopK = n_items

        # Fixed seed: the same matrix is generated on every run, so a failure
        # can be reproduced instead of depending on the random draw.
        data_matrix = sps.random(n_users, n_items, density=0.1, random_state=42)

        cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=False)
        W_dense_Cython = cosine_similarity.compute_similarity().toarray()

        cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=False)
        W_dense_Python = cosine_similarity.compute_similarity().toarray()

        cosine_similarity = Cosine_Similarity_Parallel(data_matrix, topK=TopK, normalize=False)
        W_dense_Parallel = cosine_similarity.compute_similarity().toarray()

        # Control: plain product with the self-similarity entries removed
        W_dense_mul = data_matrix.T.dot(data_matrix)
        W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

        W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

        # Message names the variable actually checked (was "W_sparse_Cython")
        assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
Ejemplo n.º 5
0
    def test_cosine_similarity_TopK(self):
        """Check the top-K similarity of each implementation on a 3 x 4 matrix.

        The control value is ``data_matrix.T * data_matrix`` with a zeroed
        diagonal, passed through ``similarityMatrixTopK`` with ``k=TopK``.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        TopK = 4

        data_matrix = np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]])
        data_matrix = sps.csr_matrix(data_matrix)

        cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=False)
        W_dense_Cython = cosine_similarity.compute_similarity().toarray()

        cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=False)
        W_dense_Python = cosine_similarity.compute_similarity().toarray()

        cosine_similarity = Cosine_Similarity_Parallel(data_matrix, topK=TopK, normalize=False)
        W_dense_Parallel = cosine_similarity.compute_similarity().toarray()

        # Control: plain product with the self-similarity entries removed
        W_dense_mul = data_matrix.T.dot(data_matrix)
        W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

        W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

        # Message names the variable actually checked (was "W_sparse_Cython")
        assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
Ejemplo n.º 6
0
    def test_cosine_similarity_dense_normalize(self):
        """Check the normalised, shrunk dense similarity of each implementation.

        The control value divides ``data_matrix.T * data_matrix`` element-wise
        by the product of the column norms plus ``shrink`` and then zeroes
        the diagonal.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        import numpy.matlib

        TopK = 0
        shrink = 5

        data_matrix = sps.csr_matrix(
            np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

        # Identical settings for the three implementations under test
        settings = dict(topK=TopK, normalize=True, shrink=shrink)

        from_cython = Cosine_Similarity_Cython(data_matrix, **settings).compute_similarity()
        from_python = Cosine_Similarity_Python(data_matrix, **settings).compute_similarity()
        from_parallel = Cosine_Similarity_Parallel(data_matrix, **settings).compute_similarity()

        # Column norms replicated on every row, then combined pairwise
        squared_column_sums = data_matrix.power(2).sum(axis=0)
        column_norms = np.sqrt(
            np.matlib.repmat(squared_column_sums, data_matrix.shape[1], 1))
        denominator = np.multiply(column_norms, column_norms.T) + shrink

        control = data_matrix.T.dot(data_matrix)
        control /= denominator

        diagonal = np.arange(control.shape[0])
        control[diagonal, np.arange(control.shape[0])] = 0.0

        assert np.allclose(from_cython, control, atol=1e-4), "W_dense_Cython not matching control"
        assert np.allclose(from_python, control, atol=1e-4), "W_dense_Python not matching control"
        assert np.allclose(from_parallel, control, atol=1e-4), "W_dense_Parallel not matching control"
Ejemplo n.º 7
0
    def test_cosine_similarity_dense_jaccard(self):
        """Check the dense similarity of each implementation in 'jaccard' mode.

        The control value is computed on the binarised data matrix as
        intersection / (|a| + |b| - intersection + shrink), dividing only
        where the denominator is positive, with a zeroed diagonal.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

        import numpy.matlib

        TopK = 0
        shrink = 0

        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        # Identical settings for the three implementations under test
        settings = dict(topK=TopK, normalize=True, shrink=shrink, mode='jaccard')

        from_cython = Cosine_Similarity_Cython(data_matrix, **settings).compute_similarity()
        from_python = Cosine_Similarity_Python(data_matrix, **settings).compute_similarity()
        from_parallel = Cosine_Similarity_Parallel(data_matrix, **settings).compute_similarity()

        # Binarise the stored values and switch to a dense float array
        data_matrix.data = np.ones_like(data_matrix.data)
        data_matrix = data_matrix.toarray().astype(np.float64)

        control = data_matrix.T.dot(data_matrix)

        column_sizes = np.matlib.repmat((data_matrix**2).sum(axis=0),
                                        data_matrix.shape[1], 1)
        denominator = column_sizes + column_sizes.T - control + shrink

        # Divide only where the denominator is strictly positive
        control[denominator > 0] /= denominator[denominator > 0]

        diagonal = np.arange(control.shape[0])
        control[diagonal, np.arange(control.shape[0])] = 0.0

        assert np.allclose(from_cython, control, atol=1e-4), \
            "W_dense_Cython not matching control"
        assert np.allclose(from_python, control, atol=1e-4), \
            "W_dense_Python not matching control"
        assert np.allclose(from_parallel, control, atol=1e-4), \
            "W_dense_Parallel not matching control"
Ejemplo n.º 8
0
    def test_cosine_similarity_dense_pearson(self):
        """Check the dense similarity of two implementations in 'pearson' mode.

        The control value subtracts, column by column, the mean of the
        positive entries from those entries, then computes a cosine on the
        centred matrix (dividing only where the denominator is positive)
        and zeroes the diagonal.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python

        import numpy.matlib

        TopK = 0
        shrink = 0

        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        # Identical settings for the two implementations under test
        settings = dict(topK=TopK, normalize=True, shrink=shrink, mode='pearson')

        from_cython = Cosine_Similarity_Cython(data_matrix, **settings).compute_similarity()
        from_python = Cosine_Similarity_Python(data_matrix, **settings).compute_similarity()

        data_matrix = data_matrix.toarray().astype(np.float64)
        for column_index in range(data_matrix.shape[1]):
            # Basic slicing returns a view, so the update lands in data_matrix
            column = data_matrix[:, column_index]
            positive_entries = column > 0
            column[positive_entries] -= np.mean(column[positive_entries])

        column_norms = np.sqrt(
            np.matlib.repmat((data_matrix**2).sum(axis=0),
                             data_matrix.shape[1], 1))
        denominator = np.multiply(column_norms, column_norms.T) + shrink

        control = data_matrix.T.dot(data_matrix)
        # Divide only where the denominator is strictly positive
        control[denominator > 0] /= denominator[denominator > 0]

        diagonal = np.arange(control.shape[0])
        control[diagonal, np.arange(control.shape[0])] = 0.0

        assert np.allclose(from_cython, control, atol=1e-4), \
            "W_dense_Cython not matching control"
        assert np.allclose(from_python, control, atol=1e-4), \
            "W_dense_Python not matching control"
Ejemplo n.º 9
0
    def test_cosine_similarity_dense_external_cfr(self):
        """Validate the implementations against external library references.

        First part: default-mode similarities are compared with sklearn's
        ``cosine_similarity`` on the transposed matrix (diagonal zeroed).
        Second part: 'jaccard'-mode similarities are compared with
        ``1 - scipy.spatial.distance.jaccard`` computed for every pair of
        distinct columns of the binarised matrix.
        """

        from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
        from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel
        from sklearn.metrics.pairwise import cosine_similarity as Cosine_Similarity_Sklearn

        from scipy.spatial.distance import jaccard as Jaccard_Distance_Scipy

        TopK = 0
        shrink = 0

        def similarity_of(implementation, matrix, **extra_options):
            # Build the similarity object with the shared settings and run it
            similarity_object = implementation(matrix,
                                               topK=TopK,
                                               normalize=True,
                                               shrink=shrink,
                                               **extra_options)
            return similarity_object.compute_similarity()

        # ---- Cosine versus sklearn -------------------------------------
        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        from_cython = similarity_of(Cosine_Similarity_Cython, data_matrix)
        from_python = similarity_of(Compute_Similarity_Python, data_matrix)
        from_parallel = similarity_of(Cosine_Similarity_Parallel, data_matrix)

        sklearn_control = Cosine_Similarity_Sklearn(data_matrix.copy().T)
        diagonal = np.arange(sklearn_control.shape[0])
        sklearn_control[diagonal, np.arange(sklearn_control.shape[0])] = 0.0

        assert np.allclose(from_cython, sklearn_control, atol=1e-4), \
            "W_dense_Cython Cosine not matching Sklearn control"
        assert np.allclose(from_python, sklearn_control, atol=1e-4), \
            "W_dense_Python Cosine not matching Sklearn control"
        assert np.allclose(from_parallel, sklearn_control, atol=1e-4), \
            "W_dense_Parallel Cosine not matching Sklearn control"

        # ---- Jaccard versus scipy --------------------------------------
        data_matrix = sps.csr_matrix(
            np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

        from_cython = similarity_of(Cosine_Similarity_Cython, data_matrix, mode='jaccard')
        from_python = similarity_of(Compute_Similarity_Python, data_matrix, mode='jaccard')
        from_parallel = similarity_of(Cosine_Similarity_Parallel, data_matrix, mode='jaccard')

        scipy_control = np.zeros_like(from_python)

        # Binarise the stored values before measuring the Jaccard distance
        data_matrix.data = np.ones_like(data_matrix.data)
        data_matrix = data_matrix.toarray()

        n_rows, n_cols = scipy_control.shape[0], scipy_control.shape[1]
        for first in range(n_rows):
            for second in range(n_cols):
                if first == second:
                    # Self-similarity stays at zero
                    continue
                distance = Jaccard_Distance_Scipy(data_matrix[:, first],
                                                  data_matrix[:, second])
                scipy_control[first, second] = 1 - distance

        assert np.allclose(from_cython, scipy_control, atol=1e-4), \
            "W_dense_Cython Jaccard not matching Scipy control"
        assert np.allclose(from_python, scipy_control, atol=1e-4), \
            "W_dense_Python Jaccard not matching Scipy control"
        assert np.allclose(from_parallel, scipy_control, atol=1e-4), \
            "W_dense_Parallel Jaccard not matching Scipy control"
Ejemplo n.º 10
0
class UserKNNCFRecommender(Recommender, Similarity_Matrix_Recommender):
    """User-based KNN recommender built on a Cosine_Similarity matrix.

    The similarity is computed on the transposed training matrix and item
    scores are obtained by multiplying similarity rows with the training matrix.
    """

    def __init__(self, URM_train, sparse_weights=True):
        """Store the training matrix in CSR format and the weight-storage mode.

        :param URM_train: training matrix, converted through ``check_matrix(..., 'csr')``
        :param sparse_weights: if true ``fit`` stores ``W_sparse``, otherwise a dense ``W``
        """
        super(UserKNNCFRecommender, self).__init__()

        # Not sure if CSR here is faster
        self.URM_train = check_matrix(URM_train, 'csr')

        self.dataset = None

        self.sparse_weights = sparse_weights

    def fit(self, k=50, shrink=100, similarity='cosine', normalize=True):
        """Build the similarity object on ``URM_train.T`` and compute the weights.

        :param k: forwarded to ``Cosine_Similarity`` as ``topK``
        :param shrink: forwarded to ``Cosine_Similarity`` as ``shrink``
        :param similarity: forwarded to ``Cosine_Similarity`` as ``mode``
        :param normalize: forwarded to ``Cosine_Similarity`` as ``normalize``
        """

        self.k = k
        self.shrink = shrink

        self.similarity = Cosine_Similarity(self.URM_train.T,
                                            shrink=shrink,
                                            topK=k,
                                            normalize=normalize,
                                            mode=similarity)

        if self.sparse_weights:
            self.W_sparse = self.similarity.compute_similarity()
        else:
            self.W = self.similarity.compute_similarity()
            self.W = self.W.toarray()

    def recommend(self,
                  user_id,
                  n=None,
                  exclude_seen=True,
                  filterTopPop=False,
                  filterCustomItems=False):
        """Return the indices of the ``n`` best-scoring items for one user.

        :param user_id: row index of the user in ``URM_train``
        :param n: number of items to return; defaults to ``URM_train.shape[1] - 1``
        :param exclude_seen: filter scores through ``_filter_seen_on_scores``
        :param filterTopPop: filter scores through ``_filter_TopPop_on_scores``
        :param filterCustomItems: filter scores through ``_filterCustomItems_on_scores``
        :return: array of item indices sorted by decreasing score
        """

        if n is None:
            n = self.URM_train.shape[1] - 1

        # compute the scores using the dot product
        if self.sparse_weights:

            scores = self.W_sparse[user_id].dot(
                self.URM_train).toarray().ravel()

        else:
            # Numpy dot does not recognize sparse matrices, so we must
            # invoke the dot function on the sparse one
            scores = self.URM_train.T.dot(self.W[user_id])

        # NOTE(review): self.normalize is never assigned in this class;
        # presumably a base class provides it -- confirm.
        if self.normalize:
            # normalization will keep the scores in the same range
            # of value of the ratings in dataset
            user_profile = self.URM_train[user_id]

            rated = user_profile.copy()
            rated.data = np.ones_like(rated.data)
            # NOTE(review): 'rated' is a row of URM_train while the weights come
            # from a similarity computed on URM_train.T; these shapes look
            # incompatible for a dot product unless the matrix is square -- confirm.
            if self.sparse_weights:
                den = rated.dot(self.W_sparse).toarray().ravel()
            else:
                den = rated.dot(self.W).ravel()
            den[np.abs(den) < 1e-6] = 1.0  # to avoid NaNs
            scores /= den

        if exclude_seen:
            scores = self._filter_seen_on_scores(user_id, scores)

        if filterTopPop:
            scores = self._filter_TopPop_on_scores(scores)

        if filterCustomItems:
            scores = self._filterCustomItems_on_scores(scores)

        # Sorting is done in three steps. Faster than plain np.argsort for higher number of items
        # - Partition the data to extract the set of relevant items
        # - Sort only the relevant items
        # - Get the original item index
        relevant_items_partition = (-scores).argpartition(n)[0:n]
        relevant_items_partition_sorting = np.argsort(
            -scores[relevant_items_partition])
        ranking = relevant_items_partition[relevant_items_partition_sorting]

        return ranking

    def recommendBatch(self,
                       users_in_batch,
                       n=None,
                       exclude_seen=True,
                       filterTopPop=False,
                       filterCustomItems=False):
        """Return the ``n`` best-scoring items for every user of a batch.

        :param users_in_batch: indices of the users to score
        :param n: number of items per user; defaults to ``URM_train.shape[1] - 1``
        :param exclude_seen: set the score of items in the user profile to -inf
        :param filterTopPop: set the score of ``filterTopPop_ItemsID`` to -inf
        :param filterCustomItems: set the score of ``filterCustomItems_ItemsID`` to -inf
        :return: integer array with one row of item indices per user
        :raises ValueError: if ``self.normalize`` is true (not implemented)
        """

        # Same default as recommend(); without it the ranking buffer below
        # cannot be allocated when n is omitted
        if n is None:
            n = self.URM_train.shape[1] - 1

        # compute the scores using the dot product

        if self.sparse_weights:

            scores_array = self.W_sparse[users_in_batch].dot(self.URM_train)
            scores_array = scores_array.toarray()

        else:
            # Numpy dot does not recognize sparse matrices, so we must
            # invoke the dot function on the sparse one.
            # The product is items x users; transpose it so that, as in the
            # sparse branch, each row holds the scores of one user.
            scores_array = self.URM_train.T.dot(self.W[users_in_batch].T).T

        if self.normalize:
            raise ValueError("Not implemented")

        # To exclude seen items perform a boolean indexing and replace their score with -inf
        # Seen items will be at the bottom of the list but there is no guarantee they'll NOT be
        # recommended
        if exclude_seen:
            user_profile_batch = self.URM_train[users_in_batch]
            scores_array[user_profile_batch.nonzero()] = -np.inf

        if filterTopPop:
            scores_array[:, self.filterTopPop_ItemsID] = -np.inf

        if filterCustomItems:
            scores_array[:, self.filterCustomItems_ItemsID] = -np.inf

        # np.int was only an alias of the builtin int and was removed in NumPy 1.24
        ranking = np.zeros((scores_array.shape[0], n), dtype=int)

        # Partition, then sort only the n retained items of every user
        for row_index in range(scores_array.shape[0]):
            scores = scores_array[row_index]

            relevant_items_partition = (-scores).argpartition(n)[0:n]
            relevant_items_partition_sorting = np.argsort(
                -scores[relevant_items_partition])
            ranking[row_index] = relevant_items_partition[
                relevant_items_partition_sorting]

        return ranking
Ejemplo n.º 11
0
class NapoEnsemble:
    """Ensemble that combines the scores of several recommender components.

    The components are item/user cosine CF, SLIM BPR on the URM and on the
    transposed ICM, PureSVD, a content-based cosine similarity and IALS.
    Each component is paired with a weight from ``ensemble_weights``.
    """

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """Store the matrices in CSR format and create the components.

        :param urm_train: training matrix (must provide ``tocsr``)
        :param urm_test: test matrix (must provide ``tocsr``)
        :param icm: item content matrix (must provide ``tocsr``)
        :param parameters: mapping from component name to weight; a default
            set of weights is used when None
        """

        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "SVD": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1
            }

        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()

        self.initialize_components()

    def initialize_components(self):
        """Instantiate every component of the ensemble (nothing is trained here)."""

        self.item_cosineCF_recommender = Cosine_Similarity(self.train,
                                                           topK=200,
                                                           shrink=15,
                                                           normalize=True,
                                                           mode='cosine')
        self.user_cosineCF_recommender = Cosine_Similarity(self.train.T,
                                                           topK=200,
                                                           shrink=15,
                                                           normalize=True,
                                                           mode='cosine')
        self.item_bpr_recommender = SLIM_BPR_Cython(self.train,
                                                    positive_threshold=0)
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T,
                                                   positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T,
                                                 topK=50,
                                                 shrink=10,
                                                 normalize=True,
                                                 mode='cosine')
        # A zero IALS weight builds the component with iters=0
        if self.ensemble_weights["IALS"] == 0:
            self.ials_recommender = IALS_numpy(iters=0)
        else:
            self.ials_recommender = IALS_numpy()

    def fit(self):
        """Train every component and store its weights or latent factors.

        Also stores the minimum of the full IALS and SVD score matrices in
        ``min_ials`` and ``min_svd``.
        """

        self.item_bpr_w = self.item_bpr_recommender.fit(epochs=10,
                                                        topK=200,
                                                        batch_size=200,
                                                        sgd_mode='adagrad',
                                                        learning_rate=1e-2)
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(
            num_factors=500)
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10,
                                                      topK=200,
                                                      batch_size=200,
                                                      sgd_mode='adagrad',
                                                      learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity(
        )
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity(
        )
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.ials_latent_x, self.ials_latent_y = self.ials_recommender.fit(
            R=self.train)
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()

    def recommend(self, user_id, combiner, at=10):
        """Score all items for one user with every component and combine them.

        :param user_id: row index of the user in the training matrix
        :param combiner: object whose ``combine(scores, at)`` merges the
            per-component ``[scores, weight, name]`` entries
        :param at: forwarded to ``combiner.combine``
        :return: whatever ``combiner.combine`` returns
        """
        user_profile = self.train[user_id, :]

        item_bpr_r = user_profile.dot(self.item_bpr_w).toarray().ravel()
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(
            self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(
            self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # NOTE(review): min_ials is added to the item factors before the dot
        # product here, while get_component_data adds it to the product --
        # confirm which one is intended.
        ials_r = np.dot(self.ials_latent_x[user_id],
                        self.ials_latent_y.T + self.min_ials).ravel()

        scores = [
            [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR"],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF"],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF"],
            [ials_r, self.ensemble_weights["IALS"], "IALS"],
            [cbf_r, self.ensemble_weights["CBF"], "CBF"],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"]
        ]

        # filter_seen modifies each score array in place
        for r in scores:
            self.filter_seen(user_id, r[0])

        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Set to -inf, in place, the score of the items in the user's profile.

        :param user_id: row index of the user in the training matrix
        :param scores: array of item scores, modified in place
        :return: the same ``scores`` array
        """

        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])

        user_profile = self.train.indices[start_pos:end_pos]

        scores[user_profile] = -np.inf

        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Recommend for several users and stack ``[user, items...]`` rows.

        :param user_list: iterable of user indices
        :param combiner: forwarded to ``recommend``
        :param at: forwarded to ``recommend``
        :return: an empty array for no users, a one-dimensional row for a
            single user, otherwise a two-dimensional array with one row per user
        """
        rows = []
        for user_id in user_list:
            rec_list = self.recommend(user_id, combiner, at).T
            rows.append(np.concatenate(([user_id], rec_list)))

        if not rows:
            return np.array([])
        if len(rows) == 1:
            # A single user yields a flat row rather than a 1 x k matrix
            return rows[0]
        # Stack once instead of re-allocating the result for every user
        return np.vstack(rows)

    @staticmethod
    def _rating_stats(ratings):
        """Return the min, max and mean of a score matrix as a dict."""
        return {
            "min": ratings.min(),
            "max": ratings.max(),
            "mean": ratings.mean(),
        }

    def get_component_data(self):
        """Return min/max/mean of the weighted score matrix of every component.

        Each score matrix is created as a temporary and released before the
        next one is computed.

        :return: dict keyed by component name ("ITEM_CF", "USER_CF", "SVD",
            "ITEM_BPR", "IALS", "CBF", "CBF_BPR"), each value being a dict
            with "min", "max" and "mean". "USER_BPR" is kept as a legacy
            alias of the SVD statistics.
        """
        weights = self.ensemble_weights

        item_cf = self._rating_stats(
            weights["ITEM_CF"] * self.train.dot(self.item_cosineCF_w))

        user_cf = self._rating_stats(
            weights["USER_CF"] * self.user_cosineCF_w.dot(self.train))

        svd = self._rating_stats(
            weights["SVD"] *
            (np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd))

        item_bpr = self._rating_stats(
            weights["ITEM_BPR"] * self.train.dot(self.item_bpr_w))

        ials = self._rating_stats(
            weights["IALS"] *
            (np.dot(self.ials_latent_x, self.ials_latent_y.T) + self.min_ials))

        cbf = self._rating_stats(weights["CBF"] * self.train.dot(self.cbf_w))

        cbf_bpr = self._rating_stats(
            weights["CBF_BPR"] * self.train.dot(self.cbf_bpr_w))

        return {
            "ITEM_CF": item_cf,
            # Previously the user-CF statistics were returned under "SVD" and
            # the SVD statistics under "USER_BPR"
            "USER_CF": user_cf,
            "SVD": svd,
            "ITEM_BPR": item_bpr,
            "IALS": ials,
            "CBF": cbf,
            "CBF_BPR": cbf_bpr,
            # Legacy key kept so that existing readers of "USER_BPR" keep working
            "USER_BPR": svd
        }
Ejemplo n.º 12
0
class BMussoliniEnsemble:

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """Store the input matrices and weights, then build the sub-recommenders.

        Args:
            urm_train: training user-rating matrix; converted with ``.tocsr()``.
            urm_test: test user-rating matrix; converted with ``.tocsr()``.
            icm: item-content matrix; converted with ``.tocsr()``.
            parameters: mapping from component name to ensemble weight. The
                object is stored as-is (not copied). When ``None`` the
                default weights below are used.
        """
        if parameters is None:
            parameters = {
                "USER_CF": 7,
                "SVD": 26,
                "ITEM_CF": 0,
                "ITEM_BPR": 16,
                "CBF": 7,
                "IALS": 26,
                "CBF_BPR": 64,
                "BPR_MF": 6,
                "ITEM_RP3B": 16,
                "USER_RP3B": 0,
                "FM": 10,
            }

        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        # Must be set before initialize_components(): that call rescales
        # self.train by iterating over these playlists.
        self.sequential_playlists = load_sequential.load_train_sequential()
        self.initialize_components()


    def initialize_components(self):
        """Rescale the training matrix and instantiate every sub-recommender.

        ``self.train`` is replaced by its insertion-order-rescaled version
        first, so the models that take the rating matrix receive the rescaled
        one. The models are fitted separately by ``fit``.
        """
        self.train = self.rescale_wrt_insertion_order(self.train)

        # Settings shared by the item-based and user-based cosine models.
        cf_settings = dict(topK=200, shrink=15, normalize=True, mode='cosine')
        self.item_cosineCF_recommender = Cosine_Similarity(
            self.train, **cf_settings
        )
        self.user_cosineCF_recommender = Cosine_Similarity(
            self.train.T, **cf_settings
        )

        self.svd_recommender = PureSVDRecommender(self.train)

        # Content-based models work on the transposed item-content matrix.
        self.cbf_bpr_recommender = SLIM_BPR_Cython(
            self.icm.T,
            positive_threshold=0,
        )
        self.cbf_recommender = Cosine_Similarity(
            self.icm.T,
            topK=50,
            shrink=10,
            normalize=True,
            mode='cosine',
        )

        self.item_rp3b_recommender = RP3betaRecommender(self.train)
        self.user_rp3b_recommender = RP3betaRecommender(self.train.T)

        # The factorization models receive their data later, in fit().
        self.bpr_mf = BPR_matrix_factorization(
            factors=800,
            regularization=0.01,
            learning_rate=0.01,
            iterations=300,
        )
        self.ials_cg_mf = IALS_CG(
            iterations=15,
            calculate_training_loss=True,
            factors=500,
            use_cg=True,
            regularization=1e-3,
        )

        self.lightfm = LightFM_Recommender(
            self.train,
            self.icm,
            no_components=200,
        )

    def fit(self):
        """Fit every component and cache the results ``recommend`` reads."""
        # PureSVD: latent factors plus the smallest predicted score.
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(
            num_factors=500
        )
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()

        # Similarity-based components.
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(
            epochs=10,
            topK=200,
            batch_size=20,
            sgd_mode='adagrad',
            learning_rate=1e-2,
        )
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity()
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity()
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.item_rp3b_w = self.item_rp3b_recommender.fit()
        self.user_rp3b_w = self.user_rp3b_recommender.fit()

        # Both factorization models are trained on the transposed matrix.
        train_t = self.train.T

        # IALS is fitted on the transposed matrix scaled by 40.
        self.ials_cg_mf.fit(40 * train_t)
        self.ials_latent_x = self.ials_cg_mf.user_factors.copy()
        self.ials_latent_y = self.ials_cg_mf.item_factors.copy()
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()

        # BPR matrix factorization takes the transposed matrix in COO format.
        self.bpr_mf.fit(train_t.tocoo())
        self.bpr_mf_latent_x = self.bpr_mf.user_factors.copy()
        self.bpr_mf_latent_y = self.bpr_mf.item_factors.copy()

        self.lightfm.fit(100)


    def recommend(self, user_id, combiner, at=10):
        user_profile = self.train[user_id, :]

        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        ials_r = np.dot(self.ials_latent_x[user_id], self.ials_latent_y.T + self.min_ials).ravel()
        bpr_mf_r = np.dot(self.bpr_mf_latent_x[user_id], self.bpr_mf_latent_y.T).ravel()
        item_rp3b_r = user_profile.dot(self.item_rp3b_w).toarray().ravel()
        user_rp3b_r = self.user_rp3b_w[user_id].dot(self.train).toarray().ravel()
        lightfm_r = self.lightfm.scores(user_id)

        scores = [
            # [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR" ],
            # [user_bpr_r, self.ensemble_weights["USER_BPR"], "USER_BPR" ],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF" ],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF" ],
            [ials_r, self.ensemble_weights["IALS"], "IALS" ],
            [cbf_r, self.ensemble_weights["CBF"], "CBF" ],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"],
            [bpr_mf_r, self.ensemble_weights["BPR_MF"], "BPR_MF"],
            [item_rp3b_r, self.ensemble_weights["ITEM_RP3B"], "ITEM_RP3B"],
            [user_rp3b_r, self.ensemble_weights["USER_RP3B"], "USER_RP3B"],
            [lightfm_r, self.ensemble_weights["FM"], "FM"]
            ]

        for r in scores:
            self.filter_seen(user_id, r[0])

        R = combiner.combine(scores, at)
        return R

    def rescale_wrt_insertion_order(self, R):
        R = R.copy()
        R = R.tolil()
        R = R*0.8
        for i in self.sequential_playlists:
            pl = i["id"]
            k = 1
            for j in i["songs"]:
                factor = 1/(k**POPULARITY_SCALING_EXP)
                R[pl, j] = factor*(R[pl,j] + 0.2)
                k += 1
        R = R.tocsr()
        return R
    def filter_seen(self, user_id, scores):

        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])

        user_profile = self.train.indices[start_pos:end_pos]

        scores[user_profile] = -1000000 #-np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        res = np.array([])
        n=0
        for i in user_list:
            recList = self.recommend(i, combiner, at).T
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        item_cf_rating = self.ensemble_weights["ITEM_CF"]*self.train.dot(self.item_cosineCF_w)

        item_cf = {

                "min" : item_cf_rating.min(),
                "max" : item_cf_rating.max(),
                "mean" : item_cf_rating.mean(),

            }
        del item_cf_rating

        user_cf_rating = self.ensemble_weights["USER_CF"]*self.user_cosineCF_w.dot(self.train)

        user_cf = {
                "min": user_cf_rating.min(),
                "max": user_cf_rating.max(),
                "mean": user_cf_rating.mean(),
            }
        del user_cf_rating
        ials_rating =  self.ensemble_weights["IALS"]*(np.dot(self.ials_latent_x, self.ials_latent_y.T)+self.min_ials)

        ials = {

                "min": ials_rating.min(),
                "max": ials_rating.max(),
                "mean": np.mean(ials_rating),
            }
        del ials_rating
        cbf_rating = self.ensemble_weights["CBF"]*self.train.dot(self.cbf_w)
        cbf = {

                "min": cbf_rating.min(),
                "max": cbf_rating.max(),
                "mean": cbf_rating.mean(),
            }
        del cbf_rating
        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"]*self.train.dot(self.cbf_bpr_w)
        cbf_bpr = {

                "min": cbf_bpr_rating.min(),
                "max": cbf_bpr_rating.max(),
                "mean": cbf_bpr_rating.mean(),
            }
        del cbf_bpr_rating
        svd_ratings = self.ensemble_weights["SVD"] * (np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)

        svd = {

            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings


        return {
            "ITEM_CF" : item_cf,
            "USER_CF": user_cf ,
            "SVD" : svd ,
            "IALS" : ials,
            "CBF" : cbf,
            "CBF_BPR" : cbf_bpr
        }