def test_cosine_similarity_dense_row_weighted(self):
    """Dense similarity with per-row weights must match the explicit
    weighted product  X @ diag(w) @ X.T  with the diagonal zeroed.

    Exercises the Cython, Python and Parallel implementations with
    normalize=False, topK=0 and an explicit row_weights vector.
    """

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    TopK = 0

    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [3, 0, 1, 0]])
    # BUGFIX: the np.float alias was removed in NumPy 1.24; use np.float64,
    # which is what the alias resolved to.
    data_matrix = sps.csr_matrix(data_matrix, dtype=np.float64)

    row_weights = [2, 3, 0, 4]

    cosine_similarity = Cosine_Similarity_Cython(data_matrix.T, topK=TopK,
                                                 normalize=False,
                                                 row_weights=row_weights)
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Compute_Similarity_Python(data_matrix.T, topK=TopK,
                                                  normalize=False,
                                                  row_weights=row_weights)
    W_dense_Python = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Parallel(data_matrix.T, topK=TopK,
                                                   normalize=False,
                                                   row_weights=row_weights)
    W_dense_Parallel = cosine_similarity.compute_similarity()

    # Control: weighted inner products computed directly, self-similarity removed.
    W_dense_mul = data_matrix.dot(sps.diags(row_weights)).dot(data_matrix.T).toarray()
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
    assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
def test_cosine_similarity_dense(self):
    """All three implementations must reproduce X.T @ X with a zeroed
    diagonal when normalization is off and no TopK cut is applied."""

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    TopK = 0

    data_matrix = sps.csr_matrix(
        np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

    W_dense_Cython = Cosine_Similarity_Cython(
        data_matrix, topK=TopK, normalize=False).compute_similarity()

    W_dense_Python = Compute_Similarity_Python(
        data_matrix, topK=TopK, normalize=False).compute_similarity()

    W_dense_Parallel = Cosine_Similarity_Parallel(
        data_matrix, topK=TopK, normalize=False).compute_similarity()

    # Control: plain co-occurrence counts, self-similarity removed.
    W_dense_mul = data_matrix.T.dot(data_matrix)
    diagonal = np.arange(W_dense_mul.shape[0])
    W_dense_mul[diagonal, diagonal] = 0.0

    assert np.all(W_dense_Cython == W_dense_mul), "W_dense_Cython not matching control"
    assert np.all(W_dense_Python == W_dense_mul), "W_dense_Python not matching control"
    assert np.all(W_dense_Parallel == W_dense_mul), "W_dense_Parallel not matching control"
class ItemKNNCFRecommender(Recommender, Similarity_Matrix_Recommender):
    """ ItemKNN recommender"""

    def __init__(self, URM_train, sparse_weights=True):
        super(ItemKNNCFRecommender, self).__init__()

        # CSR is faster during evaluation
        self.URM_train = check_matrix(URM_train, 'csr')
        self.dataset = None
        self.sparse_weights = sparse_weights

    def fit(self, k=50, shrink=100, similarity='cosine', normalize=True):
        """Build the item-item similarity matrix from the training URM."""
        self.k = k
        self.shrink = shrink

        self.similarity = Cosine_Similarity(self.URM_train,
                                            shrink=shrink,
                                            topK=k,
                                            normalize=normalize,
                                            mode=similarity)

        # Keep either a sparse weight matrix or a dense one, as requested.
        if self.sparse_weights:
            self.W_sparse = self.similarity.compute_similarity()
        else:
            self.W = self.similarity.compute_similarity()
            self.W = self.W.toarray()
def test_cosine_similarity_TopK_big(self):
    """With TopK equal to the number of items, the TopK filter keeps
    everything, so results must match the dense control on a larger
    random sparse matrix."""

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    n_items = 500
    n_users = 1000
    TopK = n_items

    data_matrix = sps.random(n_users, n_items, density=0.1)

    W_dense_Cython = Cosine_Similarity_Cython(
        data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()

    W_dense_Python = Cosine_Similarity_Python(
        data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()

    W_dense_Parallel = Cosine_Similarity_Parallel(
        data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()

    # Control: full product, zero the diagonal, then the same TopK filter.
    W_dense_mul = data_matrix.T.dot(data_matrix)
    diagonal = np.arange(W_dense_mul.shape[0])
    W_dense_mul[diagonal, diagonal] = 0.0
    W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_sparse_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
    assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
def test_cosine_similarity_TopK(self):
    """TopK = 4 on a 4-item matrix: all implementations must agree with
    the dense control after applying the same TopK filter."""

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    TopK = 4

    data_matrix = sps.csr_matrix(
        np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

    W_dense_Cython = Cosine_Similarity_Cython(
        data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()

    W_dense_Python = Cosine_Similarity_Python(
        data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()

    W_dense_Parallel = Cosine_Similarity_Parallel(
        data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()

    # Control: full product, zero the diagonal, then the same TopK filter.
    W_dense_mul = data_matrix.T.dot(data_matrix)
    diagonal = np.arange(W_dense_mul.shape[0])
    W_dense_mul[diagonal, diagonal] = 0.0
    W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_sparse_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
    assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
def test_cosine_similarity_dense_normalize(self):
    """Normalized cosine with shrink must match the explicit formula
    (X.T @ X) / (||x_i|| * ||x_j|| + shrink), with the diagonal zeroed."""

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    TopK = 0
    shrink = 5

    data_matrix = np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK,
                                                 normalize=True, shrink=shrink)
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK,
                                                 normalize=True, shrink=shrink)
    W_dense_Python = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Parallel(data_matrix, topK=TopK,
                                                   normalize=True, shrink=shrink)
    W_dense_Parallel = cosine_similarity.compute_similarity()

    # BUGFIX: numpy.matlib is deprecated; np.tile is the supported
    # replacement for matlib.repmat(a, m, 1).
    W_dense_denominator = np.tile(
        np.asarray(data_matrix.power(2).sum(axis=0)),
        (data_matrix.shape[1], 1))
    W_dense_denominator = np.sqrt(W_dense_denominator)
    W_dense_denominator = np.multiply(W_dense_denominator,
                                      W_dense_denominator.T) + shrink

    W_dense_mul = data_matrix.T.dot(data_matrix)
    W_dense_mul /= W_dense_denominator
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
    assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
def test_cosine_similarity_dense_jaccard(self):
    """Jaccard similarity must match an explicit control computed on the
    binarized matrix: |a & b| / (|a| + |b| - |a & b| + shrink)."""

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    TopK = 0
    shrink = 0

    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK,
                                                 normalize=True, shrink=shrink,
                                                 mode='jaccard')
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK,
                                                 normalize=True, shrink=shrink,
                                                 mode='jaccard')
    W_dense_Python = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Parallel(data_matrix, topK=TopK,
                                                   normalize=True, shrink=shrink,
                                                   mode='jaccard')
    W_dense_Parallel = cosine_similarity.compute_similarity()

    # Jaccard is defined on implicit (binary) data: binarize first.
    data_matrix.data = np.ones_like(data_matrix.data)
    data_matrix = data_matrix.toarray().astype(np.float64)

    W_dense_mul = data_matrix.T.dot(data_matrix)

    # BUGFIX: numpy.matlib is deprecated; np.tile is the supported
    # replacement for matlib.repmat(a, m, 1).
    W_dense_denominator = np.tile((data_matrix**2).sum(axis=0),
                                  (data_matrix.shape[1], 1))
    W_dense_denominator = W_dense_denominator + W_dense_denominator.T - W_dense_mul + shrink

    # Divide only where the denominator is positive to avoid 0/0.
    W_dense_mul[W_dense_denominator > 0] /= W_dense_denominator[
        W_dense_denominator > 0]
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
    assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
def test_cosine_similarity_dense_pearson(self):
    """Pearson similarity must match an explicit control: each column is
    centered on the mean of its nonzero entries, then normalized dot
    products are compared (diagonal zeroed)."""

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python

    TopK = 0
    shrink = 0

    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK,
                                                 normalize=True, shrink=shrink,
                                                 mode='pearson')
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK,
                                                 normalize=True, shrink=shrink,
                                                 mode='pearson')
    W_dense_Python = cosine_similarity.compute_similarity()

    data_matrix = data_matrix.toarray().astype(np.float64)

    # Center each column on the mean of its observed (nonzero) entries only.
    for col in range(data_matrix.shape[1]):
        nonzeroMask = data_matrix[:, col] > 0
        data_matrix[:, col][nonzeroMask] -= np.mean(
            data_matrix[:, col][nonzeroMask])

    # BUGFIX: numpy.matlib is deprecated; np.tile is the supported
    # replacement for matlib.repmat(a, m, 1).
    W_dense_denominator = np.tile((data_matrix**2).sum(axis=0),
                                  (data_matrix.shape[1], 1))
    W_dense_denominator = np.sqrt(W_dense_denominator)
    W_dense_denominator = np.multiply(W_dense_denominator,
                                      W_dense_denominator.T) + shrink

    W_dense_mul = data_matrix.T.dot(data_matrix)
    # Divide only where the denominator is positive to avoid 0/0.
    W_dense_mul[W_dense_denominator > 0] /= W_dense_denominator[
        W_dense_denominator > 0]
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
def test_cosine_similarity_dense_external_cfr(self):
    """Cross-check against external libraries: cosine mode against
    sklearn's cosine_similarity and jaccard mode against scipy's
    pairwise jaccard distance."""

    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel
    from sklearn.metrics.pairwise import cosine_similarity as Cosine_Similarity_Sklearn
    from scipy.spatial.distance import jaccard as Jaccard_Distance_Scipy

    TopK = 0
    shrink = 0

    data_matrix = sps.csr_matrix(
        np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

    # --- cosine mode vs sklearn --------------------------------------
    W_dense_Cython = Cosine_Similarity_Cython(
        data_matrix, topK=TopK, normalize=True,
        shrink=shrink).compute_similarity()

    W_dense_Python = Compute_Similarity_Python(
        data_matrix, topK=TopK, normalize=True,
        shrink=shrink).compute_similarity()

    W_dense_Parallel = Cosine_Similarity_Parallel(
        data_matrix, topK=TopK, normalize=True,
        shrink=shrink).compute_similarity()

    W_dense_sklearn = Cosine_Similarity_Sklearn(data_matrix.copy().T)
    diagonal = np.arange(W_dense_sklearn.shape[0])
    W_dense_sklearn[diagonal, diagonal] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_sklearn, atol=1e-4), \
        "W_dense_Cython Cosine not matching Sklearn control"
    assert np.allclose(W_dense_Python, W_dense_sklearn, atol=1e-4), \
        "W_dense_Python Cosine not matching Sklearn control"
    assert np.allclose(W_dense_Parallel, W_dense_sklearn, atol=1e-4), \
        "W_dense_Parallel Cosine not matching Sklearn control"

    # --- jaccard mode vs scipy ---------------------------------------
    data_matrix = sps.csr_matrix(
        np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]]))

    W_dense_Cython = Cosine_Similarity_Cython(
        data_matrix, topK=TopK, normalize=True, shrink=shrink,
        mode='jaccard').compute_similarity()

    W_dense_Python = Compute_Similarity_Python(
        data_matrix, topK=TopK, normalize=True, shrink=shrink,
        mode='jaccard').compute_similarity()

    W_dense_Parallel = Cosine_Similarity_Parallel(
        data_matrix, topK=TopK, normalize=True, shrink=shrink,
        mode='jaccard').compute_similarity()

    W_dense_Scipy = np.zeros_like(W_dense_Python)

    # Jaccard is defined on implicit (binary) data: binarize first.
    data_matrix.data = np.ones_like(data_matrix.data)
    data_matrix = data_matrix.toarray()

    for row in range(W_dense_Scipy.shape[0]):
        for col in range(W_dense_Scipy.shape[1]):
            if row != col:
                W_dense_Scipy[row, col] = 1 - Jaccard_Distance_Scipy(
                    data_matrix[:, row], data_matrix[:, col])

    assert np.allclose(W_dense_Cython, W_dense_Scipy, atol=1e-4), \
        "W_dense_Cython Jaccard not matching Scipy control"
    assert np.allclose(W_dense_Python, W_dense_Scipy, atol=1e-4), \
        "W_dense_Python Jaccard not matching Scipy control"
    assert np.allclose(W_dense_Parallel, W_dense_Scipy, atol=1e-4), \
        "W_dense_Parallel Jaccard not matching Scipy control"
class UserKNNCFRecommender(Recommender, Similarity_Matrix_Recommender):
    """ UserKNN recommender"""

    def __init__(self, URM_train, sparse_weights=True):
        super(UserKNNCFRecommender, self).__init__()

        # Not sure if CSR here is faster
        self.URM_train = check_matrix(URM_train, 'csr')
        self.dataset = None
        self.sparse_weights = sparse_weights

    def fit(self, k=50, shrink=100, similarity='cosine', normalize=True):
        """Build the user-user similarity matrix on URM_train.T."""
        self.k = k
        self.shrink = shrink

        self.similarity = Cosine_Similarity(self.URM_train.T,
                                            shrink=shrink,
                                            topK=k,
                                            normalize=normalize,
                                            mode=similarity)

        if self.sparse_weights:
            self.W_sparse = self.similarity.compute_similarity()
        else:
            self.W = self.similarity.compute_similarity()
            self.W = self.W.toarray()

    def recommend(self, user_id, n=None, exclude_seen=True, filterTopPop=False,
                  filterCustomItems=False):
        """Return the top-n item indices for one user, highest score first."""
        # FIX: identity comparison with None ('n == None' is unidiomatic).
        if n is None:
            n = self.URM_train.shape[1] - 1

        # compute the scores using the dot product
        if self.sparse_weights:
            scores = self.W_sparse[user_id].dot(
                self.URM_train).toarray().ravel()
        else:
            # Numpy dot does not recognize sparse matrices, so we must
            # invoke the dot function on the sparse one
            scores = self.URM_train.T.dot(self.W[user_id])

        if self.normalize:
            # normalization will keep the scores in the same range
            # of value of the ratings in dataset
            user_profile = self.URM_train[user_id]

            rated = user_profile.copy()
            rated.data = np.ones_like(rated.data)
            if self.sparse_weights:
                den = rated.dot(self.W_sparse).toarray().ravel()
            else:
                den = rated.dot(self.W).ravel()
            den[np.abs(den) < 1e-6] = 1.0  # to avoid NaNs
            scores /= den

        if exclude_seen:
            scores = self._filter_seen_on_scores(user_id, scores)

        if filterTopPop:
            scores = self._filter_TopPop_on_scores(scores)

        if filterCustomItems:
            scores = self._filterCustomItems_on_scores(scores)

        # Sorting is done in three steps. Faster than plain np.argsort for
        # higher number of items:
        # - Partition the data to extract the set of relevant items
        # - Sort only the relevant items
        # - Get the original item index
        relevant_items_partition = (-scores).argpartition(n)[0:n]
        relevant_items_partition_sorting = np.argsort(
            -scores[relevant_items_partition])
        ranking = relevant_items_partition[relevant_items_partition_sorting]

        return ranking

    def recommendBatch(self, users_in_batch, n=None, exclude_seen=True,
                       filterTopPop=False, filterCustomItems=False):
        """Return an (n_users, n) matrix of ranked item indices for a batch."""
        # compute the scores using the dot product
        if self.sparse_weights:
            scores_array = self.W_sparse[users_in_batch].dot(self.URM_train)
            scores_array = scores_array.toarray()
        else:
            # Numpy dot does not recognize sparse matrices, so we must
            # invoke the dot function on the sparse one
            scores_array = self.URM_train.T.dot(self.W[users_in_batch].T)

        if self.normalize:
            raise ValueError("Not implemented")

        # To exclude seen items perform a boolean indexing and replace their score with -inf
        # Seen items will be at the bottom of the list but there is no guarantee they'll NOT be
        # recommended
        if exclude_seen:
            user_profile_batch = self.URM_train[users_in_batch]
            scores_array[user_profile_batch.nonzero()] = -np.inf

        if filterTopPop:
            scores_array[:, self.filterTopPop_ItemsID] = -np.inf

        if filterCustomItems:
            scores_array[:, self.filterCustomItems_ItemsID] = -np.inf

        # BUGFIX: the np.int alias was removed in NumPy 1.24; the builtin
        # int is what the alias resolved to.
        ranking = np.zeros((scores_array.shape[0], n), dtype=int)

        for row_index in range(scores_array.shape[0]):
            scores = scores_array[row_index]

            # Same partition-then-sort strategy as recommend().
            relevant_items_partition = (-scores).argpartition(n)[0:n]
            relevant_items_partition_sorting = np.argsort(
                -scores[relevant_items_partition])
            ranking[row_index] = relevant_items_partition[
                relevant_items_partition_sorting]

        return ranking
class NapoEnsemble:
    """Weighted ensemble of several recommenders (user/item CF, SVD,
    SLIM-BPR item/content, content-based cosine, IALS).

    Each component produces a per-user score vector; `combiner` (supplied
    by the caller of recommend) merges them using `ensemble_weights`.
    """

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        # Default component weights, used when no explicit dict is passed.
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "SVD": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1
            }
        self.ensemble_weights = parameters
        # CSR copies of the input matrices (fast row slicing).
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        self.initialize_components()

    def initialize_components(self):
        """Instantiate every component recommender (no training yet)."""
        self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15, normalize=True, mode='cosine')
        # User-based CF works on the transposed URM.
        self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15, normalize=True, mode='cosine')
        self.item_bpr_recommender = SLIM_BPR_Cython(self.train, positive_threshold=0)
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')
        # With a zero IALS weight, build a zero-iteration model so fit()
        # stays cheap while the attribute still exists.
        if self.ensemble_weights["IALS"] == 0:
            self.ials_recommender = IALS_numpy(iters=0)
        else:
            self.ials_recommender = IALS_numpy()

    def fit(self):
        """Train every component and cache its weights / latent factors."""
        self.item_bpr_w = self.item_bpr_recommender.fit(epochs=10, topK=200, batch_size=200, sgd_mode='adagrad', learning_rate=1e-2)
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(
            num_factors=500)
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10, topK=200, batch_size=200, sgd_mode='adagrad', learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity()
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity()
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.ials_latent_x, self.ials_latent_y = self.ials_recommender.fit(
            R=self.train)
        # Minimum predicted ratings, presumably used to shift scores to be
        # non-negative — TODO confirm intent against combiner.
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()

    def recommend(self, user_id, combiner, at=10):
        """Score all items for `user_id` with every component, filter seen
        items, and let `combiner` merge the weighted score lists."""
        user_profile = self.train[user_id, :]
        item_bpr_r = user_profile.dot(self.item_bpr_w).toarray().ravel()
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # NOTE(review): min_ials is added to the factor matrix BEFORE the dot
        # product, not to the resulting scores — verify this is intended.
        ials_r = np.dot(self.ials_latent_x[user_id], self.ials_latent_y.T + self.min_ials).ravel()
        # Each entry: [score vector, ensemble weight, component label].
        scores = [
            [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR"],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF"],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF"],
            [ials_r, self.ensemble_weights["IALS"], "IALS"],
            [cbf_r, self.ensemble_weights["CBF"], "CBF"],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"]
        ]
        # filter_seen mutates each score vector in place.
        for r in scores:
            self.filter_seen(user_id, r[0])
        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Set the score of every item already in the user's profile to -inf
        (in place), so seen items are never recommended."""
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Stack [user_id, rec_1..rec_at] rows for every user in user_list."""
        res = np.array([])
        n = 0
        for i in user_list:
            recList = self.recommend(i, combiner, at).T
            # NOTE: shadows the builtin 'tuple'.
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        """Return min/max/mean rating statistics for each component, for
        inspection/debugging of the ensemble scales."""
        item_cf_rating = self.ensemble_weights["ITEM_CF"] * self.train.dot(
            self.item_cosineCF_w)
        item_cf = {
            "min": item_cf_rating.min(),
            "max": item_cf_rating.max(),
            "mean": item_cf_rating.mean(),
        }
        del item_cf_rating  # free the dense intermediate before the next one
        user_cf_rating = self.ensemble_weights[
            "USER_CF"] * self.user_cosineCF_w.dot(self.train)
        user_cf = {
            "min": user_cf_rating.min(),
            "max": user_cf_rating.max(),
            "mean": user_cf_rating.mean(),
        }
        del user_cf_rating
        svd_ratings = self.ensemble_weights["SVD"] * (
            np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)
        # NOTE(review): SVD statistics are stored in a variable named
        # 'user_bpr' and returned under key "USER_BPR", while key "SVD"
        # below returns the USER_CF stats — this looks like a mix-up;
        # confirm against consumers of get_component_data().
        user_bpr = {
            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings
        item_bpr_rating = self.ensemble_weights["ITEM_BPR"] * self.train.dot(
            self.item_bpr_w)
        item_bpr = {
            "min": item_bpr_rating.min(),
            "max": item_bpr_rating.max(),
            "mean": item_bpr_rating.mean(),
        }
        del item_bpr_rating
        ials_rating = self.ensemble_weights["IALS"] * (
            np.dot(self.ials_latent_x, self.ials_latent_y.T) + self.min_ials)
        ials = {
            "min": ials_rating.min(),
            "max": ials_rating.max(),
            "mean": np.mean(ials_rating),
        }
        del ials_rating
        cbf_rating = self.ensemble_weights["CBF"] * self.train.dot(self.cbf_w)
        cbf = {
            "min": cbf_rating.min(),
            "max": cbf_rating.max(),
            "mean": cbf_rating.mean(),
        }
        del cbf_rating
        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"] * self.train.dot(
            self.cbf_bpr_w)
        cbf_bpr = {
            "min": cbf_bpr_rating.min(),
            "max": cbf_bpr_rating.max(),
            "mean": cbf_bpr_rating.mean(),
        }
        del cbf_bpr_rating
        return {
            "ITEM_CF": item_cf,
            "SVD": user_cf,
            "ITEM_BPR": item_bpr,
            "USER_BPR": user_bpr,
            "IALS": ials,
            "CBF": cbf,
            "CBF_BPR": cbf_bpr
        }
class BMussoliniEnsemble:
    """Weighted ensemble of many recommenders (user/item CF, SVD, content
    cosine, SLIM-BPR on content, RP3beta item/user, BPR-MF, IALS-CG,
    LightFM), with URM entries rescaled by playlist insertion order.

    Each component produces a per-user score vector; `combiner` (supplied
    by the caller of recommend) merges them using `ensemble_weights`.
    """

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        # Default component weights, used when no explicit dict is passed.
        if parameters is None:
            parameters = {
                "USER_CF": 7,
                "SVD": 26,
                "ITEM_CF": 0,
                "ITEM_BPR": 16,
                "CBF": 7,
                "IALS": 26,
                "CBF_BPR": 64,
                "BPR_MF": 6,
                "ITEM_RP3B": 16,
                "USER_RP3B": 0,
                "FM": 10
            }
        self.ensemble_weights = parameters
        # CSR copies of the input matrices (fast row slicing).
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        self.sequential_playlists = None
        # Ordered playlist data; drives rescale_wrt_insertion_order().
        self.sequential_playlists = load_sequential.load_train_sequential()
        self.initialize_components()

    def initialize_components(self):
        """Rescale the URM and instantiate every component (no training)."""
        self.train = self.rescale_wrt_insertion_order(self.train)
        self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15, normalize=True, mode='cosine')
        # User-based CF works on the transposed URM.
        self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15, normalize=True, mode='cosine')
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')
        self.item_rp3b_recommender = RP3betaRecommender(self.train)
        self.user_rp3b_recommender = RP3betaRecommender(self.train.T)
        self.bpr_mf = BPR_matrix_factorization(factors=800, regularization=0.01, learning_rate=0.01, iterations=300)
        self.ials_cg_mf = IALS_CG(iterations=15, calculate_training_loss=True, factors=500, use_cg=True, regularization=1e-3)
        self.lightfm = LightFM_Recommender(self.train, self.icm, no_components=200)

    def fit(self):
        """Train every component and cache its weights / latent factors."""
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(num_factors=500)
        # Minimum predicted rating, presumably used as a shift to make
        # scores non-negative — TODO confirm intent against combiner.
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10, topK=200, batch_size=20, sgd_mode='adagrad', learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity()
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity()
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.item_rp3b_w = self.item_rp3b_recommender.fit()
        self.user_rp3b_w = self.user_rp3b_recommender.fit()
        # IALS-CG is fit on a scaled, transposed URM (40 acts as a
        # confidence multiplier — TODO confirm against IALS_CG docs).
        self.ials_cg_mf.fit(40*self.train.T)
        self.ials_latent_x = self.ials_cg_mf.user_factors.copy()
        self.ials_latent_y = self.ials_cg_mf.item_factors.copy()
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.bpr_mf.fit(self.train.T.tocoo())
        self.bpr_mf_latent_x = self.bpr_mf.user_factors.copy()
        self.bpr_mf_latent_y = self.bpr_mf.item_factors.copy()
        self.lightfm.fit(100)

    def recommend(self, user_id, combiner, at=10):
        """Score all items for `user_id` with every component, filter seen
        items, and let `combiner` merge the weighted score lists."""
        user_profile = self.train[user_id, :]
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # NOTE(review): min_ials is added to the factor matrix BEFORE the dot
        # product, not to the resulting scores — verify this is intended.
        ials_r = np.dot(self.ials_latent_x[user_id], self.ials_latent_y.T + self.min_ials).ravel()
        bpr_mf_r = np.dot(self.bpr_mf_latent_x[user_id], self.bpr_mf_latent_y.T).ravel()
        item_rp3b_r = user_profile.dot(self.item_rp3b_w).toarray().ravel()
        user_rp3b_r = self.user_rp3b_w[user_id].dot(self.train).toarray().ravel()
        lightfm_r = self.lightfm.scores(user_id)
        # Each entry: [score vector, ensemble weight, component label].
        scores = [
            # [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR" ],
            # [user_bpr_r, self.ensemble_weights["USER_BPR"], "USER_BPR" ],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF" ],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF" ],
            [ials_r, self.ensemble_weights["IALS"], "IALS" ],
            [cbf_r, self.ensemble_weights["CBF"], "CBF" ],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"],
            [bpr_mf_r, self.ensemble_weights["BPR_MF"], "BPR_MF"],
            [item_rp3b_r, self.ensemble_weights["ITEM_RP3B"], "ITEM_RP3B"],
            [user_rp3b_r, self.ensemble_weights["USER_RP3B"], "USER_RP3B"],
            [lightfm_r, self.ensemble_weights["FM"], "FM"]
        ]
        # filter_seen mutates each score vector in place.
        for r in scores:
            self.filter_seen(user_id, r[0])
        R = combiner.combine(scores, at)
        return R

    def rescale_wrt_insertion_order(self, R):
        """Return a rescaled copy of URM R: every entry is damped to 0.8,
        then entries of sequential playlists are re-weighted by a factor
        decaying with the song's insertion position k.

        Relies on the module-level constant POPULARITY_SCALING_EXP —
        defined outside this view; verify it is in scope.
        """
        R = R.copy()
        # LIL format allows efficient per-entry assignment below.
        R = R.tolil()
        R = R*0.8
        for i in self.sequential_playlists:
            pl = i["id"]
            k = 1  # 1-based position of the song within the playlist
            for j in i["songs"]:
                factor = 1/(k**POPULARITY_SCALING_EXP)
                R[pl, j] = factor*(R[pl,j] + 0.2)
                k += 1
        R = R.tocsr()
        return R

    def filter_seen(self, user_id, scores):
        """Push every item already in the user's profile to the bottom of
        the ranking by assigning a large negative score (in place)."""
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -1000000  # -np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Stack [user_id, rec_1..rec_at] rows for every user in user_list."""
        res = np.array([])
        n=0
        for i in user_list:
            recList = self.recommend(i, combiner, at).T
            # NOTE: shadows the builtin 'tuple'.
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        """Return min/max/mean rating statistics for each component, for
        inspection/debugging of the ensemble scales."""
        item_cf_rating = self.ensemble_weights["ITEM_CF"]*self.train.dot(self.item_cosineCF_w)
        item_cf = {
            "min": item_cf_rating.min(),
            "max": item_cf_rating.max(),
            "mean": item_cf_rating.mean(),
        }
        del item_cf_rating  # free the dense intermediate before the next one
        user_cf_rating = self.ensemble_weights["USER_CF"]*self.user_cosineCF_w.dot(self.train)
        user_cf = {
            "min": user_cf_rating.min(),
            "max": user_cf_rating.max(),
            "mean": user_cf_rating.mean(),
        }
        del user_cf_rating
        ials_rating = self.ensemble_weights["IALS"]*(np.dot(self.ials_latent_x, self.ials_latent_y.T)+self.min_ials)
        ials = {
            "min": ials_rating.min(),
            "max": ials_rating.max(),
            "mean": np.mean(ials_rating),
        }
        del ials_rating
        cbf_rating = self.ensemble_weights["CBF"]*self.train.dot(self.cbf_w)
        cbf = {
            "min": cbf_rating.min(),
            "max": cbf_rating.max(),
            "mean": cbf_rating.mean(),
        }
        del cbf_rating
        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"]*self.train.dot(self.cbf_bpr_w)
        cbf_bpr = {
            "min": cbf_bpr_rating.min(),
            "max": cbf_bpr_rating.max(),
            "mean": cbf_bpr_rating.mean(),
        }
        del cbf_bpr_rating
        svd_ratings = self.ensemble_weights["SVD"] * (np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)
        svd = {
            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings
        return {
            "ITEM_CF": item_cf,
            "USER_CF": user_cf,
            "SVD": svd,
            "IALS": ials,
            "CBF": cbf,
            "CBF_BPR": cbf_bpr
        }