def test_cosine_similarity_TopK_big(self):
    """Cython and Python cosine implementations must agree with a plain
    dot-product control on a large random URM when topK keeps every item."""
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python

    n_items = 500
    n_users = 1000
    TopK = n_items

    data_matrix = sps.random(n_users, n_items, density=0.1)

    similarity_object = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=False)
    W_dense_Cython = similarity_object.compute_similarity().toarray()

    similarity_object = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=False)
    W_dense_Python = similarity_object.compute_similarity().toarray()

    # Control: raw co-occurrence counts with the self-similarity diagonal
    # removed, then run through the shared topK filtering helper.
    W_dense_mul = data_matrix.T.dot(data_matrix)
    diag_indices = np.arange(W_dense_mul.shape[0])
    W_dense_mul[diag_indices, diag_indices] = 0.0
    W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_sparse_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
class ItemKNNCFRecommender(Recommender, Similarity_Matrix_Recommender):
    """ ItemKNN recommender"""

    def __init__(self, URM_train, sparse_weights=True):
        super(ItemKNNCFRecommender, self).__init__()

        # CSR is faster during evaluation
        self.URM_train = check_matrix(URM_train, 'csr')
        self.dataset = None
        self.sparse_weights = sparse_weights

    def fit(self, k=50, shrink=100, similarity='cosine', normalize=True):
        """Build the item-item similarity matrix from the training URM.

        :param k: number of neighbours kept per item
        :param shrink: shrink term applied to the similarity denominator
        :param similarity: similarity mode forwarded to Cosine_Similarity
        :param normalize: whether to normalize similarity values
        """
        self.k = k
        self.shrink = shrink

        self.similarity = Cosine_Similarity(self.URM_train, shrink=shrink, topK=k, normalize=normalize, mode=similarity)

        similarity_matrix = self.similarity.compute_similarity()
        if self.sparse_weights:
            self.W_sparse = similarity_matrix
        else:
            self.W = similarity_matrix.toarray()
def test_cosine_similarity_TopK(self):
    """TopK-filtered similarities from both implementations must match a
    dot-product control on a tiny fixed matrix."""
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python

    TopK = 4

    data_matrix = sps.csr_matrix(np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

    W_dense_Cython = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()
    W_dense_Python = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=False).compute_similarity().toarray()

    # Control: item co-occurrence with zeroed diagonal, then topK filtering.
    W_dense_mul = data_matrix.T.dot(data_matrix)
    diag_indices = np.arange(W_dense_mul.shape[0])
    W_dense_mul[diag_indices, diag_indices] = 0.0
    W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_sparse_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
def test_cosine_similarity_dense(self):
    """With topK=0 (no filtering) both implementations must equal the raw
    dot product with a zeroed diagonal, exactly."""
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python

    TopK = 0

    data_matrix = sps.csr_matrix(np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

    W_dense_Cython = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=False).compute_similarity()
    W_dense_Python = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=False).compute_similarity()

    # Control: plain co-occurrence with the self-similarity diagonal removed.
    W_dense_mul = data_matrix.T.dot(data_matrix)
    diag_indices = np.arange(W_dense_mul.shape[0])
    W_dense_mul[diag_indices, diag_indices] = 0.0

    assert np.all(W_dense_Cython == W_dense_mul), "W_dense_Cython not matching control"
    assert np.all(W_dense_Python == W_dense_mul), "W_dense_Python not matching control"
def fit(self, alpha):
    """Train the ensemble components: SLIM-BPR item/user similarities, the
    user-user cosine similarity, the CBF similarity and IALS latent factors.

    :param alpha: unused; kept for caller compatibility
    """
    evaluator_MF = SequentialEvaluator(URM_test_list=self._URM_test, cutoff_list=[10])

    #bprmf = MatrixFactorization_Cython(self._URM_train,
    #                                   positive_threshold=0,
    #                                   algorithm="MF_BPR",
    #                                   )
    # self.MF_BPRW, self.MF_BPRH = bprmf.fit(epochs=200,
    #                                        num_factors=5,
    #                                        batch_size=1,
    #                                        sgd_mode='adagrad'
    #                                        )
    #print(evaluator_MF.evaluateRecommender(bprmf))

    # Item-item SLIM-BPR similarity.
    self.bpr_WII = SLIM_BPR_Cython(self._URM_train, positive_threshold=0, symmetric=True).fit(epochs=10,
                                                                                              topK=200,
                                                                                              batch_size=200,
                                                                                              sgd_mode='adagrad',
                                                                                              learning_rate=1e-2)
    # User-user SLIM-BPR similarity (trained on the transposed URM).
    self.bpr_WUU = SLIM_BPR_Cython(self._URM_train.T, positive_threshold=0).fit(epochs=10,
                                                                                topK=200,
                                                                                batch_size=200,
                                                                                sgd_mode='adagrad',
                                                                                learning_rate=1e-2)

    print(self.bpr_WII)
    print("\n \n max bprII: {0}".format(self.bpr_WII.max()))
    # BUG FIX: this used to print bpr_WII a second time although the line
    # below labels the output "max bprUU".
    print(self.bpr_WUU)
    print("\n \n max bprUU: {0}".format(self.bpr_WUU.max()))

    self._similarity_matrixUU = Cosine_Similarity(self._URM_train.T, topK=200, shrink=15, normalize=True, mode='cosine').compute_similarity()
    print("\n \n max uu: {0}".format(self._similarity_matrixUU.max()))

    # self._similarity_matrixII = Cosine_Similarity(self._URM_train.tocsc(),
    #                                               topK=200,
    #                                               shrink=10,
    #                                               normalize=True,
    #                                               mode='cosine').compute_similarity()
    # print("\n \n max II: {0}".format(self._similarity_matrixII.max()))

    self._similarity_matrixCBF = Cosine_Similarity(self._ICM.T, topK=10, shrink=10, normalize=True, mode='cosine').compute_similarity()
    # print(self._similarity_matrixII)

    # IALS latent factors for users (x) and items (y).
    self.latent_x, self.latent_y = (IALS_numpy()).fit(self._URM_train)
    print(self.latent_x.dot(self.latent_y.T))
    print("\n \n max IALS: {0}".format(self.latent_x.dot(self.latent_y.T).max()))
def fit(self, k=50, shrink=100, similarity='cosine', normalize=True):
    """Build the content-based item similarity from the ICM.

    :param k: number of neighbours kept per item
    :param shrink: shrink term applied to the similarity denominator
    :param similarity: similarity mode forwarded to Cosine_Similarity
    :param normalize: whether to normalize similarity values
    """
    self.k = k
    self.shrink = shrink

    self.similarity = Cosine_Similarity(self.ICM.T, shrink=shrink, topK=k, normalize=normalize, mode=similarity)

    similarity_matrix = self.similarity.compute_similarity()
    if self.sparse_weights:
        self.W_sparse = similarity_matrix
    else:
        self.W = similarity_matrix.toarray()
def test_cosine_similarity_dense_pearson(self):
    """Pearson mode must match a manually-built control: columns are centered
    on the mean of their nonzero entries, then cosine-normalized."""
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python
    import numpy.matlib

    TopK = 0
    shrink = 0

    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=True, shrink=shrink, mode='pearson')
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=True, shrink=shrink, mode='pearson')
    W_dense_Python = cosine_similarity.compute_similarity()

    # Build the control: subtract, per column, the mean of its nonzero cells only.
    data_matrix = data_matrix.toarray().astype(np.float64)
    for col in range(data_matrix.shape[1]):
        nonzeroMask = data_matrix[:, col] > 0
        data_matrix[:, col][nonzeroMask] -= np.mean(data_matrix[:, col][nonzeroMask])

    # Denominator: outer product of the column L2 norms, plus shrink.
    W_dense_denominator = np.matlib.repmat((data_matrix**2).sum(axis=0), data_matrix.shape[1], 1)
    W_dense_denominator = np.sqrt(W_dense_denominator)
    W_dense_denominator = np.multiply(W_dense_denominator, W_dense_denominator.T) + shrink

    W_dense_mul = data_matrix.T.dot(data_matrix)
    # Divide only where the denominator is positive to avoid division by zero.
    W_dense_mul[W_dense_denominator > 0] /= W_dense_denominator[W_dense_denominator > 0]
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
def test_cosine_similarity_dense_jaccard(self):
    """Jaccard mode must match a manual control: binarize the data, then
    intersection / (|A| + |B| - intersection) per item pair."""
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python
    import numpy.matlib

    TopK = 0
    shrink = 0

    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=True, shrink=shrink, mode='jaccard')
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=True, shrink=shrink, mode='jaccard')
    W_dense_Python = cosine_similarity.compute_similarity()

    # Jaccard works on implicit data: binarize before building the control.
    data_matrix.data = np.ones_like(data_matrix.data)
    data_matrix = data_matrix.toarray().astype(np.float64)

    # Numerator: pairwise intersection counts.
    W_dense_mul = data_matrix.T.dot(data_matrix)

    # Denominator: union size = |A| + |B| - intersection, plus shrink.
    W_dense_denominator = np.matlib.repmat((data_matrix**2).sum(axis=0), data_matrix.shape[1], 1)
    W_dense_denominator = W_dense_denominator + W_dense_denominator.T - W_dense_mul + shrink

    # Divide only where the denominator is positive to avoid division by zero.
    W_dense_mul[W_dense_denominator > 0] /= W_dense_denominator[W_dense_denominator > 0]
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
def test_cosine_similarity_dense_normalize(self):
    """Normalized cosine with a shrink term must match the manual control
    dot(A.T, A) / (||a_i|| * ||a_j|| + shrink), diagonal zeroed."""
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Python
    import numpy.matlib

    TopK = 0
    shrink = 5

    data_matrix = np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=True, shrink=shrink)
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Python(data_matrix, topK=TopK, normalize=True, shrink=shrink)
    W_dense_Python = cosine_similarity.compute_similarity()

    # Denominator: outer product of the column L2 norms, plus shrink.
    W_dense_denominator = np.matlib.repmat(data_matrix.power(2).sum(axis=0), data_matrix.shape[1], 1)
    W_dense_denominator = np.sqrt(W_dense_denominator)
    W_dense_denominator = np.multiply(W_dense_denominator, W_dense_denominator.T) + shrink

    W_dense_mul = data_matrix.T.dot(data_matrix)
    W_dense_mul /= W_dense_denominator
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
def test_cosine_similarity_dense_row_weighted(self):
    """Row-weighted similarity must match the explicit weighted dot product
    D * diag(w) * D.T with the diagonal zeroed.

    Fix: ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24;
    ``np.float64`` is the exact type the alias referred to.
    """
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    TopK = 0

    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [3, 0, 1, 0]])
    # np.float64 replaces the removed np.float alias (NumPy >= 1.24).
    data_matrix = sps.csr_matrix(data_matrix, dtype=np.float64)

    row_weights = [2, 3, 0, 4]

    cosine_similarity = Cosine_Similarity_Cython(data_matrix.T, topK=TopK, normalize=False, row_weights=row_weights)
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Compute_Similarity_Python(data_matrix.T, topK=TopK, normalize=False, row_weights=row_weights)
    W_dense_Python = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Parallel(data_matrix.T, topK=TopK, normalize=False, row_weights=row_weights)
    W_dense_Parallel = cosine_similarity.compute_similarity()

    # Control: weighted dot product computed explicitly.
    W_dense_mul = data_matrix.dot(sps.diags(row_weights)).dot(data_matrix.T).toarray()
    W_dense_mul[np.arange(W_dense_mul.shape[0]), np.arange(W_dense_mul.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_dense_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
    assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
def initialize_components(self):
    """Instantiate every component recommender of the ensemble (untrained)."""
    self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15, normalize=True, mode='cosine')
    self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15, normalize=True, mode='cosine')
    self.item_bpr_recommender = SLIM_BPR_Cython(self.train, positive_threshold=0)
    self.svd_recommender = PureSVDRecommender(self.train)
    self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
    self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')

    # A zero IALS weight means the component will never contribute, so build
    # a zero-iteration model instead of paying for a full training run.
    zero_weight = self.ensemble_weights["IALS"] == 0
    self.ials_recommender = IALS_numpy(iters=0) if zero_weight else IALS_numpy()
def initialize_components(self):
    """Rescale the URM by playlist insertion order, then instantiate every
    component recommender of the ensemble (all untrained at this point)."""
    self.train = self.rescale_wrt_insertion_order(self.train)
    self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15, normalize=True, mode='cosine')
    self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15, normalize=True, mode='cosine')
    self.svd_recommender = PureSVDRecommender(self.train)
    self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
    self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')
    self.item_rp3b_recommender = RP3betaRecommender(self.train)
    self.user_rp3b_recommender = RP3betaRecommender(self.train.T)
    self.bpr_mf = BPR_matrix_factorization(factors=800, regularization=0.01, learning_rate=0.01, iterations=300)
    self.ials_cg_mf = IALS_CG(iterations=15, calculate_training_loss=True, factors=500, use_cg=True, regularization=1e-3)
    self.lightfm = LightFM_Recommender(self.train, self.icm, no_components=200)
def test_cosine_similarity_dense_external_cfr(self):
    """Cross-check all three implementations against external references:
    sklearn's cosine_similarity for cosine mode, scipy's jaccard distance
    for jaccard mode."""
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel
    from sklearn.metrics.pairwise import cosine_similarity as Cosine_Similarity_Sklearn
    from scipy.spatial.distance import jaccard as Jaccard_Distance_Scipy

    TopK = 0
    shrink = 0

    # --- Cosine mode vs sklearn -------------------------------------------
    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=True, shrink=shrink)
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Compute_Similarity_Python(data_matrix, topK=TopK, normalize=True, shrink=shrink)
    W_dense_Python = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Parallel(data_matrix, topK=TopK, normalize=True, shrink=shrink)
    W_dense_Parallel = cosine_similarity.compute_similarity()

    # sklearn works row-wise, our similarity is column-wise: transpose first.
    W_dense_sklearn = Cosine_Similarity_Sklearn(data_matrix.copy().T)
    W_dense_sklearn[np.arange(W_dense_sklearn.shape[0]), np.arange(W_dense_sklearn.shape[0])] = 0.0

    assert np.allclose(W_dense_Cython, W_dense_sklearn, atol=1e-4), "W_dense_Cython Cosine not matching Sklearn control"
    assert np.allclose(W_dense_Python, W_dense_sklearn, atol=1e-4), "W_dense_Python Cosine not matching Sklearn control"
    assert np.allclose(W_dense_Parallel, W_dense_sklearn, atol=1e-4), "W_dense_Parallel Cosine not matching Sklearn control"

    # --- Jaccard mode vs scipy --------------------------------------------
    data_matrix = np.array([[1, 2, 0, 1], [0, 1, 4, 1], [1, 3, 1, 0]])
    data_matrix = sps.csr_matrix(data_matrix)

    cosine_similarity = Cosine_Similarity_Cython(data_matrix, topK=TopK, normalize=True, shrink=shrink, mode='jaccard')
    W_dense_Cython = cosine_similarity.compute_similarity()

    cosine_similarity = Compute_Similarity_Python(data_matrix, topK=TopK, normalize=True, shrink=shrink, mode='jaccard')
    W_dense_Python = cosine_similarity.compute_similarity()

    cosine_similarity = Cosine_Similarity_Parallel(data_matrix, topK=TopK, normalize=True, shrink=shrink, mode='jaccard')
    W_dense_Parallel = cosine_similarity.compute_similarity()

    W_dense_Scipy = np.zeros_like(W_dense_Python)

    # Jaccard needs binarized data; scipy gives a distance, so use 1 - d.
    data_matrix.data = np.ones_like(data_matrix.data)
    data_matrix = data_matrix.toarray()

    for row in range(W_dense_Scipy.shape[0]):
        for col in range(W_dense_Scipy.shape[1]):
            if row != col:
                W_dense_Scipy[row, col] = 1 - Jaccard_Distance_Scipy(data_matrix[:, row], data_matrix[:, col])

    assert np.allclose(W_dense_Cython, W_dense_Scipy, atol=1e-4), "W_dense_Cython Jaccard not matching Scipy control"
    assert np.allclose(W_dense_Python, W_dense_Scipy, atol=1e-4), "W_dense_Python Jaccard not matching Scipy control"
    assert np.allclose(W_dense_Parallel, W_dense_Scipy, atol=1e-4), "W_dense_Parallel Jaccard not matching Scipy control"
class BMussoliniEnsemble:
    """Weighted ensemble of collaborative, content-based and latent-factor
    recommenders over a playlist/song URM, with insertion-order rescaling."""

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        """
        :param urm_train: training user-rating matrix (any scipy sparse format)
        :param urm_test: test URM (stored, not used in this class's visible code)
        :param icm: item-content matrix
        :param parameters: optional dict of per-component ensemble weights
        """
        if parameters is None:
            # Default component weights, tuned elsewhere.
            parameters = {
                "USER_CF": 7,
                "SVD": 26,
                "ITEM_CF": 0,
                "ITEM_BPR": 16,
                "CBF": 7,
                "IALS": 26,
                "CBF_BPR": 64,
                "BPR_MF": 6,
                "ITEM_RP3B": 16,
                "USER_RP3B": 0,
                "FM": 10
            }
        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()
        self.sequential_playlists = None
        self.sequential_playlists = load_sequential.load_train_sequential()
        self.initialize_components()

    def initialize_components(self):
        """Rescale the URM by insertion order, then build every component
        recommender (all untrained until fit() is called)."""
        self.train = self.rescale_wrt_insertion_order(self.train)
        self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15, normalize=True, mode='cosine')
        self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15, normalize=True, mode='cosine')
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')
        self.item_rp3b_recommender = RP3betaRecommender(self.train)
        self.user_rp3b_recommender = RP3betaRecommender(self.train.T)
        self.bpr_mf = BPR_matrix_factorization(factors=800, regularization=0.01, learning_rate=0.01, iterations=300)
        self.ials_cg_mf = IALS_CG(iterations=15, calculate_training_loss=True, factors=500, use_cg=True, regularization=1e-3)
        self.lightfm = LightFM_Recommender(self.train, self.icm, no_components=200)

    def fit(self):
        """Train every component and cache its weights / latent factors."""
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(num_factors=500)
        # Minimum of the SVD score matrix, used as an offset elsewhere.
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10, topK=200, batch_size=20, sgd_mode='adagrad', learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity()
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity()
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.item_rp3b_w = self.item_rp3b_recommender.fit()
        self.user_rp3b_w = self.user_rp3b_recommender.fit()
        # The 40x factor scales implicit confidence for IALS-CG; trained on
        # the transposed URM.
        self.ials_cg_mf.fit(40*self.train.T)
        self.ials_latent_x = self.ials_cg_mf.user_factors.copy()
        self.ials_latent_y = self.ials_cg_mf.item_factors.copy()
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.bpr_mf.fit(self.train.T.tocoo())
        self.bpr_mf_latent_x = self.bpr_mf.user_factors.copy()
        self.bpr_mf_latent_y = self.bpr_mf.item_factors.copy()
        self.lightfm.fit(100)

    def recommend(self, user_id, combiner, at=10):
        """Score the user with every component, filter seen items from each
        score vector, then let `combiner` merge the weighted score lists."""
        user_profile = self.train[user_id, :]
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # NOTE(review): min_ials is added to the factor matrix INSIDE the dot
        # product here, while get_component_data() adds it AFTER the dot —
        # these are not equivalent; confirm which one is intended.
        ials_r = np.dot(self.ials_latent_x[user_id], self.ials_latent_y.T + self.min_ials).ravel()
        bpr_mf_r = np.dot(self.bpr_mf_latent_x[user_id], self.bpr_mf_latent_y.T).ravel()
        item_rp3b_r = user_profile.dot(self.item_rp3b_w).toarray().ravel()
        user_rp3b_r = self.user_rp3b_w[user_id].dot(self.train).toarray().ravel()
        lightfm_r = self.lightfm.scores(user_id)
        # Each entry: [score vector, ensemble weight, component label].
        scores = [
            # [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR" ],
            # [user_bpr_r, self.ensemble_weights["USER_BPR"], "USER_BPR" ],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF"],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF"],
            [ials_r, self.ensemble_weights["IALS"], "IALS"],
            [cbf_r, self.ensemble_weights["CBF"], "CBF"],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"],
            [bpr_mf_r, self.ensemble_weights["BPR_MF"], "BPR_MF"],
            [item_rp3b_r, self.ensemble_weights["ITEM_RP3B"], "ITEM_RP3B"],
            [user_rp3b_r, self.ensemble_weights["USER_RP3B"], "USER_RP3B"],
            [lightfm_r, self.ensemble_weights["FM"], "FM"]
        ]
        # filter_seen mutates each score vector in place.
        for r in scores:
            self.filter_seen(user_id, r[0])
        R = combiner.combine(scores, at)
        return R

    def rescale_wrt_insertion_order(self, R):
        """Return a copy of R where sequential playlists get position-decayed
        ratings: earlier songs in a playlist keep more weight."""
        R = R.copy()
        R = R.tolil()
        R = R*0.8
        for i in self.sequential_playlists:
            pl = i["id"]
            k = 1
            for j in i["songs"]:
                # Decay factor by position k in the playlist.
                factor = 1/(k**POPULARITY_SCALING_EXP)
                R[pl, j] = factor*(R[pl,j] + 0.2)
                k += 1
        R = R.tocsr()
        return R

    def filter_seen(self, user_id, scores):
        """Push already-seen items to the bottom by assigning a large negative
        score (in place); returns the same array."""
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])
        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -1000000 #-np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Stack per-user recommendations into rows of [user_id, item ids...]."""
        res = np.array([])
        n=0
        for i in user_list:
            recList = self.recommend(i, combiner, at).T
            tuple = np.concatenate(([i], recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        return res

    def get_component_data(self):
        """Return min/max/mean of each weighted component's full rating matrix
        (computed one at a time and freed to limit peak memory)."""
        item_cf_rating = self.ensemble_weights["ITEM_CF"]*self.train.dot(self.item_cosineCF_w)
        item_cf = {
            "min": item_cf_rating.min(),
            "max": item_cf_rating.max(),
            "mean": item_cf_rating.mean(),
        }
        del item_cf_rating
        user_cf_rating = self.ensemble_weights["USER_CF"]*self.user_cosineCF_w.dot(self.train)
        user_cf = {
            "min": user_cf_rating.min(),
            "max": user_cf_rating.max(),
            "mean": user_cf_rating.mean(),
        }
        del user_cf_rating
        ials_rating = self.ensemble_weights["IALS"]*(np.dot(self.ials_latent_x, self.ials_latent_y.T)+self.min_ials)
        ials = {
            "min": ials_rating.min(),
            "max": ials_rating.max(),
            "mean": np.mean(ials_rating),
        }
        del ials_rating
        cbf_rating = self.ensemble_weights["CBF"]*self.train.dot(self.cbf_w)
        cbf = {
            "min": cbf_rating.min(),
            "max": cbf_rating.max(),
            "mean": cbf_rating.mean(),
        }
        del cbf_rating
        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"]*self.train.dot(self.cbf_bpr_w)
        cbf_bpr = {
            "min": cbf_bpr_rating.min(),
            "max": cbf_bpr_rating.max(),
            "mean": cbf_bpr_rating.mean(),
        }
        del cbf_bpr_rating
        svd_ratings = self.ensemble_weights["SVD"] * (np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)
        svd = {
            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings
        return {
            "ITEM_CF": item_cf,
            "USER_CF": user_cf,
            "SVD": svd,
            "IALS": ials,
            "CBF": cbf,
            "CBF_BPR": cbf_bpr
        }
class UserKNNCFRecommender(Recommender, Similarity_Matrix_Recommender):
    """ UserKNN recommender

    Collaborative filtering via a user-user cosine similarity matrix.
    """

    def __init__(self, URM_train, sparse_weights=True):
        """
        :param URM_train: user-item rating matrix, stored as CSR
        :param sparse_weights: keep similarity sparse (True) or dense (False)
        """
        super(UserKNNCFRecommender, self).__init__()

        # Not sure if CSR here is faster
        self.URM_train = check_matrix(URM_train, 'csr')

        self.dataset = None

        self.sparse_weights = sparse_weights

    def fit(self, k=50, shrink=100, similarity='cosine', normalize=True):
        """Compute the user-user similarity on URM_train.T.

        :param k: neighbours kept per user
        :param shrink: shrink term for the similarity denominator
        :param similarity: similarity mode forwarded to Cosine_Similarity
        :param normalize: normalize similarity values; also controls score
            normalization in recommend()
        """
        self.k = k
        self.shrink = shrink
        # BUG FIX: recommend()/recommendBatch() read self.normalize, which this
        # class never stored — store the fit parameter alongside k and shrink.
        # (Confirm the Recommender base class does not already manage it.)
        self.normalize = normalize

        self.similarity = Cosine_Similarity(self.URM_train.T, shrink=shrink, topK=k, normalize=normalize, mode=similarity)

        if self.sparse_weights:
            self.W_sparse = self.similarity.compute_similarity()
        else:
            self.W = self.similarity.compute_similarity()
            self.W = self.W.toarray()

    def recommend(self, user_id, n=None, exclude_seen=True, filterTopPop=False, filterCustomItems=False):
        """Return the top-n item indices for one user, best first."""
        if n is None:  # idiom fix: identity comparison with None
            n = self.URM_train.shape[1] - 1

        # compute the scores using the dot product
        if self.sparse_weights:
            scores = self.W_sparse[user_id].dot(self.URM_train).toarray().ravel()
        else:
            # Numpy dot does not recognize sparse matrices, so we must
            # invoke the dot function on the sparse one
            scores = self.URM_train.T.dot(self.W[user_id])

        if self.normalize:
            # normalization will keep the scores in the same range
            # of value of the ratings in dataset
            user_profile = self.URM_train[user_id]

            rated = user_profile.copy()
            rated.data = np.ones_like(rated.data)
            if self.sparse_weights:
                den = rated.dot(self.W_sparse).toarray().ravel()
            else:
                den = rated.dot(self.W).ravel()
            den[np.abs(den) < 1e-6] = 1.0  # to avoid NaNs
            scores /= den

        if exclude_seen:
            scores = self._filter_seen_on_scores(user_id, scores)

        if filterTopPop:
            scores = self._filter_TopPop_on_scores(scores)

        if filterCustomItems:
            scores = self._filterCustomItems_on_scores(scores)

        # rank items and mirror column to obtain a ranking in descending score
        #ranking = scores.argsort()
        #ranking = np.flip(ranking, axis=0)

        # Sorting is done in three steps. Faster than plain np.argsort for
        # higher number of items
        # - Partition the data to extract the set of relevant items
        # - Sort only the relevant items
        # - Get the original item index
        relevant_items_partition = (-scores).argpartition(n)[0:n]
        relevant_items_partition_sorting = np.argsort(-scores[relevant_items_partition])
        ranking = relevant_items_partition[relevant_items_partition_sorting]

        return ranking

    def recommendBatch(self, users_in_batch, n=None, exclude_seen=True, filterTopPop=False, filterCustomItems=False):
        """Return an (n_users, n) matrix of top-n item indices per user.

        NOTE(review): n=None is not expanded here as it is in recommend();
        np.zeros((..., None)) would fail — confirm callers always pass n.
        """
        # compute the scores using the dot product
        if self.sparse_weights:
            scores_array = self.W_sparse[users_in_batch].dot(self.URM_train)
            scores_array = scores_array.toarray()
        else:
            # Numpy dot does not recognize sparse matrices, so we must
            # invoke the dot function on the sparse one
            scores_array = self.URM_train.T.dot(self.W[users_in_batch].T)

        if self.normalize:
            raise ValueError("Not implemented")

        # To exclude seen items perform a boolean indexing and replace their score with -inf
        # Seen items will be at the bottom of the list but there is no guarantee they'll NOT be
        # recommended
        if exclude_seen:
            user_profile_batch = self.URM_train[users_in_batch]
            scores_array[user_profile_batch.nonzero()] = -np.inf

        if filterTopPop:
            scores_array[:, self.filterTopPop_ItemsID] = -np.inf

        if filterCustomItems:
            scores_array[:, self.filterCustomItems_ItemsID] = -np.inf

        # rank items and mirror column to obtain a ranking in descending score
        #ranking = (-scores_array).argsort(axis=1)
        #ranking = np.fliplr(ranking)
        #ranking = ranking[:,0:n]

        # BUG FIX: np.int was removed in NumPy 1.24; builtin int is the exact
        # type that alias referred to.
        ranking = np.zeros((scores_array.shape[0], n), dtype=int)

        for row_index in range(scores_array.shape[0]):
            scores = scores_array[row_index]

            relevant_items_partition = (-scores).argpartition(n)[0:n]
            relevant_items_partition_sorting = np.argsort(-scores[relevant_items_partition])
            ranking[row_index] = relevant_items_partition[relevant_items_partition_sorting]

        return ranking
class GeneralEnsemble:
    """Linear/probabilistic ensemble of SLIM-BPR (item and user), user-user
    cosine CF, content-based similarity and IALS latent factors."""

    def __init__(self, URM_train, URM_test, ICM, k=100,
                 alpha= 0.7662,
                 beta= 0.6188,
                 gamma=0.3,
                 epsilon= 0.6212, #0.6212,
                 ro=0.8062,
                 mu = 0.7118,
                 chi = 1,
                 shrink=15,
                 recommendation_mode='linComb'):
        """
        :param alpha: weight of user-user CF scores (UUSCORE)
        :param beta: weight of item-item CF scores (IISCORE)
        :param gamma: weight of content-based scores (CBFSCORE)
        :param epsilon: weight of IALS scores (IALSSCORE)
        :param ro: weight of item SLIM-BPR scores
        :param mu: weight of user SLIM-BPR scores
        :param chi: weight of MF-BPR scores (component currently commented out)
        :param recommendation_mode: 'linComb' or 'probabilistic'
        """
        self._URM_train = URM_train.tocsr()
        self._URM_test = URM_test.tocsr()
        self._ICM = ICM.tocsr()
        self._k = k
        self._shrink = shrink
        self._recommendationMode = recommendation_mode
        self.UUSCORE = alpha
        self.IISCORE = beta
        self.CBFSCORE = gamma
        self.IALSSCORE = epsilon
        self.IALS_SCALING = 1
        self.SLIM_BPR_SCALING = 1
        self.SLIM_BPR = ro
        self.SLIM_BPRUU = mu
        self.MF_BPR = chi

    def fit(self, alpha):
        """Train all components.

        :param alpha: unused; kept for caller compatibility
        """
        evaluator_MF = SequentialEvaluator(URM_test_list=self._URM_test, cutoff_list=[10])

        #bprmf = MatrixFactorization_Cython(self._URM_train,
        #                                   positive_threshold=0,
        #                                   algorithm="MF_BPR",
        #                                   )
        # self.MF_BPRW, self.MF_BPRH = bprmf.fit(epochs=200,
        #                                        num_factors=5,
        #                                        batch_size=1,
        #                                        sgd_mode='adagrad'
        #                                        )
        #print(evaluator_MF.evaluateRecommender(bprmf))

        self.bpr_WII = SLIM_BPR_Cython(self._URM_train, positive_threshold=0, symmetric=True).fit(epochs=10,
                                                                                                  topK=200,
                                                                                                  batch_size=200,
                                                                                                  sgd_mode='adagrad',
                                                                                                  learning_rate=1e-2)
        self.bpr_WUU = SLIM_BPR_Cython(self._URM_train.T, positive_threshold=0).fit(epochs=10,
                                                                                    topK=200,
                                                                                    batch_size=200,
                                                                                    sgd_mode='adagrad',
                                                                                    learning_rate=1e-2)
        print(self.bpr_WII)
        print("\n \n max bprII: {0}".format(self.bpr_WII.max()))
        # NOTE(review): this prints bpr_WII again although the next line is
        # labelled bprUU — looks like a copy-paste slip; confirm intent.
        print(self.bpr_WII)
        print("\n \n max bprUU: {0}".format(self.bpr_WUU.max()))

        self._similarity_matrixUU = Cosine_Similarity(self._URM_train.T, topK=200, shrink=15, normalize=True, mode='cosine').compute_similarity()
        print("\n \n max uu: {0}".format(self._similarity_matrixUU.max()))

        # self._similarity_matrixII = Cosine_Similarity(self._URM_train.tocsc(),
        #                                               topK=200,
        #                                               shrink=10,
        #                                               normalize=True,
        #                                               mode='cosine').compute_similarity()
        # print("\n \n max II: {0}".format(self._similarity_matrixII.max()))

        self._similarity_matrixCBF = Cosine_Similarity(self._ICM.T, topK=10, shrink=10, normalize=True, mode='cosine').compute_similarity()
        # print(self._similarity_matrixII)

        self.latent_x, self.latent_y = (IALS_numpy()).fit(self._URM_train)
        print(self.latent_x.dot(self.latent_y.T))
        print("\n \n max IALS: {0}".format(self.latent_x.dot(self.latent_y.T).max()))

    def _scoreOthers(self, user_id, exclude_seen=True):
        """Score one user with the non-BPR components (CBF + IALS)."""
        # compute the scores using the dot product
        user_profile = self._URM_train[user_id]
        # normalized_IALS = np.dot(self.latent_x[user_id], self.latent_y.T)
        normalized_IALS = self.IALS_SCALING*self.IALSSCORE*np.dot(self.latent_x[user_id], self.latent_y.T)
        # NOTE(review): cfii/cfuu are computed but NOT included in `scores`
        # below; also _similarity_matrixII is never assigned in fit() (its
        # construction is commented out), so the cfii line would raise
        # AttributeError — confirm whether these lines are dead.
        cfii = self.IISCORE*user_profile.dot(self._similarity_matrixII).toarray()
        cfuu = self.UUSCORE*self._similarity_matrixUU[user_id].dot(self._URM_train).toarray()
        # NOTE(review): CBFSCORE is multiplied twice (squared weight) — confirm.
        cbf = self.CBFSCORE*self.CBFSCORE*user_profile.dot(self._similarity_matrixCBF).toarray()
        scores = (cbf + normalized_IALS).ravel()
        if exclude_seen:
            scores = self.filter_seen(user_id, scores)
        return scores

    def _scoreBPR(self, user_id, exclude_seen=True):
        """Score one user with the SLIM-BPR components (item + user)."""
        user_profile = self._URM_train[user_id]
        bprii = self.SLIM_BPR*user_profile.dot(self.bpr_WII.T).toarray().ravel()
        bpruu = self.SLIM_BPRUU*self.bpr_WUU[user_id].dot(self._URM_train).toarray().ravel()
        # mfbpr = self.MF_BPR*self.MF_BPRW[user_id].dot(self.MF_BPRH.T)
        ensemble = bprii + bpruu
        if exclude_seen:
            ensemble = self.filter_seen(user_id, bprii+bpruu)
        return ensemble

    def _recommendOthers(self, user_id, at=30, exclude_seen=True):
        """Rank items using only CBF + IALS scores, max-normalized."""
        # compute the scores using the dot product
        user_profile = self._URM_train[user_id]
        # normalized_IALS = np.dot(self.latent_x[user_id], self.latent_y.T)
        normalized_IALS = np.dot(self.latent_x[user_id], self.latent_y.T)
        # cfii = self.IISCORE*user_profile.dot(self._similarity_matrixII).toarray()
        # cfuu = self.UUSCORE*self._similarity_matrixUU[user_id].dot(self._URM_train).toarray()
        cbf = self.CBFSCORE*self.CBFSCORE*user_profile.dot(self._similarity_matrixCBF).toarray()
        scores = (cbf + normalized_IALS)
        scores = preprocessing.normalize(scores, norm='max').ravel()
        if exclude_seen:
            scores = self.filter_seen(user_id, scores)
        # rank items
        ranking = scores.argsort()[::-1]
        return ranking[:at].ravel()

    def _recommendOthersBPRII(self, user_id, at=30, exclude_seen=True):
        """Rank items on the sum of the non-BPR and BPR score vectors."""
        scores = self._scoreOthers(user_id, exclude_seen) + self._scoreBPR(user_id, exclude_seen)
        ranking = scores.argsort()[::-1]
        return ranking[:at].ravel()

    def _recommendBPRUU(self, user_id, at=30, exclude_seen=True):
        """Rank items using only the user-user SLIM-BPR component."""
        user_profile = self._URM_train[user_id]
        bpruu = self.SLIM_BPRUU*self.bpr_WUU[user_id].dot(self._URM_train).toarray().ravel()
        ensemble = bpruu
        if exclude_seen:
            ensemble = self.filter_seen(user_id, ensemble)
        # rank items
        ranking = ensemble.argsort()[::-1]
        return ranking[:at].ravel()

    def recommendProbabilistic(self, user_id, at=10, exclude_seen=True):
        """Mix two rankings stochastically: with p=0.2 pop from the
        CBF+IALS list, else from the BPR-UU list; skip duplicates."""
        others = self._recommendOthers(user_id, at=30, exclude_seen=exclude_seen)
        bpr = self._recommendBPRUU(user_id, at=30, exclude_seen=exclude_seen)
        result = []
        i = 0
        while i < at:
            rand = np.random.uniform(0, 1)
            if rand < 0.2:
                if type(others) is np.ndarray:
                    chosen = others[0]
                else:
                    chosen = others
                others = np.delete(others, 0)
            else:
                if type(bpr) is np.ndarray:
                    chosen = bpr[0]
                else:
                    chosen = bpr
                bpr = np.delete(bpr, 0)
            if chosen in result:
                continue
            else:
                result.append(chosen)
                i += 1
        return np.array(result)

    def recommendLinComb(self, user_id, at=10, exclude_seen=True):
        """Rank items on the weighted linear combination of all score vectors."""
        scores = (self._scoreOthers(user_id) + self.SLIM_BPR_SCALING*self._scoreBPR(user_id))
        # rank items
        ranking = scores.argsort()[::-1]
        return ranking[:at].ravel()

    def filter_seen(self, user_id, scores):
        """Set already-seen items to -inf in place; returns the same array."""
        start_pos = int(self._URM_train.indptr[user_id])
        end_pos = int(self._URM_train.indptr[user_id + 1])
        user_profile = self._URM_train.indices[start_pos:end_pos]
        scores[user_profile] = -np.inf
        return scores

    def recommendALL(self, userList, at=10):
        """Stack recommendations for every user into one array.

        NOTE(review): np.concatenate((i, recList)) passes the scalar user id
        directly — the analogous recommend_batch elsewhere wraps it as [i];
        confirm this is not a latent TypeError.
        """
        res = np.array([])
        n=0
        for i in userList:
            n+=1
            if self._recommendationMode == 'linComb':
                recList = self.recommendLinComb(i, at)
            elif self._recommendationMode == 'probabilistic':
                recList = self.recommendProbabilistic(i, at)
            else:
                recList = self.recommendLinComb(i, at)
            tuple = np.concatenate((i, recList))
            if (res.size == 0):
                res = tuple
            else:
                res = np.vstack([res, tuple])
        # Free the item-item BPR matrix once all users are processed.
        del self.bpr_WII
        return res
class NapoEnsemble:
    """Weighted ensemble of collaborative, content-based and latent-factor
    recommenders: item/user cosine CF, item BPR, CBF, CBF-BPR, pure SVD and
    IALS.

    `parameters` maps each component name to its ensemble weight.
    """

    def __init__(self, urm_train, urm_test, icm, parameters=None):
        # default weights used when no parameter dict is supplied
        if parameters is None:
            parameters = {
                "USER_CF": 0.8,
                "SVD": 0.7,
                "ITEM_CF": 1,
                "ITEM_BPR": 0.8,
                "CBF": 0.3,
                "IALS": 1.0,
                "CBF_BPR": 1
            }
        self.ensemble_weights = parameters
        self.train = urm_train.tocsr()
        self.test = urm_test.tocsr()
        self.icm = icm.tocsr()

        self.initialize_components()

    def initialize_components(self):
        """Instantiate (but do not fit) every component recommender."""
        self.item_cosineCF_recommender = Cosine_Similarity(self.train, topK=200, shrink=15, normalize=True, mode='cosine')
        self.user_cosineCF_recommender = Cosine_Similarity(self.train.T, topK=200, shrink=15, normalize=True, mode='cosine')
        self.item_bpr_recommender = SLIM_BPR_Cython(self.train, positive_threshold=0)
        self.svd_recommender = PureSVDRecommender(self.train)
        self.cbf_bpr_recommender = SLIM_BPR_Cython(self.icm.T, positive_threshold=0)
        self.cbf_recommender = Cosine_Similarity(self.icm.T, topK=50, shrink=10, normalize=True, mode='cosine')

        # zero IALS weight: run zero iterations instead of skipping the model,
        # so the latent factors still exist downstream
        if self.ensemble_weights["IALS"] == 0:
            self.ials_recommender = IALS_numpy(iters=0)
        else:
            self.ials_recommender = IALS_numpy()

    def fit(self):
        """Train every component and cache its similarity matrix / latent factors."""
        self.item_bpr_w = self.item_bpr_recommender.fit(epochs=10, topK=200, batch_size=200, sgd_mode='adagrad', learning_rate=1e-2)
        self.svd_latent_x, self.svd_latent_y = self.svd_recommender.fit(num_factors=500)
        self.cbf_bpr_w = self.cbf_bpr_recommender.fit(epochs=10, topK=200, batch_size=200, sgd_mode='adagrad', learning_rate=1e-2)
        self.item_cosineCF_w = self.item_cosineCF_recommender.compute_similarity()
        self.user_cosineCF_w = self.user_cosineCF_recommender.compute_similarity()
        self.cbf_w = self.cbf_recommender.compute_similarity()
        self.ials_latent_x, self.ials_latent_y = self.ials_recommender.fit(R=self.train)

        # global minima of the raw latent-factor scores, used as additive
        # offsets when the scores are combined
        self.min_ials = np.dot(self.ials_latent_x, self.ials_latent_y.T).min()
        self.min_svd = np.dot(self.svd_latent_x, self.svd_latent_y).min()

    def recommend(self, user_id, combiner, at=10):
        """Score every item with each component, filter seen items, and
        delegate the final ranking to `combiner.combine(scores, at)`.

        `scores` is a list of [score_array, weight, component_name] triples.
        """
        user_profile = self.train[user_id, :]

        item_bpr_r = user_profile.dot(self.item_bpr_w).toarray().ravel()
        svd_r = self.svd_latent_x[user_id, :].dot(self.svd_latent_y)
        item_cosineCF_r = user_profile.dot(self.item_cosineCF_w).toarray().ravel()
        user_cosineCF_r = self.user_cosineCF_w[user_id].dot(self.train).toarray().ravel()
        cbf_r = user_profile.dot(self.cbf_w).toarray().ravel()
        cbf_bpr_r = user_profile.dot(self.cbf_bpr_w).toarray().ravel()
        # FIX: add the scalar offset to the scores, not to the latent factors
        # before the dot product (matches get_component_data, which computes
        # dot(x, y.T) + min_ials)
        ials_r = (np.dot(self.ials_latent_x[user_id], self.ials_latent_y.T) + self.min_ials).ravel()

        scores = [
            [item_bpr_r, self.ensemble_weights["ITEM_BPR"], "ITEM_BPR"],
            [svd_r, self.ensemble_weights["SVD"], "SVD"],
            [item_cosineCF_r, self.ensemble_weights["ITEM_CF"], "ITEM_CF"],
            [user_cosineCF_r, self.ensemble_weights["USER_CF"], "USER_CF"],
            [ials_r, self.ensemble_weights["IALS"], "IALS"],
            [cbf_r, self.ensemble_weights["CBF"], "CBF"],
            [cbf_bpr_r, self.ensemble_weights["CBF_BPR"], "CBF_BPR"]
        ]

        # push already-seen items to -inf in every component (in place)
        for r in scores:
            self.filter_seen(user_id, r[0])

        return combiner.combine(scores, at)

    def filter_seen(self, user_id, scores):
        """Set the scores of items `user_id` already interacted with to -inf.

        Mutates `scores` in place and returns it.
        """
        start_pos = int(self.train.indptr[user_id])
        end_pos = int(self.train.indptr[user_id + 1])

        user_profile = self.train.indices[start_pos:end_pos]
        scores[user_profile] = -np.inf
        return scores

    def recommend_batch(self, user_list, combiner, at=10):
        """Stack one [user_id, recommendations...] row per user in `user_list`."""
        res = np.array([])
        for user_id in user_list:
            recList = self.recommend(user_id, combiner, at).T
            row = np.concatenate(([user_id], recList))
            if res.size == 0:
                res = row
            else:
                res = np.vstack([res, row])
        return res

    def get_component_data(self):
        """Return min/max/mean statistics of each weighted component's ratings.

        FIX: the keys of the returned dict previously mis-mapped components
        ("SVD" held the user-CF stats, "USER_BPR" — a component that does not
        exist in this ensemble — held the SVD stats, and the user-CF stats
        were dropped). Each key now matches the component whose statistics it
        holds, mirroring the ensemble_weights keys.
        """
        item_cf_rating = self.ensemble_weights["ITEM_CF"] * self.train.dot(self.item_cosineCF_w)
        item_cf = {
            "min": item_cf_rating.min(),
            "max": item_cf_rating.max(),
            "mean": item_cf_rating.mean(),
        }
        del item_cf_rating

        user_cf_rating = self.ensemble_weights["USER_CF"] * self.user_cosineCF_w.dot(self.train)
        user_cf = {
            "min": user_cf_rating.min(),
            "max": user_cf_rating.max(),
            "mean": user_cf_rating.mean(),
        }
        del user_cf_rating

        svd_ratings = self.ensemble_weights["SVD"] * (np.dot(self.svd_latent_x, self.svd_latent_y) + self.min_svd)
        svd = {
            "min": svd_ratings.min(),
            "max": svd_ratings.max(),
            "mean": svd_ratings.mean(),
        }
        del svd_ratings

        item_bpr_rating = self.ensemble_weights["ITEM_BPR"] * self.train.dot(self.item_bpr_w)
        item_bpr = {
            "min": item_bpr_rating.min(),
            "max": item_bpr_rating.max(),
            "mean": item_bpr_rating.mean(),
        }
        del item_bpr_rating

        ials_rating = self.ensemble_weights["IALS"] * (np.dot(self.ials_latent_x, self.ials_latent_y.T) + self.min_ials)
        ials = {
            "min": ials_rating.min(),
            "max": ials_rating.max(),
            "mean": np.mean(ials_rating),
        }
        del ials_rating

        cbf_rating = self.ensemble_weights["CBF"] * self.train.dot(self.cbf_w)
        cbf = {
            "min": cbf_rating.min(),
            "max": cbf_rating.max(),
            "mean": cbf_rating.mean(),
        }
        del cbf_rating

        cbf_bpr_rating = self.ensemble_weights["CBF_BPR"] * self.train.dot(self.cbf_bpr_w)
        cbf_bpr = {
            "min": cbf_bpr_rating.min(),
            "max": cbf_bpr_rating.max(),
            "mean": cbf_bpr_rating.mean(),
        }
        del cbf_bpr_rating

        return {
            "ITEM_CF": item_cf,
            "USER_CF": user_cf,
            "SVD": svd,
            "ITEM_BPR": item_bpr,
            "IALS": ials,
            "CBF": cbf,
            "CBF_BPR": cbf_bpr
        }