def fit(self, topK=100, weights_list=None):
    """Blend the stored similarity matrices into one weighted model.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param weights_list: one weight per entry of ``self.Similarities_list``.
        When ``None``, every similarity gets a weight of 1.
    :return: None. The result is stored in ``self.W_sparse`` when
        ``self.sparse_weights`` is truthy, otherwise in ``self.W``.
    :raises ValueError: if the number of weights differs from the number
        of similarity matrices.
    """
    if weights_list is None:
        # Previously the default crashed with a TypeError on len(None);
        # fall back to equal unit weights instead.
        weights_list = [1] * len(self.Similarities_list)

    if len(self.Similarities_list) != len(weights_list):
        raise ValueError("The lists are not the same length")

    self.weights_list = weights_list
    self.topK = topK

    # Weighted sum of all similarity matrices.
    W = sum(
        np.dot(similarity, weight)
        for similarity, weight in zip(self.Similarities_list, self.weights_list)
    )

    if self.sparse_weights:
        self.W_sparse = similarityMatrixTopK(W, forceSparseOutput=True, k=self.topK)
    else:
        self.W = similarityMatrixTopK(W, forceSparseOutput=False, k=self.topK)
def test_similarityMatrixTopK_sparseToSparse(self):
    """Check that CSR and CSC inputs give the same result as a dense input.

    The dense result is the reference. Each sparse result must match it
    within an absolute tolerance of 1e-6.
    """
    numRows = 20
    TopK = 5

    dense_input = np.random.random((numRows, numRows))

    sparse_input = sps.csr_matrix(dense_input)
    dense_output = similarityMatrixTopK(dense_input, k=TopK, forceSparseOutput=False, inplace=False)
    sparse_output = similarityMatrixTopK(sparse_input, k=TopK, forceSparseOutput=True)

    # A signed "difference < tolerance" check passes for any negative
    # difference, so compare with an absolute tolerance instead.
    self.assertTrue(
        np.allclose(dense_output, sparse_output.todense(), rtol=0, atol=1e-6),
        "sparseToSparse CSR incorrect")

    sparse_input = sps.csc_matrix(dense_input)
    sparse_output = similarityMatrixTopK(sparse_input, k=TopK, forceSparseOutput=True)

    self.assertTrue(
        np.allclose(dense_output, sparse_output.todense(), rtol=0, atol=1e-6),
        "sparseToSparse CSC incorrect")
def get_S_incremental_and_set_W(self, similarity_path):
    """Fetch the learnt similarity from the Cython epoch and set ``self.W_sparse``.

    When ``self.tuning`` is truthy the similarity is cached on disk at
    ``os.getcwd() + similarity_path``: it is exported there if the path does
    not exist yet, and then always re-imported from that path.

    :param similarity_path: path suffix appended to the current working directory.
    """
    self.S_incremental = self.cythonEpoch.get_S()

    def build_similarity():
        # With sparse training the incremental matrix is used as is;
        # otherwise it goes through similarityMatrixTopK first.
        if self.train_with_sparse_weights:
            return self.S_incremental
        return similarityMatrixTopK(self.S_incremental, k=self.topK)

    if not self.tuning:
        self.W_sparse = build_similarity()
        return

    cache_path = os.getcwd() + similarity_path

    if not os.path.exists(cache_path):
        self.W_sparse = build_similarity()
        self.helper.export_similarity_matrix(cache_path, self.W_sparse, name=RECOMMENDER_NAME)

    self.W_sparse = self.helper.import_similarity_matrix(cache_path)
def updateSimilarityMatrix(self):
    """Publish the transpose of ``self.S`` as the model's similarity matrix.

    The result goes to ``self.W_sparse`` when ``self.sparse_weights`` is
    truthy and to ``self.W`` otherwise. When ``self.topK`` compares unequal
    to ``False`` the matrix is first passed through ``similarityMatrixTopK``.
    """
    transposed = self.S.T
    keep_top_k = self.topK != False

    if keep_top_k and self.sparse_weights:
        self.W_sparse = similarityMatrixTopK(transposed, k=self.topK, forceSparseOutput=True)
    elif keep_top_k:
        self.W = similarityMatrixTopK(transposed, k=self.topK, forceSparseOutput=False)
    elif self.sparse_weights:
        self.W_sparse = sps.csr_matrix(transposed)
    else:
        self.W = transposed
def test_similarityMatrixTopK_denseToSparse(self):
    """A dense input must give equal results with sparse and dense output."""
    numRows, TopK = 100, 20

    dense_input = np.random.random((numRows, numRows))

    # Same call order as before: sparse output first, then dense output.
    sparse_output = similarityMatrixTopK(dense_input, k=TopK, forceSparseOutput=True)
    dense_output = similarityMatrixTopK(dense_input, k=TopK, forceSparseOutput=False)

    outputs_match = np.equal(dense_output, sparse_output.todense()).all()
    self.assertTrue(outputs_match, "denseToSparse incorrect")
def test_cosine_similarity_TopK(self):
    """Compare three cosine-similarity implementations against a control.

    The control is ``data_matrix.T.dot(data_matrix)`` with a zeroed diagonal,
    passed through ``similarityMatrixTopK``.
    """
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    TopK = 4

    data_matrix = sps.csr_matrix(np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]]))

    implementations = (
        (Cosine_Similarity_Cython, "W_sparse_Cython not matching control"),
        (Cosine_Similarity_Python, "W_dense_Python not matching control"),
        (Cosine_Similarity_Parallel, "W_dense_Parallel not matching control"),
    )

    # Compute every implementation's result before building the control.
    computed = [
        (implementation(data_matrix, topK=TopK, normalize=False).compute_similarity().toarray(), message)
        for implementation, message in implementations
    ]

    control = data_matrix.T.dot(data_matrix)
    diagonal = np.arange(control.shape[0])
    control[diagonal, np.arange(control.shape[0])] = 0.0
    control = similarityMatrixTopK(control, k=TopK).toarray()

    for W_dense, message in computed:
        assert np.allclose(W_dense, control, atol=1e-4), message
def fit(self, topK=100, alpha=0.5):
    """Blend ``Similarity_1`` and ``Similarity_2`` into ``self.W_sparse``.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param alpha: weight of ``Similarity_1``; ``Similarity_2`` gets ``1 - alpha``.
    """
    self.topK = topK
    self.alpha = alpha

    first_term = self.Similarity_1 * self.alpha
    second_term = self.Similarity_2 * (1 - self.alpha)

    pruned = similarityMatrixTopK(first_term + second_term, k=self.topK)
    self.W_sparse = pruned.tocsr()
def fit(self, epochs=15, topK=20):
    """Train SLIM with BPR.

    If the model was already trained, matrix S is overwritten.

    :param epochs: number of times ``self.epochIteration()`` is called.
    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
        Defaults to 20, the value that was previously hard-coded.
    :return: None. The result is stored in ``self.W``; ``self.S`` is deleted.
    """
    # Initialize similarity with random values and zero-out diagonal
    self.S = np.random.random((self.n_items, self.n_items)).astype('float32')
    self.S[np.arange(self.n_items), np.arange(self.n_items)] = 0

    start_time_train = time.time()

    for currentEpoch in range(epochs):
        start_time_epoch = time.time()

        self.epochIteration()

        print("Epoch {} of {} complete in {:.2f} minutes".format(
            currentEpoch + 1, epochs,
            float(time.time() - start_time_epoch) / 60))

    print("Train completed in {:.2f} minutes".format(
        float(time.time() - start_time_train) / 60))

    # The similarity matrix is learnt row-wise
    # To be used in the product URM*S must be transposed to be column-wise
    self.W = self.S.T

    # Recorded results per k: (k=500, MAP=0.0174) (k=100, MAP=0.0201)
    # (k=50, MAP=0.0215) (k=20, MAP=0.0222, but when submitted the MAP is 0.01321)
    self.W = similarityMatrixTopK(self.W, k=topK)

    del self.S
def fit(self, topK=100, epochs=25, lambda_i=0.0025, lambda_j=0.00025, learning_rate=0.05):
    """Train the item-item similarity and precompute ``self.RECS``.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param epochs: number of times ``self._run_epoch`` is called.
    :param lambda_i: stored on ``self.lambda_i`` (presumably a regularization
        term read by ``_run_epoch`` — confirm).
    :param lambda_j: stored on ``self.lambda_j`` (same caveat as ``lambda_i``).
    :param learning_rate: stored on ``self.learning_rate``.
    :return: None. Sets ``self.W_sparse`` (CSR) and
        ``self.RECS = URM_train.dot(W_sparse)``.
    """
    self.topK = topK
    self.epochs = epochs
    self.lambda_i = lambda_i
    self.lambda_j = lambda_j
    self.learning_rate = learning_rate

    # NOTE(review): URM is neither a parameter nor a local here, so it must
    # resolve to a module-level name at call time — confirm.
    self.URM_train = sp.csc_matrix(URM)
    self.n_users = self.URM_train.shape[0]
    self.n_items = self.URM_train.shape[1]

    # Initialize similarity with zero values.
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    self.item_item_S = np.zeros((self.n_items, self.n_items), dtype=np.float64)

    start_time_train = time.time()

    for n_epoch in range(self.epochs):
        self._run_epoch(n_epoch)

    print("Train completed in {:.2f} minutes".format(
        float(time.time() - start_time_train) / 60))

    self.W_sparse = similarityMatrixTopK(self.item_item_S, k=self.topK, verbose=False)
    self.W_sparse = sp.csr_matrix(self.W_sparse)

    self.RECS = self.URM_train.dot(self.W_sparse)
def fit(self, topK=100, normalize=False):
    """Fit a two-stage model: rec1's similarity is used to build rec2's URM.

    The first recommender is loaded from disk when a stored model exists,
    otherwise it is fitted and saved. Its ``W_sparse`` goes through
    ``similarityMatrixTopK`` and is multiplied with ``self.URM_train``. The
    product is the training matrix for the second recommender. After
    fitting, the second recommender's ``URM_train`` is replaced with the
    original one.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param normalize: accepted but not used by this method.
    """
    self.topK = topK

    folder = 'for_sub' if self.submission else 'hybrid_search'
    # NOTE(review): 'fors_sub' differs from the folder name 'for_sub'. It is
    # left untouched because stored models may already use it — confirm.
    filename = 'fors_sub' if self.submission else f'{str(self.seed)}_fold-{str(self.fold)}'

    # load the models if already trained for that particular seed and fold
    try:
        self.__rec1.load_model(
            f'stored_recommenders/seed_{str(self.seed)}_{folder}/{self.__rec1.RECOMMENDER_NAME}/',
            filename)
        print(f"{self.__rec1.RECOMMENDER_NAME} loaded. [seed={self.seed}, fold={self.fold}]")
    except Exception:
        # Any load failure falls back to training from scratch. A bare
        # "except:" would also swallow KeyboardInterrupt/SystemExit.
        print(f"Fitting {self.__rec1.RECOMMENDER_NAME} ... [seed={self.seed}, fold={self.fold}]")
        self.__rec1.fit(**self.__rec1_keywargs)
        print("done.")
        self.__rec1.save_model(
            f'stored_recommenders/seed_{str(self.seed)}_{folder}/{self.__rec1.RECOMMENDER_NAME}/',
            filename)

    w_sparse = self.__rec1.W_sparse
    w_sparse = similarityMatrixTopK(w_sparse, k=self.topK).tocsr()

    URM_train = self.URM_train.dot(w_sparse)

    # Try the constructor that takes an ICM first, then the one without it.
    try:
        self.__rec2 = self.__rec2_class(URM_train, self.ICM_train, verbose=False)
    except Exception:
        self.__rec2 = self.__rec2_class(URM_train, verbose=False)

    print(f"Fitting {self.__rec2.RECOMMENDER_NAME}... [topk={topK}, seed={self.seed}, fold={self.fold}]")
    self.__rec2.fit(**self.__rec2_keywargs)

    print("Overwriting the URM of the rec2...")
    self.__rec2.URM_train = self.URM_train
    print("done.")
def test_cosine_similarity_TopK_big(self):
    """Same comparison as the small TopK test, on a random 1000x500 matrix.

    ``TopK`` equals the number of items, and the control is the zero-diagonal
    ``data_matrix.T.dot(data_matrix)`` passed through ``similarityMatrixTopK``.
    """
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel as Cosine_Similarity_Parallel

    n_items = 500
    n_users = 1000
    TopK = n_items

    data_matrix = sps.random(n_users, n_items, density=0.1)

    def dense_similarity(similarity_class):
        # Build the similarity object and return its result as a dense array.
        similarity_object = similarity_class(data_matrix, topK=TopK, normalize=False)
        return similarity_object.compute_similarity().toarray()

    W_dense_Cython = dense_similarity(Cosine_Similarity_Cython)
    W_dense_Python = dense_similarity(Cosine_Similarity_Python)
    W_dense_Parallel = dense_similarity(Cosine_Similarity_Parallel)

    W_dense_mul = data_matrix.T.dot(data_matrix)
    n_rows = W_dense_mul.shape[0]
    W_dense_mul[np.arange(n_rows), np.arange(n_rows)] = 0.0
    W_dense_mul = similarityMatrixTopK(W_dense_mul, k=TopK).toarray()

    assert np.allclose(W_dense_Cython, W_dense_mul, atol=1e-4), "W_sparse_Cython not matching control"
    assert np.allclose(W_dense_Python, W_dense_mul, atol=1e-4), "W_dense_Python not matching control"
    assert np.allclose(W_dense_Parallel, W_dense_mul, atol=1e-4), "W_dense_Parallel not matching control"
def fit(self, topK=100, alpha=0.5):
    """Blend two similarities and store the result as sparse or dense.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param alpha: weight of ``Similarity_1``; ``Similarity_2`` gets ``1 - alpha``.
    :return: None. Sets ``self.W_sparse`` when ``self.sparse_weights`` is
        truthy, otherwise ``self.W``.
    """
    self.topK = topK
    self.alpha = alpha

    weighted_first = self.Similarity_1 * self.alpha
    weighted_second = self.Similarity_2 * (1 - self.alpha)
    W = weighted_first + weighted_second

    if not self.sparse_weights:
        self.W = similarityMatrixTopK(W, forceSparseOutput=False, k=self.topK)
        return

    self.W_sparse = similarityMatrixTopK(W, forceSparseOutput=True, k=self.topK)
def fit(self, similarities, weights=None, topK=100, normalize_weights=True):
    """Build ``self.W_sparse`` as a weighted sum of similarity matrices.

    :param similarities: non-empty sequence of similarity matrices; the
        first one's shape is used for the accumulator.
    :param weights: one weight per similarity. ``None`` means all ones.
    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param normalize_weights: when truthy, weights are divided by their
        maximum (skipped when that maximum is zero).
    :raises AssertionError: if the lengths differ or ``similarities`` is empty.
    """
    # Initialize weights array if not already initialized
    if weights is None:
        weights = [1] * len(similarities)

    # Checking the input parameters are well formatted
    assert len(similarities) == len(weights)
    assert len(similarities) > 0

    # Cast weights to a float numpy array; this copies, so the caller's
    # object is never modified by the in-place normalization below.
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    weights = np.array(weights, dtype=np.float64)

    # Normalize the weights
    if normalize_weights:
        largest_weight = weights.max()
        # Dividing by a zero maximum would fill the weights with NaN.
        if largest_weight != 0:
            weights /= largest_weight

    # Initialize the result
    W_sparse = sps.csr_matrix(similarities[0].shape, dtype=np.float64)

    # Compute the new Similarity matrix
    for similarity, weight in zip(similarities, weights):
        W_sparse += (similarity * weight)

    self.W_sparse = similarityMatrixTopK(W_sparse, k=topK)
    self.W_sparse = check_matrix(self.W_sparse, format='csr')
def fit(self, topK=100, epochs=25, lambda_i=0.0025, lambda_j=0.00025, learning_rate=0.05):
    """Train the item-item similarity and store it in ``self.W_sparse``.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param epochs: number of times ``self._run_epoch`` is called.
    :param lambda_i: stored on ``self.lambda_i`` (presumably a regularization
        term read by ``_run_epoch`` — confirm).
    :param lambda_j: stored on ``self.lambda_j`` (same caveat as ``lambda_i``).
    :param learning_rate: stored on ``self.learning_rate``.
    :return: None
    """
    # Initialize similarity with zero values.
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    self.item_item_S = np.zeros((self.n_items, self.n_items), dtype=np.float64)

    self.lambda_i = lambda_i
    self.lambda_j = lambda_j
    self.learning_rate = learning_rate

    start_time_train = time.time()

    for n_epoch in range(epochs):
        self._run_epoch(n_epoch)

    print("Train completed in {:.2f} minutes".format(
        float(time.time() - start_time_train) / 60))

    self.W_sparse = similarityMatrixTopK(self.item_item_S, k=topK, verbose=False)
    self.W_sparse = sps.csr_matrix(self.W_sparse)
def fit(self, item_weights, URM_train, selectTopK=False):
    """Install a precomputed item-item similarity as the model.

    :param item_weights: square matrix whose size equals the number of
        columns of ``URM_train``.
    :param URM_train: stored on ``self.URM_train`` after ``check_matrix``
        with ``format='csc'``.
    :param selectTopK: when truthy, ``item_weights`` goes through
        ``similarityMatrixTopK`` with ``k=self.topK`` and is stored sparse.
    :raises ValueError: if the shapes are inconsistent or ``item_weights``
        is not square.
    """
    self.URM_train = check_matrix(URM_train, format='csc')

    n_weight_rows = item_weights.shape[0]

    if self.URM_train.shape[1] != n_weight_rows:
        raise ValueError(
            "ItemKNNCustomSimilarityRecommender: URM_train and item_weights matrices are not consistent. "
            "The number of columns in URM_train must be equal to the rows in item_weights."
            "Current shapes are: URM_train {}, item_weights {}".format(
                self.URM_train.shape, item_weights.shape))

    if n_weight_rows != item_weights.shape[1]:
        raise ValueError(
            "ItemKNNCustomSimilarityRecommender: item_weights matrice is not square. "
            "Current shape is {}".format(item_weights.shape))

    if selectTopK:
        self.W_sparse = similarityMatrixTopK(item_weights, forceSparseOutput=True, k=self.topK)
        self.sparse_weights = True
        return

    # If no topK selection is required, just save the similarity:
    # numpy arrays stay dense, anything else is converted to CSR.
    if isinstance(item_weights, np.ndarray):
        self.W = item_weights
        self.sparse_weights = False
    else:
        self.W_sparse = check_matrix(item_weights, format='csr')
        self.sparse_weights = True
def test_similarityMatrixTopK_sparseToSparse(self):
    """A CSC input must give the same result as the equivalent dense input."""
    numRows, TopK = 20, 5

    dense_input = np.random.random((numRows, numRows))

    # Same call order as before: dense input first, then the CSC copy.
    result_from_dense = similarityMatrixTopK(dense_input, k=TopK)
    result_from_sparse = similarityMatrixTopK(sps.csc_matrix(dense_input), k=TopK)

    dense_array = result_from_dense.toarray()
    sparse_array = result_from_sparse.toarray()

    self.assertTrue(np.allclose(dense_array, sparse_array), "sparseToSparse CSC incorrect")
def fit(self, topK=100, alpha=0.5):
    """Blend two similarities and store the result as CSR in ``self.W_sparse``.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param alpha: weight of ``Similarity_1``; ``Similarity_2`` gets ``1 - alpha``.
    """
    self.topK, self.alpha = topK, alpha

    blended = (
        self.Similarity_1 * self.alpha
        + self.Similarity_2 * (1 - self.alpha)
    )

    self.W_sparse = similarityMatrixTopK(blended, k=self.topK)
    self.W_sparse = check_matrix(self.W_sparse, format="csr")
def fit(self, topK=100, alpha=0.5, beta=0.5):
    """Blend three similarities and store the result as CSR in ``self.W_sparse``.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param alpha: weight of ``Similarity_1``.
    :param beta: weight of ``Similarity_2``; ``Similarity_3`` gets
        ``1 - alpha - beta``.
    """
    self.topK = topK
    self.alpha = alpha
    self.beta = beta

    first_term = self.Similarity_1 * self.alpha
    second_term = self.Similarity_2 * self.beta
    third_term = self.Similarity_3 * (1 - self.alpha - self.beta)

    blended = first_term + second_term + third_term

    self.W_sparse = similarityMatrixTopK(blended, k=self.topK)
    self.W_sparse = check_matrix(self.W_sparse, format='csr')
def get_S_incremental_and_set_W(self):
    """Fetch the learnt similarity and store it as CSR in ``self.W_sparse``.

    With ``self.train_with_sparse_weights`` truthy the matrix from the Cython
    epoch is used as is; otherwise it goes through ``similarityMatrixTopK``
    first. Both paths end with a ``check_matrix`` conversion to CSR.
    """
    self.S_incremental = self.cythonEpoch.get_S()

    self.W_sparse = (
        self.S_incremental
        if self.train_with_sparse_weights
        else similarityMatrixTopK(self.S_incremental, k=self.topK)
    )
    self.W_sparse = check_matrix(self.W_sparse, format="csr")
def fit(self, topK=10, alpha=0.5, beta=0.3, gamma=0.2, delta=0.1):
    """Blend four similarities and store the result as CSR in ``self.W_sparse``.

    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    :param alpha: weight of ``Similarity_1``.
    :param beta: weight of ``Similarity_2``.
    :param gamma: weight of ``Similarity_3``.
    :param delta: weight of ``Similarity_4``.
    """
    self.topK = topK
    self.alpha, self.beta = alpha, beta
    self.gamma, self.delta = gamma, delta

    weighted_pairs = (
        (self.Similarity_1, self.alpha),
        (self.Similarity_2, self.beta),
        (self.Similarity_3, self.gamma),
        (self.Similarity_4, self.delta),
    )

    # Accumulate left to right, matching the original summation order.
    first_matrix, first_weight = weighted_pairs[0]
    W = first_matrix * first_weight
    for matrix, weight in weighted_pairs[1:]:
        W = W + matrix * weight

    self.W_sparse = similarityMatrixTopK(W, k=self.topK).tocsr()
def fit(self, topK=None, l2_norm=1e3, normalize_matrix=False, verbose=True):
    """Fit the model in closed form by inverting a regularised Gram matrix.

    :param topK: when not ``None``, the result goes through
        ``similarityMatrixTopK`` with this ``k``.
    :param l2_norm: value added to the diagonal of the Gram matrix before
        inversion.
    :param normalize_matrix: when truthy, ``self.URM_train`` is L2-normalised
        along axis 1 and then axis 0, and stored back as CSR.
    :param verbose: stored on ``self.verbose``.
    :return: None. Sets ``self.W_sparse`` as CSR or as a dense array,
        depending on ``self._is_content_sparse_check``.
    """
    self.verbose = verbose

    start_time = time.time()
    self._print("Fitting model... ")

    if normalize_matrix:
        # Normalize rows and then columns
        for axis in (1, 0):
            self.URM_train = normalize(self.URM_train, norm="l2", axis=axis)
        self.URM_train = sps.csr_matrix(self.URM_train)

    # Gram matrix obtained as a non-normalised, zero-shrink similarity over
    # all items.
    # NOTE(review): if Compute_Similarity zeroes the diagonal, only l2_norm
    # ends up on it here — confirm this is intended.
    similarity = Compute_Similarity(
        self.URM_train,
        shrink=0,
        topK=self.URM_train.shape[1],
        normalize=False,
        similarity="cosine",
    )
    gram = similarity.compute_similarity().toarray()

    diagonal = np.diag_indices(gram.shape[0])
    gram[diagonal] += l2_norm

    inverse = np.linalg.inv(gram)

    B = inverse / (-np.diag(inverse))
    B[diagonal] = 0.0

    elapsed_value, elapsed_unit = seconds_to_biggest_unit(time.time() - start_time)
    self._print("Fitting model... done in {:.2f} {}".format(elapsed_value, elapsed_unit))

    if topK is not None:
        B = similarityMatrixTopK(B, k=topK, verbose=False)

    # Pick the storage format from the sparsity check on the final matrix.
    if not self._is_content_sparse_check(B):
        self.W_sparse = check_matrix(B, format="npy", dtype=np.float32)
        self._W_sparse_format_checked = True
        self._compute_item_score = self._compute_score_W_dense
        return

    self._print("Detected model matrix to be sparse, changing format.")
    self.W_sparse = check_matrix(B, format="csr", dtype=np.float32)
def fit(self, learning_rate=0.01, epochs=50, k=100):
    """Run the training epochs, then transpose and prune the similarity.

    :param learning_rate: stored on ``self.learning_rate``.
    :param epochs: stored on ``self.epochs``; number of calls to
        ``self.epochIteration()``.
    :param k: value forwarded as ``k`` to ``similarityMatrixTopK``.
    """
    self.learning_rate, self.epochs = learning_rate, epochs

    for _ in range(self.epochs):
        self.epochIteration()

    # Transpose first, then pass the transposed matrix to the topK selection.
    self.similarity_matrix = self.similarity_matrix.T
    self.similarity_matrix = similarityMatrixTopK(self.similarity_matrix, k=k)
def fit(self, alpha=0.5, l1_ratio=0.5, topK=100):
    """Blend three stored similarities with weights from ``alpha`` and ``l1_ratio``.

    The weights are ``a = alpha * l1_ratio``, ``b = alpha - a`` and
    ``c = 1 - a - b``.

    :param alpha: combined weight of the first two similarities.
    :param l1_ratio: share of ``alpha`` given to the first similarity.
    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    """
    self.__a = alpha * l1_ratio
    self.__b = alpha - self.__a
    self.__c = 1 - self.__a - self.__b
    self.topK = topK

    first_term = self.__W1 * self.__a
    second_term = self.__W2 * self.__b
    third_term = self.__W3 * self.__c

    W = first_term + second_term + third_term

    pruned = similarityMatrixTopK(W, k=self.topK)
    self.W_sparse = pruned.tocsr()
def fit(self, topK=None, alpha=0.5):
    """Blend two similarities, optionally prune, and store the result as CSR.

    :param topK: when not ``None``, the blend goes through
        ``similarityMatrixTopK`` with this ``k``.
    :param alpha: weight of ``Similarity_1``; ``Similarity_2`` gets ``1 - alpha``.
    """
    self.topK, self.alpha = topK, alpha

    blended = self.Similarity_1 * self.alpha
    blended += self.Similarity_2 * (1 - self.alpha)

    self.W_sparse = blended

    prune_requested = topK is not None
    if prune_requested:
        self.W_sparse = similarityMatrixTopK(blended, k=self.topK)

    self.W_sparse = check_matrix(self.W_sparse, format='csr')
def get_S_incremental_and_set_W(self):
    """Fetch the learnt similarity from the Cython epoch and publish it.

    - ``train_with_sparse_weights`` truthy: stored unchanged in ``self.W_sparse``.
    - otherwise, ``sparse_weights`` truthy: passed through
      ``similarityMatrixTopK`` and stored in ``self.W_sparse``.
    - otherwise: stored unchanged in ``self.W``.
    """
    self.S_incremental = self.cythonEpoch.get_S()

    if self.train_with_sparse_weights:
        self.W_sparse = self.S_incremental
    elif self.sparse_weights:
        self.W_sparse = similarityMatrixTopK(self.S_incremental, k=self.topK)
    else:
        self.W = self.S_incremental
def fit(self, topK=None, l2_norm=1e3, normalize_matrix=False, verbose=True):
    """Fit the model in closed form by inverting a regularised Gram matrix.

    :param topK: ``None`` keeps the full dense matrix; any other value is
        forwarded as ``k`` to ``similarityMatrixTopK`` and the result is
        stored as CSR.
    :param l2_norm: value added to the item popularity on the diagonal of
        the Gram matrix.
    :param normalize_matrix: when truthy, ``self.URM_train`` is L2-normalised
        along axis 1 and then axis 0, and stored back as CSR.
    :param verbose: stored on ``self.verbose``.
    """
    self.verbose = verbose

    start_time = time.time()
    self._print("Fitting model... ")

    if normalize_matrix:
        # Normalize rows and then columns
        rows_normalized = normalize(self.URM_train, norm='l2', axis=1)
        self.URM_train = rows_normalized
        columns_normalized = normalize(self.URM_train, norm='l2', axis=0)
        self.URM_train = columns_normalized
        self.URM_train = sps.csr_matrix(self.URM_train)

    # Gram matrix is X^t X, compute dot product
    n_items = self.URM_train.shape[1]
    similarity = Compute_Similarity(self.URM_train, shrink=0, topK=n_items,
                                    normalize=False, similarity="cosine")
    gram = similarity.compute_similarity().toarray()

    diagonal = np.diag_indices(gram.shape[0])

    # The Compute_Similarity object ensures the diagonal of the similarity matrix is zero
    # in this case we need the diagonal as well, which is just the item popularity
    # (number of stored entries per column of the URM).
    item_popularity = np.ediff1d(self.URM_train.tocsc().indptr)
    gram[diagonal] = item_popularity + l2_norm

    inverse = np.linalg.inv(gram)

    B = inverse / (-np.diag(inverse))
    B[diagonal] = 0.0

    elapsed_value, elapsed_unit = seconds_to_biggest_unit(time.time() - start_time)
    self._print("Fitting model... done in {:.2f} {}".format(elapsed_value, elapsed_unit))

    if topK is not None:
        self.W_sparse = similarityMatrixTopK(B, k=topK, verbose=False)
        self.W_sparse = sps.csr_matrix(self.W_sparse)
        return

    # Without topK the dense matrix is kept and scored with the dense path.
    self.W_sparse = B
    self._W_sparse_format_checked = True
    self._compute_item_score = self._compute_score_W_dense
def test_similarityMatrixTopK_denseToDense(self):
    """The output must contain exactly ``TopK * numRows`` non-zero cells."""
    numRows, TopK = 100, 20

    dense_input = np.random.random((numRows, numRows))
    dense_output = similarityMatrixTopK(dense_input, k=TopK)

    expected_nonzero = TopK * numRows
    actual_nonzero = np.sum(dense_output != 0)

    self.assertEqual(expected_nonzero, actual_nonzero, "DenseToDense incorrect")
def fit(self, W_sparse, selectTopK=False, topK=100):
    """Install a precomputed item-item similarity as ``self.W_sparse`` (CSR).

    :param W_sparse: square matrix whose size equals the number of columns
        of ``self.URM_train``.
    :param selectTopK: when truthy, the matrix first goes through
        ``similarityMatrixTopK``.
    :param topK: value forwarded as ``k`` when ``selectTopK`` is truthy.
    :raises AssertionError: if the matrix is not square or does not match
        ``self.URM_train``.
    """
    is_square = W_sparse.shape[0] == W_sparse.shape[1]
    assert is_square,\
        "ItemKNNCustomSimilarityRecommender: W_sparse matrice is not square. Current shape is {}".format(W_sparse.shape)

    matches_URM = self.URM_train.shape[1] == W_sparse.shape[0]
    assert matches_URM,\
        "ItemKNNCustomSimilarityRecommender: URM_train and W_sparse matrices are not consistent. " \
        "The number of columns in URM_train must be equal to the rows in W_sparse. " \
        "Current shapes are: URM_train {}, W_sparse {}".format(self.URM_train.shape, W_sparse.shape)

    similarity = similarityMatrixTopK(W_sparse, k=topK) if selectTopK else W_sparse

    self.W_sparse = check_matrix(similarity, format='csr')
def fit(self, alpha=0.5, l1_ratio=0.5, topK=100):
    """Blend three recommenders' similarities after scaling each by its maximum.

    The weights are ``a = alpha * l1_ratio``, ``b = alpha - a`` and
    ``c = 1 - a - b``. A matrix whose maximum is zero is used unscaled.

    :param alpha: combined weight of the first two similarities.
    :param l1_ratio: share of ``alpha`` given to the first similarity.
    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    """
    self.__a = alpha * l1_ratio
    self.__b = alpha - self.__a
    self.__c = 1 - self.__a - self.__b
    self.topK = topK

    def scale_by_max(matrix):
        # Divide by the largest entry, unless that entry is zero.
        largest = matrix.max()
        return matrix / largest if largest != 0 else matrix

    W1 = scale_by_max(self.__rec1.W_sparse)
    W2 = scale_by_max(self.__rec2.W_sparse)
    W3 = scale_by_max(self.__rec3.W_sparse)

    W = W1 * self.__a + W2 * self.__b + W3 * self.__c

    self.W_sparse = similarityMatrixTopK(W, k=self.topK).tocsr()
def fit(self, alpha=0.5, topK=100):
    """Blend the two stored similarity matrices into ``self.W_sparse`` (CSR).

    :param alpha: weight of ``rec1_W_sparse``; ``rec2_W_sparse`` gets ``1 - alpha``.
    :param topK: value forwarded as ``k`` to ``similarityMatrixTopK``.
    """
    self.alpha, self.topK = alpha, topK

    first_term = self.rec1_W_sparse * self.alpha
    second_term = self.rec2_W_sparse * (1 - self.alpha)

    pruned = similarityMatrixTopK(first_term + second_term, k=self.topK)
    self.W_sparse = pruned.tocsr()