def fit(self):
    self.URM = apply_feature_weighting(self.URM, self.feature_weighting, K=self.K, B=self.B)
    self._train()
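# A minimal sketch of what apply_feature_weighting could look like, assuming the
# usual convention that K and B are the BM25 parameters and that feature_weighting
# is one of "none", "TF-IDF", "BM25". This standalone version is hypothetical;
# the project's actual helper may differ in name and signature.
import numpy as np
import scipy.sparse as sps

def apply_feature_weighting_sketch(URM, feature_weighting, K=1.2, B=0.75):
    URM = sps.csr_matrix(URM, dtype=np.float32)
    n_users, n_items = URM.shape
    if feature_weighting == "TF-IDF":
        # idf per item (column): log(#users / #users who interacted with the item)
        df = np.asarray((URM > 0).sum(axis=0)).ravel()
        idf = np.log(n_users / np.maximum(df, 1))
        return URM.multiply(idf).tocsr()
    if feature_weighting == "BM25":
        df = np.asarray((URM > 0).sum(axis=0)).ravel()
        idf = np.log((n_users - df + 0.5) / (df + 0.5))
        row_len = np.asarray(URM.sum(axis=1)).ravel()
        avg_len = row_len.mean()
        URM = URM.tocoo()
        # Okapi BM25 term weighting, with users as "documents" and items as "terms"
        norm = K * (1 - B + B * row_len[URM.row] / avg_len)
        URM.data = URM.data * (K + 1) / (URM.data + norm) * idf[URM.col]
        return URM.tocsr()
    return URM  # "none": leave the URM untouched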
def fit(self, verbose=True):
    # URM matrix is 0-1s already
    self.URM = apply_feature_weighting(self.URM, self.feature_weighting)
    URM_train_positive = self.URM.copy()

    self.cythonEpoch = SLIM_BPR_Cython_Epoch(
        URM_train_positive,
        train_with_sparse_weights=self.train_with_sparse_weights,
        final_model_sparse_weights=True,
        topK=self.topK,
        learning_rate=self.learning_rate,
        li_reg=self.lambda_i,
        lj_reg=self.lambda_j,
        batch_size=1,
        symmetric=self.symmetric,
        sgd_mode=self.sgd_mode,
        verbose=verbose,
        random_seed=None,
        gamma=self.gamma,
        beta_1=self.beta_1,
        beta_2=self.beta_2)

    # MAIN LOOP of training
    convergence = False
    best_MAP = 0
    epochs_current = 0
    lower_epochs = 0

    while epochs_current < self.epochs and not convergence:
        # run an epoch
        self.cythonEpoch.epochIteration_Cython()

        if self.patience is not None:
            # prepare for validation
            self.get_S_set_W_set_predicted_URM()
            _, _, MAP = utils.evaluate.evaluate_algorithm(self.URM_test, self)

            if MAP > best_MAP:
                # best model so far
                print("Found new best MAP! Epoch: {}, new MAP: {}, old MAP: {}".format(
                    epochs_current, MAP, best_MAP))
                self.S_best = self.S_incremental.copy()
                best_MAP = MAP
                lower_epochs = 0
            else:
                # one more epoch without improvement
                lower_epochs += 1
                if lower_epochs > self.patience:
                    convergence = True

        epochs_current += 1

    if self.patience is not None:
        # restore the best model found during validation
        self.use_Sbest_set_W_set_predicted_URM()
    else:
        self.get_S_set_W_set_predicted_URM()

    self.cythonEpoch._dealloc()
    sys.stdout.flush()
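# A minimal, self-contained sketch of the patience-based early stopping used in
# fit() above, decoupled from the recommender. train_one_epoch and evaluate are
# hypothetical callables supplied by the caller; the stopping criterion
# (more than `patience` consecutive epochs without improvement) matches the loop above.
def train_with_patience(train_one_epoch, evaluate, max_epochs, patience):
    best_metric, best_state, lower_epochs = 0.0, None, 0
    for epoch in range(max_epochs):
        state = train_one_epoch()          # returns the current model state
        metric = evaluate(state)           # e.g. MAP on a validation split
        if metric > best_metric:
            best_metric, best_state, lower_epochs = metric, state, 0
        else:
            lower_epochs += 1
            if lower_epochs > patience:    # give up and keep the best state seen
                break
    return best_state, best_metric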
def fit(self):
    """ Fits the ALS MF model """
    self.Cui = apply_feature_weighting(self.URM, self.feature_weighting, K=self.K, B=self.B)
    self._train()
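# fit() above delegates to self._train(), which is not shown here. Below is a
# hedged sketch of one plain ALS half-sweep on the confidence matrix Cui
# (user x item, CSR): each user vector is the solution of the least-squares
# normal equations against fixed item factors. A full ALS iteration alternates
# this with the symmetric item update; lambda_val mirrors the hyperparameter
# name used elsewhere in this repo.
import numpy as np

def als_user_sweep(Cui, user_factors, item_factors, lambda_val):
    V = item_factors
    # (V^T V + lambda I) is shared by all users, so build it once per sweep
    VtV = V.T @ V + lambda_val * np.eye(V.shape[1])
    for u in range(Cui.shape[0]):
        c_u = Cui.getrow(u).toarray().ravel()
        # solve (V^T V + lambda I) x_u = V^T c_u for this user's latent vector
        user_factors[u] = np.linalg.solve(VtV, V.T @ c_u)
    return user_factors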
def fit(self):
    self.URM = apply_feature_weighting(self.URM, self.feature_weighting, K=self.K, B=self.B)

    similarity_object = Compute_Similarity(self.URM,
                                           shrink=self.shrink,
                                           topK=self.topK,
                                           normalize=self.normalize,
                                           similarity=self.similarity,
                                           asymmetric_alpha=self.asymmetric_alpha)
    self.W_sparse = similarity_object.compute_similarity()

    # Precompute the predicted URM
    self.predicted_URM = self.URM.dot(self.W_sparse)
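# Hedged sketch of how the precomputed predicted_URM is typically consumed:
# one row of URM.dot(W_sparse) holds the scores for a user, already-seen items
# are masked out, and the top-N remaining items are returned. The function and
# attribute names are assumptions for illustration, not the repo's actual API.
import numpy as np

def recommend_sketch(recommender, user_id, at=10):
    scores = recommender.predicted_URM[user_id].toarray().ravel()
    seen = recommender.URM[user_id].indices     # items the user already interacted with
    scores[seen] = -np.inf                      # exclude them from the ranking
    return np.argsort(-scores)[:at]             # top-N item ids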
def fit(self, model_name='als', verbose=True):
    """ Train the ALS model using the implicit module """
    self.URM = apply_feature_weighting(self.URM, self.feature_weighting, K=self.K, B=self.B)

    start_time = time.time()

    # create the ALS model
    if model_name == 'als':
        self.model = impl.als.AlternatingLeastSquares(
            factors=self.latent_factors,
            regularization=self.lambda_val,
            iterations=self.iterations)
    elif model_name == 'nmslibals':
        self.model = impl.approximate_als.NMSLibAlternatingLeastSquares(
            factors=self.latent_factors,
            regularization=self.lambda_val,
            iterations=self.iterations)
    elif model_name == 'faissals':
        self.model = impl.approximate_als.FaissAlternatingLeastSquares(
            factors=self.latent_factors,
            regularization=self.lambda_val,
            iterations=self.iterations)
    elif model_name == 'annoyals':
        self.model = impl.approximate_als.AnnoyAlternatingLeastSquares(
            factors=self.latent_factors,
            regularization=self.lambda_val,
            iterations=self.iterations)
    else:
        raise ValueError("Invalid model name: '{}'".format(model_name))

    # fit the ALS model:
    # the model expects an item-user matrix, so we pass the transpose of the URM
    A = self.URM.T.copy()
    self.model.fit(A)

    # retrieve the factors learned during training
    self.user_factors = self.model.user_factors
    self.item_factors = self.model.item_factors

    if verbose:
        print("IMPLICIT ALS training computed in {:.2f} seconds".format(
            time.time() - start_time))
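# Hedged sketch of consuming the factors learned above: implicit scores a
# user-item pair as the dot product of the corresponding latent vectors, so a
# top-N ranking can be recovered directly from user_factors / item_factors.
# One caveat on the fit above: passing the item-user transpose matches older
# implicit releases; newer releases (>= 0.5) expect a user-item matrix instead.
import numpy as np

def top_n_from_factors(user_factors, item_factors, user_id, at=10):
    scores = item_factors @ user_factors[user_id]   # one score per item
    return np.argsort(-scores)[:at]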
def fit(self):
    self.URM = apply_feature_weighting(self.URM, self.feature_weighting, K=self.K, B=self.B)

    # For memory usage reasons, np.float32 is the suggested dtype for the dataset.

    if self.min_rating > 0:
        self.URM.data[self.URM.data < self.min_rating] = 0
        self.URM.eliminate_zeros()

    if self.implicit:
        self.URM.data = np.ones(self.URM.data.size, dtype=np.float32)

    # Pui is the row-normalized URM
    Pui = normalize(self.URM, norm='l1', axis=1)

    # Piu is the column-normalized, "boolean" URM transposed
    X_bool = self.URM.transpose(copy=True)
    X_bool.data = np.ones(X_bool.data.size, np.float32)
    # ATTENTION: axis is still 1 because the matrix was transposed before normalization
    Piu = normalize(X_bool, norm='l1', axis=1)
    del X_bool

    # Alpha power
    if self.alpha != 1.:
        Pui = Pui.power(self.alpha)
        Piu = Piu.power(self.alpha)

    # The full random-walk matrix is Pui * Piu * Pui; only the item-item part
    # W = Piu * Pui is materialized here, computed block by block for memory usage reasons
    block_dim = 200
    d_t = Piu

    # Use arrays as they reduce memory requirements compared to lists
    dataBlock = 10000000
    rows = np.zeros(dataBlock, dtype=np.int32)
    cols = np.zeros(dataBlock, dtype=np.int32)
    values = np.zeros(dataBlock, dtype=np.float32)
    numCells = 0

    start_time = time.time()
    start_time_printBatch = start_time

    for current_block_start_row in range(0, Pui.shape[1], block_dim):
        if current_block_start_row + block_dim > Pui.shape[1]:
            block_dim = Pui.shape[1] - current_block_start_row

        similarity_block = d_t[current_block_start_row:current_block_start_row + block_dim, :] * Pui
        similarity_block = similarity_block.toarray()

        for row_in_block in range(block_dim):
            row_data = similarity_block[row_in_block, :]
            # zero out the self-similarity on the diagonal
            row_data[current_block_start_row + row_in_block] = 0

            best = row_data.argsort()[::-1][:self.topK]
            notZerosMask = row_data[best] != 0.0

            values_to_add = row_data[best][notZerosMask]
            cols_to_add = best[notZerosMask]

            for index in range(len(values_to_add)):
                # grow the preallocated buffers when they fill up
                if numCells == len(rows):
                    rows = np.concatenate((rows, np.zeros(dataBlock, dtype=np.int32)))
                    cols = np.concatenate((cols, np.zeros(dataBlock, dtype=np.int32)))
                    values = np.concatenate((values, np.zeros(dataBlock, dtype=np.float32)))

                rows[numCells] = current_block_start_row + row_in_block
                cols[numCells] = cols_to_add[index]
                values[numCells] = values_to_add[index]
                numCells += 1

        if time.time() - start_time_printBatch > 60:
            print("Processed {} ( {:.2f}% ) in {:.2f} minutes. Rows per second: {:.0f}".format(
                current_block_start_row,
                100.0 * float(current_block_start_row) / Pui.shape[1],
                (time.time() - start_time) / 60,
                float(current_block_start_row) / (time.time() - start_time)))
            sys.stdout.flush()
            sys.stderr.flush()
            start_time_printBatch = time.time()

    self.W_sparse = sp.csr_matrix((values[:numCells], (rows[:numCells], cols[:numCells])),
                                  shape=(Pui.shape[1], Pui.shape[1]))

    if self.normalize_similarity:
        self.W_sparse = normalize(self.W_sparse, norm='l1', axis=1)

    if self.topK:
        self.W_sparse = similarityMatrixTopK(self.W_sparse, k=self.topK)

    self.W_sparse = check_matrix(self.W_sparse, format='csr')

    # Precompute the predicted URM
    self.predicted_URM = self.URM.dot(self.W_sparse)
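# Tiny worked example of the random-walk similarity built above, without the
# blocked top-K machinery: W = Piu**alpha * Pui**alpha, then scores = URM * W.
# Matrix names match the fit() code; the 3-user / 3-item URM is made up.
import numpy as np
import scipy.sparse as sps
from sklearn.preprocessing import normalize

URM = sps.csr_matrix(np.array([[1, 1, 0],
                               [0, 1, 1],
                               [1, 0, 0]], dtype=np.float32))
alpha = 1.0
Pui = normalize(URM, norm='l1', axis=1)                  # user -> item transition
Piu = normalize(URM.T.astype(bool).astype(np.float32),   # item -> user transition
                norm='l1', axis=1)
W = Piu.power(alpha) * Pui.power(alpha)                  # item-item similarity
print((URM * W).toarray())                               # predicted scores per user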