def model(
    sparse_item_user_path="/Users/maxmaiberger/Documents/board-game-recommender/import/Data/test_data_saved/sparse_item_user.npz",
    model_path="/Users/maxmaiberger/Documents/board-game-recommender/import/Data/test_data_saved/model.sav",
):
    """Train an ALS model, pickle it, and report p@10 and MAP@10.

    The input and output locations used to be hard-coded absolute paths; they
    are now parameters whose defaults are the original values, so existing
    ``model()`` calls behave exactly as before.

    Args:
        sparse_item_user_path (str): ``.npz`` file read with ``load_npz``.
        model_path (str): file the fitted model is pickled to.

    Returns:
        tuple: ``(p_at_k, m_at_k)`` as returned by ``precision_at_k`` and
        ``mean_average_precision_at_k`` with ``K=10``.
    """
    sparse_item_user = load_npz(sparse_item_user_path)
    train, test = train_test_split(sparse_item_user, train_percentage=0.8)

    # NOTE(review): the same split is used both for fit() and as
    # train_user_items; which orientation fit() expects depends on the
    # installed implicit version -- confirm.
    als = implicit.als.AlternatingLeastSquares(
        factors=100,
        regularization=0.1,
        iterations=20,
        calculate_training_loss=False,
    )
    als.fit(train)

    with open(model_path, 'wb') as pickle_out:
        pickle.dump(als, pickle_out)

    p_at_k = precision_at_k(als, train_user_items=train, test_user_items=test, K=10)
    m_at_k = mean_average_precision_at_k(als, train, test, K=10)
    print('precision at k:', p_at_k)
    print('mean average precision at k:', m_at_k)
    return p_at_k, m_at_k
def fit(self, list_name, save=True):
    """Build the product/user matrix, fit ALS on a train split, optionally save.

    The matrix and user mappings come from
    ``create_item_user_matrix_light``. The model is fitted on the training
    part of a ``train_test_split(..., 0.8)`` split only, and precision at
    K=10 on the held-out part is printed.

    Args:
        list_name: forwarded to ``create_item_user_matrix_light``.
        save: when truthy, the matrix, model, mappings and size limits are
            bundled into a dict and handed to ``save_object`` together with
            ``self.save_path``.

    Returns:
        The fitted model (also stored on ``self.model``).
    """
    matrix, mappings = self.create_item_user_matrix_light(
        list_name, self.max_users, self.max_products
    )
    self.product_user_matrix = matrix
    self.user_mappings = mappings

    train, test = train_test_split(self.product_user_matrix, 0.8)

    # Fit on the training split only, using the item/user/confidence matrix.
    self.model = implicit.als.AlternatingLeastSquares(factors=100, iterations=15)
    self.model.fit(train)

    # The evaluation helpers receive the transposed splits.
    p = precision_at_k(
        self.model, train.T.tocsr(), test.T.tocsr(), K=10, num_threads=4
    )
    print(p)

    if not save:
        return self.model

    save_object(
        {
            'product_user_matrix': self.product_user_matrix,
            'model': self.model,
            'user_mappings': self.user_mappings,
            'product_mappings': self.product_mappings,
            'max_users': self.max_users,
            'index_to_product_id': self.index_to_product_id,
            'max_products': self.max_products,
        },
        self.save_path,
    )
    return self.model
def model(sparse_user_item_file_path='files/sparse_user_item.npz'):
    """Compute p@k and MAP@k evaluation metrics and save the model.

    Args:
        sparse_user_item_file_path (str): location of the ``.npz`` file
            holding the sparse user * item matrix.

    Returns:
        p_at_k (float): precision @ k recommendations, with k=10
        map_at_k (float): mean average precision @ k recommendations,
            with k=10
    """
    interactions = load_npz(sparse_user_item_file_path)
    train, test = train_test_split(interactions, train_percentage=0.8)

    als = implicit.als.AlternatingLeastSquares(
        factors=100,
        regularization=0.1,
        iterations=100,
        calculate_training_loss=False,
    )
    als.fit(train)

    # Persist the fitted model before evaluating it.
    with open('files/model.sav', 'wb') as pickle_out:
        pickle.dump(als, pickle_out)

    return (
        precision_at_k(als, train_user_items=train, test_user_items=test, K=10),
        mean_average_precision_at_k(als, train, test, K=10),
    )
def evaluate_bpr_model(hyperparameters, train, test, validation):
    """Fit a BayesianPersonalizedRanking model and score it with p@10.

    Args:
        hyperparameters: mapping providing ``'factors'`` and ``'n_iter'``.
        train: matrix passed to ``fit``; its transpose is used as the
            training side of the evaluation.
        test: held-out matrix, transposed before evaluation.
        validation: second held-out matrix, transposed before evaluation.

    Returns:
        tuple: ``(test_eval, val_eval)``, each a dict with the single key
        ``'p@k'``.
    """
    bpr = BayesianPersonalizedRanking(
        factors=hyperparameters['factors'],
        iterations=hyperparameters['n_iter'],
        num_threads=nproc,
    )
    bpr.fit(train)

    train_user_items = train.T.tocsr()

    def _p_at_10(held_out):
        # Precision at K=10 against one held-out matrix.
        return precision_at_k(bpr, train_user_items, held_out.T.tocsr(), K=10)

    test_eval = {'p@k': _p_at_10(test)}
    val_eval = {'p@k': _p_at_10(validation)}
    return test_eval, val_eval
def evaluate_lmf_model(hyperparameters, train, test, validation):
    """Fit a LogisticMatrixFactorization model and score it with p@10.

    Args:
        hyperparameters: mapping providing ``'factors'`` and ``'n_iter'``.
        train: matrix passed to ``fit``; its transpose is used as the
            training side of the evaluation.
        test: held-out matrix, transposed before evaluation.
        validation: second held-out matrix, transposed before evaluation.

    Returns:
        tuple: ``(test_eval, val_eval)``, each a dict with the single key
        ``'p@k'``.
    """
    lmf = LogisticMatrixFactorization(
        factors=hyperparameters['factors'],
        iterations=hyperparameters['n_iter'],
        num_threads=nproc,
    )
    lmf.fit(train)

    train_user_items = train.T.tocsr()
    # Test first, then validation -- same order as the returned tuple.
    scores = [
        precision_at_k(lmf, train_user_items, held_out.T.tocsr(), K=10)
        for held_out in (test, validation)
    ]
    return {'p@k': scores[0]}, {'p@k': scores[1]}
def evaluate_als_model(hyperparameters, train, test, validation):
    """Fit an AlternatingLeastSquares model and score it with p@10.

    Args:
        hyperparameters: mapping providing ``'factors'`` and ``'n_iter'``.
        train: matrix passed to ``fit``; its transpose is used as the
            training side of the evaluation.
        test: held-out matrix, transposed before evaluation.
        validation: second held-out matrix, transposed before evaluation.

    Returns:
        tuple: ``(test_eval, val_eval)``, each a dict with the single key
        ``'p@k'``.
    """
    factors = hyperparameters['factors']
    n_iter = hyperparameters['n_iter']

    als = AlternatingLeastSquares(
        factors=factors, iterations=n_iter, num_threads=nproc
    )
    als.fit(train)

    train_user_items = train.T.tocsr()
    test_p = precision_at_k(als, train_user_items, test.T.tocsr(), K=10)
    val_p = precision_at_k(als, train_user_items, validation.T.tocsr(), K=10)
    return {'p@k': test_p}, {'p@k': val_p}
def evaloutput(self, K=10):
    """Load the pickled model and report precision and MAP at ``K``.

    The model is read from ``model_NR.sav`` and the interaction matrix from
    ``sparse_user_item_NR.npz``; the matrix is split 80/20 and the shapes of
    both parts are printed before the metrics are computed.

    Args:
        K: cut-off passed to both metric functions.

    Returns:
        tuple: ``(p_at_k, m_at_k)``.
    """
    # NOTE(review): pickle.load executes code from the file -- only use with
    # model files you produced yourself.
    with open('model_NR.sav', 'rb') as pickle_in:
        fitted = pickle.load(pickle_in)

    interactions = load_npz("sparse_user_item_NR.npz")
    # NOTE(review): this is a fresh split, made after the model was trained;
    # whether it matches the split used for training cannot be told from here.
    train, test = train_test_split(interactions, train_percentage=0.8)
    print("test", test.shape)
    print("train", train.shape)

    return (
        precision_at_k(fitted, train, test, K),
        mean_average_precision_at_k(fitted, train, test, K),
    )
def test_evaluation(self):
    """Check that p@1 is 1 when the withheld diagonal is the test set."""
    size = 50
    item_users = self.get_checker_board(size)
    train_user_items = item_users.T.tocsr()

    model = self._get_model()
    model.fit(item_users, show_progress=False)

    # The diagonal was withheld for testing, and test_recommend has verified
    # that it is returned for each user, so p@1 should be 1.0.
    withheld = csr_matrix(np.eye(size))
    p = precision_at_k(
        model, train_user_items, withheld, K=1, show_progress=False
    )
    self.assertEqual(p, 1)
def test_evaluation(self):
    """Fit on the checker board and expect perfect precision at K=1."""
    item_users = self.get_checker_board(50)
    user_items = item_users.T.tocsr()

    fitted = self._get_model()
    fitted.fit(item_users, show_progress=False)

    # We withheld the diagonal for testing and have verified in
    # test_recommend that it is returned for each user, so p@1 should be 1.0.
    diagonal = csr_matrix(np.eye(50))
    self.assertEqual(
        precision_at_k(fitted, user_items, diagonal, K=1, show_progress=False),
        1,
    )
def evaluate_model(model_name="als"):
    """Fit the named model on a train split and report precision@k.

    The original never called ``fit`` before evaluating, so
    ``precision_at_k`` was handed an untrained model. The model is now
    fitted on the training split first, and the score is returned as well
    as printed (the function used to return ``None``).

    Args:
        model_name: name forwarded to ``get_model``.

    Returns:
        The value returned by ``precision_at_k`` (library-default K).
    """
    # Only the interaction matrix is needed here.
    _artists, _users, plays = get_twitter()

    model = get_model(model_name)

    # Split the data set into a training part and a held-out part.
    train, testing = train_test_split(plays)

    # NOTE(review): the same split is used for fit() and as train_user_items;
    # which orientation fit() expects depends on the installed implicit
    # version -- confirm.
    model.fit(train)

    result = precision_at_k(model=model, train_user_items=train, test_user_items=testing)
    print('precision@k = ', result)
    return result
def get_train(self, train_config: MatrixTrainingConfig, report_test: Optional[bool] = True, test_df=None, overwrite=False):
    """Optionally sanity-check ALS on a held-out set, then train on all data.

    Changes from the previous version:
      * ``random_state`` is compared against ``None``, so a seed of ``0`` is
        no longer silently ignored.
      * The checks that used ``assert`` now raise ``AssertionError``
        explicitly, so they (in particular the ``safe_pass`` accuracy gate)
        still run under ``python -O``. The exception type is unchanged.
      * The duplicated model construction lives in one local helper.

    Args:
        train_config: supplies ``factor``, ``regularization``,
            ``iterations``, ``conf_scale``, ``top_n``, ``safe_pass``,
            ``train_percentage`` and ``random_state``.
        report_test: when truthy, fit a throw-away model and log its
            precision at ``top_n`` before the final training run.
        test_df: optional held-out data, converted with ``self._to_coo``.
            When ``None`` the held-out set is split off ``self.coo``.
        overwrite: allow replacing an existing ``self.model``.

    Raises:
        Exception: a model already exists and ``overwrite`` is falsy.
        AssertionError: ``train_config`` is falsy, ``test_df`` is empty, or
            the measured precision is not above ``train_config.safe_pass``.
    """
    if self.model and not overwrite:
        raise Exception(
            'Already trained and does not allow overwrite (consider access via model instance).'
        )
    if not train_config:
        raise AssertionError('train configuration has to be provided.')

    def _new_model():
        # One place for the ALS hyper-parameters shared by both fits.
        return implicit.als.AlternatingLeastSquares(
            factors=train_config.factor,
            regularization=train_config.regularization,
            iterations=train_config.iterations)

    if report_test:
        logger.info('-- Performing MM sanity check on {} {}'.format(
            self.col1, self.col2))
        if test_df is None:
            # Seeds numpy's global RNG before the split when a seed is
            # configured; 0 is a valid seed.
            if train_config.random_state is not None:
                np.random.seed(train_config.random_state)
            train_csr, test_csr = train_test_split(
                self.coo, train_percentage=train_config.train_percentage)
        else:
            if len(test_df) <= 0:
                raise AssertionError('test_df must not be empty.')
            train_csr = self.coo
            test_csr = self._to_coo(test_df)

        _model = _new_model()
        _model.fit(train_csr * train_config.conf_scale)
        prec = precision_at_k(_model,
                              train_csr.T,
                              test_csr.T,
                              K=train_config.top_n)
        logger.warning('ACCURACY REPORT at top {}: {:.5f}%'.format(
            train_config.top_n, prec * 100))
        if train_config.safe_pass and not prec > train_config.safe_pass:
            raise AssertionError(
                'precision {} is not above the safe_pass threshold {}'.format(
                    prec, train_config.safe_pass))

    # training on complete matrix
    logger.info('Training on complete matrix')
    _model = _new_model()
    _model.fit(self.coo * train_config.conf_scale)
    self.model = _model
def model():
    """Compute p@k and MAP@k evaluation metrics and save the model.

    Reads ``sparse_item_user.npz``, fits ALS on an 80% split, pickles the
    fitted model to ``model.sav`` and evaluates with K=10.

    Returns:
        tuple: ``(p_at_k, m_at_k)``.
    """
    interactions = load_npz("sparse_item_user.npz")
    train, test = train_test_split(interactions, train_percentage=0.8)

    als = implicit.als.AlternatingLeastSquares(
        factors=100,
        regularization=0.1,
        iterations=20,
        calculate_training_loss=False,
    )
    als.fit(train)

    with open('model.sav', 'wb') as pickle_out:
        pickle.dump(als, pickle_out)

    return (
        precision_at_k(als, train_user_items=train, test_user_items=test, K=10),
        mean_average_precision_at_k(als, train, test, K=10),
    )
def train_test_split_recommend(self,model,user_item_csr,user_lookup,user_id):
    """Fit ``model`` on a train split, print metrics, return one user's column.

    Args:
        model: recommender exposing ``fit`` and ``recommend_all``.
        user_item_csr: matrix that is split with ``train_test_split``.
        user_lookup: object whose ``index.categories`` label the columns of
            the recommendation table.
        user_id: column label to select from that table.

    Returns:
        The ``user_id`` column of the recommendations DataFrame.
    """
    train, test = train_test_split(user_item_csr)

    # Train the model on a sparse matrix of item/user/confidence weights.
    model.fit(train.T.tocsr())

    # Calculate Precision@N & NDCG@N
    precision = precision_at_k(model, train, test, K=20)
    ndcg = ndcg_at_k(model, train, test, K=20)
    print('Precision@20: {0}\n NDCG@20: {1}\n'.format(precision, ndcg))

    # Recommend items to every user.
    # NOTE(review): recommend_all is given the *test* split -- confirm that
    # this is the matrix intended for filtering already-liked items.
    recommendations = model.recommend_all(test, filter_already_liked_items=True).T
    rec_frame = pd.DataFrame(
        data=recommendations, columns=user_lookup.index.categories
    )
    print('Recommendations Dataframe:\n{}'.format(rec_frame))

    return rec_frame[user_id]
def learningCurve(model, train, test, epochs, outFile=None, k=5, showProgress=True, numThreads=12):
    """Train in stages and record ranking metrics after each stage.

    For every entry of ``epochs`` the model is trained for
    ``epoch - previous_epoch`` additional iterations and p@k, MAP@k, NDCG@k
    and AUC@k are computed and logged through ``printLog``.

    The transposed CSR copies of ``train`` and ``test`` do not change between
    stages, so they are built once up front instead of four times per stage.

    Args:
        model: recommender with ``fit`` and ``fit_partial`` methods and a
            settable ``iterations`` attribute.
        train: matrix passed to ``fit`` / ``fit_partial``.
        test: held-out matrix.
        epochs: iterable of cumulative iteration counts (presumably
            increasing -- each stage runs ``epoch - previous`` iterations).
        outFile: forwarded to ``printLog``.
        k: cut-off for all four metrics.
        showProgress: forwarded as ``show_progress``.
        numThreads: forwarded as ``num_threads`` to the metric functions.

    Returns:
        tuple: ``(model, pAtK, MAPatK, NDCGatK, AUCatK)`` where the last four
        are lists with one value per entry of ``epochs``.
    """
    prevEpoch = 0
    pAtK = []
    MAPatK = []
    NDCGatK = []
    AUCatK = []

    headers = ["epochs", f"p@{k}", f"MAP@{k}", f"NDCG@{k}", f"AUC@{k}"]
    printLog(headers, header=True, outFile=outFile)

    # Loop-invariant: hoisted out of the per-epoch loop.
    trainUserItems = train.T.tocsr()
    testUserItems = test.T.tocsr()
    metricKwargs = {"K": k, "show_progress": showProgress, "num_threads": numThreads}

    for epoch in epochs:
        model.iterations = epoch - prevEpoch
        # A model without `user_vectors` gets a full fit(); afterwards
        # fit_partial() is used (presumably resuming from the current factors).
        if not hasattr(model, "user_vectors"):
            model.fit(train, show_progress=showProgress)
        else:
            model.fit_partial(train, show_progress=showProgress)

        pAtK.append(precision_at_k(model, trainUserItems, testUserItems, **metricKwargs))
        MAPatK.append(mean_average_precision_at_k(model, trainUserItems, testUserItems, **metricKwargs))
        NDCGatK.append(ndcg_at_k(model, trainUserItems, testUserItems, **metricKwargs))
        AUCatK.append(AUC_at_k(model, trainUserItems, testUserItems, **metricKwargs))

        row = [epoch, pAtK[-1], MAPatK[-1], NDCGatK[-1], AUCatK[-1]]
        printLog(row, outFile=outFile)
        prevEpoch = epoch

    return model, pAtK, MAPatK, NDCGatK, AUCatK
print("Training model") print(asctime(localtime()), flush=True) t0 = time() model.fit(train, show_progress=args.progressBar) print(f"Δt: {time() - t0:5.1f}s", flush=True) trainTscr = train.T.tocsr() testTscr = test.T.tocsr() k = args.k print(f"Computing p@{k} ...", flush=True) t0 = time() pAtK = precision_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar, num_threads=args.numThreads) print(f"Δt: {time() - t0:5.1f}s") print(f"Computing MAP@{k} ...", flush=True) t0 = time() MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar, num_threads=args.numThreads) print(f"Δt: {time() - t0:5.1f}s") print(f"Computing NDCG@{k} ...", flush=True) t0 = time() NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar, num_threads=args.numThreads) AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar,
def run(modelName, datasetName, factorCt, k, λ, α, maxIters, showProgress, useGPU, threadCt):
    """Train one model on one dataset and log p@k, MAP@k, NDCG@k and AUC@k.

    Changes from the previous version:
      * The final summary line was missing the ``", "`` between the MAP and
        NDCG fields, so the two ran together in the output.
      * The model parameters are built once instead of in two nearly
        identical branches (``alpha`` is still only passed for non-ALS
        models, and the key order is unchanged).
      * ``isinstance`` replaces ``issubclass(model.__class__, ...)``.

    Args:
        modelName: model identifier for ``getModel``; ``'als'`` omits alpha.
        datasetName: dataset identifier for ``fetchDataset``.
        factorCt: number of latent factors.
        k: cut-off for all metrics.
        λ: regularization strength.
        α: ``alpha`` parameter (non-ALS models only).
        maxIters: number of training iterations.
        showProgress: forwarded as ``show_progress``.
        useGPU: forwarded as ``use_gpu``.
        threadCt: forwarded as ``num_threads`` to the metric functions.
    """
    params = {'factors': factorCt, 'regularization': λ}
    if modelName != 'als':
        params['alpha'] = α
    params['iterations'] = maxIters
    params['use_gpu'] = useGPU
    model = getModel(modelName, volubility=2, params=params)

    artists, users, plays = fetchDataset(datasetName, volubility=2)
    print(artists.shape, users.shape, plays.shape, flush=True)

    if isinstance(model, AlternatingLeastSquares):
        # Weight the matrix with BM25 for ALS-family models.
        print("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_recommend = False

    plays = plays.tocsr()
    train, test = train_test_split(plays, train_percentage=0.8)

    print("Training model")
    print(asctime(localtime()), flush=True)
    t0 = time()
    model.fit(train, show_progress=showProgress)
    print(f"Δt: {time() - t0:5.1f}s", flush=True)

    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k, show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"p@{k}", pAtK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k, show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"MAP@{k}", MAPatK)
    print(f"Δt: {time() - t0:5.1f}s")

    # The timer started here covers both NDCG and AUC, as before.
    print(f"Computing NDCG@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k, show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"NDCG@{k}", NDCGatK)
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k, show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"AUC@{k}", AUCatK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}, "
          f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)
def train_evaluate_als_model(csr_prd_cli_matrix):
    """
    Define, fit and tune ALS model

    Returns an optimized instance of the implicit-ALS model.
    Implements a Grid Search over some hyperparameters.
    Uses Precision@K as the evaluation metric, with K set to 10% of the
    number of rows of the respective held-out split.

    Changes from the previous version:
      * The printed labels said "Mean Average Precision" although the score
        is ``precision_at_k``; they now say "Precision".
      * The test score used to be computed on the final model, which is
        fitted on the full matrix and has therefore already seen the test
        interactions. It is now computed on a separate model fitted on the
        grid (train + eval) split only. The returned model is still fitted
        on the full matrix with the best parameters.
      * K is clamped to at least 1 so small matrices do not yield K=0.

    Parameters
    ----------
    csr_prd_cli_matrix: scipy.csr_matrix
        Sparse CSR representation of df_long, with shape prd_col x cli_col.

    Returns
    -------
    model: implicit.als.AlternatingLeastSquares model
    """
    params = {
        'factors': [50, 100, 150],
        'regularization': [0.01, 0.05, 0.1],
        # NOTE(review): `npfloat64` is presumably an alias for numpy.float64
        # defined elsewhere in this file -- confirm.
        'dtype': [npfloat64],
        'use_native': [True],
        'use_cg': [False],
        'use_gpu': [False],
        'iterations': [15, 30, 50],
        'num_threads': [0],
        'random_state': [42]
    }
    param_grid = ParameterGrid(params)

    # Outer split: grid-search data vs. final test data.
    # Inner split: training vs. evaluation data for the grid search.
    df_grid, df_test = train_test_split(csr_prd_cli_matrix, train_percentage=0.8)
    df_train, df_eval = train_test_split(df_grid, train_percentage=0.8)

    eval_k_size = max(1, int(df_eval.shape[0] * 0.1))
    test_k_size = max(1, int(df_test.shape[0] * 0.1))

    grid_score = {}
    for i, grid in enumerate(param_grid):
        m = AlternatingLeastSquares(**grid)
        m.fit(df_train, show_progress=False)
        grid_score[i] = precision_at_k(m, df_train, df_eval, K=eval_k_size,
                                       num_threads=0, show_progress=False)

    scores = pd.Series(grid_score)
    print('Best evaluation Precision (@ K={}): {}'.format(
        eval_k_size, scores.max()))
    best_params = param_grid[scores.idxmax()]

    # Held-out estimate: this model never sees df_test during fitting.
    holdout_model = AlternatingLeastSquares(**best_params)
    holdout_model.fit(df_grid, show_progress=False)
    test_score = precision_at_k(holdout_model, df_grid, df_test, K=test_k_size,
                                num_threads=0, show_progress=False)
    print('Best test Precision (@ K={}): {}'.format(
        test_k_size, test_score))

    # Final model for the caller: best parameters, all available data.
    model = AlternatingLeastSquares(**best_params)
    model.fit(csr_prd_cli_matrix)
    return model
def test(self, train_size=0.8, K=10):
    """Print precision at ``K`` for ``self.model`` on a fresh random split.

    ``train_size`` used to be ignored in favour of a hard-coded 0.8; it is
    now forwarded to ``train_test_split``. The default is unchanged, so
    calls without arguments behave as before.

    Args:
        train_size: fraction of interactions placed in the training split.
        K: cut-off for ``precision_at_k``.
    """
    train, held_out = train_test_split(self.product_user_matrix, train_size)
    # The evaluation helper receives the transposed splits.
    p = precision_at_k(self.model, train.T.tocsr(), held_out.T.tocsr(), K, num_threads=2)
    print ("precision at K =", K, ":", p)
# Build the sparse interaction matrix and its lookup tables, then transpose it.
csr_data, user_lookup, item_lookup = create_sparse_matrix(data, userkey, itemkey)
csr_data = csr_data.T.tocsr()
print(csr_data)

train, test = train_test_split(csr_data)
print(train, test)

# Initialize a model --- choose a model.
# Alternatives that were tried here:
#   implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=50)
#   implicit.bpr.BayesianPersonalizedRanking(factors=100)
#   implicit.lmf.LogisticMatrixFactorization(factors=100)
#   implicit.approximate_als.AnnoyAlternatingLeastSquares()
model = implicit.als.AlternatingLeastSquares(factors=50)

print(train.T.tocsr())

# Train the model on a sparse matrix of item/user/confidence weights.
model.fit(train.T.tocsr())

# Evaluation metrics calculation.
precision = precision_at_k(model, train, test, K=20)
ndcg = ndcg_at_k(model, train, test, K=20)
print('Precision@20: {0}\n NDCG@20: {1}\n'.format(precision, ndcg))

# Recommend N best items for each user; the result is transposed before it is
# wrapped in a DataFrame whose columns are labelled with
# user_lookup.index.values.
top_rec_4all = model.recommend_all(test, N=20).T
top_rec_4all = pd.DataFrame(data=top_rec_4all,
                            columns=user_lookup.index.values)
print(top_rec_4all)