from implicit.evaluation import mean_average_precision_at_k, ndcg_at_k


def train_als(model, trn, tst):
    """Fit the ALS model on the training matrix and report MAP@10 and NDCG@10."""
    model.fit(trn, show_progress=True)
    MAP = mean_average_precision_at_k(model, trn, tst, K=10,
                                      show_progress=True, num_threads=1)
    NDCG = ndcg_at_k(model, trn, tst, K=10,
                     show_progress=True, num_threads=1)
    print("MAP is %.4f and NDCG is %.4f" % (MAP, NDCG))
    return model
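# A minimal usage sketch for train_als (an assumption, not part of the original):
# `user_items` stands in for a user/item CSR matrix prepared elsewhere, and the
# hyperparameters below are illustrative only.
from implicit.als import AlternatingLeastSquares
from implicit.evaluation import train_test_split

trn, tst = train_test_split(user_items, train_percentage=0.8)
als = AlternatingLeastSquares(factors=64, regularization=0.01, iterations=15)
als = train_als(als, trn, tst)  # prints MAP/NDCG at 10 and returns the fitted model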
def train_test_split_recommend(self, model, user_item_csr, user_lookup, user_id):
    train, test = train_test_split(user_item_csr)

    # Train the model on a sparse matrix of item/user/confidence weights.
    model.fit(train.T.tocsr())

    # Calculate Precision@N and NDCG@N on the held-out interactions.
    precision = precision_at_k(model, train, test, K=20)
    ndcg = ndcg_at_k(model, train, test, K=20)
    print('Precision@20: {0}\nNDCG@20: {1}\n'.format(precision, ndcg))

    # Recommend items to every user.
    top_rec_4all = model.recommend_all(test, filter_already_liked_items=True)
    top_rec_4all = top_rec_4all.T
    top_rec_4all = pd.DataFrame(data=top_rec_4all,
                                columns=user_lookup.index.categories)
    print('Recommendations Dataframe:\n{}'.format(top_rec_4all))

    top_products = top_rec_4all[user_id]
    return top_products
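# Hedged usage sketch for the method above; `engine` stands in for whatever class
# owns train_test_split_recommend, and `user_item_csr`, `user_lookup`, and `user_id`
# are assumed to come from the data-preparation step.
als = implicit.als.AlternatingLeastSquares(factors=50)
top_products = engine.train_test_split_recommend(als, user_item_csr, user_lookup, user_id)
print(top_products.head(10))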
from implicit.evaluation import (precision_at_k, mean_average_precision_at_k,
                                 ndcg_at_k, AUC_at_k)


def learningCurve(model, train, test, epochs, outFile=None,
                  k=5, showProgress=True, numThreads=12):
    prevEpoch = 0

    pAtK = []
    MAPatK = []
    NDCGatK = []
    AUCatK = []

    headers = ["epochs", f"p@{k}", f"MAP@{k}", f"NDCG@{k}", f"AUC@{k}"]
    printLog(headers, header=True, outFile=outFile)

    for epoch in epochs:
        # Only run the iterations needed to advance from prevEpoch to epoch.
        model.iterations = epoch - prevEpoch
        if not hasattr(model, "user_vectors"):
            model.fit(train, show_progress=showProgress)
        else:
            model.fit_partial(train, show_progress=showProgress)

        # The evaluation routines expect user/item matrices, hence the transposes.
        pAtK.append(precision_at_k(model, train.T.tocsr(), test.T.tocsr(), K=k,
                                   show_progress=showProgress, num_threads=numThreads))
        MAPatK.append(mean_average_precision_at_k(model, train.T.tocsr(), test.T.tocsr(), K=k,
                                                  show_progress=showProgress, num_threads=numThreads))
        NDCGatK.append(ndcg_at_k(model, train.T.tocsr(), test.T.tocsr(), K=k,
                                 show_progress=showProgress, num_threads=numThreads))
        AUCatK.append(AUC_at_k(model, train.T.tocsr(), test.T.tocsr(), K=k,
                               show_progress=showProgress, num_threads=numThreads))

        row = [epoch, pAtK[-1], MAPatK[-1], NDCGatK[-1], AUCatK[-1]]
        printLog(row, outFile=outFile)
        prevEpoch = epoch

    return model, pAtK, MAPatK, NDCGatK, AUCatK
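# Hedged sketch of driving learningCurve. printLog is not defined in this snippet, so a
# minimal stand-in is assumed here purely for illustration; `train` and `test` are the
# item/user CSR matrices expected by the function above.
from implicit.als import AlternatingLeastSquares


def printLog(row, header=False, outFile=None):
    # Print a tab-separated row and optionally append it to a log file.
    line = "\t".join(str(x) for x in row)
    print(line, flush=True)
    if outFile is not None:
        with open(outFile, "a") as fh:
            fh.write(line + "\n")


als = AlternatingLeastSquares(factors=64, regularization=0.01, use_gpu=False)
epochs = [1, 2, 4, 8, 16, 32]
model, pAtK, MAPatK, NDCGatK, AUCatK = learningCurve(
    als, train, test, epochs, outFile="als_learning_curve.tsv", k=5)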
print(f"Computing p@{k} ...", flush=True) t0 = time() pAtK = precision_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar, num_threads=args.numThreads) print(f"Δt: {time() - t0:5.1f}s") print(f"Computing MAP@{k} ...", flush=True) t0 = time() MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar, num_threads=args.numThreads) print(f"Δt: {time() - t0:5.1f}s") print(f"Computing NDCG@{k} ...", flush=True) t0 = time() NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar, num_threads=args.numThreads) AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k, show_progress=args.progressBar, num_threads=args.numThreads) print(f"Δt: {time() - t0:5.1f}s") print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}" f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True) # thang = [curves[x]["params"] for x in range(len(curves))] # df0 = pd.DataFrame(thang) # blah = [curves[x][f"p@{k}"] for x in range(len(curves))] # df1 = pd.DataFrame(blah)
from time import time, asctime, localtime

from implicit.als import AlternatingLeastSquares
from implicit.evaluation import (train_test_split, precision_at_k,
                                 mean_average_precision_at_k, ndcg_at_k, AUC_at_k)
from implicit.nearest_neighbours import bm25_weight


def run(modelName, datasetName, factorCt, k, λ, α, maxIters,
        showProgress, useGPU, threadCt):
    if modelName == 'als':
        model = getModel(modelName, volubility=2,
                         params={'factors': factorCt, 'regularization': λ,
                                 'iterations': maxIters, 'use_gpu': useGPU})
    else:
        model = getModel(modelName, volubility=2,
                         params={'factors': factorCt, 'regularization': λ, 'alpha': α,
                                 'iterations': maxIters, 'use_gpu': useGPU})

    artists, users, plays = fetchDataset(datasetName, volubility=2)
    print(artists.shape, users.shape, plays.shape, flush=True)

    if issubclass(model.__class__, AlternatingLeastSquares):
        # Weight the play counts by bm25_weight to dampen dominant users/items.
        print("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)
        # Also disable building the approximate recommend index.
        model.approximate_recommend = False

    plays = plays.tocsr()
    train, test = train_test_split(plays, train_percentage=0.8)

    print("Training model")
    print(asctime(localtime()), flush=True)
    t0 = time()
    model.fit(train, show_progress=showProgress)
    print(f"Δt: {time() - t0:5.1f}s", flush=True)

    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                          show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"p@{k}", pAtK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                         show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"MAP@{k}", MAPatK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing NDCG@{k} and AUC@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                        show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"NDCG@{k}", NDCGatK)
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=showProgress, num_threads=threadCt)
    ex.log_scalar(f"AUC@{k}", AUCatK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}, "
          f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)
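# Hedged sketch of the sacred wiring that run() appears to assume (ex.log_scalar comes
# from a sacred Experiment). In the original script `ex` would be created before run();
# the experiment name and config values below are illustrative assumptions.
from sacred import Experiment

ex = Experiment("implicit-recsys")


@ex.config
def config():
    modelName = "als"
    datasetName = "lastfm"
    factorCt = 64
    k = 10
    λ = 0.01
    α = 1.0
    maxIters = 15
    showProgress = True
    useGPU = False
    threadCt = 12


@ex.automain
def main(modelName, datasetName, factorCt, k, λ, α, maxIters, showProgress, useGPU, threadCt):
    run(modelName, datasetName, factorCt, k, λ, α, maxIters, showProgress, useGPU, threadCt)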
import implicit
import pandas as pd
from implicit.evaluation import train_test_split, precision_at_k, ndcg_at_k

csr_data, user_lookup, item_lookup = create_sparse_matrix(data, userkey, itemkey)
csr_data = csr_data.T.tocsr()
print(csr_data)

train, test = train_test_split(csr_data)
print(train, test)

# Initialise a model -- pick one of the implicit recommenders.
model = implicit.als.AlternatingLeastSquares(factors=50)
# model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=50)
# model = implicit.bpr.BayesianPersonalizedRanking(factors=100)
# model = implicit.lmf.LogisticMatrixFactorization(factors=100)
# model = implicit.approximate_als.AnnoyAlternatingLeastSquares()

# Train the model on a sparse matrix of item/user/confidence weights.
print(train.T.tocsr())
model.fit(train.T.tocsr())

# Evaluation metrics.
precision = precision_at_k(model, train, test, K=20)
ndcg = ndcg_at_k(model, train, test, K=20)
print('Precision@20: {0}\nNDCG@20: {1}\n'.format(precision, ndcg))

# Recommend the N best items for each user.
top_rec_4all = model.recommend_all(test, N=20)
top_rec_4all = top_rec_4all.T
top_rec_4all = pd.DataFrame(data=top_rec_4all, columns=user_lookup.index.values)
print(top_rec_4all)
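# Hedged sketch of a create_sparse_matrix helper consistent with how its outputs are
# used above (a CSR matrix plus a categorical user lookup); the real implementation is
# not included in the original snippets, so the names and details here are assumptions.
import numpy as np
import scipy.sparse as sparse


def create_sparse_matrix(data, userkey, itemkey, valuekey=None):
    # Encode users and items as categorical codes.
    users = data[userkey].astype("category")
    items = data[itemkey].astype("category")
    values = data[valuekey].values if valuekey else np.ones(len(data))

    # Rows are users, columns are items; the calling code transposes as needed.
    matrix = sparse.csr_matrix(
        (values, (users.cat.codes, items.cat.codes)),
        shape=(users.cat.categories.size, items.cat.categories.size))

    # Lookups map the original ids (kept as a categorical index) to matrix positions.
    user_lookup = pd.Series(np.arange(users.cat.categories.size),
                            index=pd.CategoricalIndex(users.cat.categories))
    item_lookup = pd.Series(np.arange(items.cat.categories.size),
                            index=pd.CategoricalIndex(items.cat.categories))
    return matrix, user_lookup, item_lookup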