def train_als(trn, tst):
    """Fit the module-level ``model`` on ``trn`` and report ranking metrics.

    After fitting, MAP@10 and NDCG@10 are computed against ``tst`` on a
    single thread and printed on one line.

    Args:
        trn: Training interaction matrix; it is passed unchanged both to
            ``model.fit`` and to the evaluation helpers.
            NOTE(review): some versions of the library expect different
            orientations for fitting and evaluation -- confirm against the
            installed version.
        tst: Held-out interaction matrix used as the evaluation target.

    Returns:
        The fitted ``model`` (the same module-level object).
    """
    model.fit(trn, show_progress=True)

    # Both metrics share the same cut-off and execution settings.
    eval_options = dict(K=10, show_progress=True, num_threads=1)
    map_score = mean_average_precision_at_k(model, trn, tst, **eval_options)
    ndcg_score = ndcg_at_k(model, trn, tst, **eval_options)

    print("MAP is %.4f and NDCG is %.4f: " % (map_score, ndcg_score))
    return model
Beispiel #2
0
    def train_test_split_recommend(self,model,user_item_csr,user_lookup,user_id):
        """Split the data, fit ``model``, print metrics and return one user's picks.

        Args:
            model: Recommender exposing ``fit`` and ``recommend_all``.
            user_item_csr: Sparse interaction matrix handed to
                ``train_test_split``.
            user_lookup: Object whose ``index.categories`` supplies the column
                labels of the recommendation frame (presumably one label per
                user -- verify against caller).
            user_id: Column label selected from the recommendation frame.

        Returns:
            The column of the recommendation frame labelled ``user_id``.
        """
        train, test = train_test_split(user_item_csr)

        # The model is fitted on the transpose of the training split
        # (a sparse matrix of item/user/confidence weights).
        fit_matrix = train.T.tocsr()
        model.fit(fit_matrix)

        # Precision@N and NDCG@N on the untransposed splits.
        precision = precision_at_k(model, train, test, K=20)
        ndcg = ndcg_at_k(model, train, test, K=20)
        print('Precision@20: {0}\n NDCG@20: {1}\n'.format(precision, ndcg))

        # Recommend items for every user; the result is transposed before it
        # is wrapped in a DataFrame labelled by ``user_lookup``.
        # NOTE(review): ``test`` (not ``train``) is what gets passed for the
        # already-liked filter -- confirm this is intended.
        all_recs = model.recommend_all(test, filter_already_liked_items=True)
        rec_frame = pd.DataFrame(data=all_recs.T,
                                 columns=user_lookup.index.categories)
        print('Recommendations Dataframe:\n{}'.format(rec_frame))

        return rec_frame[user_id]
Beispiel #3
0
def learningCurve(model, train, test, epochs, outFile=None,
                  k=5, showProgress=True, numThreads=12):
    """Train ``model`` incrementally and record ranking metrics per checkpoint.

    For every value in ``epochs`` the model is trained for the difference to
    the previous value, then p@k, MAP@k, NDCG@k and AUC@k are evaluated and
    logged through ``printLog``.

    Args:
        model: Recommender exposing ``fit``, ``fit_partial`` and a writable
            ``iterations`` attribute.
        train: Sparse training matrix passed to ``fit`` / ``fit_partial``;
            its transpose is what the metrics are evaluated on.
        test: Sparse held-out matrix; its transpose is the evaluation target.
        epochs: Iterable of checkpoint values (presumably increasing
            cumulative iteration counts -- each step trains
            ``epoch - previous`` iterations).
        outFile: Forwarded to ``printLog``.
        k: Cut-off used for all four metrics.
        showProgress: Forwarded as ``show_progress`` to training/evaluation.
        numThreads: Forwarded as ``num_threads`` to the evaluation helpers.

    Returns:
        Tuple ``(model, pAtK, MAPatK, NDCGatK, AUCatK)`` where the four lists
        hold one value per entry of ``epochs``.
    """
    prevEpoch = 0

    pAtK = []
    MAPatK = []
    NDCGatK = []
    AUCatK = []

    headers = ["epochs", f"p@{k}", f"MAP@{k}", f"NDCG@{k}", f"AUC@{k}"]
    printLog(headers, header=True, outFile=outFile)

    # The transposed CSR matrices do not change between checkpoints, so they
    # are built once here instead of four times per epoch.
    trainT = train.T.tocsr()
    testT = test.T.tocsr()
    evalOptions = dict(K=k, show_progress=showProgress,
                       num_threads=numThreads)

    for epoch in epochs:
        # Train only the iterations added since the previous checkpoint.
        model.iterations = epoch - prevEpoch
        if not hasattr(model, "user_vectors"):
            # No ``user_vectors`` attribute yet: do a full fit.
            model.fit(train, show_progress=showProgress)
        else:
            model.fit_partial(train, show_progress=showProgress)

        pAtK.append(precision_at_k(model, trainT, testT, **evalOptions))
        MAPatK.append(mean_average_precision_at_k(model, trainT, testT,
                                                  **evalOptions))
        NDCGatK.append(ndcg_at_k(model, trainT, testT, **evalOptions))
        AUCatK.append(AUC_at_k(model, trainT, testT, **evalOptions))

        row = [epoch, pAtK[-1], MAPatK[-1], NDCGatK[-1], AUCatK[-1]]
        printLog(row, outFile=outFile)
        prevEpoch = epoch

    return model, pAtK, MAPatK, NDCGatK, AUCatK
Beispiel #4
0
    # Evaluate each ranking metric at cut-off k on the transposed train/test
    # matrices, printing the wall-clock time spent on each stage.
    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                          show_progress=args.progressBar,
                          num_threads=args.numThreads)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                         show_progress=args.progressBar,
                                         num_threads=args.numThreads)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing NDCG@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                        show_progress=args.progressBar,
                        num_threads=args.numThreads)
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=args.progressBar,
                      num_threads=args.numThreads)
    # NOTE: this Δt covers NDCG and AUC together (no separate timer for AUC).
    print(f"Δt: {time() - t0:5.1f}s")

    # Adjacent f-strings are concatenated verbatim, so the ", " between the
    # MAP and NDCG fields has to be written explicitly.
    print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}, "
          f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)

    # thang = [curves[x]["params"] for x in range(len(curves))]
    # df0 = pd.DataFrame(thang)

    # blah = [curves[x][f"p@{k}"] for x in range(len(curves))]
    # df1 = pd.DataFrame(blah)
Beispiel #5
0
def run(modelName, datasetName, factorCt, k, λ, α,
        maxIters, showProgress, useGPU, threadCt):
    """Train one model on a dataset and log p@k, MAP@k, NDCG@k and AUC@k.

    The model is built through ``getModel``, the data is loaded through
    ``fetchDataset``, split 80/20, fitted on the training part and evaluated
    on the transposed splits. Every metric is logged via ``ex.log_scalar``
    and a one-line summary is printed at the end.

    Args:
        modelName: Model identifier for ``getModel``; ``'als'`` is built
            without the ``alpha`` parameter, every other name with it.
        datasetName: Dataset identifier for ``fetchDataset``.
        factorCt: Value of the ``factors`` model parameter.
        k: Cut-off used for all metrics.
        λ: Value of the ``regularization`` model parameter.
        α: Value of the ``alpha`` model parameter (ignored for ``'als'``).
        maxIters: Value of the ``iterations`` model parameter.
        showProgress: Forwarded as ``show_progress`` to fit and evaluation.
        useGPU: Value of the ``use_gpu`` model parameter.
        threadCt: Forwarded as ``num_threads`` to the evaluation helpers.
    """
    # Build the parameter dict once; only non-'als' models receive 'alpha'.
    # Key order matches the two literal dicts this replaces.
    params = {'factors': factorCt, 'regularization': λ}
    if modelName != 'als':
        params['alpha'] = α
    params.update(iterations=maxIters, use_gpu=useGPU)
    model = getModel(modelName, volubility=2, params=params)

    artists, users, plays = fetchDataset(datasetName, volubility=2)

    print(artists.shape, users.shape, plays.shape, flush=True)

    if isinstance(model, AlternatingLeastSquares):
        # ALS-family models are trained on BM25-weighted interactions.
        print("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_recommend = False

    plays = plays.tocsr()

    train, test = train_test_split(plays, train_percentage=0.8)

    print("Training model")
    print(asctime(localtime()), flush=True)
    t0 = time()
    model.fit(train, show_progress=showProgress)
    print(f"Δt: {time() - t0:5.1f}s", flush=True)

    # The metrics are evaluated on the transposes of the matrices used to fit.
    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                          show_progress=showProgress,
                          num_threads=threadCt)
    ex.log_scalar(f"p@{k}", pAtK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                         show_progress=showProgress,
                                         num_threads=threadCt)
    ex.log_scalar(f"MAP@{k}", MAPatK)
    print(f"Δt: {time() - t0:5.1f}s")

    print(f"Computing NDCG@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                        show_progress=showProgress,
                        num_threads=threadCt)
    ex.log_scalar(f"NDCG@{k}", NDCGatK)
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=showProgress,
                      num_threads=threadCt)
    ex.log_scalar(f"AUC@{k}", AUCatK)
    # NOTE: this Δt covers NDCG and AUC together (no separate timer for AUC).
    print(f"Δt: {time() - t0:5.1f}s")

    # Adjacent f-strings are concatenated verbatim, so the ", " between the
    # MAP and NDCG fields has to be written explicitly.
    print(f"p@{k}: {pAtK:6.4f}, MAP@{k}: {MAPatK:6.4f}, "
          f"NDCG@{k}: {NDCGatK:6.4f}, AUC@{k}: {AUCatK:6.4f}", flush=True)
Beispiel #6
0
    # Build the sparse interaction matrix plus the user/item lookup tables
    # from the raw data and the two key column names.
    csr_data, user_lookup, item_lookup = create_sparse_matrix(
        data, userkey, itemkey)
    #print(csr_data)

    # Work on the transpose of the matrix from here on, then split it.
    csr_data = csr_data.T.tocsr()
    print(csr_data)
    train, test = train_test_split(csr_data)
    print(train, test)

    #print(user_lookup,item_lookup)
    """initialize a model --- choose a model"""
    # Exactly one of the alternatives below is active; the rest are kept as
    # commented-out options.
    #model = implicit.als.AlternatingLeastSquares(factors=20,regularization=0.1,iterations=50)
    model = implicit.als.AlternatingLeastSquares(factors=50)
    #model = implicit.bpr.BayesianPersonalizedRanking(factors=100)
    #model = implicit.lmf.LogisticMatrixFactorization(factors=100)
    #model = implicit.approximate_als.AnnoyAlternatingLeastSquares()
    print(train.T.tocsr())
    """Train the model on a sparse matrix of item/user/confidence weights"""
    # The model is fitted on the transpose of the training split, while the
    # metrics below receive the untransposed splits.
    model.fit(train.T.tocsr())
    """Evaluation Metrics Calculation"""
    precision = precision_at_k(model, train, test, K=20)
    ndcg = ndcg_at_k(model, train, test, K=20)

    print('Precision@20: {0}\n NDCG@20: {1}\n'.format(precision, ndcg))
    """Recommend N best items for each user"""
    # NOTE(review): ``test`` (not ``train``) is passed to recommend_all --
    # confirm this is the intended matrix.
    top_rec_4all = model.recommend_all(test, N=20)
    # Transpose the result before labelling its columns from user_lookup
    # (presumably one column per user -- verify).
    top_rec_4all = top_rec_4all.T
    #top_rec_4all = pd.DataFrame(data=top_rec_4all,columns=user_lookup.index.categories)
    top_rec_4all = pd.DataFrame(data=top_rec_4all,
                                columns=user_lookup.index.values)
    print(top_rec_4all)