Esempio n. 1
0
def model(
        sparse_item_user_path=(
            "/Users/maxmaiberger/Documents/board-game-recommender/import/Data/"
            "test_data_saved/sparse_item_user.npz"),
        model_path=(
            "/Users/maxmaiberger/Documents/board-game-recommender/import/Data/"
            "test_data_saved/model.sav"),
        k=10):
    """Train an ALS model, pickle it, and compute p@k and MAP@k.

    The sparse matrix is loaded from ``sparse_item_user_path``, split 80/20
    into train and test parts, and an ALS model is fitted on the train part.
    The fitted model is pickled to ``model_path`` before it is evaluated.

    Args:
        sparse_item_user_path (str): ``.npz`` file holding the sparse
            interaction matrix. Defaults to the original hard-coded location.
        model_path (str): where the fitted model is pickled. Defaults to the
            original hard-coded location.
        k (int): cut-off used for both metrics (default 10).

    Returns:
        tuple: ``(p_at_k, m_at_k)`` as returned by ``precision_at_k`` and
        ``mean_average_precision_at_k``.
    """
    sparse_item_user = load_npz(sparse_item_user_path)

    train, test = train_test_split(sparse_item_user, train_percentage=0.8)

    # NOTE(review): the same `train` matrix is given to `fit` and to the
    # metrics as `train_user_items`; which orientation each one expects
    # depends on the installed implicit version -- confirm.
    als = implicit.als.AlternatingLeastSquares(factors=100,
                                               regularization=0.1,
                                               iterations=20,
                                               calculate_training_loss=False)
    als.fit(train)

    with open(model_path, 'wb') as pickle_out:
        pickle.dump(als, pickle_out)

    p_at_k = precision_at_k(als,
                            train_user_items=train,
                            test_user_items=test,
                            K=k)
    m_at_k = mean_average_precision_at_k(als, train, test, K=k)
    print('precision at k:', p_at_k)
    print('mean average precision at k:', m_at_k)

    return p_at_k, m_at_k
Esempio n. 2
0
    def fit(self, list_name, save=True):
        """Build the product/user matrix, train ALS on a split, optionally save.

        The matrix and user mappings come from
        ``create_item_user_matrix_light`` and are stored on the instance.
        An ALS model is fitted on the train part of a ``train_test_split``
        call (second argument 0.8) and precision@10 on the other part is
        printed.

        Args:
            list_name: forwarded to ``create_item_user_matrix_light``.
            save: when truthy, the matrix, model, mappings and size limits are
                handed to ``save_object`` together with ``self.save_path``.

        Returns:
            The fitted model (also stored as ``self.model``).
        """
        matrix, mappings = self.create_item_user_matrix_light(
            list_name, self.max_users, self.max_products)
        self.product_user_matrix = matrix
        self.user_mappings = mappings

        train_part, holdout_part = train_test_split(self.product_user_matrix, 0.8)

        # The model is fitted on the train part only, not on the full matrix.
        self.model = implicit.als.AlternatingLeastSquares(factors=100, iterations=15)
        self.model.fit(train_part)

        precision = precision_at_k(
            self.model,
            train_part.T.tocsr(),
            holdout_part.T.tocsr(),
            K=10,
            num_threads=4,
        )
        print(precision)

        if not save:
            return self.model

        save_object(
            {
                'product_user_matrix': self.product_user_matrix,
                'model': self.model,
                'user_mappings': self.user_mappings,
                'product_mappings': self.product_mappings,
                'max_users': self.max_users,
                'index_to_product_id': self.index_to_product_id,
                'max_products': self.max_products,
            },
            self.save_path,
        )
        return self.model
Esempio n. 3
0
def model(sparse_user_item_file_path='files/sparse_user_item.npz',
          model_file_path='files/model.sav'):
    """Computes p@k and map@k evaluation metrics and saves the model.

    The matrix is split 80/20, an ALS model is fitted on the train part and
    pickled to ``model_file_path`` before the metrics are computed.

    Args:
        sparse_user_item_file_path (str): file location for a
            scipy.sparse.csr_matrix sparse user * item matrix
        model_file_path (str): file location the fitted model is pickled to
            (defaults to the previously hard-coded 'files/model.sav')

    Returns:
        p_at_k (float): precision @ k recommendations, with k=10
        m_at_k (float): mean average precision @ k recommendations, with k=10
    """
    sparse_user_item = load_npz(sparse_user_item_file_path)

    train, test = train_test_split(sparse_user_item, train_percentage=0.8)

    als = implicit.als.AlternatingLeastSquares(factors=100,
                                               regularization=0.1,
                                               iterations=100,
                                               calculate_training_loss=False)
    als.fit(train)

    with open(model_file_path, 'wb') as pickle_out:
        pickle.dump(als, pickle_out)

    p_at_k = precision_at_k(als,
                            train_user_items=train,
                            test_user_items=test,
                            K=10)
    map_at_k = mean_average_precision_at_k(als, train, test, K=10)

    return p_at_k, map_at_k
Esempio n. 4
0
def evaluate_bpr_model(hyperparameters, train, test, validation):
    """Fit a BayesianPersonalizedRanking model and score it with precision@10.

    Args:
        hyperparameters: mapping read for the keys 'factors' and 'n_iter'.
        train: matrix passed to ``model.fit``; its transpose (as CSR) is the
            training matrix handed to the metric.
        test: matrix whose transpose (as CSR) is scored for ``test_eval``.
        validation: matrix whose transpose (as CSR) is scored for ``val_eval``.

    Returns:
        tuple: ``(test_eval, val_eval)``, each a dict with the single key
        ``'p@k'``.
    """
    model = BayesianPersonalizedRanking(factors=hyperparameters['factors'],
                                        iterations=hyperparameters['n_iter'],
                                        num_threads=nproc)
    model.fit(train)

    # Transpose the training matrix once; both evaluations reuse it.
    train_t = train.T.tocsr()

    test_eval = {
        'p@k': precision_at_k(model, train_t, test.T.tocsr(), K=10)
    }
    val_eval = {
        'p@k': precision_at_k(model, train_t, validation.T.tocsr(), K=10)
    }
    return test_eval, val_eval
Esempio n. 5
0
def evaluate_lmf_model(hyperparameters, train, test, validation):
    """Fit a LogisticMatrixFactorization model and score it with precision@10.

    Args:
        hyperparameters: mapping read for the keys 'factors' and 'n_iter'.
        train: matrix passed to ``model.fit``; its transpose (as CSR) is the
            training matrix handed to the metric.
        test: matrix whose transpose (as CSR) is scored for ``test_eval``.
        validation: matrix whose transpose (as CSR) is scored for ``val_eval``.

    Returns:
        tuple: ``(test_eval, val_eval)``, each a dict with the single key
        ``'p@k'``.
    """
    model = LogisticMatrixFactorization(factors=hyperparameters['factors'],
                                        iterations=hyperparameters['n_iter'],
                                        num_threads=nproc)
    model.fit(train)

    # Transpose the training matrix once; both evaluations reuse it.
    train_t = train.T.tocsr()

    test_eval = {
        'p@k': precision_at_k(model, train_t, test.T.tocsr(), K=10)
    }
    val_eval = {
        'p@k': precision_at_k(model, train_t, validation.T.tocsr(), K=10)
    }
    return test_eval, val_eval
Esempio n. 6
0
def evaluate_als_model(hyperparameters, train, test, validation):
    """Fit an AlternatingLeastSquares model and score it with precision@10.

    Args:
        hyperparameters: mapping read for the keys 'factors' and 'n_iter'.
        train: matrix passed to ``model.fit``; its transpose (as CSR) is the
            training matrix handed to the metric.
        test: matrix whose transpose (as CSR) is scored for ``test_eval``.
        validation: matrix whose transpose (as CSR) is scored for ``val_eval``.

    Returns:
        tuple: ``(test_eval, val_eval)``, each a dict with the single key
        ``'p@k'``.
    """
    model = AlternatingLeastSquares(factors=hyperparameters['factors'],
                                    iterations=hyperparameters['n_iter'],
                                    num_threads=nproc)
    model.fit(train)

    # Transpose the training matrix once; both evaluations reuse it.
    train_t = train.T.tocsr()

    test_eval = {
        'p@k': precision_at_k(model, train_t, test.T.tocsr(), K=10)
    }
    val_eval = {
        'p@k': precision_at_k(model, train_t, validation.T.tocsr(), K=10)
    }
    return test_eval, val_eval
    def evaloutput(self, K=10):
        """Load the pickled model and report p@K and MAP@K on a new 80/20 split.

        Reads the model from 'model_NR.sav' and the sparse matrix from
        'sparse_user_item_NR.npz', prints the shapes of both split parts and
        evaluates the loaded model on them.

        Args:
            K: cut-off passed positionally to both metric functions.

        Returns:
            tuple: ``(p_at_k, m_at_k)``.
        """
        # NOTE: unpickling runs code stored in the file; only load a
        # 'model_NR.sav' that comes from a trusted source.
        with open('model_NR.sav', 'rb') as model_file:
            fitted = pickle.load(model_file)

        # NOTE(review): the split is made here, after loading, so it presumably
        # differs from the split the model was trained on -- confirm.
        interactions = load_npz("sparse_user_item_NR.npz")
        train_part, test_part = train_test_split(interactions,
                                                 train_percentage=0.8)
        print("test", test_part.shape)
        print("train", train_part.shape)

        return (precision_at_k(fitted, train_part, test_part, K),
                mean_average_precision_at_k(fitted, train_part, test_part, K))
Esempio n. 8
0
    def test_evaluation(self):
        """Precision@1 against the 50x50 identity test matrix must equal 1."""
        board = self.get_checker_board(50)
        train_user_items = board.T.tocsr()

        fitted = self._get_model()
        fitted.fit(board, show_progress=False)

        # The diagonal is withheld from the training data for testing, and
        # test_recommend verifies that it is returned for each user, so p@1
        # against the identity matrix should be 1.0.
        held_out_diagonal = csr_matrix(np.eye(50))
        precision = precision_at_k(
            fitted,
            train_user_items.tocsr(),
            held_out_diagonal,
            K=1,
            show_progress=False,
        )
        self.assertEqual(precision, 1)
Esempio n. 9
0
    def test_evaluation(self):
        """Precision@1 against the 50x50 identity test matrix must equal 1."""
        board = self.get_checker_board(50)
        train_user_items = board.T.tocsr()

        fitted = self._get_model()
        fitted.fit(board, show_progress=False)

        # The diagonal is withheld from the training data for testing, and
        # test_recommend verifies that it is returned for each user, so p@1
        # against the identity matrix should be 1.0.
        held_out_diagonal = csr_matrix(np.eye(50))
        precision = precision_at_k(
            fitted,
            train_user_items.tocsr(),
            held_out_diagonal,
            K=1,
            show_progress=False,
        )
        self.assertEqual(precision, 1)
Esempio n. 10
0
def evaluate_model(model_name="als"):
    """Fit a model on a train split and report precision@k on the held-out part.

    This is a single train/test split, not a cross-validation.

    Args:
        model_name: name handed to ``get_model`` to select the model.

    Returns:
        The value returned by ``precision_at_k`` (it is also printed).
    """
    # Only the interaction matrix is used here.
    _artists, _users, plays = get_twitter()

    # create the (not yet fitted) model
    model = get_model(model_name)

    # split data_set to train set and testing set
    train, testing = train_test_split(plays)

    # The metric needs a fitted model; previously it was evaluated without
    # ever being trained.
    # NOTE(review): `train` is used as the user-items matrix below; older
    # implicit releases expect the transposed (item-users) matrix in `fit` --
    # confirm against the installed version.
    model.fit(train)

    # evaluation
    result = precision_at_k(model=model, train_user_items=train, test_user_items=testing)

    print('precision@k = ', result)
    return result
    def get_train(self,
                  train_config: MatrixTrainingConfig,
                  report_test: Optional[bool] = True,
                  test_df=None,
                  overwrite=False):
        """Optionally run a precision sanity check, then fit ALS on ``self.coo``.

        Args:
            train_config: supplies factor, regularization, iterations,
                conf_scale, top_n, train_percentage, random_state and
                safe_pass.
            report_test: when truthy, a throw-away model is fitted and its
                precision at ``top_n`` is logged before the final training.
            test_df: optional held-out data. When given, the check trains on
                ``self.coo`` and tests on ``self._to_coo(test_df)``; otherwise
                ``self.coo`` is split with ``train_test_split``.
            overwrite: allow replacing an already present ``self.model``.

        Raises:
            Exception: a model already exists and ``overwrite`` is false.
            AssertionError: ``train_config`` is falsy, ``test_df`` is empty, or
                the precision does not exceed a truthy ``safe_pass``.
        """
        if self.model and not overwrite:
            raise Exception(
                'Already trained and does not allow overwrite (consider access via model instance).'
            )

        assert train_config, 'train configuration has to be provided.'

        def _new_als():
            # Fresh, unfitted ALS model configured from train_config.
            return implicit.als.AlternatingLeastSquares(
                factors=train_config.factor,
                regularization=train_config.regularization,
                iterations=train_config.iterations)

        if report_test:
            logger.info('-- Performing MM sanity check on {} {}'.format(
                self.col1, self.col2))

            if test_df is not None:
                # Caller supplied the held-out data: train on everything.
                assert len(test_df) > 0
                fit_matrix = self.coo
                holdout_matrix = self._to_coo(test_df)
            else:
                # Seed numpy first so the split is repeatable; a falsy
                # random_state (including 0) leaves the RNG untouched.
                if train_config.random_state:
                    np.random.seed(train_config.random_state)
                fit_matrix, holdout_matrix = train_test_split(
                    self.coo, train_percentage=train_config.train_percentage)

            sanity_model = _new_als()
            sanity_model.fit(fit_matrix * train_config.conf_scale)
            prec = precision_at_k(sanity_model,
                                  fit_matrix.T,
                                  holdout_matrix.T,
                                  K=train_config.top_n)
            logger.warning('ACCURACY REPORT at top {}: {:.5f}%'.format(
                train_config.top_n, prec * 100))

            # A truthy safe_pass is a lower bound the precision must exceed.
            if train_config.safe_pass:
                assert prec > train_config.safe_pass

        # training on complete matrix
        logger.info('Training on complete matrix')
        full_model = _new_als()
        full_model.fit(self.coo * train_config.conf_scale)
        self.model = full_model
Esempio n. 12
0
def model():
    """Train ALS on an 80/20 split, pickle the model, return p@10 and MAP@10.

    The matrix is read from "sparse_item_user.npz" and the fitted model is
    written to 'model.sav', both relative to the working directory.

    Returns:
        tuple: ``(p_at_k, m_at_k)`` with K=10.
    """
    interactions = load_npz("sparse_item_user.npz")
    train_part, test_part = train_test_split(interactions,
                                             train_percentage=0.8)

    als_settings = {
        'factors': 100,
        'regularization': 0.1,
        'iterations': 20,
        'calculate_training_loss': False,
    }
    fitted = implicit.als.AlternatingLeastSquares(**als_settings)
    fitted.fit(train_part)

    # Persist the model before evaluating it.
    with open('model.sav', 'wb') as model_file:
        pickle.dump(fitted, model_file)

    precision = precision_at_k(fitted,
                               train_user_items=train_part,
                               test_user_items=test_part,
                               K=10)
    mean_avg_precision = mean_average_precision_at_k(fitted, train_part,
                                                     test_part, K=10)
    return precision, mean_avg_precision
Esempio n. 13
0
    def train_test_split_recommend(self,model,user_item_csr,user_lookup,user_id):
        """Fit ``model`` on a split, print Precision@20/NDCG@20, recommend.

        Args:
            model: object providing ``fit`` and ``recommend_all``.
            user_item_csr: matrix handed to ``train_test_split``.
            user_lookup: its ``index.categories`` become the column labels of
                the recommendations DataFrame.
            user_id: column label selected from that DataFrame.

        Returns:
            The ``user_id`` column of the recommendations DataFrame.
        """
        train_part, test_part = train_test_split(user_item_csr)

        # The model is fitted on the transposed training split.
        model.fit(train_part.T.tocsr())

        # Precision@N & NDCG@N
        precision = precision_at_k(model, train_part, test_part, K=20)
        ndcg = ndcg_at_k(model, train_part, test_part, K=20)
        print('Precision@20: {0}\n NDCG@20: {1}\n'.format(precision, ndcg))

        # Recommend items to every user; the result is transposed before it
        # is labelled with the user categories as columns.
        all_recommendations = model.recommend_all(
            test_part, filter_already_liked_items=True)
        recommendations = pd.DataFrame(data=all_recommendations.T,
                                       columns=user_lookup.index.categories)
        print('Recommendations Dataframe:\n{}'.format(recommendations))

        return recommendations[user_id]
Esempio n. 14
0
def learningCurve(model, train, test, epochs, outFile=None,
                  k=5, showProgress=True, numThreads=12):
    """Train ``model`` step by step and record ranking metrics per checkpoint.

    For every value in ``epochs`` the model is trained for the difference to
    the previous value (``model.iterations = epoch - prevEpoch``). ``fit`` is
    used while the model has no ``user_vectors`` attribute, ``fit_partial``
    afterwards. After each step p@k, MAP@k, NDCG@k and AUC@k are computed on
    the transposed train/test matrices and the row is passed to ``printLog``.

    Args:
        model: model to train; its ``iterations`` attribute is overwritten.
        train: training matrix passed to ``fit`` / ``fit_partial``.
        test: held-out matrix used only for the metrics.
        epochs: iterable of cumulative epoch counts.
            NOTE(review): presumably expected in increasing order -- confirm.
        outFile: forwarded to ``printLog``.
        k: cut-off for all four metrics.
        showProgress: forwarded as ``show_progress``.
        numThreads: forwarded as ``num_threads`` to the metric functions.

    Returns:
        tuple: ``(model, pAtK, MAPatK, NDCGatK, AUCatK)`` where the last four
        are lists with one entry per value in ``epochs``.
    """
    pAtK = []
    MAPatK = []
    NDCGatK = []
    AUCatK = []

    headers = ["epochs", f"p@{k}", f"MAP@{k}", f"NDCG@{k}", f"AUC@{k}"]
    printLog(headers, header=True, outFile=outFile)

    # The transposed matrices do not change between epochs, so build them once
    # instead of four times per epoch.
    trainT = train.T.tocsr()
    testT = test.T.tocsr()

    prevEpoch = 0
    for epoch in epochs:
        # Train only for the epochs added since the previous checkpoint.
        model.iterations = epoch - prevEpoch
        if not hasattr(model, "user_vectors"):
            model.fit(train, show_progress=showProgress)
        else:
            model.fit_partial(train, show_progress=showProgress)

        pAtK.append(precision_at_k(model, trainT, testT,
                                   K=k, show_progress=showProgress,
                                   num_threads=numThreads))
        MAPatK.append(mean_average_precision_at_k(model, trainT, testT,
                                                  K=k,
                                                  show_progress=showProgress,
                                                  num_threads=numThreads))
        NDCGatK.append(ndcg_at_k(model, trainT, testT,
                                 K=k, show_progress=showProgress,
                                 num_threads=numThreads))
        AUCatK.append(AUC_at_k(model, trainT, testT,
                               K=k, show_progress=showProgress,
                               num_threads=numThreads))

        row = [epoch, pAtK[-1], MAPatK[-1], NDCGatK[-1], AUCatK[-1]]
        printLog(row, outFile=outFile)
        prevEpoch = epoch

    return model, pAtK, MAPatK, NDCGatK, AUCatK
Esempio n. 15
0
    print("Training model")
    print(asctime(localtime()), flush=True)
    t0 = time()

    model.fit(train, show_progress=args.progressBar)
    print(f"Δt: {time() - t0:5.1f}s", flush=True)

    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    k = args.k

    print(f"Computing p@{k} ...", flush=True)
    t0 = time()
    pAtK = precision_at_k(model, trainTscr, testTscr, K=k,
                          show_progress=args.progressBar,
                          num_threads=args.numThreads)
    print(f"Δt: {time() - t0:5.1f}s")
    print(f"Computing MAP@{k} ...", flush=True)
    t0 = time()
    MAPatK = mean_average_precision_at_k(model, trainTscr, testTscr, K=k,
                                         show_progress=args.progressBar,
                                         num_threads=args.numThreads)
    print(f"Δt: {time() - t0:5.1f}s")
    print(f"Computing NDCG@{k} ...", flush=True)
    t0 = time()
    NDCGatK = ndcg_at_k(model, trainTscr, testTscr, K=k,
                        show_progress=args.progressBar,
                        num_threads=args.numThreads)
    AUCatK = AUC_at_k(model, trainTscr, testTscr, K=k,
                      show_progress=args.progressBar,
Esempio n. 16
0
def run(modelName, datasetName, factorCt, k, λ, α,
        maxIters, showProgress, useGPU, threadCt):
    """Train one model on a dataset and log p@k, MAP@k, NDCG@k and AUC@k.

    Args:
        modelName: name handed to ``getModel``; for 'als' no 'alpha' is passed.
        datasetName: name handed to ``fetchDataset``.
        factorCt: value of the 'factors' model parameter.
        k: cut-off for all ranking metrics.
        λ: value of the 'regularization' model parameter.
        α: value of the 'alpha' model parameter (non-ALS models only).
        maxIters: value of the 'iterations' model parameter.
        showProgress: forwarded as ``show_progress`` to fit and the metrics.
        useGPU: value of the 'use_gpu' model parameter.
        threadCt: forwarded as ``num_threads`` to the metrics.

    Every metric is reported through ``ex.log_scalar`` and printed together
    with its own elapsed time; nothing is returned.
    """
    # 'alpha' is only supplied for non-ALS models; the key order matches the
    # two literal dicts this replaces.
    params = {'factors': factorCt, 'regularization': λ}
    if modelName != 'als':
        params['alpha'] = α
    params['iterations'] = maxIters
    params['use_gpu'] = useGPU
    model = getModel(modelName, volubility=2, params=params)

    artists, users, plays = fetchDataset(datasetName, volubility=2)

    print(artists.shape, users.shape, plays.shape, flush=True)

    if isinstance(model, AlternatingLeastSquares):
        # lets weight these models by bm25weight.
        print("weighting matrix by bm25_weight")
        plays = bm25_weight(plays, K1=100, B=0.8)

        # also disable building approximate recommend index
        model.approximate_recommend = False

    plays = plays.tocsr()

    train, test = train_test_split(plays, train_percentage=0.8)

    print("Training model")
    print(asctime(localtime()), flush=True)
    t0 = time()

    model.fit(train, show_progress=showProgress)
    print(f"Δt: {time() - t0:5.1f}s", flush=True)

    trainTscr = train.T.tocsr()
    testTscr = test.T.tocsr()

    # Each metric gets its own "Computing" line and its own timing; before,
    # NDCG and AUC shared a single timing and AUC was not announced.
    metrics = (("p", precision_at_k),
               ("MAP", mean_average_precision_at_k),
               ("NDCG", ndcg_at_k),
               ("AUC", AUC_at_k))
    scores = {}
    for label, metricFn in metrics:
        print(f"Computing {label}@{k} ...", flush=True)
        t0 = time()
        scores[label] = metricFn(model, trainTscr, testTscr, K=k,
                                 show_progress=showProgress,
                                 num_threads=threadCt)
        ex.log_scalar(f"{label}@{k}", scores[label])
        print(f"Δt: {time() - t0:5.1f}s")

    # The ", " after the MAP value was missing, which glued it to "NDCG@k".
    print(f"p@{k}: {scores['p']:6.4f}, MAP@{k}: {scores['MAP']:6.4f}, "
          f"NDCG@{k}: {scores['NDCG']:6.4f}, AUC@{k}: {scores['AUC']:6.4f}",
          flush=True)
Esempio n. 17
0
def train_evaluate_als_model(csr_prd_cli_matrix):
    """
    Define, fit and tune ALS model

    Returns an instance of the implicit-ALS model fitted on the full matrix
    with the best hyperparameters found by a Grid Search. Uses Precision@K as
    the evaluation metric, with K set to 10% of the row count of the
    evaluated split (at least 1).

    Parameters
    ----------
    csr_prd_cli_matrix: scipy.csr_matrix
        Sparse CSR representation of df_long, with shape prd_col x cli_col.

    Returns
    -------
    model: implicit.als.AlternatingLeastSquares model
    """
    params = {
        'factors': [50, 100, 150],
        'regularization': [0.01, 0.05, 0.1],
        'dtype': [npfloat64],
        'use_native': [True],
        'use_cg': [False],
        'use_gpu': [False],
        'iterations': [15, 30, 50],
        'num_threads': [0],
        'random_state': [42]
    }
    param_grid = ParameterGrid(params)

    # Two nested 80/20 splits: df_test is kept out of the grid search, and
    # df_eval is used to score each grid point.
    df_grid, df_test = train_test_split(csr_prd_cli_matrix,
                                        train_percentage=0.8)
    df_train, df_eval = train_test_split(df_grid, train_percentage=0.8)

    # K is 10% of the row count; clamp to 1 so that matrices with fewer than
    # ten rows do not ask for a top-0 ranking.
    eval_k_size = max(1, int(df_eval.shape[0] * 0.1))
    test_k_size = max(1, int(df_test.shape[0] * 0.1))

    grid_score = {}
    for i, grid in enumerate(param_grid):
        m = AlternatingLeastSquares(**grid)
        m.fit(df_train, show_progress=False)
        grid_score[i] = precision_at_k(m,
                                       df_train,
                                       df_eval,
                                       K=eval_k_size,
                                       num_threads=0,
                                       show_progress=False)

    scores = pd.Series(grid_score)
    # The reported value is Precision@K (it was mislabelled as Mean Average
    # Precision).
    print('Best evaluation Precision (@ K={}): {}'.format(
        eval_k_size, scores.max()))
    best_params = param_grid[scores.idxmax()]

    # NOTE(review): the returned model is fitted on the full matrix, which
    # df_test was split from, so the test score below is presumably computed
    # on interactions the model has already seen -- treat it as optimistic.
    model = AlternatingLeastSquares(**best_params)
    model.fit(csr_prd_cli_matrix)
    test_score = precision_at_k(model,
                                df_train,
                                df_test,
                                K=test_k_size,
                                num_threads=0,
                                show_progress=False)
    print('Best test Precision (@ K={}): {}'.format(
        test_k_size, test_score))

    return model
Esempio n. 18
0
 def test(self, train_size=0.8, K=10):
     """Print precision@K of ``self.model`` on a new split of the matrix.

     Args:
         train_size: second argument of ``train_test_split`` (it was ignored
             before in favour of a hard-coded 0.8).
         K: cut-off passed to ``precision_at_k``.
     """
     train, holdout = train_test_split(self.product_user_matrix, train_size)
     p = precision_at_k(self.model, train.T.tocsr(), holdout.T.tocsr(), K, num_threads=2)
     print ("precision at K =", K, ":", p)
Esempio n. 19
0
    csr_data, user_lookup, item_lookup = create_sparse_matrix(
        data, userkey, itemkey)
    #print(csr_data)

    csr_data = csr_data.T.tocsr()
    print(csr_data)
    train, test = train_test_split(csr_data)
    print(train, test)

    #print(user_lookup,item_lookup)
    """initialize a model --- choose a model"""
    #model = implicit.als.AlternatingLeastSquares(factors=20,regularization=0.1,iterations=50)
    model = implicit.als.AlternatingLeastSquares(factors=50)
    #model = implicit.bpr.BayesianPersonalizedRanking(factors=100)
    #model = implicit.lmf.LogisticMatrixFactorization(factors=100)
    #model = implicit.approximate_als.AnnoyAlternatingLeastSquares()
    print(train.T.tocsr())
    """Train the model on a sparse matrix of item/user/confidence weights"""
    model.fit(train.T.tocsr())
    """Evaluation Metrics Calculation"""
    precision = precision_at_k(model, train, test, K=20)
    ndcg = ndcg_at_k(model, train, test, K=20)

    print('Precision@20: {0}\n NDCG@20: {1}\n'.format(precision, ndcg))
    """Recommend N best items for each user"""
    top_rec_4all = model.recommend_all(test, N=20)
    top_rec_4all = top_rec_4all.T
    #top_rec_4all = pd.DataFrame(data=top_rec_4all,columns=user_lookup.index.categories)
    top_rec_4all = pd.DataFrame(data=top_rec_4all,
                                columns=user_lookup.index.values)
    print(top_rec_4all)