Code example #1
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import recall_at_k, auc_score
# dcg_score here is assumed to be scikit-learn's implementation
from sklearn.metrics import dcg_score


def main():
    movielens = fetch_movielens()

    train = movielens['train']
    test = movielens['test']
    print(train.shape)
    print(test.shape)

    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=5)

    k = 10
    train_recall = recall_at_k(model, train, k=k).mean()
    test_recall = recall_at_k(model, test, k=k).mean()
    print(f'recall_at_{k}(train): {train_recall}')
    print(f'recall_at_{k}(test) : {test_recall}')

    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()
    print(f'auc_score(train): {train_auc}')
    print(f'auc_score(test) : {test_auc}')

    y_train_preds = model.predict_rank(train)
    y_test_preds = model.predict_rank(test)
    train_dcg = dcg_score(train.toarray(), y_train_preds.toarray())
    test_dcg = dcg_score(test.toarray(), y_test_preds.toarray())
    print(f'dcg_score(train): {train_dcg}')
    print(f'dcg_score(test) : {test_dcg}')

    print('DONE')

    return 0
Code example #2
    def evaluate(self, model, user_items_train):

        print("Splitting the data into train/test set...\n")
        train, test = cross_validation.random_train_test_split(
            user_items_train)
        print(train, test)

        print("Evaluating methods...\n")

        train_recall_10 = recall_at_k(model, train, k=10).mean()
        test_recall_10 = recall_at_k(model, test, k=10).mean()

        train_recall_20 = recall_at_k(model, train, k=20).mean()
        test_recall_20 = recall_at_k(model, test, k=20).mean()

        train_precision_10 = precision_at_k(model, train, k=10).mean()
        test_precision_10 = precision_at_k(model, test, k=10).mean()

        train_precision_20 = precision_at_k(model, train, k=20).mean()
        test_precision_20 = precision_at_k(model, test, k=20).mean()

        print("Train : Recall@10:{0:.3f}, Recall@20:{1:.3f}".format(
            train_recall_10, train_recall_20))
        print("Test : Recall@10:{0:.3f}, Recall@20:{1:.3f}".format(
            test_recall_10, test_recall_20))

        print("Train: Precision@10:{0:.3f}, Precision@20:{1:.3f}".format(
            train_precision_10, train_precision_20))
        print("Test: Precision@10:{0:.3f}, Precision@20:{1:.3f}".format(
            test_precision_10, test_precision_20))
Code example #3
import pandas as pd
from scipy.sparse import csr_matrix
from lightfm import LightFM
from lightfm.evaluation import recall_at_k


def test(train_matrix_path, test_matrix_path):
    train_rating_df = pd.read_csv(train_matrix_path,
                                  sep=',',
                                  names=['profile', 'item', 'rating'])
    train_row, train_col, train_ratings = train_rating_df['profile'].values, train_rating_df['item'].values, \
                                          train_rating_df['rating'].values

    n_user = 627  #np.max(train_row) + 1
    n_item = 12  #np.max(train_col) + 1

    train_data = csr_matrix((train_ratings, (train_row, train_col)),
                            shape=(n_user, n_item))
    model = LightFM(loss='warp',
                    no_components=200,
                    item_alpha=0.001,
                    user_alpha=0.001)
    model.fit(train_data, epochs=20, num_threads=30)

    test_rating_df = pd.read_csv(test_matrix_path,
                                 sep=',',
                                 names=['profile', 'item', 'rating'])
    test_row, test_col, test_ratings = test_rating_df['profile'].values, test_rating_df['item'].values, \
                                       test_rating_df['rating'].values

    test_data = csr_matrix((test_ratings, (test_row, test_col)),
                           shape=(n_user, n_item))
    print("Train precision: %.5f" %
          recall_at_k(model, train_data, k=6, num_threads=1).mean())
    print("Test precision: %.5f" %
          recall_at_k(model, test_data, k=6, num_threads=1).mean())
Code example #4
File: test_evaluation.py  Project: linggom/lightfm
def test_recall_at_k():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(test)

    for k in (10, 5, 1):

        # Without omitting train interactions
        recall = evaluation.recall_at_k(model, test, k=k)
        expected_mean_recall = _recall_at_k(model, test, k)

        assert np.allclose(recall.mean(), expected_mean_recall)
        assert len(recall) == (test.getnnz(axis=1) > 0).sum()
        assert (
            len(evaluation.recall_at_k(model, train, preserve_rows=True))
            == test.shape[0]
        )

        # With omitting train interactions
        recall = evaluation.recall_at_k(model, test, k=k, train_interactions=train)
        expected_mean_recall = _recall_at_k(model, test, k, train=train)

        assert np.allclose(recall.mean(), expected_mean_recall)
Code example #5
def test_recall_at_k():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(test)

    for k in (10, 5, 1):

        # Without omitting train interactions
        recall = evaluation.recall_at_k(model, test, k=k)
        expected_mean_recall = _recall_at_k(model, test, k)

        assert np.allclose(recall.mean(), expected_mean_recall)
        assert len(recall) == (test.getnnz(axis=1) > 0).sum()
        assert (len(evaluation.recall_at_k(
            model, train, preserve_rows=True)) == test.shape[0])

        # With omitting train interactions
        recall = evaluation.recall_at_k(model,
                                        test,
                                        k=k,
                                        train_interactions=train)
        expected_mean_recall = _recall_at_k(model, test, k, train=train)

        assert np.allclose(recall.mean(), expected_mean_recall)
Code example #6
def collab_filtering():
    """
    implements collaborative filtering version
    by using only the rating data from movielens dataset
    :return:
    """
    data = fetch_movielens()

    for key, value in data.items():
        print(key, type(value), value.shape)

    train = data['train']
    test = data['test']
    print(
        'The dataset has %s users and %s items, '
        'with %s interactions in the test and %s interactions in the training set.'
        % (train.shape[0], train.shape[1], test.getnnz(), train.getnnz()))

    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=50, num_threads=5)

    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model, test, k=10).mean()
    train_recall = recall_at_k(model, train, k=10).mean()
    test_recall = recall_at_k(model, test, k=10).mean()

    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()

    print('Precision: train %.2f, test %.2f.' %
          (train_precision, test_precision))
    print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    model = LightFM(learning_rate=0.05, loss='warp')

    # train a second model with the WARP loss (fit_partial here starts from this
    # new model's initial state, not from the BPR model above)
    model.fit_partial(train, epochs=50, num_threads=5)

    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model, test, k=10).mean()
    train_recall = recall_at_k(model, train, k=10).mean()
    test_recall = recall_at_k(model, test, k=10).mean()

    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()

    print("*****************")
    print("After re-training")
    print('Precision: train %.2f, test %.2f.' %
          (train_precision, test_precision))
    print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    #check sample recommendation
    sample_recommendation(model, data, [3, 25, 450])
Code example #7
File: lightFM_funcs.py  Project: haru26/Work-sample
    def train(self, interactions, test_percentage=0.25,
              n_components=30, learning_rate=0.5, loss='warp', model_k=15,
              n_jobs=4, epoch=30, evaluate_k=50):

        from lightfm.evaluation import precision_at_k
        from lightfm.evaluation import recall_at_k
        from lightfm.cross_validation import random_train_test_split
        
        train, test = random_train_test_split(interactions, test_percentage=test_percentage, random_state=None)
        
        mf_model = self.runMF(interactions = train,
                         n_components = n_components,
                         learning_rate = learning_rate,
                         loss = loss,
                         k = model_k,
                         epoch = epoch,
                         n_jobs = n_jobs)
        
        precise = precision_at_k(mf_model, test_interactions = test, k = evaluate_k)
        recall = recall_at_k(mf_model, test_interactions = test, k = evaluate_k)
        
        precise_test = precise.mean()
        recall_test = recall.mean()
        
        return mf_model, precise_test, recall_test
        
Code example #8
def run_BPR(split, user_item, prec_rec_at):
    # train_sparse = scipy.sparse.csr_matrix(train_splt.values)
    # test_sparse = scipy.sparse.csr_matrix(test_split.values)
    # copy the full user-item matrix for training; start the test matrix empty
    train = user_item.copy()
    test = user_item * 0

    for index, row in split.iterrows():
        # move the held-out ratings from the train matrix into the test matrix
        test.loc['m' + str(row['movieId']), 'u' + str(row['userId'])] = row['rating']
        train.loc['m' + str(row['movieId']), 'u' + str(row['userId'])] = 0

    train_sparse = scipy.sparse.csr_matrix(train.values)
    test_sparse = scipy.sparse.csr_matrix(test.values)

    model = LightFM(loss='bpr')
    model.fit(train_sparse, epochs=30, num_threads=4)

    # print("####################################################################################")
    # print("Test precision: %.2f" % precision_at_k(model, test_sparse, k=prec_rec_at).mean())
    # print("Test recall: %.2f" % recall_at_k(model, test_sparse, k=prec_rec_at).mean())
    # print("####################################################################################")

    precision = precision_at_k(model, test_sparse, k=prec_rec_at).mean()
    recall = recall_at_k(model, test_sparse, k=prec_rec_at).mean()

    return precision, recall
Code example #9
def test_LightFM_model(model,
                       test_interactions,
                       train_interactions,
                       user_features,
                       movie_features,
                       k=5):
    test_precision = precision_at_k(model,
                                    test_interactions,
                                    train_interactions,
                                    k=k,
                                    user_features=user_features,
                                    item_features=movie_features,
                                    num_threads=2).mean()
    test_recall = recall_at_k(model,
                              test_interactions,
                              train_interactions,
                              k=k,
                              user_features=user_features,
                              item_features=movie_features,
                              num_threads=2).mean()
    test_auc = auc_score(model,
                         test_interactions,
                         train_interactions,
                         user_features=user_features,
                         item_features=movie_features,
                         num_threads=2).mean()
    print('Model')
    print('Precision at k=', str(k), ': ', round(test_precision, 3), sep='')
    print('Recall at k=', str(k) + ': ', round(test_recall, 3), sep='')
    print('AUC: ', round(test_auc, 3), sep='')
    return ({
        'precision': round(test_precision, 3),
        'recall': round(test_recall, 3),
        'auc': round(test_auc, 3)
    })
Code example #10
def evaluate_model(model, metric, test, train):
    """
    Evaluate trained model on the test set, using one of the three available accuracy metrics
        AUC: the probability that a randomly chosen positive example has a higher score than a randomly chosen
        negative example.
        Precision: the fraction of known positives in the first k positions of the ranked list of results.
        Recall: the number of positive items in the first k positions of the ranked list of results divided by the
        number of positive items in the test period.
    :param model:(LightFM, required) - model to be evaluated
    :param metric:(string, required) - accuracy metric to be used, one of ['auc', 'precision', 'recall']
    :param test:(COO matrix, required) - known positives used to test the model
    :param train:(COO matrix, required) - training set; these interactions will be omitted from the score
           calculations to avoid re-recommending known positives.
    :return: test_score (float) - score computed on the test set
    """
    try:
        # make sure the metric is correct
        assert metric in ['auc', 'precision', 'recall']
        if metric == 'auc':
            test_score = auc_score(model, test, train).mean()
        elif metric == 'precision':
            test_score = precision_at_k(model, test, train, k=5).mean()
        else:
            test_score = recall_at_k(model, test, train, k=5).mean()
        return test_score
    except AssertionError:
        print('The metric provided is not correct or available!')
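A minimal usage sketch for the helper above, assuming the usual LightFM imports (auc_score, precision_at_k, recall_at_k) are already present in the same module; the MovieLens data below is only illustrative:

from lightfm import LightFM
from lightfm.datasets import fetch_movielens

data = fetch_movielens()
model = LightFM(loss='warp')
model.fit(data['train'], epochs=10)

# score the model with each of the three supported metrics
for metric in ('auc', 'precision', 'recall'):
    print(metric, evaluate_model(model, metric, data['test'], data['train']))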
Code example #11
def best_reccomendation():

    #define variables
    best = 0.0
    best_model = ''

    for model in models:
        score = 0.0
        pak_score = evaluation.precision_at_k(model, data2['test'])
        score += np.mean(pak_score)

        rak_score = evaluation.recall_at_k(model, data2['test'])
        score += np.mean(rak_score)

        auc_score = evaluation.auc_score(model, data2['test'])
        score += np.mean(auc_score)

        rr_score = evaluation.reciprocal_rank(model, data2['test'])
        score += np.mean(rr_score)

        print(score)
        if score >= best:
            best = score
            best_model = model

    return best_model
Code example #12
    def resultados_colaborativo(self):
        """
        Método resultados_colaboraivo. Obtiene los resultados del modelo colaborativo.

        Este método solo se utiliza en la interfaz de texto.
        """

        global train, test, modelo

        # Compute the results
        precision = precision_at_k(modelo,
                                   test,
                                   train_interactions=train,
                                   k=10,
                                   num_threads=self.CPU_THREADS).mean()
        auc = auc_score(modelo,
                        test,
                        train_interactions=train,
                        num_threads=self.CPU_THREADS).mean()
        recall = recall_at_k(modelo,
                             test,
                             train_interactions=train,
                             k=10,
                             num_threads=self.CPU_THREADS).mean()
        reciprocal = reciprocal_rank(modelo,
                                     test,
                                     train_interactions=train,
                                     num_threads=self.CPU_THREADS).mean()

        # Print the results
        imprimir_resultados_clasico(precision, auc, recall, reciprocal)
Code example #13
def recall_at_k_on_ranks(ranks,
                         test_interactions,
                         train_interactions=None,
                         k=10,
                         preserve_rows=False):
    return recall_at_k(
        model=ModelMockRanksCacher(ranks.copy()),
        test_interactions=test_interactions,
        train_interactions=train_interactions,
        k=k,
        preserve_rows=preserve_rows,
    )
Code example #14
def test_intersections_check():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # check error is raised when train and test have interactions in common
    with pytest.raises(ValueError):
        evaluation.auc_score(model, train, train_interactions=train, check_intersections=True)

    with pytest.raises(ValueError):
        evaluation.recall_at_k(model, train, train_interactions=train, check_intersections=True)

    with pytest.raises(ValueError):
        evaluation.precision_at_k(model, train, train_interactions=train, check_intersections=True)

    with pytest.raises(ValueError):
        evaluation.reciprocal_rank(model, train, train_interactions=train, check_intersections=True)

    # check no errors raised when train and test have no interactions in common
    evaluation.auc_score(model, test, train_interactions=train, check_intersections=True)
    evaluation.recall_at_k(model, test, train_interactions=train, check_intersections=True)
    evaluation.precision_at_k(model, test, train_interactions=train, check_intersections=True)
    evaluation.reciprocal_rank(model, test, train_interactions=train, check_intersections=True)

    # check no error is raised when there are intersections but flag is False
    evaluation.auc_score(model, train, train_interactions=train, check_intersections=False)
    evaluation.recall_at_k(model, train, train_interactions=train, check_intersections=False)
    evaluation.precision_at_k(model, train, train_interactions=train, check_intersections=False)
    evaluation.reciprocal_rank(model, train, train_interactions=train, check_intersections=False)
Code example #15
    def recall_at_k_lightfm(self, model_lightfm, sparse_user_item, k=5):
        """Recall встроенный в LightFM"""

        self.recall_res = recall_at_k(
            model_lightfm,
            sparse_user_item,
            user_features=csr_matrix(
                self.user_feat_lightfm_fixed.values).tocsr(),
            item_features=csr_matrix(
                self.item_feat_lightfm_fixed.values).tocsr(),
            k=k)

        return self.recall_res
Code example #16
def random_search_recall_at_k(train, test, num_samples=10, num_threads=1, k=5):

    for hyperparams in itertools.islice(sample_hyperparameters(), num_samples):
        num_epochs = hyperparams.pop("num_epochs")

        model = LightFM(**hyperparams)
        model.fit(train, epochs=num_epochs, num_threads=num_threads)

        r_at_k_score = recall_at_k(model, test, train_interactions=train, num_threads=num_threads, k=k).mean()
        
        hyperparams["num_epochs"] = num_epochs

        yield (r_at_k_score, hyperparams, model)
Code example #17
File: train.py  Project: nhanpotter/iRead_server
    def evaluate(self):
        # Data
        interactions_train, weights_train, interactions_test, weights_test = \
            DataFit.fit_evaluate()

        new_model = LightFM(loss='warp')
        new_model.fit(interactions_train,
                      item_features=self.books_features,
                      epochs=100,
                      num_threads=2,
                      sample_weight=weights_train)
        print('Precision @k(Train): {0}'.format(
            precision_at_k(new_model, interactions_train).mean()))
        print('Precision @k(Test): {0}'.format(
            precision_at_k(new_model, interactions_test).mean()))
        print('Recall @k(Train): {0}'.format(
            recall_at_k(new_model, interactions_train).mean()))
        print('Recall @k(Test): {0}'.format(
            recall_at_k(new_model, interactions_test).mean()))
        print('Auc Score(Train): {0}'.format(
            auc_score(new_model, interactions_train).mean()))
        print('Auc Score(Test): {0}'.format(
            auc_score(new_model, interactions_test).mean()))
Code example #18
File: test_evaluation.py  Project: linggom/lightfm
def test_intersections_check():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # check error is raised when train and test have interactions in common
    with pytest.raises(ValueError):
        evaluation.auc_score(
            model, train, train_interactions=train, check_intersections=True
        )

    with pytest.raises(ValueError):
        evaluation.recall_at_k(
            model, train, train_interactions=train, check_intersections=True
        )

    with pytest.raises(ValueError):
        evaluation.precision_at_k(
            model, train, train_interactions=train, check_intersections=True
        )

    with pytest.raises(ValueError):
        evaluation.reciprocal_rank(
            model, train, train_interactions=train, check_intersections=True
        )

    # check no errors raised when train and test have no interactions in common
    evaluation.auc_score(
        model, test, train_interactions=train, check_intersections=True
    )
    evaluation.recall_at_k(
        model, test, train_interactions=train, check_intersections=True
    )
    evaluation.precision_at_k(
        model, test, train_interactions=train, check_intersections=True
    )
    evaluation.reciprocal_rank(
        model, test, train_interactions=train, check_intersections=True
    )

    # check no error is raised when there are intersections but flag is False
    evaluation.auc_score(
        model, train, train_interactions=train, check_intersections=False
    )
    evaluation.recall_at_k(
        model, train, train_interactions=train, check_intersections=False
    )
    evaluation.precision_at_k(
        model, train, train_interactions=train, check_intersections=False
    )
    evaluation.reciprocal_rank(
        model, train, train_interactions=train, check_intersections=False
    )
Code example #19
def random_search(train,
                  test,
                  item_features=None,
                  user_features=None,
                  num_samples=10,
                  num_threads=16):
    """
    Sample random hyperparameters, fit a LightFM model, and evaluate it
    on the test set.

    Parameters
    ----------

    train: np.float32 coo_matrix of shape [n_users, n_items]
        Training data.
    test: np.float32 coo_matrix of shape [n_users, n_items]
        Test data.
    num_samples: int, optional
        Number of hyperparameter choices to evaluate.


    Returns
    -------

    generator of (auc_score, hyperparameter dict, fitted model)

    """

    for hyperparams in itertools.islice(sample_hyperparameters(), num_samples):
        num_epochs = hyperparams.pop("num_epochs")
        model = LightFM(**hyperparams)  # ,learning_rate=.03
        model.fit(train,
                  epochs=num_epochs,
                  num_threads=num_threads,
                  item_features=item_features,
                  user_features=user_features)
        ### should i pass in train_interactions (when i have repeats) ?
        #         score = auc_score(model, test, train_interactions=train, num_threads=num_threads).mean() # ORIG
        #         score = precision_at_k(model, test,  num_threads=num_threads,k=4).mean()
        score = recall_at_k(model,
                            test,
                            num_threads=num_threads,
                            k=90,
                            item_features=item_features,
                            user_features=user_features).mean()

        hyperparams["num_epochs"] = num_epochs

        yield (score, hyperparams, model)
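A hedged usage sketch of how the generator above is typically consumed; `train` and `test` are assumed to be the COO matrices described in the docstring:

# keep the hyperparameter set with the best test recall@90
best_score, best_hyperparams, best_model = max(
    random_search(train, test, num_samples=10),
    key=lambda x: x[0])
print('best recall@90: %.4f with %s' % (best_score, best_hyperparams))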
Code example #20
def recall_at_k(model,
                test_interactions,
                train_interactions=None,
                k=10,
                user_features=None,
                item_features=None,
                preserve_rows=False,
                num_threads=1,
                check_intersections=True):
    return evaluation.recall_at_k(model,
                                  test_interactions=test_interactions,
                                  train_interactions=train_interactions,
                                  k=k,
                                  user_features=user_features,
                                  item_features=item_features,
                                  preserve_rows=preserve_rows,
                                  num_threads=num_threads,
                                  check_intersections=check_intersections)
Code example #21
        def precrec():
            """Evaluates models on Precision@K/Recall@K and also outputs F1 Score.

            Measure the precision at k metric for a model: the fraction of known positives in the first k 
            positions of the ranked list of results. A perfect score is 1.0.

            Measure the recall at k metric for a model: the number of positive items in the first k 
            positions of the ranked list of results divided by the number of positive items in the test period. #
            A perfect score is 1.0.

            Compute the F1 score, also known as balanced F-score or F-measure: The F1 score can be interpreted as a weighted 
            average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. 
            The relative contribution of precision and recall to the F1 score are equal.

            """

            # train_precision = precision_at_k(model, 
            #                     train, 
            #                     k=k, 
            #                     user_features=user_features if user_features is not None else None,
            #                     item_features=item_features if item_features is not None else None, 
            #                     num_threads=NUM_THREADS).mean()
            # logger.info(model_name+' training set Precision@%s: %s' % (k, train_precision))

            precision = lightfm_precision_at_k(
                    model=model,
                    train_interactions=train,
                    test_interactions=test,
                    k=k,
                    item_features=item_features
                ).mean()
            logger.info(model_name+' Precision@%s: %s' % (k, precision))

            recall = recall_at_k(
                    model=model, 
                    train_interactions=train,
                    test_interactions=test,
                    k=k,
                    item_features=item_features
                ).mean()
            logger.info(model_name+' Recall@%s: %s' % (k, recall))

            fmeasure = (2 * precision * recall / (precision + recall)
                        if (precision + recall) > 0 else 0.0)
            logger.info(model_name+' F-Measure: %s' % fmeasure)
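For reference, a self-contained illustration of the F-measure arithmetic used above, with made-up precision and recall values purely for the sake of the example:

precision, recall = 0.30, 0.20  # illustrative numbers only
f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
print(round(f1, 3))  # harmonic mean of 0.30 and 0.20 -> 0.24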
Code example #22
    def resultados_por_contenido(self):
        """
        Método resultados_por_contenido. Obtiene los resultados del modelo basado en contenido.

        Este método solo se utiliza en la interfaz de texto.
        """

        global train, test, modelo, item_features, user_features

        # Compute the results
        precision = precision_at_k(modelo,
                                   test,
                                   train_interactions=train,
                                   k=10,
                                   user_features=user_features,
                                   item_features=item_features,
                                   num_threads=self.CPU_THREADS).mean()
        auc = auc_score(modelo,
                        test,
                        train_interactions=train,
                        user_features=user_features,
                        item_features=item_features,
                        num_threads=self.CPU_THREADS).mean()
        recall = recall_at_k(modelo,
                             test,
                             train_interactions=train,
                             k=10,
                             user_features=user_features,
                             item_features=item_features,
                             num_threads=self.CPU_THREADS).mean()
        reciprocal = reciprocal_rank(modelo,
                                     test,
                                     train_interactions=train,
                                     user_features=user_features,
                                     item_features=item_features,
                                     num_threads=self.CPU_THREADS).mean()

        # Print the results
        imprimir_resultados_clasico(precision, auc, recall, reciprocal)
Code example #23
def train():

    start = time.time()

    from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
    from utils import clean_data, to_sparse, create_user_dict, create_item_dict, fit_mf_model
    from utils import items_to_user, items_to_item, create_item_emdedding_distance_matrix, users_to_item
    print('Modules loaded...')
    training_metrics = {}

    data = pd.read_csv(training_data)
    piv, cols, interactions_ = to_sparse(data)
    interactions_.to_csv(interactions, index=True)

    user_dict_ = create_user_dict(interactions=interactions_)
    item_dict_ = create_item_dict(df = data, id_col = 'StockCode', name_col = 'Description')
    
    with open(user_dict, 'w') as json_file:
        json.dump(user_dict_, json_file)
    with open(item_dict, 'w') as json_file:
        json.dump(item_dict_, json_file)
    
    print('Data preparations ready...')
    mf_model = fit_mf_model(interactions = interactions_,
                            n_components = 140,
                            loss = 'warp',
                            epoch = 10,
                            n_jobs = 6)
    print('Model fit...')
    training_metrics["precision_at_3"] = round(precision_at_k(mf_model, piv, k=3).mean()*100)
    training_metrics["recall_at_3"] = round(recall_at_k(mf_model, piv, k=3).mean()*100)
    training_metrics["auc_score"]=round(auc_score(mf_model, piv).mean()*100)

    pickle.dump(mf_model, open(str(model_directory + "/" +"recomender.pkl"), "wb"))
    print('Model trained & serialized in %.1f seconds' % (time.time() - start))
    
    return jsonify(training_metrics)
Code example #24
File: main.py  Project: wararaki718/scrapbox3
from lightfm import LightFM
from lightfm.datasets import fetch_movielens
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score


def main():
    data = fetch_movielens()

    # check dataset
    for key, value in data.items():
        print(key, type(value), value.shape)
    print()

    # get dataset
    train = data['train']
    test = data['test']

    # modeling
    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=10)

    p_test = precision_at_k(model, test, k=10, train_interactions=train)
    r_test = recall_at_k(model, test, k=10, train_interactions=train)
    a_test = auc_score(model, test, train_interactions=train)

    print(p_test)
    print(r_test)
    print(a_test)
    print('DONE')
Code example #25
def run_lightfm(ratings, train, test, k_items, dataset):
    def create_interaction_matrix(df,
                                  user_col,
                                  item_col,
                                  rating_col,
                                  norm=False,
                                  threshold=None):
        '''
        Function to create an interaction matrix dataframe from transactional type interactions
        Required Input -
            - df = Pandas DataFrame containing user-item interactions
            - user_col = column name containing user's identifier
            - item_col = column name containing item's identifier
            - rating_col = column name containing user feedback on interaction with a given item
            - norm (optional) = True if a normalization of ratings is needed
            - threshold (required if norm = True) = value above which the rating is favorable
        Expected output -
            - Pandas dataframe with user-item interactions ready to be fed in a recommendation algorithm
        '''
        interactions = df.groupby([user_col, item_col])[rating_col] \
                .sum().unstack().reset_index(). \
                fillna(0).set_index(user_col)
        if norm:
            interactions = interactions.applymap(lambda x: 1
                                                 if x > threshold else 0)
        return interactions

    test_interactions = create_interaction_matrix(df=test,
                                                  user_col='userId',
                                                  item_col='movieId',
                                                  rating_col='rating')

    budget_l = dataset.budget.unique().tolist()
    gross_l = dataset.gross.unique().tolist()
    awards_l = dataset.awards.unique().tolist()
    nom_l = dataset.nominations.unique().tolist()
    votes_l = dataset.votes.unique().tolist()
    item_ids = np.unique(train.movieId.astype(int))
    print(f'length dataset: {len(dataset)}')
    dataset = dataset[dataset.movieId.isin(item_ids)]
    print(f'length dataset: {len(dataset)}')
    item_features_list = [f'rating_{f}' for f in range(11)]
    gen = [
        'Action', 'Adventure', 'Animation', "Children's", 'Comedy', 'Crime',
        'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',
        'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
    ]  # 'unknown' add unknown for movielens100k
    item_features_list += gen
    item_features_list += budget_l
    item_features_list += gross_l
    item_features_list += awards_l
    item_features_list += nom_l
    item_features_list += votes_l
    item_features = []
    for y, x in dataset.iterrows():
        genres = x['genres']
        tmp_row = (int(x['movieId']), [
            x['rating'], x['budget'], x['gross'], x['awards'],
            x['nominations'], x['votes']
        ])
        for g in genres:
            tmp_row[1].append(g)
        item_features.append(tmp_row)
    #item_features = [(int(x['movieId']), [x['rating'], z, x['budget'], x['gross'], x['awards'], x['votes']]) for y, x in dataset.iterrows() for z in x['genres']] #x['nominations']
    user_ids = np.unique(train.userId)
    built_dif = Dataset()
    built_dif.fit_partial(users=user_ids)
    built_dif.fit_partial(items=item_ids)
    built_dif.fit_partial(item_features=item_features_list)
    dataset_item_features = built_dif.build_item_features(item_features)
    (interactions, weights) = built_dif.build_interactions(
        ((int(x['userId']), int(x['movieId'])) for y, x in train.iterrows()))
    modelx = LightFM(no_components=30, loss='bpr', k=15, random_state=1)
    modelx.fit(interactions,
               epochs=30,
               num_threads=4,
               item_features=dataset_item_features
               )  #item_features=dataset_item_features
    test = sparse.csr_matrix(test_interactions.values)
    test = test.tocoo()
    num_users, num_items = built_dif.interactions_shape()
    print('Num users: {}, num_items {}.'.format(num_users, num_items))

    prec_list = dict()
    rec_list = dict()

    for num_k in k_items:
        test_precision = precision_at_k(
            modelx, test, k=num_k,
            item_features=dataset_item_features).mean()
        print('Hybrid test set precision: %s' % test_precision)
        test_recall = recall_at_k(modelx,
                                  test,
                                  k=num_k,
                                  item_features=dataset_item_features).mean()
        print('Hybrid test set recall: %s' % test_recall)
        if num_k in prec_list:
            prec_list[num_k].append(test_precision)
        else:
            prec_list[num_k] = test_precision

        if num_k in rec_list:
            rec_list[num_k].append(test_recall)
        else:
            rec_list[num_k] = test_recall

    return prec_list, rec_list
Code example #26
from lightfm import LightFM

model = LightFM(loss='warp', random_state=0)
model.fit(train_interactions,
          user_features=users_features,
          item_features=items_features,
          epochs=200,
          num_threads=1)

from lightfm.evaluation import recall_at_k
from lightfm.evaluation import precision_at_k

print("Train recall@7: %.2f" %
      recall_at_k(model,
                  train_interactions,
                  k=7,
                  user_features=users_features,
                  item_features=items_features).mean())
print("Test recall@7: %.2f" % recall_at_k(model,
                                          test_interactions,
                                          train_interactions,
                                          k=7,
                                          user_features=users_features,
                                          item_features=items_features).mean())
print("Train precision@7: %.2f" %
      precision_at_k(model,
                     train_interactions,
                     k=7,
                     user_features=users_features,
                     item_features=items_features).mean())
print("Test precision@7: %.2f" %
Code example #27
    # Below are the results of our random optimization, hard-coded as parameters now.
    # Best score 0.9843319654464722 at:
    bestparams = {'no_components': 59,
                  'learning_schedule': 'adagrad', 'loss': 'warp',
                  'learning_rate': 0.08565020895037347,
                  'item_alpha': 7.345729662383957e-10,
                  'user_alpha': 4.776609106732949e-09,
                  'max_sampled': 14, 'random_state': 69}
    model = LightFM(**bestparams)
    num_epochs = 8

    #uncomment below to optimize the number of epochs we train
    #num_epochs = optimize_epochs(model,cvinteractions,interactions,item_features,15)

    model.fit(interactions, item_features=item_features, epochs=num_epochs)

    testscore = auc_score(model, testinteractions, interactions, item_features=item_features).mean()
    testprec = precision_at_k(model, testinteractions, interactions, item_features=item_features, k=10).mean()
    testrecall = recall_at_k(model, testinteractions, interactions, item_features=item_features, k=10).mean()
    print('Test AUC Score = '+str(testscore))
    print('Test Precision = '+str(testprec))
    print('Test Recall = '+str(testrecall))

    #below we have our model predict for a User between 1 and 20. Simply change
    #the first argument to get predictions for other users.
    ranks = model.predict(1, np.arange(num_items))
    top_items = np.argsort(-ranks)
    print("Recommendations for user 1:")
    for i in range(0,11):
        print('Recommendation #' + str(i+1) + ' is: ' + winedict[str(top_items[i])])
Code example #28
    prev_prec = test_precision
    epoch = 1
    # keep training one epoch at a time while test precision does not degrade
    while test_precision >= prev_prec:
        logging.info('Epoch: %s, Test prec: %.2f' % (epoch, test_precision))
        prev_prec = test_precision
        model.fit_partial(train, epochs=1)
        test_precision = precision_at_k(model, test, k=5).mean()
        epoch += 1
    # logging.info('Model fit done')
    # similarItems = similar_items(item_id=sparseDf.getItemIndexById(4105331), item_features=None, model=model)
    # for tuple in similarItems:
    #     print('Item id: %s, Accuracy: %s' % (sparseDf.getItemIdFromIndex(tuple[0]), tuple[1]))

    train_precision = precision_at_k(model, train, k=5).mean()
    logging.info('Train precision computed')
    test_precision = precision_at_k(model, test, k=5).mean()
    logging.info('Test precision computed')
    logging.info('Precision: train %.2f, test %.2f.' %
                 (train_precision, test_precision))
    #
    train_auc = auc_score(model, train).mean()
    logging.info('Train auc computed')
    test_auc = auc_score(model, test).mean()
    logging.info('Test auc computed')
    logging.info('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    train_recall = recall_at_k(model, train, k=5).mean()
    logging.info('Train recall computed')
    test_recall = recall_at_k(model, test, k=5).mean()
    logging.info('Test recall computed')
    logging.info('Recall: train %.2f, test %.2f.' %
                 (train_recall, test_recall))
Code example #29
def track_model_metrics(model,
                        train_interactions,
                        test_interactions,
                        k=10,
                        no_epochs=100,
                        no_threads=8,
                        show_plot=True,
                        **kwargs):
    """Function to record model's performance at each epoch, formats the performance into tidy format,
    plots the performance and outputs the performance data.

    Args:
        model (LightFM instance): fitted LightFM model
        train_interactions (scipy sparse COO matrix): train interactions set
        test_interactions (scipy sparse COO matrix): test interaction set
        k (int): number of recommendations, optional
        no_epochs (int): Number of epochs to run, optional
        no_threads (int): Number of parallel threads to use, optional
        **kwargs: other keyword arguments to be passed down

    Returns:
        pandas.DataFrame, LightFM model, matplotlib axes:
        - Performance traces of the fitted model
        - Fitted model
        - Side effect of the method
    """
    # initialising temp data storage
    model_prec_train = [0] * no_epochs
    model_prec_test = [0] * no_epochs

    model_rec_train = [0] * no_epochs
    model_rec_test = [0] * no_epochs

    # fit model and store train/test metrics at each epoch
    for epoch in range(no_epochs):
        model.fit_partial(interactions=train_interactions,
                          epochs=1,
                          num_threads=no_threads,
                          **kwargs)
        model_prec_train[epoch] = precision_at_k(model,
                                                 train_interactions,
                                                 k=k,
                                                 **kwargs).mean()
        model_prec_test[epoch] = precision_at_k(model,
                                                test_interactions,
                                                k=k,
                                                **kwargs).mean()

        model_rec_train[epoch] = recall_at_k(model,
                                             train_interactions,
                                             k=k,
                                             **kwargs).mean()
        model_rec_test[epoch] = recall_at_k(model,
                                            test_interactions,
                                            k=k,
                                            **kwargs).mean()

    # collect the performance metrics into a dataframe
    fitting_metrics = pd.DataFrame(
        zip(model_prec_train, model_prec_test, model_rec_train,
            model_rec_test),
        columns=[
            "model_prec_train",
            "model_prec_test",
            "model_rec_train",
            "model_rec_test",
        ],
    )
    # convert into tidy format
    fitting_metrics = fitting_metrics.stack().reset_index()
    fitting_metrics.columns = ["epoch", "level", "value"]
    # extract the labels for each observation
    fitting_metrics["stage"] = fitting_metrics.level.str.split("_").str[-1]
    fitting_metrics["metric"] = fitting_metrics.level.str.split("_").str[1]
    fitting_metrics.drop(["level"], axis=1, inplace=True)
    # replace the metric keys to improve visualisation
    metric_keys = {"prec": "Precision", "rec": "Recall"}
    fitting_metrics.metric.replace(metric_keys, inplace=True)
    # plots the performance data
    if show_plot:
        model_perf_plots(fitting_metrics)
    return fitting_metrics, model
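A hedged usage sketch for track_model_metrics, assuming the same module already imports pandas and the LightFM evaluation functions the helper relies on, and that model_perf_plots (invoked when show_plot=True) is defined there as well, which is why plotting is disabled here:

from lightfm import LightFM
from lightfm.cross_validation import random_train_test_split
from lightfm.datasets import fetch_movielens

interactions = fetch_movielens()['train']
train, test = random_train_test_split(interactions)
metrics_df, fitted_model = track_model_metrics(
    LightFM(loss='warp'), train, test, k=10, no_epochs=10, show_plot=False)
print(metrics_df.groupby(['stage', 'metric']).value.max())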
Code example #30
    def run(self,
            epochs: int = 1,
            no_components: int = 50,
            learning_rate: float = 0.05) -> Dict[str, float]:
        """
         build interaction matrix -> build movie features -> build model

        Example (5000 samples, 50 components, 5 epochs, learning_rate=0.05)
        =================================
        {'auc_train': 0.66268414, 'auc_test': 0.67257625,
         'precision_train@10': 0.035984848, 'precision_test@10': 0.014193548,
         'recall_train@10': 0.06827082513973247, 'recall_test@10': 0.0646373101211811}

        ###########################
        #### Random Stratified ####
        ###########################
        Example (2 million samples, 50 components, 1 epochs, learning_rate=0.05)
        =================================
        {'auc_train': 0.5171841, 'auc_test': 0.51610065,
         'precision_train@10': 0.018248174, 'precision_test@10': 0.0040145987,
         'recall_train@10': 0.0008001067196610589, 'recall_test@10': 0.0007001527280332769}

        ########################
        #### Popular Active ####
        ########################
        Example (333000 samples, 150 components, 1 epochs, learning_rate=0.05)  20% test data
        =================================
        {'auc_train': 0.63388383, 'auc_test': 0.5569484,
        'precision_train@10': 0.7255412, 'precision_test@10': 0.17099567,
        'recall_train@10': 0.006322884137545113, 'recall_test@10': 0.006053869700910709}

        Example (333000 samples, 50 components, 1 epochs, learning_rate=0.05)  40% test data
        =================================
        {'auc_train': 0.6001097, 'auc_test': 0.56429684,
         'precision_train@10': 0.56060606, 'precision_test@10': 0.33030304,
         'recall_train@10': 0.006517918240037026, 'recall_test@10': 0.005792534657980192}

        Example (333000 samples, 50 components, 20 epochs, learning_rate=0.05)  40% test data
        =================================
        {'auc_train': 0.6077434, 'auc_test': 0.5688331,
         'precision_train@10': 0.5874459, 'precision_test@10': 0.32424247,
         'recall_train@10': 0.0068082500065638684, 'recall_test@10': 0.005756504594433489}

        Example (333000 samples, 50 components, 1 epochs, learning_rate=0.05)  40% test data with normalization
        =================================
        {'auc_train': 0.60080063, 'auc_test': 0.56425303,
         'precision_train@10': 0.56926405, 'precision_test@10': 0.33679655,
         'recall_train@10': 0.006628036812872702, 'recall_test@10': 0.005913302996971047}
         """
        ## Build Matrix Factorization between Customer and Movie
        data = self._filter_data

        dataset = Dataset()
        dataset.fit(data['Cust_Id'].unique(),
                    data['Movie_Id'].unique(),
                    item_features=self.get_combination)
        (interactions, weights) = dataset.build_interactions([
            (x['Cust_Id'], x['Movie_Id'], x['Rating'])
            for index, x in data.iterrows()
        ])

        train, test = random_train_test_split(
            interactions,
            test_percentage=0.4,
            random_state=np.random.RandomState(7))
        print("Finished creating interactions matrix!")

        ## Build movie features
        movies_id, tfidf_data = self.get_tfidf_matrix
        features_lists = [list(x) for x in tfidf_data.values]
        movies_features = dataset.build_item_features(
            data=self.get_movies_tuple(features_lists, movies_id, tfidf_data),
            normalize=True)
        print("Finished building movie features!")

        ## Build model
        model = LightFM(no_components=no_components,
                        learning_rate=learning_rate,
                        loss='warp',
                        k=15)
        model.fit(train,
                  epochs=epochs,
                  item_features=movies_features,
                  num_threads=4)
        print("Finished building LightFM model!")

        with open('hybrid_model_popular_active.pickle', 'wb') as fle:
            pickle.dump(model, fle, protocol=pickle.HIGHEST_PROTOCOL)
        print("Finished saving LightFM model!")

        return {
            "auc_train":
            auc_score(model, train, item_features=movies_features).mean(),
            "auc_test":
            auc_score(model, test, item_features=movies_features).mean(),
            "precision_train@10":
            precision_at_k(model, train, item_features=movies_features,
                           k=10).mean(),
            "precision_test@10":
            precision_at_k(model, test, item_features=movies_features,
                           k=10).mean(),
            "recall_train@10":
            recall_at_k(model, train, item_features=movies_features,
                        k=10).mean(),
            "recall_test@10":
            recall_at_k(model, test, item_features=movies_features,
                        k=10).mean()
        }
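A short, hedged sketch of reading back the model that run() pickles to disk; the filename comes from the method above, and this assumes run() has already been executed:

import pickle

with open('hybrid_model_popular_active.pickle', 'rb') as fle:
    model = pickle.load(fle)
print(type(model))  # expected: <class 'lightfm.lightfm.LightFM'>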
Code example #31
        #with open(MODEL_CHECKPOINT_PATH, 'wb') as fle:
        #    pickle.dump(model, fle, protocol=pickle.HIGHEST_PROTOCOL)

        train_precision = precision_at_k(model,
                                         data['train'],
                                         k=10,
                                         item_features=item_features).mean()
        test_precision = precision_at_k(model,
                                        data['test'],
                                        k=10,
                                        train_interactions=data['train'],
                                        item_features=item_features).mean()

        train_recall = recall_at_k(model,
                                   data['train'],
                                   k=10,
                                   item_features=item_features).mean()
        test_recall = recall_at_k(model,
                                  data['test'],
                                  k=10,
                                  train_interactions=data['train'],
                                  item_features=item_features).mean()

        train_auc = auc_score(model,
                              data['train'],
                              item_features=item_features).mean()
        test_auc = auc_score(model,
                             data['test'],
                             train_interactions=data['train'],
                             item_features=item_features).mean()
Code example #32
    def obtener_metricas_gui(self):
        """
        Método obtener_metricas_gui. Obtiene las métricas del modelo escogido.

        Este método solo se utiliza en la interfaz web.

        Returns
        -------

        metricas_devueltas: dict
            diccionario con las métricas del modelo
        """

        global train, test, modelo, item_features, user_features

        # The metrics are stored in a dictionary so they can later be shown in the web interface
        metricas = dict()

        # Compute the metrics
        if self.opcion_modelo == 1:
            precision = precision_at_k(modelo,
                                       test,
                                       train_interactions=train,
                                       k=10,
                                       num_threads=self.CPU_THREADS).mean()
            auc = auc_score(modelo,
                            test,
                            train_interactions=train,
                            num_threads=self.CPU_THREADS).mean()
            recall = recall_at_k(modelo,
                                 test,
                                 train_interactions=train,
                                 k=10,
                                 num_threads=self.CPU_THREADS).mean()
            reciprocal = reciprocal_rank(modelo,
                                         test,
                                         train_interactions=train,
                                         num_threads=self.CPU_THREADS).mean()
        else:
            precision = precision_at_k(modelo,
                                       test,
                                       train_interactions=train,
                                       k=10,
                                       user_features=user_features,
                                       item_features=item_features,
                                       num_threads=self.CPU_THREADS).mean()
            auc = auc_score(modelo,
                            test,
                            train_interactions=train,
                            user_features=user_features,
                            item_features=item_features,
                            num_threads=self.CPU_THREADS).mean()
            recall = recall_at_k(modelo,
                                 test,
                                 train_interactions=train,
                                 k=10,
                                 user_features=user_features,
                                 item_features=item_features,
                                 num_threads=self.CPU_THREADS).mean()
            reciprocal = reciprocal_rank(modelo,
                                         test,
                                         train_interactions=train,
                                         user_features=user_features,
                                         item_features=item_features,
                                         num_threads=self.CPU_THREADS).mean()

        # Store the metrics in the dictionary and format their output
        metricas_devueltas = {
            "Precisión k": format(precision, '.4f'),
            "AUC Score": format(auc, '.4f'),
            "Recall k": format(recall, '.4f'),
            "Ranking recíproco": format(reciprocal, '.4f')
        }
        metricas_a_guardar = {
            "Precisión k": [format(precision, '.4f')],
            "AUC Score": [format(auc, '.4f')],
            "Recall k": [format(recall, '.4f')],
            "Ranking recíproco": [format(reciprocal, '.4f')]
        }

        # Save the metrics to a .csv file
        guardar_resultados(metricas_a_guardar)

        return metricas_devueltas