Example #1
def evaluate_fm(model_, te_, tr_, items_features=None, users_features=None):
    if tr_.multiply(te_).nnz != 0:
        print('train and test interactions are not fully disjoint')

    # Compute and print the AUC score
    train_auc = auc_score(model_,
                          tr_,
                          item_features=items_features,
                          user_features=users_features,
                          num_threads=NUM_THREADS).mean()
    print('Collaborative filtering train AUC: %s' % train_auc)

    test_auc = auc_score(model_,
                         te_,
                         train_interactions=tr_,
                         item_features=items_features,
                         user_features=users_features,
                         num_threads=NUM_THREADS).mean()
    print('Collaborative filtering test AUC: %s' % test_auc)
    p_at_k_train = precision_at_k(model_,
                                  tr_,
                                  item_features=items_features,
                                  user_features=users_features,
                                  k=5,
                                  num_threads=NUM_THREADS).mean()
    p_at_k_test = precision_at_k(model_,
                                 te_,
                                 train_interactions=tr_,
                                 item_features=items_features,
                                 user_features=users_features,
                                 k=5,
                                 num_threads=NUM_THREADS).mean()

    print("Train precision: %.2f" % p_at_k_train)
    print("Test precision: %.2f" % p_at_k_test)
Example #2
        def auc():
            """Evaluates models on the ROC AUC metric.

            Measure the ROC AUC metric for a model: the probability that a randomly chosen positive example
            has a higher score than a randomly chosen negative example. A perfect score is 1.0.

            """

            auc = auc_score(model=model,
                            train_interactions=train,
                            test_interactions=test,
                            item_features=item_features).mean()
            logger.info(model_name + ' AUC: %s' % auc)

            train_auc = auc_score(model,
                                  train,
                                  item_features=item_features,
                                  num_threads=NUM_THREADS).mean()
            logger.info(model_name + ' training set AUC: %s' % train_auc)

            test_auc = auc_score(model,
                                 test,
                                 train_interactions=train,
                                 item_features=item_features,
                                 num_threads=NUM_THREADS).mean()
            logger.info(model_name + ' test set AUC: %s' % test_auc)
Example #3
def train_model(train, test, user_features, item_features):
    log.info("Initializing model")
    model = LightFM(loss="warp",
                    item_alpha=ITEM_ALPHA,
                    no_components=NUM_COMPONENTS)

    log.info("Training model")
    model = model.fit(train,
                      user_features=user_features,
                      item_features=item_features,
                      epochs=NUM_EPOCHS,
                      num_threads=NUM_THREADS)

    log.info("Scoring")
    train_auc = auc_score(model,
                          train,
                          user_features=user_features,
                          item_features=item_features,
                          num_threads=NUM_THREADS).mean()
    log.info(f"Training set AUC: {train_auc}")
    test_auc = auc_score(model,
                         test,
                         train_interactions=train,
                         user_features=user_features,
                         item_features=item_features,
                         num_threads=NUM_THREADS).mean()
    log.info(f"Test set AUC: {test_auc}")
    return model
Example #4
def main():
    movielens = fetch_movielens()

    train = movielens['train']
    test = movielens['test']
    print(train.shape)
    print(test.shape)

    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=5)

    k = 10
    train_recall = recall_at_k(model, train, k=k).mean()
    test_recall = recall_at_k(model, test, k=k).mean()
    print(f'recall_at_{k}(train): {train_recall}')
    print(f'recall_at_{k}(test) : {test_recall}')

    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()
    print(f'auc_score(train): {train_auc}')
    print(f'auc_score(test) : {test_auc}')

    y_train_preds = model.predict_rank(train)
    y_test_preds = model.predict_rank(test)
    train_dcg = dcg_score(train.toarray(), y_train_preds.toarray())
    test_dcg = dcg_score(test.toarray(), y_test_preds.toarray())
    print(f'dcg_score(train): {train_dcg}')
    print(f'dcg_score(test) : {test_dcg}')

    print('DONE')

    return 0
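
One caveat on the DCG step above: model.predict_rank returns 0-based ranks where smaller means better, while sklearn's dcg_score (presumably where dcg_score comes from; the import is not shown) treats larger values as better scores. If that is the function in use, negating the ranks restores the intended ordering; a sketch:

# Assumes dcg_score is sklearn.metrics.dcg_score; negate the ranks so the
# best-ranked items (rank 0) receive the highest pseudo-scores.
from sklearn.metrics import dcg_score

train_dcg = dcg_score(train.toarray(), -y_train_preds.toarray())
test_dcg = dcg_score(test.toarray(), -y_test_preds.toarray())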
Example #5
def train(data, user_features=None, item_features=None, use_features=False):
    loss_type = "warp"  # "bpr"

    model = LightFM(learning_rate=0.05, loss=loss_type, max_sampled=100)

    if use_features:
        model.fit_partial(data,
                          epochs=20,
                          user_features=user_features,
                          item_features=item_features)
        train_precision = precision_at_k(model,
                                         data,
                                         k=10,
                                         user_features=user_features,
                                         item_features=item_features).mean()

        train_auc = auc_score(model,
                              data,
                              user_features=user_features,
                              item_features=item_features).mean()

        print(f'Precision: train {train_precision:.2f}')
        print(f'AUC: train {train_auc:.2f}')
    else:
        model.fit_partial(data, epochs=20)

        train_precision = precision_at_k(model, data, k=10).mean()

        train_auc = auc_score(model, data).mean()

        print(f'Precision: train {train_precision:.2f}')
        print(f'AUC: train {train_auc:.2f}')

    return model
Example #6
def evaluate(model, train, test, hybrid=False, features=None):
    
    if hybrid:
        auc_train = np.mean(auc_score(model, train, item_features=features))
        pre_train = np.mean(precision_at_k(model, train, item_features=features))
        mrr_train = np.mean(reciprocal_rank(model, train, item_features=features))
        
        auc_test = np.mean(auc_score(model, test, item_features=features))
        pre_test = np.mean(precision_at_k(model, test, item_features=features))
        mrr_test = np.mean(reciprocal_rank(model, test, item_features=features))   
    else:
        auc_train = np.mean(auc_score(model, train))
        pre_train = np.mean(precision_at_k(model, train))
        mrr_train = np.mean(reciprocal_rank(model, train))
        
        auc_test = np.mean(auc_score(model, test))
        pre_test = np.mean(precision_at_k(model, test))
        mrr_test = np.mean(reciprocal_rank(model, test))    
    
    res_dict = {'auc_train': auc_train, 
                'pre_train': pre_train,
                'mrr_train': mrr_train, 
                'auc_test': auc_test, 
                'pre_test': pre_test, 
                'mrr_test': mrr_test}
                  
    print('The AUC Score is in training/validation:                 ',
          auc_train,' / ', auc_test)
    print('The mean precision at k Score in training/validation is: ',
          pre_train, ' / ', pre_test)
    print('The mean reciprocal rank in training/validation is:      ', 
          mrr_train, ' / ', mrr_test)
    print('_________________________________________________________')
    
    return res_dict
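
Unlike most examples on this page, the test-set calls above do not pass train_interactions, so known training positives still compete in the test ranking. A hedged variant for the hybrid branch (the same change applies to the pure collaborative branch):

# Sketch: exclude training interactions when scoring the test set.
auc_test = np.mean(auc_score(model, test, train_interactions=train,
                             item_features=features))
pre_test = np.mean(precision_at_k(model, test, train_interactions=train,
                                  item_features=features))
mrr_test = np.mean(reciprocal_rank(model, test, train_interactions=train,
                                   item_features=features))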
Example #7
def test_auc_score():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    auc = evaluation.auc_score(model, test, num_threads=2)
    expected_auc = np.array(_auc(model, test))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
    assert len(auc) == (test.getnnz(axis=1) > 0).sum()
    assert len(evaluation.auc_score(model, train,
                                    preserve_rows=True)) == test.shape[0]

    # With omitting train interactions
    auc = evaluation.auc_score(model,
                               test,
                               train_interactions=train,
                               num_threads=2)
    expected_auc = np.array(_auc(model, test, train))
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
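
_generate_data and _auc are helpers from the surrounding test suite and are not shown. A minimal sketch of what a per-user reference _auc could look like, assuming sklearn is available (the real helper may differ in detail):

import numpy as np
from sklearn.metrics import roc_auc_score

def _auc(model, ground_truth, train=None):
    # Hypothetical reference: per-user ROC AUC computed from raw predict()
    # scores, skipping users without test positives and optionally dropping
    # train items from the candidate set.
    ground_truth = ground_truth.tocsr()
    train = train.tocsr() if train is not None else None
    no_users, no_items = ground_truth.shape
    item_ids = np.arange(no_items, dtype=np.int32)
    aucs = []
    for user_id in range(no_users):
        positives = ground_truth[user_id].indices
        if len(positives) == 0:
            continue
        predictions = model.predict(user_id, item_ids)
        labels = np.zeros(no_items)
        labels[positives] = 1
        if train is not None:
            keep = np.ones(no_items, dtype=bool)
            keep[train[user_id].indices] = False
            keep[positives] = True  # never drop the test positives
            labels, predictions = labels[keep], predictions[keep]
        aucs.append(roc_auc_score(labels, predictions))
    return aucs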
Example #8
def lightfm_model(data, prec_at_k=100, train_split=0.8, epochs=10):
    """
        Code to evaluate LightFm model
        Data is a scipy sparse matrix
        
        https://arxiv.org/abs/1507.08439
    """
    model = LightFM(learning_rate=0.05, loss='logistic')

    train, test = random_train_test_split(data,
                                          test_percentage=1 - train_split)

    model.fit(train, epochs=epochs)

    train_precision = precision_at_k(model, train, k=prec_at_k)
    test_precision = precision_at_k(model,
                                    test,
                                    k=prec_at_k,
                                    train_interactions=train)

    train_auc = auc_score(model, train)
    test_auc = auc_score(model, test, train_interactions=train)

    print('Performance of the LightFM model\n')
    print(
        f'Precision \t Train: {train_precision.mean():.2f} \t Test: {test_precision.mean():.2f}'
    )
    print(
        f'AUC \t\t Train: {train_auc.mean():.2f} \t Test: {test_auc.mean():.2f}'
    )

    return (train_auc, test_auc, train_precision, test_precision, prec_at_k)
Example #9
def test_auc_score():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    auc = evaluation.auc_score(model,
                               test,
                               num_threads=2)
    expected_auc = np.array(_auc(model,
                                 test))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
    assert len(auc) == (test.getnnz(axis=1) > 0).sum()
    assert len(evaluation.auc_score(model,
                                    train,
                                    preserve_rows=True)) == test.shape[0]

    # With omitting train interactions
    auc = evaluation.auc_score(model,
                               test,
                               train_interactions=train,
                               num_threads=2)
    expected_auc = np.array(_auc(model,
                                 test,
                                 train))
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
Example #10
def main():
    movielens = fetch_movielens()

    train = movielens['train']
    print(type(train))
    print(train.toarray()[:5, :])
    test = movielens['test']
    print(type(test))
    print(test.toarray()[:5, :])

    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=10)

    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model,
                                    test,
                                    k=10,
                                    train_interactions=train).mean()

    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test, train_interactions=train).mean()

    print(f'train precision: {train_precision}')
    print(f'test precision: {test_precision}')

    print(f'train auc: {train_auc}')
    print(f'test auc: {test_auc}')
    print('DONE')
Example #11
	def evaluate(self, model, train, test, k=10):
		train_precision = precision_at_k(model, train, k=k).mean()
		test_precision = precision_at_k(model, test, k=k).mean()

		train_auc = auc_score(model, train).mean()
		test_auc = auc_score(model, test).mean()

		return train_precision, test_precision, train_auc, test_auc
Example #12
def validate_item_features(ctx, data_home):

    data = fetch_stackexchange('crossvalidated',
                               test_set_fraction=0.1,
                               indicator_features=False,
                               tag_features=True, data_home=data_home)

    train = data['train']
    test = data['test']

    # Set the model hyperparameters.
    NUM_COMPONENTS = 30
    NUM_EPOCHS = 3
    ITEM_ALPHA = 1e-6

    # Let's fit a WARP model: these generally have the best performance.
    model = LightFM(loss='warp',
                    item_alpha=ITEM_ALPHA,
                    no_components=NUM_COMPONENTS)

    # Run 3 epochs.
    model = model.fit(train, epochs=NUM_EPOCHS)

    train_auc = auc_score(model, train).mean()
    print('Collaborative filtering train AUC: %s' % train_auc)

    test_auc = auc_score(model, test, train_interactions=train).mean()
    print('Collaborative filtering test AUC: %s' % test_auc)

    # Set biases to zero
    model.item_biases *= 0.0

    test_auc = auc_score(model, test, train_interactions=train).mean()
    print('Collaborative filtering test AUC: %s' % test_auc)

    item_features = data['item_features']
    tag_labels = data['item_feature_labels']

    print('There are %s distinct tags, with values like %s.' % (item_features.shape[1], tag_labels[:3].tolist()))

    # Define a new model instance
    model = LightFM(loss='warp',
                    item_alpha=ITEM_ALPHA,
                    no_components=NUM_COMPONENTS)

    # Fit the hybrid model. Note that this time, we pass
    # in the item features matrix.
    model = model.fit(train,
                      item_features=item_features,
                      epochs=NUM_EPOCHS)

    # Don't forget to pass in the item features again!
    train_auc = auc_score(model, train, item_features=item_features).mean()
    print('Hybrid training set AUC: %s' % train_auc)

    test_auc = auc_score(model, test, train_interactions=train, item_features=item_features).mean()
    print('Hybrid test set AUC: %s' % test_auc)
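
Zeroing model.item_biases above is destructive: the popularity boost cannot be recovered afterwards. To compare scores with and without biases non-destructively, a small variation is to keep a copy first; a sketch:

# Keep the trained biases so they can be restored after the comparison.
item_biases_backup = model.item_biases.copy()
model.item_biases *= 0.0
auc_without_biases = auc_score(model, test, train_interactions=train,
                               item_features=item_features).mean()
model.item_biases = item_biases_backup  # restore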
Example #13
def lightfm_model(data, prec_at_k=10, train_split=0.8):
    """
        Code to evaluate LightFm model
        Data is a scipy sparse matrix
        
        https://arxiv.org/abs/1507.08439
    """
    model = LightFM(learning_rate=0.05, loss='bpr')
    train, test = random_train_test_split(data,
                                          test_percentage=1 - train_split)

    model.fit(train, epochs=10)

    train_precision = precision_at_k(model, train, k=prec_at_k)
    test_precision = precision_at_k(model,
                                    test,
                                    k=prec_at_k,
                                    train_interactions=train)

    train_auc = auc_score(model, train)
    test_auc = auc_score(model, test, train_interactions=train)

    print('Performance of the LightFM model\n')
    print(
        f'Precision \t Train: {train_precision.mean():.2f} \t Test: {test_precision.mean():.2f}'
    )
    print(
        f'AUC \t\t Train: {train_auc.mean():.2f} \t Test: {test_auc.mean():.2f}'
    )

    fig, ax = plt.subplots(2, 2, figsize=(15, 10))

    ax[0, 0].hist(train_auc, bins='auto')
    ax[0, 0].title.set_text('Distribution of Train AUC score over users')
    ax[0, 0].set_ylabel('Count')
    ax[0, 0].set_xlabel('AUC Score')

    ax[0, 1].hist(test_auc, bins='auto')
    ax[0, 1].title.set_text('Distribution of Test AUC score over users')
    ax[0, 1].set_ylabel('Count')
    ax[0, 1].set_xlabel('AUC Score')

    ax[1, 0].hist(train_precision, bins='auto')
    ax[1, 0].title.set_text(
        f'Distribution of Train Precision @ {prec_at_k} for all users')
    ax[1, 0].set_ylabel('Count')
    ax[1, 0].set_xlabel(f'Precision @ {prec_at_k}')

    ax[1, 1].hist(test_precision, bins='auto')
    ax[1, 1].title.set_text(
        f'Distribution of Test Precision @ {prec_at_k} for all users')
    ax[1, 1].set_ylabel('Count')
    ax[1, 1].set_xlabel(f'Precision @ {prec_at_k}')

    plt.show()

    print('\n')
Example #14
def collab_filtering():
    """
    implements collaborative filtering version
    by using only the rating data from movielens dataset
    :return:
    """
    data = fetch_movielens()

    for key, value in data.items():
        print(key, type(value), value.shape)

    train = data['train']
    test = data['test']
    print(
        'The dataset has %s users and %s items, '
        'with %s interactions in the test and %s interactions in the training set.'
        % (train.shape[0], train.shape[1], test.getnnz(), train.getnnz()))

    model = LightFM(learning_rate=0.05, loss='bpr')
    model.fit(train, epochs=50, num_threads=5)

    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model, test, k=10).mean()
    train_recall = recall_at_k(model, train, k=10).mean()
    test_recall = recall_at_k(model, test, k=10).mean()

    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()

    print('Precision: train %.2f, test %.2f.' %
          (train_precision, test_precision))
    print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    model = LightFM(learning_rate=0.05, loss='warp')

    # train the new WARP model (a fresh instance; nothing is resumed)
    model.fit_partial(train, epochs=50, num_threads=5)

    train_precision = precision_at_k(model, train, k=10).mean()
    test_precision = precision_at_k(model, test, k=10).mean()
    train_recall = recall_at_k(model, train, k=10).mean()
    test_recall = recall_at_k(model, test, k=10).mean()

    train_auc = auc_score(model, train).mean()
    test_auc = auc_score(model, test).mean()

    print("*****************")
    print("After re-training")
    print('Precision: train %.2f, test %.2f.' %
          (train_precision, test_precision))
    print('Recall: train %.2f, test %.2f.' % (train_recall, test_recall))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    #check sample recommendation
    sample_recommendation(model, data, [3, 25, 450])
Example #15
def eval(model, train, val):
    # auc
    print("Train auc: %.2f" % auc_score(model, train).mean())
    print("Val auc: %.2f" % auc_score(model, val).mean())
    # precision_at_k
    print("Train precision: %.2f" % precision_at_k(model, train, k=5).mean())
    print("Val precision: %.2f" % precision_at_k(model, val, k=5).mean())
    # recall_at_k
    print("Train recall: %.2f" % precision_at_k(model, train, k=5).mean())
    print("Val recall: %.2f" % precision_at_k(model, val, k=5).mean())
Example #16
def evaluate_model(model, train, test, item_features=None, user_features=None, num_threads=1):

    train_precision = precision_at_k(model, train, k=10, user_features=user_features, item_features=item_features, num_threads=num_threads).mean()
    test_precision = precision_at_k(model, test, train_interactions=train, k=10, user_features=user_features, item_features=item_features, num_threads=num_threads).mean()

    train_auc = auc_score(model, train, user_features=user_features, item_features=item_features, num_threads=num_threads).mean()
    test_auc = auc_score(model, test, train_interactions=train, user_features=user_features, item_features=item_features, num_threads=num_threads).mean()

    print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))
    print('AUC: train %.2f, test %.2f.' % (train_auc, test_auc))

    return train_precision, test_precision, train_auc, test_auc
Example #17
def patk_learning_curve(model,
                        train,
                        test,
                        iterarray,
                        user_features=None,
                        item_features=None,
                        k=5,
                        **fit_params):
    old_epoch = 0
    train_patk = []
    test_patk = []
    warp_duration = []
    train_warp_auc = []
    test_warp_auc = []
    headers = ['Epoch', 'train p@5', 'train_auc', 'test p@5', 'test_auc']
    print_log(headers, header=True)
    for epoch in iterarray:
        more = epoch - old_epoch
        start = time.time()
        model.fit_partial(train,
                          user_features=user_features,
                          epochs=more,
                          item_features=item_features,
                          **fit_params)
        warp_duration.append(time.time() - start)
        train_warp_auc.append(
            auc_score(model, train, item_features=item_features).mean())
        test_warp_auc.append(
            auc_score(model,
                      test,
                      item_features=item_features,
                      train_interactions=train).mean())
        this_test = precision_at_k(model,
                                   test,
                                   train_interactions=train,
                                   item_features=item_features,
                                   k=k)
        this_train = precision_at_k(model,
                                    train,
                                    train_interactions=None,
                                    item_features=item_features,
                                    k=k)

        train_patk.append(np.mean(this_train))
        test_patk.append(np.mean(this_test))
        row = [
            epoch, train_patk[-1], train_warp_auc[-1], test_patk[-1],
            test_warp_auc[-1]
        ]
        print_log(row)
    return model, train_patk, test_patk, warp_duration, train_warp_auc, test_warp_auc
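
print_log is an undefined helper here. A minimal stand-in consistent with how it is called (a header row of strings, then rows mixing the epoch number and floats) might be:

def print_log(row, header=False):
    # Hypothetical stand-in: fixed-width columns, with a rule under the header.
    line = ' | '.join('{:>12}'.format(x if isinstance(x, str) else round(x, 5))
                      for x in row)
    print(line)
    if header:
        print('-' * len(line))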
Example #18
def model_train_test(spark,
                     train,
                     val,
                     test,
                     no_components=20,
                     learning_rate=0.01,
                     epochs=15,
                     k=10):
    '''
    Train a LightFM collaborative filtering recommender system and test it on the validation and testing data.

    Parameters
    ----------
    spark: spark session object
    train: type-sparse matrix: processed training data
    val: type-sparse matrix: processed validation data
    test: type-sparse matrix: processed testing data
    no_components: type-int: latent factors
    learning_rate: type-float: learning rate
    epochs: type-int: iterations
    k: type-int: top-k predictions for every user

    Return
    ----------
    None
    '''
    start_time = time.time()
    #Create LightFM object
    model = LightFM(no_components=no_components,
                    learning_rate=learning_rate,
                    loss='warp')
    #Fit model
    model.fit(train, epochs=epochs, num_threads=1)
    #Record time (Fit time)
    fit_time = time.time()
    #Calculate AUC value
    auc_val = auc_score(model, val).mean()
    score_calc_time = time.time()
    auc_test = auc_score(model, test).mean()
    auc_train = auc_score(model, train).mean()

    #Calculate Precision_at_k
    P_at_K = precision_at_k(model, test, k=k)
    precision_value = np.mean(P_at_K)
    print("For no_components = {}, learning_rate = {} ".format(
        no_components, learning_rate))
    print("Train AUC Score: {}".format(auc_train))
    print("Val AUC Score: {}".format(auc_val))
    print("Test AUC Score: {}".format(auc_test))
    print("Precision at k={} Score: {}".format(k, precision_value))
    print("--- Fit time:  {} mins ---".format(fit_time - start_time))
    print("--- Score time:  {} mins ---".format(score_calc_time - fit_time))
Example #19
def evaluate_model(df,
                   user_id_col='user_id',
                   item_id_col='business_id',
                   stratify=None):
    """ Model evaluation.

    Args:
        df: the input dataframe.
        user_id_col: user id column.
        item_id_col: item id column.
        stratify: if use stratification.

    Returns:
        train_auc: training set auc score.
        test_auc: testing set auc score.

    """
    # model evaluation
    # create test and train datasets
    print('model evaluation')
    train, test = train_test_split(df, test_size=0.2, stratify=stratify)
    ds = Dataset()

    # call fit to register all user ids and item ids (no side features here)
    ds.fit(
        df[user_id_col].unique(),  # all the users
        df[item_id_col].unique(),  # all the items
    )

    # plugging in the interactions
    (train_interactions, train_weights) = ds.build_interactions([
        (x[0], x[1], x[2]) for x in train.values
    ])
    (test_interactions, _) = ds.build_interactions([(x[0], x[1], x[2])
                                                    for x in test.values])
    # model
    model = LightFM(no_components=100,
                    learning_rate=0.05,
                    loss='warp',
                    max_sampled=50)
    model.fit(train_interactions,
              sample_weight=train_weights,
              epochs=10,
              num_threads=10)

    # auc-roc
    train_auc = auc_score(model, train_interactions, num_threads=20).mean()
    print('Training set AUC: %s' % train_auc)
    test_auc = auc_score(model, test_interactions, num_threads=20).mean()
    print('Testing set AUC: %s' % test_auc)
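
The Dataset above is fit on ids only, so the model falls back to per-user and per-item indicator features. lightfm's Dataset can also register and build side features; a sketch, assuming df had a 'category' column (an assumed column name):

# Hypothetical extension: item side features via the Dataset API.
ds = Dataset()
ds.fit(
    df[user_id_col].unique(),
    df[item_id_col].unique(),
    item_features=df['category'].unique(),  # 'category' is an assumption
)
item_features = ds.build_item_features(
    (row[item_id_col], [row['category']]) for _, row in df.iterrows()
)
# item_features can then be passed to model.fit and auc_score.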
Example #20
def measure_accuracies(model, data):
    print("\nMeasuring accuracies of the model...")

    # evaluate the precision@k metric
    training_precision = precision_at_k(model, data["train"], k=PRECISION_K).mean()
    test_precision = precision_at_k(model, data["test"], k=PRECISION_K).mean()

    # evaluate the AUROC metric
    training_auc = auc_score(model, data["train"]).mean()
    test_auc = auc_score(model, data["test"]).mean()

    # print them out
    print("Precision@k: training %.2f, test %.2f" % (training_precision, test_precision))
    print("AUC: training %.2f, test %.2f" % (training_auc, test_auc))
Example #21
def _get_metrics(model, train_set, test_set):

    train_set = train_set.tocsr()
    test_set = test_set.tocsr()

    train_set.data[train_set.data < 0] = 0.0
    test_set.data[test_set.data < 0] = 0.0

    train_set.eliminate_zeros()
    test_set.eliminate_zeros()

    return (precision_at_k(model, train_set).mean(),
            precision_at_k(model, test_set).mean(),
            auc_score(model, train_set).mean(),
            auc_score(model, test_set).mean())
Example #22
def evaluate_model(model, metric, test, train):
    """
    Evaluate a trained model on the test set, using one of the three available accuracy metrics.
        AUC: the probability that a randomly chosen positive example has a higher score than a randomly chosen
        negative example.
        Precision: the fraction of known positives in the first k positions of the ranked list of results.
        Recall: the number of positive items in the first k positions of the ranked list of results divided by the
        number of positive items in the test period.
    :param model:(LightFM, required) - model to be evaluated
    :param metric:(string, required) - accuracy metric to be used, one of ['auc', 'precision', 'recall']
    :param test:(COO matrix, required) - known positives used to test the model
    :param train:(COO matrix, required) - training set; these interactions will be omitted from the score
           calculations to avoid re-recommending known positives.
    :return: test_score (float) - score computed on the test set
    """
    try:
        # make sure the metric is correct
        assert metric in ['auc', 'precision', 'recall']
        if metric == 'auc':
            test_score = auc_score(model, test, train).mean()
        elif metric == 'precision':
            test_score = precision_at_k(model, test, train, k=5).mean()
        else:
            test_score = recall_at_k(model, test, train, k=5).mean()
        return test_score
    except AssertionError:
        print('The metric provided is not correct or available!')
Example #23
def Model_fit_part1(train, test):
    # initialising model with warp loss function

    model_without_features = LightFM(loss="warp")
    start = time.time()
    #===================

    model_without_features.fit(train,
                               user_features=None,
                               item_features=None,
                               sample_weight=None,
                               epochs=1,
                               num_threads=4,
                               verbose=False)

    #===================
    end = time.time()
    print("time taken = {0:.{1}f} seconds".format(end - start, 2))

    print("checking accuracy and results with test data")
    # auc metric score (ranging from 0 to 1)

    start = time.time()
    #===================

    auc_without_features = auc_score(model=model_without_features,
                                     test_interactions=test,
                                     num_threads=4,
                                     check_intersections=False)
    print(auc_without_features)

    #===================
    end = time.time()
    print("accurcay model time taken = {0:.{1}f} seconds".format(
        end - start, 2))
Example #24
def Model_fit_part2(train, prod_features, test):
    # initialising model with warp loss function
    from lightfm import LightFM

    from lightfm.evaluation import auc_score
    model_with_features = LightFM(loss="warp")

    # fitting the model with hybrid collaborative filtering + content based (product + features)
    start = time.time()
    #===================

    model_with_features.fit(train,
                            user_features=None,
                            item_features=prod_features,
                            sample_weight=None,
                            epochs=1,
                            num_threads=4,
                            verbose=False)
    auc_with_features = auc_score(model=model_with_features,
                                  test_interactions=test,
                                  train_interactions=train,
                                  item_features=prod_features,
                                  num_threads=4,
                                  check_intersections=False)
    #===================
    end = time.time()
    print("time taken = {0:.{1}f} seconds".format(end - start, 2))
    print("average AUC without adding item-feature interaction = {0:.{1}f}".
          format(auc_with_features.mean(), 2))
Example #25
    def testAUC(self):
        self.train_auc = auc_score(self.model,
                                   self.item_user,
                                   item_features=self.item,
                                   user_features=self.user,
                                   num_threads=self.num_threads).mean()
        print('Hybrid testing set AUC: %s' % self.train_auc)
Example #26
def test_LightFM_model(model,
                       test_interactions,
                       train_interactions,
                       user_features,
                       movie_features,
                       k=5):
    test_precision = precision_at_k(model,
                                    test_interactions,
                                    train_interactions,
                                    k=k,
                                    user_features=user_features,
                                    item_features=movie_features,
                                    num_threads=2).mean()
    test_recall = recall_at_k(model,
                              test_interactions,
                              train_interactions,
                              k=k,
                              user_features=user_features,
                              item_features=movie_features,
                              num_threads=2).mean()
    test_auc = auc_score(model,
                         test_interactions,
                         train_interactions,
                         user_features=user_features,
                         item_features=movie_features,
                         num_threads=2).mean()
    print('Model')
    print('Precision at k=', str(k), ': ', round(test_precision, 3), sep='')
    print('Recall at k=', str(k) + ': ', round(test_recall, 3), sep='')
    print('AUC: ', round(test_auc, 3), sep='')
    return ({
        'precision': round(test_precision, 3),
        'recall': round(test_recall, 3),
        'auc': round(test_auc, 3)
    })
Example #27
def runMF(product_type, num_samples=20, num_threads=2):
	"""

	헤비유저를 위한 화장품 추천 모델을 만들기 위한 함수입니다.
	LightFM 모델을 사용합니다.
	sample_hyperparameters 함수를 이용하여 하이퍼 파라미터를 구해서 모델 파라미터에 적용시켜 모델을 만들어 줍니다.
	
	-------
	
	interactions:유저와 화장품 정보로 만든 희소 행렬 
	num_samples: 하이퍼 파라미터를 구할때 만들어지는 랜덤한 숫자들을 지정한 개수만큼만 나오게 합니다
		
	"""
	interactions=pickle.load(open("./pickle_data/"+product_type+"/interactions.p", "rb"))
	user_features=pickle.load(open("./pickle_data/user_features.p", "rb"))
	
	x = sparse.csr_matrix(interactions.values)

	train, test = random_train_test_split(x, test_percentage=0.2, random_state=RandomState(523))

	# Use itertools.islice to limit sample_hyperparameters to num_samples random draws.
	for hyperparams in itertools.islice(sample_hyperparameters(), num_samples):
		num_epochs = hyperparams.pop("num_epochs")

		model = LightFM(**hyperparams)
		model.fit(train, user_features, epochs=num_epochs, num_threads=num_threads, verbose=True)

		auc = auc_score(model, test, train_interactions = train, num_threads=num_threads,user_features=user_features).mean()

		hyperparams["num_epochs"] = num_epochs

		yield (auc, model)
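
runMF yields one (auc, model) pair per sampled hyperparameter set. A typical consumer keeps the best model by validation AUC; a sketch (the product_type string is an assumption):

best_auc, best_model = max(runMF('lotion', num_samples=20),
                           key=lambda pair: pair[0])
print('Best AUC: %.3f' % best_auc)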
Example #28
    def run(self):
        self.random = RandomState(self.random_seed)

        orders_path = self.requires()['orders'].output().path
        _, features, interactions = self._generate_matrices(orders_path)

        train_features, val_features, train_interactions, val_interactions = \
            train_test_split(features, interactions, test_size=0.1, random_state=self.random)

        model = LightFM(loss='logistic', no_components=self.no_components, random_state=self.random)

        wait = 0
        best_val_auc = None
        for epoch in range(1, self.epochs + 1):
            # one epoch per loop iteration so early stopping can track progress
            model.fit_partial(train_interactions, user_features=train_features,
                              epochs=1, num_threads=self.num_threads)
            auc_scores = auc_score(model, val_interactions, user_features=val_features,
                                   num_threads=self.num_threads)
            current_val_auc = np.nan_to_num(auc_scores).mean()
            if best_val_auc is None or current_val_auc > best_val_auc:
                joblib.dump(model, self.output().path)
                best_val_auc = current_val_auc
                wait = 0
            else:
                wait += 1
                if wait == self.patience:
                    break
            print('Epoch {}/{} - AUC: {:.6g}'.format(epoch, self.epochs, best_val_auc))
Example #29
    def resultados_colaborativo(self):
        """
        resultados_colaborativo method. Obtains the results of the collaborative model.

        This method is only used in the text interface.
        """

        global train, test, modelo

        # Obtain the results
        precision = precision_at_k(modelo,
                                   test,
                                   train_interactions=train,
                                   k=10,
                                   num_threads=self.CPU_THREADS).mean()
        auc = auc_score(modelo,
                        test,
                        train_interactions=train,
                        num_threads=self.CPU_THREADS).mean()
        recall = recall_at_k(modelo,
                             test,
                             train_interactions=train,
                             k=10,
                             num_threads=self.CPU_THREADS).mean()
        reciprocal = reciprocal_rank(modelo,
                                     test,
                                     train_interactions=train,
                                     num_threads=self.CPU_THREADS).mean()

        # Print the results
        imprimir_resultados_clasico(precision, auc, recall, reciprocal)
Example #30
def best_recommendation():

    #define variables
    best = 0.0
    best_model = ''

    for model in models:
        score = 0.0
        pak_score = evaluation.precision_at_k(model, data2['test'])
        score += np.mean(pak_score)

        rak_score = evaluation.recall_at_k(model, data2['test'])
        score += np.mean(rak_score)

        auc_score = evaluation.auc_score(model, data2['test'])
        score += np.mean(auc_score)

        rr_score = evaluation.reciprocal_rank(model, data2['test'])
        score += np.mean(rr_score)

        print(score)
        if score >= best:
            best = score
            best_model = model

    return best_model
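
best_recommendation depends on module-level models and data2. A hypothetical setup that satisfies it (the MovieLens data and the loss list are assumptions):

import numpy as np
from lightfm import LightFM, evaluation
from lightfm.datasets import fetch_movielens

data2 = fetch_movielens()
models = []
for loss in ('warp', 'bpr', 'logistic'):
    m = LightFM(loss=loss)
    m.fit(data2['train'], epochs=5)
    models.append(m)

print(best_recommendation())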
Example #31
def plot_roc(data):
	alpha = 1e-05
	epochs = 50
	num_components = 32

	warp_model = LightFM(no_components=num_components,
	                    loss='warp',
	                    learning_schedule='adagrad',
	                    max_sampled=3,
	                    user_alpha=alpha,
	                    item_alpha=alpha)

	bpr_model = LightFM(no_components=num_components,
	                    loss='bpr',
	                    learning_schedule='adagrad',
	                    user_alpha=alpha,
	                    item_alpha=alpha)

	logistic_model = LightFM(no_components=num_components,
	                    loss='logistic',
	                    learning_schedule='adagrad',
	                    user_alpha=alpha,
	                    item_alpha=alpha)

	warp_auc = []
	bpr_auc = []
	logistic_auc = []

	for epoch in range(epochs):
	    warp_model.fit_partial(data['matrix'], epochs=5)
	    warp_auc.append(auc_score(warp_model, data['matrix']).mean())
	    
	for epoch in range(epochs):
	    bpr_model.fit_partial(data['matrix'], epochs=5)
	    bpr_auc.append(auc_score(bpr_model, data['matrix']).mean())

	for epoch in range(epochs):
	    logistic_model.fit_partial(data['matrix'], epochs=5)
	    logistic_auc.append(auc_score(logistic_model, data['matrix']).mean())

	x = np.arange(epochs)
	plt.plot(x, np.array(warp_auc))
	plt.plot(x, np.array(bpr_auc))
	plt.plot(x, np.array(logistic_auc))
	plt.legend(['WARP AUC', 'BPR AUC', 'LOGISTIC AUC'], loc='upper right')
	
	return plt.show(block=False)
Example #32
def _get_metrics(model, train_set, test_set):

    train_set = train_set.tocsr()
    test_set = test_set.tocsr()

    train_set.data[train_set.data < 0] = 0.0
    test_set.data[test_set.data < 0] = 0.0

    train_set.eliminate_zeros()
    test_set.eliminate_zeros()

    train_users = train_set.getnnz(axis=1) > 0
    test_users = test_set.getnnz(axis=1) > 0

    # preserve_rows keeps one entry per user so the boolean masks line up
    return (precision_at_k(model, train_set, preserve_rows=True)[train_users].mean(),
            precision_at_k(model, test_set, preserve_rows=True)[test_users].mean(),
            auc_score(model, train_set, preserve_rows=True)[train_users].mean(),
            auc_score(model, test_set, preserve_rows=True)[test_users].mean())
Example #33
def test_intersections_check():

    no_users, no_items = (10, 100)

    train, test = _generate_data(no_users, no_items)

    model = LightFM(loss="bpr")
    model.fit_partial(train)

    # check error is raised when train and test have interactions in common
    with pytest.raises(ValueError):
        evaluation.auc_score(
            model, train, train_interactions=train, check_intersections=True
        )

    with pytest.raises(ValueError):
        evaluation.recall_at_k(
            model, train, train_interactions=train, check_intersections=True
        )

    with pytest.raises(ValueError):
        evaluation.precision_at_k(
            model, train, train_interactions=train, check_intersections=True
        )

    with pytest.raises(ValueError):
        evaluation.reciprocal_rank(
            model, train, train_interactions=train, check_intersections=True
        )

    # check no errors raised when train and test have no interactions in common
    evaluation.auc_score(
        model, test, train_interactions=train, check_intersections=True
    )
    evaluation.recall_at_k(
        model, test, train_interactions=train, check_intersections=True
    )
    evaluation.precision_at_k(
        model, test, train_interactions=train, check_intersections=True
    )
    evaluation.reciprocal_rank(
        model, test, train_interactions=train, check_intersections=True
    )

    # check no error is raised when there are intersections but flag is False
    evaluation.auc_score(
        model, train, train_interactions=train, check_intersections=False
    )
    evaluation.recall_at_k(
        model, train, train_interactions=train, check_intersections=False
    )
    evaluation.precision_at_k(
        model, train, train_interactions=train, check_intersections=False
    )
    evaluation.reciprocal_rank(
        model, train, train_interactions=train, check_intersections=False
    )
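
Outside of tests, check_intersections (on by default) is a cheap guard against train/test leakage. A sketch of surfacing the overlap as a data error in application code:

# Treat overlapping train/test interactions as a data bug, not a warning.
try:
    evaluation.auc_score(model, test, train_interactions=train,
                         check_intersections=True)
except ValueError as err:
    raise RuntimeError('train/test split leaks interactions') from err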
Example #34
def test_auc_score():

    no_users, no_items = (10, 100)

    train = sp.rand(no_users, no_items, format='coo')
    train.data = np.ones_like(train.data)

    model = LightFM(loss='bpr')
    model.fit_partial(train)

    # preserve_rows keeps one entry per user so the boolean mask lines up
    auc = evaluation.auc_score(model,
                               train,
                               preserve_rows=True,
                               num_threads=2)[train.getnnz(axis=1) > 0]
    expected_auc = np.array(_auc(model,
                                 train))

    assert auc.shape == expected_auc.shape
    assert np.abs(auc.mean() - expected_auc.mean()) < 0.01
Example #35
NUM_THREADS = 2  # assumed, matching the identical snippet in the next example
NUM_COMPONENTS = 30
NUM_EPOCHS = 3
ITEM_ALPHA = 1e-6

# Let's fit a WARP model: these generally have the best performance.
model = LightFM(loss='warp',
                item_alpha=ITEM_ALPHA,
                no_components=NUM_COMPONENTS)

# Run 3 epochs.
model = model.fit(train, epochs=NUM_EPOCHS, num_threads=NUM_THREADS)

# Import the evaluation routines
from lightfm.evaluation import auc_score

# Compute and print the AUC score
train_auc = auc_score(model, train, num_threads=NUM_THREADS).mean()
print('Collaborative filtering train AUC: %s' % train_auc)


# We pass in the train interactions to exclude them from predictions.
# This is to simulate a recommender system where we do not
# re-recommend things the user has already interacted with in the train
# set.
test_auc = auc_score(model, test, train_interactions=train, num_threads=NUM_THREADS).mean()
print('Collaborative filtering test AUC: %s' % test_auc)


# Set biases to zero
model.item_biases *= 0.0

test_auc = auc_score(model, test, train_interactions=train, num_threads=NUM_THREADS).mean()
print('Collaborative filtering test AUC: %s' % test_auc)
Example #36
def do_fiber_training(visualization = False):

    if not os.path.isfile(rc.RECOMMENDER_TRAINING) or not os.path.isfile(rc.RECOMMENDER_MODEL):

        yarn_data_matrix = pickle.load(open( rc.YARN_DATA_MATRIX, "rb" ))
        yarn_data_train = sps.coo_matrix(
                                yarn_data_matrix[:int(len(yarn_data_matrix)*0.5)]
                        ) > 0
        yarn_data_test = sps.coo_matrix(
                                yarn_data_matrix[int(len(yarn_data_matrix)*0.5):]
                        ) > 0
        if visualization:
            print(yarn_data_train.shape[0], yarn_data_test.shape[0], len(yarn_data_matrix))

        # Taken from: https://github.com/lyst/lightfm/blob/master/examples/stackexchange/hybrid_crossvalidated.ipynb
        # Set the number of threads; you can increase this
        # if you have more physical cores available.
        NUM_THREADS = 2
        NUM_COMPONENTS = 30
        NUM_EPOCHS = 3
        ITEM_ALPHA = 1e-6

        # Let's fit a WARP model: these generally have the best performance.
        model = LightFM(loss='warp',
                        item_alpha=ITEM_ALPHA,
                        no_components=NUM_COMPONENTS)

        # Run 3 epochs and time it.
        model = model.fit(yarn_data_train, epochs=NUM_EPOCHS, num_threads=NUM_THREADS)



        # Compute and print the AUC score
        train_auc = auc_score(model, yarn_data_train, num_threads=NUM_THREADS).mean()
        print('Collaborative filtering train AUC: %s' % train_auc)


        # We pass in the train interactions to exclude them from predictions.
        # This is to simulate a recommender system where we do not
        # re-recommend things the user has already interacted with in the train
        # set.
        test_auc = auc_score(model, yarn_data_test, train_interactions=yarn_data_train, num_threads=NUM_THREADS).mean()
        print('Collaborative filtering test AUC: %s' % test_auc)

        pickle.dump(yarn_data_matrix,open(rc.RECOMMENDER_TRAINING, 'wb'))
        pickle.dump(model,open(rc.RECOMMENDER_MODEL, 'wb'))
    else:
        yarn_data_matrix = pickle.load(open(rc.RECOMMENDER_TRAINING, 'rb'))
        model = pickle.load(open(rc.RECOMMENDER_MODEL, 'rb'))


    translation_dict = pickle.load(open(rc.YARN_TRANSLATION_DATA, 'rb'))
    print(len(yarn_data_matrix))
    for matrix_id in range(len(yarn_data_matrix)):
        print(matrix_id)
        predictions = model.predict(matrix_id, yarn_data_matrix[matrix_id])
        matches = []
        predictions += abs(np.min(predictions))  # make non-negative
        _max = np.max(predictions)  # find max for normalization
        predictions /= _max  # normalize predictions
        for prediction in range(len(predictions)):

            if predictions[prediction] > 0.9:
                matches.append([translation_dict[prediction], prediction, predictions[prediction]])

        print(translation_dict[matrix_id], matches)