Esempio n. 1
0
def test_ii_batch_accuracy():
    """Check batch-prediction accuracy of ``knn.ItemItem(30)`` with a Bias fallback.

    The ``lktu.ml100k`` ratings are split with
    ``xf.partition_users(ratings, 5, xf.SampleFrac(0.2))``; the algorithm is
    re-fit on every training part and the predictions of all parts are pooled
    before the global MAE and the mean per-user RMSE are asserted.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    ii_algo = knn.ItemItem(30)
    algo = basic.Fallback(ii_algo, bias.Bias())

    def fit_and_predict(train, test):
        # Re-fit the shared algorithm on this fold, then predict its test rows.
        _log.info('running training')
        algo.fit(train)
        n_users = test.user.nunique()
        _log.info('testing %d users', n_users)
        return batch.predict(algo, test, n_jobs=4)

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat([fit_and_predict(train, test) for train, test in folds])

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.70, abs=0.025)

    user_rmse = preds.groupby('user').apply(frame_rmse)
    assert user_rmse.mean() == approx(0.90, abs=0.05)
Esempio n. 2
0
def test_batch_rmse():
    """Check per-user RMSE of ``Bias(damping=5)`` batch predictions.

    Predictions are collected over
    ``xf.partition_users(ratings, 5, xf.SampleN(5))``, indexed by
    ``(user, item)``, and grouped by user. The test asserts that every user in
    the ratings has an RMSE, that none is missing, and that the mean is close
    to 0.93.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    import lenskit.algorithms.basic as bl

    ratings = lktu.ml100k.ratings
    algo = bl.Bias(damping=5)

    def fit_and_predict(train, test):
        algo.fit(train)
        return batch.predict(algo, test).set_index(['user', 'item'])

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    splits = xf.partition_users(ratings, 5, xf.SampleN(5))
    results = pd.concat(
        [fit_and_predict(train, test) for train, test in splits])

    user_rmse = results.groupby('user').apply(frame_rmse)

    # every user from the ratings must appear in the per-user result
    users = ratings.user.unique()
    assert len(user_rmse) == len(users)
    missing = np.setdiff1d(users, user_rmse.index)
    assert missing.size == 0

    # no per-user value may be NaN
    assert user_rmse.notna().all()

    # the average must land in the expected range
    assert user_rmse.mean() == approx(0.93, abs=0.05)
Esempio n. 3
0
def test_tf_bmf_batch_accuracy(tf_session):
    """Check batch-prediction accuracy of ``lktf.BiasedMF`` with a Bias fallback.

    ``tf_session`` is accepted as a parameter but not referenced in the body
    (presumably a pytest fixture that prepares TensorFlow; verify against the
    test configuration).
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    mf_algo = lktf.BiasedMF(
        25, damping=10, batch_size=1024, epochs=20, rng_spec=42)
    algo = basic.Fallback(mf_algo, basic.Bias(damping=10))

    def fit_and_predict(train, test):
        _log.info('running training')
        algo.fit(train)
        n_users = test.user.nunique()
        _log.info('testing %d users', n_users)
        return batch.predict(algo, test)

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    fold_preds = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        fold_preds.append(fit_and_predict(train, test))
    preds = pd.concat(fold_preds)

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.83, abs=0.025)

    user_rmse = preds.groupby('user').apply(frame_rmse)
    assert user_rmse.mean() == approx(1.03, abs=0.05)
Esempio n. 4
0
def test_fsvd_batch_accuracy():
    """Check batch-prediction accuracy of ``svd.FunkSVD(25, 125, damping=10)``.

    FunkSVD is wrapped in a ``basic.Fallback`` together with
    ``basic.Bias(damping=10)``, re-fit on each part of a 5-way user partition,
    and judged on pooled MAE and mean per-user RMSE.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.load_ratings()

    svd_algo = svd.FunkSVD(25, 125, damping=10)
    algo = basic.Fallback(svd_algo, basic.Bias(damping=10))

    def fit_and_predict(train, test):
        _log.info('running training')
        algo.fit(train)
        n_users = test.user.nunique()
        _log.info('testing %d users', n_users)
        return batch.predict(algo, test)

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat([fit_and_predict(train, test) for train, test in splits])

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.74, abs=0.025)

    user_rmse = preds.groupby('user').apply(frame_rmse)
    assert user_rmse.mean() == approx(0.92, abs=0.05)
Esempio n. 5
0
def test_bias_batch_predict(ncpus):
    """Check the pooled RMSE of ``Bias(damping=5)`` batch predictions.

    ``ncpus`` is forwarded to ``batch.predict`` as ``n_jobs``.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    algo = basic.Bias(damping=5)

    def fit_and_predict(train, test):
        _log.info('running training')
        algo.fit(train)
        n_users = test.user.nunique()
        _log.info('testing %d users', n_users)
        return batch.predict(algo, test, n_jobs=ncpus)

    fold_preds = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        fold_preds.append(fit_and_predict(train, test))
    preds = pd.concat(fold_preds)

    _log.info('analyzing predictions')
    rmse = pm.rmse(preds.prediction, preds.rating)
    _log.info('RMSE is %f', rmse)
    assert rmse == pytest.approx(0.95, abs=0.1)
Esempio n. 6
0
def eval(algo, train, test):
    """Fit a clone of ``algo`` on ``train`` and return its RMSE on ``test``.

    The original code created the clone but then fit and predicted with
    ``algo`` itself, which left the clone unused and mutated the caller's
    object. The clone is now the object that is trained and queried, so the
    ``algo`` passed in is left as the caller supplied it.

    NOTE: the name shadows the ``eval`` builtin; it is kept so existing
    callers keep working.

    Args:
        algo: algorithm template; must be accepted by ``util.clone`` and the
            clone must provide ``fit`` and ``predict``.
        train: training data handed to ``fit``.
        test: test data handed to ``predict``; its ``'rating'`` column is the
            ground truth for the RMSE.

    Returns:
        The value of ``predict.rmse(preds, test['rating'])``.
    """
    fittable = util.clone(algo)
    fittable.fit(train)
    preds = fittable.predict(test)
    return predict.rmse(preds, test['rating'])
Esempio n. 7
0
def test_als_batch_accuracy():
    """Compare ``als.BiasedMF`` accuracy for ``method='lu'`` and ``method='cd'``.

    Both models are re-fit on every training part of a 5-way user partition.
    Their predictions are stored side by side as ``lu_pred`` / ``cd_pred``,
    the absolute difference is logged, and each model's MAE and mean per-user
    RMSE are asserted.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    lu_algo = als.BiasedMF(25, iterations=20, damping=5, method='lu')
    cd_algo = als.BiasedMF(25, iterations=25, damping=5, method='cd')

    def fit_and_predict(train, test):
        _log.info('training LU')
        lu_algo.fit(train)
        _log.info('training CD')
        cd_algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        lu_scores = lu_algo.predict(test)
        cd_scores = cd_algo.predict(test)
        return test.assign(lu_pred=lu_scores, cd_pred=cd_scores)

    splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat([fit_and_predict(train, test) for train, test in splits])

    preds['abs_diff'] = (preds.lu_pred - preds.cd_pred).abs()
    by_diff = preds.sort_values('abs_diff', ascending=False)
    _log.info('predictions:\n%s', by_diff)
    _log.info('diff summary:\n%s', preds.abs_diff.describe())

    lu_mae = pm.mae(preds.lu_pred, preds.rating)
    assert lu_mae == approx(0.73, abs=0.03)
    cd_mae = pm.mae(preds.cd_pred, preds.rating)
    assert cd_mae == approx(0.73, abs=0.03)

    # Same per-user RMSE expectation for both solvers, LU first.
    for column in ('lu_pred', 'cd_pred'):
        user_rmse = preds.groupby('user').apply(
            lambda df, column=column: pm.rmse(df[column], df.rating))
        assert user_rmse.mean() == approx(0.91, abs=0.05)
Esempio n. 8
0
def test_tf_isvd(ml20m):
    """Check batch-prediction accuracy of ``lenskit_tf.IntegratedBiasMF(20)``.

    Folds come from ``xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2))``;
    predictions of all folds are pooled before MAE and mean per-user RMSE are
    asserted.
    """
    algo = lenskit_tf.IntegratedBiasMF(20)

    def fit_and_predict(train, test):
        _log.info('running training')
        algo.fit(train)
        n_users = test.user.nunique()
        _log.info('testing %d users', n_users)
        return batch.predict(algo, test)

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    samples = xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2))
    preds = pd.concat([fit_and_predict(train, test) for train, test in samples])

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.60, abs=0.025)

    user_rmse = preds.groupby('user').apply(frame_rmse)
    assert user_rmse.mean() == approx(0.92, abs=0.05)
Esempio n. 9
0
def test_global_metric():
    """Check ``pm.global_metric`` against direct metric calls.

    With no ``metric`` argument the result must equal ``pm.rmse`` on the
    prediction/rating columns; with ``metric=pm.mae`` it must equal ``pm.mae``
    on the same columns.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    from lenskit.algorithms.bias import Bias

    samples = xf.sample_users(
        lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))
    train, test = next(samples)

    algo = Bias()
    algo.fit(train)
    preds = batch.predict(algo, test)

    expected_rmse = pm.rmse(preds.prediction, preds.rating)
    rmse = pm.global_metric(preds)
    assert rmse == expected_rmse

    expected_mae = pm.mae(preds.prediction, preds.rating)
    mae = pm.global_metric(preds, metric=pm.mae)
    assert mae == expected_mae
Esempio n. 10
0
def test_uu_batch_accuracy():
    """Check batch-prediction accuracy of ``knn.UserUser(30)`` with a Bias fallback.

    Each ``(train, test)`` pair of a 5-way user partition is handed to the
    module-level ``__batch_eval`` helper as an ``(algo, train, test)`` tuple;
    the returned frames are pooled before MAE and mean per-user RMSE are
    asserted.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    uu_algo = knn.UserUser(30)
    algo = basic.Fallback(uu_algo, basic.Bias())

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    fold_preds = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        fold_preds.append(__batch_eval((algo, train, test)))
    preds = pd.concat(fold_preds)

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.71, abs=0.028)

    user_rmse = preds.groupby('user').apply(frame_rmse)
    assert user_rmse.mean() == approx(0.91, abs=0.055)
Esempio n. 11
0
def test_user_metric():
    """Check ``pm.user_metric`` against a hand-rolled per-user average.

    For both the default metric and ``metric=pm.mae``, the result must
    approximately equal the mean of the metric computed per ``user`` group.
    """
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    from lenskit.algorithms.bias import Bias

    samples = xf.sample_users(
        lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))
    train, test = next(samples)

    algo = Bias()
    algo.fit(train)
    preds = batch.predict(algo, test)

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    def frame_mae(df):
        return pm.mae(df.prediction, df.rating)

    rmse = pm.user_metric(preds)
    u_rmse = preds.groupby('user').apply(frame_rmse)
    assert rmse == approx(u_rmse.mean())

    mae = pm.user_metric(preds, metric=pm.mae)
    u_mae = preds.groupby('user').apply(frame_mae)
    assert mae == approx(u_mae.mean())
    def test(self, path):
        """Cross-validate ``als.BiasedMF(5)`` on the data set at ``path`` and print its RMSE.

        The loader class is chosen by the first of the substrings ``'100k'``,
        ``'1m'``, ``'10m'`` found in ``path`` (checked in that order);
        ``MovieLens`` is used when none matches. The head of the ratings frame
        and the pooled RMSE are printed; nothing is returned.

        Changes from the previous version: the never-used ``Bias`` instance
        and the never-read ``test_data`` accumulation are gone, the loader
        variable no longer claims to be ``ml100k`` for every data set, and the
        nested helper returns its frame instead of shadowing the ``eval``
        builtin and appending to a caller-supplied list.

        Args:
            path: location of the MovieLens data; also drives loader selection.
        """
        algo_als5 = als.BiasedMF(5)

        def fit_and_predict(aname, algo, train, test):
            # Train a fresh, Recommender-adapted clone so folds stay independent.
            fittable = util.clone(algo)
            fittable = Recommender.adapt(fittable)
            fittable.fit(train)
            # predict ratings and tag them with the algorithm label
            preds = batch.predict(fittable, test)
            preds['Algorithm'] = aname
            return preds

        # Order matters: the first marker contained in the path decides.
        if '100k' in path:
            dataset = ML100K(path)
        elif '1m' in path:
            dataset = ML1M(path)
        elif '10m' in path:
            dataset = ML10M(path)
        else:
            dataset = MovieLens(path)
        ratings = dataset.ratings
        print(ratings.head())

        folds = xf.partition_users(
            ratings[['user', 'item', 'rating']], 5, xf.SampleFrac(0.2))
        all_preds = [
            fit_and_predict('MF', algo_als5, train, test)
            for train, test in folds
        ]
        preds = pd.concat(all_preds, ignore_index=True)
        preds_mf = preds[preds['Algorithm'].str.match('MF')]
        print('RMSE MF:', rmse(preds_mf['prediction'], preds_mf['rating']))
Esempio n. 13
0
def test_als_batch_accuracy():
    """Check accuracy of ``als.BiasedMF(25, iterations=20, damping=5)`` with a Bias fallback.

    Predictions are produced by calling ``algo.predict`` directly on each test
    part and attaching the result as a ``prediction`` column.
    """
    from lenskit.algorithms import basic
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    svd_algo = als.BiasedMF(25, iterations=20, damping=5)
    algo = basic.Fallback(svd_algo, basic.Bias(damping=5))

    def fit_and_predict(train, test):
        _log.info('running training')
        algo.fit(train)
        n_users = test.user.nunique()
        _log.info('testing %d users', n_users)
        scores = algo.predict(test)
        return test.assign(prediction=scores)

    def frame_rmse(df):
        return pm.rmse(df.prediction, df.rating)

    splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat([fit_and_predict(train, test) for train, test in splits])

    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.73, abs=0.025)

    user_rmse = preds.groupby('user').apply(frame_rmse)
    assert user_rmse.mean() == approx(0.91, abs=0.05)
Esempio n. 14
0
def test_rmse_series_missing_value_ignore():
    """``pm.rmse`` with ``missing='ignore'`` yields 2 when the series share only label 'a'."""
    predictions = pd.Series([1, 3], ['a', 'd'])
    truth = pd.Series([3, 4, 1], ['a', 'b', 'c'])
    rmse = pm.rmse(predictions, truth, missing='ignore')
    assert rmse == approx(2)
Esempio n. 15
0
def test_rmse_series_missing_value_error():
    """``pm.rmse`` raises ``ValueError`` when a prediction label ('d') is absent from the truth."""
    predictions = pd.Series([1, 3], ['a', 'd'])
    truth = pd.Series([3, 4, 1], ['a', 'b', 'c'])
    with raises(ValueError):
        pm.rmse(predictions, truth)
Esempio n. 16
0
def test_rmse_series_subset_axis():
    """``pm.rmse`` yields 2 when the prediction labels ('a', 'c') are a subset of the truth labels."""
    predictions = pd.Series([1, 3], ['a', 'c'])
    truth = pd.Series([3, 4, 1], ['a', 'b', 'c'])
    rmse = pm.rmse(predictions, truth)
    assert rmse == approx(2)
Esempio n. 17
0

# Set to False to skip scoring the test split (its RMSE is then reported as 0).
test_bool = True

# Pre-split data, loaded from pickled frames.
train = pd.read_pickle("../data/ml-1m-split/train.pkl")
val = pd.read_pickle("../data/ml-1m-split/val.pkl")
test = pd.read_pickle("../data/ml-1m-split/test.pkl")

num_factors = 30
num_iters = 100

model = BiasedMF(num_factors, iterations=num_iters)
print("Fitting model...")
model.fit(train)

print("Making validation predictions...")
val_preds = predict(model, val)
val_result = rmse(val_preds["prediction"], val_preds["rating"])

# Default used when the test split is skipped.
test_result = 0
if test_bool:
    print("Making test predictions...")
    test_preds = predict(model, test)
    test_result = rmse(test_preds["prediction"], test_preds["rating"])

# One report line per entry, joined with newlines.
report_lines = [
    "============= RESULTS =============",
    "Factors: {}".format(num_factors),
    "Iterations: {}".format(num_iters),
    "Validation RMSE: {}".format(val_result),
    "Test RMSE: {}".format(test_result),
]
print("\n".join(report_lines))



Esempio n. 18
0
# Algorithms under comparison.
algo_pop = Bias()
algo_ii = knn.ItemItem(20)


def eval(aname, algo, train, test, all_preds):
    """Fit a clone of ``algo`` on ``train`` and append its ``test`` predictions.

    NOTE: the name shadows the ``eval`` builtin; it is kept so existing
    callers keep working.

    Args:
        aname: label written to the ``'Algorithm'`` column of the predictions.
        algo: algorithm template; it is cloned and adapted, never fit directly.
        train: training data for this fold.
        test: test data for this fold.
        all_preds: list that receives the labelled prediction frame (mutated
            in place).
    """
    fittable = util.clone(algo)
    fittable = Recommender.adapt(fittable)
    fittable.fit(train)
    preds = batch.predict(fittable, test)
    preds['Algorithm'] = aname
    all_preds.append(preds)


all_preds = []
test_data = []
for train, test in xf.partition_users(df_ratings, 5, xf.SampleFrac(0.2)):
    test_data.append(test)
    eval('BIAS', algo_pop, train, test, all_preds)
    eval('II', algo_ii, train, test, all_preds)

preds = pd.concat(all_preds, ignore_index=True)

# Fixed: this filter used to select 'BIAS', so the "RMSE II" figure printed
# below was the Bias result again rather than the item-item one.
preds_ii = preds[preds['Algorithm'] == 'II']
print(preds_ii.head())

preds_bias = preds[preds['Algorithm'] == 'BIAS']
print(preds_bias.head())

test_data = pd.concat(test_data, ignore_index=True)

print('RMSE BIAS: ', rmse(preds_bias['prediction'], preds_bias['rating']))
print('RMSE II: ', rmse(preds_ii['prediction'], preds_ii['rating']))
Esempio n. 19
0
set_matplotlib_formats('svg')
# Mean nDCG per algorithm as a bar chart.
mean_ndcg = results.groupby('Algorithm').ndcg.mean()
mean_ndcg.plot.bar()

# %%
results.groupby('Algorithm')['precision'].mean()

# %%
results.groupby('Algorithm')['recall'].mean()

# %% [markdown]
# ### 2. Prediction Metrics: RMSE

# %%
# RMSE per (Algorithm, user) pair; pairs whose RMSE is NaN are dropped.
per_algo_user = preds.groupby(['Algorithm', 'user'])
user_rmse = per_algo_user.apply(
    lambda df: rmse(df.prediction, df.rating)).dropna()
print(user_rmse.groupby('Algorithm').mean())

print(user_rmse['domex'])

# One box per algorithm label, in this order.
boxplot_labels = [
    'ii', 'trst', 'socsim', 'domex', 'hierch', 'socap', 'soxsim',
    'symp', 'rel'
]
plt.boxplot(tuple(user_rmse[label] for label in boxplot_labels),
            labels=boxplot_labels)
plt.show()
"""
print('Item Item Knn: max rmse per user = '******', min rmse per user = '******', average rmse per user = '******'Trust: max rmse per user = '******', min rmse per user = '******', average rmse per user = ' + str(user_rmse_trst.mean()))