Example #1
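These snippets come from the LensKit test suite and rely on names bound at module level in the original test files (pd, np, approx, _log, lktu, lkb, lktf, svd, knn, basic, bias), plus pytest fixtures such as tf_session in Example #1. A plausible sketch of that preamble follows; the LensKit module paths, especially for lktf and svd, are assumptions based on older releases and should be checked against the version you actually use.

import logging

import numpy as np
import pandas as pd
import pytest
from pytest import approx

import lenskit.util.test as lktu                     # ml_test / ml100k sample data sets
import lenskit.batch as lkb                          # used as `lkb` in Example #5
from lenskit.algorithms import basic, bias           # Fallback, TopN, Bias
from lenskit.algorithms import user_knn as knn       # `knn.UserUser` in Example #6
import lenskit.algorithms.funksvd as svd             # assumed path for `svd.FunkSVD` in Example #2
lktf = pytest.importorskip('lenskit.algorithms.tf')  # assumed path for `lktf.BiasedMF` in Example #1

_log = logging.getLogger(__name__)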
def test_tf_bmf_batch_accuracy(tf_session):
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    algo = lktf.BiasedMF(25,
                         damping=10,
                         batch_size=1024,
                         epochs=20,
                         rng_spec=42)
    algo = basic.Fallback(algo, bias.Bias(damping=10))

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.83, abs=0.025)

    user_rmse = preds.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(1.03, abs=0.05)
Example #2
def test_fsvd_batch_accuracy():
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    svd_algo = svd.FunkSVD(25, 125, damping=10)
    algo = basic.Fallback(svd_algo, bias.Bias(damping=10))

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval(train, test) for (train, test) in folds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.74, abs=0.025)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.92, abs=0.05)
Example #3
def test_batch_rmse():
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    import lenskit.algorithms.bias as bs
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings
    algo = bs.Bias(damping=5)

    def eval(train, test):
        algo.fit(train)
        preds = batch.predict(algo, test)
        return preds.set_index(['user', 'item'])

    results = pd.concat(
        (eval(train, test)
         for (train, test) in xf.partition_users(ratings, 5, xf.SampleN(5))))

    user_rmse = results.groupby('user').apply(
        lambda df: pm.rmse(df.prediction, df.rating))

    # we should have all users
    users = ratings.user.unique()
    assert len(user_rmse) == len(users)
    missing = np.setdiff1d(users, user_rmse.index)
    assert len(missing) == 0

    # we should not have any missing values
    assert all(user_rmse.notna())

    # we should have a reasonable mean
    assert user_rmse.mean() == approx(0.93, abs=0.05)
Example #4
def test_bias_batch_predict(ncpus):
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    algo = bias.Bias(damping=5)

    def eval(train, test):
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        recs = batch.predict(algo, test, n_jobs=ncpus)
        return recs

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = pd.concat(eval(train, test) for (train, test) in folds)

    _log.info('analyzing predictions')
    rmse = pm.rmse(preds.prediction, preds.rating)
    _log.info('RMSE is %f', rmse)
    assert rmse == pytest.approx(0.95, abs=0.1)
Example #5
def test_batch_predict_preshared():
    "Test batch prediction with isolated training and a pre-serialized algorithm."
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf

    algo = bias.Bias()
    splits = xf.sample_users(lktu.ml_test.ratings, 1, 100, xf.SampleN(5))
    train, test = next(splits)

    ares = lkb.train_isolated(algo, train)
    preds = lkb.predict(ares, test)
    assert len(preds) == len(test)
    assert not any(preds['prediction'].isna())
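In Example #5, lkb is presumably lenskit.batch (see the preamble sketch above). train_isolated fits the algorithm in a separate process and returns a pre-serialized model; as the assertions show, that object can be passed to lkb.predict in place of the algorithm itself. The lifecycle of the returned object is not visible here, so treat this note as an inference from the test rather than a description of the API.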
Example #6
def test_uu_batch_accuracy():
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    uu_algo = knn.UserUser(30)
    algo = basic.Fallback(uu_algo, bias.Bias())

    folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2))
    preds = [__batch_eval((algo, train, test)) for (train, test) in folds]
    preds = pd.concat(preds)
    mae = pm.mae(preds.prediction, preds.rating)
    assert mae == approx(0.71, abs=0.05)

    user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating))
    assert user_rmse.mean() == approx(0.91, abs=0.055)
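Example #6 calls __batch_eval, a helper defined elsewhere in the original test module rather than in this excerpt. Given that it is invoked with a single (algo, train, test) tuple, a minimal sketch of such a helper might look like the following (an assumption, not the actual LensKit test code):

def __batch_eval(job):
    "Fit an algorithm on a training fold and predict the test fold (assumed helper)."
    from lenskit import batch
    algo, train, test = job          # the call site packs everything into one tuple
    algo.fit(train)
    return batch.predict(algo, test)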
Example #7
def test_topn_big():
    ratings = lktu.ml_test.ratings
    users = ratings.user.unique()
    items = ratings.item.unique()
    user_items = ratings.set_index('user').item

    algo = basic.TopN(bias.Bias())
    a2 = algo.fit(ratings)
    assert a2 is algo

    # test 100 random users
    for u in np.random.choice(users, 100, False):
        recs = algo.recommend(u, 100)
        assert len(recs) == 100
        rated = user_items.loc[u]
        assert all(~recs['item'].isin(rated))
        unrated = np.setdiff1d(items, rated)
        scores = algo.predictor.predict_for_user(u, unrated)
        top = scores.nlargest(100)
        assert top.values == approx(recs.score.values)