Ejemplo n.º 1
0
def test_tf_ibmf_batch_accuracy(n_jobs, tf_session):
    """Check batch prediction accuracy of IntegratedBiasMF on the ML-100K ratings.

    The matrix factorizer is wrapped in a ``Fallback`` together with a damped
    ``Bias`` model.  For every train/test split produced by
    ``xf.partition_users`` the wrapped algorithm is re-fit on the training
    part and the test part is predicted through ``batch.predict`` with the
    supplied ``n_jobs``.  The pooled predictions must reach an MAE of about
    0.73 (+/- 0.025) and a mean per-user RMSE of about 0.93 (+/- 0.05).

    ``tf_session`` is requested as a fixture but not referenced in the body.
    """
    from lenskit.algorithms import basic
    from lenskit.algorithms import bias
    import lenskit.crossfold as xf
    from lenskit import batch
    import lenskit.metrics.predict as pm

    ratings = lktu.ml100k.ratings

    factorizer = lktf.IntegratedBiasMF(20, batch_size=1024, epochs=20, rng_spec=42)
    algo = basic.Fallback(factorizer, bias.Bias(damping=10))

    def run_fold(train, test):
        # Named run_fold rather than eval so the builtin is not shadowed.
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test, n_jobs=n_jobs)

    def per_user_rmse(frame):
        return pm.rmse(frame.prediction, frame.rating)

    # Evaluate every fold first, then pool the per-fold prediction frames.
    fold_results = []
    for train, test in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)):
        fold_results.append(run_fold(train, test))
    preds = pd.concat(fold_results)

    overall_mae = pm.mae(preds.prediction, preds.rating)
    assert overall_mae == approx(0.73, abs=0.025)

    rmse_by_user = preds.groupby('user').apply(per_user_rmse)
    assert rmse_by_user.mean() == approx(0.93, abs=0.05)
Ejemplo n.º 2
0
def test_tf_ibmf_general(tmp_path, tf_session):
    """Train an integrated bias model, round-trip it through binpickle, and predict.

    After fitting, the 'user-embed' and 'item-embed' layers must each hold a
    weight matrix with one row per distinct user/item in the ratings and 20
    columns.  The model is then dumped to a file under ``tmp_path``, loaded
    back, and the loaded copy is used for prediction, including a request
    that contains an item ID expected to be absent from the data.

    ``tf_session`` is requested as a fixture but not referenced in the body.
    """
    n_features = 20
    ratings = lktu.ml_test.ratings
    fn = tmp_path / 'bias.bpk'

    original = lktf.IntegratedBiasMF(
        n_features, batch_size=1024, epochs=20, rng_spec=42)
    original.fit(ratings)

    # Both embedding layers: rows match the distinct IDs, columns the features.
    embed_checks = (
        ('user-embed', ratings.user),
        ('item-embed', ratings.item),
    )
    for layer_name, id_column in embed_checks:
        layer = original.model.get_layer(layer_name)
        assert layer.get_weights()[0].shape == (id_column.nunique(), n_features)

    binpickle.dump(original, fn)
    _log.info('serialized to %d bytes', fn.stat().st_size)
    algo = binpickle.load(fn)

    # Prediction with the reloaded model must yield a value for every item.
    preds = algo.predict_for_user(100, [5, 10, 30])
    assert all(preds.notna())

    # An unknown item still gets a slot in the result, but its value is missing.
    missing_item = 230413804
    preds = algo.predict_for_user(100, [5, 10, missing_item])
    assert len(preds) == 3
    assert all(preds.loc[[missing_item]].isna())
    assert preds.isna().sum() == 1
Ejemplo n.º 3
0
def test_tf_isvd(ml20m):
    """Check IntegratedBiasMF prediction accuracy on sampled splits of ``ml20m``.

    ``ml20m`` is handed to ``xf.sample_users`` (arguments 2 and 5000,
    presumably the number of samples and users per sample -- confirm against
    the crossfold API) with a ``SampleFrac(0.2)`` holdout.  The algorithm is
    re-fit on each training split and the matching test split is predicted
    with ``batch.predict``.  The pooled predictions must reach an MAE of about
    0.60 (+/- 0.025) and a mean per-user RMSE of about 0.92 (+/- 0.05).
    """
    algo = lktf.IntegratedBiasMF(20)

    def run_fold(train, test):
        # Named run_fold rather than eval so the builtin is not shadowed.
        _log.info('running training')
        algo.fit(train)
        _log.info('testing %d users', test.user.nunique())
        return batch.predict(algo, test)

    def per_user_rmse(frame):
        return pm.rmse(frame.prediction, frame.rating)

    # Evaluate every split first, then pool the per-split prediction frames.
    fold_results = []
    for train, test in xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2)):
        fold_results.append(run_fold(train, test))
    preds = pd.concat(fold_results)

    overall_mae = pm.mae(preds.prediction, preds.rating)
    assert overall_mae == approx(0.60, abs=0.025)

    rmse_by_user = preds.groupby('user').apply(per_user_rmse)
    assert rmse_by_user.mean() == approx(0.92, abs=0.05)
Ejemplo n.º 4
0
"""
This module defines the algorithms, and their default configurations, that
we are going to use.
"""

from lenskit.algorithms import item_knn, user_knn, als, tf
from lenskit.algorithms import basic

# Pre-configured algorithm instances.  Each name below is bound once, when
# this module is imported, to an already-constructed algorithm object.

# Bias model with a damping value of 5.
Bias = basic.Bias(damping=5)
# basic.Popular with its default arguments.
Pop = basic.Popular()
# Item-item k-NN; 20 is the first positional argument (presumably the
# neighborhood size -- confirm against item_knn.ItemItem), save_nbrs=2500.
II = item_knn.ItemItem(20, save_nbrs=2500)
# User-user k-NN; 30 is the first positional argument (presumably the
# neighborhood size -- confirm against user_knn.UserUser).
UU = user_knn.UserUser(30)
# ALS-based models; 50 is the first positional argument of each (presumably
# the number of latent features -- confirm against the als module).
ALS = als.BiasedMF(50)
IALS = als.ImplicitMF(50)
# TensorFlow-based models from lenskit.algorithms.tf, also constructed with 50
# as the first positional argument (presumably the feature count -- confirm).
BPR = tf.BPR(50)
TFMF = tf.IntegratedBiasMF(50)