コード例 #1
0
def test_bias_item_predict():
    """Check that an item-only Bias predicts item offset plus global mean."""
    model = Bias(users=False)
    model.fit(simple_df)
    preds = model.predict_for_user(10, [1, 2, 3])

    assert len(preds) == 3
    # With user biases disabled, the score is just mean + item offset.
    expected = (model.item_offsets_ + model.mean_).values
    assert preds.values == approx(expected)
コード例 #2
0
def test_bias_predict_unknown_user():
    """Check predictions for user 15: item offsets plus the global mean."""
    model = Bias()
    model.fit(simple_df)

    preds = model.predict_for_user(15, [1, 3])

    assert len(preds) == 2
    # No user offset is expected to contribute for this user.
    expected = model.item_offsets_.loc[[1, 3]] + model.mean_
    assert preds.values == approx(expected.values)
コード例 #3
0
def test_bias_global_predict():
    """Check that with item and user biases off, every score is the global mean."""
    model = Bias(items=False, users=False)
    model.fit(simple_df)
    preds = model.predict_for_user(10, [1, 2, 3])

    assert len(preds) == 3
    global_mean = model.mean_
    # Exact match first, then the tolerant comparison on the raw values.
    assert (preds == global_mean).all()
    assert preds.values == approx(global_mean)
コード例 #4
0
def test_bias_no_item():
    """Check fitting with ``items=False``: no item offsets, user offsets present."""
    model = Bias(items=False)
    model.fit(simple_df)
    assert model.mean_ == approx(3.5)
    assert model.item_offsets_ is None

    user_offs = model.user_offsets_
    assert user_offs is not None
    assert user_offs.index.name == 'user'

    users = [10, 12, 13]
    assert set(user_offs.index) == set(users)
    expected = np.array([1.0, -0.5, -1.5])
    assert user_offs.loc[users].values == approx(expected)
コード例 #5
0
def test_bias_predict_unknown_item():
    """Check predictions when one requested item (4) is scored without an item offset."""
    model = Bias()
    model.fit(simple_df)

    preds = model.predict_for_user(10, [1, 3, 4])

    assert len(preds) == 3
    # 0.25 is presumably user 10's offset on simple_df -- verify against the fixture.
    user_bias = 0.25
    intended = model.item_offsets_.loc[[1, 3]] + model.mean_ + user_bias
    assert preds.loc[[1, 3]].values == approx(intended.values)
    # Item 4 is expected to receive only the global mean plus the user term.
    assert preds.loc[4] == approx(model.mean_ + user_bias)
コード例 #6
0
def test_bias_transform():
    """Round-trip ratings through ``fit_transform`` and ``inverse_transform``.

    Checks that the normalized frame keeps the same user and item columns and
    that denormalizing recovers the original ratings to a relative tolerance
    of 1e-6.
    """
    algo = Bias()
    ratings = ml_test.ratings

    normed = algo.fit_transform(ratings)

    assert all(normed['user'] == ratings['user'])
    assert all(normed['item'] == ratings['item'])
    denorm = algo.inverse_transform(normed)
    # Compare the values *against* approx(). Asserting on the approx object
    # itself (``assert approx(a == b, tol)``) only checks that the object is
    # truthy, which it always is, so the round trip was never verified.
    assert denorm['rating'].values == approx(ratings['rating'].values, rel=1.0e-6)
コード例 #7
0
def test_bias_no_user():
    """Check fitting with ``users=False``: item offsets present, no user offsets."""
    model = Bias(users=False)
    model.fit(simple_df)
    assert model.mean_ == approx(3.5)

    item_offs = model.item_offsets_
    assert item_offs is not None
    assert item_offs.index.name == 'item'
    assert set(item_offs.index) == {1, 2, 3}
    # Label-based slice covering items 1 through 3.
    expected = np.array([0, 1.5, -1.5])
    assert item_offs.loc[1:3].values == approx(expected)

    assert model.user_offsets_ is None
コード例 #8
0
def test_bias_user_damp():
    """Check damped user offsets (damping=5) when item biases are disabled."""
    model = Bias(items=False, damping=5)
    model.fit(simple_df)
    assert model.mean_ == approx(3.5)
    assert model.item_offsets_ is None

    user_offs = model.user_offsets_
    assert user_offs is not None
    assert user_offs.index.name == 'user'

    users = [10, 12, 13]
    assert set(user_offs.index) == set(users)
    # Expected values are rounded, hence the absolute tolerance.
    expected = np.array([0.2857, -0.08333, -0.25])
    assert user_offs.loc[users].values == approx(expected, abs=1.0e-4)
コード例 #9
0
def test_transform_user_without_user_bias():
    """Check ``inverse_transform_user`` adds user offset, item offset and mean back.

    NOTE(review): the test name says "without user bias", but the model is
    built with the default ``Bias()`` and the assertions use
    ``user_offsets_`` -- confirm the intended configuration.
    """
    user = 12
    model = Bias()
    model.fit(simple_df)

    # Series index holds item ids; the values are the ratings to denormalize.
    new_ratings = pd.Series([-0.5, 1.5], index=[2, 3])

    restored = model.inverse_transform_user(user, new_ratings)

    for item in (2, 3):
        assert restored[item] == (new_ratings[item] + model.user_offsets_.loc[user]
                                  + model.item_offsets_.loc[item] + model.mean_)
コード例 #10
0
def test_bias_check_arguments():
    """Check that ``Bias`` raises ``ValueError`` for negative damping values."""
    invalid_dampings = [
        -1,       # negative scalar damping
        (-1, 5),  # negative first element (user damping per the original comment)
        (5, -1),  # negative second element (presumably the item damping -- confirm)
    ]
    for damping in invalid_dampings:
        with raises(ValueError):
            Bias(damping=damping)
コード例 #11
0
def test_bias_transform_indexes():
    """Round-trip ratings through ``fit_transform(indexes=True)``.

    Checks that the normalized frame keeps the user and item columns, that its
    ``uidx`` / ``iidx`` columns match the positions of each user and item in
    the fitted offset indexes, and that ``inverse_transform`` recovers the
    original ratings to a relative tolerance of 1e-6.
    """
    algo = Bias()
    ratings = ml_test.ratings

    normed = algo.fit_transform(ratings, indexes=True)

    assert all(normed['user'] == ratings['user'])
    assert all(normed['item'] == ratings['item'])
    assert all(normed['uidx'] == algo.user_offsets_.index.get_indexer(ratings['user']))
    assert all(normed['iidx'] == algo.item_offsets_.index.get_indexer(ratings['item']))
    denorm = algo.inverse_transform(normed)
    # Compare the values *against* approx(). Asserting on the approx object
    # itself (``assert approx(a == b, tol)``) only checks that the object is
    # truthy, which it always is, so the round trip was never verified.
    assert denorm['rating'].values == approx(ratings['rating'].values, rel=1.0e-6)
コード例 #12
0
def test_bias_clone():
    """Check that cloning a fitted Bias gives a distinct object without fitted state."""
    original = Bias()
    original.fit(simple_df)

    params = original.get_params()
    assert sorted(params.keys()) == ['damping', 'items', 'users']

    copy = lku.clone(original)
    assert copy is not original
    # None of the fitted attributes may be set on the clone.
    for fitted_attr in ('mean_', 'item_offsets_', 'user_offsets_'):
        assert getattr(copy, fitted_attr, None) is None
コード例 #13
0
def test_sweep_combine(tmp_path):
    """Run a ``combine=False`` sweep and check per-run, then combined, outputs.

    After ``run()`` only the per-run files may exist; the combined files must
    appear once ``collect_results()`` has been called.
    """
    work = pathlib.Path(tmp_path)
    sweep = batch.MultiEval(tmp_path, combine=False)

    ratings = ml_test.ratings
    sweep.add_datasets(lambda: xf.partition_users(ratings, 5, xf.SampleN(5)), name='ml-small')

    damped = [Bias(damping=0), Bias(damping=5)]
    sweep.add_algorithms(damped, attrs=['damping'])
    sweep.add_algorithms(Popular())

    sweep.persist_data()

    # persist_data() should have written one train/test file pair per partition.
    for part in range(1, 6):
        assert (work / 'ds{}-train.parquet'.format(part)).exists()
        assert (work / 'ds{}-test.parquet'.format(part)).exists()

    # The datasets are now path pairs.
    for ds, _cf, _dsa in sweep.datasets:
        assert isinstance(ds, tuple)
        train, test = ds
        assert isinstance(train, pathlib.Path)
        assert isinstance(test, pathlib.Path)

    assert sweep.run_count() == 5 * 3

    try:
        sweep.run()
    finally:
        runs_csv = work / 'runs.csv'
        if runs_csv.exists():
            runs = pd.read_csv(runs_csv)
            print(runs)

    combined_outputs = ('runs.csv', 'runs.parquet',
                        'predictions.parquet', 'recommendations.parquet')
    for fname in combined_outputs:
        assert not (work / fname).exists()

    for run, (ds, a) in enumerate(sweep._flat_runs(), 1):
        assert (work / 'run-{}.json'.format(run)).exists()
        # Prediction files are only required for Predictor algorithms.
        if isinstance(a.algorithm, Predictor):
            assert (work / 'predictions-{}.parquet'.format(run)).exists()
        assert (work / 'recommendations-{}.parquet'.format(run)).exists()

    sweep.collect_results()

    for fname in combined_outputs:
        assert (work / fname).exists()

    runs = pd.read_parquet(work / 'runs.parquet')
    assert len(runs) == 5 * 3
コード例 #14
0
def test_bias_new_user_predict():
    """Check predictions for a user supplied only as a ratings series (user=None)."""
    model = Bias()
    model.fit(simple_df)

    frame = pd.DataFrame({'item': [1, 2, 3], 'rating': [1.5, 2.5, 3.5]})
    ratings = frame.set_index('item').rating
    preds = model.predict_for_user(None, [1, 3], ratings=ratings)

    # Expected user bias: mean residual after removing global mean and item offsets.
    residuals = ratings - model.mean_ - model.item_offsets_
    umean = residuals.mean()
    _log.info('user mean is %f', umean)

    assert len(preds) == 2
    expected = (model.mean_ + model.item_offsets_ + umean).loc[[1, 3]]
    assert preds.values == approx(expected.values)
コード例 #15
0
def test_bias_full():
    """Check the global mean, item offsets and user offsets of a default Bias fit."""
    model = Bias()
    model.fit(simple_df)
    assert model.mean_ == approx(3.5)

    item_offs = model.item_offsets_
    assert item_offs is not None
    assert item_offs.index.name == 'item'
    assert set(item_offs.index) == {1, 2, 3}
    assert item_offs.loc[1:3].values == approx(np.array([0, 1.5, -1.5]))

    user_offs = model.user_offsets_
    assert user_offs is not None
    assert user_offs.index.name == 'user'
    users = [10, 12, 13]
    assert set(user_offs.index) == set(users)
    assert user_offs.loc[users].values == approx(np.array([0.25, -0.5, 0]))
コード例 #16
0
ファイル: test_bias.py プロジェクト: keener101/lkpy
def test_bias_transform():
    """Check the ``fit_transform`` / ``inverse_transform`` round trip and the normalized values.

    The normalized ratings are also recomputed by hand from the fitted mean
    and offsets and compared with the transform output.
    """
    model = Bias()
    ratings = ml_test.ratings

    normed = model.fit_transform(ratings)

    assert all(normed['user'] == ratings['user'])
    assert all(normed['item'] == ratings['item'])
    restored = model.inverse_transform(normed)
    assert restored['rating'].values == approx(ratings['rating'], rel=1.0e-6)

    # Attach each row's item and user offset, then subtract them manually.
    joined = ratings.join(model.item_offsets_, on='item')
    joined = joined.join(model.user_offsets_, on='user')
    manual = joined.rating - model.mean_ - joined.i_off - joined.u_off
    assert normed['rating'].values == approx(manual.values)
コード例 #17
0
def test_bias_damped():
    """Check item and user offsets when a single damping value of 5 is used."""
    model = Bias(damping=5)
    model.fit(simple_df)
    assert model.mean_ == approx(3.5)

    item_offs = model.item_offsets_
    assert item_offs is not None
    assert item_offs.index.name == 'item'
    assert set(item_offs.index) == {1, 2, 3}
    assert item_offs.loc[1:3].values == approx(np.array([0, 0.25, -0.25]))

    user_offs = model.user_offsets_
    assert user_offs is not None
    assert user_offs.index.name == 'user'
    users = [10, 12, 13]
    assert set(user_offs.index) == set(users)
    # Expected values are rounded, hence the absolute tolerance.
    expected_users = np.array([0.25, -0.08333, -0.20833])
    assert user_offs.loc[users].values == approx(expected_users, abs=1.0e-4)
コード例 #18
0
def test_sweep_oneshot(tmp_path):
    """Call ``sweep.run(3)`` on a ``combine=False`` sweep and check run 3's output files."""
    work = pathlib.Path(tmp_path)
    sweep = batch.MultiEval(tmp_path, combine=False)

    ratings = ml_test.ratings
    sweep.add_datasets(lambda: xf.partition_users(ratings, 5, xf.SampleN(5)), name='ml-small')
    sweep.add_algorithms(Bias(damping=5))

    try:
        sweep.run(3)
    finally:
        runs_csv = work / 'runs.csv'
        if runs_csv.exists():
            runs = pd.read_csv(runs_csv)
            print(runs)

    # No combined output is expected.
    for fname in ('runs.csv', 'runs.parquet',
                  'predictions.parquet', 'recommendations.parquet'):
        assert not (work / fname).exists()

    # The files for run 3 must exist.
    run_json = work / 'run-3.json'
    assert run_json.exists()
    assert (work / 'predictions-3.parquet').exists()
    assert (work / 'recommendations-3.parquet').exists()

    with run_json.open() as f:
        run = json.load(f)
    assert run['RunId'] == 3
コード例 #19
0
def test_sweep_nopreds(tmp_path):
    """Sweep over folds whose test frames have no ``rating`` column.

    Checks that run metadata and recommendations are written, that no
    ``predictions.parquet`` file is produced, and that every recommendation
    row belongs to a recorded run and carries a float64 score.
    """
    work = pathlib.Path(tmp_path)
    sweep = batch.MultiEval(tmp_path, eval_n_jobs=1)

    ratings = ml_test.ratings
    # Drop the rating column from each test frame before adding the folds.
    folds = [(train, test.drop(columns=['rating']))
             for (train, test) in xf.partition_users(ratings, 5, xf.SampleN(5))]
    sweep.add_datasets(folds, DataSet='ml-small')
    sweep.add_algorithms(Popular())
    sweep.add_algorithms(Bias(damping=0))

    try:
        sweep.run()
    finally:
        # Print the run table, when one was written, to help diagnose failures.
        if (work / 'runs.csv').exists():
            runs = pd.read_csv(work / 'runs.csv')
            print(runs)

    assert (work / 'runs.csv').exists()
    assert (work / 'runs.parquet').exists()
    assert not (work / 'predictions.parquet').exists()
    assert (work / 'recommendations.parquet').exists()

    runs = pd.read_parquet(work / 'runs.parquet')
    # 2 algorithms by 5 partitions
    assert len(runs) == 10
    assert all(np.sort(runs.AlgoClass.unique()) == ['Bias', 'Popular'])
    # The unused ``bias_runs`` selection was removed; nothing below needs it.

    recs = pd.read_parquet(work / 'recommendations.parquet')
    assert all(recs.RunId.isin(runs.RunId))
    assert recs['score'].dtype == np.float64
コード例 #20
0
def test_sweep_norecs(tmp_path):
    """Sweep with ``recommend=None`` and check that no recommendations file is written.

    Also checks that every prediction row belongs to a Bias run.
    """
    work = pathlib.Path(tmp_path)
    sweep = batch.MultiEval(tmp_path, recommend=None)

    ratings = ml_test.ratings
    folds = xf.partition_users(ratings, 5, xf.SampleN(5))
    sweep.add_datasets(folds, DataSet='ml-small')
    sweep.add_algorithms(Bias(damping=0))
    sweep.add_algorithms(Popular())

    try:
        sweep.run()
    finally:
        runs_csv = work / 'runs.csv'
        if runs_csv.exists():
            runs = pd.read_csv(runs_csv)
            print(runs)

    for fname in ('runs.csv', 'runs.parquet', 'predictions.parquet'):
        assert (work / fname).exists()
    assert not (work / 'recommendations.parquet').exists()

    runs = pd.read_parquet(work / 'runs.parquet')
    # 2 algorithms x 5 partitions
    assert len(runs) == 10
    algo_classes = np.sort(runs.AlgoClass.unique())
    assert all(algo_classes == ['Bias', 'Popular'])
    bias_runs = runs[runs.AlgoClass == 'Bias']

    preds = pd.read_parquet(work / 'predictions.parquet')
    assert all(preds.RunId.isin(bias_runs.RunId))
コード例 #21
0
def test_sweep_save(tmp_path):
    """Pickle a sweep after ``persist_data()``, reload it, run it, and check outputs."""
    work = pathlib.Path(tmp_path)
    sweep = batch.MultiEval(tmp_path)

    ratings = ml_test.ratings
    sweep.add_datasets(lambda: xf.partition_users(ratings, 5, xf.SampleN(5)), name='ml-small')
    sweep.add_algorithms(Bias(damping=5))

    sweep.persist_data()

    # Round-trip the sweep object through a pickle file written by this test.
    pickle_path = work / 'sweep.dat'
    with pickle_path.open('wb') as out:
        pickle.dump(sweep, out)
    with pickle_path.open('rb') as inp:
        sweep = pickle.load(inp)

    try:
        sweep.run()
    finally:
        runs_csv = work / 'runs.csv'
        if runs_csv.exists():
            runs = pd.read_csv(runs_csv)
            print(runs)

    for fname in ('runs.csv', 'runs.parquet',
                  'predictions.parquet', 'recommendations.parquet'):
        assert (work / fname).exists()

    runs = pd.read_parquet(work / 'runs.parquet')
    # 1 algorithm by 5 partitions
    assert len(runs) == 5
コード例 #22
0
def test_fallback_train_one():
    """Check a Fallback wrapping a single Bias: one fitted Bias with the ratings mean."""
    ratings = lktu.ml_test.ratings
    fallback = basic.Fallback(Bias())
    fallback.fit(ratings)

    assert len(fallback.algorithms) == 1
    assert isinstance(fallback.algorithms[0], Bias)
    expected_mean = ratings.rating.mean()
    assert fallback.algorithms[0].mean_ == approx(expected_mean)
コード例 #23
0
def test_bias_separate_damping():
    """Check item and user offsets when damping is given as the pair ``(5, 10)``."""
    model = Bias(damping=(5, 10))
    model.fit(simple_df)
    assert model.mean_ == approx(3.5)

    item_offs = model.item_offsets_
    assert item_offs is not None
    assert item_offs.index.name == 'item'
    assert set(item_offs.index) == {1, 2, 3}
    expected_items = np.array([0, 0.136364, -0.13636])
    assert item_offs.loc[1:3].values == approx(expected_items, abs=1.0e-4)

    user_offs = model.user_offsets_
    assert user_offs is not None
    assert user_offs.index.name == 'user'
    users = [10, 12, 13]
    assert set(user_offs.index) == set(users)
    expected_users = np.array([0.266234, -0.08333, -0.22727])
    assert user_offs.loc[users].values == approx(expected_users, abs=1.0e-4)
コード例 #24
0
def test_bias_batch_recommend(ml_folds: MLFolds, ncpus, isolate):
    """Evaluate a TopN-wrapped damped Bias over the folds and check nDCG via the fixture."""
    recommender = TopN(Bias(damping=5))
    ml_folds.isolate = isolate

    recs = ml_folds.eval_all(recommender, n_jobs=ncpus)

    ml_folds.check_positive_ndcg(recs)
コード例 #25
0
def test_global_metric():
    """Check ``pm.global_metric`` against direct ``pm.rmse`` and ``pm.mae`` calls."""
    import lenskit.crossfold as xf
    import lenskit.batch as batch
    from lenskit.algorithms.bias import Bias

    splits = xf.sample_users(lktu.ml_test.ratings, 1, 200, xf.SampleFrac(0.5))
    train, test = next(splits)
    model = Bias()
    model.fit(train)

    preds = batch.predict(model, test)

    # Called without ``metric``, the result must equal pm.rmse on the same columns.
    default_score = pm.global_metric(preds)
    assert default_score == pm.rmse(preds.prediction, preds.rating)

    mae_score = pm.global_metric(preds, metric=pm.mae)
    assert mae_score == pm.mae(preds.prediction, preds.rating)
コード例 #26
0
ファイル: test_knn_item_item.py プロジェクト: mdekstrand/lkpy
def _build_predict(ratings, fold):
    """Train on every partition except ``fold`` and predict for the rows in ``fold``.

    Rows are selected on the ``partition`` column of ``ratings``. The model is
    an ItemItem(20) with a Bias(5) fallback, and the result of
    ``batch.predict`` is returned unchanged.
    """
    model = Fallback(knn.ItemItem(20), Bias(5))
    held_in = ratings[ratings['partition'] != fold]
    model.fit(held_in)

    held_out = ratings[ratings['partition'] == fold]
    return batch.predict(model, held_out, n_jobs=1)
コード例 #27
0
def test_bias_train_ml_ratings():
    """Fit Bias on the ``ml_test`` ratings and compare it with hand-computed means."""
    model = Bias()
    ratings = ml_test.ratings
    model.fit(ratings)

    global_mean = ratings.rating.mean()
    assert model.mean_ == approx(global_mean)

    # Item offsets plus the global mean should equal the raw per-item means.
    item_means = ratings.groupby('item').rating.mean()
    fitted_means = model.item_offsets_ + model.mean_
    aligned_fit, aligned_data = fitted_means.align(item_means)
    assert aligned_fit.values == approx(aligned_data.values)

    # User 2's bias: mean residual of their ratings against the item means.
    user_ratings = ratings.set_index('user').loc[2].set_index('item').rating
    user_mean = (user_ratings - item_means[user_ratings.index]).mean()
    preds = model.predict_for_user(2, [10, 11, -1])
    assert len(preds) == 3
    assert preds.iloc[0] == approx(item_means.loc[10] + user_mean)
    assert preds.iloc[1] == approx(item_means.loc[11] + user_mean)
    # Item -1 is expected to be scored from the global mean plus the user term.
    assert preds.iloc[2] == approx(global_mean + user_mean)
コード例 #28
0
def test_fallback_list():
    """Check a Fallback built from a list: two algorithms, exposed via ``get_params``."""
    fallback = basic.Fallback([basic.Memorized(simple_df), Bias()])
    fallback.fit(lktu.ml_test.ratings)
    assert len(fallback.algorithms) == 2

    params = fallback.get_params()
    assert list(params.keys()) == ['algorithms']

    members = params['algorithms']
    assert len(members) == 2
    assert isinstance(members[0], basic.Memorized)
    assert isinstance(members[1], Bias)
コード例 #29
0
def test_fallback_clone():
    """Clone a fitted two-algorithm Fallback and check the clone's components.

    The clone must be a different object, hold the same number of algorithms,
    and each component must be a distinct object of exactly the same class as
    its counterpart.
    """
    algo = basic.Fallback([basic.Memorized(simple_df), Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    clone = lku.clone(algo)
    assert clone is not algo
    # zip() stops at the shorter input, so without this check a clone that
    # lost an algorithm would pass the loop below.
    assert len(clone.algorithms) == len(algo.algorithms)
    for a1, a2 in zip(algo.algorithms, clone.algorithms):
        assert a1 is not a2
        # Exact class match; identity is the idiomatic comparison for types.
        assert type(a2) is type(a1)
コード例 #30
0
ファイル: test_batch_sweep.py プロジェクト: hhhhzy/lkpy
def test_sweep_filenames(tmp_path):
    """Run a sweep whose datasets are given as (train, test) CSV file paths.

    Also passes a ``progress`` callback that checks the reported total.
    """
    work = pathlib.Path(tmp_path)
    sweep = batch.MultiEval(tmp_path)

    ratings = ml_test.ratings
    folds = []
    splits = xf.partition_users(ratings, 2, xf.SampleN(5))
    for part, (train, test) in enumerate(splits):
        # Write each partition to disk and register the paths.
        train_path = work / 'p{}-train.csv'.format(part)
        test_path = work / 'p{}-test.csv'.format(part)
        train.to_csv(train_path)
        test.to_csv(test_path)
        folds.append((train_path, test_path))

    sweep.add_datasets(folds, DataSet='ml-small')
    damped = [Bias(damping=0), Bias(damping=5), Bias(damping=10)]
    sweep.add_algorithms(damped, attrs=['damping'])
    sweep.add_algorithms(Popular())

    def progress(iter, total=None):
        # 4 algorithms per fold; return the iterable unchanged.
        assert total == len(folds) * 4
        return iter

    try:
        sweep.run(progress=progress)
    finally:
        runs_csv = work / 'runs.csv'
        if runs_csv.exists():
            runs = pd.read_csv(runs_csv)
            print(runs)

    for fname in ('runs.csv', 'runs.parquet',
                  'predictions.parquet', 'recommendations.parquet'):
        assert (work / fname).exists()

    runs = pd.read_parquet(work / 'runs.parquet')
    # 4 algorithms x 2 partitions
    assert len(runs) == 8