Ejemplo n.º 1
0
def test_als_binpickle(tmp_path):
    "Test saving ALS with BinPickle"

    ratings = lktu.ml_test.ratings
    model = als.BiasedMF(20, iterations=5, method='lu')
    model.fit(ratings)

    # the fitted global bias is the overall mean rating
    assert model.global_bias_ == approx(ratings.rating.mean())

    path = tmp_path / 'als.bpk'
    binpickle.dump(model, path)

    with binpickle.BinPickleFile(path) as bpf:
        # the pickle payload (last entry) should stay compact
        last = bpf.entries[-1]
        _log.info('serialized to %d pickle bytes', last.dec_length)
        pickle_dis(bpf._read_buffer(last))
        assert last.dec_length < 1024

        restored = bpf.load()

        # every fitted attribute survives the round trip
        assert restored.global_bias_ == model.global_bias_
        for attr in ('user_bias_', 'item_bias_',
                     'user_features_', 'item_features_',
                     'item_index_', 'user_index_'):
            assert np.all(getattr(restored, attr) == getattr(model, attr))
Ejemplo n.º 2
0
def test_tf_ibmf_general(tmp_path, tf_session):
    "Training, saving, loading, and using an integrated bias model."
    path = tmp_path / 'bias.bpk'
    ratings = lktu.ml_test.ratings

    original = lktf.IntegratedBiasMF(20, batch_size=1024, epochs=20,
                                     rng_spec=42)
    original.fit(ratings)

    # embedding matrices must have one row per distinct user/item
    for layer_name, col in [('user-embed', ratings.user),
                            ('item-embed', ratings.item)]:
        weights = original.model.get_layer(layer_name).get_weights()[0]
        assert weights.shape == (col.nunique(), 20)

    binpickle.dump(original, path)

    _log.info('serialized to %d bytes', path.stat().st_size)
    algo = binpickle.load(path)

    # predictions for known items are all present
    preds = algo.predict_for_user(100, [5, 10, 30])
    assert all(preds.notna())

    # a nonexistent item yields exactly one missing prediction
    preds = algo.predict_for_user(100, [5, 10, 230413804])
    assert len(preds) == 3
    assert all(preds.loc[[230413804]].isna())
    assert preds.isna().sum() == 1
Ejemplo n.º 3
0
def test_als_binpickle(tmp_path):
    "Test saving ALS with BinPickle"

    ratings = lktu.ml_test.ratings
    model = als.BiasedMF(20, iterations=5, method='lu')
    model.fit(ratings)

    # the fitted bias mean is the overall mean rating
    assert model.bias.mean_ == approx(ratings.rating.mean())

    path = tmp_path / 'als.bpk'
    binpickle.dump(model, path)

    with binpickle.BinPickleFile(path) as bpf:
        # the pickle payload (last entry) should stay compact
        last = bpf.entries[-1]
        _log.info('serialized to %d pickle bytes', last.dec_length)
        pickle_dis(bpf._read_buffer(last))
        assert last.dec_length < 2048

        restored = bpf.load()

        # bias component and fitted attributes survive the round trip
        assert restored.bias.mean_ == model.bias.mean_
        assert np.all(restored.bias.user_offsets_ == model.bias.user_offsets_)
        assert np.all(restored.bias.item_offsets_ == model.bias.item_offsets_)
        for attr in ('user_features_', 'item_features_',
                     'item_index_', 'user_index_'):
            assert np.all(getattr(restored, attr) == getattr(model, attr))

        # the reloaded model can still predict
        preds = restored.predict_for_user(10, np.arange(0, 50, dtype='i8'))
        assert len(preds) == 50
Ejemplo n.º 4
0
def store(data, file_name, sharingmode=True):
    """Serialize *data* into the configured models folder via binpickle.

    Parameters:
        data: the object to serialize with ``dump``.
        file_name: name of the target file inside the models folder.
        sharingmode: when True, dump under ``sharing_mode()`` with a
            memory-mappable layout; otherwise perform a plain dump.
    """
    models_folder_path = get_value("models_folder_path")
    full_file_name = Path(models_folder_path) / file_name

    # Ensure the destination directory exists before touching the file;
    # the other store() variant in this module already does this, and
    # exist_ok avoids a check-then-create race.
    os.makedirs(models_folder_path, exist_ok=True)

    # Replace any previous model file.
    if full_file_name.exists():
        os.remove(full_file_name)

    if sharingmode:
        with sharing_mode():
            dump(data, full_file_name, mappable=True)
    else:
        dump(data, full_file_name)
Ejemplo n.º 5
0
def store_model(data, file_name, sharingmode=True):
    """Serialize a model to *file_name* (suffixed with the module's
    ``extension``) inside ``directory_path`` via binpickle.

    Parameters:
        data: the object to serialize with ``dump``.
        file_name: target file name; the extension is appended if missing.
        sharingmode: when True, dump under ``sharing_mode()`` with a
            memory-mappable layout; otherwise perform a plain dump.
    """
    # endswith, not substring count: the original `count(extension)` would
    # skip appending when the extension merely appeared mid-name.
    if not file_name.endswith(extension):
        file_name += extension
    full_file_name = Path(directory_path) / file_name

    # Replace any previous model file.
    if full_file_name.exists():
        os.remove(full_file_name)

    if sharingmode:
        with sharing_mode():
            dump(data, full_file_name, mappable=True)
    else:
        dump(data, full_file_name)
Ejemplo n.º 6
0
def test_als_save_load(tmp_path):
    "Test saving and loading ALS models, and regularized training."
    ratings = lktu.ml_test.ratings
    model = als.ImplicitMF(5, iterations=5, reg=(2, 1))
    model.fit(ratings)

    path = tmp_path / 'model.bpk'
    binpickle.dump(model, path, codec=None)

    # every fitted attribute survives the round trip
    restored = binpickle.load(path)
    for attr in ('user_features_', 'item_features_',
                 'item_index_', 'user_index_'):
        assert np.all(getattr(restored, attr) == getattr(model, attr))
Ejemplo n.º 7
0
def store(data, file_name, sharingmode=True):
    """Serialize *data* into the configured models folder via binpickle.

    Parameters:
        data: the object to serialize with ``dump``.
        file_name: name of the target file inside the models folder.
        sharingmode: when True, dump under ``sharing_mode()`` with a
            memory-mappable layout; otherwise perform a plain dump.
    """
    models_folder_path = get_value("models_folder_path")
    full_file_name = Path(models_folder_path) / file_name

    # Create the folder before dealing with the file; exist_ok replaces the
    # racy "if not os.path.exists(...): os.makedirs(...)" pair.
    os.makedirs(models_folder_path, exist_ok=True)

    # Replace any previous model file.
    if full_file_name.exists():
        os.remove(full_file_name)

    if sharingmode:
        with sharing_mode():
            dump(data, full_file_name, mappable=True)
    else:
        dump(data, full_file_name)
Ejemplo n.º 8
0
def test_fallback_save_load(tmp_path):
    "Round-trip a Fallback(Memorized, Bias) model through binpickle."
    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_test.ratings)

    path = tmp_path / 'fb.mod'
    binpickle.dump(original, path)
    algo = binpickle.load(path)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def exp_val(user, item):
        # expected bias score: global mean plus any known offsets
        score = bias.mean_
        if user is not None:
            score += bias.user_offsets_.loc[user]
        if item is not None:
            score += bias.item_offsets_.loc[item]
        return score

    # first user + item: memorized value wins
    assert algo.predict_for_user(10, [1]).loc[1] == 4.0
    # second user + first item: falls through to the bias model
    assert algo.predict_for_user(15, [1]).loc[1] == approx(exp_val(15, 1))

    # second item + user item
    assert algo.predict_for_user(12, [2]).loc[2] == approx(exp_val(12, 2))

    # blended: memorized and bias predictions in one request
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(exp_val(10, 5))

    # blended with an unknown item
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(exp_val(10, 5))
    assert preds.loc[-23081] == approx(exp_val(10, None))
Ejemplo n.º 9
0
def test_bias_binpickle(tmp_path):
    "Round-trip a damped Bias model through binpickle."
    original = Bias(damping=5)
    original.fit(simple_df)
    assert original.mean_ == approx(3.5)

    _log.info('saving baseline model')
    path = tmp_path / 'bias.bpk'
    binpickle.dump(original, path)
    algo = binpickle.load(path)

    assert algo.mean_ == original.mean_

    # item offsets survive the round trip
    offsets = algo.item_offsets_
    assert offsets is not None
    assert offsets.index.name == 'item'
    assert set(offsets.index) == {1, 2, 3}
    assert offsets.loc[1:3].values == approx(np.array([0, 0.25, -0.25]))

    # user offsets survive the round trip
    offsets = algo.user_offsets_
    assert offsets is not None
    assert offsets.index.name == 'user'
    assert set(offsets.index) == {10, 12, 13}
    expected = np.array([0.25, -00.08333, -0.20833])
    assert offsets.loc[[10, 12, 13]].values == approx(expected, abs=1.0e-4)
Ejemplo n.º 10
0
def test_tf_bmf_save_load(tmp_path, tf_session):
    "Training, saving, and loading a bias model."
    path = tmp_path / 'bias.bpk'
    ratings = lktu.ml_test.ratings

    original = lktf.BiasedMF(20, batch_size=1024, epochs=20)
    original.fit(ratings)
    # one latent row per distinct user/item
    assert original.user_features_.shape == (ratings.user.nunique(), 20)
    assert original.item_features_.shape == (ratings.item.nunique(), 20)

    binpickle.dump(original, path)

    _log.info('serialized to %d bytes', path.stat().st_size)
    algo = binpickle.load(path)

    # bias terms and latent factors survive the round trip
    assert algo.bias.mean_ == original.bias.mean_
    pairs = [(algo.bias.user_offsets_, original.bias.user_offsets_),
             (algo.bias.item_offsets_, original.bias.item_offsets_),
             (algo.user_features_, original.user_features_),
             (algo.item_features_, original.item_features_)]
    for restored_val, orig_val in pairs:
        assert np.all(restored_val == orig_val)

    # the reloaded model still predicts
    preds = algo.predict_for_user(100, [5, 10, 30])
    assert all(preds.notna())