def test_als_binpickle(tmp_path):
    "Test saving ALS with BinPickle"
    # NOTE(review): a later `test_als_binpickle` also appears in this file; if
    # both live in the same module, this one is shadowed at collection time —
    # confirm whether both definitions are intended.
    original = als.BiasedMF(20, iterations=5, method='lu')
    ratings = lktu.ml_test.ratings
    original.fit(ratings)
    assert original.global_bias_ == approx(ratings.rating.mean())

    file = tmp_path / 'als.bpk'
    binpickle.dump(original, file)

    with binpickle.BinPickleFile(file) as bpf:
        # the pickle payload (excluding the mapped arrays) should be small
        entry = bpf.entries[-1]
        _log.info('serialized to %d pickle bytes', entry.dec_length)
        pickle_dis(bpf._read_buffer(entry))
        assert entry.dec_length < 1024

        algo = bpf.load()
        assert algo.global_bias_ == original.global_bias_
        # every learned array must survive the round trip unchanged
        for attr in ('user_bias_', 'item_bias_', 'user_features_',
                     'item_features_', 'item_index_', 'user_index_'):
            assert np.all(getattr(algo, attr) == getattr(original, attr))
def test_tf_ibmf_general(tmp_path, tf_session):
    "Training, saving, loading, and using an integrated bias model."
    fn = tmp_path / 'bias.bpk'
    ratings = lktu.ml_test.ratings

    original = lktf.IntegratedBiasMF(20, batch_size=1024, epochs=20, rng_spec=42)
    original.fit(ratings)
    # each embedding layer holds one 20-d vector per distinct user/item
    for layer_name, count in [('user-embed', ratings.user.nunique()),
                              ('item-embed', ratings.item.nunique())]:
        weights = original.model.get_layer(layer_name).get_weights()[0]
        assert weights.shape == (count, 20)

    binpickle.dump(original, fn)
    _log.info('serialized to %d bytes', fn.stat().st_size)
    algo = binpickle.load(fn)

    # does predicting work?
    preds = algo.predict_for_user(100, [5, 10, 30])
    assert all(preds.notna())

    # can we include a nonexistent item?
    preds = algo.predict_for_user(100, [5, 10, 230413804])
    assert len(preds) == 3
    assert all(preds.loc[[230413804]].isna())
    assert preds.isna().sum() == 1
def test_als_binpickle(tmp_path):
    "Test saving ALS with BinPickle"
    original = als.BiasedMF(20, iterations=5, method='lu')
    ratings = lktu.ml_test.ratings
    original.fit(ratings)
    assert original.bias.mean_ == approx(ratings.rating.mean())

    file = tmp_path / 'als.bpk'
    binpickle.dump(original, file)

    with binpickle.BinPickleFile(file) as bpf:
        # the pickle payload (excluding the mapped arrays) should be small
        entry = bpf.entries[-1]
        _log.info('serialized to %d pickle bytes', entry.dec_length)
        pickle_dis(bpf._read_buffer(entry))
        assert entry.dec_length < 2048

        algo = bpf.load()
        # the nested bias component must round-trip intact
        assert algo.bias.mean_ == original.bias.mean_
        assert np.all(algo.bias.user_offsets_ == original.bias.user_offsets_)
        assert np.all(algo.bias.item_offsets_ == original.bias.item_offsets_)
        # so must the latent factors and their index mappings
        for attr in ('user_features_', 'item_features_',
                     'item_index_', 'user_index_'):
            assert np.all(getattr(algo, attr) == getattr(original, attr))

        # make sure it still works
        preds = algo.predict_for_user(10, np.arange(0, 50, dtype='i8'))
        assert len(preds) == 50
def store(data, file_name, sharingmode=True):
    """Persist *data* under the configured models folder.

    Args:
        data: the (picklable) object to serialize.
        file_name: file name to write inside the models folder.
        sharingmode: if True, dump in sharing mode with a mappable file
            so other processes can memory-map the stored model.
    """
    folder = Path(get_value("models_folder_path"))
    # create the target folder if missing — the sibling store() variant does
    # this, and dump() would otherwise fail on a fresh installation
    folder.mkdir(parents=True, exist_ok=True)
    full_file_name = folder / file_name
    # overwrite any previously stored model of the same name
    if full_file_name.exists():
        os.remove(full_file_name)
    if sharingmode:
        with sharing_mode():
            dump(data, full_file_name, mappable=True)
    else:
        dump(data, full_file_name)
def store_model(data, file_name, sharingmode=True):
    """Serialize *data* to ``directory_path / file_name``.

    Args:
        data: the (picklable) object to serialize.
        file_name: target file name; the module's ``extension`` is appended
            when not already present.
        sharingmode: if True, dump in sharing mode with a mappable file
            so other processes can memory-map the stored model.
    """
    # Append the extension only when the name does not already END with it.
    # The previous substring test (`file_name.count(extension)`) wrongly
    # skipped names like 'model.bpk.backup' that merely contain it.
    if not file_name.endswith(extension):
        file_name += extension
    full_file_name = Path(directory_path) / file_name
    # overwrite any previously stored model of the same name
    if full_file_name.exists():
        os.remove(full_file_name)
    if sharingmode:
        with sharing_mode():
            dump(data, full_file_name, mappable=True)
    else:
        dump(data, full_file_name)
def test_als_save_load(tmp_path):
    "Test saving and loading ALS models, and regularized training."
    algo = als.ImplicitMF(5, iterations=5, reg=(2, 1))
    ratings = lktu.ml_test.ratings
    algo.fit(ratings)

    fn = tmp_path / 'model.bpk'
    binpickle.dump(algo, fn, codec=None)
    restored = binpickle.load(fn)

    # all learned state must round-trip unchanged
    for attr in ('user_features_', 'item_features_',
                 'item_index_', 'user_index_'):
        assert np.all(getattr(restored, attr) == getattr(algo, attr))
def store(data, file_name, sharingmode=True):
    """Persist *data* under the configured models folder.

    Args:
        data: the (picklable) object to serialize.
        file_name: file name to write inside the models folder.
        sharingmode: if True, dump in sharing mode with a mappable file
            so other processes can memory-map the stored model.
    """
    models_folder_path = get_value("models_folder_path")
    # exist_ok avoids the check-then-create race of the previous
    # `if not os.path.exists(...): os.makedirs(...)` pattern
    os.makedirs(models_folder_path, exist_ok=True)
    full_file_name = Path(models_folder_path) / file_name
    # overwrite any previously stored model of the same name
    if full_file_name.exists():
        os.remove(full_file_name)
    if sharingmode:
        with sharing_mode():
            dump(data, full_file_name, mappable=True)
    else:
        dump(data, full_file_name)
def test_fallback_save_load(tmp_path):
    """Round-trip a Fallback(Memorized, Bias) model and check its predictions."""
    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_test.ratings)

    fn = tmp_path / 'fb.mod'
    binpickle.dump(original, fn)
    algo = binpickle.load(fn)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def exp_val(user, item):
        # expected bias-model score: global mean plus any known offsets
        v = bias.mean_
        if user is not None:
            v += bias.user_offsets_.loc[user]
        if item is not None:
            v += bias.item_offsets_.loc[item]
        return v

    # first user + item: the memorized rating wins
    preds = algo.predict_for_user(10, [1])
    assert preds.loc[1] == 4.0
    # second user + first item: falls through to the bias model
    preds = algo.predict_for_user(15, [1])
    assert preds.loc[1] == approx(exp_val(15, 1))
    # second item + user item
    preds = algo.predict_for_user(12, [2])
    assert preds.loc[2] == approx(exp_val(12, 2))
    # blended: one memorized, one bias-predicted
    preds = algo.predict_for_user(10, [1, 5])
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(exp_val(10, 5))
    # blended with an unknown item: scored with the user offset only
    preds = algo.predict_for_user(10, [5, 1, -23081])
    assert len(preds) == 3
    assert preds.loc[1] == 4.0
    assert preds.loc[5] == approx(exp_val(10, 5))
    assert preds.loc[-23081] == approx(exp_val(10, None))
def test_bias_binpickle(tmp_path):
    """Round-trip a damped Bias model through BinPickle and verify its state."""
    original = Bias(damping=5)
    original.fit(simple_df)
    assert original.mean_ == approx(3.5)

    _log.info('saving baseline model')
    fn = tmp_path / 'bias.bpk'
    binpickle.dump(original, fn)
    algo = binpickle.load(fn)

    assert algo.mean_ == original.mean_

    items = algo.item_offsets_
    assert items is not None
    assert items.index.name == 'item'
    assert set(items.index) == {1, 2, 3}
    assert items.loc[1:3].values == approx(np.array([0, 0.25, -0.25]))

    users = algo.user_offsets_
    assert users is not None
    assert users.index.name == 'user'
    assert set(users.index) == {10, 12, 13}
    assert users.loc[[10, 12, 13]].values == \
        approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4)
def test_tf_bmf_save_load(tmp_path, tf_session):
    "Training, saving, and loading a bias model."
    fn = tmp_path / 'bias.bpk'
    ratings = lktu.ml_test.ratings

    original = lktf.BiasedMF(20, batch_size=1024, epochs=20)
    original.fit(ratings)
    assert original.user_features_.shape == (ratings.user.nunique(), 20)
    assert original.item_features_.shape == (ratings.item.nunique(), 20)

    binpickle.dump(original, fn)
    _log.info('serialized to %d bytes', fn.stat().st_size)
    algo = binpickle.load(fn)

    # the bias component must survive the round trip unchanged
    assert algo.bias.mean_ == original.bias.mean_
    for attr in ('user_offsets_', 'item_offsets_'):
        assert np.all(getattr(algo.bias, attr) == getattr(original.bias, attr))
    # so must the latent feature matrices
    for attr in ('user_features_', 'item_features_'):
        assert np.all(getattr(algo, attr) == getattr(original, attr))

    preds = algo.predict_for_user(100, [5, 10, 30])
    assert all(preds.notna())