def test_memorized_batch_missing():
    """Batch prediction keeps every query row and marks the unscored one NaN."""
    memorizer = basic.Memorized(simple_df)
    query = pd.DataFrame({'user': [10, 12, 12], 'item': [1, 1, 3]})
    scores = memorizer.predict(query)

    assert set(scores.index) == {0, 1, 2}
    # The first two rows are expected to be scored; the last one is not.
    leading = scores.iloc[:2]
    assert all(leading == [4.0, 3.0])
    assert np.isnan(scores.iloc[2])
def test_fallback_predict():
    """Check which Fallback component supplies each score.

    The expected values are the memorized rating (4.0) for user 10 / item 1
    and the bias model's score for every other pair queried here.
    """
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.fit(lktu.ml_pandas.renamed.ratings)
    assert len(algo.algorithms) == 2

    bias = algo.algorithms[1]
    assert isinstance(bias, basic.Bias)
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def bias_score(user, item=None):
        # Global mean plus the user offset, plus the item offset when given.
        score = bias.mean_ + bias.user_offsets_.loc[user]
        if item is not None:
            score = score + bias.item_offsets_.loc[item]
        return score

    # user 10, item 1: the memorized value is expected
    scores = algo.predict_for_user(10, [1])
    assert scores.loc[1] == 4.0

    # user 15, item 1: the bias score is expected
    scores = algo.predict_for_user(15, [1])
    assert scores.loc[1] == approx(bias_score(15, 1))

    # user 12, item 2: the bias score is expected
    scores = algo.predict_for_user(12, [2])
    assert scores.loc[2] == approx(bias_score(12, 2))

    # one request mixing a memorized item with a bias-scored item
    scores = algo.predict_for_user(10, [1, 5])
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))

    # same mix plus an item id expected to get only mean + user offset
    scores = algo.predict_for_user(10, [5, 1, -23081])
    assert len(scores) == 3
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))
    assert scores.loc[-23081] == approx(bias_score(10))
def test_memorized_batch_keep_index():
    """Batch predictions carry the query frame's index, in the same order."""
    memorizer = basic.Memorized(simple_df)
    # Three labels sampled without replacement from 0..9.
    labels = np.random.choice(np.arange(10), size=3, replace=False)
    pairs = {'user': [10, 10, 12], 'item': [1, 2, 1]}
    query = pd.DataFrame(pairs, index=labels)

    scores = memorizer.predict(query)

    assert all(scores.index == query.index)
    assert all(scores == [4.0, 5.0, 3.0])
def test_memorized_batch():
    """Batch prediction returns a Series named 'prediction' with the expected scores."""
    memorizer = basic.Memorized(simple_df)
    query = pd.DataFrame({'user': [10, 10, 12], 'item': [1, 2, 1]})
    scores = memorizer.predict(query)

    # Result container and naming.
    assert isinstance(scores, pd.Series)
    assert scores.name == 'prediction'

    # One score per query row, in query order.
    assert set(scores.index) == {0, 1, 2}
    assert all(scores == [4.0, 5.0, 3.0])
def test_fallback_save_load(tmp_path):
    """A Fallback restored through save()/load() predicts the expected scores."""
    tmp_path = lktu.norm_path(tmp_path)

    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_pandas.renamed.ratings)

    model_file = tmp_path / 'fallback'
    original.save(model_file)

    # Load the saved state into a freshly constructed, unfitted instance.
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.load(model_file)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def bias_score(user, item):
        # Global mean plus whichever of the user/item offsets are requested.
        total = bias.mean_
        if user is not None:
            total = total + bias.user_offsets_.loc[user]
        if item is not None:
            total = total + bias.item_offsets_.loc[item]
        return total

    # user 10, item 1: the memorized value is expected
    scores = algo.predict_for_user(10, [1])
    assert scores.loc[1] == 4.0

    # user 15, item 1: the bias score is expected
    scores = algo.predict_for_user(15, [1])
    assert scores.loc[1] == approx(bias_score(15, 1))

    # user 12, item 2: the bias score is expected
    scores = algo.predict_for_user(12, [2])
    assert scores.loc[2] == approx(bias_score(12, 2))

    # one request mixing a memorized item with a bias-scored item
    scores = algo.predict_for_user(10, [1, 5])
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))

    # same mix plus an item id expected to get only mean + user offset
    scores = algo.predict_for_user(10, [5, 1, -23081])
    assert len(scores) == 3
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))
    assert scores.loc[-23081] == approx(bias_score(10, None))
def test_memorized_batch_ord():
    """Batch scores come back in query-row order when users are interleaved."""
    memorizer = basic.Memorized(simple_df)
    # User 12 sits between the two rows for user 10.
    query = pd.DataFrame({'user': [10, 12, 10], 'item': [1, 1, 2]})
    scores = memorizer.predict(query)

    assert set(scores.index) == {0, 1, 2}
    assert all(scores == [4.0, 3.0, 5.0])
def test_fallback_clone():
    """Cloning a fitted Fallback yields new component objects of the same types.

    The clone must be a distinct object, must hold as many component
    algorithms as the original, and each component must be a different
    instance of exactly the same class as its counterpart.
    """
    algo = basic.Fallback([basic.Memorized(simple_df), basic.Bias()])
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    clone = lku.clone(algo)
    assert clone is not algo

    # zip() stops at the shorter input, so without this check a clone that
    # dropped components would pass the loop below vacuously.
    assert len(clone.algorithms) == len(algo.algorithms)

    for a1, a2 in zip(algo.algorithms, clone.algorithms):
        assert a1 is not a2
        # Exact class match is intended, so compare the type objects by identity.
        assert type(a2) is type(a1)
def test_fallback_list():
    """A Fallback built from a list exposes its components via get_params()."""
    components = [basic.Memorized(simple_df), basic.Bias()]
    algo = basic.Fallback(components)
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    params = algo.get_params()
    # 'algorithms' is expected to be the only parameter.
    assert list(params.keys()) == ['algorithms']

    listed = params['algorithms']
    assert len(listed) == 2
    assert isinstance(listed[0], basic.Memorized)
    assert isinstance(listed[1], basic.Bias)
def test_memorized():
    """Per-user prediction returns memorized scores, with NaN where none is expected."""
    memorizer = basic.Memorized(simple_df)

    # User 10: both requested items are expected to be scored.
    scores = memorizer.predict_for_user(10, [1, 2])
    assert set(scores.index) == {1, 2}
    expected = pd.Series({1: 4.0, 2: 5.0})
    assert all(scores == expected)

    # User 12: item 1 is scored, item 3 is expected to be missing.
    scores = memorizer.predict_for_user(12, [1, 3])
    assert set(scores.index) == {1, 3}
    assert scores.loc[1] == 3.0
    assert np.isnan(scores.loc[3])
def test_fallback_save_load(tmp_path):
    """A Fallback restored through save()/load() predicts the expected scores."""
    tmp_path = lktu.norm_path(tmp_path)

    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_pandas.renamed.ratings)

    model_file = tmp_path / 'fallback'
    original.save(model_file)

    # Load the saved state into a freshly constructed, unfitted instance.
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.load(model_file)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_pandas.ratings.rating.mean())

    def bias_score(user, item=None):
        # Global mean plus the user offset, plus the item offset when given.
        score = bias.mean_ + bias.user_offsets_.loc[user]
        if item is not None:
            score = score + bias.item_offsets_.loc[item]
        return score

    # user 10, item 1: the memorized value is expected
    scores = algo.predict_for_user(10, [1])
    assert scores.loc[1] == 4.0

    # user 15, item 1: the bias score is expected
    scores = algo.predict_for_user(15, [1])
    assert scores.loc[1] == approx(bias_score(15, 1))

    # user 12, item 2: the bias score is expected
    scores = algo.predict_for_user(12, [2])
    assert scores.loc[2] == approx(bias_score(12, 2))

    # one request mixing a memorized item with a bias-scored item
    scores = algo.predict_for_user(10, [1, 5])
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))

    # same mix plus an item id expected to get only mean + user offset
    scores = algo.predict_for_user(10, [5, 1, -23081])
    assert len(scores) == 3
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))
    assert scores.loc[-23081] == approx(bias_score(10))
def test_fallback_train_one_pred_impossible():
    """A Fallback wrapping only a memorizer is expected to leave an unscored item NaN."""
    algo = basic.Fallback(basic.Memorized(simple_df))
    algo.fit(lktu.ml_test.ratings)

    # User 10: both requested items are expected to be scored.
    scores = algo.predict_for_user(10, [1, 2])
    assert set(scores.index) == {1, 2}
    expected = pd.Series({1: 4.0, 2: 5.0})
    assert all(scores == expected)

    # User 12: item 3 is expected to have no score, and no other component
    # is configured to supply one.
    scores = algo.predict_for_user(12, [1, 3])
    assert set(scores.index) == {1, 3}
    assert scores.loc[1] == 3.0
    assert np.isnan(scores.loc[3])
def test_topn_recommend():
    """TopN lists scored candidates from highest score down and honours n."""
    predictor = basic.Memorized(simple_df)
    recommender = basic.TopN(predictor)

    # User 10: both candidates expected, higher score first.
    full_list = recommender.recommend(10, candidates=[1, 2])
    assert all(full_list.item == [2, 1])
    assert all(full_list.score == [5, 4])

    # User 12: only one of the two candidates is expected in the result.
    partial_list = recommender.recommend(12, candidates=[1, 2])
    assert len(partial_list) == 1
    assert all(partial_list.item == [1])
    assert all(partial_list.score == [3])

    # User 10 with n=1: only the top item is expected.
    top_one = recommender.recommend(10, n=1, candidates=[1, 2])
    assert len(top_one) == 1
    assert all(top_one.item == [2])
    assert all(top_one.score == [5])
def test_fallback_save_load(tmp_path):
    """A fitted Fallback round-tripped through pickle predicts the expected scores."""
    original = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    original.fit(lktu.ml_test.ratings)

    # Write the fitted model to disk, then read it back as a new object.
    model_file = tmp_path / 'fb.mod'
    with model_file.open('wb') as out:
        pickle.dump(original, out)
    with model_file.open('rb') as src:
        algo = pickle.load(src)

    bias = algo.algorithms[1]
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def bias_score(user, item):
        # Global mean plus whichever of the user/item offsets are requested.
        total = bias.mean_
        if user is not None:
            total = total + bias.user_offsets_.loc[user]
        if item is not None:
            total = total + bias.item_offsets_.loc[item]
        return total

    # user 10, item 1: the memorized value is expected
    scores = algo.predict_for_user(10, [1])
    assert scores.loc[1] == 4.0

    # user 15, item 1: the bias score is expected
    scores = algo.predict_for_user(15, [1])
    assert scores.loc[1] == approx(bias_score(15, 1))

    # user 12, item 2: the bias score is expected
    scores = algo.predict_for_user(12, [2])
    assert scores.loc[2] == approx(bias_score(12, 2))

    # one request mixing a memorized item with a bias-scored item
    scores = algo.predict_for_user(10, [1, 5])
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))

    # same mix plus an item id expected to get only mean + user offset
    scores = algo.predict_for_user(10, [5, 1, -23081])
    assert len(scores) == 3
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))
    assert scores.loc[-23081] == approx(bias_score(10, None))
def test_fallback_predict():
    """Check which Fallback component supplies each score.

    The expected values are the memorized rating (4.0) for user 10 / item 1
    and the bias model's score for every other pair queried here.
    """
    algo = basic.Fallback(basic.Memorized(simple_df), basic.Bias())
    algo.fit(lktu.ml_test.ratings)
    assert len(algo.algorithms) == 2

    bias = algo.algorithms[1]
    assert isinstance(bias, basic.Bias)
    assert bias.mean_ == approx(lktu.ml_test.ratings.rating.mean())

    def bias_score(user, item):
        # Global mean plus whichever of the user/item offsets are requested.
        total = bias.mean_
        if user is not None:
            total = total + bias.user_offsets_.loc[user]
        if item is not None:
            total = total + bias.item_offsets_.loc[item]
        return total

    # user 10, item 1: the memorized value is expected
    scores = algo.predict_for_user(10, [1])
    assert scores.loc[1] == 4.0

    # user 15, item 1: the bias score is expected
    scores = algo.predict_for_user(15, [1])
    assert scores.loc[1] == approx(bias_score(15, 1))

    # user 12, item 2: the bias score is expected
    scores = algo.predict_for_user(12, [2])
    assert scores.loc[2] == approx(bias_score(12, 2))

    # one request mixing a memorized item with a bias-scored item
    scores = algo.predict_for_user(10, [1, 5])
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))

    # same mix plus an item id expected to get only mean + user offset
    scores = algo.predict_for_user(10, [5, 1, -23081])
    assert len(scores) == 3
    assert scores.loc[1] == 4.0
    assert scores.loc[5] == approx(bias_score(10, 5))
    assert scores.loc[-23081] == approx(bias_score(10, None))
def test_topn_config():
    """The string form of a TopN recommender begins with 'TopN/'."""
    recommender = basic.TopN(basic.Memorized(simple_df))
    assert str(recommender).startswith('TopN/')
def test_fallback_string():
    """The string form of a Fallback mentions 'Fallback'."""
    components = [basic.Memorized(simple_df), basic.Bias()]
    algo = basic.Fallback(components)
    description = str(algo)
    assert 'Fallback' in description