def test_hpf_train_large(tmp_path):
    """Train HPF, round-trip it through pickle, and check top-N output.

    The ratings from ``lktu.ml_pandas.renamed.ratings`` are shifted up by 0.5
    before fitting.  The fitted model must report as many users and items as
    there are distinct ids in the training frame, keep those counts after a
    pickle dump/load through a file under ``tmp_path``, and produce 50
    distinct recommended items for each of 50 randomly drawn users.
    """
    n_factors = 20
    n_sampled_users = 50
    list_length = 50

    model = hpf.HPF(n_factors)
    raw = lktu.ml_pandas.renamed.ratings
    train = raw.assign(rating=raw.rating + 0.5)
    model.fit(train)

    # The model dimensions must match the distinct ids it was trained on.
    assert model.n_users == train.user.nunique()
    assert model.n_items == train.item.nunique()

    # Serialize to disk and read back, then compare the dimensions.
    model_path = tmp_path / 'hpf.dat'
    with model_path.open('wb') as out_fh:
        pickle.dump(model, out_fh)
    with model_path.open('rb') as in_fh:
        restored = pickle.load(in_fh)

    assert restored.n_users == model.n_users
    assert restored.n_items == model.n_items

    # Recommend through TopN using the original (not the restored) model.
    selector = basic.UnratedItemCandidateSelector()
    selector.fit(train)
    recommender = basic.TopN(model, selector)

    sampled_users = np.random.choice(
        train.user.unique(), size=n_sampled_users, replace=False
    )
    for user in sampled_users:
        user_recs = recommender.recommend(user, list_length)
        assert len(user_recs) == list_length
        assert user_recs.item.nunique() == list_length
def test_unrated_selector():
    """Check the unrated-item selector's candidates on ``simple_df``.

    ``fit`` must return the selector itself, and the candidate sets for
    users 10, 12 and 11 must equal the expected item sets listed below.
    """
    selector = basic.UnratedItemCandidateSelector()
    fitted = selector.fit(simple_df)
    assert fitted is selector

    # (user, expected candidate items), queried in this order.
    expectations = (
        (10, {3}),
        (12, {3, 2}),
        (11, {1, 2, 3}),
    )
    for user, expected_items in expectations:
        assert set(selector.candidates(user)) == expected_items
def test_unrated_big():
    """Check the unrated-item selector against ``lktu.ml_test.ratings``.

    For 100 randomly drawn users, the candidates returned by the selector
    must be duplicate-free, must not overlap the items in that user's rating
    rows, and together with those rows must add up to the total number of
    distinct items.
    """
    ratings = lktu.ml_test.ratings
    all_users = ratings.user.unique()
    n_items = len(ratings.item.unique())
    # Item ids indexed by user id, so ``.loc[user]`` gives that user's items.
    items_by_user = ratings.set_index('user').item

    selector = basic.UnratedItemCandidateSelector()
    fitted = selector.fit(ratings)
    assert fitted is selector

    # test 100 random users
    sampled_users = np.random.choice(all_users, size=100, replace=False)
    for user in sampled_users:
        unrated = pd.Series(selector.candidates(user))
        rated = items_by_user.loc[user]

        # Rated and candidate items together account for every item.
        assert len(rated) + len(unrated) == n_items
        # No candidate appears twice.
        assert unrated.nunique() == len(unrated)
        # No candidate is an item the user already rated.
        assert not unrated.isin(rated).any()
def test_unrated_override():
    """Check ``candidates`` when a second argument is supplied.

    After fitting on ``simple_df``, asking for user 10's candidates with
    ``[2]`` as the second argument must yield exactly items 1 and 3.
    NOTE(review): the second argument is presumably an explicit rated-item
    list that overrides the fitted ratings -- confirm against the selector.
    """
    selector = basic.UnratedItemCandidateSelector()
    selector.fit(simple_df)

    override_items = [2]
    found = set(selector.candidates(10, override_items))
    assert found == {1, 3}