def test_restriction_local(U: int, I: int) -> None:
    """Evaluator with per-user recommendable-item lists must match sklearn's ndcg
    computed on the restricted columns only.

    Args:
        U: number of users.
        I: number of items.
    """
    try:
        from sklearn.metrics import ndcg_score
    except ImportError:  # BUG FIX: was a bare ``except`` that hid every error
        pytest.skip("scikit-learn is required for this test.")
    rns = np.random.RandomState(42)
    recommendables: List[np.ndarray] = []
    for _ in range(U):
        # Each user gets a random subset of at least 2 items.
        recommendables.append(
            rns.choice(np.arange(I), replace=False, size=rns.randint(2, I))
        )
    scores = rns.randn(U, I)
    X_gt = (rns.rand(U, I) >= 0.3).astype(np.float64)
    # Renamed from ``eval`` to avoid shadowing the builtin.
    evaluator = Evaluator(
        sps.csr_matrix(X_gt),
        offset=0,
        cutoff=I,
        n_threads=1,
        per_user_recommendable_items=recommendables,
    )
    # Recommender with an empty interaction mask and fixed random scores.
    mock_rec = MockRecommender(sps.csr_matrix(X_gt.shape), scores)
    my_score = evaluator.get_score(mock_rec)
    sklearn_metrics = defaultdict(list)
    for i in range(scores.shape[0]):
        # Users with no positives inside their recommendable set are skipped.
        if X_gt[i, recommendables[i]].sum() == 0:
            continue
        ndcg = ndcg_score(
            X_gt[i, recommendables[i]][None, :], scores[i, recommendables[i]][None, :]
        )
        sklearn_metrics["ndcg"].append(ndcg)
    assert my_score["ndcg"] == pytest.approx(np.mean(sklearn_metrics["ndcg"]), abs=1e-8)
def test_recs(RecommenderClass: Type[BaseRecommender]) -> None:
    """Test that the learning of recommenders exits normally and they are picklable.

    Args:
        RecommenderClass (Type[BaseRecommender]): The recommender class to be tested.
    """
    # Local imports: used only by this test's temp-file handling.
    import os
    import tempfile

    rec = RecommenderClass(X_train)
    rec.learn()
    scores = rec.get_score(np.arange(X_train.shape[0]))
    evaluator = Evaluator(X_test, 0, 20)
    # Scoring against an evaluator whose dimensions don't match must raise.
    with pytest.raises(ValueError):
        evaluator.get_score(rec)
    metrics = evaluator.get_scores(rec, cutoffs=[X_train.shape[1]])
    # np.isfinite already excludes NaN, so a separate ~isnan check is redundant.
    assert np.all(np.isfinite(scores))
    for value in metrics.values():
        assert np.isfinite(value)
    # BUG FIX: the original wrote "temp.pkl" into the CWD and never removed it;
    # use a temporary directory so the artifact is cleaned up even on failure.
    with tempfile.TemporaryDirectory() as tmpdir:
        pkl_path = os.path.join(tmpdir, "temp.pkl")
        with open(pkl_path, "wb") as ofs:
            pickle.dump(rec, ofs)
        with open(pkl_path, "rb") as ifs:
            rec_dumped: BaseRecommender = pickle.load(ifs)
    # The round-tripped recommender must produce identical scores.
    score_from_dumped = rec_dumped.get_score(np.arange(X_train.shape[0]))
    np.testing.assert_allclose(scores, score_from_dumped)
def test_restriction_global(U: int, I: int, R: int) -> None:
    """With a single global ``recommendable_items`` list, the evaluator's ndcg must
    equal sklearn's ndcg computed on exactly those columns."""
    rng = np.random.RandomState(42)
    allowed_items = rng.choice(np.arange(I), replace=False, size=R)
    raw_scores = rng.randn(U, I)
    ground_truth = (rng.rand(U, I) >= 0.3).astype(np.float64)
    evaluator = Evaluator(
        sps.csr_matrix(ground_truth),
        offset=0,
        cutoff=I,
        n_threads=1,
        recommendable_items=allowed_items,
    )
    # Recommender with an empty interaction mask and the fixed random scores.
    recommender = MockRecommender(sps.csr_matrix(ground_truth.shape), raw_scores)
    observed = evaluator.get_score(recommender)
    # Reference values: sklearn's ndcg on the restricted columns, skipping
    # users without any positive item inside the allowed set.
    reference_ndcgs = [
        ndcg_score(
            ground_truth[u, allowed_items][None, :],
            raw_scores[u, allowed_items][None, :],
        )
        for u in range(raw_scores.shape[0])
        if ground_truth[u, allowed_items].sum() > 0
    ]
    assert observed["ndcg"] == pytest.approx(np.mean(reference_ndcgs), abs=1e-8)
def test_recommender_check(U: int, I: int, C: int) -> None:
    """The evaluator must reject recommenders whose user or item counts are too
    small, and accept one with matching dimensions."""
    rng = np.random.RandomState(42)
    raw_scores = rng.randn(U, I)
    ground_truth = (rng.rand(U, I) >= 0.3).astype(np.float64)
    evaluator = Evaluator(sps.csr_matrix(ground_truth), offset=0, cutoff=C, n_threads=2)

    # One user short: must be rejected.
    too_few_users = MockRecommender(sps.csr_matrix((U - 1, I)), raw_scores[1:])
    with pytest.raises(ValueError):
        evaluator.get_score(too_few_users)

    # One item short: must be rejected as well.
    too_few_items = MockRecommender(sps.csr_matrix((U, I - 1)), raw_scores[:, 1:])
    with pytest.raises(ValueError):
        evaluator.get_score(too_few_items)

    # Exact dimensions: must evaluate without raising.
    valid_recommender = MockRecommender(sps.csr_matrix((U, I)), raw_scores)
    evaluator.get_score(valid_recommender)
def test_metrics(U: int, I: int) -> None:
    """Evaluator's map / ndcg must equal sklearn's per-user averages.

    NOTE(review): another ``test_metrics`` appears later in this source; if both
    live in one module the later definition shadows this one — confirm they are
    in separate files.
    """
    rng = np.random.RandomState(42)
    raw_scores = rng.randn(U, I)
    ground_truth = (rng.rand(U, I) >= 0.3).astype(np.float64)
    evaluator = Evaluator(sps.csr_matrix(ground_truth), offset=0, cutoff=I, n_threads=4)
    # Recommender with an empty interaction mask and fixed random scores.
    recommender = MockRecommender(sps.csr_matrix(ground_truth.shape), raw_scores)
    observed = evaluator.get_score(recommender)

    reference = defaultdict(list)
    for u in range(raw_scores.shape[0]):
        # Users without any positive item are excluded from the averages.
        if ground_truth[u].sum() == 0:
            continue
        reference["map"].append(average_precision_score(ground_truth[u], raw_scores[u]))
        reference["ndcg"].append(
            ndcg_score(ground_truth[u][None, :], raw_scores[u][None, :])
        )
    for metric_name in ("map", "ndcg"):
        assert observed[metric_name] == pytest.approx(
            np.mean(reference[metric_name]), abs=1e-8
        )
def test_optimizer_by_mock(X: InteractionMatrix, target_epoch: int) -> None:
    """The mock optimizer must surface the epoch passed through ``fixed_params``."""
    train_part, valid_part = rowwise_train_test_split(X)
    optimizer = MockOptimizer(
        train_part,
        Evaluator(valid_part, 0),
        fixed_params=dict(X_test=valid_part.toarray(), target_epoch=target_epoch),
        logger=getLogger("IGNORE"),
    )
    best_config, _ = optimizer.optimize(n_trials=1, random_seed=42)
    # A single trial suffices: ``max_epoch`` is fixed, not searched.
    assert best_config["max_epoch"] == target_epoch
def test_metrics_with_cutoff(U: int, I: int, C: int) -> None:
    """Compare the evaluator's cutoff metrics against a plain reimplementation,
    and check that the chunk size (``mb_size``) does not change any metric.

    Args:
        U: number of users.
        I: number of items.
        C: ranking cutoff.
    """
    rns = np.random.RandomState(42)
    scores = rns.randn(U, I)
    X_gt = (rns.rand(U, I) >= 0.3).astype(np.float64)
    evaluator = Evaluator(sps.csr_matrix(X_gt), offset=0, cutoff=C, n_threads=2)
    evaluator_finer_chunk = Evaluator(
        sps.csr_matrix(X_gt), offset=0, cutoff=C, n_threads=2, mb_size=1
    )
    # Recommender with an empty interaction mask and fixed random scores.
    mock_rec = MockRecommender(sps.csr_matrix(X_gt.shape), scores)
    my_score = evaluator.get_score(mock_rec)
    my_score_finer = evaluator_finer_chunk.get_score(mock_rec)
    # Chunked evaluation must be numerically identical.
    for key in my_score:
        assert my_score_finer[key] == pytest.approx(my_score[key])

    ndcg_sum = 0.0
    valid_users = 0
    map_sum = 0.0  # renamed from ``map`` to stop shadowing the builtin
    precision_sum = 0.0
    recall_sum = 0.0
    item_appearance_count = np.zeros((I,), dtype=np.float64)
    for u in range(U):
        nzs = set(X_gt[u].nonzero()[0])
        if not nzs:
            # Users with no positives are excluded from the averages.
            continue
        valid_users += 1
        ndcg_sum += ndcg_score(X_gt[[u]], scores[[u]], k=C)
        recommended = scores[u].argsort()[::-1][:C]
        recall_denom = min(C, len(nzs))
        ap = 0.0
        current_hit = 0
        # BUG FIX: the original reused ``i`` for both the user loop and this
        # ranking loop, clobbering the outer index; use a distinct name.
        for rank, rec in enumerate(recommended):
            item_appearance_count[rec] += 1.0
            if rec in nzs:
                current_hit += 1
                # AP accumulates precision at each *hit* position.
                ap += current_hit / float(rank + 1)
        ap /= recall_denom
        map_sum += ap
        recall_sum += current_hit / recall_denom
        precision_sum += current_hit / C

    # Shannon entropy of the empirical item-recommendation distribution.
    appearance_prob = item_appearance_count / item_appearance_count.sum()
    entropy = -appearance_prob.dot(np.log(appearance_prob))
    # Gini index via a left-Riemann sum over the Lorenz curve.
    item_appearance_sorted_normalized = (
        np.sort(item_appearance_count) / item_appearance_count.sum()
    )
    lorentz_curve = np.cumsum(item_appearance_sorted_normalized)
    gini_index = 0.0
    delta = 1 / I
    for j in range(I):
        gini_index += delta * 2 * (((j + 1) / I) - lorentz_curve[j])

    assert my_score["ndcg"] == pytest.approx(ndcg_sum / valid_users)
    assert my_score["map"] == pytest.approx(map_sum / valid_users, abs=1e-8)
    assert my_score["precision"] == pytest.approx(precision_sum / valid_users, abs=1e-8)
    assert my_score["recall"] == pytest.approx(recall_sum / valid_users, abs=1e-8)
    assert my_score["entropy"] == pytest.approx(entropy)
    assert my_score["gini_index"] == pytest.approx(gini_index)
def test_optimizer_by_mock(X: sps.csr_matrix) -> None:
    """After 40 trials the mock optimizer must land near its known optimum."""
    train_part, valid_part = rowwise_train_test_split(X, test_ratio=0.5, random_seed=0)
    optimizer = MockOptimizer(
        train_part,
        Evaluator(valid_part, 0),
        logger=None,
        fixed_params=dict(X_test=valid_part),
    )
    best_config, _ = optimizer.optimize(n_trials=40, random_seed=42)
    # Continuous, integer, and categorical parameters all converge.
    assert best_config["p1"] >= 0.9
    assert 0.99 <= best_config["reg"] <= 1.01
    assert 100 <= best_config["I1"] <= 102
    assert 500 <= best_config["I2"] <= 502
    assert best_config["flag"] in ["foo", "bar"]
def test_metrics_ColdUser(U: int, I: int, U_test: int) -> None:
    """Hot-user evaluation and cold-user evaluation must agree on the same split,
    and cold evaluation must be invariant under a user permutation."""
    rng = np.random.RandomState(42)
    user_factors = rng.randn(U + U_test, 3)
    item_factors = rng.randn(I, 3)
    affinity = user_factors.dot(item_factors.T)
    X = sps.csr_matrix((affinity > 0).astype(np.float64))
    X_train, X_val = X[:U], X[U:]
    X_val_learn, X_val_target = rowwise_train_test_split(X_val, random_seed=0)
    X_train_all = sps.vstack([X_train, X_val_learn])

    hot_evaluator = Evaluator(
        sps.csr_matrix(X_val_target), offset=U, cutoff=I // 2, n_threads=2
    )
    rec = P3alphaRecommender(X_train_all)
    rec.learn()
    hot_score = hot_evaluator.get_score(rec)

    # csc input should warn about memory ordering: a csc-csc product stays
    # csc, i.e. col-major once densified.
    with pytest.warns(UserWarning):
        cold_evaluator = EvaluatorWithColdUser(
            X_val_learn.tocsc(), X_val_target, cutoff=I // 2, mb_size=5
        )
    cold_score = cold_evaluator.get_score(rec)

    # Shuffling the users must not change any cold-start metric.
    permutation = np.arange(X_val_learn.shape[0])
    rng.shuffle(permutation)
    cold_evaluator_shuffled = EvaluatorWithColdUser(
        X_val_learn[permutation], X_val_target[permutation], cutoff=I // 2
    )
    cold_score_shuffled = cold_evaluator_shuffled.get_score(rec)
    for key in cold_score:
        assert cold_score_shuffled[key] == pytest.approx(cold_score[key])
    # Hot and cold evaluators see the same split, so scores must coincide.
    for key in hot_score:
        assert hot_score[key] == pytest.approx(cold_score[key], abs=1e-8)
def test_irregular(U: int, I: int) -> None:
    """Validation of ``per_user_recommendable_items``: shapes the constructor
    accepts and shapes it rejects with ``ValueError``.

    Args:
        U: number of users.
        I: number of items (so ``I`` itself is an out-of-range index).
    """
    rns = np.random.RandomState(42)
    X_gt = (rns.rand(U, I) >= 0.3).astype(np.float64)
    # Accepted: an empty list, a single shared list, or one list per user.
    # (The unused ``recommendables`` local of the original was removed.)
    for recommendable_items in (
        [],
        [[0]],
        [[0] for _ in range(X_gt.shape[0])],
    ):
        _ = Evaluator(
            sps.csr_matrix(X_gt),
            offset=0,
            cutoff=I,
            n_threads=1,
            per_user_recommendable_items=recommendable_items,
        )
    # Rejected: two lists (assumes U != 2 in this parametrization — the length
    # matches neither 1 nor the number of users).
    with pytest.raises(ValueError):
        _ = Evaluator(
            sps.csr_matrix(X_gt),
            offset=0,
            cutoff=I,
            n_threads=1,
            per_user_recommendable_items=[[0], [0]],
        )
    # Rejected: duplicate item indices within a user's list.
    with pytest.raises(ValueError):
        _ = Evaluator(
            sps.csr_matrix(X_gt),
            offset=0,
            cutoff=I,
            n_threads=1,
            per_user_recommendable_items=[[0, 0]],
        )
    # Rejected: an out-of-range item index.
    with pytest.raises(ValueError):
        _ = Evaluator(
            sps.csr_matrix(X_gt),
            offset=0,
            cutoff=I,
            n_threads=1,
            per_user_recommendable_items=[[I]],
        )
def test_cb2cf(X: InteractionMatrix, profile: ProfileMatrix) -> None:
    """Fit IALS & let mlp overfit.

    Args:
        X (InteractionMatrix): user_item interaction matrix
        profile (ProfileMatrix): profile
    """
    try:
        from irspack.user_cold_start.cb2cf import CB2IALSOptimizer
    except ImportError:
        # BUG FIX: the original used a bare ``except`` and then an unreachable
        # ``raise`` — ``pytest.skip`` already raises, so the ``raise`` never ran.
        pytest.skip("Failed to import jax.")
    # NOTE(review): the parameter ``X`` is unused; ``X_cf`` looks like a
    # module-level fixture/global — confirm this is intentional.
    X_cf_train_all, X_val = rowwise_train_test_split(
        X_cf, test_ratio=0.5, random_seed=0
    )
    evaluator = Evaluator(X_val, 0)
    optim = CB2IALSOptimizer(
        X_cf_train_all,
        evaluator,
        profile,
    )
    cb2cfrec, t, mlp_config = optim.search_all(
        20,
        cf_fixed_params=dict(n_components=5, alpha=0, reg=1e-3, max_cg_steps=30),
        random_seed=0,
    )
    vec_reconstruction = cb2cfrec.mlp.predict(profile.astype(np.float32).toarray())
    vec_target = cb2cfrec.cf_rec.get_user_embedding()
    # Relative squared reconstruction error of the user embeddings; the MLP
    # should be able to (over)fit them to within 10%.
    residual = ((vec_reconstruction - vec_target) ** 2).sum() / (vec_target ** 2).sum()
    assert residual <= 1e-1
def test_metrics(U: int, I: int) -> None:
    """sklearn agreement for map/ndcg, plus masked-interaction behavior:
    shape mismatch raises, an empty mask leaves a perfect recommender at
    recall 1.0, and masking the whole ground truth drives recall to 0."""
    rng = np.random.RandomState(42)
    raw_scores = rng.randn(U, I)
    X_gt = (rng.rand(U, I) >= 0.7).astype(np.float64)
    evaluator = Evaluator(sps.csr_matrix(X_gt), offset=0, cutoff=I, n_threads=4)
    recommender = MockRecommender(sps.csr_matrix(X_gt.shape), raw_scores)
    observed = evaluator.get_score(recommender)
    reference = defaultdict(list)
    for u in range(raw_scores.shape[0]):
        # Users without positives are excluded from the averages.
        if X_gt[u].sum() == 0:
            continue
        reference["map"].append(average_precision_score(X_gt[u], raw_scores[u]))
        reference["ndcg"].append(ndcg_score(X_gt[u][None, :], raw_scores[u][None, :]))
    for metric_name in ("map", "ndcg"):
        assert observed[metric_name] == pytest.approx(
            np.mean(reference[metric_name]), abs=1e-8
        )

    # A mask whose shape disagrees with the ground truth must be rejected.
    with pytest.raises(ValueError):
        eval_emptymask = Evaluator(
            sps.csr_matrix(X_gt),
            offset=0,
            cutoff=I,
            n_threads=None,
            masked_interactions=sps.csr_matrix((X_gt.shape[0] + 1, X_gt.shape[1])),
        )

    # Keep only users with at least one positive and at least one negative.
    X_gt = X_gt[(X_gt.sum(axis=1) > 0) & ((X_gt > 0).sum(axis=1) < I)]
    eval_emptymask = Evaluator(
        sps.csr_matrix(X_gt),
        offset=0,
        cutoff=1,
        n_threads=None,
        mb_size=3,
        masked_interactions=sps.csr_matrix(X_gt.shape),  # empty mask
        recall_with_cutoff=True,
    )
    # An oracle scoring with the ground truth itself is perfect at cutoff 1.
    oracle = MockRecommender(sps.csr_matrix(X_gt), X_gt)
    perfect_score = eval_emptymask.get_score(oracle)
    assert perfect_score["recall"] == pytest.approx(1.0)

    # Masking exactly the ground truth leaves nothing recommendable to hit.
    eval_vicious = Evaluator(
        sps.csr_matrix(X_gt),
        offset=0,
        cutoff=1,
        n_threads=1,
        masked_interactions=X_gt,
    )
    vicious_score = eval_vicious.get_score(oracle)
    assert vicious_score["recall"] == 0.0
heldout_ratio_test=0.5, heldout_ratio_val=0.5, ) data_train = data_all["train"] data_val = data_all["val"] data_test = data_all["test"] X_train_all: sps.csr_matrix = sps.vstack( [data_train.X_train, data_val.X_train, data_test.X_train], format="csr") X_train_val_all: sps.csr_matrix = sps.vstack( [data_train.X_all, data_val.X_all, data_test.X_train], format="csr") valid_evaluator = Evaluator( ground_truth=data_val.X_test, offset=data_train.n_users, cutoff=BASE_CUTOFF, ) test_evaluator = Evaluator( ground_truth=data_test.X_test, offset=data_train.n_users + data_val.n_users, cutoff=BASE_CUTOFF, ) test_results = [] test_configs: List[Tuple[Type[BaseOptimizer], int]] = [ (TopPopOptimizer, 1), (CosineKNNOptimizer, 40), (AsymmetricCosineKNNOptimizer, 40), (TverskyIndexKNNOptimizer, 40),