def test_metrics_colduser_mask(U: int, I: int, U_test: int) -> None:
    """Check masking behavior of ``EvaluatorWithColdUser`` with a top-pop model.

    Two scenarios are exercised:
      * a "vicious" setup where the ground truth equals the input
        interactions, so every candidate is masked and hit must be 0;
      * a "generous" setup where the ground truth marks exactly the
        globally most popular item(s) and masking is disabled via an
        all-zero mask, so top-pop at cutoff=1 must achieve recall 1.0 —
        and this must survive a pickle round trip of the evaluator.
    """
    rng = np.random.RandomState(42)

    # Random binary interaction matrix; drop users with no interactions.
    dense_gt = (rng.rand(U, I) >= 0.5).astype(np.float64)
    nonempty_rows = dense_gt.sum(axis=1) > 0
    X_gt = sps.csr_matrix(dense_gt[nonempty_rows])

    rec = TopPopRecommender(X_gt).learn()

    # Ground truth identical to the input: everything is masked, no hits.
    vicious_eval = EvaluatorWithColdUser(X_gt, X_gt, cutoff=1)
    assert vicious_eval.get_score(rec)["hit"] == 0.0

    # Build a ground truth containing exactly the most popular item(s).
    popularity = rec.get_score_cold_user(X_gt[:1, :]).ravel()
    top_items = np.where(popularity == popularity.max())[0]
    dense_pop = np.zeros(X_gt.shape)
    dense_pop[:, top_items] = 1
    X_gt_pop = sps.csr_matrix(dense_pop)

    # With an all-zero mask, top-pop at cutoff=1 must recall the top item.
    generous_eval = EvaluatorWithColdUser(
        X_gt_pop,
        X_gt_pop,
        cutoff=1,
        masked_interactions=sps.csr_matrix(X_gt.shape),
        recall_with_cutoff=True,
    )
    assert generous_eval.get_score(rec)["recall"] == 1.0

    # The evaluator must behave identically after a pickle round trip.
    buffer = BytesIO()
    pickle.dump(generous_eval, buffer)
    buffer.seek(0)
    restored_eval = pickle.load(buffer)
    assert restored_eval.get_score(rec)["recall"] == 1.0
def test_metrics_ColdUser(U: int, I: int, U_test: int) -> None:
    """``EvaluatorWithColdUser`` must agree with the warm-user ``Evaluator``.

    A latent-factor interaction matrix is split into train / validation
    parts. The cold-user evaluator fed the validation "learn" interactions
    must reproduce the warm evaluator's metrics, and its scores must be
    invariant under a row permutation of its inputs.
    """
    rng = np.random.RandomState(42)
    user_factors = rng.randn(U + U_test, 3)
    item_factors = rng.randn(I, 3)
    affinity = user_factors.dot(item_factors.T)  # + rng.randn(U, I)
    X = sps.csr_matrix((affinity > 0).astype(np.float64))

    X_train, X_val = X[:U], X[U:]
    X_val_learn, X_val_target = rowwise_train_test_split(X_val, random_seed=0)
    X_train_all = sps.vstack([X_train, X_val_learn])

    hot_evaluator = Evaluator(
        sps.csr_matrix(X_val_target), offset=U, cutoff=I // 2, n_threads=2
    )
    rec = P3alphaRecommender(X_train_all)
    rec.learn()
    hot_score = hot_evaluator.get_score(rec)

    # csc matrix input should raise warning about memory ordering, as a
    # csc-csc matrix product will be csc, hence a col-major matrix when
    # made dense.
    with pytest.warns(UserWarning):
        cold_evaluator = EvaluatorWithColdUser(
            X_val_learn.tocsc(), X_val_target, cutoff=I // 2, mb_size=5
        )
        cold_score = cold_evaluator.get_score(rec)

    # Shuffling the evaluation rows must not change any metric.
    shuffle_index = np.arange(X_val_learn.shape[0])
    rng.shuffle(shuffle_index)
    shuffled_evaluator = EvaluatorWithColdUser(
        X_val_learn[shuffle_index], X_val_target[shuffle_index], cutoff=I // 2
    )
    shuffled_score = shuffled_evaluator.get_score(rec)

    for key, value in cold_score.items():
        assert shuffled_score[key] == pytest.approx(value)
    for key, value in hot_score.items():
        assert value == pytest.approx(cold_score[key], abs=1e-8)
"movieId", n_test_user=10000, n_val_user=10000, heldout_ratio_val=0.2, heldout_ratio_test=0.2, ) data_train = data_all["train"] data_val = data_all["val"] data_test = data_all["test"] X_train_val_all: sps.csr_matrix = sps.vstack( [data_train.X_all, data_val.X_all], format="csr") valid_evaluator = EvaluatorWithColdUser( input_interaction=data_val.X_train, ground_truth=data_val.X_test, cutoff=BASE_CUTOFF, ) test_evaluator = EvaluatorWithColdUser( input_interaction=data_test.X_train, ground_truth=data_test.X_test, cutoff=BASE_CUTOFF, ) test_results = [] validation_results = [] test_configs: List[Tuple[Type[BaseOptimizer], int, Dict[str, Any]]] = [ (TopPopOptimizer, 1, dict()), (CosineKNNOptimizer, 40, dict()), (AsymmetricCosineKNNOptimizer, 40, dict()),