Example #1
0
def test_metrics_colduser_mask(U: int, I: int, U_test: int) -> None:
    """Check cold-user evaluation with and without an explicit interaction mask.

    Three things are verified against a fitted ``TopPopRecommender``:

    1. Using the same matrix as both input and ground truth with ``cutoff=1``
       must give a ``"hit"`` score of exactly 0 (presumably because the
       evaluator masks the input interactions by default -- confirm against
       ``EvaluatorWithColdUser``).
    2. When every user's ground truth is exactly the set of top-scored items
       and an all-zero ``masked_interactions`` matrix is supplied, ``"recall"``
       must be exactly 1.
    3. The evaluator from step 2 still reports ``"recall" == 1`` after a
       pickle round trip.

    ``U_test`` is accepted but not used by this test.
    """
    rng = np.random.RandomState(42)

    # Random binary interactions; rows without any interaction are discarded.
    dense_truth = (rng.rand(U, I) >= 0.5).astype(np.float64)
    non_empty_rows = dense_truth.sum(axis=1) > 0
    interactions = sps.csr_matrix(dense_truth[non_empty_rows])

    recommender = TopPopRecommender(interactions).learn()

    # Phase 1: input and ground truth are the same matrix.
    strict_evaluator = EvaluatorWithColdUser(interactions, interactions, cutoff=1)
    strict_scores = strict_evaluator.get_score(recommender)
    assert strict_scores["hit"] == 0.0

    # Phase 2: ground truth consists solely of the items the recommender
    # scores highest for a cold user (ties included).
    item_scores = recommender.get_score_cold_user(interactions[:1, :]).ravel()
    top_items = np.where(item_scores == item_scores.max())[0]

    dense_top = np.zeros(interactions.shape)
    dense_top[:, top_items] = 1
    top_only = sps.csr_matrix(dense_top)

    # An empty sparse matrix of the same shape is passed as the mask.
    empty_mask = sps.csr_matrix(interactions.shape)
    lenient_evaluator = EvaluatorWithColdUser(
        top_only,
        top_only,
        cutoff=1,
        masked_interactions=empty_mask,
        recall_with_cutoff=True,
    )
    lenient_scores = lenient_evaluator.get_score(recommender)
    assert lenient_scores["recall"] == 1.0

    # Phase 3: the evaluator must survive serialization unchanged.
    buffer = BytesIO()
    pickle.dump(lenient_evaluator, buffer)
    buffer.seek(0)
    restored_evaluator = pickle.load(buffer)

    assert restored_evaluator.get_score(recommender)["recall"] == 1.0
Example #2
0
def test_metrics_ColdUser(U: int, I: int, U_test: int) -> None:
    """Check that cold-user evaluation agrees with ordinary evaluation.

    Interactions are generated from random 3-dimensional user/item vectors.
    The first ``U`` rows form the training set; the remaining ``U_test`` rows
    are split row-wise into a "learn" part and a "target" part.  A
    ``P3alphaRecommender`` is trained on the training rows stacked with the
    "learn" part, and then:

    * ``EvaluatorWithColdUser`` fed the "learn" part as CSC input must emit a
      ``UserWarning``;
    * shuffling the validation users must not change the cold-user scores;
    * every metric from the regular ``Evaluator`` (with ``offset=U``) must
      match the cold-user metric to within ``1e-8``.
    """
    rng = np.random.RandomState(42)
    user_vectors = rng.randn(U + U_test, 3)
    item_vectors = rng.randn(I, 3)
    affinity = user_vectors.dot(item_vectors.T)
    interactions = sps.csr_matrix((affinity > 0).astype(np.float64))

    train_rows = interactions[:U]
    val_rows = interactions[U:]
    val_learn, val_target = rowwise_train_test_split(val_rows, random_seed=0)
    train_and_val_learn = sps.vstack([train_rows, val_learn])

    top_k = I // 2
    hot_evaluator = Evaluator(
        sps.csr_matrix(val_target), offset=U, cutoff=top_k, n_threads=2
    )

    rec = P3alphaRecommender(train_and_val_learn)
    rec.learn()
    hot_score = hot_evaluator.get_score(rec)

    # CSC input is expected to trigger a warning about memory ordering:
    # a csc-csc product stays csc, hence column-major once made dense.
    with pytest.warns(UserWarning):
        cold_evaluator = EvaluatorWithColdUser(
            val_learn.tocsc(), val_target, cutoff=top_k, mb_size=5
        )
        cold_score = cold_evaluator.get_score(rec)

    # Evaluate the same users again in a random order.
    row_order = np.arange(val_learn.shape[0])
    rng.shuffle(row_order)
    shuffled_evaluator = EvaluatorWithColdUser(
        val_learn[row_order], val_target[row_order], cutoff=top_k
    )
    shuffled_score = shuffled_evaluator.get_score(rec)

    for metric in cold_score:
        assert shuffled_score[metric] == pytest.approx(cold_score[metric])

    for metric in hot_score:
        assert hot_score[metric] == pytest.approx(cold_score[metric], abs=1e-8)
Example #3
0
        "movieId",
        n_test_user=10000,
        n_val_user=10000,
        heldout_ratio_val=0.2,
        heldout_ratio_test=0.2,
    )

    data_train = data_all["train"]
    data_val = data_all["val"]
    data_test = data_all["test"]

    X_train_val_all: sps.csr_matrix = sps.vstack(
        [data_train.X_all, data_val.X_all], format="csr")
    valid_evaluator = EvaluatorWithColdUser(
        input_interaction=data_val.X_train,
        ground_truth=data_val.X_test,
        cutoff=BASE_CUTOFF,
    )
    test_evaluator = EvaluatorWithColdUser(
        input_interaction=data_test.X_train,
        ground_truth=data_test.X_test,
        cutoff=BASE_CUTOFF,
    )

    test_results = []
    validation_results = []

    test_configs: List[Tuple[Type[BaseOptimizer], int, Dict[str, Any]]] = [
        (TopPopOptimizer, 1, dict()),
        (CosineKNNOptimizer, 40, dict()),
        (AsymmetricCosineKNNOptimizer, 40, dict()),