Ejemplo n.º 1
0
def test_python_errors(rating_true, rating_pred):
    """Test raise errors."""
    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="******")

    with pytest.raises(ValueError):
        mae(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="******",
        )

    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")

    with pytest.raises(ValueError):
        exp_var(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_item="not_item",
        )

    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")

    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")

    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="******")

    with pytest.raises(ValueError):
        map_at_k(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="******",
        )
Ejemplo n.º 2
0
def test_python_precision(rating_true, rating_pred, rating_nohit):
    assert (
        precision_at_k(
            rating_true=rating_true,
            rating_pred=rating_true,
            col_prediction=DEFAULT_RATING_COL,
            k=10,
        )
        == 0.6
    )
    assert precision_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert precision_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.26666, TOL)

    # Check normalization
    single_user = pd.DataFrame(
        {
            DEFAULT_USER_COL: [1, 1, 1],
            DEFAULT_ITEM_COL: [1, 2, 3],
            DEFAULT_RATING_COL: [5, 4, 3],
        }
    )
    assert (
        precision_at_k(
            rating_true=single_user,
            rating_pred=single_user,
            col_rating=DEFAULT_RATING_COL,
            col_prediction=DEFAULT_RATING_COL,
            k=3,
        )
        == 1
    )

    same_items = pd.DataFrame(
        {
            DEFAULT_USER_COL: [1, 1, 1, 2, 2, 2],
            DEFAULT_ITEM_COL: [1, 2, 3, 1, 2, 3],
            DEFAULT_RATING_COL: [5, 4, 3, 5, 5, 3],
        }
    )
    assert (
        precision_at_k(
            rating_true=same_items,
            rating_pred=same_items,
            col_prediction=DEFAULT_RATING_COL,
            k=3,
        )
        == 1
    )

    # Check that if the sample size is smaller than k, the maximum precision can not be 1
    # if we do precision@5 when there is only 3 items, we can get a maximum of 3/5.
    assert (
        precision_at_k(
            rating_true=same_items,
            rating_pred=same_items,
            col_prediction=DEFAULT_RATING_COL,
            k=5,
        )
        == 0.6
    )