def test_spark_recall(spark_data, target_metrics):
    df_true, df_pred = spark_data

    evaluator = SparkRankingEvaluation(df_true, df_pred)
    assert evaluator.recall_at_k() == target_metrics["recall"]

    evaluator1 = SparkRankingEvaluation(
        df_true, df_pred, relevancy_method="by_threshold", threshold=3.5
    )
    assert evaluator1.recall_at_k() == target_metrics["recall"]
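
These tests consume pytest fixtures (`spark_data`, `target_metrics`, `spark`) defined elsewhere in the suite, typically in a `conftest.py`. Below is a minimal, hypothetical sketch of what such fixtures could look like; the data values are illustrative, and `target_metrics` (precomputed expected scores for the fixture data) is omitted rather than invented.

# Hypothetical conftest.py sketch; the real fixtures live in the test suite.
import pandas as pd
import pytest
from pyspark.sql import SparkSession


@pytest.fixture(scope="module")
def spark():
    # Local Spark session shared by the module's tests.
    return SparkSession.builder.master("local[1]").appName("eval-tests").getOrCreate()


@pytest.fixture
def spark_data(spark):
    # Tiny ground-truth and prediction frames (illustrative only).
    df_true = spark.createDataFrame(
        pd.DataFrame({"userID": [1, 1, 2], "itemID": [1, 2, 1], "rating": [5, 4, 3]})
    )
    df_pred = spark.createDataFrame(
        pd.DataFrame({"userID": [1, 1, 2], "itemID": [3, 1, 2], "prediction": [14.0, 13.0, 12.0]})
    )
    return df_true, df_pred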
Example #2
def ranking_metrics_pyspark(test, predictions, k=DEFAULT_K):
    rank_eval = SparkRankingEvaluation(
        test, predictions, k=k, relevancy_method="top_k", **COL_DICT
    )
    return {
        "MAP": rank_eval.map_at_k(),
        "nDCG@k": rank_eval.ndcg_at_k(),
        "Precision@k": rank_eval.precision_at_k(),
        "Recall@k": rank_eval.recall_at_k()
    }
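
The snippets above also assume module-level imports and constants. A plausible reconstruction follows; the import paths match recent releases of the Microsoft Recommenders library (`recommenders.evaluation.*`), while older releases shipped the same classes under `reco_utils.evaluation.*`, so treat the paths as version-dependent assumptions.

# Assumed imports and constants, named exactly as the snippets use them.
import pandas as pd
import pytest

from recommenders.evaluation.spark_evaluation import SparkRankingEvaluation
from recommenders.evaluation.python_evaluation import (
    map_at_k,
    ndcg_at_k,
    precision_at_k,
    recall_at_k,
)

DEFAULT_K = 10  # assumed ranking cutoff
TOL = 0.0001    # assumed tolerance for pytest.approx comparisons
COL_DICT = {    # assumed column-name mapping expanded via **COL_DICT
    "col_user": "userID",
    "col_item": "itemID",
    "col_rating": "rating",
    "col_prediction": "prediction",
}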
Example #3
def test_spark_ndcg(spark_data, target_metrics):
    df_true, df_pred = spark_data

    evaluator0 = SparkRankingEvaluation(df_true, df_true, k=10, col_prediction="rating")
    assert evaluator0.ndcg_at_k() == 1.0

    evaluator = SparkRankingEvaluation(df_true, df_pred, k=10)
    assert evaluator.ndcg_at_k() == target_metrics["ndcg"]

    evaluator1 = SparkRankingEvaluation(
        df_true, df_pred, relevancy_method="by_threshold", threshold=3.5
    )
    assert evaluator1.ndcg_at_k() == target_metrics["ndcg"]
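
The `evaluator0` assertion holds because nDCG@k is DCG@k normalized by the ideal DCG@k, so ranking the ground truth by its own ratings is, by construction, a perfect ranking. The same sanity check can be phrased against the pandas-based evaluator; the import path is the version-dependent assumption noted above.

# Truth scored against itself should yield exactly 1.0.
import pandas as pd
from recommenders.evaluation.python_evaluation import ndcg_at_k

df_true = pd.DataFrame({"userID": [1, 1, 2], "itemID": [1, 2, 1], "rating": [5, 4, 3]})
assert ndcg_at_k(df_true, df_true, col_prediction="rating", k=10) == 1.0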
Example #4
def test_spark_map(spark_data, target_metrics):
    df_true, df_pred = spark_data

    evaluator0 = SparkRankingEvaluation(
        k=10, rating_true=df_true, rating_pred=df_true, col_prediction="rating"
    )
    assert evaluator0.map_at_k() == 1.0

    evaluator = SparkRankingEvaluation(df_true, df_pred, k=10)
    assert evaluator.map_at_k() == target_metrics["map"]

    evaluator1 = SparkRankingEvaluation(
        df_true, df_pred, relevancy_method="by_threshold", threshold=3.5
    )
    assert evaluator1.map_at_k() == target_metrics["map"]
Example #5
def test_spark_precision(spark_data, target_metrics, spark):
    df_true, df_pred = spark_data

    evaluator = SparkRankingEvaluation(df_true, df_pred, k=10)
    assert evaluator.precision_at_k() == target_metrics["precision"]

    evaluator1 = SparkRankingEvaluation(
        df_true, df_pred, relevancy_method="by_threshold", threshold=3.5
    )
    assert evaluator1.precision_at_k() == target_metrics["precision"]

    # Check normalization
    single_user = pd.DataFrame(
        {"userID": [1, 1, 1], "itemID": [1, 2, 3], "rating": [5, 4, 3]}
    )
    df_single = spark.createDataFrame(single_user)
    evaluator2 = SparkRankingEvaluation(
        df_single, df_single, k=3, col_prediction="rating"
    )
    assert evaluator2.precision_at_k() == 1

    same_items = pd.DataFrame(
        {
            "userID": [1, 1, 1, 2, 2, 2],
            "itemID": [1, 2, 3, 1, 2, 3],
            "rating": [5, 4, 3, 5, 5, 3],
        }
    )
    df_same = spark.createDataFrame(same_items)
    evaluator3 = SparkRankingEvaluation(df_same, df_same, k=3, col_prediction="rating")
    assert evaluator3.precision_at_k() == 1

    # If a user has fewer items than k, the maximum precision cannot be 1:
    # precision@5 over only 3 items caps at 3/5.
    evaluator4 = SparkRankingEvaluation(df_same, df_same, k=5, col_prediction="rating")
    assert evaluator4.precision_at_k() == 0.6
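
The final assertion pins down the normalization: precision@k divides by k rather than by the number of items a user actually has, so a perfect ranking of 3 items still scores 3/5 at k=5. The same ceiling can be shown standalone with the pandas evaluator (import path assumed as above).

# Each user has only 3 items, so even perfect predictions cap at 3/5.
import pandas as pd
from recommenders.evaluation.python_evaluation import precision_at_k

same_items = pd.DataFrame({
    "userID": [1, 1, 1, 2, 2, 2],
    "itemID": [1, 2, 3, 1, 2, 3],
    "rating": [5, 4, 3, 5, 5, 3],
})
assert precision_at_k(same_items, same_items, col_prediction="rating", k=5) == 0.6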
Example #6
def test_spark_precision(spark_data, target_metrics):
    df_true, df_pred = spark_data

    evaluator = SparkRankingEvaluation(df_true, df_pred, k=10)
    assert evaluator.precision_at_k() == target_metrics["precision"]

    evaluator1 = SparkRankingEvaluation(df_true,
                                        df_pred,
                                        relevancy_method="by_threshold")
    assert evaluator1.precision_at_k() == target_metrics["precision"]
Example #7
def test_spark_recall(spark_data, target_metrics):
    df_true, df_pred = spark_data

    evaluator = SparkRankingEvaluation(df_true, df_pred)
    assert evaluator.recall_at_k() == target_metrics["recall"]

    evaluator1 = SparkRankingEvaluation(df_true,
                                        df_pred,
                                        relevancy_method="by_threshold")
    assert evaluator1.recall_at_k() == target_metrics["recall"]
Example #8
def ranking_metrics_pyspark(test, predictions, k=DEFAULT_K):
    rank_eval = SparkRankingEvaluation(
        test, predictions, k=k, relevancy_method="top_k", **COL_DICT
    )
    return {
        "MAP": rank_eval.map_at_k(),
        "nDCG@k": rank_eval.ndcg_at_k(),
        "Precision@k": rank_eval.precision_at_k(),
        "Recall@k": rank_eval.recall_at_k(),
    }
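
A hedged usage sketch: given Spark DataFrames of held-out truth and model predictions whose column names line up with `COL_DICT`, the helper computes all four ranking metrics in one pass. The DataFrame names here are placeholders.

# Hypothetical call site; df_test and df_predictions are Spark DataFrames.
metrics = ranking_metrics_pyspark(df_test, df_predictions, k=10)
print(
    f"MAP={metrics['MAP']:.4f}  nDCG@10={metrics['nDCG@k']:.4f}  "
    f"P@10={metrics['Precision@k']:.4f}  R@10={metrics['Recall@k']:.4f}"
)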
Example #9
def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.

    df_true, df_pred = python_data

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match1 = [
        recall_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark1.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark1.precision_at_k(),
                                              TOL),
        ndcg_at_k(df_true, df_pred,
                  k=10) == pytest.approx(eval_spark1.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=10) == pytest.approx(eval_spark1.map_at_k(), TOL),
    ]

    assert all(match1)

    # Test on the original data with k = 3.

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    match2 = [
        recall_at_k(df_true, df_pred,
                    k=3) == pytest.approx(eval_spark2.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=3) == pytest.approx(eval_spark2.precision_at_k(),
                                             TOL),
        ndcg_at_k(df_true, df_pred,
                  k=3) == pytest.approx(eval_spark2.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=3) == pytest.approx(eval_spark2.map_at_k(), TOL),
    ]

    assert all(match2)

    # Drop the first and last rows from the predictions.

    df_pred = df_pred[1:-1]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match3 = [
        recall_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark3.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark3.precision_at_k(),
                                              TOL),
        ndcg_at_k(df_true, df_pred,
                  k=10) == pytest.approx(eval_spark3.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=10) == pytest.approx(eval_spark3.map_at_k(), TOL),
    ]

    assert all(match3)

    # Test with one user

    df_pred = df_pred[df_pred["userID"] == 3]
    df_true = df_true[df_true["userID"] == 3]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match4 = [
        recall_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark4.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark4.precision_at_k(),
                                              TOL),
        ndcg_at_k(df_true, df_pred,
                  k=10) == pytest.approx(eval_spark4.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=10) == pytest.approx(eval_spark4.map_at_k(), TOL),
    ]

    assert all(match4)
Example #10
def test_spark_precision(spark_data, target_metrics, spark):
    df_true, df_pred = spark_data

    evaluator = SparkRankingEvaluation(df_true, df_pred, k=10)
    assert evaluator.precision_at_k() == target_metrics["precision"]

    evaluator1 = SparkRankingEvaluation(df_true,
                                        df_pred,
                                        relevancy_method="by_threshold",
                                        threshold=3.5)
    assert evaluator1.precision_at_k() == target_metrics["precision"]

    # Check normalization
    single_user = pd.DataFrame({
        "userID": [1, 1, 1],
        "itemID": [1, 2, 3],
        "rating": [5, 4, 3]
    })
    df_single = spark.createDataFrame(single_user)
    evaluator2 = SparkRankingEvaluation(df_single,
                                        df_single,
                                        k=3,
                                        col_prediction="rating")
    assert evaluator2.precision_at_k() == 1

    same_items = pd.DataFrame({
        "userID": [1, 1, 1, 2, 2, 2],
        "itemID": [1, 2, 3, 1, 2, 3],
        "rating": [5, 4, 3, 5, 5, 3],
    })
    df_same = spark.createDataFrame(same_items)
    evaluator3 = SparkRankingEvaluation(df_same,
                                        df_same,
                                        k=3,
                                        col_prediction="rating")
    assert evaluator3.precision_at_k() == 1

    # If a user has fewer items than k, the maximum precision cannot be 1:
    # precision@5 over only 3 items caps at 3/5.
    evaluator4 = SparkRankingEvaluation(df_same,
                                        df_same,
                                        k=5,
                                        col_prediction="rating")
    assert evaluator4.precision_at_k() == 0.6
Example #11
def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.
    df_true, df_pred = python_data

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match1 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark1.map_at_k(), TOL),
    ]
    assert all(match1)

    # Test on the original data with k = 3.
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    match2 = [
        recall_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.map_at_k(), TOL),
    ]
    assert all(match2)

    # Drop the first and last rows from the predictions.
    df_pred = df_pred[1:-1]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match3 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark3.map_at_k(), TOL),
    ]
    assert all(match3)

    # Test with one user
    df_pred = df_pred.loc[df_pred["userID"] == 3]
    df_true = df_true.loc[df_true["userID"] == 3]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match4 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark4.map_at_k(), TOL),
    ]
    assert all(match4)