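# ---------------------------------------------------------------------------
# Minimal header sketch so the functions below run standalone. The import
# paths assume the Microsoft Recommenders package layout, and the constant
# values are illustrative assumptions; the real module defines them elsewhere
# (e.g. in conftest.py or a shared constants module).
# ---------------------------------------------------------------------------
import pandas as pd
import pytest

from recommenders.evaluation.python_evaluation import (
    map_at_k,
    ndcg_at_k,
    precision_at_k,
    recall_at_k,
)
from recommenders.evaluation.spark_evaluation import SparkRankingEvaluation

TOL = 0.0001  # assumed tolerance for Python/Spark metric comparisons
DEFAULT_K = 10  # assumed default cutoff for top-k metrics
COL_DICT = {  # assumed column-name mapping passed to SparkRankingEvaluation
    "col_user": "userID",
    "col_item": "itemID",
    "col_rating": "rating",
    "col_prediction": "prediction",
}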
def ranking_metrics_pyspark(test, predictions, k=DEFAULT_K):
    """Compute MAP, nDCG@k, precision@k, and recall@k with Spark."""
    rank_eval = SparkRankingEvaluation(
        test, predictions, k=k, relevancy_method="top_k", **COL_DICT
    )
    return {
        "MAP": rank_eval.map_at_k(),
        "nDCG@k": rank_eval.ndcg_at_k(),
        "Precision@k": rank_eval.precision_at_k(),
        "Recall@k": rank_eval.recall_at_k(),
    }
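# Hypothetical usage of ranking_metrics_pyspark (dataframe names are
# assumptions): both arguments are Spark DataFrames with the column names
# declared in COL_DICT, holding ground-truth ratings and top-k predictions.
#
#   metrics = ranking_metrics_pyspark(test_df, top_k_df, k=10)
#   print(metrics)  # {"MAP": ..., "nDCG@k": ..., "Precision@k": ..., "Recall@k": ...}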
def test_spark_precision(spark_data, target_metrics, spark):
    df_true, df_pred = spark_data

    evaluator = SparkRankingEvaluation(df_true, df_pred, k=10)
    assert evaluator.precision_at_k() == target_metrics["precision"]

    evaluator1 = SparkRankingEvaluation(
        df_true, df_pred, relevancy_method="by_threshold", threshold=3.5
    )
    assert evaluator1.precision_at_k() == target_metrics["precision"]

    # Check normalization
    single_user = pd.DataFrame(
        {"userID": [1, 1, 1], "itemID": [1, 2, 3], "rating": [5, 4, 3]}
    )
    df_single = spark.createDataFrame(single_user)
    evaluator2 = SparkRankingEvaluation(
        df_single, df_single, k=3, col_prediction="rating"
    )
    assert evaluator2.precision_at_k() == 1

    same_items = pd.DataFrame(
        {
            "userID": [1, 1, 1, 2, 2, 2],
            "itemID": [1, 2, 3, 1, 2, 3],
            "rating": [5, 4, 3, 5, 5, 3],
        }
    )
    df_same = spark.createDataFrame(same_items)
    evaluator3 = SparkRankingEvaluation(df_same, df_same, k=3, col_prediction="rating")
    assert evaluator3.precision_at_k() == 1

    # Check that if the sample size is smaller than k, the maximum precision
    # cannot be 1: for precision@5 when there are only 3 items, the best
    # achievable score is 3/5.
    evaluator4 = SparkRankingEvaluation(df_same, df_same, k=5, col_prediction="rating")
    assert evaluator4.precision_at_k() == 0.6
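# Worked check of the k > n edge case above: a pure-Python sketch (not the
# library implementation) of per-user precision@k, where the denominator is
# always k. With 3 relevant items all recommended, precision@5 = 3/5 = 0.6.
def _toy_precision_at_k(recommended, relevant, k):
    """Precision@k for a single user; hits over k, not over len(recommended)."""
    hits = len(set(recommended[:k]) & set(relevant))
    return hits / k


assert _toy_precision_at_k([1, 2, 3], [1, 2, 3], k=3) == 1.0
assert _toy_precision_at_k([1, 2, 3], [1, 2, 3], k=5) == 0.6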
def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.
    df_true, df_pred = python_data

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match1 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.map_at_k(), TOL),
    ]
    assert all(match1)

    # Test on the original data with k = 3.
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    match2 = [
        recall_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.map_at_k(), TOL),
    ]
    assert all(match2)

    # Remove the first and last rows from the prediction data.
    df_pred = df_pred[1:-1]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match3 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.map_at_k(), TOL),
    ]
    assert all(match3)

    # Test with one user.
    df_pred = df_pred.loc[df_pred["userID"] == 3]
    df_true = df_true.loc[df_true["userID"] == 3]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match4 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.map_at_k(), TOL),
    ]
    assert all(match4)
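# The spark, spark_data, python_data, and target_metrics fixtures used above
# are assumed to live in conftest.py. A minimal sketch of the SparkSession
# fixture (app name and configuration values are assumptions):
from pyspark.sql import SparkSession


@pytest.fixture(scope="session")
def spark():
    session = (
        SparkSession.builder.master("local[*]")
        .appName("test_spark_evaluation")
        .getOrCreate()
    )
    yield session
    session.stop()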