def test_python_ndcg_at_k(rating_true, rating_pred, rating_nohit):
    # Using the ground truth as the prediction gives a perfect ranking; the
    # prediction column is therefore the rating column of rating_true.
    assert (ndcg_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    ) == pytest.approx(1.0, TOL))
    assert ndcg_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert ndcg_at_k(rating_true, rating_pred,
                     k=10) == pytest.approx(0.38172, TOL)
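The values asserted above follow directly from the definition of NDCG@k: predicting the ground truth itself yields a perfect ranking (NDCG = 1.0), and a prediction with no hits yields 0.0. Below is a minimal, self-contained sketch of binary-relevance NDCG@k for a single user, written with plain numpy; it is only an illustration of the metric, not the library's `ndcg_at_k` implementation, which additionally handles grouping by user, relevancy thresholds, and graded ratings.

import numpy as np

def ndcg_at_k_single_user(ranked_items, relevant_items, k=10):
    # Binary gains: 1 if the recommended item is in the ground-truth set.
    gains = np.array([1.0 if item in relevant_items else 0.0
                      for item in ranked_items[:k]])
    # DCG discounts positions logarithmically: 1 / log2(rank + 1).
    discounts = 1.0 / np.log2(np.arange(2, gains.size + 2))
    dcg = float((gains * discounts).sum())
    # Ideal DCG: all relevant items ranked first.
    n_ideal = min(len(relevant_items), k)
    idcg = float((1.0 / np.log2(np.arange(2, n_ideal + 2))).sum())
    return dcg / idcg if idcg > 0 else 0.0

# A perfect ranking scores 1.0; a ranking with no hits scores 0.0.
assert ndcg_at_k_single_user(["a", "b", "c"], {"a", "b", "c"}) == 1.0
assert ndcg_at_k_single_user(["x", "y", "z"], {"a", "b", "c"}) == 0.0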
Example 2
    def on_epoch_end(self, epoch, logs=None):
        """At the end of each epoch, calculate NDCG@k on the validation set.

        If the model performance has improved, save the model weights.
        Append the obtained value to the list of validation NDCG@k scores.
        """
        # recommend top k items based on training part of validation set
        top_k = self.recommend_k_items(x=self.val_tr, k=self.k, remove_seen=True)

        # convert recommendations from sparse matrix to dataframe
        top_k_df = self.mapper.map_back_sparse(top_k, kind="prediction")
        test_df = self.mapper.map_back_sparse(self.val_te, kind="ratings")

        # calculate NDCG@k
        NDCG = ndcg_at_k(test_df, top_k_df, col_prediction="prediction", k=self.k)

        # check if there is an improvement in NDCG, if so, update the weights of the saved model
        if NDCG > self.best_ndcg:
            self.best_ndcg = NDCG

            # save the weights of the optimal model
            if self.save_path is not None:
                self.model.save(self.save_path)

        self._data.append(NDCG)
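The hook above is written in the Keras callback style: the enclosing class (not shown in this snippet) is assumed to subclass `keras.callbacks.Callback` and to carry the sparse validation split (`val_tr`, `val_te`), a sparse-to-DataFrame mapper, and the best score seen so far. A minimal, self-contained sketch of the same idea — checkpoint the model whenever validation NDCG@k improves — is given below; `recommend_fn` and `val_true_df` are hypothetical placeholders rather than names from the library.

import tensorflow as tf
# Import path assumes a recent version of the recommenders package.
from recommenders.evaluation.python_evaluation import ndcg_at_k

class NDCGCheckpoint(tf.keras.callbacks.Callback):
    """Sketch: save model weights whenever validation NDCG@k improves."""

    def __init__(self, recommend_fn, val_true_df, k=10, save_path=None):
        super().__init__()
        self.recommend_fn = recommend_fn  # callable returning a top-k prediction DataFrame
        self.val_true_df = val_true_df    # held-out validation interactions
        self.k = k
        self.save_path = save_path
        self.best_ndcg = 0.0
        self.history = []

    def on_epoch_end(self, epoch, logs=None):
        top_k_df = self.recommend_fn(k=self.k)
        ndcg = ndcg_at_k(self.val_true_df, top_k_df,
                         col_prediction="prediction", k=self.k)
        if ndcg > self.best_ndcg:
            self.best_ndcg = ndcg
            if self.save_path is not None:
                self.model.save(self.save_path)
        self.history.append(ndcg)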
Example 3
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    return {
        "MAP": map_at_k(test, predictions, k=k, **COL_DICT),
        "nDCG@k": ndcg_at_k(test, predictions, k=k, **COL_DICT),
        "Precision@k": precision_at_k(test, predictions, k=k, **COL_DICT),
        "Recall@k": recall_at_k(test, predictions, k=k, **COL_DICT),
    }
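`COL_DICT` above is assumed to be a dictionary mapping the evaluators' column-name keyword arguments to the column names used in the test DataFrames. A plausible definition and usage (an assumption for illustration; the actual constants come from the library's defaults) would be:

# Assumed column-name mapping expanded into each metric call via **COL_DICT.
COL_DICT = {
    "col_user": "userID",
    "col_item": "itemID",
    "col_rating": "rating",
    "col_prediction": "prediction",
}

# Usage: compute all four ranking metrics at k=10 for a pair of DataFrames.
# metrics = ranking_metrics_python(test_df, predictions_df, k=10)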
def test_python_errors(rating_true, rating_pred):
    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        mae(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="not_user",
        )

    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")

    with pytest.raises(ValueError):
        exp_var(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_item="not_item",
        )

    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")

    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")

    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        map_at_k(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="not_user",
        )
Example 5
def test_recommend_k_items(rating_true):
    train_set = cornac.data.Dataset.from_uir(
        rating_true.itertuples(index=False), seed=42)
    bpr = cornac.models.BPR(k=100, max_iter=10000, seed=42).fit(train_set)

    preds = predict_ranking(bpr, rating_true, remove_seen=False)

    n_users = len(rating_true["userID"].unique())
    n_items = len(rating_true["itemID"].unique())
    assert preds.shape[0] == n_users * n_items

    assert set(preds.columns) == {"userID", "itemID", "prediction"}
    assert preds["userID"].dtypes == rating_true["userID"].dtypes
    assert preds["itemID"].dtypes == rating_true["itemID"].dtypes
    # perfect ranking achieved
    assert 1 - ndcg_at_k(rating_true, preds) < 1e-10
    assert 1 - recall_at_k(rating_true, preds) < 1e-10
Example 6
    def run_eval(self):
        """Run evaluation on self.data.test.

        Returns:
            list: Results of the metrics listed in `self.metrics`, in order.
        """
        topk_scores = self.recommend_k_items(self.data.test,
                                             top_k=self.top_k,
                                             use_id=True)
        ret = []
        for metric in self.metrics:
            if metric == "map":
                ret.append(
                    map_at_k(self.data.test,
                             topk_scores,
                             relevancy_method=None,
                             k=self.top_k))
            elif metric == "ndcg":
                ret.append(
                    ndcg_at_k(self.data.test,
                              topk_scores,
                              relevancy_method=None,
                              k=self.top_k))
            elif metric == "precision":
                ret.append(
                    precision_at_k(self.data.test,
                                   topk_scores,
                                   relevancy_method=None,
                                   k=self.top_k))
            elif metric == "recall":
                ret.append(
                    recall_at_k(self.data.test,
                                topk_scores,
                                relevancy_method=None,
                                k=self.top_k))
        return ret
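The chain of `elif` branches above dispatches on the metric name; an equivalent and more compact formulation (a sketch, not the library's code) uses a lookup table. The import path is assumed for a recent version of the recommenders package.

from recommenders.evaluation.python_evaluation import (
    map_at_k, ndcg_at_k, precision_at_k, recall_at_k,
)

# Hypothetical dispatch-table variant of the metric loop in run_eval.
METRIC_FUNCS = {
    "map": map_at_k,
    "ndcg": ndcg_at_k,
    "precision": precision_at_k,
    "recall": recall_at_k,
}

def compute_ranking_metrics(test_df, topk_scores, metrics, top_k):
    """Return the metric values for each recognized name in `metrics`, in order."""
    return [
        METRIC_FUNCS[name](test_df, topk_scores, relevancy_method=None, k=top_k)
        for name in metrics
        if name in METRIC_FUNCS
    ]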
Example 7
def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.
    df_true, df_pred = python_data

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    assert recall_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark1.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred,
                          k=10) == pytest.approx(eval_spark1.precision_at_k(),
                                                 TOL)
    assert ndcg_at_k(df_true, df_pred,
                     k=10) == pytest.approx(eval_spark1.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark1.map_at_k(), TOL)

    # Test on the original data with k = 3.
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    assert recall_at_k(df_true, df_pred,
                       k=3) == pytest.approx(eval_spark2.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred,
                          k=3) == pytest.approx(eval_spark2.precision_at_k(),
                                                TOL)
    assert ndcg_at_k(df_true, df_pred,
                     k=3) == pytest.approx(eval_spark2.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred,
                    k=3) == pytest.approx(eval_spark2.map_at_k(), TOL)

    # Remove the first and last rows from the prediction data.
    df_pred = df_pred[1:-1]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    assert recall_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark3.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred,
                          k=10) == pytest.approx(eval_spark3.precision_at_k(),
                                                 TOL)
    assert ndcg_at_k(df_true, df_pred,
                     k=10) == pytest.approx(eval_spark3.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark3.map_at_k(), TOL)

    # Test with one user
    df_pred = df_pred.loc[df_pred["userID"] == 3]
    df_true = df_true.loc[df_true["userID"] == 3]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    assert recall_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark4.recall_at_k(), TOL)
    assert precision_at_k(df_true, df_pred,
                          k=10) == pytest.approx(eval_spark4.precision_at_k(),
                                                 TOL)
    assert ndcg_at_k(df_true, df_pred,
                     k=10) == pytest.approx(eval_spark4.ndcg_at_k(), TOL)
    assert map_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark4.map_at_k(), TOL)
Example 8
    logger.debug(f"Prediction: {col_prediction}")
    logger.debug(f"Relevancy:  {relevancy_method}")
    logger.debug(f"K:          {k}")
    logger.debug(f"Threshold:  {threshold}")

    logger.debug(f"Rating True path: {args.rating_true}")
    logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}")
    logger.debug(f"Rating Pred path: {args.rating_pred}")
    logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}")

    eval_ndcg = ndcg_at_k(
        rating_true,
        rating_pred,
        col_user=col_user,
        col_item=col_item,
        col_rating=col_rating,
        col_prediction=col_prediction,
        relevancy_method=relevancy_method,
        k=k,
        threshold=threshold,
    )

    logger.debug(f"Score: {eval_ndcg}")

    # Log to AzureML dashboard
    run = Run.get_context()
    run.parent.log("nDCG at {}".format(k), eval_ndcg)

    score_result = pd.DataFrame({"ndcg_at_k": [eval_ndcg]})
    save_data_frame_to_directory(
        args.score_result,