def test_python_errors(python_data):
    rating_true, rating_pred, _ = python_data(binary_rating=False)

    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        mae(rating_pred,
            rating_pred,
            col_rating=PREDICTION_COL,
            col_user="not_user")

    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")

    with pytest.raises(ValueError):
        exp_var(rating_pred,
                rating_pred,
                col_rating=PREDICTION_COL,
                col_item="not_item")

    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")

    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")

    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        map_at_k(rating_pred,
                 rating_pred,
                 col_rating=PREDICTION_COL,
                 col_user="not_user")
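As a quick reference for how these metrics are normally called, here is a minimal, self-contained sketch (the import path varies between releases, e.g. recommenders.evaluation.python_evaluation in newer versions and reco_utils.evaluation.python_evaluation in older ones); the ValueError asserted above is raised when col_user, col_item, col_rating, or col_prediction names a column that is not present in the DataFrames:

import pandas as pd

# Import path is version-dependent; newer releases expose the metrics here.
from recommenders.evaluation.python_evaluation import ndcg_at_k, precision_at_k

rating_true = pd.DataFrame(
    {"userID": [1, 1, 2], "itemID": [10, 11, 10], "rating": [5, 4, 3]}
)
rating_pred = pd.DataFrame(
    {"userID": [1, 1, 2], "itemID": [10, 12, 10], "prediction": [4.8, 4.1, 2.9]}
)

# Default column names are userID/itemID/rating/prediction; passing a name that
# does not exist in either DataFrame (e.g. col_user="not_user") raises ValueError.
print(ndcg_at_k(rating_true, rating_pred, k=2))
print(precision_at_k(rating_true, rating_pred, k=2))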
def test_python_errors(rating_true, rating_pred):
    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        mae(rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_user="not_user")

    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")

    with pytest.raises(ValueError):
        exp_var(
            rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_item="not_item"
        )

    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")

    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")

    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        map_at_k(
            rating_pred, rating_pred, col_rating=DEFAULT_PREDICTION_COL, col_user="not_user"
        )
Example #3
def test_python_errors(rating_true, rating_pred):
    with pytest.raises(ValueError):
        rmse(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        mae(rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="not_user")

    with pytest.raises(ValueError):
        rsquared(rating_true, rating_pred, col_item="not_item")

    with pytest.raises(ValueError):
        exp_var(rating_pred,
                rating_pred,
                col_rating=DEFAULT_PREDICTION_COL,
                col_item="not_item")

    with pytest.raises(ValueError):
        precision_at_k(rating_true, rating_pred, col_rating="not_rating")

    with pytest.raises(ValueError):
        recall_at_k(rating_true, rating_pred, col_prediction="not_prediction")

    with pytest.raises(ValueError):
        ndcg_at_k(rating_true, rating_true, col_user="not_user")

    with pytest.raises(ValueError):
        map_at_k(rating_pred,
                 rating_pred,
                 col_rating=DEFAULT_PREDICTION_COL,
                 col_user="not_user")
Example #4
def test_python_ndcg_at_k(python_data, target_metrics):
    rating_true, rating_pred = python_data
    assert (ndcg_at_k(
        k=10,
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction="rating",
    ) == 1)
    assert ndcg_at_k(rating_true, rating_pred, k=10) == target_metrics["ndcg"]
Example #5
def test_python_ndcg_at_k(rating_true, rating_pred, rating_nohit):
    assert (ndcg_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    ) == 1)
    assert ndcg_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert ndcg_at_k(rating_true, rating_pred,
                     k=10) == pytest.approx(0.38172, TOL)
def test_python_ndcg_at_k(python_data, target_metrics):
    rating_true, rating_pred, rating_nohit = python_data(binary_rating=False)

    assert ndcg_at_k(
        k=10,
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
    ) == 1
    assert ndcg_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert ndcg_at_k(rating_true, rating_pred, k=10) == target_metrics["ndcg"]
def test_python_ndcg_at_k(rating_true, rating_pred, rating_nohit):
    assert (
        ndcg_at_k(
            rating_true=rating_true,
            rating_pred=rating_true,
            col_prediction=DEFAULT_RATING_COL,
            k=10,
        )
        == 1
    )
    assert ndcg_at_k(rating_true, rating_nohit, k=10) == 0.0
    assert ndcg_at_k(rating_true, rating_pred, k=10) == pytest.approx(0.38172, TOL)
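The boundary values asserted above (1 for a perfect ranking, 0 when none of the recommended items appear in the ground truth) follow directly from the definition of NDCG@k. A hand-rolled, single-user sketch, using a simple linear-gain variant that may differ in detail from the library's implementation:

import numpy as np

def ndcg_single_user(ranked_relevances, k=10):
    """NDCG@k for one user, given relevances in predicted-rank order."""
    rel = np.asarray(ranked_relevances, dtype=float)[:k]
    discounts = 1.0 / np.log2(np.arange(2, rel.size + 2))  # 1/log2(rank+1)
    dcg = float((rel * discounts).sum())
    ideal = np.sort(rel)[::-1]                              # best possible order
    idcg = float((ideal * discounts).sum())
    return dcg / idcg if idcg > 0 else 0.0

print(ndcg_single_user([3, 2, 1]))  # already in ideal order -> 1.0
print(ndcg_single_user([0, 0, 0]))  # no relevant items recommended -> 0.0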
Example #8
    def on_epoch_end(self, batch, logs={}):
        """At the end of each epoch calculate NDCG@k of the validation set.
        If the model performance is improved, the model weights are saved. 
        Update the list of validation NDCG@k by adding obtained value."""
        # recommend top k items based on training part of validation set
        top_k = self.recommend_k_items(x=self.val_tr, 
                                       k=self.k,
                                       remove_seen=True
                                       )
        
        # convert recommendations from sparse matrix to dataframe
        top_k_df = self.mapper.map_back_sparse(top_k, kind='prediction')
        test_df = self.mapper.map_back_sparse(self.val_te, kind='ratings')

        # calculate NDCG@k 
        NDCG = ndcg_at_k(test_df, top_k_df, col_prediction='prediction', k=self.k)

        # check if there is an improvement in NDCG, if so, update the weights of the saved model 
        if NDCG > self.best_ndcg:
            self.best_ndcg = NDCG

            # save the weights of the optimal model
            if self.save_path is not None:
                self.model.save(self.save_path)

        self._data.append(NDCG)
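The method above belongs to a Keras callback/model object in its source project; a stripped-down, hypothetical callback in the same spirit (recommend_fn, test_df, and the import path are assumptions for illustration, not the original API) could look like:

from tensorflow.keras.callbacks import Callback
# Import path is version-dependent (reco_utils vs recommenders).
from recommenders.evaluation.python_evaluation import ndcg_at_k

class NDCGAtKLogger(Callback):
    """Hypothetical callback: compute NDCG@k on held-out data after each epoch."""

    def __init__(self, test_df, recommend_fn, k=10):
        super().__init__()
        self.test_df = test_df            # ground-truth user/item/rating DataFrame
        self.recommend_fn = recommend_fn  # returns a user/item/prediction DataFrame
        self.k = k
        self.history = []

    def on_epoch_end(self, epoch, logs=None):
        top_k_df = self.recommend_fn()
        ndcg = ndcg_at_k(self.test_df, top_k_df, col_prediction="prediction", k=self.k)
        self.history.append(ndcg)
        print(f"epoch {epoch}: NDCG@{self.k} = {ndcg:.4f}")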
Example #9
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    return {
        "MAP": map_at_k(test, predictions, k=k, **COL_DICT),
        "nDCG@k": ndcg_at_k(test, predictions, k=k, **COL_DICT),
        "Precision@k": precision_at_k(test, predictions, k=k, **COL_DICT),
        "Recall@k": recall_at_k(test, predictions, k=k, **COL_DICT),
    }
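COL_DICT is defined elsewhere in that benchmark module; it is just a mapping of the column-name keyword arguments shared by all four metric calls. A plausible definition, assuming the library's default column names, would be:

# Assumed mapping for illustration; the real COL_DICT lives in the benchmark's setup code.
COL_DICT = {
    "col_user": "userID",
    "col_item": "itemID",
    "col_rating": "rating",
    "col_prediction": "prediction",
}
# results = ranking_metrics_python(test, predictions, k=10)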
Example #10
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    return {
        "MAP": map_at_k(test, predictions, k=k, **COL_DICT),
        "nDCG@k": ndcg_at_k(test, predictions, k=k, **COL_DICT),
        "Precision@k": precision_at_k(test, predictions, k=k, **COL_DICT),
        "Recall@k": recall_at_k(test, predictions, k=k, **COL_DICT)
    }
Example #11
def ranking_metrics(data_size, data_true, data_pred, time_train, time_test, K):

    eval_map = map_at_k(data_true,
                        data_pred,
                        col_user="******",
                        col_item="MovieID",
                        col_rating="Rating",
                        col_prediction="prediction",
                        relevancy_method="top_k",
                        k=K)

    eval_ndcg = ndcg_at_k(data_true,
                          data_pred,
                          col_user="******",
                          col_item="MovieID",
                          col_rating="Rating",
                          col_prediction="prediction",
                          relevancy_method="top_k",
                          k=K)

    eval_precision = precision_at_k(data_true,
                                    data_pred,
                                    col_user="******",
                                    col_item="MovieID",
                                    col_rating="Rating",
                                    col_prediction="prediction",
                                    relevancy_method="top_k",
                                    k=K)

    eval_recall = recall_at_k(data_true,
                              data_pred,
                              col_user="******",
                              col_item="MovieID",
                              col_rating="Rating",
                              col_prediction="prediction",
                              relevancy_method="top_k",
                              k=K)

    df_result = pd.DataFrame(
        {
            "Dataset": data_size,
            "K": TOPK,
            "MAP": eval_map,
            "nDCG@k": eval_ndcg,
            "Precision@k": eval_precision,
            "Recall@k": eval_recall,
            "Train time (s)": time_train,
            "Test time (s)": time_test
        },
        index=[0])

    return df_result
Example #12
def test_predict_ranking(rating_true):
    train_set = cornac.data.Dataset.from_uir(rating_true.itertuples(index=False), seed=42)
    bpr = cornac.models.BPR(k=100, max_iter=10000, seed=42).fit(train_set)

    preds = predict_ranking(bpr, rating_true, remove_seen=False)

    n_users = len(rating_true["userID"].unique())
    n_items = len(rating_true["itemID"].unique())
    assert preds.shape[0] == n_users * n_items

    assert set(preds.columns) == {"userID", "itemID", "prediction"}
    assert preds["userID"].dtypes == rating_true["userID"].dtypes
    assert preds["itemID"].dtypes == rating_true["itemID"].dtypes
    # perfect ranking achieved
    assert 1e-10 > 1 - ndcg_at_k(rating_true, preds)
    assert 1e-10 > 1 - recall_at_k(rating_true, preds)
Example #13
    def run_eval(self):
        """Run evaluation on self.data.test.

        Returns:
            dict: Results of all metrics in self.metrics.
        """
        topk_scores = self.recommend_k_items(self.data.test,
                                             top_k=self.top_k,
                                             use_id=True)
        ret = []
        for metric in self.metrics:
            if metric == "map":
                ret.append(
                    map_at_k(self.data.test,
                             topk_scores,
                             relevancy_method=None,
                             k=self.top_k))
            elif metric == "ndcg":
                ret.append(
                    ndcg_at_k(self.data.test,
                              topk_scores,
                              relevancy_method=None,
                              k=self.top_k))
            elif metric == "precision":
                ret.append(
                    precision_at_k(self.data.test,
                                   topk_scores,
                                   relevancy_method=None,
                                   k=self.top_k))
            elif metric == "recall":
                ret.append(
                    recall_at_k(self.data.test,
                                topk_scores,
                                relevancy_method=None,
                                k=self.top_k))
        return ret
Example #14
top_k['UserId'] = pd.to_numeric(top_k['UserId'])
top_k['MovieId'] = pd.to_numeric(top_k['MovieId'])

# evaluate
eval_map = map_at_k(test,
                    top_k,
                    col_user="******",
                    col_item="MovieId",
                    col_rating="Rating",
                    col_prediction="prediction",
                    relevancy_method="top_k",
                    k=args.top_k)
eval_ndcg = ndcg_at_k(test,
                      top_k,
                      col_user="******",
                      col_item="MovieId",
                      col_rating="Rating",
                      col_prediction="prediction",
                      relevancy_method="top_k",
                      k=args.top_k)
eval_precision = precision_at_k(test,
                                top_k,
                                col_user="******",
                                col_item="MovieId",
                                col_rating="Rating",
                                col_prediction="prediction",
                                relevancy_method="top_k",
                                k=args.top_k)
eval_recall = recall_at_k(test,
                          top_k,
                          col_user="******",
                          col_item="MovieId",
Example #15
def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.

    df_true, df_pred = python_data

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match1 = [
        recall_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark1.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark1.precision_at_k(),
                                              TOL),
        ndcg_at_k(df_true, df_pred,
                  k=10) == pytest.approx(eval_spark1.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=10) == pytest.approx(eval_spark1.map_at_k(), TOL),
    ]

    assert all(match1)

    # Test on the original data with k = 3.

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    match2 = [
        recall_at_k(df_true, df_pred,
                    k=3) == pytest.approx(eval_spark2.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=3) == pytest.approx(eval_spark2.precision_at_k(),
                                             TOL),
        ndcg_at_k(df_true, df_pred,
                  k=3) == pytest.approx(eval_spark2.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=3) == pytest.approx(eval_spark2.map_at_k(), TOL),
    ]

    assert all(match2)

    # Remove the first and last rows from the original data.

    df_pred = df_pred[1:-1]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match3 = [
        recall_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark3.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark3.precision_at_k(),
                                              TOL),
        ndcg_at_k(df_true, df_pred,
                  k=10) == pytest.approx(eval_spark3.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=10) == pytest.approx(eval_spark3.map_at_k(), TOL),
    ]

    assert all(match3)

    # Test with one user

    df_pred = df_pred[df_pred["userID"] == 3]
    df_true = df_true[df_true["userID"] == 3]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match4 = [
        recall_at_k(df_true, df_pred,
                    k=10) == pytest.approx(eval_spark4.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred,
                       k=10) == pytest.approx(eval_spark4.precision_at_k(),
                                              TOL),
        ndcg_at_k(df_true, df_pred,
                  k=10) == pytest.approx(eval_spark4.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred,
                 k=10) == pytest.approx(eval_spark4.map_at_k(), TOL),
    ]

    assert all(match4)
Example #16
    logger.debug(f"Prediction: {col_prediction}")
    logger.debug(f"Relevancy:  {relevancy_method}")
    logger.debug(f"K:          {k}")
    logger.debug(f"Threshold:  {threshold}")

    logger.debug(f"Rating True path: {args.rating_true}")
    logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}")
    logger.debug(f"Rating Pred path: {args.rating_pred}")
    logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}")

    eval_ndcg = ndcg_at_k(
        rating_true,
        rating_pred,
        col_user=col_user,
        col_item=col_item,
        col_rating=col_rating,
        col_prediction=col_prediction,
        relevancy_method=relevancy_method,
        k=k,
        threshold=threshold,
    )

    logger.debug(f"Score: {eval_ndcg}")

    # Log to AzureML dashboard
    run = Run.get_context()
    run.parent.log("nDCG at {}".format(k), eval_ndcg)

    score_result = pd.DataFrame({"ndcg_at_k": [eval_ndcg]})
    save_data_frame_to_directory(
        args.score_result,
Example #17
 start = batch_idx * BATCH_SIZE
 end = min((batch_idx + 1) * BATCH_SIZE, n_users)
 batch_users = all_users[start:end]
 batch_predictions = all_predictions[all_predictions["userID"].isin(
     batch_users)]
 batch_train = train[train["userID"].isin(batch_users)]
 batch_merged = pd.merge(batch_train,
                         batch_predictions,
                         on=["userID", "itemID"],
                         how="outer")
 batch_predictions = batch_merged[batch_merged.rating.isnull()].drop(
     'rating', axis=1)
 batch_test = test[test["userID"].isin(batch_users)]
 # eval_map = map_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K)
 eval_ndcg = ndcg_at_k(batch_test,
                       batch_predictions,
                       col_prediction='prediction',
                       k=TOP_K)
 eval_precision = precision_at_k(batch_test,
                                 batch_predictions,
                                 col_prediction='prediction',
                                 k=TOP_K)
 eval_recall = recall_at_k(batch_test,
                           batch_predictions,
                           col_prediction='prediction',
                           k=TOP_K)
 ndcg.append(eval_ndcg)
 hr.append(eval_precision)
 recall.append(eval_recall)
 del batch_train
 del batch_predictions
 del batch_merged
Example #18
with Timer() as test_time:
    top_k = model.recommend_k_items(test, remove_seen=True)

# print("Took {} seconds for prediction.".format(test_time.interval))

# top_k.head()

eval_map = map_at_k(test,
                    top_k,
                    col_user='userID',
                    col_item='itemID',
                    col_rating='rating',
                    k=TOP_K)
eval_ndcg = ndcg_at_k(test,
                      top_k,
                      col_user='userID',
                      col_item='itemID',
                      col_rating='rating',
                      k=TOP_K)
eval_precision = precision_at_k(test,
                                top_k,
                                col_user='userID',
                                col_item='itemID',
                                col_rating='rating',
                                k=TOP_K)
eval_recall = recall_at_k(test,
                          top_k,
                          col_user='userID',
                          col_item='itemID',
                          col_rating='rating',
                          k=TOP_K)
eval_rmse = rmse(test,
def test_spark_python_match(python_data, spark):
    # Test on the original data with k = 10.
    df_true, df_pred = python_data

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark1 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match1 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark1.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark1.map_at_k(), TOL),
    ]
    assert all(match1)

    # Test on the original data with k = 3.
    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark2 = SparkRankingEvaluation(dfs_true, dfs_pred, k=3)

    match2 = [
        recall_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=3)
        == pytest.approx(eval_spark2.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=3) == pytest.approx(eval_spark2.map_at_k(), TOL),
    ]
    assert all(match2)

    # Remove the first and last rows from the original data.
    df_pred = df_pred[1:-1]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark3 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match3 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark3.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark3.map_at_k(), TOL),
    ]
    assert all(match3)

    # Test with one user
    df_pred = df_pred.loc[df_pred["userID"] == 3]
    df_true = df_true.loc[df_true["userID"] == 3]

    dfs_true = spark.createDataFrame(df_true)
    dfs_pred = spark.createDataFrame(df_pred)

    eval_spark4 = SparkRankingEvaluation(dfs_true, dfs_pred, k=10)

    match4 = [
        recall_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.recall_at_k(), TOL),
        precision_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.precision_at_k(), TOL),
        ndcg_at_k(df_true, df_pred, k=10)
        == pytest.approx(eval_spark4.ndcg_at_k(), TOL),
        map_at_k(df_true, df_pred, k=10) == pytest.approx(eval_spark4.map_at_k(), TOL),
    ]
    assert all(match4)
Example #20
        top_k_scores[USER] = top_k_scores['UserId']
        top_k_scores[ITEM] = top_k_scores['SnackId']
        top_k_scores[PREDICTION] = top_k_scores['Prediction']

        top_k_scores.head()
       # st.write("Top k Scores:",top_k_scores)

        data_test.head()
      #  st.write("Data_Test:",data_test)

        eval_map = map_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, 
                            col_rating=RATING, col_prediction=PREDICTION,
                            relevancy_method="top_k", k=TOP_K)
        st.write("MAP:",eval_map)
        eval_ndcg = ndcg_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, 
                              col_rating=RATING, col_prediction=PREDICTION, 
                              relevancy_method="top_k", k=TOP_K)
        st.write("NDCG:",eval_ndcg)
        eval_precision = precision_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, 
                                        col_rating=RATING, col_prediction=PREDICTION, 
                                        relevancy_method="top_k", k=TOP_K)
        st.write("Precision:",eval_precision)
        eval_recall = recall_at_k(data_test, top_k_scores, col_user=USER, col_item=ITEM, 
                                  col_rating=RATING, col_prediction=PREDICTION, 
                                  relevancy_method="top_k", k=TOP_K)
        st.write("Recall:",eval_recall)
        print("Model:\t" + learn.__class__.__name__,
              "Top K:\t%d" % TOP_K,
              "MAP:\t%f" % eval_map,
              "NDCG:\t%f" % eval_ndcg,
              "Precision@K:\t%f" % eval_precision,
Example #21
    model.fit(train)

    top_k = model.recommend_k_items(test, remove_seen=True)

    top_k_with_titles = (top_k.join(
        data[['MovieId', 'Title']].drop_duplicates().set_index('MovieId'),
        on='MovieId',
        how='inner').sort_values(by=['UserId', 'Prediction'], ascending=False))

    args = [test, top_k]
    kwargs = dict(col_user='UserId',
                  col_item='MovieId',
                  col_rating='Rating',
                  col_prediction='Prediction',
                  relevancy_method='top_k',
                  k=TOP_K)

    eval_map = map_at_k(*args, **kwargs)
    eval_ndcg = ndcg_at_k(*args, **kwargs)
    eval_precision = precision_at_k(*args, **kwargs)
    eval_recall = recall_at_k(*args, **kwargs)

    print(f"Model:",
          f"Top K:\t\t {TOP_K}",
          f"MAP:\t\t {eval_map:f}",
          f"NDCG:\t\t {eval_ndcg:f}",
          f"Precision@K:\t {eval_precision:f}",
          f"Recall@K:\t {eval_recall:f}",
          sep='\n')