def test_python_ndcg_at_k(rating_true, rating_pred, rating_nohit):
    """Check ndcg_at_k on a self-comparison, a no-hit set and a regular set."""
    # Scoring the ground truth against itself (using the rating column as the
    # prediction column) is expected to give an nDCG of 1.
    self_score = ndcg_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    )
    assert self_score == pytest.approx(1.0, TOL)

    # The rating_nohit fixture is expected to score exactly zero.
    nohit_score = ndcg_at_k(rating_true, rating_nohit, k=10)
    assert nohit_score == 0.0

    # Pinned reference value for the rating_pred fixture.
    regular_score = ndcg_at_k(rating_true, rating_pred, k=10)
    assert regular_score == pytest.approx(0.38172, TOL)
def on_epoch_end(self, batch, logs=None):
    """Evaluate NDCG@k on the validation set at the end of an epoch.

    Recommends the top ``self.k`` items from ``self.val_tr``, scores them
    against ``self.val_te`` with ``ndcg_at_k``, saves the model when the score
    beats ``self.best_ndcg`` (and ``self.save_path`` is set), and appends the
    score to ``self._data``.

    Args:
        batch: Not used by this method.
            NOTE(review): presumably the epoch index supplied by the training
            loop -- confirm against the caller.
        logs: Not used by this method. Defaults to ``None`` instead of a
            mutable ``{}`` that would be shared across calls.
    """
    # recommend top k items based on the training part of the validation set
    top_k = self.recommend_k_items(x=self.val_tr, k=self.k, remove_seen=True)

    # convert recommendations and held-out ratings from sparse matrices to dataframes
    top_k_df = self.mapper.map_back_sparse(top_k, kind="prediction")
    test_df = self.mapper.map_back_sparse(self.val_te, kind="ratings")

    # calculate NDCG@k
    ndcg = ndcg_at_k(test_df, top_k_df, col_prediction="prediction", k=self.k)

    # on improvement, remember the new best score and persist the model
    if ndcg > self.best_ndcg:
        self.best_ndcg = ndcg

        # saving is optional: skipped when no save path was configured
        if self.save_path is not None:
            self.model.save(self.save_path)

    # keep the per-epoch history of validation NDCG@k
    self._data.append(ndcg)
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    """Compute the four ranking metrics for one set of predictions.

    Args:
        test: Ground-truth data, passed as the first argument to each metric.
        predictions: Predictions, passed as the second argument to each metric.
        k: Cut-off forwarded as ``k`` to each metric. Defaults to ``DEFAULT_K``.

    Returns:
        dict: Scores keyed by "MAP", "nDCG@k", "Precision@k" and "Recall@k".
    """
    # Insertion order fixes both the evaluation order and the key order.
    metric_by_label = {
        "MAP": map_at_k,
        "nDCG@k": ndcg_at_k,
        "Precision@k": precision_at_k,
        "Recall@k": recall_at_k,
    }
    return {
        label: metric(test, predictions, k=k, **COL_DICT)
        for label, metric in metric_by_label.items()
    }
def test_python_errors(rating_true, rating_pred):
    """Check that each metric raises ValueError for the given column overrides.

    Every entry below is (metric, positional dataframes, keyword overrides);
    the overrides name columns that are presumably absent from the fixtures.
    """
    failing_calls = [
        (rmse, (rating_true, rating_true), {"col_user": "******"}),
        (
            mae,
            (rating_pred, rating_pred),
            {"col_rating": DEFAULT_PREDICTION_COL, "col_user": "******"},
        ),
        (rsquared, (rating_true, rating_pred), {"col_item": "not_item"}),
        (
            exp_var,
            (rating_pred, rating_pred),
            {"col_rating": DEFAULT_PREDICTION_COL, "col_item": "not_item"},
        ),
        (precision_at_k, (rating_true, rating_pred), {"col_rating": "not_rating"}),
        (
            recall_at_k,
            (rating_true, rating_pred),
            {"col_prediction": "not_prediction"},
        ),
        (ndcg_at_k, (rating_true, rating_true), {"col_user": "******"}),
        (
            map_at_k,
            (rating_pred, rating_pred),
            {"col_rating": DEFAULT_PREDICTION_COL, "col_user": "******"},
        ),
    ]

    # Run the calls in the listed order; each one must raise on its own.
    for metric, frames, column_overrides in failing_calls:
        with pytest.raises(ValueError):
            metric(*frames, **column_overrides)
def test_recommend_k_items(rating_true):
    """Fit a cornac BPR model on rating_true and check predict_ranking's output.

    Verifies the row count, column names and ID dtypes of the predictions, and
    that nDCG and recall against rating_true are within 1e-10 of 1.
    """
    dataset = cornac.data.Dataset.from_uir(
        rating_true.itertuples(index=False), seed=42
    )
    model = cornac.models.BPR(k=100, max_iter=10000, seed=42)
    fitted_model = model.fit(dataset)

    ranking = predict_ranking(fitted_model, rating_true, remove_seen=False)

    user_count = len(rating_true["userID"].unique())
    item_count = len(rating_true["itemID"].unique())

    # With remove_seen=False one row per (user, item) combination is expected.
    assert ranking.shape[0] == user_count * item_count
    assert set(ranking.columns) == {"userID", "itemID", "prediction"}

    # ID columns must keep the dtypes of the input data.
    for id_column in ("userID", "itemID"):
        assert ranking[id_column].dtypes == rating_true[id_column].dtypes

    # perfect ranking achieved
    assert 1 - ndcg_at_k(rating_true, ranking) < 1e-10
    assert 1 - recall_at_k(rating_true, ranking) < 1e-10
def run_eval(self):
    """Run evaluation on self.data.test.

    Returns:
        list: One score per recognised entry of `self.metrics` ("map", "ndcg",
        "precision", "recall"), in the order they appear. Entries with any
        other name are skipped.
    """
    topk_scores = self.recommend_k_items(
        self.data.test, top_k=self.top_k, use_id=True
    )

    # Metric name -> scoring function, checked in this order.
    scorers = (
        ("map", map_at_k),
        ("ndcg", ndcg_at_k),
        ("precision", precision_at_k),
        ("recall", recall_at_k),
    )

    ret = []
    for metric in self.metrics:
        for name, scorer in scorers:
            if metric == name:
                score = scorer(
                    self.data.test,
                    topk_scores,
                    relevancy_method=None,
                    k=self.top_k,
                )
                ret.append(score)
                break
    return ret
def test_spark_python_match(python_data, spark):
    """Check that the Python ranking metrics agree with SparkRankingEvaluation.

    The comparison is repeated on the original data (k=10 and k=3), on
    predictions with the first and last rows dropped, and on a single user.
    """

    def assert_metrics_match(true_pd, pred_pd, k):
        # Build the Spark evaluator from the same pandas frames that the
        # Python metrics receive, then compare the four metrics one by one.
        true_spark = spark.createDataFrame(true_pd)
        pred_spark = spark.createDataFrame(pred_pd)
        evaluator = SparkRankingEvaluation(true_spark, pred_spark, k=k)

        assert recall_at_k(true_pd, pred_pd, k=k) == pytest.approx(
            evaluator.recall_at_k(), TOL
        )
        assert precision_at_k(true_pd, pred_pd, k=k) == pytest.approx(
            evaluator.precision_at_k(), TOL
        )
        assert ndcg_at_k(true_pd, pred_pd, k=k) == pytest.approx(
            evaluator.ndcg_at_k(), TOL
        )
        assert map_at_k(true_pd, pred_pd, k=k) == pytest.approx(
            evaluator.map_at_k(), TOL
        )

    df_true, df_pred = python_data

    # Original data, first with k = 10 and then with k = 3.
    for cutoff in (10, 3):
        assert_metrics_match(df_true, df_pred, cutoff)

    # Drop the first and the last row of the predictions.
    trimmed_pred = df_pred[1:-1]
    assert_metrics_match(df_true, trimmed_pred, 10)

    # Restrict both frames to one user (userID == 3); the predictions are the
    # already-trimmed ones.
    single_user_pred = trimmed_pred.loc[trimmed_pred["userID"] == 3]
    single_user_true = df_true.loc[df_true["userID"] == 3]
    assert_metrics_match(single_user_true, single_user_pred, 10)
logger.debug(f"Prediction: {col_prediction}") logger.debug(f"Relevancy: {relevancy_method}") logger.debug(f"K: {k}") logger.debug(f"Threshold: {threshold}") logger.debug(f"Rating True path: {args.rating_true}") logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}") logger.debug(f"Rating Pred path: {args.rating_pred}") logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}") eval_ndcg = ndcg_at_k( rating_true, rating_pred, col_user=col_user, col_item=col_item, col_rating=col_rating, col_prediction=col_prediction, relevancy_method=relevancy_method, k=k, threshold=threshold, ) logger.debug(f"Score: {eval_ndcg}") # Log to AzureML dashboard run = Run.get_context() run.parent.log("nDCG at {}".format(k), eval_ndcg) score_result = pd.DataFrame({"ndcg_at_k": [eval_ndcg]}) save_data_frame_to_directory( args.score_result,