def test_python_errors(python_data):
    """Check that each metric raises ValueError for an overridden column name.

    Every case calls one metric with a single column-name keyword replaced
    (presumably by a name that is missing from the fixture frames -- verify
    against the fixture) and expects a ``ValueError``.
    """
    rating_true, rating_pred, _ = python_data(binary_rating=False)

    # (metric, positional frames, keyword overrides expected to be rejected)
    failing_calls = [
        (rmse, (rating_true, rating_true), {"col_user": "******"}),
        (
            mae,
            (rating_pred, rating_pred),
            {"col_rating": PREDICTION_COL, "col_user": "******"},
        ),
        (rsquared, (rating_true, rating_pred), {"col_item": "not_item"}),
        (
            exp_var,
            (rating_pred, rating_pred),
            {"col_rating": PREDICTION_COL, "col_item": "not_item"},
        ),
        (precision_at_k, (rating_true, rating_pred), {"col_rating": "not_rating"}),
        (
            recall_at_k,
            (rating_true, rating_pred),
            {"col_prediction": "not_prediction"},
        ),
        (ndcg_at_k, (rating_true, rating_true), {"col_user": "******"}),
        (
            map_at_k,
            (rating_pred, rating_pred),
            {"col_rating": PREDICTION_COL, "col_user": "******"},
        ),
    ]

    for metric, frames, overrides in failing_calls:
        with pytest.raises(ValueError):
            metric(*frames, **overrides)
def test_python_errors(rating_true, rating_pred):
    """Check that each metric raises ValueError for an overridden column name.

    Each case overrides one column-name keyword (presumably with a name the
    fixture frames do not contain -- verify against the fixtures) and the
    metric is expected to raise ``ValueError``.
    """
    # (metric, positional frames, keyword overrides expected to be rejected)
    failing_calls = [
        (rmse, (rating_true, rating_true), {"col_user": "******"}),
        (
            mae,
            (rating_pred, rating_pred),
            {"col_rating": DEFAULT_PREDICTION_COL, "col_user": "******"},
        ),
        (rsquared, (rating_true, rating_pred), {"col_item": "not_item"}),
        (
            exp_var,
            (rating_pred, rating_pred),
            {"col_rating": DEFAULT_PREDICTION_COL, "col_item": "not_item"},
        ),
        (precision_at_k, (rating_true, rating_pred), {"col_rating": "not_rating"}),
        (
            recall_at_k,
            (rating_true, rating_pred),
            {"col_prediction": "not_prediction"},
        ),
        (ndcg_at_k, (rating_true, rating_true), {"col_user": "******"}),
        (
            map_at_k,
            (rating_pred, rating_pred),
            {"col_rating": DEFAULT_PREDICTION_COL, "col_user": "******"},
        ),
    ]

    for metric, frames, overrides in failing_calls:
        with pytest.raises(ValueError):
            metric(*frames, **overrides)
def test_python_errors(rating_true, rating_pred):
    """Assert that every metric call below fails with ``ValueError``.

    Each deferred call overrides one column-name keyword; the names used are
    presumably absent from the fixture frames (verify against the fixtures).
    """
    # Calls are wrapped in lambdas so that each one runs inside its own
    # ``pytest.raises`` context, in the listed order.
    failing_calls = (
        lambda: rmse(rating_true, rating_true, col_user="******"),
        lambda: mae(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="******",
        ),
        lambda: rsquared(rating_true, rating_pred, col_item="not_item"),
        lambda: exp_var(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_item="not_item",
        ),
        lambda: precision_at_k(rating_true, rating_pred, col_rating="not_rating"),
        lambda: recall_at_k(
            rating_true, rating_pred, col_prediction="not_prediction"
        ),
        lambda: ndcg_at_k(rating_true, rating_true, col_user="******"),
        lambda: map_at_k(
            rating_pred,
            rating_pred,
            col_rating=DEFAULT_PREDICTION_COL,
            col_user="******",
        ),
    )

    for call in failing_calls:
        with pytest.raises(ValueError):
            call()
def test_python_recall(python_data, target_metrics):
    """Check recall@10 for a self-comparison and against the stored target."""
    rating_true, rating_pred = python_data

    # Ground truth scored against itself, using its "rating" column as the
    # prediction column; expected to be 1 within a 10% relative tolerance.
    self_recall = recall_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction="rating",
        k=10,
    )
    assert self_recall == pytest.approx(1, rel=0.1)

    # Real predictions must reproduce the reference value from the fixture.
    observed = recall_at_k(rating_true, rating_pred, k=10)
    assert observed == target_metrics["recall"]
def test_python_recall(rating_true, rating_pred, rating_nohit):
    """Check recall@10 for three prediction sets.

    Covers the ground truth scored against itself, the ``rating_nohit``
    fixture (expected recall of exactly 0.0), and the regular predictions
    (expected recall of about 0.37777).
    """
    # Ground truth used as its own prediction, ranked by the rating column.
    self_recall = recall_at_k(
        k=10,
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
    )
    assert self_recall == pytest.approx(1, rel=TOL)

    nohit_recall = recall_at_k(rating_true, rating_nohit, k=10)
    assert nohit_recall == 0.0

    pred_recall = recall_at_k(rating_true, rating_pred, k=10)
    assert pred_recall == pytest.approx(0.37777, rel=TOL)
def test_python_recall(rating_true, rating_pred, rating_nohit):
    """Verify recall@10 on self-comparison, no-hit and regular predictions."""
    top_n = 10

    # 1) Ground truth compared with itself, ranked by the rating column.
    recall_self = recall_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=top_n,
    )
    assert recall_self == pytest.approx(1, rel=TOL)

    # 2) The no-hit fixture is expected to give a recall of exactly zero.
    recall_nohit = recall_at_k(rating_true, rating_nohit, k=top_n)
    assert recall_nohit == 0.0

    # 3) Regular predictions: compared against the hard-coded expected value.
    recall_pred = recall_at_k(rating_true, rating_pred, k=top_n)
    assert recall_pred == pytest.approx(0.37777, rel=TOL)
def test_python_recall(python_data, target_metrics):
    """Check recall@10 for self-comparison, no-hit and regular predictions.

    The frames come from the ``python_data`` fixture factory called with
    ``binary_rating=False``; the expected value for the regular predictions
    is read from ``target_metrics["recall"]``.
    """
    rating_true, rating_pred, rating_nohit = python_data(binary_rating=False)

    # Ground truth used as its own prediction, ranked by the rating column.
    self_recall = recall_at_k(
        rating_true=rating_true,
        rating_pred=rating_true,
        col_prediction=DEFAULT_RATING_COL,
        k=10,
    )
    assert self_recall == pytest.approx(1, rel=TOL)

    nohit_recall = recall_at_k(rating_true, rating_nohit, k=10)
    assert nohit_recall == 0.0

    observed = recall_at_k(rating_true, rating_pred, k=10)
    assert observed == target_metrics["recall"]
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    """Compute the four ranking metrics for one set of predictions.

    Args:
        test: Ground-truth data, passed first to every metric.
        predictions: Predicted data, passed second to every metric.
        k: Cut-off forwarded as ``k`` to every metric.

    Returns:
        dict: Metric values keyed by "MAP", "nDCG@k", "Precision@k" and
        "Recall@k", in that order.
    """
    # Label -> metric function; every metric receives the same arguments,
    # including the column mapping in COL_DICT.
    scorers = (
        ("MAP", map_at_k),
        ("nDCG@k", ndcg_at_k),
        ("Precision@k", precision_at_k),
        ("Recall@k", recall_at_k),
    )
    return {
        label: scorer(test, predictions, k=k, **COL_DICT)
        for label, scorer in scorers
    }
def ranking_metrics_python(test, predictions, k=DEFAULT_K):
    """Evaluate MAP, nDCG, precision and recall at ``k``.

    Args:
        test: Ground-truth data, first argument of each metric.
        predictions: Predicted data, second argument of each metric.
        k: Cut-off forwarded as ``k`` to each metric.

    Returns:
        dict: Results under the keys "MAP", "nDCG@k", "Precision@k" and
        "Recall@k".
    """
    # Every metric takes the same keyword arguments: the cut-off plus the
    # column mapping from COL_DICT.
    shared = dict(k=k, **COL_DICT)

    results = {}
    results["MAP"] = map_at_k(test, predictions, **shared)
    results["nDCG@k"] = ndcg_at_k(test, predictions, **shared)
    results["Precision@k"] = precision_at_k(test, predictions, **shared)
    results["Recall@k"] = recall_at_k(test, predictions, **shared)
    return results
def ranking_metrics(data_size, data_true, data_pred, time_train, time_test, K):
    """Evaluate top-K ranking metrics and return them as a one-row DataFrame.

    Args:
        data_size: Value stored in the "Dataset" column.
        data_true: Ground-truth data, passed first to every metric.
        data_pred: Predicted data, passed second to every metric.
        time_train: Value stored in the "Train time (s)" column.
        time_test: Value stored in the "Test time (s)" column.
        K: Cut-off forwarded as ``k`` to every metric and reported in the
            "K" column.

    Returns:
        pd.DataFrame: A single row (index 0) with the columns "Dataset", "K",
        "MAP", "nDCG@k", "Precision@k", "Recall@k", "Train time (s)" and
        "Test time (s)".
    """
    # All four metrics use the same column mapping, relevancy method and
    # cut-off, so the keyword arguments are defined once.
    metric_kwargs = dict(
        col_user="******",
        col_item="MovieID",
        col_rating="Rating",
        col_prediction="prediction",
        relevancy_method="top_k",
        k=K,
    )

    eval_map = map_at_k(data_true, data_pred, **metric_kwargs)
    eval_ndcg = ndcg_at_k(data_true, data_pred, **metric_kwargs)
    eval_precision = precision_at_k(data_true, data_pred, **metric_kwargs)
    eval_recall = recall_at_k(data_true, data_pred, **metric_kwargs)

    return pd.DataFrame(
        {
            "Dataset": data_size,
            # Report the cut-off the metrics were actually computed with;
            # the previous code reported the unrelated global TOPK here.
            "K": K,
            "MAP": eval_map,
            "nDCG@k": eval_ndcg,
            "Precision@k": eval_precision,
            "Recall@k": eval_recall,
            "Train time (s)": time_train,
            "Test time (s)": time_test,
        },
        index=[0],
    )
def test_predict_ranking(rating_true):
    """Fit a BPR model on the ratings and validate ``predict_ranking`` output.

    Checks the number of rows (users x items), the column set, the id column
    dtypes, and that nDCG and recall against the training data are within
    1e-10 of 1.
    """
    interactions = rating_true.itertuples(index=False)
    train_set = cornac.data.Dataset.from_uir(interactions, seed=42)
    model = cornac.models.BPR(k=100, max_iter=10000, seed=42)
    bpr = model.fit(train_set)

    # remove_seen=False keeps every user/item pair in the output.
    preds = predict_ranking(bpr, rating_true, remove_seen=False)

    n_users = len(rating_true["userID"].unique())
    n_items = len(rating_true["itemID"].unique())
    expected_rows = n_users * n_items
    assert preds.shape[0] == expected_rows

    expected_columns = {"userID", "itemID", "prediction"}
    assert set(preds.columns) == expected_columns

    # Id columns must keep the dtypes of the input frame.
    for id_col in ("userID", "itemID"):
        assert preds[id_col].dtypes == rating_true[id_col].dtypes

    # perfect ranking achieved
    ndcg_gap = 1 - ndcg_at_k(rating_true, preds)
    assert 1e-10 > ndcg_gap
    recall_gap = 1 - recall_at_k(rating_true, preds)
    assert 1e-10 > recall_gap
def run_eval(self):
    """Run evaluation on self.data.test.

    Top-k recommendations are generated once and then scored with every
    recognised entry of ``self.metrics`` ("map", "ndcg", "precision",
    "recall"); any other entry is skipped.

    Returns:
        list: One score per recognised metric, in the order of
        ``self.metrics``.
    """
    recommendations = self.recommend_k_items(
        self.data.test, top_k=self.top_k, use_id=True
    )

    # Metric name -> scoring function, checked in this order.
    scorers = (
        ("map", map_at_k),
        ("ndcg", ndcg_at_k),
        ("precision", precision_at_k),
        ("recall", recall_at_k),
    )

    scores = []
    for metric in self.metrics:
        for name, scorer in scorers:
            if metric == name:
                scores.append(
                    scorer(
                        self.data.test,
                        recommendations,
                        relevancy_method=None,
                        k=self.top_k,
                    )
                )
                break
    return scores
col_rating="Rating", col_prediction="prediction", relevancy_method="top_k", k=args.top_k) eval_precision = precision_at_k(test, top_k, col_user="******", col_item="MovieId", col_rating="Rating", col_prediction="prediction", relevancy_method="top_k", k=args.top_k) eval_recall = recall_at_k(test, top_k, col_user="******", col_item="MovieId", col_rating="Rating", col_prediction="prediction", relevancy_method="top_k", k=args.top_k) run.log("map", eval_map) run.log("ndcg", eval_ndcg) run.log("precision", eval_precision) run.log("recall", eval_recall) # run.log_table("topk", top_k.to_dict()) # automatic upload of everything in ./output folder doesn't work for very large model file # model file has to be saved to a temp location, then uploaded by upload_file function joblib.dump(value=model, filename=MODEL_FILE_NAME) run.upload_file(OUTPUT_FILE_NAME, MODEL_FILE_NAME)
def test_spark_python_match(python_data, spark):
    """Check that the pandas metrics agree with ``SparkRankingEvaluation``.

    Recall, precision, nDCG and MAP are computed on the same data by both
    implementations and must match within ``TOL`` in four scenarios: the
    original data at k=10 and k=3, the predictions with the first and last
    rows dropped, and the data restricted to user 3.
    """
    df_true, df_pred = python_data

    def compare(pdf_true, pdf_pred, k):
        """Return one equality flag per metric for the given frames and k."""
        dfs_true = spark.createDataFrame(pdf_true)
        dfs_pred = spark.createDataFrame(pdf_pred)
        evaluator = SparkRankingEvaluation(dfs_true, dfs_pred, k=k)
        return [
            recall_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.recall_at_k(), TOL),
            precision_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.precision_at_k(), TOL),
            ndcg_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.ndcg_at_k(), TOL),
            map_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.map_at_k(), TOL),
        ]

    # Original data, k = 10.
    assert all(compare(df_true, df_pred, k=10))

    # Original data, k = 3.
    assert all(compare(df_true, df_pred, k=3))

    # Drop the first and the last prediction rows (the slice is [1:-1]).
    df_pred = df_pred[1:-1]
    assert all(compare(df_true, df_pred, k=10))

    # Keep a single user (userID == 3) in both frames.
    df_pred = df_pred[df_pred["userID"] == 3]
    df_true = df_true[df_true["userID"] == 3]
    assert all(compare(df_true, df_pred, k=10))
on=["userID", "itemID"], how="outer") batch_predictions = batch_merged[batch_merged.rating.isnull()].drop( 'rating', axis=1) batch_test = test[test["userID"].isin(batch_users)] # eval_map = map_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K) eval_ndcg = ndcg_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K) eval_precision = precision_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K) eval_recall = recall_at_k(batch_test, batch_predictions, col_prediction='prediction', k=TOP_K) ndcg.append(eval_ndcg) hr.append(eval_precision) recall.append(eval_recall) del batch_train del batch_predictions del batch_merged del batch_test gc.collect() print( #"MAP:\t%f" % eval_map, "NDCG:\t%f" % np.mean(ndcg), "Precision@K:\t%f" % np.mean(hr), "Recall@K:\t%f" % np.mean(recall), sep='\n')
logger.debug(f"Prediction: {col_prediction}") logger.debug(f"Relevancy: {relevancy_method}") logger.debug(f"K: {k}") logger.debug(f"Threshold: {threshold}") logger.debug(f"Rating True path: {args.rating_true}") logger.debug(f"Shape of loaded DataFrame: {rating_true.shape}") logger.debug(f"Rating Pred path: {args.rating_pred}") logger.debug(f"Shape of loaded DataFrame: {rating_pred.shape}") eval_recall = recall_at_k( rating_true, rating_pred, col_user=col_user, col_item=col_item, col_rating=col_rating, col_prediction=col_prediction, relevancy_method=relevancy_method, k=k, threshold=threshold, ) logger.debug(f"Score: {eval_recall}") # Log to AzureML dashboard run = Run.get_context() run.parent.log("Recall at {}".format(k), eval_recall) score_result = pd.DataFrame({"recall_at_k": [eval_recall]}) save_data_frame_to_directory( args.score_result,
k=TOP_K) eval_ndcg = ndcg_at_k(test, top_k, col_user='******', col_item='itemID', col_rating='rating', k=TOP_K) eval_precision = precision_at_k(test, top_k, col_user='******', col_item='itemID', col_rating='rating', k=TOP_K) eval_recall = recall_at_k(test, top_k, col_user='******', col_item='itemID', col_rating='rating', k=TOP_K) eval_rmse = rmse(test, top_k, col_user='******', col_item='itemID', col_rating='rating') eval_mae = mae(test, top_k, col_user='******', col_item='itemID', col_rating='rating') eval_rsquared = rsquared(test, top_k, col_user='******',
def test_spark_python_match(python_data, spark):
    """Check that the pandas metrics agree with ``SparkRankingEvaluation``.

    Recall, precision, nDCG and MAP are computed on the same data by both
    implementations and must match within ``TOL`` in four scenarios: the
    original data at k=10 and k=3, the predictions with the first and last
    rows dropped, and the data restricted to user 3.
    """
    df_true, df_pred = python_data

    def compare(pdf_true, pdf_pred, k):
        """Return one equality flag per metric for the given frames and k."""
        dfs_true = spark.createDataFrame(pdf_true)
        dfs_pred = spark.createDataFrame(pdf_pred)
        evaluator = SparkRankingEvaluation(dfs_true, dfs_pred, k=k)
        return [
            recall_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.recall_at_k(), TOL),
            precision_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.precision_at_k(), TOL),
            ndcg_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.ndcg_at_k(), TOL),
            map_at_k(pdf_true, pdf_pred, k=k)
            == pytest.approx(evaluator.map_at_k(), TOL),
        ]

    # Original data, k = 10.
    assert all(compare(df_true, df_pred, k=10))

    # Original data, k = 3.
    assert all(compare(df_true, df_pred, k=3))

    # Drop the first and the last prediction rows (the slice is [1:-1]).
    df_pred = df_pred[1:-1]
    assert all(compare(df_true, df_pred, k=10))

    # Keep a single user (userID == 3) in both frames.
    df_pred = df_pred.loc[df_pred["userID"] == 3]
    df_true = df_true.loc[df_true["userID"] == 3]
    assert all(compare(df_true, df_pred, k=10))
# st.write("Data_Test:",data_test)

# The four ranking metrics share one column mapping, relevancy method and
# cut-off, so the keyword arguments are defined once.
ranking_kwargs = dict(
    col_user=USER,
    col_item=ITEM,
    col_rating=RATING,
    col_prediction=PREDICTION,
    relevancy_method="top_k",
    k=TOP_K,
)

# Each metric is shown in the Streamlit page right after it is computed.
eval_map = map_at_k(data_test, top_k_scores, **ranking_kwargs)
st.write("MAP:", eval_map)

eval_ndcg = ndcg_at_k(data_test, top_k_scores, **ranking_kwargs)
st.write("NDCG:", eval_ndcg)

eval_precision = precision_at_k(data_test, top_k_scores, **ranking_kwargs)
st.write("Precision:", eval_precision)

eval_recall = recall_at_k(data_test, top_k_scores, **ranking_kwargs)
st.write("Recall:", eval_recall)

# Same results as a plain-text summary on stdout, one entry per line.
print(
    "Model:\t" + learn.__class__.__name__,
    "Top K:\t%d" % TOP_K,
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
    sep='\n',
)

# scores = score(learner,
#                test_df=data_test.copy(),
#                user_col=USER,
#                item_col=ITEM,
#                prediction_col=PREDICTION)
# Train the model and produce recommendations for the test users.
model.fit(train)
top_k = model.recommend_k_items(test, remove_seen=True)

# Attach the movie title to every recommendation via a MovieId lookup.
movie_titles = data[['MovieId', 'Title']].drop_duplicates().set_index('MovieId')
top_k_with_titles = top_k.join(
    movie_titles, on='MovieId', how='inner'
).sort_values(by=['UserId', 'Prediction'], ascending=False)

# Positional and keyword arguments shared by all four ranking metrics.
args = [test, top_k]
kwargs = {
    'col_user': '******',
    'col_item': 'MovieId',
    'col_rating': 'Rating',
    'col_prediction': 'Prediction',
    'relevancy_method': 'top_k',
    'k': TOP_K,
}

eval_map = map_at_k(*args, **kwargs)
eval_ndcg = ndcg_at_k(*args, **kwargs)
eval_precision = precision_at_k(*args, **kwargs)
eval_recall = recall_at_k(*args, **kwargs)

# Plain-text summary, one entry per line.
print(
    "Model:",
    f"Top K:\t\t {TOP_K}",
    f"MAP:\t\t {eval_map:f}",
    f"NDCG:\t\t {eval_ndcg:f}",
    f"Precision@K:\t {eval_precision:f}",
    f"Recall@K:\t {eval_recall:f}",
    sep='\n',
)