def test_eval_format():
    y_true = np.array([0, 0, 1, 1, 0])
    y_pred = np.array([0, 1, 0, 1, 0])
    result = classification_report(y_true, y_pred)
    # `a` and `b` below are the expected formatted output strings defined elsewhere in this test module
    assert eval_format(
        tips="test_eval_format",
        iteration=10,
        train_time=137.1234,
        loss_name_value={"loss": 2.59},
        eval_name_value=result,
        extra_info={"a": 123}
    ) == a.strip("\n")

    y_true = [3, -0.5, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]
    result = regression_report(y_true, y_pred)
    assert eval_format(eval_name_value=result, dump_file=None, keep="msg") == b.strip()
    assert eval_format(eval_name_value=result, dump_file=None, keep=None) == b.strip()
    assert eval_format(eval_name_value=result, dump_file=None, keep={"msg", "data"})[0] == b.strip()
def eval_f(_net, test_data, *args, **kwargs):
    y_true = []
    y_pred = []
    for x, y in test_data:
        pred = _net(x).argmax(-1).tolist()
        y_pred.extend(pred)
        y_true.extend(y.tolist())
    return classification_report(y_true, y_pred)
def eval_f(_net, test_data, ctx=mx.cpu(), *args, **kwargs):
    y_true = []
    y_pred = []
    for x, y in test_data:
        x = x.as_in_context(ctx)
        pred = _net(x).argmax(-1).asnumpy().tolist()
        y_pred.extend(pred)
        y_true.extend(y.asnumpy().tolist())
    return classification_report(y_true, y_pred)
def eval_f(_net, test_data, ctx=mx.cpu()):
    k = test_data[1]["k"]
    k = as_list(k) if k is not None else []
    max_k = max(k) if k else None
    top_k_ground_truth = []
    top_k_prediction = []
    ground_truth = []
    prediction = []
    for batch_data in tqdm(test_data[0], "evaluating"):
        ctx_data = split_and_load(ctx, *batch_data, even_split=False)
        for (user, item, label) in ctx_data:
            output = _net(user, item)
            pred = output
            label = label.asnumpy().astype("int")
            pred = pred.asnumpy()
            ground_truth.append(label.tolist())
            prediction.append(pred.tolist())
            if max_k:
                # keep the top-k items by predicted score, zero-padding when the batch is shorter than max_k
                top_k_indices = np.argsort(pred)[::-1]
                _top_k_indices = top_k_indices[:max_k]
                padding = [0] * (max_k - len(_top_k_indices)) if len(_top_k_indices) < max_k else []
                top_k_prediction.append(pred[_top_k_indices].tolist() + padding)
                top_k_ground_truth.append(label[_top_k_indices].tolist() + padding)

    chained_ground_truth = list(chain(*ground_truth))
    chained_prediction = list(chain(*prediction))
    metrics = {
        # take the square root so the reported value is RMSE rather than MSE
        "rmse": mean_squared_error(chained_ground_truth, chained_prediction) ** 0.5,
        # median absolute error, reported under the "mae" key
        "mae": median_absolute_error(chained_ground_truth, chained_prediction),
    }
    metrics.update(
        classification_report(
            chained_ground_truth,
            [0 if v < 0.5 else 1 for v in chained_prediction],
            chained_prediction
        )
    )
    if k:
        metrics_k = {"ndcg": {}, "HR": {}}
        for _k in k:
            metrics_k["ndcg"][_k] = ndcg_score(top_k_ground_truth, top_k_prediction, k=_k)
            metrics_k["HR"][_k] = _hit_rate(top_k_ground_truth, k=_k)
        metrics.update(metrics_k)
    return metrics
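# Note: `_hit_rate` used above is a private helper that is not shown in this section.
# A minimal sketch of what such a helper might look like, assuming each entry of
# `top_k_ground_truth` is a per-user list of 0/1 labels already ordered by descending
# predicted score (and zero-padded to max_k); this is an illustration, not the
# repository's implementation.
def _hit_rate(top_k_ground_truth, k):
    # a "hit" means at least one relevant item appears in the user's top-k list
    hits = [1.0 if any(label == 1 for label in labels[:k]) else 0.0
            for labels in top_k_ground_truth]
    return sum(hits) / len(hits) if hits else 0.0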
def eval_irt(int_df, user_params, item_params):
    df = _as_df(int_df)
    user_df = _as_df(user_params)
    item_df = _as_df(item_params)
    df = df.merge(user_df, on="user_id")
    df = df.merge(item_df, on="item_id")
    labels = df["score"]
    preds = irt3pl(df["theta"], df["a"], df["b"], df["c"])
    print(
        result_format(
            classification_report(
                y_true=labels,
                y_pred=[0 if p < 0.5 else 1 for p in preds],
                y_score=preds
            )
        )
    )
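# `irt3pl` above scores responses with the three-parameter logistic IRT model.
# A minimal sketch under the standard 3PL formulation, for illustration only;
# the project's own `irt3pl` may differ (e.g. in the scaling constant D).
import numpy as np

def irt3pl_sketch(theta, a, b, c, D=1.702):
    # P(correct) = c + (1 - c) / (1 + exp(-D * a * (theta - b)))
    theta, a, b, c = map(np.asarray, (theta, a, b, c))
    return c + (1 - c) / (1 + np.exp(-D * a * (theta - b)))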
def eval_f(_net, test_data, ctx=mx.cpu()):
    ground_truth = []
    prediction = []
    for batch_data in tqdm(test_data, "evaluating"):
        ctx_data = split_and_load(ctx, *batch_data, even_split=False)
        for (user, item, score) in ctx_data:
            output = _net(user, item)
            pred = output
            ground_truth.extend(score.asnumpy().tolist())
            prediction.extend(pred.asnumpy().tolist())
    return classification_report(
        ground_truth,
        y_pred=[0 if p < 0.5 else 1 for p in prediction],
        y_score=prediction
    )
def eval_f(_net, test_data, ctx=mx.cpu()):
    ground_truth = []
    prediction = []
    pred_labels = []
    for batch_data in tqdm(test_data, "evaluating"):
        ctx_data = split_and_load(ctx, *batch_data, even_split=False)
        for (data, data_mask, label, pick_index, label_mask) in ctx_data:
            output, _ = _net(data, data_mask)
            # drop the output at the last time step, then pick the predicted
            # probability of the queried exercise at each remaining step
            output = mx.nd.slice(output, (None, None), (None, -1))
            output = mx.nd.pick(output, pick_index)
            pred = output.asnumpy().tolist()
            label = label.asnumpy().tolist()
            for i, length in enumerate(label_mask.asnumpy().tolist()):
                # label_mask holds the valid sequence length of each sample
                length = int(length)
                ground_truth.extend(label[i][:length])
                prediction.extend(pred[i][:length])
                pred_labels.extend([0 if p < 0.5 else 1 for p in pred[i][:length]])
    return classification_report(ground_truth, y_pred=pred_labels, y_score=prediction)