Example #1
def test_eval_format():
    y_true = np.array([0, 0, 1, 1, 0])
    y_pred = np.array([0, 1, 0, 1, 0])
    result = classification_report(y_true, y_pred)

    # `a` (and `b` further down) are assumed to be the expected formatted
    # strings defined elsewhere in the original test module.
    assert eval_format(
        tips="test_eval_format",
        iteration=10,
        train_time=137.1234,
        loss_name_value={"loss": 2.59},
        eval_name_value=result,
        extra_info={"a": 123}
    ) == a.strip("\n")

    y_true = [3, -0.5, 2, 7]
    y_pred = [2.5, 0.0, 2, 8]
    result = regression_report(y_true, y_pred)

    assert eval_format(
        eval_name_value=result,
        dump_file=None,
        keep="msg"
    ) == b.strip()

    assert eval_format(
        eval_name_value=result,
        dump_file=None,
        keep=None
    ) == b.strip()

    assert eval_format(
        eval_name_value=result,
        dump_file=None,
        keep={"msg", "data"}
    )[0] == b.strip()
Example #2
def eval_f(_net, test_data, *args, **kwargs):
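    # Collect ground-truth labels and arg-max predictions over the whole test
    # set, then summarize them with classification_report.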
    y_true = []
    y_pred = []
    for x, y in test_data:
        pred = _net(x).argmax(-1).tolist()
        y_pred.extend(pred)
        y_true.extend(y.tolist())
    return classification_report(y_true, y_pred)
Example #3
def eval_f(_net, test_data, ctx=mx.cpu(), *args, **kwargs):
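    # Same evaluation loop as above, but each batch is moved to the given
    # MXNet context before the forward pass.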
    y_true = []
    y_pred = []
    for x, y in test_data:
        x = x.as_in_context(ctx)
        pred = _net(x).argmax(-1).asnumpy().tolist()
        y_pred.extend(pred)
        y_true.extend(y.asnumpy().tolist())

    return classification_report(y_true, y_pred)
Example #4
def eval_f(_net, test_data, ctx=mx.cpu()):
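    # test_data is a (batch iterator, config dict) pair: test_data[1]["k"]
    # lists the cut-off values used for the top-k ranking metrics below.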
    k = test_data[1]["k"]
    k = as_list(k) if k is not None else []
    max_k = max(k) if k else None
    top_k_ground_truth = []
    top_k_prediction = []
    ground_truth = []
    prediction = []

    for batch_data in tqdm(test_data[0], "evaluating"):
        ctx_data = split_and_load(ctx, *batch_data, even_split=False)
        for (user, item, label) in ctx_data:
            output = _net(user, item)
            pred = output
            label = label.asnumpy().astype("int")
            pred = pred.asnumpy()
            ground_truth.append(label.tolist())
            prediction.append(pred.tolist())
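            # For the ranking metrics, keep the max_k highest-scored items and
            # the labels at those positions, zero-padding short lists.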
            if max_k:
                top_k_indices = np.argsort(pred)[::-1]
                _top_k_indices = top_k_indices[:max_k]
                padding = ([0] * (max_k - len(_top_k_indices))
                           if len(_top_k_indices) < max_k else [])
                top_k_prediction.append(pred[_top_k_indices].tolist() +
                                        padding)
                top_k_ground_truth.append(label[_top_k_indices].tolist() +
                                          padding)

    chained_ground_truth = list(chain(*ground_truth))
    chained_prediction = list(chain(*prediction))
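    # Error metrics over the flattened ground-truth / prediction lists.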
    metrics = {
        "rmse": mean_squared_error(chained_ground_truth, chained_prediction),
        "mae": median_absolute_error(chained_ground_truth, chained_prediction),
    }

    # Classification metrics: scores are thresholded at 0.5 to obtain hard
    # labels, and the raw scores are passed along as well.
    metrics.update(
        classification_report(
            chained_ground_truth,
            [0 if v < 0.5 else 1 for v in chained_prediction],
            chained_prediction
        )
    )

    # Ranking metrics (NDCG and hit rate) at each requested cut-off.
    if k:
        metrics_k = {"ndcg": {}, "HR": {}}
        for _k in k:
            metrics_k["ndcg"][_k] = ndcg_score(top_k_ground_truth,
                                               top_k_prediction,
                                               k=_k)
            metrics_k["HR"][_k] = _hit_rate(top_k_ground_truth, k=_k)
        metrics.update(metrics_k)
    return metrics
Example #5
def eval_irt(int_df, user_params, item_params):
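    # Join the interaction records with the estimated user / item parameters,
    # score each record with the 3PL IRT model, and print a classification
    # report of the 0.5-thresholded predictions.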
    df = _as_df(int_df)
    user_df = _as_df(user_params)
    item_df = _as_df(item_params)
    df = df.merge(user_df, on="user_id")
    df = df.merge(item_df, on="item_id")
    labels = df["score"]
    preds = irt3pl(df["theta"], df["a"], df["b"], df["c"])

    print(
        result_format(
            classification_report(y_true=labels,
                                  y_pred=[0 if p < 0.5 else 1 for p in preds],
                                  y_score=preds)))
Example #6
def eval_f(_net, test_data, ctx=mx.cpu()):
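    # Score (user, item) pairs batch by batch; the raw scores are reported as
    # y_score and their 0.5-thresholded values as y_pred.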
    ground_truth = []
    prediction = []

    for batch_data in tqdm(test_data, "evaluating"):
        ctx_data = split_and_load(
            ctx, *batch_data,
            even_split=False
        )
        for (user, item, score) in ctx_data:
            output = _net(user, item)
            pred = output
            ground_truth.extend(score.asnumpy().tolist())
            prediction.extend(pred.asnumpy().tolist())

    return classification_report(
        ground_truth,
        y_pred=[0 if p < 0.5 else 1 for p in prediction],
        y_score=prediction
    )
Example #7
def eval_f(_net, test_data, ctx=mx.cpu()):
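    # Sequence-model evaluation: label_mask holds the valid length of each
    # sequence, so only the first `length` steps of every sample are scored.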
    ground_truth = []
    prediction = []
    pred_labels = []

    for batch_data in tqdm(test_data, "evaluating"):
        ctx_data = split_and_load(ctx, *batch_data, even_split=False)
        for (data, data_mask, label, pick_index, label_mask) in ctx_data:
            output, _ = _net(data, data_mask)
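            # Drop the last position along the second axis, then pick, for
            # each remaining position, the output entry given by pick_index.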
            output = mx.nd.slice(output, (None, None), (None, -1))
            output = mx.nd.pick(output, pick_index)
            pred = output.asnumpy().tolist()
            label = label.asnumpy().tolist()
            for i, length in enumerate(label_mask.asnumpy().tolist()):
                length = int(length)
                ground_truth.extend(label[i][:length])
                prediction.extend(pred[i][:length])
                pred_labels.extend(
                    [0 if p < 0.5 else 1 for p in pred[i][:length]])

    return classification_report(ground_truth,
                                 y_pred=pred_labels,
                                 y_score=prediction)