Example #1
def do_eval(tfrecord_path, prediction_path):
    tfrecord = list(load_tfrecord(tfrecord_path))
    predictions = list(load_preditions(prediction_path))
    golds, preds = zip(*compare(tfrecord, predictions))
    # Keep only the gold labels that have a matching prediction.
    golds = golds[:len(preds)]

    # Count (gold, pred) pairs to build a 3x3 confusion matrix.
    count = Counter()
    for label, pred in zip(golds, preds):
        count[(label, pred)] += 1

    print("\t0\t1\t2")
    for i in range(3):
        print("Gold {}".format(i), end="\t")
        for j in range(3):
            print(count[(i,j)], end="\t")
        print("")
    acc = (count[(0,0)] + count[(1,1)] +count[(2,2)]  ) / sum(count.values())
    print("Acc : ", acc)
    # Per-label precision/recall/F1 derived from the confusion counts.
    for i in range(3):
        prec = count[(i, i)] / sum([count[(j, i)] for j in range(3)])
        recall = count[(i, i)] / sum([count[(i, j)] for j in range(3)])
        f1 = 2 * prec * recall / (prec + recall)
        print("Label ", i)
        print("P/R/F1", prec, recall, f1)

    # Macro-average F1 over the three labels.
    all_result = eval_3label(preds, golds)
    f1 = sum([result['f1'] for result in all_result]) / 3
    print("Macro Avg F1:", f1)
Example #2
def print_eval(pred_y, gold_y):
    all_result = eval_3label(pred_y, gold_y)
    # idx_for / idx_against index the "for" and "against" labels (defined elsewhere in the module).
    for_result = all_result[idx_for]
    against_result = all_result[idx_against]
    f1 = sum([result['f1'] for result in all_result]) / 3
    print("F1", f1)
    print("P_arg+", for_result['precision'])
    print("R_arg+", for_result['recall'])
    print("P_arg-", against_result['precision'])
    print("R_arg-", against_result['recall'])
Example #3
def get_f1_score(tfrecord_path, prediction_path, n_label=3):
    tfrecord = list(load_tfrecord(tfrecord_path))
    predictions = list(load_preditions(prediction_path))
    golds, preds = zip(*compare(tfrecord, predictions))
    golds = golds[:len(preds)]
    # Pick the evaluator that matches the label cardinality.
    if n_label == 3:
        all_result = eval_3label(preds, golds)
    elif n_label == 2:
        all_result = eval_2label(preds, golds)
    else:
        assert False

    f1 = sum([result['f1'] for result in all_result]) / n_label
    return {"f1": f1}
Example #4
def eval(
    score_pred_file_name: FileName,
    cpid_resolute_file: FileName,
    n_way=3,
):
    topic = "abortion"
    pred_path: FilePath = pjoin(output_path, score_pred_file_name)
    dpid_resolute: Dict[str, DPID] = load_dpid_resolute(cpid_resolute_file)
    score_d: Dict[DPID, np.ndarray] = get_datapoint_score(
        pred_path, dpid_resolute, "avg")

    def argmax(arr: np.ndarray) -> int:
        # Index of the highest class score.
        return int(arr.argmax())

    pred_d: Dict[DPID, int] = dict_value_map(argmax, score_d)

    dev_labels = get_dev_labels(topic)
    if n_way == 2:

        def merge_label(e):
            dpid, label = e
            # Collapse 3-way labels to 2-way: keep label 0, fold 1 and 2
            # together (see the toy run after this example).
            return dpid, {
                0: 0,
                1: 1,
                2: 1,
            }[label]

        dev_labels = lmap(merge_label, dev_labels)

    def fetch_pred(e: Tuple[DPID, int]):
        dpid, label = e
        pred = pred_d[dpid]
        return pred

    # Gold labels and the corresponding predictions, in the same order.
    gold_list: List[int] = right(dev_labels)
    pred_list: List[int] = lmap(fetch_pred, dev_labels)
    if n_way == 3:
        all_result = eval_3label(pred_list, gold_list)
    elif n_way == 2:
        all_result = eval_2label(pred_list, gold_list)
    else:
        assert False
    print(all_result)
    f1 = sum([result['f1'] for result in all_result]) / n_way
    print("Avg F1 : ", f1)
Example #5
def main():
    # Label 0
    # [{'precision': 0.8733572281959379, 'recall': 0.9624753127057275, 'f1': 0.915753210147197},
    # Label 1
    # {'precision': 0.25, 'recall': 0.12244897959183673, 'f1': 0.1643835616438356}]
    #
    # [{'precision': 0.8733572281959379, 'recall': 0.9624753127057275, 'f1': 0.915753210147197},
    # {'precision': 0.25, 'recall': 0.12244897959183673, 'f1': 0.1643835616438356},
    # {'precision': 0.24193548387096775, 'recall': 0.078125, 'f1': 0.11811023622047244}]

    train_x, train_y, dev_x, dev_y = get_aawd_binary_train_dev()
    tprint("training and testing")
    use_char_ngram = False
    print("Use char ngram", use_char_ngram )
    pred_svm_ngram = svm.train_svm_and_test(svm.NGramFeature(use_char_ngram, 4), train_x, train_y, dev_x)
    # pred_svm_ngram = list([random.randint(0,1) for _ in dev_y])
    result = eval_3label(pred_svm_ngram, dev_y)
    print(result)
Example #6
def get_ranking_metrics(tfrecord_path, prediction_path):
    tfrecord = list(load_tfrecord(tfrecord_path))
    predictions = list(load_preditions(prediction_path))

    label_and_prediction = list(join_label(tfrecord, predictions))

    golds, preds = zip(*compare(tfrecord, predictions))
    golds = golds[:len(preds)]

    for result in eval_3label(preds, golds):
        print(result)

    def get_score(label_idx, entry):
        # Score of the target label from a (label, logits) pair.
        label, logits = entry
        return logits[label_idx]

    ap_list = []
    for target_label in [0, 1, 2]:
        print("Label : ", target_label)
        # Rank all examples by the logit of the target label, highest first.
        key_fn = partial(get_score, target_label)
        label_and_prediction.sort(key=key_fn, reverse=True)
        labels = left(label_and_prediction)

        # Binary relevance list over the ranking; its average precision is
        # computed by AP_from_binary (sketched after this example).
        correctness_list = [l == target_label for l in labels]
        num_gold = sum(correctness_list)

        ap = AP_from_binary(correctness_list, num_gold)
        ap_list.append(ap)
        print("AP: ", ap)

        k_list = [1, 5, 10, 100]
        print("P at {}".format(k_list), end="\t")
        show_all_p_at_k(correctness_list, label_and_prediction)

    # Mean average precision over the three per-label rankings.
    MAP = average(ap_list)
    return {"MAP": MAP}