Example #1
0
def span_prf1_type_map(
    reference_docs: Sequence[Doc],
    test_docs: Sequence[Doc],
    type_map: Optional[Mapping[str, str]] = None,
) -> Dict[str, PRF1]:
    """Score the entities of ``test_docs`` against ``reference_docs``.

    Documents are compared pairwise by position: the i-th test document is
    looked up by index for every reference document, so a shorter
    ``test_docs`` raises ``IndexError``.

    Args:
        reference_docs: Documents whose ``ents`` are the gold entities.
        test_docs: Documents whose ``ents`` are the predicted entities.
        type_map: When not ``None``, both entity collections of each document
            pair are passed through ``remapping`` together with this mapping
            before comparison (``remapping`` is defined elsewhere;
            presumably it rewrites entity labels -- confirm).

    Returns:
        A dict from entity label to ``PRF1``. ``get_prf1_all`` is called on
        the result before it is returned, which adds the aggregate entry.
    """
    counts = defaultdict(lambda: defaultdict(int))
    for doc_index, reference_doc in enumerate(reference_docs):
        gold_ents = set(reference_doc.ents)
        predicted_ents = set(test_docs[doc_index].ents)
        if type_map is not None:
            gold_ents = remapping(gold_ents, type_map)
            predicted_ents = remapping(predicted_ents, type_map)

        # Every prediction is either confirmed by the reference or spurious.
        for predicted in predicted_ents:
            outcome = "tp" if is_ent_in_list(predicted, gold_ents) else "fp"
            counts[predicted.label_][outcome] += 1

        # Reference entities without a matching prediction were missed.
        for gold in gold_ents:
            if is_ent_in_list(gold, predicted_ents):
                continue
            counts[gold.label_]["fn"] += 1

    prf1 = {}
    for label, tallies in counts.items():
        precision = get_precision(tallies["tp"], tallies["fp"])
        recall = get_recall(tallies["tp"], tallies["fn"])
        prf1[label] = PRF1(precision, recall, get_f1(precision, recall))
    get_prf1_all(counts, prf1)
    return prf1
Example #2
0
def get_prf1_all(counts, prf1):
    """Store the score aggregated over every label in ``prf1`` under ``""``.

    Args:
        counts: Mapping from label to a mapping holding the ``"tp"``,
            ``"fp"`` and ``"fn"`` tallies for that label.
        prf1: Dict that is updated in place; nothing is returned.
    """
    totals = {"tp": 0, "fp": 0, "fn": 0}
    for tallies in counts.values():
        for outcome in ("tp", "fp", "fn"):
            totals[outcome] += tallies[outcome]

    overall_precision = get_precision(totals["tp"], totals["fp"])
    overall_recall = get_recall(totals["tp"], totals["fn"])
    overall_f1 = get_f1(overall_precision, overall_recall)
    prf1[""] = PRF1(overall_precision, overall_recall, overall_f1)
Example #3
0
def span_prf1_type_map(
        reference_docs: Sequence[Doc],
        test_docs: Sequence[Doc],
        type_map: Optional[Mapping[str, str]] = None) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 per entity type with remapping.

    Documents are paired positionally with ``zip``, so surplus documents in
    the longer sequence are ignored. Each entity is reduced to a
    ``(start, end, label)`` triple, where the label is first translated
    through ``type_map`` when it appears there. A predicted triple is a true
    positive when the identical triple exists in the paired reference
    document, otherwise a false positive; reference triples with no identical
    prediction are false negatives.

    Args:
        reference_docs: Documents whose ``ents`` are the gold entities.
        test_docs: Documents whose ``ents`` are the predicted entities.
        type_map: Optional mapping from original label to replacement label.
            Labels absent from the mapping are kept unchanged.

    Returns:
        A dict from (mapped) label to ``PRF1``. The key ``""`` holds the score
        over all entities and is always present, even when no document has
        any entity (all three values are then ``0.0``).
    """
    label_map = {} if type_map is None else type_map

    counts = defaultdict(lambda: defaultdict(int))
    # Create the aggregate bucket up front so that "" is reported even when
    # there is nothing to count.
    overall = counts[""]

    def to_spans(doc):
        return [(ent.start, ent.end, label_map.get(ent.label_, ent.label_))
                for ent in doc.ents]

    def tally(label, outcome):
        overall[outcome] += 1
        # An entity whose label is "" already landed in the aggregate bucket;
        # counting it again would double its weight.
        if label != "":
            counts[label][outcome] += 1

    for test_doc, ref_doc in zip(test_docs, reference_docs):
        test_spans = to_spans(test_doc)
        ref_spans = to_spans(ref_doc)
        # Sets give constant-time membership tests; the lists are still used
        # for iteration so that every entity is tallied exactly as listed.
        test_lookup = set(test_spans)
        ref_lookup = set(ref_spans)

        for span in test_spans:
            tally(span[2], 'tp' if span in ref_lookup else 'fp')
        for span in ref_spans:
            if span not in test_lookup:
                tally(span[2], 'fn')

    prf1_dict = {}
    for ent_type, count in counts.items():
        predicted = count['tp'] + count['fp']
        expected = count['tp'] + count['fn']
        p = count['tp'] / predicted if predicted != 0 else 0.0
        r = count['tp'] / expected if expected != 0 else 0.0
        f1 = (2 * (p * r) / (p + r)) if (p + r) != 0 else 0.0
        prf1_dict[ent_type] = PRF1(p, r, f1)

    return prf1_dict
Example #4
0
def span_prf1(reference_docs: Sequence[Doc],
              test_docs: Sequence[Doc],
              typed: bool = True) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 of predicted entities.

    Documents are paired positionally with ``zip``, so surplus documents in
    the longer sequence are ignored. Each entity is reduced to a
    ``(start, end, label)`` triple; a predicted triple is a true positive
    when the identical triple exists in the paired reference document,
    otherwise a false positive, and reference triples with no identical
    prediction are false negatives.

    Args:
        reference_docs: Documents whose ``ents`` are the gold entities.
        test_docs: Documents whose ``ents`` are the predicted entities.
        typed: When true, labels take part in matching and one entry per
            label is reported. When false, every label is replaced by ``""``
            so only span boundaries are compared.

    Returns:
        A dict from label to ``PRF1``. The key ``""`` holds the score over all
        entities and is always present; with ``typed=False`` it is the only
        key. When there are no entities at all its values are ``0.0``.
    """
    counts = defaultdict(lambda: defaultdict(int))
    # The aggregate lives directly under "" (no temporary "all" key), so a
    # real label named "all" cannot collide with it, and it exists even when
    # no entity is ever counted.
    overall = counts[""]

    def to_spans(doc):
        return [(ent.start, ent.end, ent.label_ if typed else "")
                for ent in doc.ents]

    def tally(label, outcome):
        overall[outcome] += 1
        # Untyped spans (and entities with an empty label) are already
        # represented by the aggregate bucket.
        if label != "":
            counts[label][outcome] += 1

    for test_doc, ref_doc in zip(test_docs, reference_docs):
        test_spans = to_spans(test_doc)
        ref_spans = to_spans(ref_doc)
        # Sets give constant-time membership tests.
        test_lookup = set(test_spans)
        ref_lookup = set(ref_spans)

        for span in test_spans:
            tally(span[2], 'tp' if span in ref_lookup else 'fp')
        for span in ref_spans:
            if span not in test_lookup:
                tally(span[2], 'fn')

    prf1_dict = {}
    for ent_type, count in counts.items():
        predicted = count['tp'] + count['fp']
        expected = count['tp'] + count['fn']
        p = count['tp'] / predicted if predicted != 0 else 0.0
        r = count['tp'] / expected if expected != 0 else 0.0
        f1 = (2 * (p * r) / (p + r)) if (p + r) != 0 else 0.0
        prf1_dict[ent_type] = PRF1(p, r, f1)

    return prf1_dict
Example #5
0
def span_prf1_type_map(
    reference_docs: Sequence[Doc],
    test_docs: Sequence[Doc],
    type_map: Optional[Mapping[str, str]] = None,
) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 per entity type with remapping.

    The entities of every document are passed through ``remap`` (defined
    elsewhere) together with ``type_map`` and the document index. Each item
    it yields is read as ``item[0]`` = entity and ``item[1]`` = a document
    discriminator (presumably the index that was passed in -- confirm against
    ``remap``). Entities are then compared as
    ``(start, end, label, discriminator)`` tuples using set operations.

    Args:
        reference_docs: Documents whose ``ents`` are the gold entities.
        test_docs: Documents whose ``ents`` are the predicted entities.
        type_map: Optional label mapping handed to ``remap``; ``None`` is
            treated as an empty mapping.

    Returns:
        A dict from label to ``PRF1``. The key ``""`` holds the score over all
        entities and is always present.

    Raises:
        ValueError: If the two document sequences differ in length.
    """
    if len(reference_docs) != len(test_docs):
        raise ValueError(
            "reference_docs and test_docs must have the same length "
            "({} != {})".format(len(reference_docs), len(test_docs)))
    if type_map is None:
        type_map = {}

    def to_key(item):
        ent = item[0]
        return (ent.start, ent.end, ent.label_, item[1])

    # Build the comparison tuples once instead of once per label.
    ref_keys = set()
    test_keys = set()
    for doc_index, (ref_doc, test_doc) in enumerate(
            zip(reference_docs, test_docs)):
        # The index keeps identical offsets in different documents apart.
        ref_items = remap(ref_doc.ents, type_map, doc_index)
        test_items = remap(test_doc.ents, type_map, doc_index)
        ref_keys.update(to_key(item) for item in ref_items)
        test_keys.update(to_key(item) for item in test_items)

    labels = {""}
    labels.update(key[2] for key in ref_keys)
    labels.update(key[2] for key in test_keys)

    to_return = {}
    for label in labels:
        if label == "":
            # The empty label selects every entity: the overall score.
            ref_selected = ref_keys
            test_selected = test_keys
        else:
            ref_selected = {key for key in ref_keys if key[2] == label}
            test_selected = {key for key in test_keys if key[2] == label}

        tp = len(ref_selected & test_selected)
        fp = len(test_selected) - tp
        fn = len(ref_selected) - tp

        # With no true positives both ratios are zero (and possibly 0/0).
        p = tp / (tp + fp) if tp else 0.0
        r = tp / (tp + fn) if tp else 0.0
        f1 = (2 * p * r) / (p + r) if p + r != 0 else 0.0
        to_return[label] = PRF1(precision=p, recall=r, f1=f1)

    return to_return
Example #6
0
def span_prf1(reference_docs: Sequence[Doc],
              test_docs: Sequence[Doc],
              typed: bool = True) -> Dict[str, PRF1]:
    """Compute span-level precision/recall/F1 of predicted entities.

    Entities are compared document by document: a prediction only matches a
    reference entity of the document at the same position. Each entity is
    reduced to a ``(start, end, label)`` triple; a predicted triple is a true
    positive when the identical triple exists in the paired reference
    document, otherwise a false positive, and reference triples with no
    identical prediction are false negatives. If one sequence is longer, its
    surplus documents are scored against an empty entity list, so none of
    their entities are dropped from the totals.

    Args:
        reference_docs: Documents whose ``ents`` are the gold entities.
        test_docs: Documents whose ``ents`` are the predicted entities.
        typed: When true, labels take part in matching and one entry per
            label is reported. When false, every label is replaced by ``""``
            so only span boundaries are compared.

    Returns:
        A dict from label to ``PRF1``. The key ``""`` holds the score over all
        entities and is always present; with ``typed=False`` it is the only
        key.
    """
    from itertools import zip_longest

    def new_tally():
        return {"tp": 0, "fp": 0, "fn": 0}

    counts = {"": new_tally()}

    def tally(label, outcome):
        counts[""][outcome] += 1
        # Labels are bucketed by exact equality, so "PER" never absorbs
        # "PERSON". Empty labels are already covered by the aggregate.
        if label != "":
            counts.setdefault(label, new_tally())[outcome] += 1

    def to_spans(doc):
        # A missing document (padding from zip_longest) has no entities.
        ents = doc.ents if doc is not None else ()
        return [(ent.start, ent.end, ent.label_ if typed else "")
                for ent in ents]

    # Token offsets restart in every document, so spans must never be
    # compared across documents; hence the per-document pairing.
    for ref_doc, test_doc in zip_longest(reference_docs, test_docs):
        gold_spans = to_spans(ref_doc)
        predicted_spans = to_spans(test_doc)
        gold_lookup = set(gold_spans)
        predicted_lookup = set(predicted_spans)

        for span in predicted_spans:
            tally(span[2], "tp" if span in gold_lookup else "fp")
        for span in gold_spans:
            if span not in predicted_lookup:
                tally(span[2], "fn")

    to_return = {}
    for label, count in counts.items():
        tp, fp, fn = count["tp"], count["fp"], count["fn"]
        precision = tp / (tp + fp) if tp + fp != 0 else 0.0
        recall = tp / (tp + fn) if tp + fn != 0 else 0.0
        if precision == 0 or recall == 0:
            f1 = 0.0
        else:
            # Harmonic mean of precision and recall.
            f1 = 2 / (1.0 / precision + 1.0 / recall)
        to_return[label] = PRF1(precision, recall, f1)

    return to_return
Example #7
0
    def test_span_prf1_type_map(self):
        # Checks span_prf1_type_map against hand-computed scores, first with
        # predictions identical to the reference and then with two errors,
        # each time with and without a type map.
        perfect = PRF1(1.0, 1.0, 1.0)
        zero = PRF1(0.0, 0.0, 0.0)
        original_labels = ("", "PER", "GPE", "LOC")

        sentences = [[["George", "Washington"]], [["Maryland"]], [["Asia"]]]
        gold_tags = [["B-PER", "I-PER"], ["B-GPE"], ["B-LOC"]]
        gold_docs = _create_docs(sentences, gold_tags, NLP)

        # Scoring the reference against itself is perfect for every label.
        self.assertPRF1DictAlmostEqual(
            dict.fromkeys(original_labels, perfect),
            span_prf1_type_map(gold_docs, gold_docs),
        )

        # An empty type map must leave the labels and scores untouched.
        self.assertPRF1DictAlmostEqual(
            dict.fromkeys(original_labels, perfect),
            span_prf1_type_map(gold_docs, gold_docs, type_map={}),
        )

        # Renaming every label changes the keys but not the scores.
        reversed_names = {"GPE": "EPG", "LOC": "COL", "PER": "REP"}
        self.assertPRF1DictAlmostEqual(
            dict.fromkeys(("", "REP", "EPG", "COL"), perfect),
            span_prf1_type_map(gold_docs, gold_docs, type_map=reversed_names),
        )

        # Two mistakes: the PER span is cut short, and the final LOC is
        # predicted as GPE.
        flawed_tags = [["B-PER", "O"], ["B-GPE"], ["B-GPE"]]
        flawed_docs = _create_docs(sentences, flawed_tags, NLP)
        expected_unmapped = {
            "": PRF1(0.3333, 0.3333, 0.3333),
            "PER": zero,
            "GPE": PRF1(0.5, 1.0, 0.6666),
            "LOC": zero,
        }
        self.assertPRF1DictAlmostEqual(
            expected_unmapped,
            span_prf1_type_map(gold_docs, flawed_docs),
        )

        # Merging GPE and LOC into one type leaves only the PER mistake.
        merge_gpe_loc = {"GPE": "GPE_LOC", "LOC": "GPE_LOC"}
        expected_merged = {
            "": PRF1(0.6666, 0.6666, 0.6666),
            "PER": zero,
            "GPE_LOC": perfect,
        }
        self.assertPRF1DictAlmostEqual(
            expected_merged,
            span_prf1_type_map(gold_docs, flawed_docs, type_map=merge_gpe_loc),
        )