예제 #1
0
def load_predictions(data, file):
    """Attach predictions read from a JSON-lines file to copies of ``data``.

    Args:
        data: Sequence of instance dicts. Each one is deep-copied, so the
            caller's objects are never mutated.
        file: Path of a text file holding one JSON object per line. The
            i-th object must provide the keys ``"predicted_label"`` and
            ``"predicted_evidence"`` for the i-th instance in ``data``.
            Whitespace-only lines are ignored.

    Returns:
        A tuple ``(evaluate_instances, all_instances)``.
        ``all_instances`` holds every annotated copy, in input order.
        ``evaluate_instances`` holds only the copies for which the first
        value returned by ``evidence_macro_recall`` is strictly smaller
        than the second.

    Raises:
        IndexError: If the file holds fewer predictions than ``data`` has
            instances.
        KeyError: If a prediction lacks one of the two required keys.
    """
    # Use a separate name for the handle so the ``file`` argument is not
    # shadowed. JSON text is UTF-8, so do not rely on the platform default.
    with open(file, encoding="utf-8") as handle:
        # Skip blank lines (e.g. a trailing newline at the end of the file),
        # which would otherwise make json.loads fail.
        predictions = [json.loads(line) for line in handle if line.strip()]

    evaluate_instances = []
    all_instances = []

    for idx, instance in enumerate(data):
        instance = deepcopy(instance)

        prediction = predictions[idx]
        instance["predicted_label"] = prediction["predicted_label"]
        instance["predicted_evidence"] = prediction["predicted_evidence"]

        all_instances.append(instance)

        # Evaluate the helper once and compare the two values it returns.
        recall = evidence_macro_recall(instance)
        if recall[0] < recall[1]:
            evaluate_instances.append(instance)

    return evaluate_instances, all_instances
예제 #2
0
    def test_recall_nei_no_contribution_to_score(self):
        # Gold and predicted labels are both "not enough info" and no
        # evidence keys are supplied; the first returned value must be 0.
        nei_instance = dict(
            label="not enough info",
            predicted_label="not enough info",
        )

        score, _ = evidence_macro_recall(nei_instance)
        self.assertEqual(score, 0)
예제 #3
0
    def test_recall_partial_predictions_same_groups_zero_score(self):
        # A single gold group lists lines 0 and 1 of "page" and both are
        # predicted, but the call passes max_evidence=1; the first returned
        # value must be 0.
        gold_group = [[None, None, "page", line] for line in (0, 1)]
        predicted = [["page", line] for line in (0, 1)]
        instance = dict(
            label="supports",
            predicted_label="supports",
            evidence=[gold_group],
            predicted_evidence=predicted,
        )

        score, _ = evidence_macro_recall(instance, max_evidence=1)
        self.assertEqual(score, 0)
예제 #4
0
    def test_recall_wrong_predictions_line_zero_score(self):
        # The gold evidence is line 1 of "page" but line 0 is predicted;
        # the first returned value must be 0.
        gold_evidence = [[[None, None, "page", 1]]]
        predicted_evidence = [["page", 0]]
        instance = dict(
            label="supports",
            predicted_label="supports",
            evidence=gold_evidence,
            predicted_evidence=predicted_evidence,
        )

        score, _ = evidence_macro_recall(instance)
        self.assertEqual(score, 0)
예제 #5
0
    def test_recall_no_predictions_zero_score(self):
        # Gold evidence exists (line 0 of "other") but nothing is predicted;
        # the first returned value must be 0.
        instance = dict(
            label="supports",
            predicted_label="supports",
            evidence=[[[None, None, "other", 0]]],
            predicted_evidence=[],
        )

        score, _ = evidence_macro_recall(instance)
        self.assertEqual(score, 0)
예제 #6
0
    def test_recall_no_evidence_prediction_perfect_score(self):
        # The gold evidence list is empty while one line is predicted;
        # the first returned value must be 1.
        instance = dict(
            label="supports",
            predicted_label="supports",
            evidence=[],
            predicted_evidence=[["page", 0]],
        )

        score, _ = evidence_macro_recall(instance)
        self.assertEqual(score, 1)
예제 #7
0
    def test_recall_not_nei_contribution_to_hits(self):
        # A "supports" instance with empty gold and predicted evidence;
        # the second returned value must be 1.
        instance = dict(
            label="supports",
            predicted_label="supports",
            evidence=[],
            predicted_evidence=[],
        )

        _, hits = evidence_macro_recall(instance)
        self.assertEqual(hits, 1)
예제 #8
0
    def test_recall_correct_predictions_exact_one_score_multi_evidence(self):
        # A single gold group lists lines 0 and 1 of "page" and both are
        # predicted; the first returned value must be 1.
        wanted_lines = (0, 1)
        instance = dict(
            label="supports",
            predicted_label="supports",
            evidence=[[[None, None, "page", line] for line in wanted_lines]],
            predicted_evidence=[["page", line] for line in wanted_lines],
        )

        score, _ = evidence_macro_recall(instance)
        self.assertEqual(score, 1)
예제 #9
0
    def test_recall_correct_predictions_partial_other_one_score_multi_evidence_groups(
            self):
        # Two single-line gold groups (lines 0 and 1 of "page"); the
        # predictions cover line 0 plus an unrelated line 2. The first
        # returned value must be approximately 1.
        first_group = [[None, None, "page", 0]]
        second_group = [[None, None, "page", 1]]
        instance = dict(
            label="supports",
            predicted_label="supports",
            evidence=[first_group, second_group],
            predicted_evidence=[["page", 0], ["page", 2]],
        )

        score, _ = evidence_macro_recall(instance)
        self.assertAlmostEqual(score, 1)