import json
from copy import deepcopy

# evidence_macro_recall and is_correct_label are assumed to come from the FEVER scorer
# (fever-scorer package); adjust the import path to match the local project layout.
from fever.scorer import evidence_macro_recall, is_correct_label


def load_predictions(data, file):
    """Merge JSONL predictions into the gold data and collect the instances whose
    predicted evidence fails to cover any complete gold evidence group."""
    predictions = []
    evaluate_instances = []
    all_instances = []

    # One JSON object per line: {"predicted_label": ..., "predicted_evidence": ...}
    with open(file) as f:
        for line in f:
            predictions.append(json.loads(line))

    for idx, instance in enumerate(data):
        instance = deepcopy(instance)
        instance["predicted_label"] = predictions[idx]["predicted_label"]
        instance["predicted_evidence"] = predictions[idx]["predicted_evidence"]
        all_instances.append(instance)

        # evidence_macro_recall returns (score, hits); score < hits means the instance
        # counts towards recall but no gold evidence group was fully retrieved.
        score, hits = evidence_macro_recall(instance)
        if score < hits:
            evaluate_instances.append(instance)

        # Kept from the original exploration; currently a no-op.
        if is_correct_label(instance):
            pass  # print("Correct label")
        else:
            if score >= 1.0 or instance["label"] == "NOT ENOUGH INFO":
                pass  # evaluate_instances.append(instance)

    return evaluate_instances, all_instances
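
# Minimal usage sketch. Assumptions: gold_path points at FEVER-style gold instances with
# "label" and "evidence" fields, predictions_path at a JSONL file with one
# {"predicted_label", "predicted_evidence"} object per line. The helper and its parameter
# names are illustrative only and not part of the original code.
def example_load_predictions_usage(gold_path, predictions_path):
    with open(gold_path) as gold_file:
        gold_data = [json.loads(line) for line in gold_file]
    evaluate_instances, all_instances = load_predictions(gold_data, predictions_path)
    print(f"{len(evaluate_instances)} of {len(all_instances)} instances miss every gold evidence group")
    return evaluate_instances, all_instances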
def test_recall_nei_no_contribution_to_score(self):
    # NOT ENOUGH INFO instances are excluded from evidence recall.
    instance = {
        "label": "not enough info",
        "predicted_label": "not enough info"
    }
    p, h = evidence_macro_recall(instance)
    self.assertEqual(p, 0)
def test_recall_partial_predictions_same_groups_zero_score(self):
    # max_evidence truncates the predictions, so the evidence group is only
    # partially covered and scores zero.
    instance = {
        "label": "supports",
        "predicted_label": "supports",
        "evidence": [[[None, None, "page", 0], [None, None, "page", 1]]],
        "predicted_evidence": [["page", 0], ["page", 1]]
    }
    p, h = evidence_macro_recall(instance, max_evidence=1)
    self.assertEqual(p, 0)
def test_recall_wrong_predictions_line_zero_score(self):
    instance = {
        "label": "supports",
        "predicted_label": "supports",
        "evidence": [[[None, None, "page", 1]]],
        "predicted_evidence": [["page", 0]]
    }
    p, h = evidence_macro_recall(instance)
    self.assertEqual(p, 0)
def test_recall_no_predictions_zero_score(self):
    instance = {
        "label": "supports",
        "predicted_label": "supports",
        "evidence": [[[None, None, "other", 0]]],
        "predicted_evidence": []
    }
    p, h = evidence_macro_recall(instance)
    self.assertEqual(p, 0)
def test_recall_no_evidence_prediction_perfect_score(self):
    # No gold evidence to recall, so any prediction scores perfectly.
    instance = {
        "label": "supports",
        "predicted_label": "supports",
        "evidence": [],
        "predicted_evidence": [["page", 0]]
    }
    p, h = evidence_macro_recall(instance)
    self.assertEqual(p, 1)
def test_recall_not_nei_contribution_to_hits(self):
    # Non-NEI instances always count towards the macro average (hits == 1).
    instance = {
        "label": "supports",
        "predicted_label": "supports",
        "evidence": [],
        "predicted_evidence": []
    }
    p, h = evidence_macro_recall(instance)
    self.assertEqual(h, 1)
def test_recall_correct_predictions_exact_one_score_multi_evidence(self):
    instance = {
        "label": "supports",
        "predicted_label": "supports",
        "evidence": [[[None, None, "page", 0], [None, None, "page", 1]]],
        "predicted_evidence": [["page", 0], ["page", 1]]
    }
    p, h = evidence_macro_recall(instance)
    self.assertEqual(p, 1)
def test_recall_correct_predictions_partial_other_one_score_multi_evidence_groups(self):
    # Fully covering any single gold evidence group is enough for a perfect score,
    # even if another group is missed.
    instance = {
        "label": "supports",
        "predicted_label": "supports",
        "evidence": [[[None, None, "page", 0]], [[None, None, "page", 1]]],
        "predicted_evidence": [["page", 0], ["page", 2]]
    }
    p, h = evidence_macro_recall(instance)
    self.assertAlmostEqual(p, 1)
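
# For reference, a sketch of the behaviour these tests pin down for evidence_macro_recall.
# The real implementation lives in the FEVER scorer; this is an illustrative restatement
# under those assumptions, not the imported function:
#   * NEI instances never contribute: (0.0, 0.0).
#   * Non-NEI instances with no gold evidence count as perfect: (1.0, 1.0).
#   * Otherwise the score is 1.0 only if some gold evidence group is fully contained in
#     the (optionally truncated) predicted evidence; "hits" is 1.0 either way because the
#     instance counts towards the macro average.
def reference_evidence_macro_recall(instance, max_evidence=None):
    if instance["label"].upper() == "NOT ENOUGH INFO":
        return 0.0, 0.0
    if len(instance["evidence"]) == 0:
        return 1.0, 1.0
    predicted = instance["predicted_evidence"]
    if max_evidence is not None:
        predicted = predicted[:max_evidence]
    for evidence_group in instance["evidence"]:
        # Only complete groups count: every [page, line] pair must have been predicted.
        if all([e[2], e[3]] in predicted for e in evidence_group):
            return 1.0, 1.0
    return 0.0, 1.0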