def make_annotation(self, pronoun, a_coref, b_coref):
    """Builds an Annotation from two coreference labels and a pronoun.

    Args:
      pronoun: Pronoun string; it is lower-cased and looked up in PRONOUNS to
        determine the gender.
      a_coref: Value stored as the annotation's name_a_coref.
      b_coref: Value stored as the annotation's name_b_coref.

    Returns:
      A new Annotation with name_a_coref, name_b_coref and gender set.

    Raises:
      AssertionError: If the pronoun does not resolve to a known gender.
    """
    annotation = Annotation()
    annotation.name_a_coref = a_coref
    annotation.name_b_coref = b_coref

    gender = PRONOUNS.get(pronoun.lower(), Gender.UNKNOWN)
    # Raised explicitly rather than via `assert` so the check still runs under
    # `python -O`; the exception type is kept as AssertionError for callers.
    if gender == Gender.UNKNOWN:
        raise AssertionError('Unknown pronoun: %r' % (pronoun,))
    annotation.gender = gender
    return annotation
def read_annotations(filename, is_gold):
    """Reads coreference annotations for the examples in the given file.

    Args:
      filename: Path to .tsv file to read.
      is_gold: Whether or not we are reading the gold annotations.

    Returns:
      A defaultdict mapping example ID strings to their Annotation
      representation. If reading gold, 'Pronoun' field is used to determine
      gender.

    Raises:
      AssertionError: If a gold row's pronoun does not resolve to a known
        gender.
    """

    def is_true(value):
        """Maps 'true'/'false' (any case) to a bool, anything else to None."""
        # csv.DictReader supplies None for columns missing from a short row;
        # treat that as an unexpected label instead of failing on .lower().
        label = value.lower() if value is not None else None
        if label == 'true':
            return True
        if label == 'false':
            return False
        print('Unexpected label!', value, "in file:", filename)
        return None

    fieldnames = GOLD_FIELDNAMES if is_gold else SYSTEM_FIELDNAMES
    annotations = defaultdict(Annotation)

    # Plain 'r': text mode already uses universal newlines on Python 3, and the
    # legacy 'U' flag is rejected with ValueError from Python 3.11 onwards.
    with open(filename, 'r') as f:
        reader = csv.DictReader(f, fieldnames=fieldnames, delimiter='\t')

        # Skip the header line in the gold data
        if is_gold:
            next(reader, None)

        # The first remaining row is always discarded as well.
        # NOTE(review): for gold files this stacks on the header skip above,
        # so two rows are dropped in total - confirm that this is intended.
        next(reader, None)

        for row in reader:
            example_id = row['ID']
            if example_id in annotations:
                print('Multiple annotations for', example_id)
                continue

            annotation = annotations[example_id]
            annotation.name_a_coref = is_true(row['A-coref'])
            annotation.name_b_coref = is_true(row['B-coref'])

            if is_gold:
                gender = PRONOUNS.get(row['Pronoun'].lower(), Gender.UNKNOWN)
                # Raised explicitly rather than via `assert` so the check
                # still runs under `python -O`; type and payload unchanged.
                if gender == Gender.UNKNOWN:
                    raise AssertionError(row)
                annotation.gender = gender

    return annotations