예제 #1
0
 def make_annotation(self, pronoun, a_coref, b_coref):
     """Builds an Annotation from a pronoun and two coreference labels.

     Args:
       pronoun: Pronoun string; it is lower-cased and looked up in PRONOUNS
         to determine the gender.
       a_coref: Value stored on the annotation as name_a_coref.
       b_coref: Value stored on the annotation as name_b_coref.

     Returns:
       A new Annotation with name_a_coref, name_b_coref and gender set.

     Raises:
       AssertionError: If the pronoun lookup yields Gender.UNKNOWN (which is
         also the fallback when the pronoun is not found in PRONOUNS).
     """
     result = Annotation()
     result.name_a_coref, result.name_b_coref = a_coref, b_coref

     # Unrecognised pronouns fall back to Gender.UNKNOWN and are rejected below.
     resolved = PRONOUNS.get(pronoun.lower(), Gender.UNKNOWN)
     assert resolved != Gender.UNKNOWN
     result.gender = resolved
     return result
예제 #2
0
def read_annotations(filename, is_gold):
  """Reads coreference annotations for the examples in the given file.

  The file is parsed as tab-separated values using GOLD_FIELDNAMES or
  SYSTEM_FIELDNAMES as the column names, depending on `is_gold`. An ID that
  appears more than once keeps its first annotation; later rows for it are
  reported on stdout and ignored.

  Args:
    filename: Path to .tsv file to read.
    is_gold: Whether or not we are reading the gold annotations.

  Returns:
    A defaultdict mapping example ID strings to their Annotation
    representation (looking up a missing ID creates a default Annotation). If
    reading gold, 'Pronoun' field is used to determine gender. A coref label
    that is neither 'true' nor 'false' (case-insensitive) is stored as None.

  Raises:
    AssertionError: If reading gold and a row's 'Pronoun' value resolves to
      Gender.UNKNOWN (the fallback for pronouns not found in PRONOUNS).
  """

  def is_true(value):
    """Maps 'true'/'false' (any casing) to a bool; warns and returns None otherwise."""
    lowered = value.lower()
    if lowered == 'true':
      return True
    if lowered == 'false':
      return False
    print('Unexpected label!', value, 'in file:', filename)
    return None

  fieldnames = GOLD_FIELDNAMES if is_gold else SYSTEM_FIELDNAMES

  annotations = defaultdict(Annotation)
  # Plain 'r' instead of the former 'rU': the 'U' flag was removed in
  # Python 3.11 (open() raises ValueError), and universal-newline handling is
  # already the default for text mode in Python 3.
  with open(filename, 'r') as f:
    reader = csv.DictReader(f, fieldnames=fieldnames, delimiter='\t')

    # Skip the header line in the gold data
    if is_gold:
      next(reader, None)

    # The first remaining row is always discarded, for gold and system alike.
    # NOTE(review): for gold input this comes on top of the header skip above,
    # so two rows are dropped in total -- confirm the gold file really has two
    # leading non-data rows; otherwise the first gold example is being lost.
    next(reader, None)

    for row in reader:
      example_id = row['ID']
      if example_id in annotations:
        print('Multiple annotations for', example_id)
        continue

      annotation = annotations[example_id]
      annotation.name_a_coref = is_true(row['A-coref'])
      annotation.name_b_coref = is_true(row['B-coref'])
      if is_gold:
        gender = PRONOUNS.get(row['Pronoun'].lower(), Gender.UNKNOWN)
        assert gender != Gender.UNKNOWN, row
        annotation.gender = gender
  return annotations