Beispiel #1
0
def ann_to_labels(ann_file: Union[str, Path],
                  document: Document,
                  label_index_name_prefix: str,
                  encoding: Optional[str],
                  create_indices: Optional[Iterable[str]] = None):
    """Reads the text-bound annotations of a brat annotations file into a document.

    Only lines whose first tab-separated field starts with ``T`` and that have
    at least three tab-separated fields are read; every other line is skipped.
    An annotation made of several ``start end`` pairs separated by ``;`` is
    stored as one label running from the smallest start to the largest end.

    Parameters
    ----------
    ann_file: str or Path
        A BRAT .ann file to load annotations from.
    document: Document
        The document to add labels to.
    label_index_name_prefix: str
        A prefix to prepend to the brat annotation names; the result is the
        name of the label index the annotation is added to.
    encoding: optional str
        The encoding to use when reading the ann file.
    create_indices: optional iterable of str
        These indices will be created no matter what, even if empty. The names
        are used exactly as given, the prefix is not applied to them.

    Raises
    ------
    ValueError
        If the second field of a ``T`` line is not of the form
        ``name start end[;start end...]`` with integer offsets.
    """
    labelers = {}
    if create_indices is not None:
        for index in create_indices:
            labelers[index] = document.get_labeler(index)
    with Path(ann_file).open('r', encoding=encoding) as f:
        # Iterate the file object directly so lines are streamed rather than
        # the whole file being loaded into a list first.
        for line in f:
            splits = line.split('\t')
            if len(splits) < 3 or not splits[0].startswith('T'):
                continue
            name, bounds = splits[1].split(' ', maxsplit=1)
            name = label_index_name_prefix + name
            min_start = float('Inf')
            max_end = 0
            for pair in bounds.split(';'):
                start_index, end_index = pair.split(' ')
                min_start = min(min_start, int(start_index))
                max_end = max(max_end, int(end_index))
            # One labeler per index name, created the first time it is needed.
            try:
                labeler = labelers[name]
            except KeyError:
                labeler = document.get_labeler(name)
                labelers[name] = labeler
            labeler(min_start, max_end)
    # Finalize every labeler, including those requested via create_indices
    # that received no labels.
    for labeler in labelers.values():
        labeler.done()
Beispiel #2
0
def test_copy_document():
    """Checks that CopyDocument('first', 'second') carries labels over.

    A document named 'first' is labeled on the 'some_index' index, the
    processor is run on its event, and the resulting 'second' document must
    expose equal labels on the same index.
    """
    # (start, end, word) triples used both to create and to verify the labels.
    spans = [(0, 3, 'The'), (4, 9, 'quick'), (10, 15, 'brown')]

    event = Event()
    source = Document(document_name='first',
                      text='The quick brown fox jumped over the lazy dog.')
    event.add_document(source)
    with source.get_labeler('some_index') as add_label:
        for start, end, word in spans:
            add_label(start, end, word=word)

    CopyDocument('first', 'second').process(event, {})

    copied = event.documents['second']
    assert copied is not None
    expected = [GenericLabel(start, end, word=word) for start, end, word in spans]
    assert copied.get_label_index('some_index') == expected
Beispiel #3
0
    def process_document(self, document: Document,
                         params: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Labels the document with how often the letters 'a' and 'b' occur.

        Parameters
        ----------
        document: Document
            The document whose text is counted and which receives the labels.
        params: dict
            Processing parameters. Work is only done when ``params['do_work']``
            is truthy.

        Returns
        -------
        dict
            Always ``{'answer': 42}``, whether or not any work was done.
        """
        if not params['do_work']:
            return {'answer': 42}

        # Reading the text is timed under the 'fetch_time' stopwatch key.
        with self.started_stopwatch('fetch_time'):
            content = document.text

        tallies = [(letter, content.count(letter)) for letter in ('a', 'b')]

        with document.get_labeler('mtap.examples.letter_counts') as add_count:
            # Each label spans the entire document text.
            for letter, tally in tallies:
                add_count(start_index=0,
                          end_index=len(document.text),
                          letter=letter,
                          count=tally)

        return {'answer': 42}