def copy_document(event: mtap.Event,
                  source_document_name: str,
                  target_document_name: str,
                  index_names: typing.Sequence[str] = ...):
    """Duplicate a document and its label indices under a new name on the same event.

    A new document is created with the source document's text, added to the
    event, and then populated with the selected label indices.

    Parameters
    ----------
    event: Event
        The event that holds the source document and receives the copy.
    source_document_name: str
        Name of the document to copy from.
    target_document_name: str
        Name given to the newly created document.
    index_names: Sequence[str]
        Names of the label indices to copy. When left at its default
        (``...``), every label index on the source document is copied.
    """
    original = event.documents[source_document_name]
    duplicate = mtap.Document(target_document_name, text=original.text)
    event.add_document(duplicate)

    # Ellipsis is the "not specified" sentinel: fall back to all indices.
    names_to_copy = list(original.labels) if index_names is ... else index_names

    for name in names_to_copy:
        label_index = original.labels[name]
        # Carry the distinct flag over so the copy matches the source index.
        duplicate.add_labels(name, label_index, distinct=label_index.distinct)
def test_yml_serializer():
    """Round-trip an event through YamlSerializer and compare it with the original."""
    source_event = Event(event_id='1')
    source_event.metadata['foo'] = "bar"
    source_doc = Document('plaintext', text='Some text.')
    source_event.add_document(source_doc)

    first = label(start_index=0, end_index=5, x=10)
    second = label(start_index=6, end_index=10, x=15)
    source_doc.add_labels('one', [first, second])
    # The labels in 'two' carry the labels of 'one' in their ``b`` field.
    source_doc.add_labels(
        'two',
        [
            label(start_index=0, end_index=25, a='b', b=first),
            label(start_index=26, end_index=42, a='c', b=second),
        ],
    )
    source_doc.add_labels(
        'three',
        [
            label(start_index=0, end_index=10, foo=True),
            label(start_index=11, end_index=15, foo=False),
        ],
        distinct=True,
    )

    with TemporaryFile('w+') as stream:
        YamlSerializer.event_to_file(source_event, stream)
        stream.flush()
        # Rewind so the deserializer reads what was just written.
        stream.seek(0)
        restored = YamlSerializer.file_to_event(stream)

    assert restored.event_id == source_event.event_id
    assert restored.metadata['foo'] == 'bar'

    restored_doc = restored.documents['plaintext']
    assert restored_doc.text == source_doc.text

    assert restored_doc.labels['one'] == [first, second]
    assert restored_doc.labels['two'] == [
        label(start_index=0, end_index=25, a='b', b=first),
        label(start_index=26, end_index=42, a='c', b=second),
    ]
    assert restored_doc.labels['three'] == [
        label(start_index=0, end_index=10, foo=True),
        label(start_index=11, end_index=15, foo=False),
    ]
def test_copy_document():
    """CopyDocument should reproduce the labels of 'first' on a new document 'second'."""
    event = Event()
    source = Document(document_name='first',
                      text='The quick brown fox jumped over the lazy dog.')
    event.add_document(source)

    # (start, end, word) spans used both to create and to verify the labels.
    spans = [(0, 3, 'The'), (4, 9, 'quick'), (10, 15, 'brown')]
    with source.get_labeler('some_index') as add_label:
        for begin, end, word in spans:
            add_label(begin, end, word=word)

    CopyDocument('first', 'second').process(event, {})

    copied = event.documents['second']
    assert copied is not None
    assert copied.labels['some_index'] == [
        GenericLabel(begin, end, word=word) for begin, end, word in spans
    ]
def test_yml_serializer():
    """Serialize an event with YamlSerializer and check the raw YAML structure."""
    source_event = Event(event_id='1')
    source_event.metadata['foo'] = "bar"
    source_doc = Document('plaintext', text='Some text.')
    source_event.add_document(source_doc)
    source_doc.add_labels(
        'one',
        [
            label(start_index=0, end_index=5, x=10),
            label(start_index=6, end_index=10, x=15),
        ],
    )
    source_doc.add_labels(
        'two',
        [
            label(start_index=0, end_index=25, a='b'),
            label(start_index=26, end_index=42, a='c'),
        ],
    )
    source_doc.add_labels(
        'three',
        [
            label(start_index=0, end_index=10, foo=True),
            label(start_index=11, end_index=15, foo=False),
        ],
        distinct=True,
    )

    with TemporaryFile('w+') as stream:
        YamlSerializer.event_to_file(source_event, stream)
        stream.flush()
        # Rewind and parse the YAML this test has just written.
        stream.seek(0)
        parsed = yaml.load(stream, Loader=Loader)

    assert parsed['event_id'] == '1'
    assert parsed['metadata']['foo'] == 'bar'

    parsed_doc = parsed['documents']['plaintext']
    assert parsed_doc['text'] == 'Some text.'

    indices = parsed_doc['label_indices']
    assert len(indices) == 3

    expected_one = {
        'json_labels': [
            {'start_index': 0, 'end_index': 5, 'x': 10},
            {'start_index': 6, 'end_index': 10, 'x': 15},
        ],
        'distinct': False,
    }
    expected_two = {
        'json_labels': [
            {'start_index': 0, 'end_index': 25, 'a': 'b'},
            {'start_index': 26, 'end_index': 42, 'a': 'c'},
        ],
        'distinct': False,
    }
    expected_three = {
        'json_labels': [
            {'start_index': 0, 'end_index': 10, 'foo': True},
            {'start_index': 11, 'end_index': 15, 'foo': False},
        ],
        'distinct': True,
    }
    assert indices['one'] == expected_one
    assert indices['two'] == expected_two
    assert indices['three'] == expected_three