Ejemplos de extract_from_annotated_document en Python

Lenguaje de programación: Python

Namespace/Package Name: sciencebeam_gym.inference_model.extract_from_annotated_document

Método / Función: extract_from_annotated_document

Ejemplos en hotexamples.com: 8

Python extract_from_annotated_document - 8 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de sciencebeam_gym.inference_model.extract_from_annotated_document.extract_from_annotated_document extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Ejemplo n.º 1

0

Mostrar archivo

Archivo: extract_from_annotated_document_test.py Proyecto: pranavshah01/sciencebeam-gym

 def test_should_extract_single_annotated_line(self):
     """One annotated line is extracted as one (tag, text) item."""
     document = SimpleStructuredDocument(
         lines=[annotated_line(TEXT_1, TAG_1)])
     tag_text_pairs = []
     for item in extract_from_annotated_document(document):
         tag_text_pairs.append((item.tag, item.text))
     assert tag_text_pairs == [(TAG_1, TEXT_1)]

Ejemplo n.º 2

0

Mostrar archivo

Archivo: extract_from_annotated_document_test.py Proyecto: pranavshah01/sciencebeam-gym

 def test_should_extract_sub_tags_from_single_item(self):
     """Sub tags inside one tagged item are reported as its sub items.

     Three tokens share TAG_1 (prefixes B, I, I). The first two carry sub
     tag TAG_2 (prefixes B, I) and the third starts sub tag TAG_3. The
     expected outcome is a single TAG_1 item with two sub items: TAG_2
     covering the first two values and TAG_3 covering the third.
     """
     # (value, tag prefix, sub tag, sub tag prefix) per token, in order
     token_specs = [
         (VALUE_1, B_TAG_PREFIX, TAG_2, B_TAG_PREFIX),
         (VALUE_2, I_TAG_PREFIX, TAG_2, I_TAG_PREFIX),
         (VALUE_3, I_TAG_PREFIX, TAG_3, B_TAG_PREFIX),
     ]
     tokens = [
         _token_with_sub_tag(
             value, tag=TAG_1, tag_prefix=tag_prefix,
             sub_tag=sub_tag, sub_tag_prefix=sub_tag_prefix)
         for value, tag_prefix, sub_tag, sub_tag_prefix in token_specs
     ]
     document = SimpleStructuredDocument(lines=[SimpleLine(tokens)])
     # Materialise all items first, then inspect them.
     extracted_items = list(extract_from_annotated_document(document))
     result = []
     for item in extracted_items:
         sub_pairs = [(sub.tag, sub.text) for sub in item.sub_items]
         result.append((item.tag, item.text, sub_pairs))
     get_logger().debug('result: %s', result)
     expected_text = ' '.join([VALUE_1, VALUE_2, VALUE_3])
     expected_sub_pairs = [
         (TAG_2, ' '.join([VALUE_1, VALUE_2])),
         (TAG_3, VALUE_3),
     ]
     assert result == [(TAG_1, expected_text, expected_sub_pairs)]

Ejemplo n.º 3

0

Mostrar archivo

Archivo: extract_from_annotated_document_test.py Proyecto: pranavshah01/sciencebeam-gym

 def test_should_extract_from_different_tag_scope(self):
     """A tag stored under TAG_SCOPE_1 is found when that scope is passed."""
     token = SimpleToken(TEXT_1, tag=TAG_1, tag_scope=TAG_SCOPE_1)
     document = SimpleStructuredDocument(lines=[SimpleLine([token])])
     extracted = extract_from_annotated_document(
         document, tag_scope=TAG_SCOPE_1)
     tag_text_pairs = [(item.tag, item.text) for item in extracted]
     assert tag_text_pairs == [(TAG_1, TEXT_1)]

Ejemplo n.º 4

0

Mostrar archivo

Archivo: extract_from_annotated_document_test.py Proyecto: pranavshah01/sciencebeam-gym

 def test_should_combine_multiple_lines(self):
     """Two lines with the same tag become one newline-joined item."""
     document = SimpleStructuredDocument(lines=[
         annotated_line(TEXT_1, TAG_1),
         annotated_line(TEXT_2, TAG_1),
     ])
     tag_text_pairs = []
     for item in extract_from_annotated_document(document):
         tag_text_pairs.append((item.tag, item.text))
     get_logger().debug('result: %s', tag_text_pairs)
     expected_text = '\n'.join([TEXT_1, TEXT_2])
     assert tag_text_pairs == [(TAG_1, expected_text)]

Ejemplo n.º 5

0

Mostrar archivo

Archivo: extract_from_annotated_document_test.py Proyecto: pranavshah01/sciencebeam-gym

 def test_should_extract_multiple_annotations_on_single_line(self):
     """Tagged, untagged and tagged runs on one line give three items.

     The untagged run in the middle is expected with ``None`` as its tag.
     """
     leading_tagged = annotated_tokens(TEXT_1, TAG_1)
     untagged = to_tokens(TEXT_2)
     trailing_tagged = annotated_tokens(TEXT_3, TAG_3)
     line = to_line(leading_tagged + untagged + trailing_tagged)
     document = SimpleStructuredDocument(lines=[line])
     tag_text_pairs = []
     for item in extract_from_annotated_document(document):
         tag_text_pairs.append((item.tag, item.text))
     expected = [(TAG_1, TEXT_1), (None, TEXT_2), (TAG_3, TEXT_3)]
     assert tag_text_pairs == expected

Ejemplo n.º 6

0

Mostrar archivo

Archivo: extract_from_annotated_document_test.py Proyecto: pranavshah01/sciencebeam-gym

 def test_should_separate_items_based_on_tag_prefix(self):
     """A B-prefixed token starts a new item although the tag is unchanged.

     The same B, I, I token sequence (all TAG_1) appears twice on one line
     and is expected to produce two separate TAG_1 items.
     """
     # (value, tag prefix) for one item; repeated twice on the line
     one_item_specs = [
         (VALUE_1, B_TAG_PREFIX),
         (VALUE_2, I_TAG_PREFIX),
         (VALUE_3, I_TAG_PREFIX),
     ]
     tokens = [
         SimpleToken(value, tag=TAG_1, tag_prefix=prefix)
         for value, prefix in one_item_specs * 2
     ]
     document = SimpleStructuredDocument(lines=[SimpleLine(tokens)])
     tag_text_pairs = []
     for item in extract_from_annotated_document(document):
         tag_text_pairs.append((item.tag, item.text))
     get_logger().debug('result: %s', tag_text_pairs)
     item_text = ' '.join([VALUE_1, VALUE_2, VALUE_3])
     assert tag_text_pairs == [(TAG_1, item_text), (TAG_1, item_text)]

Ejemplo n.º 7

0

Mostrar archivo

Archivo: extract_from_annotated_document_test.py Proyecto: pranavshah01/sciencebeam-gym

 def test_should_not_fail_on_empty_document(self):
     """Extraction from a document created without lines must not raise.

     The result is consumed with ``list`` rather than discarded: if
     ``extract_from_annotated_document`` returns a lazy iterable (e.g. a
     generator), merely calling it would not run any extraction code and
     the test could never fail.
     """
     structured_document = SimpleStructuredDocument()
     # Force full iteration so errors raised during extraction surface here.
     list(extract_from_annotated_document(structured_document))

Ejemplo n.º 8

0

Mostrar archivo

Archivo: extract_to_xml.py Proyecto: pvk444/sciencebeam-gym

def extract_structured_document_to_xml(structured_document, tag_scope=None):
    """Extract items from an annotated document and hand them to the XML step.

    Args:
        structured_document: Document passed unchanged to
            ``extract_from_annotated_document``.
        tag_scope: Forwarded as the ``tag_scope`` keyword of the extraction
            call; defaults to ``None``.

    Returns:
        Whatever ``extracted_items_to_xml`` returns for the extracted items.
    """
    extracted_items = extract_from_annotated_document(
        structured_document, tag_scope=tag_scope)
    return extracted_items_to_xml(extracted_items)