def test_should_add_parsed_references(self):
     """Reference list in the back section is rendered below tei:listBibl.

     Checks the list heading, the main title of the reference, its raw
     reference note and the ``xml:id`` taken from the content id.
     """
     document = SemanticDocument()
     reference = SemanticReference([
         SemanticTitle(
             layout_block=LayoutBlock.for_text('Reference Title 1')),
         SemanticRawReferenceText(
             layout_block=LayoutBlock.for_text('Reference 1'))
     ])
     reference.content_id = 'b0'
     back_section = document.back_section
     reference_list = SemanticReferenceList([
         SemanticHeading(layout_block=LayoutBlock.for_text('References')),
         reference
     ])
     back_section.add_content(reference_list)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         (
             '//tei:back/tei:div[@type="references"]/tei:listBibl/tei:head',
             ['References']
         ),
         (
             '//tei:back/tei:div[@type="references"]/tei:listBibl'
             '/tei:biblStruct/tei:analytic/tei:title[@type="main"]',
             ['Reference Title 1']
         ),
         (
             '//tei:back/tei:div[@type="references"]/tei:listBibl'
             '/tei:biblStruct/tei:note[@type="raw_reference"]',
             ['Reference 1']
         ),
         (
             '//tei:back/tei:div[@type="references"]/tei:listBibl'
             '/tei:biblStruct/@xml:id',
             ['b0']
         ),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
 def test_should_add_section_figures_to_back(self):
     """Figure wrapped in a back section is emitted under the annex div.

     Checks head, label, figDesc and ``xml:id`` of the non-table figure and
     that no nested ``tei:div`` is present below the annex div.
     """
     document = SemanticDocument()
     back_section = document.back_section
     figure = SemanticFigure(
         [
             SemanticLabel(layout_block=LayoutBlock.for_text('Label 1')),
             SemanticCaption(layout_block=LayoutBlock.for_text('Caption 1'))
         ],
         content_id='fig_0'
     )
     back_section.add_content(SemanticSection([figure]))
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     figure_xpath = (
         '//tei:back/tei:div[@type="annex"]/tei:figure[not(contains(@type, "table"))]'
     )
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         (f'{figure_xpath}/tei:head', ['Label 1']),
         (f'{figure_xpath}/tei:label', ['Label 1']),
         (f'{figure_xpath}/tei:figDesc', ['Caption 1']),
         (f'{figure_xpath}/@xml:id', ['fig_0']),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
     nested_divs = tei.xpath('//tei:back/tei:div[@type="annex"]/tei:div')
     assert not nested_divs
 def test_should_add_single_author(self):
     """Author name parts added to the front appear below tei:author.

     Checks roleName, first and middle forename, surname and genName.
     """
     document = SemanticDocument()
     name_parts = [
         SemanticNameTitle(layout_block=LayoutBlock.for_text('Title1')),
         SemanticGivenName(layout_block=LayoutBlock.for_text('Given1')),
         SemanticMiddleName(layout_block=LayoutBlock.for_text('Middle1')),
         SemanticSurname(layout_block=LayoutBlock.for_text('Surname1')),
         SemanticNameSuffix(layout_block=LayoutBlock.for_text('Suffix1')),
     ]
     author = SemanticAuthor(name_parts)
     document.front.add_content(author)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         ('//tei:author//tei:roleName', ['Title1']),
         ('//tei:author//tei:forename[@type="first"]', ['Given1']),
         ('//tei:author//tei:forename[@type="middle"]', ['Middle1']),
         ('//tei:author//tei:surname', ['Surname1']),
         ('//tei:author//tei:genName', ['Suffix1']),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
 def test_should_add_asset_citation_for_resolved_reference(self):
     """Reference citation in a body paragraph becomes a bibr ref.

     Checks the paragraph text, the text of the ``tei:ref`` and that its
     ``target`` is the referenced content id prefixed with ``#``.
     """
     document = SemanticDocument()
     body_section = document.body_section
     paragraph = SemanticParagraph([
         SemanticTextContentWrapper(
             layout_block=LayoutBlock.for_text('See')),
         SemanticReferenceCitation(
             layout_block=LayoutBlock.for_text('Ref 1'),
             target_content_id='b0')
     ])
     reference_list = SemanticReferenceList([
         SemanticReference(
             [SemanticLabel(layout_block=LayoutBlock.for_text('1'))],
             content_id='b0'
         )
     ])
     body_section.add_content(SemanticSection([paragraph, reference_list]))
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         ('//tei:body/tei:div/tei:p', ['See Ref 1']),
         ('//tei:body/tei:div/tei:p/tei:ref[@type="bibr"]', ['Ref 1']),
         ('//tei:body/tei:div/tei:p/tei:ref[@type="bibr"]/@target', ['#b0']),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
 def test_should_set_abstract(self):
     """Abstract added to the front is rendered as tei:abstract/tei:p."""
     document = SemanticDocument()
     front = document.front
     abstract = SemanticAbstract(LayoutBlock.for_text(TOKEN_1))
     front.add_content(abstract)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     abstract_texts = tei.get_xpath_text_content_list('//tei:abstract/tei:p')
     assert abstract_texts == [TOKEN_1]
 def test_should_add_notes_to_body(self):
     """Note added to the body section is rendered as a typed tei:note."""
     document = SemanticDocument()
     body_section = document.body_section
     note_block = LayoutBlock.for_text(TOKEN_1)
     body_section.add_note(note_block, 'other')
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     note_texts = tei.get_xpath_text_content_list(
         '//tei:body/tei:note[@type="other"]')
     assert note_texts == [TOKEN_1]
 def test_should_set_manuscript_title(self):
     """Title added to the front is rendered as the main titleStmt title."""
     document = SemanticDocument()
     front = document.front
     title = SemanticTitle(layout_block=LayoutBlock.for_text(TOKEN_1))
     front.add_content(title)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     title_texts = tei.get_xpath_text_content_list(
         '//tei:fileDesc/tei:titleStmt/tei:title[@level="a"][@type="main"]'
     )
     assert title_texts == [TOKEN_1]
 def get_tei_document_for_layout_document(
     self,
     layout_document: LayoutDocument
 ) -> TeiDocument:
     """Return the TEI document for the given layout document.

     The layout document is first turned into a semantic document via
     ``get_semantic_document_for_layout_document``, which is then passed to
     ``get_tei_for_semantic_document``.
     """
     semantic_document = self.get_semantic_document_for_layout_document(
         layout_document
     )
     return get_tei_for_semantic_document(semantic_document)
 def test_should_create_back_section(self):
     """New back section with heading and paragraph ends up in the annex div.

     Checks the ``tei:head`` and ``tei:p`` of the nested ``tei:div``.
     """
     document = SemanticDocument()
     new_section = document.back_section.add_new_section()
     new_section.add_heading_block(LayoutBlock.for_text(TOKEN_1))
     new_paragraph = new_section.add_new_paragraph()
     new_paragraph.add_block_content(LayoutBlock.for_text(TOKEN_2))
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         ('//tei:back/tei:div[@type="annex"]/tei:div/tei:head', [TOKEN_1]),
         ('//tei:back/tei:div[@type="annex"]/tei:div/tei:p', [TOKEN_2]),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
 def test_should_unmatched_graphics_to_back(self):
     """Graphic in an ``unmatched_graphics`` note is rendered as tei:graphic.

     Checks that the graphic node exists below the note and that its ``url``
     is the relative path of the semantic graphic.
     """
     document = SemanticDocument()
     back_section = document.back_section
     graphic = SemanticGraphic(
         layout_graphic=LayoutGraphic(coordinates=COORDINATES_1),
         relative_path='image1.svg'
     )
     unmatched_note = SemanticMixedNote(
         [graphic], note_type='unmatched_graphics')
     back_section.add_content(unmatched_note)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     graphics_xpath = '//tei:note[@type="unmatched_graphics"]//tei:graphic'
     graphic_nodes = tei.xpath_nodes(graphics_xpath)
     assert graphic_nodes
     graphic_urls = tei.get_xpath_text_content_list(f'{graphics_xpath}/@url')
     assert graphic_urls == ['image1.svg']
 def test_should_add_orphan_affiliation(self):
     """Affiliation without an author is still rendered below tei:author.

     Checks the raw affiliation note, its label and the ``key`` attribute.
     """
     document = SemanticDocument()
     marker = SemanticMarker(layout_block=LayoutBlock.for_text('1'))
     org = SemanticInstitution(
         layout_block=LayoutBlock.for_text('Institution1'))
     affiliation = SemanticAffiliationAddress(
         [marker, org], content_id='aff0')
     document.front.add_content(affiliation)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     raw_affiliation_texts = tei.get_xpath_text_content_list(
         '//tei:author/tei:affiliation/tei:note[@type="raw_affiliation"]'
     )
     assert raw_affiliation_texts == [affiliation.get_text()]
     label_texts = tei.get_xpath_text_content_list(
         '//tei:author/tei:affiliation/tei:note[@type="raw_affiliation"]/tei:label'
     )
     assert label_texts == [marker.get_text()]
     affiliation_keys = tei.get_xpath_text_content_list(
         '//tei:author/tei:affiliation/@key')
     assert affiliation_keys == ['aff0']
 def test_should_add_single_author_with_affiliation(self):
     """Author and affiliation sharing marker text '1' are rendered together.

     Checks the author name parts plus the raw affiliation note, its label,
     the ``key`` attribute and the institution ``orgName`` below the author.
     """
     document = SemanticDocument()
     name_parts = [
         SemanticNameTitle(layout_block=LayoutBlock.for_text('Title1')),
         SemanticGivenName(layout_block=LayoutBlock.for_text('Given1')),
         SemanticMiddleName(layout_block=LayoutBlock.for_text('Middle1')),
         SemanticSurname(layout_block=LayoutBlock.for_text('Surname1')),
         SemanticNameSuffix(layout_block=LayoutBlock.for_text('Suffix1')),
     ]
     author_marker = SemanticMarker(layout_block=LayoutBlock.for_text('1'))
     author = SemanticAuthor(name_parts + [author_marker])
     marker = SemanticMarker(layout_block=LayoutBlock.for_text('1'))
     org = SemanticInstitution(
         layout_block=LayoutBlock.for_text('Institution1'))
     affiliation = SemanticAffiliationAddress(
         [marker, org], content_id='aff0')
     front = document.front
     front.add_content(author)
     front = document.front
     front.add_content(affiliation)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     # (xpath, expected text content) pairs for the name parts, in order
     name_expectations = [
         ('//tei:author//tei:roleName', ['Title1']),
         ('//tei:author//tei:forename[@type="first"]', ['Given1']),
         ('//tei:author//tei:forename[@type="middle"]', ['Middle1']),
         ('//tei:author//tei:surname', ['Surname1']),
         ('//tei:author//tei:genName', ['Suffix1']),
     ]
     for xpath, expected in name_expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
     raw_affiliation_texts = tei.get_xpath_text_content_list(
         '//tei:author/tei:affiliation/tei:note[@type="raw_affiliation"]'
     )
     assert raw_affiliation_texts == [affiliation.get_text()]
     label_texts = tei.get_xpath_text_content_list(
         '//tei:author/tei:affiliation/tei:note[@type="raw_affiliation"]/tei:label'
     )
     assert label_texts == [marker.get_text()]
     affiliation_keys = tei.get_xpath_text_content_list(
         '//tei:author/tei:affiliation/@key')
     assert affiliation_keys == ['aff0']
     institution_texts = tei.get_xpath_text_content_list(
         '//tei:author/tei:affiliation/tei:orgName[@type="institution"]'
     )
     assert institution_texts == [org.get_text()]
 def test_should_use_author_name_part_values(self):
     """The ``value`` of a name part is used instead of its layout text.

     Each name part is created from upper-case text and then given a
     differently cased ``value``; the TEI is expected to contain the value.
     """
     document = SemanticDocument()
     first = SemanticGivenName(layout_block=LayoutBlock.for_text('GIVEN1'))
     first.value = 'Given1'
     middle = SemanticMiddleName(
         layout_block=LayoutBlock.for_text('MIDDLE1'))
     middle.value = 'Middle1'
     last = SemanticSurname(layout_block=LayoutBlock.for_text('SURNAME1'))
     last.value = 'Surname1'
     author = SemanticAuthor([first, middle, last])
     document.front.add_content(author)
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         ('//tei:author//tei:forename[@type="first"]', ['Given1']),
         ('//tei:author//tei:forename[@type="middle"]', ['Middle1']),
         ('//tei:author//tei:surname', ['Surname1']),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
Example #14
0
 def get_local_file_for_response_media_type(
         self, response_media_type: str) -> str:
     """Serialize the semantic document and return the local file path.

     The semantic document is converted to TEI. For media types in
     ``JATS_MEDIA_TYPES`` the TEI root is passed through
     ``_get_tei_to_jats_xml_root`` and written as ``jats.xml``, otherwise
     it is written as ``tei.xml``; both go into ``self.temp_dir``. For media
     types in ``ASSET_ZIP_MEDIA_TYPES`` a ``results.zip`` is created via
     ``create_asset_zip_for_semantic_document`` and its path is returned
     instead of the XML path.

     Raises:
         UnsupportedResponseMediaTypeScienceBeamParserError: if the media
             type is not among ``get_supported_response_media_type()``.
     """
     supported_media_types = self.get_supported_response_media_type()
     if response_media_type not in supported_media_types:
         raise UnsupportedResponseMediaTypeScienceBeamParserError()
     xml_root = get_tei_for_semantic_document(self.semantic_document).root
     if response_media_type in JATS_MEDIA_TYPES:
         xml_root = self._get_tei_to_jats_xml_root(xml_root)
         relative_xml_filename = 'jats.xml'
     else:
         relative_xml_filename = 'tei.xml'
     local_xml_filename = os.path.join(self.temp_dir, relative_xml_filename)
     self._serialize_xml_to_file(xml_root, local_xml_filename)
     LOGGER.debug('local_xml_filename: %r', local_xml_filename)
     if response_media_type not in ASSET_ZIP_MEDIA_TYPES:
         return local_xml_filename
     # asset zip requested: bundle the XML written above into results.zip
     zip_filename = os.path.join(self.temp_dir, 'results.zip')
     create_asset_zip_for_semantic_document(
         zip_filename,
         semantic_document=self.semantic_document,
         local_xml_filename=local_xml_filename,
         relative_xml_filename=relative_xml_filename)
     return zip_filename
 def test_should_add_raw_equation_with_label_to_paragraph(self):
     """Raw equation inside a paragraph is emitted as a separate tei:formula.

     Checks the formula text (content followed by label), the formula's
     ``tei:label`` and that the paragraph keeps only the remaining text.
     """
     # to be consistent with Java GROBID
     document = SemanticDocument()
     body_section = document.body_section
     leading_text = SemanticTextContentWrapper(
         layout_block=LayoutBlock.for_text('Next'))
     equation = SemanticRawEquation([
         SemanticRawEquationContent(
             layout_block=LayoutBlock.for_text('Equation 1')),
         SemanticLabel(layout_block=LayoutBlock.for_text('(1)'))
     ])
     paragraph = SemanticParagraph([leading_text, equation])
     body_section.add_content(SemanticSection([paragraph]))
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         ('//tei:body/tei:div/tei:formula', ['Equation 1 (1)']),
         ('//tei:body/tei:div/tei:formula/tei:label', ['(1)']),
         ('//tei:body/tei:div/tei:p', ['Next']),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
 def test_should_add_section_tables_to_body(self):
     """Table wrapped in a body section is emitted directly under tei:body.

     Checks head, label, figDesc and ``xml:id`` of the table figure and that
     no ``tei:div`` is present below the body.
     """
     document = SemanticDocument()
     body_section = document.body_section
     table = SemanticTable(
         [
             SemanticLabel(layout_block=LayoutBlock.for_text('Label 1')),
             SemanticCaption(layout_block=LayoutBlock.for_text('Caption 1'))
         ],
         content_id='tab_0'
     )
     body_section.add_content(SemanticSection([table]))
     tei = get_tei_for_semantic_document(document)
     LOGGER.debug('tei xml: %r', etree.tostring(tei.root))
     table_xpath = '//tei:body/tei:figure[@type="table"]'
     # (xpath, expected text content) pairs, checked in order
     expectations = [
         (f'{table_xpath}/tei:head', ['Label 1']),
         (f'{table_xpath}/tei:label', ['Label 1']),
         (f'{table_xpath}/tei:figDesc', ['Caption 1']),
         (f'{table_xpath}/@xml:id', ['tab_0']),
     ]
     for xpath, expected in expectations:
         assert tei.get_xpath_text_content_list(xpath) == expected
     body_divs = tei.xpath('//tei:body/tei:div')
     assert not body_divs
 def test_should_return_empty_document(self):
     """An empty semantic document produces TEI without any tei:div."""
     empty_document = SemanticDocument()
     tei = get_tei_for_semantic_document(empty_document)
     div_nodes = tei.xpath('//tei:div')
     assert not div_nodes