def test_should_ignore_sub_fields_if_excluded(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper,
            segment_affiliation: bool, sub_fields: str, expected_country: str):
        """Check the country result when only some sub-fields are requested.

        The JATS target tags a label and a country; the raw TEI tags the
        label as ``other`` and leaves the country as plain text. The run uses
        the caller-supplied ``segment-affiliation`` and ``sub-fields`` values
        with ``preserve-sub-tags`` enabled and ``author_aff-label`` listed in
        ``no-preserve-sub-fields``.

        The marker must equal ``LABEL_1`` and the country text must equal
        ``expected_country``. The three extra arguments are presumably
        provided by a parametrize decorator that is not visible here.
        """
        jats_aff = E.aff(E.label(LABEL_1), ' ', E.country(COUNTRY_1))
        target_jats_xml = etree.tostring(
            get_target_xml_node(affiliation_nodes=[jats_aff]))

        raw_tei_aff = TEI_E.affiliation(TEI_E.other(LABEL_1), ' ', COUNTRY_1)
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(get_affiliation_tei_node([raw_tei_aff])))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'segment-affiliation': segment_affiliation,
            'fields': 'author_aff',
            'sub-fields': sub_fields,
            'preserve-sub-tags': True,
            'no-preserve-sub-fields': 'author_aff-label',
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_aff = get_first_affiliation(test_helper.get_tei_auto_root())
        assert get_tei_xpath_text(annotated_aff, './tei:marker') == LABEL_1
        country_text = get_tei_xpath_text(
            annotated_aff, './tei:address/tei:country')
        assert country_text == expected_country
コード例 #2
0
    def test_should_auto_annotate_single_figure_with_label_and_caption(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Auto-annotate one figure made of a label and a caption paragraph.

        The JATS target holds a ``fig`` with a label and a caption; the raw
        TEI holds a ``figure`` containing the text of those same nodes. After
        running ``main`` for the ``figure`` field, the first figure of the
        auto-annotated TEI must expose the label text and the caption text
        (as ``figDesc``).
        """
        figure_children = [E.label(LABEL_1), ' ', E.caption(E.p(TEXT_1))]
        jats_root = get_target_xml_node(body_nodes=[E.fig(*figure_children)])
        target_jats_xml = etree.tostring(jats_root)

        raw_tei_root = get_training_tei_node(
            [E.figure(get_nodes_text(figure_children))])
        test_helper.tei_raw_file_path.write_bytes(etree.tostring(raw_tei_root))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'fields': 'figure',
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_figure = get_first_figure(test_helper.get_tei_auto_root())
        label_text = get_tei_xpath_text(annotated_figure, './/label')
        assert label_text == LABEL_1
        caption_text = get_tei_xpath_text(annotated_figure, './figDesc')
        assert caption_text == TEXT_1
    def test_should_not_preserve_existing_sub_tags_if_provided_by_excluded(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper,
            segment_affiliation: bool):
        """Check that an excluded sub-field's existing tag is not kept.

        In the raw TEI, ``LABEL_1`` is tagged ``other``, ``LABEL_2`` is tagged
        ``marker`` and ``TEXT_1`` is tagged ``preserved``. The JATS target
        labels only ``LABEL_1``. The run enables ``preserve-sub-tags`` but
        lists ``author_aff-label`` in ``no-preserve-sub-fields``.

        The resulting marker must be ``LABEL_1`` (not the pre-existing
        ``LABEL_2`` marker), while the ``preserved`` element still holds
        ``TEXT_1``.
        """
        trailing_text = ' ' + LABEL_2 + ' ' + TEXT_1
        jats_aff = E.aff(E.label(LABEL_1), trailing_text)
        target_jats_xml = etree.tostring(
            get_target_xml_node(affiliation_nodes=[jats_aff]))

        raw_tei_aff = TEI_E.affiliation(
            TEI_E.other(LABEL_1), ' ',
            TEI_E.marker(LABEL_2), ' ',
            TEI_E.preserved(TEXT_1))
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(get_affiliation_tei_node([raw_tei_aff])))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'segment-affiliation': segment_affiliation,
            'fields': 'author_aff',
            'preserve-sub-tags': True,
            'no-preserve-sub-fields': 'author_aff-label',
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_aff = get_first_affiliation(test_helper.get_tei_auto_root())
        marker_text = get_tei_xpath_text(annotated_aff, './tei:marker')
        assert marker_text == LABEL_1
        preserved_text = get_tei_xpath_text(annotated_aff, './tei:preserved')
        assert preserved_text == TEXT_1
    def test_should_group_preserved_and_auto_annotated_address_fields(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Check the address when mixing preserved and re-annotated sub-tags.

        The raw TEI already contains a marker and an ``address`` holding a
        settlement and a country. The JATS target tags the label and the
        country only. The run uses ``segment-affiliation`` and
        ``preserve-sub-tags``, with the label and country sub-fields listed
        in ``no-preserve-sub-fields``.

        The output must expose the marker, the settlement and the country,
        and the ``address`` text must read ``CITY_1 + ', ' + COUNTRY_1``.
        """
        untagged_prefix = TEXT_1 + ' ' + CITY_1 + ', '
        jats_aff = E.aff(
            E.label(LABEL_1), untagged_prefix, E.country(COUNTRY_1))
        target_jats_xml = etree.tostring(
            get_target_xml_node(affiliation_nodes=[jats_aff]))

        raw_tei_address = TEI_E.address(
            TEI_E.settlement(CITY_1), ', ', TEI_E.country(COUNTRY_1))
        raw_tei_aff = TEI_E.affiliation(
            TEI_E.marker(LABEL_1), ' ' + TEXT_1 + ' ', raw_tei_address)
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(get_affiliation_tei_node([raw_tei_aff])))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'segment-affiliation': True,
            'preserve-sub-tags': True,
            'no-preserve-sub-fields': 'author_aff-label,author_aff-country',
            'fields': 'author_aff',
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_aff = get_first_affiliation(test_helper.get_tei_auto_root())
        assert get_tei_xpath_text(annotated_aff, './tei:marker') == LABEL_1
        settlement_text = get_tei_xpath_text(
            annotated_aff, './tei:address/tei:settlement')
        assert settlement_text == CITY_1
        country_text = get_tei_xpath_text(
            annotated_aff, './tei:address/tei:country')
        assert country_text == COUNTRY_1
        # NOTE(review): the '|' delimiter presumably separates the text of
        # multiple matched address elements, so this would fail if the fields
        # ended up in separate addresses -- confirm in get_tei_xpath_text.
        address_text = get_tei_xpath_text(
            annotated_aff, './tei:address', delimiter='|')
        assert address_text == (CITY_1 + ', ' + COUNTRY_1)
    def test_should_preserve_existing_sub_tags(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper,
            segment_affiliation: bool):
        """Check that existing sub-tags are kept with ``preserve-sub-tags``.

        The raw TEI affiliation carries ``other1``, ``marker`` and ``other2``
        elements around some untagged text. The JATS target holds the same
        content as one plain text string. After the run, the marker and both
        ``other*`` elements must still contain their original text.
        """
        plain_text = ' some affiliation '
        first_other_text = 'other-sub-tag1'
        second_other_text = 'other-sub-tag2'

        jats_aff_text = (
            first_other_text + LABEL_1 + plain_text + second_other_text)
        target_jats_xml = etree.tostring(
            get_target_xml_node(affiliation_nodes=[E.aff(jats_aff_text)]))

        raw_tei_aff = TEI_E.affiliation(
            TEI_E.other1(first_other_text),
            TEI_E.marker(LABEL_1),
            get_nodes_text([plain_text]),
            TEI_E.other2(second_other_text))
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(get_affiliation_tei_node([raw_tei_aff])))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'segment-affiliation': segment_affiliation,
            'fields': 'author_aff',
            'preserve-sub-tags': True,
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_aff = get_first_affiliation(test_helper.get_tei_auto_root())
        assert get_tei_xpath_text(annotated_aff, './tei:marker') == LABEL_1
        other1_text = get_tei_xpath_text(annotated_aff, './tei:other1')
        assert other1_text == first_other_text
        other2_text = get_tei_xpath_text(annotated_aff, './tei:other2')
        assert other2_text == second_other_text
    def test_should_auto_annotate_single_affiliation_with_all_supported_fields(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Auto-annotate one affiliation that carries every checked field.

        The JATS target tags a label, department, institution, city,
        postcode, state and country. The raw TEI affiliation contains only
        the text of those nodes. After running ``main`` for ``author_aff``,
        each field must appear under its expected TEI path.
        """
        def named_addr_line(content_type, text):
            # JATS addr-line wrapping a typed named-content element.
            return E(
                'addr-line',
                E('named-content', {'content-type': content_type}, text))

        aff_children = [
            E('label', LABEL_1), ' ',
            named_addr_line('department', DEPARTMENT_1), ' ',
            E.institution(INSTITUTION_1), ' ',
            named_addr_line('city', CITY_1), ' ',
            named_addr_line('postcode', POST_CODE_1), ' ',
            named_addr_line('state', STATE_1), ' ',
            E.country(COUNTRY_1),
        ]
        target_jats_xml = etree.tostring(
            get_target_xml_node(affiliation_nodes=[E.aff(*aff_children)]))

        raw_tei_aff = TEI_E.affiliation(get_nodes_text(aff_children))
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(get_affiliation_tei_node([raw_tei_aff])))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'fields': 'author_aff',
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_aff = get_first_affiliation(test_helper.get_tei_auto_root())
        # Checked in this order; the first mismatch stops the test.
        expected_by_xpath = (
            ('./tei:marker', LABEL_1),
            ('./tei:orgName[@type="department"]', DEPARTMENT_1),
            ('./tei:orgName[@type="institution"]', INSTITUTION_1),
            ('./tei:address/tei:settlement', CITY_1),
            ('./tei:address/tei:postCode', POST_CODE_1),
            ('./tei:address/tei:region', STATE_1),
            ('./tei:address/tei:country', COUNTRY_1),
        )
        for xpath, expected_text in expected_by_xpath:
            assert get_tei_xpath_text(annotated_aff, xpath) == expected_text
    def test_should_auto_annotate_single_affiliation_with_single_field(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Auto-annotate one affiliation whose only tagged field is a label.

        The JATS target holds a label followed by plain text. The raw TEI
        affiliation contains only the text of those nodes. After running
        ``main`` for ``author_aff``, the marker must equal ``LABEL_1``.
        """
        aff_children = [E('label', LABEL_1), ' Some text']
        target_jats_xml = etree.tostring(
            get_target_xml_node(affiliation_nodes=[E.aff(*aff_children)]))

        raw_tei_aff = TEI_E.affiliation(get_nodes_text(aff_children))
        test_helper.tei_raw_file_path.write_bytes(
            etree.tostring(get_affiliation_tei_node([raw_tei_aff])))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'fields': 'author_aff',
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_aff = get_first_affiliation(test_helper.get_tei_auto_root())
        marker_text = get_tei_xpath_text(annotated_aff, './tei:marker')
        assert marker_text == LABEL_1