Ejemplo n.º 1
0
    def test_should_auto_annotate_single_figure_with_label_and_caption(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Check that a single figure gets its label and caption annotated.

        The target JATS XML holds one ``fig`` built from a label, a space and
        a caption paragraph. The raw training TEI holds one ``figure`` whose
        content is ``get_nodes_text`` of those same nodes. ``main`` is run
        with the ``simple`` matcher on the ``figure`` field, after which the
        first figure of ``test_helper.get_tei_auto_root()`` must yield the
        label text for ``.//label`` and the caption text for ``./figDesc``.
        """
        figure_nodes = [E.label(LABEL_1), ' ', E.caption(E.p(TEXT_1))]

        # Build the target document first: the raw TEI below is derived from
        # the very same node objects, so the original ordering is kept.
        jats_fig = E.fig(*figure_nodes)
        jats_root = get_target_xml_node(body_nodes=[jats_fig])
        target_jats_xml = etree.tostring(jats_root)

        raw_figure = E.figure(get_nodes_text(figure_nodes))
        raw_tei_root = get_training_tei_node([raw_figure])
        test_helper.tei_raw_file_path.write_bytes(etree.tostring(raw_tei_root))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'fields': 'figure'
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_root = test_helper.get_tei_auto_root()
        annotated_figure = get_first_figure(annotated_root)
        label_text = get_tei_xpath_text(annotated_figure, './/label')
        assert label_text == LABEL_1
        caption_text = get_tei_xpath_text(annotated_figure, './figDesc')
        assert caption_text == TEXT_1
Ejemplo n.º 2
0
    def test_should_tolerate_invalid_closing_content_element(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Check that stray ``</content>`` closing tags do not break annotation.

        The raw TEI is written as a hand-assembled string in which each of the
        two ``figure`` elements contains an unmatched ``</content>`` closing
        tag. The target JATS XML holds two ``fig`` elements, each with a label
        and a caption. After running ``main`` with the ``simple`` matcher on
        the ``figure`` field, both labels and both captions must be found, in
        order, in ``test_helper.get_tei_auto_root()``.
        """
        first_figure_nodes = [E.label(LABEL_1), ' ', E.caption(E.p(TEXT_1))]
        second_figure_nodes = [E.label(LABEL_2), ' ', E.caption(E.p(TEXT_2))]
        all_figure_nodes = (first_figure_nodes, second_figure_nodes)

        # Build the target document before deriving the raw text from the
        # same node objects, matching the original evaluation order.
        jats_figs = [E.fig(*nodes) for nodes in all_figure_nodes]
        target_jats_xml = etree.tostring(
            get_target_xml_node(body_nodes=jats_figs)
        )

        # The raw TEI is deliberately malformed: every figure carries a
        # closing </content> tag without a matching opening tag.
        raw_tei_parts = ['<tei><text>']
        for nodes in all_figure_nodes:
            raw_tei_parts.append('<figure>')
            raw_tei_parts.append(get_nodes_text(nodes))
            raw_tei_parts.append('</content>')
            raw_tei_parts.append('</figure>')
        raw_tei_parts.append('</text></tei>')
        test_helper.tei_raw_file_path.write_text(''.join(raw_tei_parts))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'fields': 'figure'
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_root = test_helper.get_tei_auto_root()
        label_texts = get_tei_xpath_text_list(
            annotated_root, './/figure//label'
        )
        assert label_texts == [LABEL_1, LABEL_2]
        caption_texts = get_tei_xpath_text_list(
            annotated_root, './/figure/figDesc'
        )
        assert caption_texts == [TEXT_1, TEXT_2]
Ejemplo n.º 3
0
    def test_should_segment_figures_if_enabled(
            self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
        """Check that one raw figure is split in two with ``segment-figures``.

        The target JATS XML holds two ``fig`` elements, while the raw training
        TEI holds a single ``figure`` whose content is ``get_nodes_text`` of
        both figures' nodes joined by a space. ``main`` is run with the
        ``simple`` matcher, ``segment-figures`` set to ``True`` and the
        ``figure`` field. The text contents of all figures in
        ``test_helper.get_tei_auto_root()`` must then equal the per-figure
        ``get_nodes_text`` values, in order.
        """
        first_figure_nodes = [E.label(LABEL_1), ' ', E.caption(E.p(TEXT_1))]
        second_figure_nodes = [E.label(LABEL_2), ' ', E.caption(E.p(TEXT_2))]
        all_figure_nodes = (first_figure_nodes, second_figure_nodes)

        # Build the target document before deriving the raw text from the
        # same node objects, matching the original evaluation order.
        jats_figs = [E.fig(*nodes) for nodes in all_figure_nodes]
        target_jats_xml = etree.tostring(
            get_target_xml_node(body_nodes=jats_figs)
        )

        # Both figures are merged into one raw figure, separated by a space.
        merged_nodes = [*first_figure_nodes, ' ', *second_figure_nodes]
        merged_figure = E.figure(get_nodes_text(merged_nodes))
        raw_tei_root = get_training_tei_node([merged_figure])
        test_helper.tei_raw_file_path.write_bytes(etree.tostring(raw_tei_root))

        LOGGER.debug('target_jats_xml: %s', target_jats_xml)
        test_helper.xml_file_path.write_bytes(target_jats_xml)

        main_args = {
            **test_helper.main_args_dict,
            'matcher': 'simple',
            'segment-figures': True,
            'fields': 'figure'
        }
        main(dict_to_args(main_args), save_main_session=False)

        annotated_root = test_helper.get_tei_auto_root()
        actual_figure_texts = get_text_content_list(
            get_all_figures(annotated_root)
        )
        expected_figure_texts = [
            get_nodes_text(nodes) for nodes in all_figure_nodes
        ]
        assert actual_figure_texts == expected_figure_texts