def test_should_replace_affiliation_with_author_if_single_tokens(
        self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
    """Run the simple matcher on an author line followed by an affiliation line.

    The raw TEI header holds two notes (authors, then affiliation), each
    followed by a line break. The target XML lists two contributors and one
    affiliation labelled '1'. After running ``main`` the annotated TEI is
    expected to expose the full author line under ``//byline/docAuthor`` and
    the full affiliation line under ``//byline/affiliation``.
    """
    author_text = 'Mary Maison 1, John Smith 1'
    affiliation_text = '1 University of Science, Smithonia'

    # Raw (un-annotated) header: one note per text line.
    raw_header_node = get_header_tei_node([
        E.note(author_text),
        E.lb(),
        E.note(affiliation_text),
        E.lb()
    ])
    test_helper.tei_raw_file_path.write_bytes(etree.tostring(raw_header_node))

    # Target XML: both contributors plus the labelled affiliation.
    first_author = E.contrib(
        E.name(E.surname('Maison'), E('given-names', 'Mary'))
    )
    second_author = E.contrib(
        E.name(E.surname('Smith'), E('given-names', 'John'))
    )
    affiliation = E.aff(
        E.label('1'),
        E.institution('University of Science'),
        E.country('Smithonia')
    )
    target_node = get_target_xml_node(
        author_nodes=[first_author, second_author, affiliation]
    )
    test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

    run_options = dict(test_helper.main_args_dict)
    run_options.update({
        'fields': ','.join(['title', 'author', 'author_aff', 'abstract']),
        'matcher': 'simple'
    })
    main(dict_to_args(run_options), save_main_session=False)

    annotated_root = test_helper.get_tei_auto_root()
    actual_author = get_xpath_text(annotated_root, '//byline/docAuthor')
    assert actual_author == author_text
    actual_affiliation = get_xpath_text(annotated_root, '//byline/affiliation')
    assert actual_affiliation == affiliation_text
def test_should_auto_annotate_title(
        self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
    """Run ``main`` with the helper's default args and check the title.

    The raw TEI header contains a single note with ``TEXT_1`` and the target
    XML uses ``TEXT_1`` as its title; the annotated TEI is expected to carry
    that same text under ``//docTitle/titlePart``.
    """
    raw_header_bytes = etree.tostring(get_header_tei_node([E.note(TEXT_1)]))
    test_helper.tei_raw_file_path.write_bytes(raw_header_bytes)

    target_xml_bytes = etree.tostring(get_target_xml_node(title=TEXT_1))
    test_helper.xml_file_path.write_bytes(target_xml_bytes)

    main(list(test_helper.main_args), save_main_session=False)

    annotated_root = test_helper.get_tei_auto_root()
    actual_title = get_xpath_text(annotated_root, '//docTitle/titlePart')
    assert actual_title == TEXT_1
def test_should_auto_annotate_affiliation_preceding_number_using_simple_matcher(
        self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
    """Check an affiliation whose number sits on its own preceding line.

    The raw TEI header has the title, the author line, the affiliation number
    ('1') and the affiliation name as separate notes, followed by an abstract
    note made of ``ABSTRACT_PREFIX_1``, a line break and ``ABSTRACT_1``. The
    target XML affiliation has no label. With the simple matcher the
    annotated affiliation is expected to be the number and the name joined by
    a single space, and the abstract is expected to include its prefix.
    """
    author_text = 'Mary Maison 1, John Smith 1'
    affiliation_number = '1'
    affiliation_name = 'University of Science, Smithonia'
    expected_affiliation = f'{affiliation_number} {affiliation_name}'

    # Raw header: the affiliation number and name are on separate lines.
    raw_header_node = get_header_tei_node([
        E.note(TITLE_1),
        E.lb(),
        E.note(author_text),
        E.lb(),
        E.note(affiliation_number),
        E.lb(),
        E.note(affiliation_name),
        E.lb(),
        E.note(ABSTRACT_PREFIX_1, E.lb(), ABSTRACT_1)
    ])
    test_helper.tei_raw_file_path.write_bytes(etree.tostring(raw_header_node))

    # Target XML: the affiliation carries no label element here.
    target_author_nodes = [
        E.contrib(E.name(E.surname('Maison'), E('given-names', 'Mary'))),
        E.contrib(E.name(E.surname('Smith'), E('given-names', 'John'))),
        E.aff(
            E.institution('University of Science'),
            E.country('Smithonia')
        )
    ]
    target_node = get_target_xml_node(
        title=TITLE_1,
        author_nodes=target_author_nodes,
        abstract_node=E.abstract(E.p(ABSTRACT_1))
    )
    test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

    run_options = dict(test_helper.main_args_dict)
    run_options.update({
        'fields': ','.join(['title', 'author', 'author_aff', 'abstract']),
        'matcher': 'simple'
    })
    main(dict_to_args(run_options), save_main_session=False)

    annotated_root = test_helper.get_tei_auto_root()
    assert get_xpath_text(annotated_root, '//docTitle/titlePart') == TITLE_1
    assert get_xpath_text(annotated_root, '//byline/docAuthor') == author_text
    assert get_xpath_text(
        annotated_root, '//byline/affiliation'
    ) == expected_affiliation
    expected_abstract = ABSTRACT_PREFIX_1 + ABSTRACT_1
    assert get_xpath_text(
        annotated_root, '//div[@type="abstract"]'
    ) == expected_abstract
def test_should_extend_title_annotation_to_whole_line(
        self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
    """Annotate a title that is preceded by a 'Title: ' prefix on its line.

    The raw TEI note is ``'Title: '`` followed by the title text, while the
    target XML contains only the title text. ``main`` is run with the simple
    matcher and the text under ``//docTitle/titlePart`` is compared with the
    title text.
    """
    title_text = 'Chocolate bars for mice'

    raw_line = 'Title: ' + title_text
    raw_header_node = get_header_tei_node([E.note(raw_line)])
    test_helper.tei_raw_file_path.write_bytes(etree.tostring(raw_header_node))

    target_node = get_target_xml_node(title=title_text)
    test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

    cli_args = list(test_helper.main_args)
    cli_args.append('--matcher=simple')
    main(cli_args, save_main_session=False)

    annotated_root = test_helper.get_tei_auto_root()
    actual_title = get_xpath_text(annotated_root, '//docTitle/titlePart')
    # NOTE(review): the expectation excludes the 'Title: ' prefix even though
    # the test name speaks of extending to the whole line - confirm that this
    # is the intended expected value.
    assert actual_title == title_text
def test_should_skip_errors(
        self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper):
    """Run with ``--skip-errors`` alongside a second, corrupted document.

    An extra 'document0' pair is written whose target XML has the bytes
    ``b'error'`` appended after the serialized document (presumably making it
    unparseable). The helper's own document pair is written intact. After
    running ``main`` with the simple matcher and ``--skip-errors``, the
    helper's annotated TEI is expected to contain ``TEXT_1`` as the title.
    """
    header_bytes = etree.tostring(get_header_tei_node([E.note(TEXT_1)]))
    target_bytes = etree.tostring(get_target_xml_node(title=TEXT_1))

    # Additional document whose target XML has trailing garbage appended.
    broken_doc_tei_path = test_helper.tei_raw_path.joinpath(
        'document0.header.tei.xml'
    )
    broken_doc_tei_path.write_bytes(header_bytes)
    broken_doc_xml_path = test_helper.xml_path.joinpath('document0.xml')
    broken_doc_xml_path.write_bytes(target_bytes + b'error')

    # The helper's own document pair, written without modification.
    test_helper.tei_raw_file_path.write_bytes(header_bytes)
    test_helper.xml_file_path.write_bytes(target_bytes)

    cli_args = list(test_helper.main_args)
    cli_args.extend(['--matcher=simple', '--skip-errors'])
    main(cli_args, save_main_session=False)

    annotated_root = test_helper.get_tei_auto_root()
    actual_title = get_xpath_text(annotated_root, '//docTitle/titlePart')
    assert actual_title == TEXT_1
def test_should_filter_out_xml_if_selected_fields_are_not_matching(
        self, test_helper: SingleFileAutoAnnotateEndToEndTestHelper,
        actual_abstract: str,
        expected_abstract: str,
        expected_match: bool,
        required_fields: str,
        relative_failed_output_path: str,
        temp_dir: Path):
    """Check whether an annotated file is written, based on the abstract.

    The raw TEI header contains the title and an abstract note built from
    ``ABSTRACT_PREFIX_1``, a line break and ``actual_abstract``. The target
    XML contains the title and, when ``expected_abstract`` is truthy, an
    abstract with that text. ``main`` is run with the simple matcher,
    ``require-matching-fields`` set to 'abstract', and the supplied
    ``required_fields`` and failed-output path.

    When ``expected_match`` is true the annotated file must exist. Otherwise
    it must not exist, and if a failed-output path was configured a file with
    the same name must exist there instead.
    """
    raw_header_node = get_header_tei_node([
        E.note(TITLE_1),
        E.lb(),
        E.note(ABSTRACT_PREFIX_1, E.lb(), actual_abstract)
    ])
    test_helper.tei_raw_file_path.write_bytes(etree.tostring(raw_header_node))

    # The target abstract is omitted entirely when no expected text is given.
    if expected_abstract:
        target_abstract_node = E.abstract(E.p(expected_abstract))
    else:
        target_abstract_node = None
    target_node = get_target_xml_node(
        title=TITLE_1,
        abstract_node=target_abstract_node
    )
    test_helper.xml_file_path.write_bytes(etree.tostring(target_node))

    # An empty string is passed when no failed-output location is requested.
    failed_output_path: str = ''
    if relative_failed_output_path:
        failed_output_path = str(temp_dir / relative_failed_output_path)

    run_options = dict(test_helper.main_args_dict)
    run_options.update({
        'fields': ','.join(['title', 'author', 'author_aff', 'abstract']),
        'require-matching-fields': ','.join(['abstract']),
        'required-fields': required_fields,
        'failed-output-path': failed_output_path,
        'matcher': 'simple'
    })
    main(dict_to_args(run_options), save_main_session=False)

    if expected_match:
        assert test_helper.tei_auto_file_path.exists()
        return

    assert not test_helper.tei_auto_file_path.exists()
    if failed_output_path:
        failed_file_path = (
            Path(failed_output_path) / test_helper.tei_auto_file_path.name
        )
        assert failed_file_path.exists()