def test_should_remove_doi_prefix_from_doi(self):
    """A ``doi:`` prefix inside the DOI element is stripped by the fix."""
    doi_element = get_jats_doi_element('doi:' + DOI_1)
    ref = get_jats_mixed_ref('some text', doi_element)

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == DOI_1
def test_should_remove_duplicate_doi_with_tail(self):
    """The same DOI repeated after ``'; '`` collapses to a single DOI.

    The reference also carries tail text after the DOI element.
    """
    doi_element = get_jats_doi_element(DOI_1 + '; ' + DOI_1)
    ref = get_jats_mixed_ref('doi: ', doi_element, 'tail text')

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == DOI_1
def test_should_remove_doi_suffix_from_doi_without_tail(self):
    """A trailing ``' [doi]'`` marker is removed from the DOI text.

    No tail text follows the DOI element in this case.
    """
    doi_element = get_jats_doi_element(DOI_1 + ' [doi]')
    ref = get_jats_mixed_ref('doi: ', doi_element)

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == DOI_1
def test_should_remove_doi_prefix_after_preceeding_element_with_tail_text(
        self):
    """The ``doi:`` prefix is stripped when another element comes first.

    The DOI element follows an ``other`` element and a text fragment.
    """
    preceding_element = E.other('other text')
    doi_element = get_jats_doi_element('doi:' + DOI_1)
    ref = get_jats_mixed_ref(preceding_element, 'tail text', doi_element)

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == DOI_1
def test_should_convert_doi_with_outside_url_prefix_to_ext_link(self):
    """A DOI preceded by the HTTPS DOI URL prefix becomes an ext-link.

    The URL prefix sits in the text before the DOI element; the resulting
    ext-link text is expected to be the prefix joined with the DOI.
    """
    leading_text = 'some text ' + HTTPS_DOI_URL_PREFIX
    doi_element = get_jats_doi_element(DOI_1)
    ref = get_jats_mixed_ref(leading_text, doi_element, 'tail text')

    result = fix_reference(clone_node(ref))

    link_texts = get_text_content_list(result.xpath(JatsXpaths.EXT_LINK))
    assert '|'.join(link_texts) == HTTPS_DOI_URL_PREFIX + DOI_1
def test_should_remove_doi_pub_id_element_if_not_containing_valid_doi(
        self):
    """No DOI text remains when the DOI element holds ``'not a doi'``."""
    doi_element = get_jats_doi_element('not a doi')
    ref = get_jats_mixed_ref('doi: ', doi_element)

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == ''
def test_should_remove_duplicate_doi_ignoring_punct_with_tail(self):
    """DOIs that differ only by ``-`` versus ``.`` count as duplicates.

    Only the first variant is expected to remain after the fix.
    """
    hyphen_variant = DOI_1 + '.ab-123'
    dot_variant = DOI_1 + '.ab.123'
    doi_element = get_jats_doi_element(hyphen_variant + '; ' + dot_variant)
    ref = get_jats_mixed_ref('doi: ', doi_element, 'tail text')

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == hyphen_variant
def test_should_not_include_doi_colon_in_pii(self):
    """The leading ``doi:`` text must not end up in the extracted PII.

    The DOI element contains ``<pii> [pii]; <doi> [doi]``; both values are
    expected to be annotated without their markers.
    """
    combined_ids = PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
    ref = get_jats_mixed_ref('doi:', get_jats_doi_element(combined_ids))

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    pii_texts = get_text_content_list(result.xpath(JatsXpaths.PII))
    assert '|'.join(doi_texts) == DOI_1
    assert '|'.join(pii_texts) == PII_1
def test_should_separately_annotate_pii_with_preceding_element(self):
    """PII and DOI are annotated separately after a preceding element.

    The reference starts with an ``other`` element, followed by ``'doi: '``
    and a DOI element containing ``<pii> [pii]; <doi> [doi]``.
    """
    preceding_element = E.other('other text')
    combined_ids = PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
    ref = get_jats_mixed_ref(
        preceding_element, 'doi: ', get_jats_doi_element(combined_ids))

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    pii_texts = get_text_content_list(result.xpath(JatsXpaths.PII))
    assert '|'.join(doi_texts) == DOI_1
    assert '|'.join(pii_texts) == PII_1
def test_should_remove_doi_duplicate_pii_suffix_from_doi_with_tail(self):
    """A ``[pii]`` suffix repeating the DOI's last fragment is removed.

    The DOI ends in ``.doi-duplicate`` and is followed by
    ``doi-duplicate [pii]``; only the full DOI is expected to remain.
    """
    repeated_fragment = 'doi-duplicate'
    expected_doi = DOI_1 + '.' + repeated_fragment
    element_text = expected_doi + ' ' + repeated_fragment + ' [pii]'
    ref = get_jats_mixed_ref(
        'doi: ', get_jats_doi_element(element_text), 'tail text')

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == expected_doi
def test_should_separately_annotate_invalid_pii_as_other_pub_id(self):
    """An invalid PII is annotated as an "other" pub-id next to the DOI.

    The DOI element contains ``<invalid pii> [pii]; <doi> [doi]``.
    """
    combined_ids = INVALID_PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
    ref = get_jats_mixed_ref('doi: ', get_jats_doi_element(combined_ids))

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    other_id_texts = get_text_content_list(
        result.xpath(JatsXpaths.OTHER_PUB_ID))
    assert '|'.join(doi_texts) == DOI_1
    assert '|'.join(other_id_texts) == INVALID_PII_1
def test_should_convert_doi_with_outside_spaced_url_prefix_to_ext_link(
        self):
    """A spaced DOI URL prefix before the DOI still yields an ext-link.

    The link text keeps the spaced prefix as written, while the href
    attribute is expected to use the unspaced HTTPS DOI URL prefix.
    """
    leading_text = 'some text ' + HTTPS_SPACED_DOI_URL_PREFIX
    doi_element = get_jats_doi_element(DOI_1)
    ref = get_jats_mixed_ref(leading_text, doi_element, 'tail text')

    result = fix_reference(clone_node(ref))

    links = result.xpath(JatsXpaths.EXT_LINK)
    link_texts = get_text_content_list(links)
    assert '|'.join(link_texts) == HTTPS_SPACED_DOI_URL_PREFIX + DOI_1

    expected_attrib = {
        'ext-link-type': 'uri',
        XLINK_HREF: HTTPS_DOI_URL_PREFIX + DOI_1
    }
    assert links[0].attrib == expected_attrib
def test_should_fix_jats_xml_using_source_path(
        self, input_dir: Path, output_dir: Path):
    """Running ``main`` with ``--source-path`` writes a fixed XML file.

    A JATS document whose DOI element carries a ``doi:`` prefix is written
    to ``input_dir``; the file of the same name in ``output_dir`` is
    expected to contain the bare DOI.
    """
    ref = get_jats_mixed_ref('doi: ', get_jats_doi_element('doi:' + DOI_1))
    source_xml = input_dir / 'file1.xml'
    source_xml.parent.mkdir()
    source_xml.write_bytes(etree.tostring(get_jats(references=[ref])))
    expected_output = output_dir / 'file1.xml'

    argv = [
        '--source-path=%s' % source_xml,
        '--output-path=%s' % output_dir
    ]
    main(argv)

    assert expected_output.exists()
    root = parse_xml(str(expected_output))
    doi_texts = get_text_content_list(root.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == DOI_1
def test_should_fix_jats_xml_using_source_file_list_in_sub_directory(
        self, input_dir: Path, output_dir: Path):
    """Running ``main`` with ``--source-file-list`` keeps the sub directory.

    The file list is written with an ``xml_url`` header line and one
    relative path, ``sub/file1.xml``. The fixed document is expected at the
    same relative path below ``output_dir``.
    """
    ref = get_jats_mixed_ref('doi: ', get_jats_doi_element('doi:' + DOI_1))
    source_xml = input_dir / 'sub' / 'file1.xml'
    source_xml.parent.mkdir(parents=True)
    source_xml.write_bytes(etree.tostring(get_jats(references=[ref])))
    expected_output = output_dir / 'sub' / 'file1.xml'

    file_list_path = input_dir / 'file-list.tsv'
    file_list_lines = ['xml_url', 'sub/file1.xml']
    file_list_path.write_text('\n'.join(file_list_lines))

    argv = [
        '--source-file-list=%s' % file_list_path,
        '--output-path=%s' % output_dir
    ]
    main(argv)

    assert expected_output.exists()
    root = parse_xml(str(expected_output))
    doi_texts = get_text_content_list(root.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == DOI_1
def test_should_not_change_valid_doi(self):
    """A DOI element that already holds a plain DOI keeps its text."""
    doi_element = get_jats_doi_element(DOI_1)
    ref = get_jats_mixed_ref('doi: ', doi_element)

    result = fix_reference(clone_node(ref))

    doi_texts = get_text_content_list(result.xpath(JatsXpaths.DOI))
    assert '|'.join(doi_texts) == DOI_1