def test_should_not_include_pubmed_prefix_in_doi(self):
    """Check the DOI text excludes a following '. PubMed PMID: ' segment.

    The reference is plain mixed text: 'doi: ', DOI_1, '. PubMed PMID: '
    and PMID_1. After fixing, the DOI query must yield exactly DOI_1.
    """
    source_ref = get_jats_mixed_ref('doi: ', DOI_1, '. PubMed PMID: ', PMID_1)
    # fix_reference is always given a clone of the constructed reference
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == DOI_1
def test_should_remove_pmcid_from_article_title(self):
    """Check a '; ' + PMCID_1 suffix is dropped from the article title.

    The article-title element is built with ARTICLE_TITLE_1 followed by
    '; ' and PMCID_1; the fixed title must equal ARTICLE_TITLE_1 alone.
    """
    title_element = E('article-title', ARTICLE_TITLE_1 + '; ' + PMCID_1)
    source_ref = get_jats_mixed_ref('title: ', title_element)
    result_ref = fix_reference(clone_node(source_ref))
    title_nodes = result_ref.xpath(JatsXpaths.ARTICLE_TITLE)
    title_text = '|'.join(get_text_content_list(title_nodes))
    assert title_text == ARTICLE_TITLE_1
def test_should_remove_duplicate_doi_with_tail(self):
    """Check a DOI repeated inside one DOI element is reported only once.

    The DOI element holds DOI_1 + '; ' + DOI_1 and is followed by
    'tail text'; the fixed DOI text must be the single value DOI_1.
    """
    doi_element = get_jats_doi_element(DOI_1 + '; ' + DOI_1)
    source_ref = get_jats_mixed_ref('doi: ', doi_element, 'tail text')
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == DOI_1
def test_should_convert_pub_id_type_to_lower_case(self):
    """Check an element created with the upper-case 'PMCID' type is found.

    The pub-id element is created with the second argument 'PMCID'
    (presumably its pub-id-type); after fixing, the JatsXpaths.PMCID
    query must return PMCID_1.
    """
    pub_id_element = get_jats_pub_id_element(PMCID_1, 'PMCID')
    source_ref = get_jats_mixed_ref(pub_id_element)
    result_ref = fix_reference(clone_node(source_ref))
    pmcid_nodes = result_ref.xpath(JatsXpaths.PMCID)
    pmcid_text = '|'.join(get_text_content_list(pmcid_nodes))
    assert pmcid_text == PMCID_1
def test_should_remove_doi_suffix_from_doi_without_tail(self):
    """Check a trailing ' [doi]' marker is stripped from the DOI element.

    The DOI element holds DOI_1 + ' [doi]' and has no tail text; the
    fixed DOI text must equal DOI_1.
    """
    doi_element = get_jats_doi_element(DOI_1 + ' [doi]')
    source_ref = get_jats_mixed_ref('doi: ', doi_element)
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == DOI_1
def test_should_keep_original_pmid_if_already_present_and_valid(self):
    """Check an existing PMID element wins over a PMID mentioned in text.

    The reference has a PMID element holding PMID_1 followed by the text
    ', alternative PMID: 123'; the fixed PMID text must be PMID_1 only.
    """
    pmid_element = get_jats_pmid_element(PMID_1)
    source_ref = get_jats_mixed_ref(pmid_element, ', alternative PMID: 123')
    result_ref = fix_reference(clone_node(source_ref))
    pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
    pmid_text = '|'.join(get_text_content_list(pmid_nodes))
    assert pmid_text == PMID_1
def test_should_remove_doi_prefix_from_doi(self):
    """Check a 'doi:' prefix inside the DOI element is stripped.

    The DOI element holds 'doi:' + DOI_1 and is preceded by 'some text';
    the fixed DOI text must equal DOI_1.
    """
    doi_element = get_jats_doi_element('doi:' + DOI_1)
    source_ref = get_jats_mixed_ref('some text', doi_element)
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == DOI_1
def test_should_remove_pmid_non_digit_text(self):
    """Check a 'PMID: ' label inside the PMID element is stripped.

    The PMID element holds 'PMID: ' + PMID_1; the fixed PMID text must
    equal PMID_1.
    """
    pmid_element = get_jats_pmid_element('PMID: ' + PMID_1)
    source_ref = get_jats_mixed_ref(pmid_element)
    result_ref = fix_reference(clone_node(source_ref))
    pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
    pmid_text = '|'.join(get_text_content_list(pmid_nodes))
    assert pmid_text == PMID_1
def test_should_remove_double_pmc_prefix_from_pmcid(self):
    """Check an extra leading 'PMC' is removed from the PMCID element.

    The PMCID element holds 'PMC' + PMCID_1 and is preceded by the text
    'PMCID: '; the fixed PMCID text must equal PMCID_1.
    """
    pmcid_element = get_jats_pmcid_element('PMC' + PMCID_1)
    source_ref = get_jats_mixed_ref('PMCID: ', pmcid_element)
    result_ref = fix_reference(clone_node(source_ref))
    pmcid_nodes = result_ref.xpath(JatsXpaths.PMCID)
    pmcid_text = '|'.join(get_text_content_list(pmcid_nodes))
    assert pmcid_text == PMCID_1
def test_should_separately_annotate_pmid_with_preceding_element(self):
    """Check a PMID in text after another element gets annotated.

    The reference is an 'other' element followed by the plain text
    'PMID:' + PMID_1; the fixed PMID text must equal PMID_1.
    """
    leading_element = E.other('other text')
    source_ref = get_jats_mixed_ref(leading_element, 'PMID:' + PMID_1)
    result_ref = fix_reference(clone_node(source_ref))
    pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
    pmid_text = '|'.join(get_text_content_list(pmid_nodes))
    assert pmid_text == PMID_1
def test_should_remove_doi_prefix_after_preceeding_element_with_tail_text(
    self
):
    """Check the 'doi:' prefix is stripped after an element with tail text.

    The reference is an 'other' element, the text 'tail text', and a DOI
    element holding 'doi:' + DOI_1; the fixed DOI text must equal DOI_1.
    """
    leading_element = E.other('other text')
    doi_element = get_jats_doi_element('doi:' + DOI_1)
    source_ref = get_jats_mixed_ref(leading_element, 'tail text', doi_element)
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == DOI_1
def test_should_not_remove_other_square_brackets_from_ext_link(self):
    """Check an ext-link URL ending in '[other]' is left untouched.

    Both the text content and the XLINK_HREF attribute of the first
    ext-link in the fixed reference must equal the original URL.
    """
    url = HTTPS_DOI_URL_PREFIX + DOI_1 + '[other]'
    source_ref = get_jats_mixed_ref(get_jats_ext_link_element(url))
    result_ref = fix_reference(clone_node(source_ref))
    link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
    link_text = '|'.join(get_text_content_list(link_nodes))
    assert link_text == url
    first_link = link_nodes[0]
    assert first_link.attrib[XLINK_HREF] == url
def test_should_remove_doi_pub_id_element_if_not_containing_valid_doi(
    self
):
    """Check a DOI element holding 'not a doi' yields no DOI text.

    After fixing, the joined text of the DOI query must be the empty
    string.
    """
    doi_element = get_jats_doi_element('not a doi')
    source_ref = get_jats_mixed_ref('doi: ', doi_element)
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == ''
def test_should_convert_doi_with_outside_url_prefix_to_ext_link(self):
    """Check a URL prefix before the DOI element produces an ext-link.

    The text before the DOI element ends with HTTPS_DOI_URL_PREFIX; the
    fixed reference's ext-link text must be that prefix plus DOI_1.
    """
    leading_text = 'some text ' + HTTPS_DOI_URL_PREFIX
    doi_element = get_jats_doi_element(DOI_1)
    source_ref = get_jats_mixed_ref(leading_text, doi_element, 'tail text')
    result_ref = fix_reference(clone_node(source_ref))
    link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
    link_text = '|'.join(get_text_content_list(link_nodes))
    assert link_text == HTTPS_DOI_URL_PREFIX + DOI_1
def test_should_remove_quotes_and_trailing_comma_from_article_title(self):
    """Check the title loses its opening quote and trailing ', '.

    The article-title holds SpecialChars.LDQUO, an italic element with
    ARTICLE_TITLE_1 and ', '; SpecialChars.RDQUO follows the element.
    The fixed title text must equal ARTICLE_TITLE_1.
    """
    title_element = E(
        'article-title',
        SpecialChars.LDQUO,
        E.italic(ARTICLE_TITLE_1),
        ', '
    )
    source_ref = get_jats_mixed_ref(
        'title: ', title_element, SpecialChars.RDQUO
    )
    result_ref = fix_reference(clone_node(source_ref))
    title_nodes = result_ref.xpath(JatsXpaths.ARTICLE_TITLE)
    title_text = '|'.join(get_text_content_list(title_nodes))
    assert title_text == ARTICLE_TITLE_1
def test_should_remove_left_right_single_quotes_from_article_title(self):
    """Check surrounding single quotes are stripped from the title.

    The article-title text is ARTICLE_TITLE_1 wrapped in
    SpecialChars.LSQUO and SpecialChars.RSQUO; the fixed title text must
    equal ARTICLE_TITLE_1.
    """
    quoted_title = SpecialChars.LSQUO + ARTICLE_TITLE_1 + SpecialChars.RSQUO
    title_element = E('article-title', quoted_title)
    source_ref = get_jats_mixed_ref('title: ', title_element)
    result_ref = fix_reference(clone_node(source_ref))
    title_nodes = result_ref.xpath(JatsXpaths.ARTICLE_TITLE)
    title_text = '|'.join(get_text_content_list(title_nodes))
    assert title_text == ARTICLE_TITLE_1
def test_should_split_ext_link_containing_multiple_http_links(self):
    """Check one ext-link holding two concatenated URLs is split in two.

    The ext-link text is two HTTP DOI URLs joined without a separator;
    the fixed reference must expose them as two ext-link texts, in order.
    """
    url_1 = HTTP_DOI_URL_PREFIX + DOI_1
    url_2 = HTTP_DOI_URL_PREFIX + DOI_2
    combined_link = get_jats_ext_link_element(url_1 + url_2)
    source_ref = get_jats_mixed_ref(combined_link)
    result_ref = fix_reference(clone_node(source_ref))
    link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
    link_urls = get_text_content_list(link_nodes)
    assert link_urls == [url_1, url_2]
def test_should_replace_pmid_with_too_many_digits_text(self):
    """Check a PMID element holding 'WOS: ' + WOS_1 becomes an other pub-id.

    After fixing, the PMID query must yield no text and the
    OTHER_PUB_ID query must yield WOS_1.
    """
    pmid_element = get_jats_pmid_element('WOS: ' + WOS_1)
    source_ref = get_jats_mixed_ref('PMID: ', pmid_element)
    result_ref = fix_reference(clone_node(source_ref))
    pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
    pmid_text = '|'.join(get_text_content_list(pmid_nodes))
    assert pmid_text == ''
    other_nodes = result_ref.xpath(JatsXpaths.OTHER_PUB_ID)
    other_text = '|'.join(get_text_content_list(other_nodes))
    assert other_text == WOS_1
def test_should_remove_duplicate_doi_ignoring_punct_with_tail(self):
    """Check DOIs differing only in punctuation count as duplicates.

    The DOI element holds two variants of DOI_1 ('.ab-123' and '.ab.123'
    suffixes) separated by '; ', followed by 'tail text'. The fixed DOI
    text must be the first variant only.
    """
    doi_1_a = DOI_1 + '.ab-123'
    doi_1_b = DOI_1 + '.ab.123'
    doi_element = get_jats_doi_element(doi_1_a + '; ' + doi_1_b)
    source_ref = get_jats_mixed_ref('doi: ', doi_element, 'tail text')
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == doi_1_a
def test_should_remove_double_doi_in_ext_link_square_brackets(self):
    """Check a bracketed repeat of the DOI is removed from the ext-link.

    The ext-link text is the HTTPS DOI URL followed by '[' + DOI_1 + ']'.
    After fixing, both the ext-link text and the XLINK_HREF attribute of
    the first ext-link must be the plain HTTPS DOI URL.
    """
    link_element = get_jats_ext_link_element(
        HTTPS_DOI_URL_PREFIX + DOI_1 + '[' + DOI_1 + ']'
    )
    source_ref = get_jats_mixed_ref(link_element)
    result_ref = fix_reference(clone_node(source_ref))
    link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
    link_text = '|'.join(get_text_content_list(link_nodes))
    assert link_text == HTTPS_DOI_URL_PREFIX + DOI_1
    first_link = link_nodes[0]
    assert first_link.attrib[XLINK_HREF] == HTTPS_DOI_URL_PREFIX + DOI_1
def test_should_remove_doi_duplicate_pii_suffix_from_doi_with_tail(self):
    """Check a repeated DOI fragment tagged ' [pii]' is dropped.

    The DOI ends in '.doi-duplicate'; the DOI element repeats that
    fragment after a space and appends ' [pii]', followed by 'tail text'.
    The fixed DOI text must equal the DOI itself.
    """
    doi_fragment_duplicate = 'doi-duplicate'
    doi = DOI_1 + '.' + doi_fragment_duplicate
    doi_element = get_jats_doi_element(
        doi + ' ' + doi_fragment_duplicate + ' [pii]'
    )
    source_ref = get_jats_mixed_ref('doi: ', doi_element, 'tail text')
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == doi
def test_should_separately_annotate_pii_with_preceding_element(self):
    """Check PII and DOI in one DOI element are annotated separately.

    The reference is an 'other' element, the text 'doi: ', and a DOI
    element holding PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'. After fixing,
    the DOI query must yield DOI_1 and the PII query PII_1.
    """
    leading_element = E.other('other text')
    doi_element = get_jats_doi_element(PII_1 + ' [pii]; ' + DOI_1 + ' [doi]')
    source_ref = get_jats_mixed_ref(leading_element, 'doi: ', doi_element)
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    pii_nodes = result_ref.xpath(JatsXpaths.PII)
    pii_text = '|'.join(get_text_content_list(pii_nodes))
    assert doi_text == DOI_1
    assert pii_text == PII_1
def test_should_not_include_doi_colon_in_pii(self):
    """Check the leading 'doi:' text does not end up in the PII.

    The text 'doi:' (no trailing space) precedes a DOI element holding
    PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'. After fixing, the DOI query
    must yield DOI_1 and the PII query exactly PII_1.
    """
    doi_element = get_jats_doi_element(PII_1 + ' [pii]; ' + DOI_1 + ' [doi]')
    source_ref = get_jats_mixed_ref('doi:', doi_element)
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    pii_nodes = result_ref.xpath(JatsXpaths.PII)
    pii_text = '|'.join(get_text_content_list(pii_nodes))
    assert doi_text == DOI_1
    assert pii_text == PII_1
def test_should_separately_annotate_invalid_pii_as_other_pub_id(self):
    """Check an invalid PII tagged ' [pii]' becomes an other pub-id.

    The DOI element holds INVALID_PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'.
    After fixing, the DOI query must yield DOI_1 and the OTHER_PUB_ID
    query INVALID_PII_1.
    """
    doi_element = get_jats_doi_element(
        INVALID_PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
    )
    source_ref = get_jats_mixed_ref('doi: ', doi_element)
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    other_nodes = result_ref.xpath(JatsXpaths.OTHER_PUB_ID)
    other_text = '|'.join(get_text_content_list(other_nodes))
    assert doi_text == DOI_1
    assert other_text == INVALID_PII_1
def test_should_convert_doi_with_outside_spaced_url_prefix_to_ext_link(
    self
):
    """Check a spaced URL prefix before the DOI element gives an ext-link.

    The text before the DOI element ends with HTTPS_SPACED_DOI_URL_PREFIX.
    The ext-link text must keep the spaced prefix plus DOI_1, while its
    attributes must be ext-link-type 'uri' and an XLINK_HREF built from
    HTTPS_DOI_URL_PREFIX plus DOI_1.
    """
    leading_text = 'some text ' + HTTPS_SPACED_DOI_URL_PREFIX
    doi_element = get_jats_doi_element(DOI_1)
    source_ref = get_jats_mixed_ref(leading_text, doi_element, 'tail text')
    result_ref = fix_reference(clone_node(source_ref))
    link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
    link_text = '|'.join(get_text_content_list(link_nodes))
    assert link_text == HTTPS_SPACED_DOI_URL_PREFIX + DOI_1
    first_link = link_nodes[0]
    assert first_link.attrib == {
        'ext-link-type': 'uri',
        XLINK_HREF: HTTPS_DOI_URL_PREFIX + DOI_1
    }
def test_should_be_able_to_clone_with_unicode(self):
    """Check clone_node preserves root text with non-ASCII characters.

    The root text mixes ASCII symbols with U+00E9 and U+2122; the
    clone's text must equal the original string.
    """
    text = '\u002A\u002B\u0026\u00E9\u2122'
    source_root = E.root(text)
    copied_root = clone_node(source_root)
    assert copied_root.text == text
def test_should_annotate_missing_doi_excluding_dot(self):
    """Check a DOI in plain text is annotated without its trailing '.'.

    The reference is the plain text DOI_1 + '.' with no DOI element;
    the fixed DOI text must equal DOI_1.
    """
    source_ref = get_jats_mixed_ref(DOI_1 + '.')
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == DOI_1
def test_should_annotate_missing_doi_in_square_brackets(self):
    """Check a DOI in plain text is annotated without enclosing brackets.

    The reference is the plain text '[' + DOI_1 + ']' with no DOI
    element; the fixed DOI text must equal DOI_1.
    """
    source_ref = get_jats_mixed_ref('[' + DOI_1 + ']')
    result_ref = fix_reference(clone_node(source_ref))
    doi_nodes = result_ref.xpath(JatsXpaths.DOI)
    doi_text = '|'.join(get_text_content_list(doi_nodes))
    assert doi_text == DOI_1
def test_should_annotate_missing_pmcid_in_comment(self):
    """Check a PMCID inside a 'comment' element gets annotated.

    The reference contains only a comment element holding PMCID_1; the
    fixed PMCID text must equal PMCID_1.
    """
    comment_element = E.comment(PMCID_1)
    source_ref = get_jats_mixed_ref(comment_element)
    result_ref = fix_reference(clone_node(source_ref))
    pmcid_nodes = result_ref.xpath(JatsXpaths.PMCID)
    pmcid_text = '|'.join(get_text_content_list(pmcid_nodes))
    assert pmcid_text == PMCID_1
def test_should_separately_annotate_pmid_with_spaces(self):
    """Check a PMID in text with spaces around label and colon is found.

    The reference is the plain text ' PMID : ' + PMID_1 + ' '; the fixed
    PMID text must equal PMID_1 without surrounding whitespace.
    """
    source_ref = get_jats_mixed_ref(' PMID : ' + PMID_1 + ' ')
    result_ref = fix_reference(clone_node(source_ref))
    pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
    pmid_text = '|'.join(get_text_content_list(pmid_nodes))
    assert pmid_text == PMID_1