Python clone_node Examples, sciencebeam_trainer_grobid_tools.fix_jats_xml.clone_node Python Examples

Example #1

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_not_include_pubmed_prefix_in_doi(self):
     """The DOI found after fixing must not carry the '. PubMed PMID: ' text."""
     source_ref = get_jats_mixed_ref(
         'doi: ', DOI_1, '. PubMed PMID: ', PMID_1
     )
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == DOI_1

Example #2

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_pmcid_from_article_title(self):
     """An article-title ending in '; ' plus a PMCID should keep only the title."""
     title_with_pmcid = ARTICLE_TITLE_1 + '; ' + PMCID_1
     source_ref = get_jats_mixed_ref(
         'title: ', E('article-title', title_with_pmcid)
     )
     result_ref = fix_reference(clone_node(source_ref))
     title_nodes = result_ref.xpath(JatsXpaths.ARTICLE_TITLE)
     title_text = '|'.join(get_text_content_list(title_nodes))
     assert title_text == ARTICLE_TITLE_1

Example #3

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_duplicate_doi_with_tail(self):
     """A DOI element holding the same DOI twice should yield that DOI once."""
     doubled_doi_element = get_jats_doi_element(DOI_1 + '; ' + DOI_1)
     source_ref = get_jats_mixed_ref(
         'doi: ', doubled_doi_element, 'tail text'
     )
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == DOI_1

Example #4

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_convert_pub_id_type_to_lower_case(self):
     """A pub-id created with type 'PMCID' must be found by the PMCID xpath."""
     upper_case_pub_id = get_jats_pub_id_element(PMCID_1, 'PMCID')
     source_ref = get_jats_mixed_ref(upper_case_pub_id)
     result_ref = fix_reference(clone_node(source_ref))
     pmcid_nodes = result_ref.xpath(JatsXpaths.PMCID)
     pmcid_text = '|'.join(get_text_content_list(pmcid_nodes))
     assert pmcid_text == PMCID_1

Example #5

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_doi_suffix_from_doi_without_tail(self):
     """A trailing ' [doi]' marker inside the DOI element should be dropped."""
     suffixed_doi_element = get_jats_doi_element(DOI_1 + ' [doi]')
     source_ref = get_jats_mixed_ref('doi: ', suffixed_doi_element)
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == DOI_1

Example #6

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_keep_original_pmid_if_already_present_and_valid(self):
     """An existing PMID element stays the only PMID despite later PMID text."""
     existing_pmid_element = get_jats_pmid_element(PMID_1)
     source_ref = get_jats_mixed_ref(
         existing_pmid_element, ', alternative PMID: 123'
     )
     result_ref = fix_reference(clone_node(source_ref))
     pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
     pmid_text = '|'.join(get_text_content_list(pmid_nodes))
     assert pmid_text == PMID_1

Example #7

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_doi_prefix_from_doi(self):
     """A 'doi:' prefix inside the DOI element should not remain in the DOI."""
     prefixed_doi_element = get_jats_doi_element('doi:' + DOI_1)
     source_ref = get_jats_mixed_ref('some text', prefixed_doi_element)
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == DOI_1

Example #8

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_pmid_non_digit_text(self):
     """A 'PMID: ' label inside the PMID element should be stripped."""
     labelled_pmid_element = get_jats_pmid_element('PMID: ' + PMID_1)
     source_ref = get_jats_mixed_ref(labelled_pmid_element)
     result_ref = fix_reference(clone_node(source_ref))
     pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
     pmid_text = '|'.join(get_text_content_list(pmid_nodes))
     assert pmid_text == PMID_1

Example #9

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_double_pmc_prefix_from_pmcid(self):
     """A PMCID element with an extra leading 'PMC' should yield PMCID_1."""
     double_prefixed_element = get_jats_pmcid_element('PMC' + PMCID_1)
     source_ref = get_jats_mixed_ref('PMCID: ', double_prefixed_element)
     result_ref = fix_reference(clone_node(source_ref))
     pmcid_nodes = result_ref.xpath(JatsXpaths.PMCID)
     pmcid_text = '|'.join(get_text_content_list(pmcid_nodes))
     assert pmcid_text == PMCID_1

Example #10

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_separately_annotate_pmid_with_preceding_element(self):
     """'PMID:<id>' text following another element should be found as a PMID."""
     preceding_element = E.other('other text')
     source_ref = get_jats_mixed_ref(preceding_element, 'PMID:' + PMID_1)
     result_ref = fix_reference(clone_node(source_ref))
     pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
     pmid_text = '|'.join(get_text_content_list(pmid_nodes))
     assert pmid_text == PMID_1

Example #11

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_doi_prefix_after_preceeding_element_with_tail_text(
         self):
     """A 'doi:' prefix is removed when the DOI element follows an element and tail text."""
     preceding_element = E.other('other text')
     prefixed_doi_element = get_jats_doi_element('doi:' + DOI_1)
     source_ref = get_jats_mixed_ref(
         preceding_element, 'tail text', prefixed_doi_element
     )
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == DOI_1

Example #12

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_not_remove_other_square_brackets_from_ext_link(self):
     """An ext-link URL ending in '[other]' keeps that suffix in text and href."""
     expected_url = HTTPS_DOI_URL_PREFIX + DOI_1 + '[other]'
     source_ref = get_jats_mixed_ref(
         get_jats_ext_link_element(expected_url)
     )
     result_ref = fix_reference(clone_node(source_ref))
     link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
     link_text = '|'.join(get_text_content_list(link_nodes))
     assert link_text == expected_url
     first_link = link_nodes[0]
     assert first_link.attrib[XLINK_HREF] == expected_url

Example #13

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_doi_pub_id_element_if_not_containing_valid_doi(
         self):
     """A DOI element whose text is 'not a doi' should leave no DOI text behind."""
     invalid_doi_element = get_jats_doi_element('not a doi')
     source_ref = get_jats_mixed_ref('doi: ', invalid_doi_element)
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == ''

Example #14

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_convert_doi_with_outside_url_prefix_to_ext_link(self):
     """A DOI element preceded by the https DOI URL prefix should become an ext-link."""
     leading_text = 'some text ' + HTTPS_DOI_URL_PREFIX
     doi_element = get_jats_doi_element(DOI_1)
     source_ref = get_jats_mixed_ref(leading_text, doi_element, 'tail text')
     result_ref = fix_reference(clone_node(source_ref))
     link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
     link_text = '|'.join(get_text_content_list(link_nodes))
     assert link_text == HTTPS_DOI_URL_PREFIX + DOI_1

Example #15

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_quotes_and_trailing_comma_from_article_title(self):
     """Double quotes and a trailing ', ' around an italic title should be removed."""
     title_element = E(
         'article-title',
         SpecialChars.LDQUO,
         E.italic(ARTICLE_TITLE_1),
         ', '
     )
     source_ref = get_jats_mixed_ref(
         'title: ', title_element, SpecialChars.RDQUO
     )
     result_ref = fix_reference(clone_node(source_ref))
     title_nodes = result_ref.xpath(JatsXpaths.ARTICLE_TITLE)
     title_text = '|'.join(get_text_content_list(title_nodes))
     assert title_text == ARTICLE_TITLE_1

Example #16

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_left_right_single_quotes_from_article_title(self):
     """Left/right single quotes wrapping the title text should be removed."""
     quoted_title = SpecialChars.LSQUO + ARTICLE_TITLE_1 + SpecialChars.RSQUO
     source_ref = get_jats_mixed_ref(
         'title: ', E('article-title', quoted_title)
     )
     result_ref = fix_reference(clone_node(source_ref))
     title_nodes = result_ref.xpath(JatsXpaths.ARTICLE_TITLE)
     title_text = '|'.join(get_text_content_list(title_nodes))
     assert title_text == ARTICLE_TITLE_1

Example #17

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_split_ext_link_containing_multiple_http_links(self):
     """One ext-link holding two concatenated URLs should become two ext-links."""
     first_url = HTTP_DOI_URL_PREFIX + DOI_1
     second_url = HTTP_DOI_URL_PREFIX + DOI_2
     combined_link_element = get_jats_ext_link_element(first_url + second_url)
     source_ref = get_jats_mixed_ref(combined_link_element)
     result_ref = fix_reference(clone_node(source_ref))
     link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
     link_urls = get_text_content_list(link_nodes)
     assert link_urls == [first_url, second_url]

Example #18

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_replace_pmid_with_too_many_digits_text(self):
     """A PMID element holding 'WOS: <id>' should yield no PMID and an other pub-id."""
     wos_in_pmid_element = get_jats_pmid_element('WOS: ' + WOS_1)
     source_ref = get_jats_mixed_ref('PMID: ', wos_in_pmid_element)
     result_ref = fix_reference(clone_node(source_ref))

     pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
     pmid_text = '|'.join(get_text_content_list(pmid_nodes))
     assert pmid_text == ''

     other_nodes = result_ref.xpath(JatsXpaths.OTHER_PUB_ID)
     other_text = '|'.join(get_text_content_list(other_nodes))
     assert other_text == WOS_1

Example #19

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_duplicate_doi_ignoring_punct_with_tail(self):
     """Two DOIs differing only in '-' versus '.' should collapse to the first one."""
     dashed_doi = DOI_1 + '.ab-123'
     dotted_doi = DOI_1 + '.ab.123'
     doubled_doi_element = get_jats_doi_element(
         dashed_doi + '; ' + dotted_doi
     )
     source_ref = get_jats_mixed_ref(
         'doi: ', doubled_doi_element, 'tail text'
     )
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == dashed_doi

Example #20

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_double_doi_in_ext_link_square_brackets(self):
     """A '[<doi>]' repeat after the DOI URL should be removed from text and href."""
     link_with_repeat = HTTPS_DOI_URL_PREFIX + DOI_1 + '[' + DOI_1 + ']'
     source_ref = get_jats_mixed_ref(
         get_jats_ext_link_element(link_with_repeat)
     )
     result_ref = fix_reference(clone_node(source_ref))
     link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
     link_text = '|'.join(get_text_content_list(link_nodes))
     assert link_text == HTTPS_DOI_URL_PREFIX + DOI_1
     first_link = link_nodes[0]
     assert first_link.attrib[XLINK_HREF] == HTTPS_DOI_URL_PREFIX + DOI_1

Example #21

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_remove_doi_duplicate_pii_suffix_from_doi_with_tail(self):
     """A repeated DOI fragment tagged ' [pii]' after the DOI should be dropped."""
     repeated_fragment = 'doi-duplicate'
     expected_doi = DOI_1 + '.' + repeated_fragment
     doi_element_text = expected_doi + '  ' + repeated_fragment + ' [pii]'
     source_ref = get_jats_mixed_ref(
         'doi: ', get_jats_doi_element(doi_element_text), 'tail text'
     )
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == expected_doi

Example #22

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_separately_annotate_pii_with_preceding_element(self):
     """A DOI element holding '<pii> [pii]; <doi> [doi]' should yield both ids separately."""
     preceding_element = E.other('other text')
     combined_element = get_jats_doi_element(
         PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
     )
     source_ref = get_jats_mixed_ref(
         preceding_element, 'doi: ', combined_element
     )
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     pii_nodes = result_ref.xpath(JatsXpaths.PII)
     pii_text = '|'.join(get_text_content_list(pii_nodes))
     assert doi_text == DOI_1
     assert pii_text == PII_1

Example #23

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_not_include_doi_colon_in_pii(self):
     """Leading 'doi:' text must not end up in the extracted PII or DOI."""
     combined_element = get_jats_doi_element(
         PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
     )
     source_ref = get_jats_mixed_ref('doi:', combined_element)
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     pii_nodes = result_ref.xpath(JatsXpaths.PII)
     pii_text = '|'.join(get_text_content_list(pii_nodes))
     assert doi_text == DOI_1
     assert pii_text == PII_1

Example #24

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_separately_annotate_invalid_pii_as_other_pub_id(self):
     """An invalid PII tagged ' [pii]' should be reported through the other pub-id xpath."""
     combined_element = get_jats_doi_element(
         INVALID_PII_1 + ' [pii]; ' + DOI_1 + ' [doi]'
     )
     source_ref = get_jats_mixed_ref('doi: ', combined_element)
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     other_nodes = result_ref.xpath(JatsXpaths.OTHER_PUB_ID)
     other_text = '|'.join(get_text_content_list(other_nodes))
     assert doi_text == DOI_1
     assert other_text == INVALID_PII_1

Example #25

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_convert_doi_with_outside_spaced_url_prefix_to_ext_link(
         self):
     """A spaced DOI URL prefix before a DOI element gives an ext-link.

     The link text keeps the spaced prefix, while the href attribute is
     expected to use the unspaced https prefix.
     """
     leading_text = 'some text ' + HTTPS_SPACED_DOI_URL_PREFIX
     doi_element = get_jats_doi_element(DOI_1)
     source_ref = get_jats_mixed_ref(leading_text, doi_element, 'tail text')
     result_ref = fix_reference(clone_node(source_ref))
     link_nodes = result_ref.xpath(JatsXpaths.EXT_LINK)
     link_text = '|'.join(get_text_content_list(link_nodes))
     assert link_text == HTTPS_SPACED_DOI_URL_PREFIX + DOI_1
     expected_attributes = {
         'ext-link-type': 'uri',
         XLINK_HREF: HTTPS_DOI_URL_PREFIX + DOI_1
     }
     assert link_nodes[0].attrib == expected_attributes

Example #26

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_be_able_to_clone_with_unicode(self):
     """Cloning a node must preserve text containing non-ASCII characters."""
     unicode_text = '\u002A\u002B\u0026\u00E9\u2122'
     clone = clone_node(E.root(unicode_text))
     assert clone.text == unicode_text

Example #27

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_annotate_missing_doi_excluding_dot(self):
     """A bare DOI followed by '.' should be found as a DOI without the dot."""
     source_ref = get_jats_mixed_ref(DOI_1 + '.')
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == DOI_1

Example #28

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_annotate_missing_doi_in_square_brackets(self):
     """A bare DOI wrapped in '[' and ']' should be found without the brackets."""
     bracketed_doi = '[' + DOI_1 + ']'
     source_ref = get_jats_mixed_ref(bracketed_doi)
     result_ref = fix_reference(clone_node(source_ref))
     doi_nodes = result_ref.xpath(JatsXpaths.DOI)
     doi_text = '|'.join(get_text_content_list(doi_nodes))
     assert doi_text == DOI_1

Example #29

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_annotate_missing_pmcid_in_comment(self):
     """A PMCID held inside a comment element should be found by the PMCID xpath."""
     comment_element = E.comment(PMCID_1)
     source_ref = get_jats_mixed_ref(comment_element)
     result_ref = fix_reference(clone_node(source_ref))
     pmcid_nodes = result_ref.xpath(JatsXpaths.PMCID)
     pmcid_text = '|'.join(get_text_content_list(pmcid_nodes))
     assert pmcid_text == PMCID_1

Example #30

0

Show file

File: fix_jats_xml_test.py Project: elifesciences/sciencebeam-trainer-grobid-tools

 def test_should_separately_annotate_pmid_with_spaces(self):
     """' PMID : <id> ' text with extra spaces should still yield the bare PMID."""
     spaced_pmid_text = ' PMID : ' + PMID_1 + ' '
     source_ref = get_jats_mixed_ref(spaced_pmid_text)
     result_ref = fix_reference(clone_node(source_ref))
     pmid_nodes = result_ref.xpath(JatsXpaths.PMID)
     pmid_text = '|'.join(get_text_content_list(pmid_nodes))
     assert pmid_text == PMID_1