def mock_get_ft(*args, **kwargs):
    """Build an EPMC full-text object from the fixture with its licence removed.

    Any positional or keyword arguments are accepted and ignored.

    Returns:
        An ``epmc.EPMCFullText`` wrapping the serialised fixture XML after the
        first ``<license>`` element has been removed.
    """
    # Read through a context manager so the fixture file handle is closed
    # deterministically rather than left for the garbage collector.
    with open(EPMC_FT, "r") as fixture:
        data = fixture.read()

    xml = etree.fromstring(data)

    # Detach the first <license> element from its parent.
    licences = xml.xpath("//license")
    licences[0].getparent().remove(licences[0])

    return epmc.EPMCFullText(etree.tostring(xml))
def mock_get_ft(*args, **kwargs):
    """Build an EPMC full-text object from the fixture without its manuscript id.

    Any positional or keyword arguments are accepted and ignored.

    Returns:
        An ``epmc.EPMCFullText`` wrapping the serialised fixture XML after the
        first ``<article-id pub-id-type="manuscript">`` element has been
        removed.
    """
    # Read through a context manager so the fixture file handle is closed
    # deterministically rather than left for the garbage collector.
    with open(EPMC_FT, "r") as fixture:
        data = fixture.read()

    xml = etree.fromstring(data)

    # Detach the first manuscript article-id element from its parent.
    manuscript_ids = xml.xpath("//article-id[@pub-id-type='manuscript']")
    manuscript_ids[0].getparent().remove(manuscript_ids[0])

    return epmc.EPMCFullText(etree.tostring(xml))
def test_07_ft_info(self):
    """Check what ``workflow.extract_fulltext_info`` records for the fixture.

    Runs the extraction against the unmodified EPMC full-text fixture and
    asserts that the record is flagged as having full-text XML, carries two
    provenance entries, and is marked as an AAM sourced from the XML.
    """
    record = models.Record()
    msg = workflow.WorkflowMessage(record=record)

    # Read through a context manager so the fixture file handle is closed
    # deterministically rather than left for the garbage collector.
    with open(EPMC_FT, "r") as fixture:
        data = fixture.read()
    ft = epmc.EPMCFullText(data)

    workflow.extract_fulltext_info(msg, ft)

    assert record.has_ft_xml is True
    assert len(record.provenance) == 2
    assert record.aam is True
    assert record.aam_from_xml is True
def mock_get_ft(*args, **kwargs):
    """Build an EPMC full-text object from the unmodified fixture file.

    Any positional or keyword arguments are accepted and ignored.

    Returns:
        An ``epmc.EPMCFullText`` wrapping the raw fixture contents.
    """
    # Read through a context manager so the fixture file handle is closed
    # deterministically rather than left for the garbage collector.
    with open(EPMC_FT, "r") as fixture:
        data = fixture.read()
    return epmc.EPMCFullText(data)
def test_08_ft_licence(self):
    """Exercise ``workflow.extract_fulltext_licence`` across licence encodings.

    The fixture's first ``<license>`` element is mutated step by step and the
    extraction is re-run on a fresh record after each mutation. Scenarios, in
    order: licence in the ``license-type`` attribute, in the xlink ``href``
    attribute, as a URL in the licence text, as a URL in a second licence
    paragraph, as words in the text, present but unrecognised, and finally no
    licence element at all.
    """
    xlink_href = "{http://www.w3.org/1999/xlink}href"

    # Read through a context manager so the fixture file handle is closed
    # deterministically rather than left for the garbage collector.
    with open(EPMC_FT, "r") as fixture:
        data = fixture.read()

    xml = etree.fromstring(data)
    licence = xml.xpath("//license")[0]
    lp = licence.find("license-p")

    def extract_licence():
        """Serialise the tree as it stands, run extraction, return the new record."""
        ft = epmc.EPMCFullText(etree.tostring(xml))
        record = models.Record()
        msg = workflow.WorkflowMessage(record=record)
        workflow.extract_fulltext_licence(msg, ft)
        return record

    # licence in type attribute
    # note the missing "-"; to test the licence representation variations at
    # the same time
    licence.set("license-type", "cc by")
    licence.set(xlink_href, "http://random.url")
    lp.clear()
    record = extract_licence()
    assert record.licence_type == "cc-by"
    assert record.licence_source == "epmc_xml"
    assert len(record.provenance) == 1

    # licence in href attribute
    licence.set("license-type", "open access")
    licence.set(xlink_href, "http://creativecommons.org/licenses/by-nd/3.0")
    record = extract_licence()
    assert record.licence_type == "cc-by-nd"
    assert record.licence_source == "epmc_xml"
    assert len(record.provenance) == 1

    # licence in text
    licence.set("license-type", "open access")
    licence.set(xlink_href, "http://random.url")
    lp.text = "licence is <a href='http://creativecommons.org/licenses/by-nc-nd/3.0'>http://creativecommons.org/licenses/by-nc-nd/3.0</a>"
    record = extract_licence()
    assert record.licence_type == "cc-by-nc-nd"
    assert record.licence_source == "epmc_xml"
    assert len(record.provenance) == 1

    # licence in /second/ licence paragraph
    lp.text = "some waffle"
    lp2 = etree.SubElement(licence, "license-p")
    lp2.text = "licence is <a href='http://creativecommons.org/licenses/by/3.0'>http://creativecommons.org/licenses/by/3.0</a>"
    record = extract_licence()
    assert record.licence_type == "cc-by"
    assert record.licence_source == "epmc_xml"
    assert len(record.provenance) == 1

    # licence in words in text
    licence.set("license-type", "open access")
    licence.set(xlink_href, "http://random.url")
    lp.text = "This is a Creative Commons Attribution-NonCommercial licenced article"
    licence.remove(lp2)
    record = extract_licence()
    assert record.licence_type == "cc-by-nc"
    assert record.licence_source == "epmc_xml"
    assert len(record.provenance) == 1

    # licence present but unrecognised
    lp.text = "wibble wibble wobble"
    record = extract_licence()
    assert record.licence_type == "non-standard-licence"
    assert record.licence_source == "epmc_xml"
    assert len(record.provenance) == 1

    # no licence element present
    licence.getparent().remove(licence)
    record = extract_licence()
    assert record.licence_type is None
    assert record.licence_source is None
    assert len(record.provenance) == 0