Exemple #1
0
 def mock_get_ft(*args, **kwargs):
     """Mock that returns the fixture full text with its first licence element removed.

     Reads the XML file at ``EPMC_FT``, drops the first ``<license>`` element
     matched by ``//license``, re-serialises the tree and wraps the result in
     an ``epmc.EPMCFullText``.  Any positional or keyword arguments are
     accepted and ignored.
     """
     # Read inside a context manager so the fixture handle is always closed.
     with open(EPMC_FT, "r") as fixture:
         data = fixture.read()
     xml = etree.fromstring(data)
     licences = xml.xpath("//license")
     # Only the first match is detached; an IndexError here means the fixture
     # contains no <license> element at all.
     licences[0].getparent().remove(licences[0])
     s = etree.tostring(xml)
     return epmc.EPMCFullText(s)
Exemple #2
0
 def mock_get_ft(*args, **kwargs):
     """Mock that returns the fixture full text without its manuscript article-id.

     Reads the XML file at ``EPMC_FT``, drops the first ``<article-id>``
     element whose ``pub-id-type`` attribute is ``manuscript``, re-serialises
     the tree and wraps the result in an ``epmc.EPMCFullText``.  Any
     positional or keyword arguments are accepted and ignored.
     """
     # Read inside a context manager so the fixture handle is always closed.
     with open(EPMC_FT, "r") as fixture:
         data = fixture.read()
     xml = etree.fromstring(data)
     aids = xml.xpath("//article-id[@pub-id-type='manuscript']")
     # Only the first match is detached; an IndexError here means the fixture
     # has no manuscript-type article-id.
     aids[0].getparent().remove(aids[0])
     s = etree.tostring(xml)
     return epmc.EPMCFullText(s)
Exemple #3
0
    def test_07_ft_info(self):
        """Check the record fields set by ``workflow.extract_fulltext_info``.

        Runs the extraction on the unmodified ``EPMC_FT`` fixture and asserts
        that the record is flagged as having full-text XML, is marked as an
        AAM (with the AAM flag sourced from the XML), and has two provenance
        entries.
        """
        record = models.Record()
        msg = workflow.WorkflowMessage(record=record)

        # Read inside a context manager so the fixture handle is always closed.
        with open(EPMC_FT, "r") as fixture:
            data = fixture.read()
        ft = epmc.EPMCFullText(data)

        workflow.extract_fulltext_info(msg, ft)

        assert record.has_ft_xml is True
        assert len(record.provenance) == 2
        assert record.aam is True
        assert record.aam_from_xml is True
Exemple #4
0
 def mock_get_ft(*args, **kwargs):
     """Mock that returns the unmodified fixture wrapped in ``epmc.EPMCFullText``.

     Any positional or keyword arguments are accepted and ignored.
     """
     # Read inside a context manager so the fixture handle is always closed.
     with open(EPMC_FT, "r") as fixture:
         data = fixture.read()
     return epmc.EPMCFullText(data)
Exemple #5
0
    def test_08_ft_licence(self):
        """Exercise ``workflow.extract_fulltext_licence`` on variants of the fixture licence.

        The ``<license>`` element of the ``EPMC_FT`` fixture is mutated in
        place, step by step, and after each mutation the tree is serialised
        and run through the extraction with a fresh record.  Scenarios, in
        order: licence in the ``license-type`` attribute, in the xlink
        ``href`` attribute, as a URL in the paragraph text, as a URL in a
        second paragraph, as words in the text, present but unrecognised,
        and finally no licence element at all.

        The scenarios share one tree, so each depends on the state left by
        the previous ones; do not reorder them.
        """
        # Read inside a context manager so the fixture handle is always closed.
        with open(EPMC_FT, "r") as fixture:
            data = fixture.read()
        xml = etree.fromstring(data)

        licence = xml.xpath("//license")[0]
        lp = licence.find("license-p")
        xlink_href = "{http://www.w3.org/1999/xlink}href"

        def run_extraction():
            # Serialise the tree as it stands now and run the licence
            # extraction against a brand-new record, which is returned.
            s = etree.tostring(xml)
            ft = epmc.EPMCFullText(s)
            record = models.Record()
            msg = workflow.WorkflowMessage(record=record)
            workflow.extract_fulltext_licence(msg, ft)
            return record

        # licence in type attribute
        licence.set(
            "license-type", "cc by"
        )  # note the missing "-"; to test the licence representation variations at the same time
        licence.set(xlink_href, "http://random.url")
        lp.clear()
        record = run_extraction()
        assert record.licence_type == "cc-by"
        assert record.licence_source == "epmc_xml"
        assert len(record.provenance) == 1

        # licence in href attribute
        licence.set("license-type", "open access")
        licence.set(xlink_href,
                    "http://creativecommons.org/licenses/by-nd/3.0")
        record = run_extraction()
        assert record.licence_type == "cc-by-nd"
        assert record.licence_source == "epmc_xml"
        assert len(record.provenance) == 1

        # licence in text
        licence.set("license-type", "open access")
        licence.set(xlink_href, "http://random.url")
        lp.text = "licence is <a href='http://creativecommons.org/licenses/by-nc-nd/3.0'>http://creativecommons.org/licenses/by-nc-nd/3.0</a>"
        record = run_extraction()
        assert record.licence_type == "cc-by-nc-nd"
        assert record.licence_source == "epmc_xml"
        assert len(record.provenance) == 1

        # licence in /second/ licence paragraph
        lp.text = "some waffle"
        lp2 = etree.SubElement(licence, "license-p")
        lp2.text = "licence is <a href='http://creativecommons.org/licenses/by/3.0'>http://creativecommons.org/licenses/by/3.0</a>"
        record = run_extraction()
        assert record.licence_type == "cc-by"
        assert record.licence_source == "epmc_xml"
        assert len(record.provenance) == 1

        # licence in words in text
        licence.set("license-type", "open access")
        licence.set(xlink_href, "http://random.url")
        lp.text = "This is a Creative Commons Attribution-NonCommercial licenced article"
        licence.remove(lp2)
        record = run_extraction()
        assert record.licence_type == "cc-by-nc"
        assert record.licence_source == "epmc_xml"
        assert len(record.provenance) == 1

        # licence present but unrecognised
        lp.text = "wibble wibble wobble"
        record = run_extraction()
        assert record.licence_type == "non-standard-licence"
        assert record.licence_source == "epmc_xml"
        assert len(record.provenance) == 1

        # no licence element present
        licence.getparent().remove(licence)
        record = run_extraction()
        assert record.licence_type is None
        assert record.licence_source is None
        assert len(record.provenance) == 0