Esempio n. 1
0
    def _normalize_article_dir_with_dtd(self, path):
        """
        Produce ``resolved_main.xml`` from the ``main.xml`` found in *path*.

        main.xml from Elsevier assumes the existence of a local DTD.
        This procedure installs the DTDs next to the main.xml file
        and normalizes it using xmllint in order to resolve all namespaces
        and references.

        Nothing is done when ``resolved_main.xml`` already exists in *path*.

        :param path: directory that contains the article's ``main.xml``.
        :raises ValueError: when ``main.xml`` mentions none of the known
            article DTDs, or when the xmllint invocation reports an error
            (the third value returned by ``run_shell_command`` is truthy).
        """
        main_xml = join(path, 'main.xml')
        resolved_xml = join(path, 'resolved_main.xml')
        # A previous run already produced the normalized file.
        if exists(resolved_xml):
            return

        # Use a context manager so the file handle is always released.
        with open(main_xml) as main_file:
            main_xml_content = main_file.read()

        arts = ['art501.dtd', 'art510.dtd', 'art520.dtd', 'art540.dtd']
        dtd_found = False
        for art in arts:
            if art in main_xml_content:
                # The package is identified by the DTD name without extension.
                self._extract_correct_dtd_package(art.split('.')[0], path)
                dtd_found = True

        if not dtd_found:
            message = "It looks like the path " + path
            # Leading space keeps the path separated from the sentence.
            message += " does not contain an art501, art510, art520 or art540 in main.xml file"
            self.logger.error(message)
            raise ValueError(message)

        command = [
            "xmllint", "--format", "--loaddtd",
            main_xml, "--output",
            resolved_xml
        ]
        dummy, dummy, cmd_err = run_shell_command(command)
        if cmd_err:
            message = "Error in cleaning %s: %s" % (main_xml, cmd_err)
            self.logger.error(message)
            raise ValueError(message)
Esempio n. 2
0
    def test_run_shell_for_xmllint_with_dtd540(self):
        """Check run_shell_command's output for xmllint on the sample 540 issue.

        xmllint is run with ``--loaddtd`` and ``--path`` pointing at the
        ``si540`` test data directory; the second value returned by
        run_shell_command must equal the stored ``resolved_issue.xml``.
        """
        package = 'harvestingkit.tests'
        issue_dir = os.path.join('data', 'sample_elsevier_540_issue')

        dtd_dir = pkg_resources.resource_filename(
            package, os.path.join('data', 'si540'))
        issue_xml = pkg_resources.resource_filename(
            package, os.path.join(issue_dir, 'issue.xml'))
        expected = pkg_resources.resource_string(
            package, os.path.join(issue_dir, 'resolved_issue.xml'))

        _, actual, _ = run_shell_command(
            ['xmllint', '--format', '--path', dtd_dir, '--loaddtd', issue_xml])
        self.assertEqual(actual, expected)
Esempio n. 3
0
 def test_run_shell(self):
     """Check that run_shell_command returns the output of a plain echo."""
     _, stdout, _ = run_shell_command(['echo', 'hello world'])
     expected = "hello world\n"
     self.assertEqual(stdout, expected)