예제 #1
0
    def __init__(self, schema_file, xml_path):
        """Locate the RelaxNG schema and prepare the parser state.

        schema_file -- file name of the schema; looked up first next to
                       the running script (sys.argv[0]) and then in the
                       "resource_dir" option of the "environment"
                       section of the LNST configuration
        xml_path    -- path of the XML document, stored in self._path

        Raises Exception when the schema exists in neither location.
        """
        # First candidate: the directory of the running script (git path).
        script_dir = os.path.dirname(sys.argv[0])
        schema_location = os.path.join(script_dir, schema_file)

        if not os.path.exists(schema_location):
            # Second candidate: the configured resource directory.
            resource_dir = lnst_config.get_option("environment",
                                                  "resource_dir")
            schema_location = os.path.join(resource_dir, schema_file)

            if not os.path.exists(schema_location):
                raise Exception("The recipe schema file was not found. "
                                "Your LNST installation is corrupt!")

        self._path = xml_path
        self._template_proc = XmlTemplates()

        # Compile the schema once; it is reused for validation later.
        self._schema = etree.RelaxNG(etree.parse(schema_location))
예제 #2
0
파일: XmlParser.py 프로젝트: vicgc/lnst
    def __init__(self, schema_file, xml_path):
        """Find the schema file and build the RelaxNG validator.

        The schema named by ``schema_file`` is searched for in the
        directory of sys.argv[0]; if it is not there, the directory
        given by the ("environment", "resource_dir") configuration
        option is used instead. ``xml_path`` is remembered in
        ``self._path``.

        Raises Exception if the schema file cannot be found.
        """
        found_at = os.path.join(os.path.dirname(sys.argv[0]), schema_file)
        if not os.path.exists(found_at):
            # Not running from a git checkout: consult the configuration.
            found_at = os.path.join(
                lnst_config.get_option("environment", "resource_dir"),
                schema_file)

        schema_missing = not os.path.exists(found_at)
        if schema_missing:
            message = "The recipe schema file was not found. "
            message += "Your LNST installation is corrupt!"
            raise Exception(message)

        self._template_proc = XmlTemplates()
        self._path = xml_path

        schema_tree = etree.parse(found_at)
        self._schema = etree.RelaxNG(schema_tree)
예제 #3
0
class XmlParser(object):
    """Load an XML document, resolve its XIncludes and validate it.

    The document at ``xml_path`` is parsed, comments are removed,
    XInclude elements are replaced by the root element of the document
    they reference, aliases are processed by ``XmlTemplates`` and the
    result is validated against a RelaxNG schema before the root
    element is handed to ``_process()``.
    """

    # Matches the qualified tag of an XInclude <include> element; the
    # year in the namespace URI is matched loosely (any four digits).
    XINCLUDE_RE = r"\{http\:\/\/www\.w3\.org\/[0-9]{4}\/XInclude\}include"

    def __init__(self, schema_file, xml_path):
        """Locate the schema file and compile the RelaxNG validator.

        schema_file -- file name of the schema; searched for next to
                       the running script (sys.argv[0]) and then in the
                       configured "resource_dir"
        xml_path    -- path of the XML document to parse

        Raises Exception when the schema is found in neither place.
        """
        # locate the schema file
        # try git path
        dirname = os.path.dirname(sys.argv[0])
        schema_path = os.path.join(dirname, schema_file)
        if not os.path.exists(schema_path):
            # try configuration
            res_dir = lnst_config.get_option("environment", "resource_dir")
            schema_path = os.path.join(res_dir, schema_file)

        if not os.path.exists(schema_path):
            raise Exception("The recipe schema file was not found. " + \
                            "Your LNST installation is corrupt!")

        self._template_proc = XmlTemplates()

        self._path = xml_path
        relaxng_doc = etree.parse(schema_path)
        self._schema = etree.RelaxNG(relaxng_doc)

    def parse(self):
        """Parse, expand, validate and process the document.

        Returns whatever ``_process()`` returns for the root element.
        Raises XmlProcessingError (with a location set) when the
        document cannot be parsed or does not match the schema.
        """
        doc = self._parse(self._path)
        self._remove_comments(doc)

        # Due to a weird implementation of XInclude in lxml, the
        # XmlParser resolves included documents on its own.
        #
        # To be able to tell later on where each tag was located
        # in the XML document, we add a '__file' attribute to
        # each element of the tree during the parsing.
        #
        # However, these special attributes are of course not
        # valid according to our schemas. To solve this, a copy of
        # the tree is made and the '__file' attributes are removed
        # before validation.
        #
        # XXX This is an *EXTREMELY* dirty hack. Ideas/proposals
        # for cleaner solutions are more than welcome!
        root_tag = self._init_loc(doc.getroot(), self._path)
        self._expand_xinclude(root_tag, os.path.dirname(self._path))

        self._template_proc.process_aliases(root_tag)

        try:
            self._validate(doc)
        except etree.DocumentInvalid:
            # Only a schema violation is translated here. A bare
            # "except:" would also swallow KeyboardInterrupt/SystemExit
            # and then fail on an empty error log, hiding the real error.
            err = self._schema.error_log[0]
            loc = {
                "file": os.path.basename(err.filename),
                "line": err.line,
                "col": err.column
            }
            exc = XmlProcessingError(err.message)
            exc.set_loc(loc)
            raise exc

        return self._process(root_tag)

    def _parse(self, path):
        """Parse the XML file at ``path`` and return the resulting tree.

        Every failure is re-raised as XmlProcessingError whose location
        dict has the keys "file", "line" and "col"; line and column are
        None when the failure is not an lxml error.
        """
        try:
            doc = etree.parse(path)
        except etree.LxmlError as err:
            entry = err.error_log[0]
            # A workaround for cases when lxml (quite strangely)
            # sets the filename to <string>. Fall back to the file
            # that is actually being parsed, which may be an included
            # document rather than the top-level one.
            if entry.filename == "<string>":
                filename = path
            else:
                filename = entry.filename
            loc = {
                "file": os.path.basename(filename),
                "line": entry.line,
                "col": entry.column
            }
            exc = XmlProcessingError(entry.message)
            exc.set_loc(loc)
            raise exc
        except Exception as err:
            # E.g. the file could not be read; no line/column available.
            loc = {
                "file": os.path.basename(path),
                "line": None,
                "col": None
            }
            exc = XmlProcessingError(str(err))
            exc.set_loc(loc)
            raise exc

        return doc

    def _process(self, root_tag):
        """Handle the validated root element.

        Does nothing here and returns None; presumably meant to be
        overridden by subclasses.
        """
        pass

    def set_machines(self, machines):
        """Forward ``machines`` to the template processor."""
        self._template_proc.set_machines(machines)

    def set_aliases(self, defined, overriden):
        """Forward the defined and overridden aliases to the template
        processor."""
        self._template_proc.set_aliases(defined, overriden)

    def _has_attribute(self, element, attr):
        """Return True when ``element`` carries the attribute ``attr``."""
        return attr in element.attrib

    def _get_attribute(self, element, attr):
        """Return the stripped value of attribute ``attr`` after the
        template processor expanded functions in it.

        Raises KeyError when the attribute is missing.
        """
        text = element.attrib[attr].strip()
        return self._template_proc.expand_functions(text)

    def _get_content(self, element):
        """Return the stripped text content of ``element`` after the
        template processor expanded functions in it."""
        text = etree.tostring(element, method="text").strip()
        return self._template_proc.expand_functions(text)

    def _expand_xinclude(self, elem, base_url=""):
        """Recursively replace XInclude elements below ``elem``.

        Each child whose tag matches XINCLUDE_RE is replaced by the root
        element of the document named by its "href" attribute (resolved
        against ``base_url``); includes inside the included document are
        resolved relative to that document's directory. Other children
        are descended into. Returns ``elem``.
        """
        # Iterate over a snapshot because children are replaced in place.
        for e in list(elem):
            if re.match(self.XINCLUDE_RE, str(e.tag)):
                href = os.path.join(base_url, e.get("href"))

                doc = self._parse(href)
                self._remove_comments(doc)
                node = doc.getroot()

                node = self._init_loc(node, href)

                # Preserve the text that followed the include element.
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                self._expand_xinclude(node, os.path.dirname(href))

                # e is a direct child of elem, so elem is its parent.
                elem.replace(e, node)
            else:
                self._expand_xinclude(e, base_url)
        return elem

    def _remove_comments(self, doc):
        """Remove all comment nodes that have a parent from the tree."""
        comments = doc.xpath('//comment()')
        for c in comments:
            p = c.getparent()
            if p is not None:
                p.remove(c)

    def _init_loc(self, elem, filename):
        """Record the source file of ``elem`` and all its descendants.

        Sets the '__file' attribute of every element in the subtree to
        ``filename`` and returns ``elem``.
        """
        elem.attrib["__file"] = filename
        for e in elem:
            self._init_loc(e, filename)

        return elem

    def _validate(self, original):
        """
            Make a copy of the tree, remove the '__file' attributes
            and validate against the appropriate schema.

            Very unfortunate solution.

            Raises whatever the schema's assertValid() raises when
            the copy does not conform.
        """
        doc = copy.deepcopy(original)
        root = doc.getroot()

        self._prepare_tree_for_validation(root)
        self._schema.assertValid(doc)

    def _prepare_tree_for_validation(self, elem):
        """Delete the '__file' attribute from ``elem`` and, recursively,
        from all of its descendants."""
        if "__file" in elem.attrib:
            del elem.attrib["__file"]
        for e in elem:
            self._prepare_tree_for_validation(e)
예제 #4
0
파일: XmlParser.py 프로젝트: idosch/lnst
class XmlParser(object):
    """Load an XML document, resolve its XIncludes and validate it.

    The document at ``xml_path`` (a local path or an https URL) is
    parsed, comments are removed, XInclude elements are replaced by the
    root element of the document they reference, aliases are processed
    by ``XmlTemplates`` and the result is validated against a RelaxNG
    schema before the root element is handed to ``_process()``.
    """

    # Matches the qualified tag of an XInclude <include> element; the
    # year in the namespace URI is matched loosely (any four digits).
    XINCLUDE_RE = r"\{http\:\/\/www\.w3\.org\/[0-9]{4}\/XInclude\}include"

    def __init__(self, schema_file, xml_path):
        """Locate the schema file and compile the RelaxNG validator.

        schema_file -- file name of the schema; searched for next to
                       the running script (sys.argv[0]) and then in the
                       configured "resource_dir"
        xml_path    -- path of the XML document to parse

        Raises Exception when the schema is found in neither place.
        """
        # locate the schema file
        # try git path
        dirname = os.path.dirname(sys.argv[0])
        schema_path = os.path.join(dirname, schema_file)
        if not os.path.exists(schema_path):
            # try configuration
            res_dir = lnst_config.get_option("environment", "resource_dir")
            schema_path = os.path.join(res_dir, schema_file)

        if not os.path.exists(schema_path):
            raise Exception("The recipe schema file was not found. " + \
                            "Your LNST installation is corrupt!")

        self._template_proc = XmlTemplates()

        self._path = xml_path
        relaxng_doc = etree.parse(schema_path)
        self._schema = etree.RelaxNG(relaxng_doc)

    def parse(self):
        """Parse, expand, validate and process the document.

        Returns whatever ``_process()`` returns for the root element.
        Raises XmlProcessingError (with a location set) when the
        document cannot be parsed or does not match the schema.
        """
        doc = self._parse(self._path)
        self._remove_comments(doc)

        # Due to a weird implementation of XInclude in lxml, the
        # XmlParser resolves included documents on its own.
        #
        # To be able to tell later on where each tag was located
        # in the XML document, we add a '__file' attribute to
        # each element of the tree during the parsing.
        #
        # However, these special attributes are of course not
        # valid according to our schemas. To solve this, a copy of
        # the tree is made and the '__file' attributes are removed
        # before validation.
        #
        # XXX This is an *EXTREMELY* dirty hack. Ideas/proposals
        # for cleaner solutions are more than welcome!
        root_tag = self._init_loc(doc.getroot(), self._path)
        self._expand_xinclude(root_tag, os.path.dirname(self._path))

        self._template_proc.process_aliases(root_tag)

        try:
            self._validate(doc)
        except etree.DocumentInvalid:
            # Only a schema violation is translated here. A bare
            # "except:" would also swallow KeyboardInterrupt/SystemExit
            # and then fail on an empty error log, hiding the real error.
            err = self._schema.error_log[0]
            loc = {"file": os.path.basename(err.filename),
                   "line": err.line, "col": err.column}
            exc = XmlProcessingError(err.message)
            exc.set_loc(loc)
            raise exc

        return self._process(root_tag)

    def _parse(self, path):
        """Parse the XML document at ``path`` and return the tree.

        Paths starting with "https://" are fetched with urlopen() and
        parsed from the response; everything else is passed to lxml
        directly. Every failure is re-raised as XmlProcessingError whose
        location dict has the keys "file", "line" and "col"; line and
        column are None when the failure is not an lxml error.
        """
        # Local import: the file's import block is not touched here.
        from contextlib import closing

        try:
            # Require the full scheme prefix so that a local path that
            # merely begins with "https" is not sent to urlopen().
            if path.startswith('https://'):
                # Close the HTTP response once lxml has consumed it.
                with closing(urlopen(path)) as response:
                    doc = etree.parse(response)
            else:
                doc = etree.parse(path)
        except etree.LxmlError as err:
            entry = err.error_log[0]
            # A workaround for cases when lxml (quite strangely)
            # sets the filename to <string>. Fall back to the file
            # that is actually being parsed, which may be an included
            # document rather than the top-level one.
            if entry.filename == "<string>":
                filename = path
            else:
                filename = entry.filename
            loc = {"file": os.path.basename(filename),
                   "line": entry.line,
                   "col": entry.column}
            exc = XmlProcessingError(entry.message)
            exc.set_loc(loc)
            raise exc
        except Exception as err:
            # E.g. the file could not be read; no line/column available.
            loc = {"file": os.path.basename(path),
                   "line": None,
                   "col": None}
            exc = XmlProcessingError(str(err))
            exc.set_loc(loc)
            raise exc

        return doc

    def _process(self, root_tag):
        """Handle the validated root element.

        Does nothing here and returns None; presumably meant to be
        overridden by subclasses.
        """
        pass

    def set_machines(self, machines):
        """Forward ``machines`` to the template processor."""
        self._template_proc.set_machines(machines)

    def set_aliases(self, defined, overriden):
        """Forward the defined and overridden aliases to the template
        processor."""
        self._template_proc.set_aliases(defined, overriden)

    def _has_attribute(self, element, attr):
        """Return True when ``element`` carries the attribute ``attr``."""
        return attr in element.attrib

    def _get_attribute(self, element, attr):
        """Return the stripped value of attribute ``attr`` after the
        template processor expanded functions in it.

        Raises KeyError when the attribute is missing.
        """
        text = element.attrib[attr].strip()
        return self._template_proc.expand_functions(text)

    def _get_content(self, element):
        """Return the stripped text content of ``element`` after the
        template processor expanded functions in it."""
        text = etree.tostring(element, method="text").strip()
        return self._template_proc.expand_functions(text)

    def _expand_xinclude(self, elem, base_url=""):
        """Recursively replace XInclude elements below ``elem``.

        Each child whose tag matches XINCLUDE_RE is replaced by the root
        element of the document named by its "href" attribute (resolved
        against ``base_url``); includes inside the included document are
        resolved relative to that document's directory. Other children
        are descended into. Returns ``elem``.
        """
        # Iterate over a snapshot because children are replaced in place.
        for e in list(elem):
            if re.match(self.XINCLUDE_RE, str(e.tag)):
                href = os.path.join(base_url, e.get("href"))

                doc = self._parse(href)
                self._remove_comments(doc)
                node = doc.getroot()

                node = self._init_loc(node, href)

                # Preserve the text that followed the include element.
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                self._expand_xinclude(node, os.path.dirname(href))

                # e is a direct child of elem, so elem is its parent.
                elem.replace(e, node)
            else:
                self._expand_xinclude(e, base_url)
        return elem

    def _remove_comments(self, doc):
        """Remove all comment nodes that have a parent from the tree."""
        comments = doc.xpath('//comment()')
        for c in comments:
            p = c.getparent()
            if p is not None:
                p.remove(c)

    def _init_loc(self, elem, filename):
        """Record the source file of ``elem`` and all its descendants.

        Sets the '__file' attribute of every element in the subtree to
        ``filename`` and returns ``elem``.
        """
        elem.attrib["__file"] = filename
        for e in elem:
            self._init_loc(e, filename)

        return elem

    def _validate(self, original):
        """
            Make a copy of the tree, remove the '__file' attributes
            and validate against the appropriate schema.

            Very unfortunate solution.

            Raises whatever the schema's assertValid() raises when
            the copy does not conform.
        """
        doc = copy.deepcopy(original)
        root = doc.getroot()

        self._prepare_tree_for_validation(root)
        self._schema.assertValid(doc)

    def _prepare_tree_for_validation(self, elem):
        """Delete the '__file' attribute from ``elem`` and, recursively,
        from all of its descendants."""
        if "__file" in elem.attrib:
            del elem.attrib["__file"]
        for e in elem:
            self._prepare_tree_for_validation(e)