Example #1
0
    def add_document_by_url(self, url):
        """Load the schema found at ``url`` and add it as a resolved document.

        The XML is fetched with ``load_external`` using this object's
        transport and the ``strict`` flag from its settings. The resulting
        node is handed to ``create_new_document`` and the new document is
        resolved right away.

        :param url: location of the schema document to load

        """
        root = load_external(
            url, self._transport, strict=self.settings.strict)

        # Create the document for the freshly loaded node and resolve it.
        self.create_new_document(root, url=url).resolve()
Example #2
0
    def add_document_by_url(self, url):
        """Load the schema found at ``url`` and add it as a resolved document.

        The XML is fetched with ``load_external`` using this object's
        transport and its ``strict`` attribute. The resulting node is handed
        to ``create_new_document`` and the new document is resolved right
        away.

        :param url: location of the schema document to load

        """
        root = load_external(url, self._transport, strict=self.strict)

        # Create the document for the freshly loaded node and resolve it.
        self.create_new_document(root, url=url).resolve()
Example #3
0
    def _get_xml_document(self, location):
        """Fetch and parse the XML document found at ``location``.

        The work is delegated to ``load_external``, which receives this
        object's transport, its own ``location`` and its settings.

        :param location: The URL of the document to load
        :type location: string
        :returns: the value returned by ``load_external``

        """
        transport, own_location = self.transport, self.location
        return load_external(
            location, transport, own_location, settings=self.settings)
Example #4
0
    def _get_xml_document(self, location: typing.IO) -> etree._Element:
        """Fetch and parse the XML document found at ``location``.

        The work is delegated to ``load_external``, which receives this
        object's transport, its own ``location`` and its settings.

        :param location: The URL of the document to load
        :type location: string
        :returns: the value returned by ``load_external``

        """
        # NOTE(review): the annotation says ``typing.IO`` while the docstring
        # describes a URL string -- confirm which one is intended.
        transport, own_location = self.transport, self.location
        return load_external(
            location, transport, own_location, settings=self.settings
        )
Example #5
0
    def visit_include(self, node, parent):
        """Process an ``xsd:include`` by visiting the included schema's children.

        Definition::

            <include
              id = ID
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </include>

        Locations already recorded in ``self._includes`` are skipped, so the
        same ``schemaLocation`` value is only loaded and processed once.

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises NotImplementedError: if ``schemaLocation`` is missing or empty

        """
        location = node.get('schemaLocation')
        if not location:
            raise NotImplementedError("schemaLocation is required")

        if location in self._includes:
            return

        schema_node = load_external(location,
                                    self.schema._transport,
                                    base_url=self.document._base_url,
                                    strict=self.schema.strict)
        self._includes.add(location)

        # When the included document has no default namespace defined but the
        # parent document does have this then we should (at least for #360)
        # transfer the default namespace to the included schema. We can't
        # update the nsmap of elements in lxml so we create a new schema with
        # the correct nsmap and move all the content there.
        if not schema_node.nsmap.get(None) and node.nsmap.get(None):
            nsmap = {None: node.nsmap[None]}
            nsmap.update(schema_node.nsmap)
            new = etree.Element(schema_node.tag, nsmap=nsmap)
            for child in schema_node:
                new.append(child)
            # Carry the root attributes over as well: the rebuilt element is
            # passed on as ``parent`` below and would otherwise silently lose
            # everything that was declared on the included schema root.
            for key, value in schema_node.attrib.items():
                new.set(key, value)
            schema_node = new

        # Iterate directly over the children
        for child in schema_node:
            self.process(child, parent=schema_node)
Example #6
0
 def _retrieve_data(self, url: typing.IO, base_url=None):
     """Load external data through ``load_external``.

     The schema's transport and settings are forwarded together with the
     given ``url`` and ``base_url``; the result of ``load_external`` is
     returned unchanged.

     :param url: what to load (passed straight to ``load_external``)
     :param base_url: optional third positional argument for
         ``load_external``; defaults to ``None``

     """
     transport = self.schema._transport
     return load_external(
         url, transport, base_url, settings=self.schema.settings)
Example #7
0
    def visit_include(self, node, parent):
        """Process an ``xsd:include`` by visiting the included schema's children.

        Definition::

            <include
              id = ID
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </include>

        Locations already recorded in ``self._includes`` are skipped. While
        the included children are processed, the document's element form,
        attribute form and base url are temporarily replaced by the values
        of the included schema; they are restored afterwards, even when
        processing raises.

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises NotImplementedError: if ``schemaLocation`` is missing or empty

        """
        location = node.get('schemaLocation')
        if not location:
            raise NotImplementedError("schemaLocation is required")

        if location in self._includes:
            return

        schema_node = load_external(location,
                                    self.schema._transport,
                                    base_url=self.document._base_url,
                                    settings=self.schema.settings)
        self._includes.add(location)

        # When the included document has no default namespace defined but the
        # parent document does have this then we should (at least for #360)
        # transfer the default namespace to the included schema. We can't
        # update the nsmap of elements in lxml so we create a new schema with
        # the correct nsmap and move all the content there.
        if not schema_node.nsmap.get(None) and node.nsmap.get(None):
            nsmap = {None: node.nsmap[None]}
            nsmap.update(schema_node.nsmap)
            new = etree.Element(schema_node.tag, nsmap=nsmap)
            for child in schema_node:
                new.append(child)
            for key, value in schema_node.attrib.items():
                new.set(key, value)
            schema_node = new

        # Use the element/attribute form defaults from the schema while
        # processing the nodes.
        element_form_default = self.document._element_form
        attribute_form_default = self.document._attribute_form
        base_url = self.document._base_url

        try:
            self.document._element_form = schema_node.get(
                'elementFormDefault', 'unqualified')
            self.document._attribute_form = schema_node.get(
                'attributeFormDefault', 'unqualified')
            self.document._base_url = absolute_location(
                location, self.document._base_url)

            # Iterate directly over the children.
            for child in schema_node:
                self.process(child, parent=schema_node)
        finally:
            # Always put the including document's own state back so a failure
            # inside the included schema cannot leave it half-switched.
            self.document._element_form = element_form_default
            self.document._attribute_form = attribute_form_default
            self.document._base_url = base_url
Example #8
0
    def visit_import(self, node, parent):
        """Process an ``xsd:import`` and register the imported document.

        Definition::

            <import
              id = ID
              namespace = anyURI
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </import>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :returns: the already known or newly created document, or ``None``
            when the import has no location that can be loaded
        :raises XMLParseError: if neither the import nor the importing
            document has a namespace, or if the imported schema's
            targetNamespace differs from the one on the import

        """
        namespace = node.get('namespace')
        location = node.get('schemaLocation')
        if location:
            location = absolute_location(location, self.document._base_url)

        if not namespace and not self.document._target_namespace:
            # Report against the importing document, using the same attribute
            # as the namespace-mismatch error further down.
            raise XMLParseError(
                "The attribute 'namespace' must be existent if the "
                "importing schema has no target namespace.",
                filename=self.document._location,
                sourceline=node.sourceline)

        # Check if the schema is already imported before based on the
        # namespace. Schema's without namespace are registered as 'None'
        document = self.schema.documents.get_by_namespace_and_location(
            namespace, location)
        if document:
            logger.debug("Returning existing schema: %r", location)
            self.register_import(namespace, document)
            return document

        # Hardcode the mapping between the xml namespace and the xsd for now.
        # This seems to fix issues with exchange wsdl's, see #220
        if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
            location = 'https://www.w3.org/2001/xml.xsd'

        # Silently ignore import statements which we can't resolve via the
        # namespace and doesn't have a schemaLocation attribute.
        if not location:
            logger.debug(
                "Ignoring import statement for namespace %r "
                "(missing schemaLocation)", namespace)
            return

        # Load the XML
        schema_node = load_external(location,
                                    self.schema._transport,
                                    settings=self.schema.settings)

        # Check if the xsd:import namespace matches the targetNamespace. If
        # the xsd:import statement didn't specify a namespace then make sure
        # that the targetNamespace wasn't declared by another schema yet.
        schema_tns = schema_node.get('targetNamespace')
        if namespace and schema_tns and namespace != schema_tns:
            raise XMLParseError(
                ("The namespace defined on the xsd:import doesn't match the "
                 "imported targetNamespace located at %r ") % (location),
                filename=self.document._location,
                sourceline=node.sourceline)

        schema = self.schema.create_new_document(schema_node, location)
        self.register_import(namespace, schema)
        return schema
Example #9
0
    def visit_import(self, node, parent):
        """Process an ``xsd:import`` and register the imported document.

        Definition::

            <import
              id = ID
              namespace = anyURI
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </import>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :returns: the already known or newly created document, or ``None``
            when the import has no location that can be loaded
        :raises XMLParseError: if neither the import nor the importing
            document has a namespace, or if the imported schema's
            targetNamespace differs from the one on the import

        """
        namespace = node.get('namespace')
        location = node.get('schemaLocation')
        if location:
            location = normalize_location(
                self.schema.settings, location, self.document._location
            )

        if not namespace and not self.document._target_namespace:
            # Report against the importing document, using the same attribute
            # as the namespace-mismatch error further down.
            raise XMLParseError(
                "The attribute 'namespace' must be existent if the "
                "importing schema has no target namespace.",
                filename=self.document._location,
                sourceline=node.sourceline)

        # We found an empty <import/> statement, this needs to trigger 4.1.2
        # from https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-resolve
        # for QName resolving.
        # In essence this means we will resolve QNames without a namespace to no
        # namespace instead of the target namespace.
        # The following code snippet works because imports have to occur before we
        # visit elements.
        if not namespace and not location:
            self.document._has_empty_import = True

        # Check if the schema is already imported before based on the
        # namespace. Schema's without namespace are registered as 'None'
        document = self.schema.documents.get_by_namespace_and_location(
            namespace, location)
        if document:
            logger.debug("Returning existing schema: %r", location)
            self.register_import(namespace, document)
            return document

        # Hardcode the mapping between the xml namespace and the xsd for now.
        # This seems to fix issues with exchange wsdl's, see #220
        if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
            location = 'https://www.w3.org/2001/xml.xsd'

        # Silently ignore import statements which we can't resolve via the
        # namespace and doesn't have a schemaLocation attribute.
        if not location:
            logger.debug(
                "Ignoring import statement for namespace %r "
                "(missing schemaLocation)", namespace)
            return

        # Load the XML
        schema_node = load_external(
            location,
            transport=self.schema._transport,
            base_url=self.document._location,
            settings=self.schema.settings)

        # Check if the xsd:import namespace matches the targetNamespace. If
        # the xsd:import statement didn't specify a namespace then make sure
        # that the targetNamespace wasn't declared by another schema yet.
        schema_tns = schema_node.get('targetNamespace')
        if namespace and schema_tns and namespace != schema_tns:
            raise XMLParseError((
                "The namespace defined on the xsd:import doesn't match the "
                "imported targetNamespace located at %r "
                ) % (location),
                filename=self.document._location,
                sourceline=node.sourceline)

        # If the imported schema doesn't define a target namespace and the
        # node doesn't specify it either then inherit the existing target
        # namespace.
        elif not schema_tns and not namespace:
            namespace = self.document._target_namespace

        schema = self.schema.create_new_document(
            schema_node, location, target_namespace=namespace)
        self.register_import(namespace, schema)
        return schema
Example #10
0
    def visit_include(self, node, parent):
        """Process an ``xsd:include`` by visiting the included schema's children.

        Definition::

            <include
              id = ID
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </include>

        Locations already recorded in ``self._includes`` are skipped. While
        the included children are processed, the document's element form,
        attribute form and base url are temporarily replaced by the values
        of the included schema; they are restored afterwards, even when
        processing raises.

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises NotImplementedError: if ``schemaLocation`` is missing or empty

        """
        location = node.get("schemaLocation")
        if not location:
            raise NotImplementedError("schemaLocation is required")

        if location in self._includes:
            return

        schema_node = load_external(
            location,
            self.schema._transport,
            base_url=self.document._base_url,
            settings=self.schema.settings,
        )
        self._includes.add(location)

        # When the included document has no default namespace defined but the
        # parent document does have this then we should (at least for #360)
        # transfer the default namespace to the included schema. We can't
        # update the nsmap of elements in lxml so we create a new schema with
        # the correct nsmap and move all the content there.

        # Included schemas must have targetNamespace equal to parent schema (the including) or None.
        # If included schema doesn't have default ns, then it should be set to parent's targetNs.
        # See Chameleon Inclusion https://www.w3.org/TR/xmlschema11-1/#chameleon-xslt
        if not schema_node.nsmap.get(None):
            default_ns = (
                node.nsmap.get(None) or parent.attrib.get("targetNamespace"))
            if default_ns:
                nsmap = {None: default_ns}
                nsmap.update(schema_node.nsmap)
                new = etree.Element(schema_node.tag, nsmap=nsmap)
                for child in schema_node:
                    new.append(child)
                for key, value in schema_node.attrib.items():
                    new.set(key, value)
                if not new.attrib.get("targetNamespace"):
                    # The default namespace may have come from the include
                    # node while the parent has no targetNamespace at all;
                    # only inherit the attribute when it actually exists
                    # instead of failing with a KeyError.
                    parent_tns = parent.attrib.get("targetNamespace")
                    if parent_tns is not None:
                        new.attrib["targetNamespace"] = parent_tns
                schema_node = new

        # Use the element/attribute form defaults from the schema while
        # processing the nodes.
        element_form_default = self.document._element_form
        attribute_form_default = self.document._attribute_form
        base_url = self.document._base_url

        try:
            self.document._element_form = schema_node.get(
                "elementFormDefault", "unqualified")
            self.document._attribute_form = schema_node.get(
                "attributeFormDefault", "unqualified")
            self.document._base_url = absolute_location(
                location, self.document._base_url)

            # Iterate directly over the children.
            for child in schema_node:
                self.process(child, parent=schema_node)
        finally:
            # Always put the including document's own state back so a failure
            # inside the included schema cannot leave it half-switched.
            self.document._element_form = element_form_default
            self.document._attribute_form = attribute_form_default
            self.document._base_url = base_url
Example #11
0
    def visit_include(self, node, parent):
        """Process an ``xsd:include`` by visiting the included schema's children.

        Definition::

            <include
              id = ID
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </include>

        Locations already recorded in ``self._includes`` are skipped. While
        the included children are processed, the document's element form,
        attribute form and base url are temporarily replaced by the values
        of the included schema; they are restored afterwards, even when
        processing raises.

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :raises NotImplementedError: if ``schemaLocation`` is missing or empty

        """
        location = node.get('schemaLocation')
        if not location:
            raise NotImplementedError("schemaLocation is required")

        if location in self._includes:
            return

        schema_node = load_external(
            location, self.schema._transport,
            base_url=self.document._base_url,
            strict=self.schema.strict)
        self._includes.add(location)

        # When the included document has no default namespace defined but the
        # parent document does have this then we should (at least for #360)
        # transfer the default namespace to the included schema. We can't
        # update the nsmap of elements in lxml so we create a new schema with
        # the correct nsmap and move all the content there.
        if not schema_node.nsmap.get(None) and node.nsmap.get(None):
            nsmap = {None: node.nsmap[None]}
            nsmap.update(schema_node.nsmap)
            new = etree.Element(schema_node.tag, nsmap=nsmap)
            for child in schema_node:
                new.append(child)
            for key, value in schema_node.attrib.items():
                new.set(key, value)
            schema_node = new

        # Use the element/attribute form defaults from the schema while
        # processing the nodes.
        element_form_default = self.document._element_form
        attribute_form_default = self.document._attribute_form
        base_url = self.document._base_url

        try:
            self.document._element_form = schema_node.get(
                'elementFormDefault', 'unqualified')
            self.document._attribute_form = schema_node.get(
                'attributeFormDefault', 'unqualified')
            self.document._base_url = absolute_location(
                location, self.document._base_url)

            # Iterate directly over the children.
            for child in schema_node:
                self.process(child, parent=schema_node)
        finally:
            # Always put the including document's own state back so a failure
            # inside the included schema cannot leave it half-switched.
            self.document._element_form = element_form_default
            self.document._attribute_form = attribute_form_default
            self.document._base_url = base_url
Example #12
0
    def visit_import(self, node, parent):
        """Process an ``xsd:import`` and register the imported document.

        Definition::

            <import
              id = ID
              namespace = anyURI
              schemaLocation = anyURI
              {any attributes with non-schema Namespace}...>
            Content: (annotation?)
            </import>

        :param node: The XML node
        :type node: lxml.etree._Element
        :param parent: The parent XML node
        :type parent: lxml.etree._Element
        :returns: the already known or newly created document, or ``None``
            when the import has no location that can be loaded
        :raises XMLParseError: if neither the import nor the importing
            document has a namespace, or if the imported schema's
            targetNamespace differs from the one on the import

        """
        namespace = node.get('namespace')
        location = node.get('schemaLocation')
        if location:
            location = absolute_location(location, self.document._base_url)

        if not namespace and not self.document._target_namespace:
            # Report against the importing document, using the same attribute
            # as the namespace-mismatch error further down.
            raise XMLParseError(
                "The attribute 'namespace' must be existent if the "
                "importing schema has no target namespace.",
                filename=self.document._location,
                sourceline=node.sourceline)

        # Check if the schema is already imported before based on the
        # namespace. Schema's without namespace are registered as 'None'
        document = self.schema.documents.get_by_namespace_and_location(
            namespace, location)
        if document:
            logger.debug("Returning existing schema: %r", location)
            self.register_import(namespace, document)
            return document

        # Hardcode the mapping between the xml namespace and the xsd for now.
        # This seems to fix issues with exchange wsdl's, see #220
        if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
            location = 'https://www.w3.org/2001/xml.xsd'

        # Silently ignore import statements which we can't resolve via the
        # namespace and doesn't have a schemaLocation attribute.
        if not location:
            logger.debug(
                "Ignoring import statement for namespace %r "
                "(missing schemaLocation)", namespace)
            return

        # Load the XML
        schema_node = load_external(
            location,
            self.schema._transport,
            strict=self.schema.strict)

        # Check if the xsd:import namespace matches the targetNamespace. If
        # the xsd:import statement didn't specify a namespace then make sure
        # that the targetNamespace wasn't declared by another schema yet.
        schema_tns = schema_node.get('targetNamespace')
        if namespace and schema_tns and namespace != schema_tns:
            raise XMLParseError((
                "The namespace defined on the xsd:import doesn't match the "
                "imported targetNamespace located at %r "
                ) % (location),
                filename=self.document._location,
                sourceline=node.sourceline)

        schema = self.schema.create_new_document(schema_node, location)
        self.register_import(namespace, schema)
        return schema