def add_document_by_url(self, url):
    """Load the schema at *url*, create a document for it and resolve it.

    The root node is fetched through ``load_external`` using this object's
    transport and the ``strict`` flag taken from ``self.settings``.

    :param url: Location of the schema document to load

    """
    root = load_external(url, self._transport, strict=self.settings.strict)
    new_document = self.create_new_document(root, url=url)
    new_document.resolve()
def add_document_by_url(self, url):
    """Load the schema at *url*, create a document for it and resolve it.

    The root node is fetched through ``load_external`` using this object's
    transport and its own ``strict`` flag.

    :param url: Location of the schema document to load

    """
    root = load_external(url, self._transport, strict=self.strict)
    new_document = self.create_new_document(root, url=url)
    new_document.resolve()
def _get_xml_document(self, location):
    """Fetch the XML content at *location* and return the parsed result.

    The loading is delegated to ``load_external`` with this object's
    transport, its own location as the base location, and its settings.

    :param location: The URL of the document to load
    :type location: string

    """
    xml_root = load_external(
        location, self.transport, self.location, settings=self.settings)
    return xml_root
def _get_xml_document(self, location: typing.IO) -> etree._Element:
    """Fetch the XML content at *location* and return the parsed element.

    The loading is delegated to ``load_external`` with this object's
    transport, its own location as the base location, and its settings.

    :param location: The URL of the document to load
        (NOTE(review): the annotation says ``typing.IO`` while this text
        says URL string -- presumably ``load_external`` accepts both;
        confirm)

    """
    xml_root = load_external(
        location, self.transport, self.location, settings=self.settings
    )
    return xml_root
def visit_include(self, node, parent):
    """Load the schema referenced by an ``xsd:include`` and process it.

    Definition::

        <include
          id = ID
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </include>

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :raises NotImplementedError: If the node has no ``schemaLocation``

    """
    location = node.get('schemaLocation')
    if not location:
        raise NotImplementedError("schemaLocation is required")

    # A location that was already included by this visitor is skipped.
    if location in self._includes:
        return

    schema_node = load_external(
        location, self.schema._transport,
        base_url=self.document._base_url,
        strict=self.schema.strict)
    self._includes.add(location)

    # When the included document has no default namespace defined but the
    # parent document does have this then we should (at least for #360)
    # transfer the default namespace to the included schema. We can't
    # update the nsmap of elements in lxml so we create a new schema with
    # the correct nsmap and move all the content there.
    if not schema_node.nsmap.get(None) and node.nsmap.get(None):
        nsmap = {None: node.nsmap[None]}
        nsmap.update(schema_node.nsmap)
        new = etree.Element(schema_node.tag, nsmap=nsmap)
        for child in schema_node:
            new.append(child)
        # Keep the attributes of the original root on the replacement
        # root; without this they are lost together with the old element.
        for key, value in schema_node.attrib.items():
            new.set(key, value)
        schema_node = new

    # Iterate directly over the children
    for child in schema_node:
        self.process(child, parent=schema_node)
def _retrieve_data(self, url: typing.IO, base_url=None):
    """Load the data at *url* through the schema's transport and settings.

    :param url: Location (NOTE(review): annotated as ``typing.IO``) passed
        unchanged to ``load_external``
    :param base_url: Optional base URL forwarded to ``load_external``
    :return: Whatever ``load_external`` returns for the given location

    """
    schema = self.schema
    loaded = load_external(
        url, schema._transport, base_url, settings=schema.settings)
    return loaded
def visit_include(self, node, parent):
    """Load the schema referenced by an ``xsd:include`` and process it.

    Definition::

        <include
          id = ID
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </include>

    While the included children are processed, the document's
    element/attribute form defaults and base URL are replaced by those of
    the included schema. They are restored afterwards, also when the
    processing of a child raises.

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :raises NotImplementedError: If the node has no ``schemaLocation``

    """
    location = node.get('schemaLocation')
    if not location:
        raise NotImplementedError("schemaLocation is required")

    # A location that was already included by this visitor is skipped.
    if location in self._includes:
        return

    schema_node = load_external(
        location, self.schema._transport,
        base_url=self.document._base_url,
        settings=self.schema.settings)
    self._includes.add(location)

    # When the included document has no default namespace defined but the
    # parent document does have this then we should (at least for #360)
    # transfer the default namespace to the included schema. We can't
    # update the nsmap of elements in lxml so we create a new schema with
    # the correct nsmap and move all the content there.
    if not schema_node.nsmap.get(None) and node.nsmap.get(None):
        nsmap = {None: node.nsmap[None]}
        nsmap.update(schema_node.nsmap)
        new = etree.Element(schema_node.tag, nsmap=nsmap)
        for child in schema_node:
            new.append(child)
        for key, value in schema_node.attrib.items():
            new.set(key, value)
        schema_node = new

    # Use the element/attribute form defaults from the schema while
    # processing the nodes.
    document = self.document
    saved_state = (
        document._element_form,
        document._attribute_form,
        document._base_url,
    )
    try:
        document._element_form = schema_node.get(
            'elementFormDefault', 'unqualified')
        document._attribute_form = schema_node.get(
            'attributeFormDefault', 'unqualified')
        document._base_url = absolute_location(location, document._base_url)

        # Iterate directly over the children.
        for child in schema_node:
            self.process(child, parent=schema_node)
    finally:
        # Always put the including document's own state back, so a failing
        # include does not leave the overrides in place.
        (document._element_form,
         document._attribute_form,
         document._base_url) = saved_state
def visit_import(self, node, parent):
    """Resolve an ``xsd:import`` to a schema document and register it.

    Definition::

        <import
          id = ID
          namespace = anyURI
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </import>

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :return: The already known or newly created document, or ``None``
        when the import has no usable location and is ignored
    :raises XMLParseError: If neither the import nor the importing schema
        specifies a namespace, or if the import's namespace differs from
        the targetNamespace of the loaded schema

    """
    namespace = node.get('namespace')
    location = node.get('schemaLocation')
    if location:
        location = absolute_location(location, self.document._base_url)

    if not namespace and not self.document._target_namespace:
        # The document is available as ``self.document`` and its location
        # as ``_location`` (see the second raise below); the previous
        # ``self._document.location`` did not match either name.
        raise XMLParseError(
            "The attribute 'namespace' must be existent if the "
            "importing schema has no target namespace.",
            filename=self.document._location,
            sourceline=node.sourceline)

    # Check if the schema is already imported before based on the
    # namespace. Schema's without namespace are registered as 'None'
    document = self.schema.documents.get_by_namespace_and_location(
        namespace, location)
    if document:
        logger.debug("Returning existing schema: %r", location)
        self.register_import(namespace, document)
        return document

    # Hardcode the mapping between the xml namespace and the xsd for now.
    # This seems to fix issues with exchange wsdl's, see #220
    if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
        location = 'https://www.w3.org/2001/xml.xsd'

    # Silently ignore import statements which we can't resolve via the
    # namespace and doesn't have a schemaLocation attribute.
    if not location:
        logger.debug(
            "Ignoring import statement for namespace %r "
            "(missing schemaLocation)", namespace)
        return

    # Load the XML
    schema_node = load_external(
        location, self.schema._transport, settings=self.schema.settings)

    # Check if the xsd:import namespace matches the targetNamespace of the
    # loaded schema (only when both are given).
    schema_tns = schema_node.get('targetNamespace')
    if namespace and schema_tns and namespace != schema_tns:
        raise XMLParseError((
            "The namespace defined on the xsd:import doesn't match the "
            "imported targetNamespace located at %r "
            ) % (location,),
            filename=self.document._location,
            sourceline=node.sourceline)

    schema = self.schema.create_new_document(schema_node, location)
    self.register_import(namespace, schema)
    return schema
def visit_import(self, node, parent):
    """Resolve an ``xsd:import`` to a schema document and register it.

    Definition::

        <import
          id = ID
          namespace = anyURI
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </import>

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :return: The already known or newly created document, or ``None``
        when the import has no usable location and is ignored
    :raises XMLParseError: If neither the import nor the importing schema
        specifies a namespace, or if the import's namespace differs from
        the targetNamespace of the loaded schema

    """
    namespace = node.get('namespace')
    location = node.get('schemaLocation')
    if location:
        location = normalize_location(
            self.schema.settings, location, self.document._location
        )

    if not namespace and not self.document._target_namespace:
        # The document is available as ``self.document`` and its location
        # as ``_location`` (see the second raise below); the previous
        # ``self._document.location`` did not match either name.
        raise XMLParseError(
            "The attribute 'namespace' must be existent if the "
            "importing schema has no target namespace.",
            filename=self.document._location,
            sourceline=node.sourceline)

    # We found an empty <import/> statement, this needs to trigger 4.1.2
    # from https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-resolve
    # for QName resolving.
    # In essence this means we will resolve QNames without a namespace to no
    # namespace instead of the target namespace.
    # The following code snippet works because imports have to occur before we
    # visit elements.
    if not namespace and not location:
        self.document._has_empty_import = True

    # Check if the schema is already imported before based on the
    # namespace. Schema's without namespace are registered as 'None'
    document = self.schema.documents.get_by_namespace_and_location(
        namespace, location)
    if document:
        logger.debug("Returning existing schema: %r", location)
        self.register_import(namespace, document)
        return document

    # Hardcode the mapping between the xml namespace and the xsd for now.
    # This seems to fix issues with exchange wsdl's, see #220
    if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
        location = 'https://www.w3.org/2001/xml.xsd'

    # Silently ignore import statements which we can't resolve via the
    # namespace and doesn't have a schemaLocation attribute.
    if not location:
        logger.debug(
            "Ignoring import statement for namespace %r "
            "(missing schemaLocation)", namespace)
        return

    # Load the XML
    schema_node = load_external(
        location,
        transport=self.schema._transport,
        base_url=self.document._location,
        settings=self.schema.settings)

    # Check if the xsd:import namespace matches the targetNamespace of the
    # loaded schema (only when both are given).
    schema_tns = schema_node.get('targetNamespace')
    if namespace and schema_tns and namespace != schema_tns:
        raise XMLParseError((
            "The namespace defined on the xsd:import doesn't match the "
            "imported targetNamespace located at %r "
            ) % (location,),
            filename=self.document._location,
            sourceline=node.sourceline)

    # If the imported schema doesn't define a target namespace and the
    # node doesn't specify it either then inherit the existing target
    # namespace.
    elif not schema_tns and not namespace:
        namespace = self.document._target_namespace

    schema = self.schema.create_new_document(
        schema_node, location, target_namespace=namespace)
    self.register_import(namespace, schema)
    return schema
def visit_include(self, node, parent):
    """Load the schema referenced by an ``xsd:include`` and process it.

    Definition::

        <include
          id = ID
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </include>

    While the included children are processed, the document's
    element/attribute form defaults and base URL are replaced by those of
    the included schema. They are restored afterwards, also when the
    processing of a child raises.

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :raises NotImplementedError: If the node has no ``schemaLocation``

    """
    location = node.get("schemaLocation")
    if not location:
        raise NotImplementedError("schemaLocation is required")

    # A location that was already included by this visitor is skipped.
    if location in self._includes:
        return

    schema_node = load_external(
        location,
        self.schema._transport,
        base_url=self.document._base_url,
        settings=self.schema.settings,
    )
    self._includes.add(location)

    # When the included document has no default namespace defined but the
    # parent document does have this then we should (at least for #360)
    # transfer the default namespace to the included schema. We can't
    # update the nsmap of elements in lxml so we create a new schema with
    # the correct nsmap and move all the content there.

    # Included schemas must have targetNamespace equal to parent schema
    # (the including) or None. If included schema doesn't have default ns,
    # then it should be set to parent's targetNs.
    # See Chameleon Inclusion
    # https://www.w3.org/TR/xmlschema11-1/#chameleon-xslt
    parent_tns = parent.attrib.get("targetNamespace")
    default_ns = node.nsmap.get(None) or parent_tns
    if not schema_node.nsmap.get(None) and default_ns:
        nsmap = {None: default_ns}
        nsmap.update(schema_node.nsmap)
        new = etree.Element(schema_node.tag, nsmap=nsmap)
        for child in schema_node:
            new.append(child)
        for key, value in schema_node.attrib.items():
            new.set(key, value)
        # Only inherit the parent's targetNamespace when the parent has
        # one. This branch can also be entered through the default
        # namespace of ``node``, in which case indexing the parent's
        # attributes directly would raise KeyError.
        if parent_tns is not None and not new.attrib.get("targetNamespace"):
            new.attrib["targetNamespace"] = parent_tns
        schema_node = new

    # Use the element/attribute form defaults from the schema while
    # processing the nodes.
    document = self.document
    saved_state = (
        document._element_form,
        document._attribute_form,
        document._base_url,
    )
    try:
        document._element_form = schema_node.get(
            "elementFormDefault", "unqualified")
        document._attribute_form = schema_node.get(
            "attributeFormDefault", "unqualified")
        document._base_url = absolute_location(location, document._base_url)

        # Iterate directly over the children.
        for child in schema_node:
            self.process(child, parent=schema_node)
    finally:
        # Always put the including document's own state back, so a failing
        # include does not leave the overrides in place.
        (document._element_form,
         document._attribute_form,
         document._base_url) = saved_state
def visit_include(self, node, parent):
    """Load the schema referenced by an ``xsd:include`` and process it.

    Definition::

        <include
          id = ID
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </include>

    While the included children are processed, the document's
    element/attribute form defaults and base URL are replaced by those of
    the included schema. They are restored afterwards, also when the
    processing of a child raises.

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :raises NotImplementedError: If the node has no ``schemaLocation``

    """
    location = node.get('schemaLocation')
    if not location:
        raise NotImplementedError("schemaLocation is required")

    # A location that was already included by this visitor is skipped.
    if location in self._includes:
        return

    schema_node = load_external(
        location, self.schema._transport,
        base_url=self.document._base_url,
        strict=self.schema.strict)
    self._includes.add(location)

    # When the included document has no default namespace defined but the
    # parent document does have this then we should (at least for #360)
    # transfer the default namespace to the included schema. We can't
    # update the nsmap of elements in lxml so we create a new schema with
    # the correct nsmap and move all the content there.
    if not schema_node.nsmap.get(None) and node.nsmap.get(None):
        nsmap = {None: node.nsmap[None]}
        nsmap.update(schema_node.nsmap)
        new = etree.Element(schema_node.tag, nsmap=nsmap)
        for child in schema_node:
            new.append(child)
        for key, value in schema_node.attrib.items():
            new.set(key, value)
        schema_node = new

    # Use the element/attribute form defaults from the schema while
    # processing the nodes.
    document = self.document
    saved_state = (
        document._element_form,
        document._attribute_form,
        document._base_url,
    )
    try:
        document._element_form = schema_node.get(
            'elementFormDefault', 'unqualified')
        document._attribute_form = schema_node.get(
            'attributeFormDefault', 'unqualified')
        document._base_url = absolute_location(location, document._base_url)

        # Iterate directly over the children.
        for child in schema_node:
            self.process(child, parent=schema_node)
    finally:
        # Always put the including document's own state back, so a failing
        # include does not leave the overrides in place.
        (document._element_form,
         document._attribute_form,
         document._base_url) = saved_state
def visit_import(self, node, parent):
    """Resolve an ``xsd:import`` to a schema document and register it.

    Definition::

        <import
          id = ID
          namespace = anyURI
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </import>

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :return: The already known or newly created document, or ``None``
        when the import has no usable location and is ignored
    :raises XMLParseError: If neither the import nor the importing schema
        specifies a namespace, or if the import's namespace differs from
        the targetNamespace of the loaded schema

    """
    namespace = node.get('namespace')
    location = node.get('schemaLocation')
    if location:
        location = absolute_location(location, self.document._base_url)

    if not namespace and not self.document._target_namespace:
        # The document is available as ``self.document`` and its location
        # as ``_location`` (see the second raise below); the previous
        # ``self._document.location`` did not match either name.
        raise XMLParseError(
            "The attribute 'namespace' must be existent if the "
            "importing schema has no target namespace.",
            filename=self.document._location,
            sourceline=node.sourceline)

    # Check if the schema is already imported before based on the
    # namespace. Schema's without namespace are registered as 'None'
    document = self.schema.documents.get_by_namespace_and_location(
        namespace, location)
    if document:
        logger.debug("Returning existing schema: %r", location)
        self.register_import(namespace, document)
        return document

    # Hardcode the mapping between the xml namespace and the xsd for now.
    # This seems to fix issues with exchange wsdl's, see #220
    if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
        location = 'https://www.w3.org/2001/xml.xsd'

    # Silently ignore import statements which we can't resolve via the
    # namespace and doesn't have a schemaLocation attribute.
    if not location:
        logger.debug(
            "Ignoring import statement for namespace %r "
            "(missing schemaLocation)", namespace)
        return

    # Load the XML
    schema_node = load_external(
        location, self.schema._transport, strict=self.schema.strict)

    # Check if the xsd:import namespace matches the targetNamespace of the
    # loaded schema (only when both are given).
    schema_tns = schema_node.get('targetNamespace')
    if namespace and schema_tns and namespace != schema_tns:
        raise XMLParseError((
            "The namespace defined on the xsd:import doesn't match the "
            "imported targetNamespace located at %r "
            ) % (location,),
            filename=self.document._location,
            sourceline=node.sourceline)

    schema = self.schema.create_new_document(schema_node, location)
    self.register_import(namespace, schema)
    return schema