def parse_imports(self, doc):
    """Import other WSDL definitions in this document.

    Note that imports are non-transitive, so only import definitions
    which are defined in the imported document and ignore definitions
    imported in that document.

    This should handle recursive imports though:

        A -> B -> A
        A -> B -> C -> A

    Import nodes without a ``location`` attribute are skipped, since
    there is no document that could be loaded for them.

    :param doc: The source document
    :type doc: lxml.etree._Element

    """
    for import_node in doc.findall("wsdl:import", namespaces=NSMAP):
        namespace = import_node.get('namespace')
        location = import_node.get('location')

        # Nothing to resolve or load when the location is missing/empty.
        if not location:
            logger.debug(
                "Skipping import for namespace %s (empty location)",
                namespace)
            continue

        location = absolute_location(location, self.location)
        key = (namespace, location)

        if key in self.wsdl._definitions:
            # Reuse the definition already registered under this key.
            self.imports[key] = self.wsdl._definitions[key]
        else:
            document = self.wsdl._get_xml_document(location)
            if etree.QName(document.tag).localname == 'schema':
                # The import points at an XSD schema instead of a WSDL
                # document, so hand it over to the types collection.
                self.types.add_documents([document], location)
            else:
                wsdl = Definition(self.wsdl, document, location)
                self.imports[key] = wsdl
def parse_imports(self, doc):
    """Resolve every ``wsdl:import`` child of the given document.

    Imports are non-transitive: only definitions declared in the imported
    document itself are imported, definitions which that document imports
    in turn are ignored.

    This should handle recursive imports though:

        A -> B -> A
        A -> B -> C -> A

    :param doc: The source document
    :type doc: lxml.etree._Element

    """
    for node in doc.findall("wsdl:import", namespaces=NSMAP):
        target_ns = node.get("namespace")
        raw_location = node.get("location")

        if not raw_location:
            logger.debug(
                "Skipping import for namespace %s (empty location)",
                target_ns)
            continue

        resolved = absolute_location(raw_location, self.location)
        import_key = (target_ns, resolved)

        # Definition already registered on the wsdl: just reference it.
        if import_key in self.wsdl._definitions:
            self.imports[import_key] = self.wsdl._definitions[import_key]
            continue

        root = self.wsdl._get_xml_document(resolved)
        root_name = etree.QName(root.tag).localname

        # A schema document is added to the types instead of the imports.
        if root_name == "schema":
            self.types.add_documents([root], resolved)
            continue

        self.imports[import_key] = Definition(self.wsdl, root, resolved)
def visit_include(self, node, parent):
    """Process an ``xsd:include`` by visiting the included schema in place.

    Definition::

        <include
          id = ID
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </include>

    The children of the included schema are processed as part of the
    current document. While they are processed, the document's
    element/attribute form defaults and base url are switched to those of
    the included schema; the previous values are restored afterwards, also
    when processing raises.

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :raises NotImplementedError: if the node has no ``schemaLocation``

    """
    location = node.get("schemaLocation")
    if not location:
        raise NotImplementedError("schemaLocation is required")

    # A location which was included before is not processed again.
    if location in self._includes:
        return

    schema_node = self._retrieve_data(
        location, base_url=self.document._base_url)
    self._includes.add(location)

    # When the included document has no default namespace defined but the
    # parent document does have this then we should (at least for #360)
    # transfer the default namespace to the included schema. We can't
    # update the nsmap of elements in lxml so we create a new schema with
    # the correct nsmap and move all the content there.

    # Included schemas must have targetNamespace equal to parent schema
    # (the including) or None.
    # If included schema doesn't have default ns, then it should be set to
    # parent's targetNs.
    # See Chameleon Inclusion
    # https://www.w3.org/TR/xmlschema11-1/#chameleon-xslt
    if not schema_node.nsmap.get(None):
        inherited_ns = (
            node.nsmap.get(None) or parent.attrib.get("targetNamespace"))
        if inherited_ns:
            nsmap = {None: inherited_ns}
            nsmap.update(schema_node.nsmap)
            new = etree.Element(schema_node.tag, nsmap=nsmap)

            # Snapshot the children first: appending moves each child out
            # of schema_node while we are walking over it.
            for child in list(schema_node):
                new.append(child)
            for key, value in schema_node.attrib.items():
                new.set(key, value)

            # Only inherit the parent's targetNamespace when the parent
            # actually declares one; the default namespace may have come
            # from the include node instead.
            parent_tns = parent.attrib.get("targetNamespace")
            if parent_tns and not new.attrib.get("targetNamespace"):
                new.attrib["targetNamespace"] = parent_tns
            schema_node = new

    # Use the element/attribute form defaults from the schema while
    # processing the nodes.
    element_form_default = self.document._element_form
    attribute_form_default = self.document._attribute_form
    base_url = self.document._base_url
    try:
        self.document._element_form = schema_node.get(
            "elementFormDefault", "unqualified")
        self.document._attribute_form = schema_node.get(
            "attributeFormDefault", "unqualified")
        self.document._base_url = absolute_location(
            location, self.document._base_url)

        # Iterate directly over the children.
        for child in schema_node:
            self.process(child, parent=schema_node)
    finally:
        # Always hand the document back in the state we found it.
        self.document._element_form = element_form_default
        self.document._attribute_form = attribute_form_default
        self.document._base_url = base_url
def visit_include(self, node, parent):
    """Process an ``xsd:include`` by visiting the included schema in place.

    Definition::

        <include
          id = ID
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </include>

    The children of the included schema are processed as part of the
    current document. While they are processed, the document's
    element/attribute form defaults and base url are switched to those of
    the included schema; the previous values are restored afterwards, also
    when processing raises.

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :raises NotImplementedError: if the node has no ``schemaLocation``

    """
    location = node.get('schemaLocation')
    if not location:
        raise NotImplementedError("schemaLocation is required")

    # A location which was included before is not processed again.
    if location in self._includes:
        return

    schema_node = load_external(
        location,
        self.schema._transport,
        base_url=self.document._base_url,
        settings=self.schema.settings)
    self._includes.add(location)

    # When the included document has no default namespace defined but the
    # parent document does have this then we should (at least for #360)
    # transfer the default namespace to the included schema. We can't
    # update the nsmap of elements in lxml so we create a new schema with
    # the correct nsmap and move all the content there.
    if not schema_node.nsmap.get(None) and node.nsmap.get(None):
        nsmap = {None: node.nsmap[None]}
        nsmap.update(schema_node.nsmap)
        new = etree.Element(schema_node.tag, nsmap=nsmap)

        # Snapshot the children first: appending moves each child out of
        # schema_node while we are walking over it.
        for child in list(schema_node):
            new.append(child)
        for key, value in schema_node.attrib.items():
            new.set(key, value)
        schema_node = new

    # Use the element/attribute form defaults from the schema while
    # processing the nodes.
    element_form_default = self.document._element_form
    attribute_form_default = self.document._attribute_form
    base_url = self.document._base_url
    try:
        self.document._element_form = schema_node.get(
            'elementFormDefault', 'unqualified')
        self.document._attribute_form = schema_node.get(
            'attributeFormDefault', 'unqualified')
        self.document._base_url = absolute_location(
            location, self.document._base_url)

        # Iterate directly over the children.
        for child in schema_node:
            self.process(child, parent=schema_node)
    finally:
        # Always hand the document back in the state we found it.
        self.document._element_form = element_form_default
        self.document._attribute_form = attribute_form_default
        self.document._base_url = base_url
def visit_import(self, node, parent):
    """Process an ``xsd:import`` and register the imported schema.

    Definition::

        <import
          id = ID
          namespace = anyURI
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </import>

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :returns: The imported schema document (existing or newly created),
        or ``None`` when the import is ignored because it has no
        resolvable location.
    :raises XMLParseError: if neither the import nor the importing schema
        specifies a namespace, or if the import's namespace doesn't match
        the targetNamespace of the imported schema.

    """
    namespace = node.get('namespace')
    location = node.get('schemaLocation')
    if location:
        location = absolute_location(location, self.document._base_url)

    if not namespace and not self.document._target_namespace:
        raise XMLParseError(
            "The attribute 'namespace' must be existent if the "
            "importing schema has no target namespace.",
            filename=self.document._location,
            sourceline=node.sourceline)

    # Check if the schema is already imported before based on the
    # namespace. Schema's without namespace are registered as 'None'
    document = self.schema.documents.get_by_namespace_and_location(
        namespace, location)
    if document:
        logger.debug("Returning existing schema: %r", location)
        self.register_import(namespace, document)
        return document

    # Hardcode the mapping between the xml namespace and the xsd for now.
    # This seems to fix issues with exchange wsdl's, see #220
    if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
        location = 'https://www.w3.org/2001/xml.xsd'

    # Silently ignore import statements which we can't resolve via the
    # namespace and doesn't have a schemaLocation attribute.
    if not location:
        logger.debug(
            "Ignoring import statement for namespace %r " +
            "(missing schemaLocation)", namespace)
        return

    # Load the XML
    schema_node = load_external(
        location, self.schema._transport, settings=self.schema.settings)

    # Check if the xsd:import namespace matches the targetNamespace. If
    # the xsd:import statement didn't specify a namespace then make sure
    # that the targetNamespace wasn't declared by another schema yet.
    schema_tns = schema_node.get('targetNamespace')
    if namespace and schema_tns and namespace != schema_tns:
        raise XMLParseError(
            ("The namespace defined on the xsd:import doesn't match the "
             "imported targetNamespace located at %r ") % (location),
            filename=self.document._location,
            sourceline=node.sourceline)

    schema = self.schema.create_new_document(schema_node, location)
    self.register_import(namespace, schema)
    return schema
def visit_import(self, node, parent):
    """Process an ``xsd:import`` and register the imported schema.

    Definition::

        <import
          id = ID
          namespace = anyURI
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </import>

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :returns: The imported schema document (existing or newly created),
        or ``None`` when the import is ignored because it has no
        resolvable location.
    :raises XMLParseError: if neither the import nor the importing schema
        specifies a namespace, or if the import's namespace doesn't match
        the targetNamespace of the imported schema.

    """
    namespace = node.get('namespace')
    location = node.get('schemaLocation')
    if location:
        location = absolute_location(location, self.document._base_url)

    if not namespace and not self.document._target_namespace:
        raise XMLParseError(
            "The attribute 'namespace' must be existent if the "
            "importing schema has no target namespace.",
            filename=self.document._location,
            sourceline=node.sourceline)

    # We found an empty <import/> statement, this needs to trigger 4.1.2
    # from https://www.w3.org/TR/2012/REC-xmlschema11-1-20120405/#src-resolve
    # for QName resolving.
    # In essence this means we will resolve QNames without a namespace to no
    # namespace instead of the target namespace.
    # The following code snippet works because imports have to occur before we
    # visit elements.
    if not namespace and not location:
        self.document._has_empty_import = True

    # Check if the schema is already imported before based on the
    # namespace. Schema's without namespace are registered as 'None'
    document = self.schema.documents.get_by_namespace_and_location(
        namespace, location)
    if document:
        logger.debug("Returning existing schema: %r", location)
        self.register_import(namespace, document)
        return document

    # Hardcode the mapping between the xml namespace and the xsd for now.
    # This seems to fix issues with exchange wsdl's, see #220
    if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
        location = 'https://www.w3.org/2001/xml.xsd'

    # Silently ignore import statements which we can't resolve via the
    # namespace and doesn't have a schemaLocation attribute.
    if not location:
        logger.debug(
            "Ignoring import statement for namespace %r " +
            "(missing schemaLocation)", namespace)
        return

    # Load the XML
    schema_node = load_external(
        location, self.schema._transport, settings=self.schema.settings)

    # Check if the xsd:import namespace matches the targetNamespace. If
    # the xsd:import statement didn't specify a namespace then make sure
    # that the targetNamespace wasn't declared by another schema yet.
    schema_tns = schema_node.get('targetNamespace')
    if namespace and schema_tns and namespace != schema_tns:
        raise XMLParseError(
            ("The namespace defined on the xsd:import doesn't match the "
             "imported targetNamespace located at %r ") % (location),
            filename=self.document._location,
            sourceline=node.sourceline)

    # If the imported schema doesn't define a target namespace and the
    # node doesn't specify it either then inherit the existing target
    # namespace.
    elif not schema_tns and not namespace:
        namespace = self.document._target_namespace

    schema = self.schema.create_new_document(
        schema_node, location, target_namespace=namespace)
    self.register_import(namespace, schema)
    return schema
def visit_include(self, node, parent):
    """Process an ``xsd:include`` by visiting the included schema in place.

    Definition::

        <include
          id = ID
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </include>

    The children of the included schema are processed as part of the
    current document. While they are processed, the document's
    element/attribute form defaults and base url are switched to those of
    the included schema; the previous values are restored afterwards, also
    when processing raises.

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :raises NotImplementedError: if the node has no ``schemaLocation``

    """
    location = node.get('schemaLocation')
    if not location:
        raise NotImplementedError("schemaLocation is required")

    # A location which was included before is not processed again.
    if location in self._includes:
        return

    schema_node = load_external(
        location, self.schema._transport,
        base_url=self.document._base_url,
        strict=self.schema.strict)
    self._includes.add(location)

    # When the included document has no default namespace defined but the
    # parent document does have this then we should (at least for #360)
    # transfer the default namespace to the included schema. We can't
    # update the nsmap of elements in lxml so we create a new schema with
    # the correct nsmap and move all the content there.
    if not schema_node.nsmap.get(None) and node.nsmap.get(None):
        nsmap = {None: node.nsmap[None]}
        nsmap.update(schema_node.nsmap)
        new = etree.Element(schema_node.tag, nsmap=nsmap)

        # Snapshot the children first: appending moves each child out of
        # schema_node while we are walking over it.
        for child in list(schema_node):
            new.append(child)
        for key, value in schema_node.attrib.items():
            new.set(key, value)
        schema_node = new

    # Use the element/attribute form defaults from the schema while
    # processing the nodes.
    element_form_default = self.document._element_form
    attribute_form_default = self.document._attribute_form
    base_url = self.document._base_url
    try:
        self.document._element_form = schema_node.get(
            'elementFormDefault', 'unqualified')
        self.document._attribute_form = schema_node.get(
            'attributeFormDefault', 'unqualified')
        self.document._base_url = absolute_location(
            location, self.document._base_url)

        # Iterate directly over the children.
        for child in schema_node:
            self.process(child, parent=schema_node)
    finally:
        # Always hand the document back in the state we found it.
        self.document._element_form = element_form_default
        self.document._attribute_form = attribute_form_default
        self.document._base_url = base_url
def visit_import(self, node, parent):
    """Process an ``xsd:import`` and register the imported schema.

    Definition::

        <import
          id = ID
          namespace = anyURI
          schemaLocation = anyURI
          {any attributes with non-schema Namespace}...>
        Content: (annotation?)
        </import>

    :param node: The XML node
    :type node: lxml.etree._Element
    :param parent: The parent XML node
    :type parent: lxml.etree._Element
    :returns: The imported schema document (existing or newly created),
        or ``None`` when the import is ignored because it has no
        resolvable location.
    :raises XMLParseError: if neither the import nor the importing schema
        specifies a namespace, or if the import's namespace doesn't match
        the targetNamespace of the imported schema.

    """
    namespace = node.get('namespace')
    location = node.get('schemaLocation')
    if location:
        location = absolute_location(location, self.document._base_url)

    if not namespace and not self.document._target_namespace:
        raise XMLParseError(
            "The attribute 'namespace' must be existent if the "
            "importing schema has no target namespace.",
            filename=self.document._location,
            sourceline=node.sourceline)

    # Check if the schema is already imported before based on the
    # namespace. Schema's without namespace are registered as 'None'
    document = self.schema.documents.get_by_namespace_and_location(
        namespace, location)
    if document:
        logger.debug("Returning existing schema: %r", location)
        self.register_import(namespace, document)
        return document

    # Hardcode the mapping between the xml namespace and the xsd for now.
    # This seems to fix issues with exchange wsdl's, see #220
    if not location and namespace == 'http://www.w3.org/XML/1998/namespace':
        location = 'https://www.w3.org/2001/xml.xsd'

    # Silently ignore import statements which we can't resolve via the
    # namespace and doesn't have a schemaLocation attribute.
    if not location:
        logger.debug(
            "Ignoring import statement for namespace %r " +
            "(missing schemaLocation)", namespace)
        return

    # Load the XML
    schema_node = load_external(
        location, self.schema._transport, strict=self.schema.strict)

    # Check if the xsd:import namespace matches the targetNamespace. If
    # the xsd:import statement didn't specify a namespace then make sure
    # that the targetNamespace wasn't declared by another schema yet.
    schema_tns = schema_node.get('targetNamespace')
    if namespace and schema_tns and namespace != schema_tns:
        raise XMLParseError((
            "The namespace defined on the xsd:import doesn't match the "
            "imported targetNamespace located at %r "
            ) % (location),
            filename=self.document._location,
            sourceline=node.sourceline)

    schema = self.schema.create_new_document(schema_node, location)
    self.register_import(namespace, schema)
    return schema