def substitute_namespaces(element, element_dict, attribute_dict=None, deep=True):
    """Rewrite namespace URIs on *element* according to lookup tables.

    NB: You probably need to call normalize_namespaces after using this.

    Parameters:
        element: a DOM element node (asserted via ``nodeType``).
        element_dict: mapping from an element's current namespace URI to the
            namespace URI it should be given instead.
        attribute_dict: the same kind of mapping, applied to attribute
            nodes. When omitted, a copy of ``element_dict`` with the
            ``EMPTY_NAMESPACE`` entry removed is used.
        deep: when true, child elements are processed recursively with the
            same two mappings.
    """
    assert element.nodeType == element.ELEMENT_NODE

    # Derive the attribute mapping from the element mapping when the caller
    # did not supply one; the EMPTY_NAMESPACE entry is dropped from the copy.
    if attribute_dict is None:
        attribute_dict = element_dict.copy()
        if EMPTY_NAMESPACE in attribute_dict:
            del attribute_dict[EMPTY_NAMESPACE]

    # Attributes first. Iterate over a snapshot because changing an
    # attribute's namespace may alter the element's attribute collection.
    for attr in list(getAttributeNodes(element)):
        if attr.namespaceURI in attribute_dict:
            change_attribute_namespace(attr, attribute_dict[attr.namespaceURI])

    # Then the element's own namespace.
    if element.namespaceURI in element_dict:
        element.namespaceURI = element_dict[element.namespaceURI]

    if not deep:
        return

    # Finally descend into child elements, skipping every other node type.
    for child in element.childNodes:
        if child.nodeType == child.ELEMENT_NODE:
            substitute_namespaces(child, element_dict, attribute_dict, deep)
def normalize_namespaces(element, strip_dups=False, parent_prefixes=None):
    """Make the xmlns declarations on *element* and its descendants
    consistent with the prefixes and namespace URIs the nodes actually use.

    For the element and each of its namespaced attributes, the function
    checks whether the node's (prefix, namespaceURI) pair is already in
    scope. If not, it either re-prefixes the attribute to an in-scope
    prefix bound to the same namespace, declares the node's own prefix
    locally, or generates a fresh ``NS<n>`` prefix. The element's namespace
    declarations are then rewritten and the child elements are processed
    recursively.

    Parameters:
        element: a DOM element node (asserted via ``nodeType``).
        strip_dups: when true, every prefix returned by
            ``NamespaceScope.update`` while merging the local declarations
            into the inherited scope is removed from the element's local
            declarations (presumably these are re-declarations of bindings
            already in scope -- confirm against NamespaceScope).
        parent_prefixes: the namespace scope inherited from the element's
            ancestors. When ``None`` it is computed by walking from
            *element* up to the document element and collecting the
            declarations found on the way.

    Raises:
        ValueError: if *element* is not connected to its owner document's
            document element, or if the element has a non-None prefix
            together with the empty namespace.
    """
    assert element.nodeType == element.ELEMENT_NODE

    if parent_prefixes is None:
        # Walk up the document tree to find the list of elements that might
        # declare namespaces.
        top_element = element.ownerDocument.documentElement
        ancestry = []
        e = element
        while not top_element.isSameNode(e):
            e = e.parentNode
            if e is None:
                raise ValueError("element %r not connected to document" % (element,))
            ancestry.append(e)

        # Walk down the tree (outermost ancestor first) to determine the
        # current scope, so that inner declarations are applied last.
        ancestry.reverse()
        parent_prefixes = NamespaceScope()
        for e in ancestry:
            parent_prefixes.update(get_namespace_declarations(e))

    # Build list of local namespace declarations
    new_prefixes = get_namespace_declarations(element)

    # Merge the parent and new prefixes
    current_prefixes = parent_prefixes.copy()
    if strip_dups:
        duplicate_prefixes = current_prefixes.update(new_prefixes)
        for prefix in duplicate_prefixes:
            del new_prefixes[prefix]
    else:
        current_prefixes.update(new_prefixes)

    # Fix the element's namespace declarations
    if element.prefix in current_prefixes and current_prefixes[element.prefix] == element.namespaceURI:
        # Element's prefix and namespaceURI are in-scope. Do nothing
        pass
    elif element.namespaceURI == EMPTY_NAMESPACE and element.prefix is not None:
        # You can't have a non-empty prefix with an empty namespace with
        # XML Namespaces 1.0
        raise ValueError("Cannot undeclare non-default namespace prefix")
    else:
        # Create a local namespace declaration attribute for this
        # namespace. Don't care if a conflicting prefix existed before.
        new_prefixes[element.prefix] = element.namespaceURI
        current_prefixes[element.prefix] = element.namespaceURI

    # Fix the attributes' namespace declarations. Iterate over a snapshot
    # because attributes may be re-prefixed inside the loop.
    for attrNode in list(getAttributeNodes(element)):
        # Skip xmlns attributes
        if attrNode.namespaceURI == XMLNS_NAMESPACE:
            continue

        # Skip attributes with no namespace URI
        if attrNode.namespaceURI == EMPTY_NAMESPACE:
            assert hasattr(attrNode, 'localName')  # Require at least DOM level 2 for now
            continue  # Do nothing, because attributes don't have namespaces by default.

        # Skip attributes whose prefixes and namespace URIs match the
        # current in-scope prefixes.
        if (attrNode.prefix and attrNode.prefix in current_prefixes
                and current_prefixes[attrNode.prefix] == attrNode.namespaceURI):
            # Attribute and prefix are matched. Do nothing.
            continue

        # If the attribute's namespace has a declared prefix in the current
        # scope, change the attribute's prefix to that prefix. If there is
        # more than one such prefix, pick one that has the "most local"
        # binding.
        if current_prefixes.has_namespace(attrNode.namespaceURI):
            # Look the prefix up on the merged scope object,
            # ``current_prefixes`` (there is no ``current_prefix`` name).
            prefix = current_prefixes.prefix_from_namespace(attrNode.namespaceURI)
            change_attribute_prefix(attrNode, prefix)
            continue

        # The attribute's namespace has no associated prefix yet. We need
        # to declare one. If we can use the existing prefix (i.e. it's not
        # already defined), do so.
        # NOTE(review): an attribute that has a namespace URI but a None
        # prefix also reaches this test; if None is not in scope this binds
        # the None (default) prefix to the attribute's namespace -- confirm
        # that this is intended.
        if attrNode.prefix not in current_prefixes:
            new_prefixes[attrNode.prefix] = attrNode.namespaceURI
            current_prefixes[attrNode.prefix] = attrNode.namespaceURI
            continue

        # The attribute's namespace has no associated prefix, and the
        # attribute's current prefix is already taken. Generate one by
        # trying NS1, NS2, ... until an unused prefix is found.
        j = 1
        while True:
            generated_prefix = "NS%d" % (j,)
            if generated_prefix not in current_prefixes:
                break
            j += 1

        # Declare the new prefix
        new_prefixes[generated_prefix] = attrNode.namespaceURI
        current_prefixes[generated_prefix] = attrNode.namespaceURI

        # Change attribute's prefix
        change_attribute_prefix(attrNode, generated_prefix)

    # Update the namespace declarations on the element.
    replace_namespace_declarations(element, new_prefixes)

    # HACK - Remove xmlns='' attribute if it doesn't need to be there.
    # A fresh object() is used as the .get() default so that a missing
    # entry never compares equal to EMPTY_NAMESPACE.
    if (new_prefixes.get(None, object()) == EMPTY_NAMESPACE
            and parent_prefixes.get(None, object()) == EMPTY_NAMESPACE
            and element.hasAttributeNS(XMLNS_NAMESPACE, 'xmlns')
            and not element.getAttributeNS(XMLNS_NAMESPACE, 'xmlns')):
        element.removeAttributeNS(XMLNS_NAMESPACE, 'xmlns')

    # Recurse over child elements, handing down the scope computed here so
    # the children do not have to walk the ancestry again.
    for child in element.childNodes:
        if child.nodeType == element.ELEMENT_NODE:
            normalize_namespaces(child, strip_dups=strip_dups, parent_prefixes=current_prefixes)