Beispiel #1
0
def StripElements(node, stripElements, stripState=0):
    """
    Recursively remove whitespace-only text nodes below node, in place.

    node is the DOM node to process; document, element and text nodes
    are handled, every other node type is left alone.

    stripElements is a sequence of (namespaceURI, localName, strip)
    triples.  The first triple matching an element -- by exact name, by
    (EMPTY_NAMESPACE, '*') or by (element namespace, '*') -- supplies
    the strip state used for that element's children.

    stripState is the state inherited from the parent: a true value
    means whitespace-only text nodes are removed, a false value means
    they are kept.  An xml:space attribute on an element takes priority
    over both the inherited state and stripElements.
    """
    if node.nodeType == Node.DOCUMENT_NODE:
        # Walk a snapshot: text children remove themselves from their
        # parent, and mutating a live child list while iterating over it
        # would skip the sibling that follows each removed node.
        for child in list(node.childNodes):
            StripElements(child, stripElements, stripState)
    elif node.nodeType == Node.ELEMENT_NODE:
        # See if we need to change the strip state
        space = node.getAttributeNS(XML_NAMESPACE, 'space')
        if space == 'preserve':
            # Force the state to preserve
            stripState = 0
        elif space:
            # Any other non-empty xml:space value forces stripping
            stripState = 1
        else:
            # See if it is a preserve or strip element; the names an
            # entry may match do not change while scanning the entries.
            candidates = ((node.namespaceURI, node.localName),
                          (EMPTY_NAMESPACE, '*'),
                          (node.namespaceURI, '*'))
            for (uri, local, strip) in stripElements:
                if (uri, local) in candidates:
                    stripState = strip
                    break

        # Snapshot for the same reason as in the document branch above.
        for child in list(node.childNodes):
            StripElements(child, stripElements, stripState)
    elif node.nodeType == Node.TEXT_NODE:
        if stripState and IsXmlSpace(node.data):
            # Whitespace-only text in a stripping context: drop it
            node.parentNode.removeChild(node)
    def characters(self, data):
        """
        Handle character data reported for the current stylesheet element.

        The validation table of the state on top of the state stack is
        consulted for the text-node token.  If text is allowed, the
        state's validation is advanced and an XsltText node holding data
        is appended to the current node.  If text is not allowed, an
        XsltParserException is raised unless data is whitespace-only and
        the validation table does not contain the EMPTY entry, in which
        case data is silently dropped.
        """
        state = self._state_stack[-1]
        validation = state.validation
        text_token = ContentInfo.TEXT_NODE

        # Direct lookup first, then the ELSE table as a fallback
        following = validation.get(text_token)
        if following is None and ContentInfo.ELSE in validation:
            following = validation[ContentInfo.ELSE].get(text_token)

        if following is not None:
            # Text is legal here: advance the validation and keep the text
            state.validation = following
            text_node = StylesheetTree.XsltText(
                self._ownerDoc, self._locator.getSystemId(), data)
            state.node.appendChild(text_node)
            return

        # If the parent can have element children, but not text nodes,
        # pure whitespace is ignored (XSLT 2.0 [3.4] Whitespace Stripping),
        # e.g. xsl:stylesheet, xsl:apply-templates, xsl:choose.
        # Anything else is an illegal text child.
        if ContentInfo.EMPTY in validation or not IsXmlSpace(data):
            raise XsltParserException(Error.ILLEGAL_TEXT_CHILD_PARSE,
                                      self._locator,
                                      repr(Truncate(data, 10)),
                                      state.node.nodeName)
        return
Beispiel #3
0
 def __completeTextNode(self):
     """
     Flush the buffered text, if any, to the SAX handler.

     Whitespace-only text is reported through ignorableWhitespace(),
     any other text through characters(); the buffer is then reset to
     an empty string.  Nothing happens when the buffer is empty.
     """
     #FIXME: This does not allow multiple root nodes, which is required to be supported
     if not self.__currText:
         return
     if not IsXmlSpace(self.__currText):
         self.__saxHandler.characters(self.__currText)
     else:
         self.__saxHandler.ignorableWhitespace(self.__currText)
     self.__currText = u''
     return
Beispiel #4
0
def _TryEntityAsDocumentEntity(entity):
    """
    Make entity look the way a parsed document entity would.

    When exactly one child of entity is an element, every child text
    node that consists solely of XML whitespace is removed from entity,
    in place.  Otherwise entity is left untouched.
    """
    element_count = sum(1 for child in entity.childNodes
                        if child.nodeType == Node.ELEMENT_NODE)
    if element_count != 1:
        return

    # Collect first and remove afterwards, so the child list is not
    # modified while it is being walked.
    blanks = [child for child in entity.childNodes
              if child.nodeType == Node.TEXT_NODE and IsXmlSpace(child.data)]
    for blank in blanks:
        entity.removeChild(blank)
    return
Beispiel #5
0
 def _visit_text(self, context, node, preserveSpace):
     """
     Write the data of a text node to the innermost writer.

     Whitespace-only text is skipped unless preserveSpace is true.
     context is not used here.
     """
     if not preserveSpace and IsXmlSpace(node.data):
         # Strippable whitespace: emit nothing
         return
     self.writers[-1].text(node.data)
     return
Beispiel #6
0
def _CompareChildNodes(node1, node2, ignoreWhitespace, ignoreComments,
                       ignoreNsDecls):
    """
    Compare the children of node1 and node2 pairwise with NodeCompare.

    Comment children are dropped from both sides first when
    ignoreComments is true.  A differing number of children is reported
    through __ReportError and that call's result is returned; otherwise
    0 is returned at the first pair that does not compare equal and 1
    when every pair does.
    """
    children1 = node1.childNodes
    children2 = node2.childNodes
    if ignoreComments:
        # Remove comment nodes
        children1 = [c for c in children1 if c.nodeType != Node.COMMENT_NODE]
        children2 = [c for c in children2 if c.nodeType != Node.COMMENT_NODE]
    # Check the counts up front so zip() never silently drops the
    # unpaired tail of the longer list.
    if len(children1) != len(children2):
        return __ReportError(node1, node2, 'childNodes')
    for child1, child2 in zip(children1, children2):
        if not NodeCompare(child1, child2, ignoreWhitespace,
                           ignoreComments, ignoreNsDecls):
            return 0
    return 1


def NodeCompare(node1,
                node2,
                ignoreWhitespace=0,
                ignoreComments=0,
                ignoreNsDecls=0):
    """
    A function that compares two XML DOM nodes by traversing their
    attributes and descendants recursively until a mismatch is found.
    Mismatches are reported through __ReportError.  Returns true if
    the nodes compare equal.

    ignoreWhitespace controls whether whitespace-only text nodes are
    treated as equal to each other regardless of their exact content.

    ignoreComments controls whether comment nodes are ignored.

    ignoreNsDecls controls whether namespace declarations are ignored.

    What is compared per node type:
      - document / document fragment: the children, pairwise
      - document type: name, publicId, systemId, internalSubset and
        the number of entities and notations
      - element: localName, namespaceURI, the attributes (by local name
        and namespace, after sorting by qualified name) and the
        children, pairwise
      - text and comment: data
      - processing instruction: target and data
    """
    if node1.nodeType != node2.nodeType:
        return __ReportError(node1, node2, 'nodeType')

    # -- Document Nodes --------------------------------------
    if node1.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
        # A differing child count is a reported mismatch, exactly as
        # for elements, rather than a comparison against a missing node.
        result = _CompareChildNodes(node1, node2, ignoreWhitespace,
                                    ignoreComments, ignoreNsDecls)
        if not result:
            return result

    # -- Document Type Nodes ---------------------------------
    elif node1.nodeType == Node.DOCUMENT_TYPE_NODE:
        if node1.name != node2.name:
            return __ReportError(node1, node2, 'name')
        if node1.publicId != node2.publicId:
            return __ReportError(node1, node2, 'publicId')
        if node1.systemId != node2.systemId:
            return __ReportError(node1, node2, 'systemId')
        if node1.internalSubset != node2.internalSubset:
            return __ReportError(node1, node2, 'internalSubset')
        # Entities and notations are only compared by count
        if len(node1.entities) != len(node2.entities):
            return __ReportError(node1, node2, 'entities')
        if len(node1.notations) != len(node2.notations):
            return __ReportError(node1, node2, 'notations')

    # -- Element Nodes ---------------------------------------
    elif node1.nodeType == Node.ELEMENT_NODE:
        if node1.localName != node2.localName:
            return __ReportError(node1, node2, 'localName')
        if node1.namespaceURI != node2.namespaceURI:
            return __ReportError(node1, node2, 'namespaceURI')

        # Compare attributes
        attrs1 = list(node1.attributes.values())
        attrs2 = list(node2.attributes.values())
        if ignoreNsDecls:
            # Remove XML Namespace declarations
            attrs1 = [a for a in attrs1 if a.namespaceURI != XMLNS_NAMESPACE]
            attrs2 = [a for a in attrs2 if a.namespaceURI != XMLNS_NAMESPACE]
        if len(attrs1) != len(attrs2):
            return __ReportError(node1, node2, 'attributes')
        # Sort the attributes by qualified name so that declaration
        # order does not matter
        attrs1.sort(key=lambda a: a.name)
        attrs2.sort(key=lambda a: a.name)
        # NOTE(review): only attribute names are compared here, not
        # attribute values -- confirm that this is intended.
        for attr1, attr2 in zip(attrs1, attrs2):
            if attr1.localName != attr2.localName:
                print(node1.attributes.keys())
                print(node2.attributes.keys())
                return __ReportError(attr1, attr2, 'localName')
            if attr1.namespaceURI != attr2.namespaceURI:
                return __ReportError(attr1, attr2, 'namespaceURI')

        # Compare children
        result = _CompareChildNodes(node1, node2, ignoreWhitespace,
                                    ignoreComments, ignoreNsDecls)
        if not result:
            return result

    # -- Text Nodes ------------------------------------------
    elif node1.nodeType == Node.TEXT_NODE:
        text1 = node1.data
        text2 = node2.data
        if ignoreWhitespace:
            # Whitespace-only text collapses to None on either side, so
            # two whitespace-only nodes always compare equal
            if IsXmlSpace(text1):
                text1 = None
            if IsXmlSpace(text2):
                text2 = None
        if text1 != text2:
            return __ReportError(node1, node2, 'data')

    # -- Comment Nodes ---------------------------------------
    elif node1.nodeType == Node.COMMENT_NODE:
        if node1.data != node2.data:
            return __ReportError(node1, node2, 'data')

    # -- Processing Instruction Nodes ------------------------
    elif node1.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
        if node1.target != node2.target:
            return __ReportError(node1, node2, 'target')
        if node1.data != node2.data:
            return __ReportError(node1, node2, 'data')

    # All tests pass, they are the same
    return 1
 def text(self, *args, **kw):
     """
     Record a text event and, for non-whitespace text, finalize as XML.

     The call is appended to the event stack as ('text', args, kw).
     If the first positional argument is not whitespace-only, the
     output cannot be HTML/XHTML and _finalize(_XML_METHOD) is called.
     """
     event = ('text', args, kw)
     self._stack.append(event)
     if IsXmlSpace(args[0]):
         # Whitespace alone does not decide the output method
         return
     # Non-whitespace characters, cannot be HTML/XHTML
     self._finalize(_XML_METHOD)
     return