Example #1
0
 def __init__(self, tree):
     """Initialise the walker over ``tree``.

     An object exposing ``getroot`` is wrapped in ``Root`` and a list is
     wrapped in ``FragmentRoot``; any other value is handed to the base
     walker unchanged.
     """
     if hasattr(tree, "getroot"):
         walk_root = Root(tree)
     elif isinstance(tree, list):
         walk_root = FragmentRoot(tree)
     else:
         walk_root = tree
     _base.NonRecursiveTreeWalker.__init__(self, walk_root)
     # Name filter kept on the instance for later use by the walker.
     self.filter = ihatexml.InfosetFilter()
Example #2
0
def testSerializer(element):
    """Return an indented, line-per-node text dump of ``element``.

    ``element`` may be a full tree (any object exposing ``getroot``), a
    fragment (an iterable of nodes and text strings) or a single node.
    After the ``#document`` / ``#document-fragment`` header, every line
    starts with ``|`` followed by the indentation for its depth.  Element
    and attribute names are passed through ``InfosetFilter.fromXmlName``.

    Returns the lines joined with newlines.
    """
    rv = []
    infoset_filter = ihatexml.InfosetFilter()
    try:
        text_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        text_types = str  # Python 3 has no basestring

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                # Full tree case
                rv.append("#document")
                if element.docinfo.internalDTD:
                    if not (element.docinfo.public_id
                            or element.docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            element.docinfo.root_name,
                            element.docinfo.public_id,
                            element.docinfo.system_url)
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                # Rewind to the first sibling of the root so that nodes
                # placed before the root element are dumped as well.
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent + 2)
                    next_element = next_element.getnext()
            elif isinstance(element, text_types):
                # Text in a fragment
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                # Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent + 2)
            return

        # The tag of a comment node is compared by type against
        # etree.Comment rather than against a string.
        if type(element.tag) == type(etree.Comment):
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
        else:
            rv.append("|%s<%s>" %
                      (' ' * indent, infoset_filter.fromXmlName(element.tag)))
            if hasattr(element, "attrib"):
                for name, value in element.attrib.items():
                    rv.append('|%s%s="%s"' %
                              (' ' * (indent + 2),
                               infoset_filter.fromXmlName(name), value))
            if element.text:
                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            for child in element:
                serializeElement(child, indent + 2)

        # Trailing text is a sibling of the node, so it is written at the
        # node's own depth for comments and elements alike.
        if hasattr(element, "tail") and element.tail:
            rv.append("|%s\"%s\"" % (' ' * indent, element.tail))

    serializeElement(element, 0)

    return "\n".join(rv)
Example #3
0
    def tostring(element):
        """Serialize an element and its child nodes to a string.

        ``element`` may be a single node or a tree object exposing
        ``getroot``, in which case serialization starts from its root.
        The pseudo-elements tagged ``<!DOCTYPE>`` and ``<DOCUMENT_ROOT>``
        are rendered as a doctype declaration and as bare content
        respectively.  Element and attribute names are passed through
        ``InfosetFilter.fromXmlName``.

        Returns the concatenated markup as one string.
        """
        rv = []
        infoset_filter = ihatexml.InfosetFilter()

        def serializeElement(element):
            # Duck-type the tree wrapper: only tree objects offer getroot().
            if hasattr(element, "getroot"):
                element = element.getroot()

            if element.tag == "<!DOCTYPE>":
                if element.get("publicId") or element.get("systemId"):
                    publicId = element.get("publicId") or ""
                    systemId = element.get("systemId") or ""
                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                              (element.text, publicId, systemId))
                else:
                    rv.append("<!DOCTYPE %s>" % (element.text, ))
            elif element.tag == "<DOCUMENT_ROOT>":
                if element.text:
                    rv.append(element.text)

                for child in element:
                    serializeElement(child)

            elif type(element.tag) == type(ElementTree.Comment):
                rv.append("<!--%s-->" % (element.text, ))
            else:
                # This is assumed to be an ordinary element.  The decoded
                # name is used for the start tag (with or without
                # attributes) and for the end tag so that they always agree.
                name = infoset_filter.fromXmlName(element.tag)
                if not element.attrib:
                    rv.append("<%s>" % (name, ))
                else:
                    attr = " ".join([
                        "%s=\"%s\"" % (infoset_filter.fromXmlName(key), value)
                        for key, value in element.attrib.items()
                    ])
                    rv.append("<%s %s>" % (name, attr))
                if element.text:
                    rv.append(element.text)

                for child in element:
                    serializeElement(child)

                rv.append("</%s>" % (name, ))

            # Trailing text follows every kind of node, the document root
            # included.
            if element.tail:
                rv.append(element.tail)

        serializeElement(element)

        return "".join(rv)
Example #4
0
def testSerializer(element):
    """Dump ``element`` as indented text, one ``|``-prefixed line per node.

    Accepts a full tree (an object with ``getroot``), a fragment (an
    iterable mixing nodes with text strings) or an individual node.
    Namespaced tag and attribute names are written as ``prefix name``
    using ``constants.prefixes``, and attributes are emitted in sorted
    order.  The lines are returned joined with newlines.
    """
    lines = []
    name_filter = ihatexml.InfosetFilter()

    def quoted(depth, text):
        # Text is shown wrapped in double quotes at the given depth.
        return "|%s\"%s\"" % (' ' * depth, text)

    def attribute_label(raw_name):
        # Namespaced attribute names are rendered as "prefix localname".
        match = tag_regexp.match(raw_name)
        if match is None:
            return name_filter.fromXmlName(raw_name)
        ns, local = match.groups()
        local = name_filter.fromXmlName(local)
        return "%s %s" % (constants.prefixes[ns], local)

    def dump(node, indent=0):
        pad = ' ' * indent

        if not hasattr(node, "tag"):
            if hasattr(node, "getroot"):
                # Whole document: header, optional doctype, then every
                # top-level sibling starting from the first one.
                lines.append("#document")
                if node.docinfo.internalDTD:
                    if node.docinfo.public_id or node.docinfo.system_url:
                        doctype = """<!DOCTYPE %s "%s" "%s">""" % (
                            node.docinfo.root_name,
                            node.docinfo.public_id,
                            node.docinfo.system_url)
                    else:
                        doctype = "<!DOCTYPE %s>" % node.docinfo.root_name
                    lines.append("|%s%s" % (' ' * (indent + 2), doctype))
                current = node.getroot()
                earlier = current.getprevious()
                while earlier is not None:
                    current = earlier
                    earlier = current.getprevious()
                while current is not None:
                    dump(current, indent + 2)
                    current = current.getnext()
            elif isinstance(node, (str, bytes)):
                # Bare text inside a fragment.
                assert isinstance(node, str) or sys.version_info.major == 2
                lines.append(quoted(indent, node))
            else:
                # Fragment: header followed by each member.
                lines.append("#document-fragment")
                for member in node:
                    dump(member, indent + 2)
            return

        if type(node.tag) == type(etree.Comment):
            lines.append("|%s<!-- %s -->" % (pad, node.text))
            if hasattr(node, "tail") and node.tail:
                lines.append(quoted(indent, node.tail))
            return

        assert isinstance(node, etree._Element)
        tag_match = etree_builders.tag_regexp.match(node.tag)
        if tag_match is None:
            lines.append("|%s<%s>" % (pad, name_filter.fromXmlName(node.tag)))
        else:
            ns = tag_match.group(1)
            local_tag = tag_match.group(2)
            prefix = constants.prefixes[ns]
            lines.append("|%s<%s %s>" %
                         (pad, prefix, name_filter.fromXmlName(local_tag)))

        inner = indent + 2
        if hasattr(node, "attrib"):
            labelled = sorted((attribute_label(key), val)
                              for key, val in node.attrib.items())
            for label, val in labelled:
                lines.append('|%s%s="%s"' % (' ' * inner, label, val))

        if node.text:
            lines.append(quoted(inner, node.text))
        for child in node.getchildren():
            dump(child, inner)
        if hasattr(node, "tail") and node.tail:
            lines.append(quoted(indent, node.tail))

    dump(element, 0)

    return "\n".join(lines)
Example #5
0
    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Set up element and comment classes that coerce names and data.

        ``namespaceHTMLElements`` is stored on the instance and forwarded
        to the base ``TreeBuilder``; ``fullTree`` is forwarded to
        ``etree_builders.getETreeModule``.  The classes defined here wrap
        the builder's own classes so that element names, attribute names,
        character data and comment data pass through the instance's
        ``InfosetFilter`` before reaching the underlying tree.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        def coercedAttributeName(key):
            # Tuple keys carry the local name at index 1 and the namespace
            # at index 2; they map to the "{namespace}name" form.
            if isinstance(key, tuple):
                return "{%s}%s" % (key[2],
                                   infosetFilter.coerceAttribute(key[1]))
            return infosetFilter.coerceAttribute(key)

        class Attributes(dict):
            """Dict of attributes mirrored into the wrapped element."""

            def __init__(self, element, value=None):
                self._element = element
                dict.__init__(self, {} if value is None else value)
                for key, item in self.items():
                    self._element._element.attrib[
                        coercedAttributeName(key)] = item

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[
                    coercedAttributeName(key)] = value

        class Element(builder.Element):
            """Builder element whose name, attributes and text are coerced."""

            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(self._name,
                                                      self._namespace)

            def _getName(self):
                # The coerced name is stored; callers get it decoded back.
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            """Builder comment whose data is coerced before being stored."""

            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        # Use the coercing subclass defined above; with builder.Comment the
        # comment data would bypass coerceComment entirely.
        self.commentClass = Comment
        #self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)
Example #6
0
def getDomBuilder(DomImplementation):
    """Create the DOM tree-builder classes bound to ``DomImplementation``.

    ``DomImplementation`` is a DOM module offering
    ``getDOMImplementation()``.  The function defines an attribute
    adaptor, a node wrapper, a tree builder, a test serializer and a
    DOM-to-SAX converter, all closing over the chosen implementation and
    one shared ``InfosetFilter``.

    Returns ``locals()``, so every function-level name defined here
    (``Dom``, ``infoset_filter``, ``AttrList``, ``NodeBuilder``,
    ``TreeBuilder``, ``testSerializer``, ``dom2sax`` and the parameter) is
    part of the result and must not be renamed.
    """
    Dom = DomImplementation
    infoset_filter = ihatexml.InfosetFilter()

    class AttrList:
        """Mapping-style view over the attributes of a DOM element."""

        def __init__(self, element):
            self.element = element

        def __iter__(self):
            return self.element.attributes.items().__iter__()

        def __setitem__(self, name, value):
            self.element.setAttribute(infoset_filter.coerceAttribute(name),
                                      infoset_filter.coerceCharacters(value))

        def items(self):
            return [(infoset_filter.fromXmlName(item[0]), item[1])
                    for item in self.element.attributes.items()]

        def keys(self):
            return [
                infoset_filter.fromXmlName(item)
                for item in self.element.attributes.keys()
            ]

        def __getitem__(self, name):
            name = infoset_filter.toXmlName(name)
            return self.element.getAttribute(name)

        def __contains__(self, name):
            # Namespaced (tuple) names are not supported for membership.
            if isinstance(name, tuple):
                raise NotImplementedError
            else:
                return self.element.hasAttribute(
                    infoset_filter.toXmlName(name))

    class NodeBuilder(_base.Node):
        """Tree-builder node wrapping a DOM node held in ``self.element``."""

        def __init__(self, element):
            _base.Node.__init__(self, element.localName)
            self.element = element

        # Falsy or missing namespaceURI values are reported as None.
        namespace = property(lambda self: hasattr(self.element, "namespaceURI")
                             and self.element.namespaceURI or None)

        def appendChild(self, node):
            node.parent = self
            self.element.appendChild(node.element)

        def insertText(self, data, insertBefore=None):
            data = infoset_filter.coerceCharacters(data)
            text = self.element.ownerDocument.createTextNode(data)
            if insertBefore:
                self.element.insertBefore(text, insertBefore.element)
            else:
                self.element.appendChild(text)

        def insertBefore(self, node, refNode):
            self.element.insertBefore(node.element, refNode.element)
            node.parent = self

        def removeChild(self, node):
            # Detach from the DOM only if it really is our child; the
            # wrapper's parent link is cleared either way.
            if node.element.parentNode == self.element:
                self.element.removeChild(node.element)
            node.parent = None

        def reparentChildren(self, newParent):
            while self.element.hasChildNodes():
                child = self.element.firstChild
                self.element.removeChild(child)
                newParent.element.appendChild(child)
            self.childNodes = []

        def getAttributes(self):
            return AttrList(self.element)

        def setAttributes(self, attributes):
            if attributes:
                for name, value in attributes.items():
                    if isinstance(name, tuple):
                        # Tuple layout as used below: prefix at index 0,
                        # local name at index 1, namespace at index 2.
                        if name[0] is not None:
                            qualifiedName = (
                                name[0] + ":" +
                                infoset_filter.coerceAttribute(name[1]))
                        else:
                            qualifiedName = infoset_filter.coerceAttribute(
                                name[1])
                        self.element.setAttributeNS(name[2], qualifiedName,
                                                    value)
                    else:
                        self.element.setAttribute(
                            infoset_filter.coerceAttribute(name), value)

        attributes = property(getAttributes, setAttributes)

        def cloneNode(self):
            # Shallow clone: children are not copied.
            return NodeBuilder(self.element.cloneNode(False))

        def hasContent(self):
            return self.element.hasChildNodes()

        def getNameTuple(self):
            # Nodes without a namespace are reported in the HTML namespace.
            if self.namespace is None:
                return namespaces["html"], self.name
            else:
                return self.namespace, self.name

        nameTuple = property(getNameTuple)

    class TreeBuilder(_base.TreeBuilder):
        """Tree builder producing a DOM document held in ``self.dom``."""

        def documentClass(self):
            # The builder itself stands in for the document object; the
            # real DOM document is kept in self.dom.
            self.dom = Dom.getDOMImplementation().createDocument(
                None, None, None)
            return self

        def insertDoctype(self, token):
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]

            domimpl = Dom.getDOMImplementation()
            doctype = domimpl.createDocumentType(name, publicId, systemId)
            self.document.appendChild(NodeBuilder(doctype))
            # With minidom the owner document is assigned by hand here.
            if Dom == minidom:
                doctype.ownerDocument = self.dom

        def elementClass(self, name, namespace=None):
            if namespace is None and self.defaultNamespace is None:
                node = self.dom.createElement(name)
            else:
                node = self.dom.createElementNS(namespace, name)

            return NodeBuilder(node)

        def commentClass(self, data):
            return NodeBuilder(self.dom.createComment(data))

        def fragmentClass(self):
            return NodeBuilder(self.dom.createDocumentFragment())

        def appendChild(self, node):
            self.dom.appendChild(node.element)

        def testSerializer(self, element):
            return testSerializer(element)

        def getDocument(self):
            return self.dom

        def getFragment(self):
            return _base.TreeBuilder.getFragment(self).element

        def insertText(self, data, parent=None):
            data = infoset_filter.coerceCharacters(data)
            # "!=" replaces the Python-2-only "<>" operator.
            if parent != self:
                _base.TreeBuilder.insertText(self, data, parent)
            else:
                # HACK: allow text nodes as children of the document node
                if hasattr(self.dom, '_child_node_types'):
                    if Node.TEXT_NODE not in self.dom._child_node_types:
                        self.dom._child_node_types = list(
                            self.dom._child_node_types)
                        self.dom._child_node_types.append(Node.TEXT_NODE)
                self.dom.appendChild(self.dom.createTextNode(data))

        name = None

    def testSerializer(element):
        """Return an indented text dump of the DOM subtree at ``element``.

        The node is normalized first.  Each line after the document or
        fragment header starts with ``|`` plus indentation for its depth.
        """
        element.normalize()
        rv = []

        def serializeElement(element, indent=0):
            if element.nodeType == Node.DOCUMENT_TYPE_NODE:
                if element.name:
                    if element.publicId or element.systemId:
                        publicId = element.publicId or ""
                        systemId = element.systemId or ""
                        rv.append(
                            """|%s<!DOCTYPE %s "%s" "%s">""" %
                            (' ' * indent, element.name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" %
                                  (' ' * indent, element.name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent, ))
            elif element.nodeType == Node.DOCUMENT_NODE:
                rv.append("#document")
            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                rv.append("#document-fragment")
            elif element.nodeType == Node.COMMENT_NODE:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
            elif element.nodeType == Node.TEXT_NODE:
                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
            else:
                # Elements outside the HTML namespace get a prefix.
                if (hasattr(element, "namespaceURI") and element.namespaceURI
                        not in (None, constants.namespaces["html"])):
                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
                                      element.nodeName)
                else:
                    name = element.nodeName
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.hasAttributes():
                    # Walk the attribute map by index until item() runs out.
                    i = 0
                    attr = element.attributes.item(i)
                    while attr:
                        name = infoset_filter.fromXmlName(attr.localName)
                        value = attr.value
                        ns = attr.namespaceURI
                        if ns:
                            name = "%s %s" % (constants.prefixes[ns], name)
                        rv.append('|%s%s="%s"' % (' ' *
                                                  (indent + 2), name, value))
                        i += 1
                        attr = element.attributes.item(i)
            indent += 2
            for child in element.childNodes:
                serializeElement(child, indent)

        serializeElement(element, 0)

        return "\n".join(rv)

    def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}):
        """Replay the DOM subtree at ``node`` as SAX events on ``handler``.

        With a falsy ``nsmap`` the plain ``startElement``/``endElement``
        events are used; otherwise the namespace-aware events are emitted
        together with prefix mappings.  The default ``nsmap`` is never
        modified in place: it is copied before any declaration is added.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if not nsmap:
                handler.startElement(node.nodeName, node.attributes)
                for child in node.childNodes:
                    dom2sax(child, handler, nsmap)
                handler.endElement(node.nodeName)
            else:
                attributes = dict(node.attributes.itemsNS())

                # gather namespace declarations
                prefixes = []
                for attrname in node.attributes.keys():
                    attr = node.getAttributeNode(attrname)
                    if (attr.namespaceURI == XMLNS_NAMESPACE
                            or (attr.namespaceURI is None
                                and attr.nodeName.startswith('xmlns'))):
                        prefix = (attr.localName != 'xmlns' and attr.localName
                                  or None)
                        handler.startPrefixMapping(prefix, attr.nodeValue)
                        prefixes.append(prefix)
                        nsmap = nsmap.copy()
                        nsmap[prefix] = attr.nodeValue
                        del attributes[(attr.namespaceURI, attr.localName)]

                # apply namespace declarations
                for attrname in node.attributes.keys():
                    attr = node.getAttributeNode(attrname)
                    if attr.namespaceURI is None and ':' in attr.nodeName:
                        prefix = attr.nodeName.split(':')[0]
                        # "in" replaces the Python-2-only dict.has_key().
                        if prefix in nsmap:
                            del attributes[(attr.namespaceURI, attr.localName)]
                            attributes[(nsmap[prefix],
                                        attr.localName)] = attr.nodeValue

                # SAX events
                ns = node.namespaceURI or nsmap.get(None, None)
                handler.startElementNS((ns, node.nodeName), node.nodeName,
                                       attributes)
                for child in node.childNodes:
                    dom2sax(child, handler, nsmap)
                handler.endElementNS((ns, node.nodeName), node.nodeName)
                for prefix in prefixes:
                    handler.endPrefixMapping(prefix)

        elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
            handler.characters(node.nodeValue)

        elif node.nodeType == Node.DOCUMENT_NODE:
            handler.startDocument()
            for child in node.childNodes:
                dom2sax(child, handler, nsmap)
            handler.endDocument()

        elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
            for child in node.childNodes:
                dom2sax(child, handler, nsmap)

        else:
            # ATTRIBUTE_NODE
            # ENTITY_NODE
            # PROCESSING_INSTRUCTION_NODE
            # COMMENT_NODE
            # DOCUMENT_TYPE_NODE
            # NOTATION_NODE
            pass

    return locals()
Example #7
0
    def __init__(self, fullTree=False):
        """Set up element and comment classes that coerce names and data.

        ``fullTree`` is forwarded to ``etree_builders.getETreeModule``.
        The classes defined here wrap the builder's own classes so that
        element names, attribute names, character data and comment data
        pass through the instance's ``InfosetFilter`` (also available as
        ``self.filter``) before reaching the underlying tree.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        # Local alias avoids shadowing the ``filter`` builtin inside the
        # nested classes; the instance attribute keeps its original name.
        infoset_filter = self.filter = ihatexml.InfosetFilter()

        class Attributes(dict):
            """Dict of attributes mirrored into the wrapped element."""

            def __init__(self, element, value=None):
                self._element = element
                dict.__init__(self, {} if value is None else value)
                for k, v in self.items():
                    self._element._element.attrib[
                        infoset_filter.coerceAttribute(k)] = v

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[
                    infoset_filter.coerceAttribute(key)] = value

        class Element(builder.Element):
            """Builder element whose tag, attributes and text are coerced."""

            def __init__(self, name):
                # The original name is remembered; only the underlying tag
                # receives the coerced form.
                self._name = name
                builder.Element.__init__(self,
                                         infoset_filter.coerceElement(name))
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = name
                self._element.tag = infoset_filter.coerceElement(name)

            def _getName(self):
                return self._name

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infoset_filter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            """Builder comment whose data is coerced before being stored."""

            def __init__(self, data):
                data = infoset_filter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infoset_filter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        # Use the coercing subclass defined above; with builder.Comment the
        # comment data would bypass coerceComment entirely.
        self.commentClass = Comment
        #self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self)