예제 #1
0
def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support
    
    treeType - the name of the tree type required (case-insensitive). Supported
               values are "simpletree", "dom", "etree" and "beautifulsoup"
               
               "simpletree" - a built-in DOM-ish tree type with support for some
                              more pythonic idioms.
                "dom" - A generic builder for DOM implementations, defaulting to
                        a xml.dom.minidom based implementation for the sake of
                        backwards compatibility (as releases up until 0.10 had a
                        builder called "dom" that was a minidom implemenation).
                "etree" - A generic builder for tree implementations exposing an
                          elementtree-like interface (known to work with
                          ElementTree, cElementTree and lxml.etree).
                "beautifulsoup" - Beautiful soup (if installed)
               
    implementation - (Currently applies to the "etree" and "dom" tree types). A
                      module implementing the tree type e.g.
                      xml.etree.ElementTree or lxml.etree."""
    
    treeType = treeType.lower()
    if treeType not in treeBuilderCache:
        if treeType == "dom":
            import dom
            # XXX: Keep backwards compatibility by using minidom if no implementation is given
            if implementation == None:
                from xml.dom import minidom
                implementation = minidom
            # XXX: NEVER cache here, caching is done in the dom submodule
            return dom.getDomModule(implementation, **kwargs).TreeBuilder
        elif treeType == "simpletree":
            import simpletree
            treeBuilderCache[treeType] = simpletree.TreeBuilder
        elif treeType == "beautifulsoup":
            import soup
            treeBuilderCache[treeType] = soup.TreeBuilder
        elif treeType == "lxml":
            import etree_lxml
            treeBuilderCache[treeType] = etree_lxml.TreeBuilder
        elif treeType == "etree":
            # Come up with a sane default
            if implementation == None:
                try:
                    import xml.etree.cElementTree as ET
                except ImportError:
                    try:
                        import xml.etree.ElementTree as ET
                    except ImportError:
                        try:
                            import cElementTree as ET
                        except ImportError:
                            import elementtree.ElementTree as ET
                implementation = ET
            import etree
            # NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
        else:
            raise ValueError("""Unrecognised treebuilder "%s" """%treeType)
    return treeBuilderCache.get(treeType)
예제 #2
0
def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support
    
    treeType - the name of the tree type required (case-insensitive). Supported
               values are "simpletree", "dom", "etree" and "beautifulsoup"
               
               "simpletree" - a built-in DOM-ish tree type with support for some
                              more pythonic idioms.
                "dom" - The xml.dom.minidom DOM implementation
                "etree" - A generic builder for tree implementations exposing an
                          elementtree-like interface (known to work with
                          ElementTree, cElementTree and lxml.etree).
                "beautifulsoup" - Beautiful soup (if installed)
               
    implementation - (Currently applies to the "etree" tree type only). A module
                      implementing the tree type e.g. xml.etree.ElementTree or
                      lxml.etree."""
    
    treeType = treeType.lower()
    if treeType not in treeBuilderCache:
        if treeType in ("dom", "simpletree"):
            mod = __import__(treeType, globals())
            treeBuilderCache[treeType] = mod.TreeBuilder
        elif treeType == "beautifulsoup":
            import soup
            treeBuilderCache[treeType] = soup.TreeBuilder
        elif treeType == "etree":
            import etree
            # XXX: NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
    return treeBuilderCache.get(treeType)
예제 #3
0
def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support
    
    treeType - the name of the tree type required (case-insensitive). Supported
               values are "simpletree", "dom", "etree" and "beautifulsoup"
               
               "simpletree" - a built-in DOM-ish tree type with support for some
                              more pythonic idioms.
                "dom" - A generic builder for DOM implementations, defaulting to
                        a xml.dom.minidom based implementation for the sake of
                        backwards compatibility (as releases up until 0.10 had a
                        builder called "dom" that was a minidom implemenation).
                "etree" - A generic builder for tree implementations exposing an
                          elementtree-like interface (known to work with
                          ElementTree, cElementTree and lxml.etree).
                "beautifulsoup" - Beautiful soup (if installed)
               
    implementation - (Currently applies to the "etree" and "dom" tree types). A
                      module implementing the tree type e.g.
                      xml.etree.ElementTree or lxml.etree."""

    treeType = treeType.lower()
    if treeType not in treeBuilderCache:
        if treeType == "dom":
            import dom
            # XXX: Keep backwards compatibility by using minidom if no implementation is given
            if implementation == None:
                from xml.dom import minidom
                implementation = minidom
            # XXX: NEVER cache here, caching is done in the dom submodule
            return dom.getDomModule(implementation, **kwargs).TreeBuilder
        elif treeType == "simpletree":
            import simpletree
            treeBuilderCache[treeType] = simpletree.TreeBuilder
        elif treeType == "beautifulsoup":
            import soup
            treeBuilderCache[treeType] = soup.TreeBuilder
        elif treeType == "lxml":
            import etree_lxml
            treeBuilderCache[treeType] = etree_lxml.TreeBuilder
        elif treeType == "etree":
            # Come up with a sane default
            if implementation == None:
                try:
                    import xml.etree.cElementTree as ET
                except ImportError:
                    try:
                        import xml.etree.ElementTree as ET
                    except ImportError:
                        try:
                            import cElementTree as ET
                        except ImportError:
                            import elementtree.ElementTree as ET
                implementation = ET
            import etree
            # NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
        else:
            raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
    return treeBuilderCache.get(treeType)
예제 #4
0
def getTreeBuilder(treeType, implementation=None, **kwargs):
    """Get a TreeBuilder class for various types of tree with built-in support
    
    treeType - the name of the tree type required (case-insensitive). Supported
               values are "simpletree", "dom", "etree" and "beautifulsoup"
               
               "simpletree" - a built-in DOM-ish tree type with support for some
                              more pythonic idioms.
                "dom" - The xml.dom.minidom DOM implementation
                "etree" - A generic builder for tree implementations exposing an
                          elementtree-like interface (known to work with
                          ElementTree, cElementTree and lxml.etree).
                "beautifulsoup" - Beautiful soup (if installed)
               
    implementation - (Currently applies to the "etree" tree type only). A module
                      implementing the tree type e.g. xml.etree.ElementTree or
                      lxml.etree."""

    treeType = treeType.lower()
    if treeType not in treeBuilderCache:
        if treeType in ("dom", "simpletree"):
            mod = __import__(treeType, globals())
            treeBuilderCache[treeType] = mod.TreeBuilder
        elif treeType == "beautifulsoup":
            import soup
            treeBuilderCache[treeType] = soup.TreeBuilder
        elif treeType == "etree":
            import etree
            # XXX: NEVER cache here, caching is done in the etree submodule
            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
    return treeBuilderCache.get(treeType)
예제 #5
0
    def __init__(self, namespaceHTMLElements, fullTree = False):
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        filter = self.filter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        class Attributes(dict):
            def __init__(self, element, value={}):
                self._element = element
                dict.__init__(self, value)
                for key, value in self.iteritems():
                    if isinstance(key, tuple):
                        name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1]))
                    else:
                        name = filter.coerceAttribute(key)
                    self._element._element.attrib[name] = value

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                if isinstance(key, tuple):
                    name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1]))
                else:
                    name = filter.coerceAttribute(key)
                self._element._element.attrib[name] = value

        class Element(builder.Element):
            def __init__(self, name, namespace):
                name = filter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = filter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)
        
            def _getName(self):
                return filter.fromXmlName(self._name)
        
            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)
    
            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = filter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)
                

        class Comment(builder.Comment):
            def __init__(self, data):
                data = filter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = filter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = builder.Comment
        #self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)
예제 #6
0
    def __init__(self, namespaceHTMLElements, fullTree=False):
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        filter = self.filter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        class Attributes(dict):
            def __init__(self, element, value={}):
                self._element = element
                dict.__init__(self, value)
                for key, value in self.iteritems():
                    if isinstance(key, tuple):
                        name = "{%s}%s" % (key[2],
                                           filter.coerceAttribute(key[1]))
                    else:
                        name = filter.coerceAttribute(key)
                    self._element._element.attrib[name] = value

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                if isinstance(key, tuple):
                    name = "{%s}%s" % (key[2], filter.coerceAttribute(key[1]))
                else:
                    name = filter.coerceAttribute(key)
                self._element._element.attrib[name] = value

        class Element(builder.Element):
            def __init__(self, name, namespace):
                name = filter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = filter.coerceElement(name)
                self._element.tag = self._getETreeTag(self._name,
                                                      self._namespace)

            def _getName(self):
                return filter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = filter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            def __init__(self, data):
                data = filter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = filter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = builder.Comment
        #self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)
예제 #7
0
 def __init__(self, fullTree = False):
     builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
     self.elementClass = builder.Element
     self.commentClass = builder.Comment
     #self.fragmentClass = builder.DocumentFragment
     _base.TreeBuilder.__init__(self)
예제 #8
0
    def __init__(self, fullTree = False):
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        filter = self.filter = ihatexml.InfosetFilter()

        class Attributes(dict):
            def __init__(self, element, value={}):
                self._element = element
                dict.__init__(self, value)
                for k, v in self.iteritems():
                    self._element._element.attrib[filter.coerceAttribute(k)] = v

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[filter.coerceAttribute(key)] = value

        class Element(builder.Element):
            def __init__(self, name):
                self._name = name
                builder.Element.__init__(self, filter.coerceElement(name))
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = name
                self._element.tag = filter.coerceElement(name)

            def _getName(self):
                return self._name

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)
    
            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = filter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)
                

        class Comment(builder.Comment):
            def __init__(self, data):
                data = filter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = filter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = builder.Comment
        #self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self)
예제 #9
0
 def __init__(self, fullTree=False):
     builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
     self.elementClass = builder.Element
     self.commentClass = builder.Comment
     #self.fragmentClass = builder.DocumentFragment
     _base.TreeBuilder.__init__(self)
예제 #10
0
    def __init__(self, fullTree=False):
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        filter = self.filter = ihatexml.InfosetFilter()

        class Attributes(dict):
            def __init__(self, element, value={}):
                self._element = element
                dict.__init__(self, value)
                for k, v in self.iteritems():
                    self._element._element.attrib[filter.coerceAttribute(
                        k)] = v

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[filter.coerceAttribute(
                    key)] = value

        class Element(builder.Element):
            def __init__(self, name):
                self._name = name
                builder.Element.__init__(self, filter.coerceElement(name))
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = name
                self._element.tag = filter.coerceElement(name)

            def _getName(self):
                return self._name

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = filter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            def __init__(self, data):
                data = filter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = filter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        self.commentClass = builder.Comment
        #self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self)