Esempio n. 1
0
    def decode(self, indent_level=0, **kwargs):
        """Render this tag and its contents as a Unicode string.

        :param indent_level: When not None, the opening tag is preceded by
            `self.indent` repeated `indent_level - 1` times and a non-empty
            closing tag is followed by a newline (unless the caller asked
            for flattened output).  Whether line breaks are also inserted
            around the contents is decided by `_should_pretty_print()`.
        :param kwargs: Only `flatten` is consumed.  A truthy value means
            the parent is rendering on a single line, so this tag does the
            same and forwards `flatten=True` to `decode_contents()`.
        """
        # Attributes are emitted in sorted order as name="value" pairs.
        rendered_attrs = []
        if self.attrs:
            for attr_name, attr_value in sorted(self.attrs.items()):
                if attr_value is None:
                    # Valueless attribute: emit the bare name only.
                    rendered_attrs.append(attr_name)
                    continue
                if isinstance(attr_value, (list, tuple)):
                    attr_value = ' '.join(attr_value)
                elif not isinstance(attr_value, basestring):
                    attr_value = unicode(attr_value)
                elif isinstance(attr_value,
                                AttributeValueWithCharsetSubstitution):
                    attr_value = attr_value.encode('utf-8')
                rendered_attrs.append(
                    unicode(attr_name) + '='
                    + EntitySubstitution.quoted_attribute_value(attr_value))

        ns_prefix = (self.prefix + ":") if self.prefix else ''

        # An empty element gets a trailing slash and no closing tag.
        if self.is_empty_element:
            self_closing, end_tag = '/', ''
        else:
            self_closing, end_tag = '', '</%s%s>' % (ns_prefix, self.name)

        parent_flatten = kwargs.pop('flatten', False)
        if parent_flatten:
            flatten = True
        else:
            def tag_needs_indent(tag):
                return isinstance(tag, Tag) and tag.needs_indent

            has_indented_descendant = (
                len(self.find_all(tag_needs_indent)) > 0)
            # Stay on one line only if neither this tag nor any descendant
            # asks for indentation.
            flatten = not (self.needs_indent or has_indented_descendant)

        pretty_print = self._should_pretty_print(indent_level)
        if indent_level is None:
            indent_space = ''
        else:
            indent_space = self.indent * (indent_level - 1)

        if pretty_print:
            closing_indent = indent_space
            child_level = indent_level + 1
        else:
            closing_indent = ''
            child_level = None
        contents = self.decode_contents(child_level, flatten=flatten)

        if self.hidden:
            # A hidden tag contributes only its contents.
            return u'' + contents

        attribute_string = ''
        if rendered_attrs:
            attribute_string = ' ' + ' '.join(rendered_attrs)

        # own_line: tag is indented and terminated by a newline.
        # multiline: contents are set off on their own lines.
        own_line = indent_level is not None and not parent_flatten
        multiline = pretty_print and not flatten

        pieces = []
        if own_line:
            pieces.append(indent_space)
        pieces.append('<%s%s%s%s>' % (
            ns_prefix, self.name, attribute_string, self_closing))
        if multiline:
            pieces.append('\n')
        pieces.append(contents)
        if multiline:
            if contents and contents[-1] != '\n':
                pieces.append('\n')
            if end_tag:
                pieces.append(closing_indent)
        pieces.append(end_tag)
        if own_line and end_tag:
            pieces.append('\n')
        return u'' + ''.join(pieces)
Esempio n. 2
0
def decode(self, indent_level=None,
           eventual_encoding=DEFAULT_OUTPUT_ENCODING,
           formatter="minimal", tabsize=4):
    """Returns a Unicode representation of this tag and its contents.

    :param indent_level: None disables pretty-printing. Otherwise the
       tag is indented by `tabsize * (indent_level - 1)` spaces and its
       contents are rendered at `indent_level + 1`.
    :param eventual_encoding: The tag is destined to be
       encoded into this encoding. This method is _not_
       responsible for performing that encoding. This information
       is passed in so that it can be substituted in if the
       document contains a <META> tag that mentions the document's
       encoding.
    :param formatter: Passed through unchanged to decode_contents().
    :param tabsize: Number of spaces per indentation level when
       pretty-printing; also passed through to decode_contents().
    :return: The markup for this tag, or just its contents when the
       tag is hidden.
    """
    # Attributes are emitted in sorted order as name=value pairs.
    attrs = []
    if self.attrs:
        for key, val in sorted(self.attrs.items()):
            if val is None:
                # Valueless attribute: emit the bare name only.
                decoded = key
            else:
                if isinstance(val, (list, tuple)):
                    val = ' '.join(val)
                elif not isinstance(val, basestring):
                    val = str(val)
                if (self.contains_substitutions
                    and eventual_encoding is not None
                    and '%SOUP-ENCODING%' in val):
                    val = self.substitute_encoding(val, eventual_encoding)

                decoded = (str(key) + '='
                           + EntitySubstitution.substitute_xml(val, True))
            attrs.append(decoded)

    # The prefix is computed before the closing tag so that both the
    # opening and the closing tag carry the same qualified name.
    prefix = ''
    if self.prefix:
        prefix = self.prefix + ":"

    close = ''
    closeTag = ''
    if self.is_empty_element:
        close = '/'
    else:
        # Include the namespace prefix; without it a prefixed element
        # would render as <ns:tag>...</tag>, which is not well-formed.
        closeTag = '</%s%s>' % (prefix, self.name)

    pretty_print = (indent_level is not None)
    if pretty_print:
        space = (' ' * tabsize * (indent_level - 1))
        indent_contents = indent_level + 1
    else:
        space = ''
        indent_contents = None

    contents = decode_contents(self, indent_contents,
            eventual_encoding, formatter, tabsize)

    # Whitespace-only contents are treated as empty and dropped below.
    isempty = not contents.strip()

    if self.hidden:
        # This is the 'document root' object.
        s = contents
    else:
        s = []
        attribute_string = ''
        if attrs:
            attribute_string = ' ' + ' '.join(attrs)
        if pretty_print:
            s.append(space)
        s.append('<%s%s%s%s>' % (
                prefix, self.name, attribute_string, close))
        # Break after the opening tag unless an empty, non-void element
        # can be kept on one line as <tag></tag>.
        if pretty_print and (not isempty or not closeTag):
            s.append("\n")
        if not isempty:
            s.append(contents)
        if pretty_print and not isempty and contents[-1] != "\n":
            s.append("\n")
        if pretty_print and not isempty and closeTag:
            s.append(space)
        s.append(closeTag)
        # Only separate from a following sibling; the last child leaves
        # the line break to its parent.
        if pretty_print and closeTag and self.next_sibling:
            s.append("\n")
        s = ''.join(s)
    return s