def decode(self, indent_level=0, **kwargs):
    """Render this tag, its attributes and its contents as a Unicode string.

    :param indent_level: Nesting depth handed to
        ``self._should_pretty_print`` to decide whether line breaks are
        emitted. When it is not None the tag is indented with
        ``self.indent * (indent_level - 1)``.
    :param kwargs: Only ``flatten`` is read. A true value means an
        enclosing tag is already being written on a single line, so this
        tag adds no indentation or newlines of its own and renders its
        children flattened as well.
    :return: The markup for this tag, or just the decoded contents when
        ``self.hidden`` is set.
    """
    rendered_attrs = []
    if self.attrs:
        for attr_name, attr_value in sorted(self.attrs.items()):
            if attr_value is None:
                # Valueless attribute: emit the bare key.
                rendered_attrs.append(attr_name)
                continue
            if isinstance(attr_value, (list, tuple)):
                attr_value = ' '.join(attr_value)
            elif not isinstance(attr_value, basestring):
                attr_value = unicode(attr_value)
            elif isinstance(attr_value,
                            AttributeValueWithCharsetSubstitution):
                attr_value = attr_value.encode('utf-8')
            rendered_attrs.append(
                unicode(attr_name) + '='
                + EntitySubstitution.quoted_attribute_value(attr_value))

    ns_prefix = self.prefix + ":" if self.prefix else ''

    if self.is_empty_element:
        void_marker, closing_tag = '/', ''
    else:
        void_marker, closing_tag = '', '</%s%s>' % (ns_prefix, self.name)

    parent_flatten = kwargs.pop('flatten', False)
    if parent_flatten:
        # An ancestor is already flattened; stay on its line.
        flatten = True
    else:
        def wants_indent(node):
            return isinstance(node, Tag) and node.needs_indent

        has_indented_descendant = len(self.find_all(wants_indent)) > 0
        # Flatten only when neither this tag nor anything below it asks
        # for indentation.
        flatten = not (self.needs_indent or has_indented_descendant)

    pretty_print = self._should_pretty_print(indent_level)

    indent_space = ''
    if indent_level is not None:
        indent_space = self.indent * (indent_level - 1)

    child_indent_level = indent_level + 1 if pretty_print else None
    contents = self.decode_contents(child_indent_level, flatten=flatten)

    if self.hidden:
        return u'' + contents

    attribute_string = ''
    if rendered_attrs:
        attribute_string = ' ' + ' '.join(rendered_attrs)

    # Leading indent and trailing newline belong to the tag as a whole;
    # the inner line breaks only appear when the tag is not flattened.
    outer_layout = indent_level is not None and not parent_flatten
    inner_layout = pretty_print and not flatten

    pieces = []
    if outer_layout:
        pieces.append(indent_space)
    pieces.append('<%s%s%s%s>' % (
        ns_prefix, self.name, attribute_string, void_marker))
    if inner_layout:
        pieces.append('\n')
    pieces.append(contents)
    if inner_layout and contents and contents[-1] != '\n':
        pieces.append('\n')
    if inner_layout and closing_tag:
        # Line the closing tag up with the opening tag.
        pieces.append(indent_space)
    pieces.append(closing_tag)
    if outer_layout and closing_tag:
        pieces.append('\n')
    return u'' + ''.join(pieces)
def decode(self, indent_level=None,
           eventual_encoding=DEFAULT_OUTPUT_ENCODING,
           formatter="minimal", tabsize=4):
    """Returns a Unicode representation of this tag and its contents.

    :param indent_level: None disables pretty-printing. Otherwise the
        tag is indented by ``tabsize * (indent_level - 1)`` spaces and
        its contents are decoded at ``indent_level + 1``.
    :param eventual_encoding: The tag is destined to be encoded into
        this encoding. This method is _not_ responsible for performing
        that encoding. This information is passed in so that it can be
        substituted in if the document contains a <META> tag that
        mentions the document's encoding.
    :param formatter: Passed through unchanged to ``decode_contents``.
    :param tabsize: Number of spaces per indentation level when
        pretty-printing.
    :return: The markup for this tag, or just the decoded contents when
        ``self.hidden`` is set.
    """
    attrs = []
    if self.attrs:
        for key, val in sorted(self.attrs.items()):
            if val is None:
                # Valueless attribute: emit the bare name.
                decoded = key
            else:
                if isinstance(val, (list, tuple)):
                    val = ' '.join(val)
                elif not isinstance(val, basestring):
                    val = str(val)
                if (self.contains_substitutions
                        and eventual_encoding is not None
                        and '%SOUP-ENCODING%' in val):
                    val = self.substitute_encoding(val, eventual_encoding)
                decoded = (str(key) + '='
                           + EntitySubstitution.substitute_xml(val, True))
            attrs.append(decoded)

    # The prefix is computed before the closing tag so that both the
    # opening and the closing tag carry it; otherwise a prefixed element
    # would be written as <ns:name>...</name>.
    prefix = ''
    if self.prefix:
        prefix = self.prefix + ":"

    close = ''
    closeTag = ''
    if self.is_empty_element:
        close = '/'
    else:
        closeTag = '</%s%s>' % (prefix, self.name)

    pretty_print = (indent_level is not None)
    if pretty_print:
        space = (' ' * tabsize * (indent_level - 1))
        indent_contents = indent_level + 1
    else:
        space = ''
        indent_contents = None
    contents = decode_contents(self, indent_contents, eventual_encoding,
                               formatter, tabsize)
    # Contents made only of whitespace are treated as empty and are not
    # written out.
    isempty = not contents.strip('\n').strip()

    if self.hidden:
        # This is the 'document root' object.
        s = contents
    else:
        s = []
        attribute_string = ''
        if attrs:
            attribute_string = ' ' + ' '.join(attrs)
        if pretty_print:
            s.append(space)
        s.append('<%s%s%s%s>' % (
            prefix, self.name, attribute_string, close))
        # Break after the opening tag when there is content to follow or
        # when there is no closing tag at all.
        if pretty_print and (not isempty or not closeTag):
            s.append("\n")
        if not isempty:
            s.append(contents)
        if pretty_print and not isempty and contents[-1] != "\n":
            s.append("\n")
        if pretty_print and not isempty and closeTag:
            s.append(space)
        s.append(closeTag)
        # Only add a trailing newline when another node follows this one.
        if pretty_print and closeTag and self.next_sibling:
            s.append("\n")
        s = ''.join(s)
    return s