Example #1
0
    def decode(self,
               indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Returns a Unicode representation of this tag and its contents.

        :param indent_level: When not None, the output is pretty-printed:
           the tag is indented by ``indent_level - 1`` spaces and its
           contents are decoded with ``indent_level + 1``. None turns
           pretty-printing off.
        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
        :param formatter: Passed to format_string() for every attribute
           value and on to decode_contents() for the children.
        :return: The markup for this tag as a single string. For a
           hidden tag only the decoded contents are returned.
        """
        attrs = []
        if self.attrs:
            for key, val in sorted(self.attrs.items()):
                if val is None:
                    # A valueless attribute is emitted as its bare name.
                    decoded = key
                else:
                    if isinstance(val, (list, tuple)):
                        # Multi-valued attributes are space-separated.
                        val = ' '.join(val)
                    elif not isinstance(val, str):
                        val = str(val)
                    elif (isinstance(val,
                                     AttributeValueWithCharsetSubstitution)
                          and eventual_encoding is not None):
                        val = val.encode(eventual_encoding)

                    text = self.format_string(val, formatter)
                    decoded = (str(key) + '=' +
                               EntitySubstitution.quoted_attribute_value(text))
                attrs.append(decoded)

        prefix = ''
        if self.prefix:
            prefix = self.prefix + ":"

        close = ''
        closeTag = ''
        if self.is_empty_element:
            close = '/'
        else:
            # The closing tag must carry the same namespace prefix as the
            # opening tag; otherwise '<ns:tag>' would be closed by '</tag>'.
            closeTag = '</%s%s>' % (prefix, self.name)

        pretty_print = (indent_level is not None)
        if pretty_print:
            space = (' ' * (indent_level - 1))
            indent_contents = indent_level + 1
        else:
            space = ''
            indent_contents = None
        contents = self.decode_contents(indent_contents, eventual_encoding,
                                        formatter)

        if self.hidden:
            # This is the 'document root' object.
            s = contents
        else:
            s = []
            attribute_string = ''
            if attrs:
                attribute_string = ' ' + ' '.join(attrs)
            if pretty_print:
                s.append(space)
            s.append('<%s%s%s%s>' %
                     (prefix, self.name, attribute_string, close))
            if pretty_print:
                s.append("\n")
            s.append(contents)
            if pretty_print and contents and contents[-1] != "\n":
                s.append("\n")
            if pretty_print and closeTag:
                s.append(space)
            s.append(closeTag)
            if pretty_print and closeTag and self.next_sibling:
                # Separate this tag from the sibling that follows it.
                s.append("\n")
            s = ''.join(s)
        return s
Example #2
0
    def decode(self, indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Returns a Unicode representation of this tag and its contents.

        :param indent_level: When not None, the output is pretty-printed:
           the tag is indented by ``indent_level - 1`` spaces and its
           contents are decoded with ``indent_level + 1``. None turns
           pretty-printing off.
        :param eventual_encoding: The tag is destined to be
           encoded into this encoding. This method is _not_
           responsible for performing that encoding. This information
           is passed in so that it can be substituted in if the
           document contains a <META> tag that mentions the document's
           encoding.
        :param formatter: Passed to format_string() for every attribute
           value and on to decode_contents() for the children.
        :return: The markup for this tag as a single string. For a
           hidden tag only the decoded contents are returned.
        """
        attrs = []
        if self.attrs:
            for key, val in sorted(self.attrs.items()):
                if val is None:
                    # A valueless attribute is emitted as its bare name.
                    decoded = key
                else:
                    if isinstance(val, (list, tuple)):
                        # Multi-valued attributes are space-separated.
                        val = ' '.join(val)
                    elif not isinstance(val, basestring):
                        val = str(val)
                    elif (
                        isinstance(val, AttributeValueWithCharsetSubstitution)
                        and eventual_encoding is not None):
                        val = val.encode(eventual_encoding)

                    text = self.format_string(val, formatter)
                    decoded = (
                        str(key) + '='
                        + EntitySubstitution.quoted_attribute_value(text))
                attrs.append(decoded)

        prefix = ''
        if self.prefix:
            prefix = self.prefix + ":"

        close = ''
        closeTag = ''
        if self.is_empty_element:
            close = '/'
        else:
            # The closing tag must carry the same namespace prefix as the
            # opening tag; otherwise '<ns:tag>' would be closed by '</tag>'.
            closeTag = '</%s%s>' % (prefix, self.name)

        pretty_print = (indent_level is not None)
        if pretty_print:
            space = (' ' * (indent_level - 1))
            indent_contents = indent_level + 1
        else:
            space = ''
            indent_contents = None
        contents = self.decode_contents(
            indent_contents, eventual_encoding, formatter)

        if self.hidden:
            # This is the 'document root' object.
            s = contents
        else:
            s = []
            attribute_string = ''
            if attrs:
                attribute_string = ' ' + ' '.join(attrs)
            if pretty_print:
                s.append(space)
            s.append('<%s%s%s%s>' % (
                    prefix, self.name, attribute_string, close))
            if pretty_print:
                s.append("\n")
            s.append(contents)
            if pretty_print and contents and contents[-1] != "\n":
                s.append("\n")
            if pretty_print and closeTag:
                s.append(space)
            s.append(closeTag)
            if pretty_print and closeTag and self.next_sibling:
                # Separate this tag from the sibling that follows it.
                s.append("\n")
            s = ''.join(s)
        return s
Example #3
0
    def decode(self, indent_level=None,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Render this tag, its attributes and its contents as one string.

        :param indent_level: None for compact output. Otherwise the tag
           is indented by ``indent_level - 1`` spaces, line breaks are
           inserted around the contents, and the contents are decoded
           with ``indent_level + 1``.
        :param eventual_encoding: Encoding the result will later be
           converted to. It is handed to attribute values that are
           AttributeValueWithCharsetSubstitution instances and passed on
           to decode_contents(); no encoding is performed here.
        :param formatter: Passed to format_string() for every attribute
           value and on to decode_contents().
        :return: The markup string, or just the decoded contents when
           this tag is hidden.
        """
        rendered_attrs = []
        if self.attrs:
            for name, value in sorted(self.attrs.items()):
                if value is None:
                    # Valueless attribute: emit the bare name.
                    rendered_attrs.append(name)
                    continue

                if isinstance(value, (list, tuple)):
                    value = ' '.join(value)
                elif not isinstance(value, basestring):
                    value = unicode(value)
                elif (eventual_encoding is not None
                      and isinstance(
                          value, AttributeValueWithCharsetSubstitution)):
                    value = value.encode(eventual_encoding)

                escaped = self.format_string(value, formatter)
                rendered_attrs.append(
                    unicode(name) + '='
                    + EntitySubstitution.quoted_attribute_value(escaped))

        ns_prefix = (self.prefix + ":") if self.prefix else ''

        if self.is_empty_element:
            slash, end_tag = '/', ''
        else:
            slash, end_tag = '', '</%s%s>' % (ns_prefix, self.name)

        pretty = indent_level is not None
        margin = ' ' * (indent_level - 1) if pretty else ''
        child_indent = indent_level + 1 if pretty else None
        body = self.decode_contents(
            child_indent, eventual_encoding, formatter)

        if self.hidden:
            # A hidden tag contributes only its contents.
            return body

        attr_text = (' ' + ' '.join(rendered_attrs)) if rendered_attrs else ''

        pieces = []
        if pretty:
            pieces.append(margin)
        pieces.append('<%s%s%s%s>' % (
            ns_prefix, self.name, attr_text, slash))
        if pretty:
            pieces.append("\n")
        pieces.append(body)
        if pretty:
            if body and body[-1] != "\n":
                pieces.append("\n")
            if end_tag:
                pieces.append(margin)
        pieces.append(end_tag)
        if pretty and end_tag and self.next_sibling:
            pieces.append("\n")
        return ''.join(pieces)
Example #4
0
    def output_dom(self, tag):
        """Write ``tag`` and, recursively, its child tags to ``self.dom``.

        The opening tag (name plus attributes) is written first, then
        the contents, then the closing tag and a newline. Contents are
        handled as follows:

        * ``pre`` tags: whatever ``tag.encode_contents(encoding='utf-8')``
          returns is written unchanged.
        * Child tags are written by a recursive call; a newline is
          written before the first child tag.
        * NavigableString children that are not Comments are written
          with ``formatter='html'``, stripped and UTF-8 encoded. When a
          tag has several children, whitespace-only strings are skipped
          and one space is restored on each side where the original
          text had leading/trailing whitespace.
        * Anything else (for example Comments) is not written.

        :param tag: The tag to serialize.
        """
        def is_visible_string(node):
            return (isinstance(node, NavigableString)
                    and not isinstance(node, Comment))

        attrs = []
        if tag.attrs:
            for key, val in tag.attrs.iteritems():
                if val is None:
                    # A valueless attribute is emitted as its bare name.
                    decoded = key
                else:
                    if isinstance(val, (list, tuple)):
                        val = ' '.join(val)
                    elif not isinstance(val, basestring):
                        val = unicode(val)
                    elif isinstance(val,
                                    AttributeValueWithCharsetSubstitution):
                        val = val.encode('utf-8')

                    text = tag.format_string(val)
                    decoded = (
                        unicode(key) + '='
                        + EntitySubstitution.quoted_attribute_value(text))
                attrs.append(decoded)

        close = ''
        close_tag = ''
        if tag.is_empty_element:
            close = '/'
        else:
            close_tag = '</%s>' % tag.name

        attribute_string = ''
        if attrs:
            attribute_string = ' ' + ' '.join(attrs)
        self.dom.write('<%s%s%s>' % (tag.name, attribute_string, close))

        if tag.contents:
            if tag.name == 'pre':
                self.dom.write(tag.encode_contents(encoding='utf-8'))
            elif len(tag.contents) == 1:
                sub_tag = tag.contents[0]
                if isinstance(sub_tag, Tag):
                    self.dom.write('\n')
                    self.output_dom(sub_tag)
                elif is_visible_string(sub_tag):
                    self.dom.write(sub_tag.output_ready(formatter='html')
                                   .strip().encode('utf-8'))
            else:
                # Only the first child tag is preceded by a newline.
                has_print = False
                for sub_tag in tag.contents:
                    if isinstance(sub_tag, Tag):
                        if not has_print:
                            self.dom.write('\n')
                            has_print = True
                        self.output_dom(sub_tag)
                    elif is_visible_string(sub_tag) and not sub_tag.isspace():
                        # Slices instead of [0] / [-1]: an empty string
                        # is not "isspace" and would otherwise raise
                        # IndexError here.
                        prefix = postfix = ''
                        if sub_tag[:1].isspace():
                            prefix = ' '
                        if sub_tag[-1:].isspace():
                            postfix = ' '
                        self.dom.write(
                            prefix + sub_tag.output_ready(formatter='html')
                            .strip().encode('utf-8') + postfix)
        self.dom.write(close_tag)
        self.dom.write('\n')
Example #5
0
	def decode(self, indent_level=None,
			   eventual_encoding=DEFAULT_OUTPUT_ENCODING,
			   formatter="minimal"):
		"""Render this tag, its attributes and its contents as one string.

		:param indent_level: None for compact output. Otherwise the
			opening tag is indented by ``indent_level - 1`` spaces;
			whether line breaks are added around the contents is
			decided by _should_pretty_print().
		:param eventual_encoding: Encoding the result will later be
			converted to. It is handed to attribute values that are
			AttributeValueWithCharsetSubstitution instances and passed
			on to decode_contents(); no encoding is performed here.
		:param formatter: A callable, or a name that is resolved once
			through _formatter_for_name() so the lookup is not repeated
			for every attribute and child.
		:return: The markup string, or just the decoded contents when
			this tag is hidden.
		"""
		# Resolve a formatter name into a callable up front.
		if not callable(formatter):
			formatter = self._formatter_for_name(formatter)

		rendered_attrs = []
		if self.attrs:
			for name, value in sorted(self.attrs.items()):
				if value is None:
					# Valueless attribute: emit the bare name.
					rendered_attrs.append(name)
					continue

				if isinstance(value, (list, tuple)):
					value = ' '.join(value)
				elif not isinstance(value, basestring):
					value = unicode(value)
				elif (eventual_encoding is not None
						and isinstance(
							value, AttributeValueWithCharsetSubstitution)):
					value = value.encode(eventual_encoding)

				escaped = self.format_string(value, formatter)
				rendered_attrs.append(
					unicode(name) + '='
					+ EntitySubstitution.quoted_attribute_value(escaped))

		ns_prefix = (self.prefix + ":") if self.prefix else ''

		if self.is_empty_element:
			slash, end_tag = '/', ''
		else:
			slash, end_tag = '', '</%s%s>' % (ns_prefix, self.name)

		pretty = self._should_pretty_print(indent_level)
		have_indent = indent_level is not None
		lead = ' ' * (indent_level - 1) if have_indent else ''
		child_indent = indent_level + 1 if pretty else None
		body = self.decode_contents(
			child_indent, eventual_encoding, formatter)

		if self.hidden:
			# A hidden tag contributes only its contents.
			return body

		attr_text = (' ' + ' '.join(rendered_attrs)) if rendered_attrs else ''

		pieces = []
		if have_indent:
			# The opening tag is indented even when this particular
			# tag is not itself pretty-printed.
			pieces.append(lead)
		pieces.append('<%s%s%s%s>' % (
			ns_prefix, self.name, attr_text, slash))
		if pretty:
			pieces.append("\n")
		pieces.append(body)
		if pretty:
			if body and body[-1] != "\n":
				pieces.append("\n")
			if end_tag:
				pieces.append(lead)
		pieces.append(end_tag)
		if have_indent and end_tag and self.next_sibling:
			# The tag is finished; break the line before the next
			# sibling even if this tag was not pretty-printed.
			pieces.append("\n")
		return ''.join(pieces)