Exemple #1
0
 def render_option(option_value, option_label):
     """Render one ``<option>`` element as a unicode string.

     ``selected_choices`` is not a parameter; it is resolved from an outer
     scope.  The option is marked selected when its unicode value is a
     member of that collection.
     """
     option_value = force_unicode(option_value)
     if option_value in selected_choices:
         selected_html = u' selected="selected"'
     else:
         selected_html = ''
     # The value lands inside a quoted attribute, so it is escaped just like
     # the label; otherwise a quote or angle bracket in the value could break
     # out of the tag.
     return u'<option value="%s"%s>%s</option>' % (
         conditional_escape(option_value), selected_html,
         conditional_escape(force_unicode(option_label)))
Exemple #2
0
    def render(self, name, value, attrs=None, choices=()):
        """Render the choices as a ``<ul>`` of labelled checkboxes.

        Choices are taken from ``self.choices`` followed by the ``choices``
        argument.  A checkbox is checked when its unicode value is among the
        unicode forms of the entries in ``value`` (``None`` means no
        entries).  The joined markup is returned through ``mark_safe``.
        """
        if value is None:
            value = []
        has_id = attrs and 'id' in attrs
        base_attrs = self.build_attrs(attrs, name=name)
        # Compare as unicode so non-string values still match.
        checked_values = set(force_unicode(v) for v in value)
        is_checked = lambda candidate: candidate in checked_values

        lines = [u'<ul>']
        for index, choice in enumerate(chain(self.choices, choices)):
            choice_value, choice_label = choice
            if has_id:
                # Suffix the given ID with the running index so that the
                # checkboxes do not all share one ID attribute.
                item_attrs = dict(base_attrs,
                                  id='%s_%s' % (attrs['id'], index))
                label_for = u' for="%s"' % item_attrs['id']
            else:
                item_attrs = base_attrs
                label_for = ''

            checkbox = CheckboxInput(item_attrs, check_test=is_checked)
            rendered_cb = checkbox.render(name, force_unicode(choice_value))
            label_text = conditional_escape(force_unicode(choice_label))
            lines.append(u'<li><label%s>%s %s</label></li>' %
                         (label_for, rendered_cb, label_text))
        lines.append(u'</ul>')
        return mark_safe(u'\n'.join(lines))
Exemple #3
0
def render(tree, source):
    """Try rendering as HTML, then XML, then give up.

    ``tree`` is serialized with ``_serialize`` first; if that raises, the
    error is logged and ``tree.toxml()`` is tried instead.  ``source`` is
    only used in the log messages.  Returns ``u''`` when both attempts fail.
    """
    try:
        return force_unicode(_serialize(tree))
    except Exception as html_error:
        # "as" binds the exception on Python 2.6+ and Python 3 alike; the
        # arguments are passed to the logger instead of pre-formatting.
        log.error('HTML: %r ::: %r', html_error, source)

    try:
        return force_unicode(tree.toxml())
    except Exception as xml_error:
        log.error('XML: %r ::: %r', xml_error, source)

    return u''
Exemple #4
0
def _render(tree):
    """Try rendering as HTML, then XML, then give up.

    ``tree`` is serialized with ``_serialize`` first; if that raises, the
    error is logged with its traceback and ``tree.toxml()`` is tried
    instead.  Returns ``u''`` when both attempts fail.
    """
    try:
        return force_unicode(_serialize(tree))
    except Exception as html_error:
        # "as" binds the exception on Python 2.6+ and Python 3 alike.
        # exc_info=True attaches the exception currently being handled.
        log.error('HTML: %r', html_error, exc_info=True)

    try:
        return force_unicode(tree.toxml())
    except Exception as xml_error:
        log.error('XML: %r', xml_error, exc_info=True)

    return u''
Exemple #5
0
 def _has_changed(self, initial, data):
     """Return True when ``data`` differs from ``initial``.

     ``None`` on either side counts as an empty list.  The two sequences
     differ when their lengths differ or when any pair of items at the same
     position has different unicode forms.
     """
     if initial is None:
         initial = []
     if data is None:
         data = []
     if len(initial) != len(data):
         return True
     return any(force_unicode(before) != force_unicode(after)
                for before, after in zip(initial, data))
Exemple #6
0
def _render(tree, source):
    """Try rendering as HTML, then XML, then give up.

    ``tree`` is serialized with ``_serialize`` first; if that raises, the
    error is logged and ``tree.toxml()`` is tried instead.  ``source`` is
    only used in the log messages.  Returns ``u''`` when both attempts fail.
    """
    try:
        return force_unicode(_serialize(tree))
    except Exception as html_error:
        # "as" binds the exception on Python 2.6+ and Python 3 alike; the
        # arguments are passed to the logger instead of pre-formatting.
        log.error('HTML: %r ::: %r', html_error, source)

    try:
        return force_unicode(tree.toxml())
    except Exception as xml_error:
        log.error('XML: %r ::: %r', xml_error, source)

    return u''
Exemple #7
0
 def _has_changed(self, initial, data):
     """Tell whether the submitted sequence differs from the initial one.

     A ``None`` argument is treated as an empty list.  Returns True on a
     length mismatch, or as soon as two items at the same position have
     different unicode forms; otherwise returns False.
     """
     if initial is None:
         initial = []
     if data is None:
         data = []
     if len(initial) != len(data):
         return True
     pairs = zip(initial, data)
     return any(force_unicode(old) != force_unicode(new)
                for old, new in pairs)
Exemple #8
0
 def __unicode__(self):
     """Return the ``<label>`` markup wrapping this input's tag and label.

     A ``for`` attribute of the form ``<id>_<index>`` is added only when
     ``self.attrs`` carries an ``id``.
     """
     label_for = ''
     if 'id' in self.attrs:
         label_for = ' for="%s_%s"' % (self.attrs['id'], self.index)
     choice_label = conditional_escape(force_unicode(self.choice_label))
     markup = u'<label%s>%s %s</label>' % (
         label_for, self.tag(), choice_label)
     return mark_safe(markup)
Exemple #9
0
def clean(text,
          tags=ALLOWED_TAGS,
          attributes=ALLOWED_ATTRIBUTES,
          styles=ALLOWED_STYLES,
          strip=False,
          strip_comments=True):
    """Clean an HTML fragment and return it.

    Falsy input yields ``u''``.  Otherwise the text is converted to unicode,
    parsed as a fragment, rendered with ``_render`` and stripped of
    surrounding whitespace.
    """
    if not text:
        return u''

    fragment = force_unicode(text)
    if fragment.startswith(u'<!--'):
        # A leading comment gets a space put in front of it before parsing.
        fragment = u' ' + fragment

    # NOTE(review): this configured sanitizer is built but never handed to
    # the parser below, which uses HTMLSanitizer instead.  As written, the
    # tags/attributes/styles/strip/strip_comments arguments therefore do not
    # reach the parser -- confirm that this is intended.
    class s(BleachSanitizer):
        allowed_elements = tags
        allowed_attributes = attributes
        allowed_css_properties = styles
        strip_disallowed_elements = strip
        strip_html_comments = strip_comments

    parser = html5lib.HTMLParser(tokenizer=HTMLSanitizer)
    tree = parser.parseFragment(fragment)
    return _render(tree).strip()
Exemple #10
0
 def render(self, name, value, attrs=None):
     """Render the widget as a single ``<input ... />`` element.

     The ``value`` attribute is emitted only when ``value`` is neither
     ``None`` nor equal to the empty string.
     """
     final_attrs = self.build_attrs(attrs, type=self.input_type, name=name)
     if value is not None and value != '':
         final_attrs['value'] = force_unicode(value)
     markup = u'<input%s />' % flatatt(final_attrs)
     return mark_safe(markup)
Exemple #11
0
 def get_renderer(self, name, value, attrs=None, choices=()):
     """Returns an instance of the renderer.

     The renderer receives the unicode form of ``value`` (``None`` becomes
     the empty string), the built attributes, and ``self.choices`` followed
     by the extra ``choices`` as one list.
     """
     if value is None:
         value = ''
     normalized = force_unicode(value)
     built_attrs = self.build_attrs(attrs)
     all_choices = list(chain(self.choices, choices))
     return self.renderer(name, normalized, built_attrs, all_choices)
Exemple #12
0
def clean(
    text,
    tags=ALLOWED_TAGS,
    attributes=ALLOWED_ATTRIBUTES,
    styles=ALLOWED_STYLES,
    kill_tags=KILL_TAGS,
    strip=False,
    strip_comments=True,
):
    """Clean an HTML fragment and return it.

    Falsy input yields ``u""``.  Raises ``ValueError`` when a tag appears in
    both ``tags`` and ``kill_tags``.  Otherwise the text is parsed with a
    sanitizer configured from the arguments, rendered with ``_render`` and
    stripped of surrounding whitespace.
    """
    if not text:
        return u""

    text = force_unicode(text)
    if text.startswith(u"<!--"):
        # A leading comment gets a space put in front of it before parsing.
        text = u" " + text

    conflicting = set(tags) & set(kill_tags)
    if conflicting:
        raise ValueError("The same tag cannot be in both tags and kill_tags")

    # Per-call sanitizer subclass carrying this call's settings.
    class s(BleachSanitizer):
        allowed_elements = tags
        allowed_attributes = attributes
        allowed_css_properties = styles
        kill_elements = kill_tags
        strip_disallowed_elements = strip
        strip_html_comments = strip_comments

    tree = html5lib.HTMLParser(tokenizer=s).parseFragment(text)
    return _render(tree).strip()
Exemple #13
0
 def get_renderer(self, name, value, attrs=None, choices=()):
     """Returns an instance of the renderer.

     ``None`` is replaced by the empty string before ``value`` is converted
     to unicode.  The widget's own choices and the extra ``choices`` are
     combined, in that order, into a single list.
     """
     if value is None:
         value = ''
     # Normalize to string.
     value_text = force_unicode(value)
     renderer_attrs = self.build_attrs(attrs)
     combined = list(chain(self.choices, choices))
     return self.renderer(name, value_text, renderer_attrs, combined)
Exemple #14
0
 def render(self, name, value, attrs=None):
     """Return the ``<input ... />`` markup for this widget.

     ``None`` is treated like the empty string, and an empty value produces
     no ``value`` attribute at all.
     """
     if value is None:
         value = ''
     input_attrs = self.build_attrs(attrs, type=self.input_type, name=name)
     if value != '':
         # Non-empty values are emitted in their unicode form.
         input_attrs['value'] = force_unicode(value)
     html = u'<input%s />' % flatatt(input_attrs)
     return mark_safe(html)
Exemple #15
0
 def add_item(self, title, link, description, author_email=None,
     author_name=None, author_link=None, pubdate=None, comments=None,
     unique_id=None, enclosure=None, categories=(), item_copyright=None,
     ttl=None, **kwargs):
     """
     Adds an item to the feed. All args are expected to be Python Unicode
     objects except pubdate, which is a datetime.datetime object, and
     enclosure, which is an instance of the Enclosure class.

     Extra keyword arguments are stored on the item as well and take
     precedence over the standard keys of the same name.
     """
     def to_unicode(s):
         return force_unicode(s, strings_only=True)

     if categories:
         categories = [to_unicode(c) for c in categories]
     standard_fields = {
         'title': to_unicode(title),
         'link': iri_to_uri(link),
         'description': to_unicode(description),
         'author_email': to_unicode(author_email),
         'author_name': to_unicode(author_name),
         'author_link': iri_to_uri(author_link),
         'pubdate': pubdate,
         'comments': to_unicode(comments),
         'unique_id': to_unicode(unique_id),
         'enclosure': enclosure,
         'categories': categories or (),
         'item_copyright': to_unicode(item_copyright),
         'ttl': ttl,
     }
     self.items.append(dict(standard_fields, **kwargs))
Exemple #16
0
def urlquote(url, safe='/'):
    """
    A version of Python's urllib.quote() function that can operate on unicode
    strings. The url is first UTF-8 encoded before quoting. The returned string
    can safely be used as part of an argument to a subsequent iri_to_uri() call
    without double-quoting occurring.
    """
    url_bytes = smart_str(url)
    safe_bytes = smart_str(safe)
    quoted = urllib.quote(url_bytes, safe_bytes)
    return force_unicode(quoted)
Exemple #17
0
 def __unicode__(self):
     """Build the ``<label>`` element containing the input tag and its text.

     When ``self.attrs`` has an ``id``, the label points at
     ``<id>_<index>`` through its ``for`` attribute.
     """
     has_id = 'id' in self.attrs
     if has_id:
         label_for = ' for="%s_%s"' % (self.attrs['id'], self.index)
     else:
         label_for = ''
     escaped_label = conditional_escape(force_unicode(self.choice_label))
     pieces = (label_for, self.tag(), escaped_label)
     return mark_safe(u'<label%s>%s %s</label>' % pieces)
Exemple #18
0
 def _has_changed(self, initial, data):
     """
     Return True if data differs from initial.
     """
     # None and the empty string are interchangeable when detecting a
     # change, so a None on either side is mapped to u'' before the two
     # values are compared in their unicode forms.
     if initial is None:
         initial = u''
     if data is None:
         data = u''
     return force_unicode(initial) != force_unicode(data)
Exemple #19
0
 def _has_changed(self, initial, data):
     """
     Return True if data differs from initial.
     """
     # A missing value (None) is treated exactly like an empty string, so
     # that None versus u'' is not reported as a change.
     if initial is None:
         before = u''
     else:
         before = initial
     if data is None:
         after = u''
     else:
         after = data
     return force_unicode(before) != force_unicode(after)
Exemple #20
0
    def __init__(self, message, code=None, params=None):
        """
        ValidationError can be passed any object that can be printed (usually
        a string), a list of objects or a dictionary.

        A dictionary is kept as ``self.message_dict`` and its values are
        concatenated into one message list.  ``code`` and ``params`` are
        stored only when the (resulting) message is not a list.
        """
        # The docstring must be the first statement to be picked up as one,
        # so the function-local imports follow it.
        import operator
        from encoding import force_unicode

        if isinstance(message, dict):
            self.message_dict = message
            # Reduce each list of messages into a single list.  reduce()
            # without an initial value raises TypeError on an empty
            # sequence, so an empty dict maps straight to "no messages".
            per_key_messages = message.values()
            if per_key_messages:
                message = reduce(operator.add, per_key_messages)
            else:
                message = []

        if isinstance(message, list):
            self.messages = [force_unicode(msg) for msg in message]
        else:
            self.code = code
            self.params = params
            message = force_unicode(message)
            self.messages = [message]
Exemple #21
0
 def render_options(self, choices, selected_choices):
     """Render all options as newline-joined ``<option>`` markup.

     Choices come from ``self.choices`` followed by ``choices``.  A choice
     whose label is a list or tuple is rendered as an ``<optgroup>`` with
     the choice value as the group label and the label's entries as the
     group's options.  ``selected_choices`` is compared in unicode form.
     """
     # Normalize to strings.
     selected_choices = set([force_unicode(v) for v in selected_choices])

     def render_option(option_value, option_label):
         option_value = force_unicode(option_value)
         if option_value in selected_choices:
             selected_html = u' selected="selected"'
         else:
             selected_html = ''
         # The value lands inside a quoted attribute, so it is escaped
         # just like the label.
         return u'<option value="%s"%s>%s</option>' % (
             conditional_escape(option_value), selected_html,
             conditional_escape(force_unicode(option_label)))

     output = []
     for option_value, option_label in chain(self.choices, choices):
         if isinstance(option_label, (list, tuple)):
             # The group label is attribute content too, so escape it.
             output.append(u'<optgroup label="%s">' %
                           conditional_escape(force_unicode(option_value)))
             for option in option_label:
                 output.append(render_option(*option))
             output.append(u'</optgroup>')
         else:
             output.append(render_option(option_value, option_label))
     return u'\n'.join(output)
Exemple #22
0
    def __init__(self, message, code=None, params=None):
        """
        ValidationError can be passed any object that can be printed (usually
        a string), a list of objects or a dictionary.

        A dictionary is kept as ``self.message_dict`` and its values are
        concatenated into one message list.  ``code`` and ``params`` are
        stored only when the (resulting) message is not a list.
        """
        # The docstring must be the first statement to be picked up as one,
        # so the function-local imports follow it.
        import operator
        from encoding import force_unicode

        if isinstance(message, dict):
            self.message_dict = message
            # Reduce each list of messages into a single list.  reduce()
            # without an initial value raises TypeError on an empty
            # sequence, so an empty dict maps straight to "no messages".
            grouped = message.values()
            if grouped:
                message = reduce(operator.add, grouped)
            else:
                message = []

        if isinstance(message, list):
            self.messages = [force_unicode(msg) for msg in message]
        else:
            self.code = code
            self.params = params
            message = force_unicode(message)
            self.messages = [message]
Exemple #23
0
 def render(self, name, value, attrs=None):
     """Render the checkbox as an ``<input type="checkbox" ... />`` element.

     The box is checked when ``self.check_test(value)`` is truthy; a
     failing check counts as unchecked.  A ``value`` attribute is added
     unless ``value`` compares equal to ``''``, ``True``, ``False`` or
     ``None``.
     """
     final_attrs = self.build_attrs(attrs, type='checkbox', name=name)
     try:
         result = self.check_test(value)
     except Exception:
         # Deliberately best-effort: a broken check means "unchecked".
         # Catching Exception rather than everything lets
         # KeyboardInterrupt and SystemExit propagate.
         result = False
     if result:
         final_attrs['checked'] = 'checked'
     # Membership is tested with ==, so 0 and 1 (equal to False and True)
     # are skipped here as well.
     if value not in ('', True, False, None):
         # Only add the 'value' attribute if a value is non-empty.
         final_attrs['value'] = force_unicode(value)
     return mark_safe(u'<input%s />' % flatatt(final_attrs))
Exemple #24
0
 def render(self, name, value, attrs=None):
     """Return ``<input type="checkbox" ... />`` markup for ``value``.

     ``self.check_test(value)`` decides whether the ``checked`` attribute
     is set; if the check raises, the box is rendered unchecked.  The
     ``value`` attribute is omitted when ``value`` compares equal to
     ``''``, ``True``, ``False`` or ``None``.
     """
     final_attrs = self.build_attrs(attrs, type='checkbox', name=name)
     try:
         result = self.check_test(value)
     except Exception:
         # Best-effort on purpose, but narrowed from a bare "except" so
         # that KeyboardInterrupt and SystemExit are not swallowed.
         result = False
     if result:
         final_attrs['checked'] = 'checked'
     # Membership is tested with ==, so 0 and 1 (equal to False and True)
     # are skipped here as well.
     if value not in ('', True, False, None):
         # Only add the 'value' attribute if a value is non-empty.
         final_attrs['value'] = force_unicode(value)
     return mark_safe(u'<input%s />' % flatatt(final_attrs))
Exemple #25
0
 def render(self, name, value, attrs=None, choices=()):
     """Render one ``<input ... />`` per entry of ``value``.

     ``None`` is treated as an empty list.  When the built attributes
     contain a truthy ``id``, each input gets ``<id>_<index>`` as its own
     ID.  The inputs are joined with newlines and returned through
     ``mark_safe``.
     """
     if value is None:
         value = []
     final_attrs = self.build_attrs(attrs, type=self.input_type, name=name)
     base_id = final_attrs.get('id', None)
     rendered = []
     for position, item in enumerate(value):
         item_attrs = dict(value=force_unicode(item), **final_attrs)
         if base_id:
             # Suffix the ID with the index so the inputs do not all
             # share the same ID attribute.
             item_attrs['id'] = '%s_%s' % (base_id, position)
         rendered.append(u'<input%s />' % flatatt(item_attrs))
     return mark_safe(u'\n'.join(rendered))
Exemple #26
0
    def render_options(self, choices, selected_choices):
        """Render all options as newline-joined ``<option>`` markup.

        Choices come from ``self.choices`` followed by ``choices``.  A
        choice whose label is a list or tuple is rendered as an
        ``<optgroup>`` with the choice value as the group label and the
        label's entries as the group's options.  ``selected_choices`` is
        compared in unicode form.
        """
        # Normalize to strings.
        selected_choices = set([force_unicode(v) for v in selected_choices])

        def render_option(option_value, option_label):
            option_value = force_unicode(option_value)
            if option_value in selected_choices:
                selected_html = u' selected="selected"'
            else:
                selected_html = ''
            # The value lands inside a quoted attribute, so it is escaped
            # just like the label.
            return u'<option value="%s"%s>%s</option>' % (
                conditional_escape(option_value), selected_html,
                conditional_escape(force_unicode(option_label)))

        output = []
        for option_value, option_label in chain(self.choices, choices):
            if isinstance(option_label, (list, tuple)):
                # The group label is attribute content too, so escape it.
                output.append(u'<optgroup label="%s">' %
                              conditional_escape(force_unicode(option_value)))
                for option in option_label:
                    output.append(render_option(*option))
                output.append(u'</optgroup>')
            else:
                output.append(render_option(option_value, option_label))
        return u'\n'.join(output)
Exemple #27
0
 def render(self, name, value, attrs=None, choices=()):
     """Return newline-joined ``<input ... />`` markup, one per value.

     A ``None`` value renders nothing.  Each input carries the unicode form
     of its entry as ``value``; if the built attributes include a truthy
     ``id``, the entry's index is appended to it.
     """
     if value is None:
         value = []
     shared_attrs = self.build_attrs(attrs, type=self.input_type, name=name)
     id_ = shared_attrs.get('id', None)

     def render_one(index, entry):
         entry_attrs = dict(value=force_unicode(entry), **shared_attrs)
         if id_:
             # Give every input a distinct ID by appending its index.
             entry_attrs['id'] = '%s_%s' % (id_, index)
         return u'<input%s />' % flatatt(entry_attrs)

     inputs = [render_one(i, v) for i, v in enumerate(value)]
     return mark_safe(u'\n'.join(inputs))
Exemple #28
0
 def __init__(self, title, link, description, language=None, author_email=None,
         author_name=None, author_link=None, subtitle=None, categories=None,
         feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
     """Store the feed-level metadata and start with an empty item list.

     Text fields are converted with ``force_unicode(..., strings_only=True)``
     and link fields go through ``iri_to_uri``.  The feed ``id`` falls back
     to ``link`` when ``feed_guid`` is falsy.  Extra keyword arguments are
     merged into ``self.feed`` last, so they override the standard keys.
     """
     def to_unicode(s):
         return force_unicode(s, strings_only=True)

     if categories:
         categories = [force_unicode(c) for c in categories]
     feed = {
         'title': to_unicode(title),
         'link': iri_to_uri(link),
         'description': to_unicode(description),
         'language': to_unicode(language),
         'author_email': to_unicode(author_email),
         'author_name': to_unicode(author_name),
         'author_link': iri_to_uri(author_link),
         'subtitle': to_unicode(subtitle),
         'categories': categories or (),
         'feed_url': iri_to_uri(feed_url),
         'feed_copyright': to_unicode(feed_copyright),
         'id': feed_guid or link,
         'ttl': ttl,
     }
     feed.update(kwargs)
     self.feed = feed
     self.items = []
Exemple #29
0
    def clean(self, string, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES):
        """Clean an HTML string and return it.

        Falsy input yields ``u''``.  Otherwise the string is parsed as a
        fragment with a sanitizer configured from ``tags`` and
        ``attributes``, serialized, converted to unicode and stripped.
        """
        if not string:
            return u''
        if string.startswith('<!--'):
            # A leading comment gets a space put in front of it first.
            string = u' ' + string

        # Per-call sanitizer subclass carrying this call's whitelists.
        class s(BleachSanitizer):
            allowed_elements = tags
            allowed_attributes = attributes

        fragment = html5lib.HTMLParser(tokenizer=s).parseFragment(string)
        serialized = _serialize(fragment)
        return force_unicode(serialized).strip()
Exemple #30
0
    def render(self, name, value, attrs=None, choices=()):
        """Return a ``<ul>`` with one labelled checkbox per choice.

        The widget's own choices are rendered first, followed by
        ``choices``.  ``value`` (``None`` meaning empty) lists the selected
        values; matching is done on unicode forms.  The result is passed
        through ``mark_safe``.
        """
        if value is None:
            value = []
        has_id = attrs and 'id' in attrs
        shared_attrs = self.build_attrs(attrs, name=name)
        # Normalize to strings
        selected = set(force_unicode(v) for v in value)

        def in_selection(candidate):
            return candidate in selected

        rows = []
        for i, (option_value, option_label) in enumerate(
                chain(self.choices, choices)):
            label_for = ''
            cb_attrs = shared_attrs
            if has_id:
                # Give each checkbox its own ID by appending the index.
                cb_attrs = dict(shared_attrs, id='%s_%s' % (attrs['id'], i))
                label_for = u' for="%s"' % cb_attrs['id']

            cb = CheckboxInput(cb_attrs, check_test=in_selection)
            rendered_cb = cb.render(name, force_unicode(option_value))
            escaped_label = conditional_escape(force_unicode(option_label))
            rows.append(u'<li><label%s>%s %s</label></li>'
                        % (label_for, rendered_cb, escaped_label))
        return mark_safe(u'\n'.join([u'<ul>'] + rows + [u'</ul>']))
Exemple #31
0
def sanitize_address(addr, encoding):
    """Return a formatted address built from ``addr``.

    ``addr`` is either a string, which is parsed with ``parseaddr``, or an
    already split ``(name, address)`` pair.  The name goes through
    ``Header`` with ``encoding``.  An address that cannot be encoded as
    ASCII has its local part run through ``Header`` and its domain encoded
    with the ``idna`` codec, or is run through ``Header`` as a whole when
    it contains no ``@``.
    """
    if isinstance(addr, basestring):
        addr = parseaddr(force_unicode(addr))
    display_name, address = addr
    display_name = str(Header(display_name, encoding))
    try:
        address = address.encode('ascii')
    except UnicodeEncodeError:  # IDN
        if u'@' not in address:
            address = str(Header(address, encoding))
        else:
            local, host = address.split(u'@', 1)
            address = '@'.join([str(Header(local, encoding)),
                                host.encode('idna')])
    return formataddr((display_name, address))
Exemple #32
0
def sanitize_address(addr, encoding):
    """Encode the name and address parts of ``addr`` and format them.

    A string ``addr`` is first split with ``parseaddr``; anything else is
    unpacked as a ``(name, address)`` pair.  ASCII addresses are used as
    they are.  Otherwise the part before the first ``@`` goes through
    ``Header`` and the part after it is encoded with ``idna``; an address
    without ``@`` goes through ``Header`` whole.
    """
    if isinstance(addr, basestring):
        addr = parseaddr(force_unicode(addr))
    nm, mailbox = addr
    encoded_name = str(Header(nm, encoding))
    try:
        mailbox = mailbox.encode('ascii')
    except UnicodeEncodeError:  # IDN
        if u'@' in mailbox:
            pieces = mailbox.split(u'@', 1)
            localpart = str(Header(pieces[0], encoding))
            domain = pieces[1].encode('idna')
            mailbox = '@'.join([localpart, domain])
        else:
            mailbox = str(Header(mailbox, encoding))
    return formataddr((encoded_name, mailbox))
Exemple #33
0
def forbid_multi_line_headers(name, val, encoding):
    """Forbids multi-line headers, to prevent header injection.

    Returns ``(name, val)`` with ``val`` encoded.  Raises ``BadHeaderError``
    when the unicode value contains a newline or carriage return.  ASCII
    values are kept as encoded bytes, wrapped in ``Header`` for the subject
    header.  Other values go through ``sanitize_address`` per address for
    address headers, or through ``Header`` with ``encoding`` otherwise.
    """
    encoding = encoding or DEFAULT_CHARSET
    val = force_unicode(val)
    if '\n' in val or '\r' in val:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (val, name))

    try:
        ascii_val = val.encode('ascii')
    except UnicodeEncodeError:
        if name.lower() not in ADDRESS_HEADERS:
            return name, str(Header(val, encoding))
        addresses = getaddresses((val,))
        return name, ', '.join(sanitize_address(addr, encoding)
                               for addr in addresses)

    if name.lower() == 'subject':
        return name, Header(ascii_val)
    return name, ascii_val
Exemple #34
0
def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
          styles=ALLOWED_STYLES, strip=False, strip_comments=True):
    """Clean an HTML fragment and return it.

    Falsy input yields ``u''``.  Otherwise the text is converted to unicode,
    parsed as a fragment with a sanitizer configured from the arguments,
    rendered with ``_render`` and stripped of surrounding whitespace.
    """
    if not text:
        return u''

    markup = force_unicode(text)
    if markup.startswith(u'<!--'):
        # A leading comment gets a space put in front of it before parsing.
        markup = u' ' + markup

    # Per-call sanitizer subclass carrying this call's settings.
    class s(BleachSanitizer):
        allowed_elements = tags
        allowed_attributes = attributes
        allowed_css_properties = styles
        strip_disallowed_elements = strip
        strip_html_comments = strip_comments

    tree = html5lib.HTMLParser(tokenizer=s).parseFragment(markup)
    return _render(tree).strip()
Exemple #35
0
def clean(text, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
          styles=ALLOWED_STYLES, strip=False, strip_comments=True,
          parse_as_fragment=True, nofollow=False, filter_url=identity):
    """Clean an HTML fragment or document and return it.

    ``parse_as_fragment`` selects fragment parsing (the default) or full
    document parsing.  For full documents, ``tags`` left at
    ``ALLOWED_TAGS`` is replaced by ``ALLOWED_FULL_DOCUMENT_TAGS``.  Falsy
    input yields ``u''`` in fragment mode.

    With ``nofollow`` set, every ``<a>`` that has an ``href`` gets
    ``rel="nofollow"`` and its ``href`` is passed through ``filter_url``.
    """

    if not text and parse_as_fragment:
        return u''

    # Allow things like body tag in full documents by default
    if not parse_as_fragment and tags == ALLOWED_TAGS:
        tags = ALLOWED_FULL_DOCUMENT_TAGS

    text = force_unicode(text)
    if text.startswith(u'<!--'):
        text = u' ' + text

    class s(BleachSanitizer):
        allowed_elements = tags
        allowed_attributes = attributes
        allowed_css_properties = styles
        strip_disallowed_elements = strip
        strip_html_comments = strip_comments

    parser = html5lib.HTMLParser(tokenizer=s)

    def add_nofollow(tree):
        """Mark links below ``tree`` as nofollow and filter their hrefs."""
        # filter_url is taken from the enclosing call.  A separate default
        # on this helper would shadow it and silently drop the caller's
        # filter, including on the recursive calls.
        for node in tree.childNodes:
            if node.name == 'a':
                if 'href' in node.attributes:
                    node.attributes['rel'] = 'nofollow'
                    href = node.attributes['href']
                    node.attributes['href'] = filter_url(href)
            else:
                add_nofollow(node)

    if parse_as_fragment:
        forest = parser.parseFragment(text)
    else:
        forest = parser.parse(text)

    if nofollow:
        add_nofollow(forest)

    return _render(forest).strip()
Exemple #36
0
def forbid_multi_line_headers(name, val, encoding):
    """Forbids multi-line headers, to prevent header injection.

    The value is converted to unicode and rejected with ``BadHeaderError``
    if it contains a newline or carriage return.  The returned pair is
    ``(name, encoded value)``.  An ASCII value stays as its encoded bytes,
    wrapped in ``Header`` for the subject header.  A non-ASCII value is
    sanitized address by address for address headers and otherwise passed
    through ``Header`` with ``encoding`` (``DEFAULT_CHARSET`` when falsy).
    """
    charset = encoding or DEFAULT_CHARSET
    text = force_unicode(val)
    if '\n' in text or '\r' in text:
        raise BadHeaderError(
            "Header values can't contain newlines (got %r for header %r)" %
            (text, name))

    try:
        encoded = text.encode('ascii')
    except UnicodeEncodeError:
        if name.lower() in ADDRESS_HEADERS:
            parsed = getaddresses((text, ))
            encoded = ', '.join(
                sanitize_address(addr, charset) for addr in parsed)
        else:
            encoded = str(Header(text, charset))
    else:
        if name.lower() == 'subject':
            encoded = Header(encoded)
    return name, encoded
Exemple #37
0
def delinkify(text, allow_domains=None, allow_relative=False):
    """Remove links from text, except those allowed to stay.

    ``allow_domains`` is a domain or a list of domains; a link is kept when
    its host matches one of them according to ``_domain_match``.  With
    ``allow_relative`` set, links without a host are kept too.  ``<a>``
    tags without an ``href`` are left alone.  Empty input yields ``u''``.
    """
    text = force_unicode(text)
    if not text:
        return u''

    parser = html5lib.HTMLParser(tokenizer=HTMLSanitizer)
    forest = parser.parseFragment(text)

    if allow_domains is None:
        allow_domains = []
    elif isinstance(allow_domains, basestring):
        allow_domains = [allow_domains]

    def delinkify_nodes(tree):
        """Remove <a> tags and replace them with their contents."""
        # Walk a snapshot of the children: the loop body inserts into and
        # removes from tree.childNodes.  Iterating the live list skips the
        # sibling after an empty <a>, and the first lifted child.
        for node in list(tree.childNodes):
            if node.name == 'a':
                if 'href' not in node.attributes:
                    continue
                parts = urlparse.urlparse(node.attributes['href'])
                host = parts.hostname
                if any(_domain_match(host, d) for d in allow_domains):
                    continue
                if host is None and allow_relative:
                    continue
                # Replace the node with its children.
                # You can't nest <a> tags, and html5lib takes care of that
                # for us in the tree-building step.
                for child in list(node.childNodes):
                    tree.insertBefore(child, node)
                    # Lifted children are not in the snapshot, so visit
                    # them here.
                    if child.type != NODE_TEXT:
                        delinkify_nodes(child)
                tree.removeChild(node)
            elif node.type != NODE_TEXT:  # Don't try to delinkify text.
                delinkify_nodes(node)

    delinkify_nodes(forest)
    return _render(forest)
Exemple #38
0
def delinkify(text, allow_domains=None, allow_relative=False):
    """Remove links from text, except those allowed to stay.

    ``allow_domains`` is a domain or a list of domains; a link is kept when
    its host matches one of them according to ``_domain_match``.  With
    ``allow_relative`` set, links without a host are kept too.  ``<a>``
    tags without an ``href`` are left alone.  Empty input yields ``u""``.
    """
    text = force_unicode(text)
    if not text:
        return u""

    parser = html5lib.HTMLParser(tokenizer=HTMLSanitizer)
    forest = parser.parseFragment(text)

    if allow_domains is None:
        allow_domains = []
    elif isinstance(allow_domains, basestring):
        allow_domains = [allow_domains]

    def delinkify_nodes(tree):
        """Remove <a> tags and replace them with their contents."""
        # Walk a snapshot of the children: the loop body inserts into and
        # removes from tree.childNodes.  Iterating the live list skips the
        # sibling after an empty <a>, and the first lifted child.
        for node in list(tree.childNodes):
            if node.name == "a":
                if "href" not in node.attributes:
                    continue
                parts = urlparse.urlparse(node.attributes["href"])
                host = parts.hostname
                if any(_domain_match(host, d) for d in allow_domains):
                    continue
                if host is None and allow_relative:
                    continue
                # Replace the node with its children.
                # You can't nest <a> tags, and html5lib takes care of that
                # for us in the tree-building step.
                for child in list(node.childNodes):
                    tree.insertBefore(child, node)
                    # Lifted children are not in the snapshot, so visit
                    # them here.
                    if child.type != NODE_TEXT:
                        delinkify_nodes(child)
                tree.removeChild(node)
            elif node.type != NODE_TEXT:  # Don't try to delinkify text.
                delinkify_nodes(node)

    delinkify_nodes(forest)
    return _render(forest)
Exemple #39
0
 def __init__(self, name, value, attrs, choice, index):
     """Store the input's name, value, attributes, choice and index.

     ``choice`` is indexed as a ``(value, label)`` pair; both parts are
     kept in their unicode forms.
     """
     self.name = name
     self.value = value
     self.attrs = attrs
     raw_value, raw_label = choice[0], choice[1]
     self.choice_value = force_unicode(raw_value)
     self.choice_label = force_unicode(raw_label)
     self.index = index
Exemple #40
0
 def render(self):
     """Outputs a <ul> for this set of radio fields."""
     # One <li> per item yielded by iterating over self.
     list_items = [u'<li>%s</li>' % force_unicode(w) for w in self]
     body = u'\n'.join(list_items)
     return mark_safe(u'<ul>\n%s\n</ul>' % body)
Exemple #41
0
 def render_option(option_value, option_label):
     """Render one ``<option>`` element as a unicode string.

     ``selected_choices`` is not a parameter; it is resolved from an outer
     scope.  The option is marked selected when its unicode value is a
     member of that collection.
     """
     option_value = force_unicode(option_value)
     if option_value in selected_choices:
         selected_html = u' selected="selected"'
     else:
         selected_html = ''
     # The value lands inside a quoted attribute, so it is escaped just like
     # the label; otherwise a quote or angle bracket in the value could break
     # out of the tag.
     return u'<option value="%s"%s>%s</option>' % (
         conditional_escape(option_value), selected_html,
         conditional_escape(force_unicode(option_label)))
Exemple #42
0
def linkify(text,
            nofollow=True,
            target=None,
            filter_url=identity,
            filter_text=identity,
            skip_pre=False,
            parse_email=False,
            tokenizer=HTMLSanitizer):
    """Convert URL-like strings in an HTML fragment to links.

    linkify() converts strings that look like URLs or domain names in a
    blob of text that may be an HTML fragment to links, while preserving
    (a) links already in the string, (b) urls found in attributes, and
    (c) email addresses.

    If the nofollow argument is True (the default) then rel="nofollow"
    will be added to links created by linkify() as well as links already
    found in the text.

    The target argument will optionally add a target attribute with the
    given value to links created by linkify() as well as links already
    found in the text.

    linkify() uses up to two filters on each link. For links created by
    linkify(), the href attribute is passed through filter_url()
    and the text of the link is passed through filter_text(). For links
    already found in the document, the href attribute is passed through
    filter_url(), but the text is untouched.

    If skip_pre is True, text nodes that are direct children of a <pre>
    element are not linkified.

    If parse_email is True, matches of email_re in text nodes are turned
    into mailto: links.

    The tokenizer argument is passed to html5lib.HTMLParser as its
    tokenizer.

    Returns u'' when the text is empty after force_unicode(); otherwise
    returns the result of _render() on the processed tree.
    """
    text = force_unicode(text)

    if not text:
        return u''

    parser = html5lib.HTMLParser(tokenizer=tokenizer)

    forest = parser.parseFragment(text)

    # Attribute text added to every link generated by link_repl().
    if nofollow:
        rel = u'rel="nofollow"'
    else:
        rel = u''

    def replace_nodes(tree, new_frag, node):
        """Parse new_frag and put its top-level nodes in place of node."""
        new_tree = parser.parseFragment(new_frag)
        for n in new_tree.childNodes:
            tree.insertBefore(n, node)
        tree.removeChild(node)

    def strip_wrapping_parentheses(fragment):
        """Strips wrapping parentheses.

        Returns a tuple of the following format::

            (string stripped from wrapping parentheses,
             count of stripped opening parentheses,
             count of stripped closing parentheses)
        """
        opening_parentheses = closing_parentheses = 0
        # Count consecutive opening parentheses
        # at the beginning of the fragment (string).
        for char in fragment:
            if char == '(':
                opening_parentheses += 1
            else:
                break

        if opening_parentheses:
            newer_frag = ''
            # Cut the consecutive opening brackets from the fragment.
            fragment = fragment[opening_parentheses:]
            # Reverse the fragment for easier detection of parentheses
            # inside the URL.
            reverse_fragment = fragment[::-1]
            # Set once a non-')' character is seen; after that no further
            # ')' is stripped.
            skip = False
            for char in reverse_fragment:
                # Remove the closing parentheses if it has a matching
                # opening parentheses (they are balanced).
                if (char == ')' and closing_parentheses < opening_parentheses
                        and not skip):
                    closing_parentheses += 1
                    continue
                # Do not remove ')' from the URL itself.
                elif char != ')':
                    skip = True
                newer_frag += char
            fragment = newer_frag[::-1]

        return fragment, opening_parentheses, closing_parentheses

    def linkify_nodes(tree, parse_text=True):
        """Walk the children of tree, linkifying text and updating <a> tags.

        When parse_text is False, text nodes that are direct children of
        tree are left as they are.
        """
        # NOTE(review): replace_nodes() inserts into and removes from
        # tree.childNodes while this loop iterates over it -- confirm that
        # the tree implementation in use tolerates this.
        for node in tree.childNodes:
            if node.type == NODE_TEXT and parse_text:
                new_frag = node.toxml()
                if parse_email:
                    new_frag = re.sub(email_re, email_repl, new_frag)
                    if new_frag != node.toxml():
                        # An email address was linked: swap the node and
                        # re-walk this level without parsing text again.
                        replace_nodes(tree, new_frag, node)
                        linkify_nodes(tree, False)
                        continue
                new_frag = re.sub(url_re, link_repl, new_frag)
                replace_nodes(tree, new_frag, node)
            elif node.name == 'a':
                # Existing link: only adjust its attributes; <a> elements
                # without an href are left untouched.
                if 'href' in node.attributes:
                    if nofollow:
                        node.attributes['rel'] = 'nofollow'
                    if target is not None:
                        node.attributes['target'] = target
                    href = node.attributes['href']
                    node.attributes['href'] = filter_url(href)
            elif skip_pre and node.name == 'pre':
                # NOTE(review): parse_text=False only applies to the direct
                # children of <pre>; elements nested inside it reach the
                # else branch below and are recursed into with the default
                # parse_text=True -- confirm this is intended.
                linkify_nodes(node, False)
            else:
                linkify_nodes(node)

    def email_repl(match):
        """Return a mailto: link for the matched email address."""
        repl = u'<a href="mailto:%(mail)s">%(mail)s</a>'
        return repl % {'mail': match.group(0).replace('"', '&quot;')}

    def link_repl(match):
        """Return the <a> markup that replaces a url_re match."""
        url = match.group(0)
        open_brackets = close_brackets = 0
        if url.startswith('('):
            url, open_brackets, close_brackets = (
                strip_wrapping_parentheses(url))
        end = u''
        # Text matched by punct_re is cut off the URL and re-emitted after
        # the closing </a>.
        m = re.search(punct_re, url)
        if m:
            end = m.group(0)
            url = url[0:m.start()]
        # Prefix http:// when proto_re does not match the URL.
        if re.search(proto_re, url):
            href = url
        else:
            href = u''.join([u'http://', url])

        repl = u'%s<a href="%s" %s>%s</a>%s%s'

        attribs = [rel]
        if target is not None:
            attribs.append('target="%s"' % target)

        # Stripped parentheses are put back outside the link.
        return repl % ('(' * open_brackets, filter_url(href),
                       ' '.join(attribs), filter_text(url), end,
                       ')' * close_brackets)

    linkify_nodes(forest)

    return _render(forest)
Exemple #43
0
 def as_text(self):
     """Return the entries as text, one '* '-prefixed line per entry.

     A falsy (empty) collection yields an empty unicode string.
     """
     if self:
         lines = []
         for entry in self:
             lines.append(u'* %s' % force_unicode(entry))
         return u'\n'.join(lines)
     return u''
Exemple #44
0
 def render(self, name, value, attrs=None):
     """Return a <textarea> element holding ``value``.

     A ``value`` of None is rendered as an empty string. The attributes
     come from self.build_attrs(attrs, name=name); the content is passed
     through force_unicode() and conditional_escape(), and the final
     markup through mark_safe().
     """
     content = '' if value is None else value
     final_attrs = self.build_attrs(attrs, name=name)
     attrs_html = flatatt(final_attrs)
     content_html = conditional_escape(force_unicode(content))
     markup = u'<textarea%s>%s</textarea>' % (attrs_html, content_html)
     return mark_safe(markup)
Exemple #45
0
    def _html_output(self, normal_row, error_row, row_ender, help_text_html,
                     errors_on_separate_row):
        """Helper function for outputting HTML. Used by as_table(), as_ul(), as_p().

        normal_row is a %-format template filled from a mapping with the
        keys ``errors``, ``label``, ``field``, ``help_text`` and
        ``html_class_attr``. error_row and help_text_html are templates
        taking a single ``%s``. row_ender is the closing markup of a row;
        it is used to splice hidden fields into the last rendered row.
        When errors_on_separate_row is true, a field's errors are emitted
        as their own error_row just before the field's row.

        Rows for fields whose widget class is named "Textarea" are wrapped
        in a ``<div style="clear:both">``. The wrapper applies to that row
        only: normal_row itself is never modified, so later fields and the
        hidden-field fallback row use the caller's template.

        Returns the rendered rows joined with newlines.
        """
        # Errors that should be displayed above all fields.
        top_errors = self.non_field_errors()
        output, hidden_fields = [], []

        for name, field in self.fields.items():
            html_class_attr = ''
            bf = BoundField(self, field, name)
            # Cache the field's errors, wrapped in the form's error class.
            bf_errors = self.error_class(list(bf.errors))
            if bf.is_hidden:
                if bf_errors:
                    top_errors.extend([
                        u'(Hidden field %s) %s' % (name, force_unicode(e))
                        for e in bf_errors
                    ])
                hidden_fields.append(unicode(bf))
            else:
                # Create a 'class="..."' attribute if the row should have any
                # CSS classes applied.
                css_classes = bf.css_classes()
                if css_classes:
                    html_class_attr = ' class="%s"' % css_classes

                if errors_on_separate_row and bf_errors:
                    output.append(error_row % force_unicode(bf_errors))

                if bf.label:
                    label = force_unicode(bf.label)
                    # Only add the suffix if the label does not end in
                    # punctuation.
                    if self.label_suffix:
                        if label[-1] not in ':?.!':
                            label += self.label_suffix
                    label = bf.label_tag(label) or ''
                else:
                    label = ''

                if field.help_text:
                    help_text = help_text_html % force_unicode(field.help_text)
                else:
                    help_text = u''

                # Use a per-field template: rebinding normal_row here would
                # wrap every following row (and nest the wrapper once per
                # Textarea) and would break the row_ender handling below.
                if field.widget.__class__.__name__ == "Textarea":
                    row_template = (
                        '''<div style="clear:both">%s</div>''' % normal_row)
                else:
                    row_template = normal_row
                output.append(
                    row_template % {
                        'errors': force_unicode(bf_errors),
                        'label': force_unicode(label),
                        'field': unicode(bf),
                        'help_text': help_text,
                        'html_class_attr': html_class_attr
                    })

        if top_errors:
            output.insert(0, error_row % force_unicode(top_errors))

        if hidden_fields:  # Insert any hidden fields in the last row.
            str_hidden = u''.join(hidden_fields)
            if output:
                last_row = output[-1]
                # Chop off the trailing row_ender (e.g. '</td></tr>') and
                # insert the hidden fields.
                if not last_row.endswith(row_ender):
                    # This can happen in the as_p() case (and possibly others
                    # that users write): if there are only top errors, we may
                    # not be able to conscript the last row for our purposes,
                    # so insert a new, empty row. A wrapped Textarea row also
                    # ends up here because it ends with '</div>'.
                    last_row = (normal_row % {
                        'errors': '',
                        'label': '',
                        'field': '',
                        'help_text': '',
                        'html_class_attr': html_class_attr
                    })
                    output.append(last_row)
                output[
                    -1] = last_row[:-len(row_ender)] + str_hidden + row_ender
            else:
                # If there aren't any rows in the output, just append the
                # hidden fields.
                output.append(str_hidden)
        return u'\n'.join(output)
Exemple #46
0
 def render(self, name, value, attrs=None):
     """Return the <textarea> markup for ``value``.

     None is treated as an empty string. Attributes are built with
     self.build_attrs(attrs, name=name) and flattened with flatatt(); the
     content goes through force_unicode() and conditional_escape(). The
     result is passed through mark_safe().
     """
     if value is None:
         value = ''
     final_attrs = self.build_attrs(attrs, name=name)
     rendered_attrs = flatatt(final_attrs)
     rendered_value = conditional_escape(force_unicode(value))
     html = u'<textarea%s>%s</textarea>' % (rendered_attrs, rendered_value)
     return mark_safe(html)
Exemple #47
0
 def __init__(self, name, value, attrs, choice, index):
     """Record the name, value, attrs and index for one choice.

     The first two items of ``choice`` are stored as ``choice_value`` and
     ``choice_label`` after conversion with force_unicode().
     """
     self.name = name
     self.value = value
     self.attrs = attrs
     value_part = choice[0]
     self.choice_value = force_unicode(value_part)
     label_part = choice[1]
     self.choice_label = force_unicode(label_part)
     self.index = index
Exemple #48
0
 def as_ul(self):
     """Return the entries as a <ul class="errorlist"> of <li> items.

     A falsy (empty) collection yields an empty unicode string. Each
     entry is converted with force_unicode().
     """
     if not self:
         return u''
     items = []
     for entry in self:
         items.append(u'<li>%s</li>' % force_unicode(entry))
     return u'<ul class="errorlist">%s</ul>' % ''.join(items)
Exemple #49
0
def linkify(text, nofollow=True, target=None, filter_url=identity,
            filter_text=identity, skip_pre=False, parse_email=False,
            tokenizer=HTMLSanitizer):
    """Convert URL-like strings in an HTML fragment to links.

    linkify() converts strings that look like URLs or domain names in a
    blob of text that may be an HTML fragment to links, while preserving
    (a) links already in the string, (b) urls found in attributes, and
    (c) email addresses.

    If the nofollow argument is True (the default) then rel="nofollow"
    will be added to links created by linkify() as well as links already
    found in the text.

    The target argument will optionally add a target attribute with the
    given value to links created by linkify() as well as links already
    found in the text.

    linkify() uses up to two filters on each link. For links created by
    linkify(), the href attribute is passed through filter_url()
    and the text of the link is passed through filter_text(). For links
    already found in the document, the href attribute is passed through
    filter_url(), but the text is untouched.

    If skip_pre is True, text nodes that are direct children of a <pre>
    element are not linkified.

    If parse_email is True, matches of email_re in text nodes are turned
    into mailto: links.

    The tokenizer argument is passed to html5lib.HTMLParser as its
    tokenizer. It defaults to HTMLSanitizer, which was previously
    hard-coded, so existing callers are unaffected.

    Returns u'' when the text is empty after force_unicode(); otherwise
    returns the result of _render() on the processed tree.
    """
    text = force_unicode(text)

    if not text:
        return u''

    parser = html5lib.HTMLParser(tokenizer=tokenizer)

    forest = parser.parseFragment(text)

    # Attribute text added to every link generated by link_repl().
    if nofollow:
        rel = u'rel="nofollow"'
    else:
        rel = u''

    def replace_nodes(tree, new_frag, node):
        """Parse new_frag and put its top-level nodes in place of node."""
        new_tree = parser.parseFragment(new_frag)
        for n in new_tree.childNodes:
            tree.insertBefore(n, node)
        tree.removeChild(node)

    def strip_wrapping_parentheses(fragment):
        """Strips wrapping parentheses.

        Returns a tuple of the following format::

            (string stripped from wrapping parentheses,
             count of stripped opening parentheses,
             count of stripped closing parentheses)
        """
        opening_parentheses = closing_parentheses = 0
        # Count consecutive opening parentheses
        # at the beginning of the fragment (string).
        for char in fragment:
            if char == '(':
                opening_parentheses += 1
            else:
                break

        if opening_parentheses:
            newer_frag = ''
            # Cut the consecutive opening brackets from the fragment.
            fragment = fragment[opening_parentheses:]
            # Reverse the fragment for easier detection of parentheses
            # inside the URL.
            reverse_fragment = fragment[::-1]
            # Set once a non-')' character is seen; after that no further
            # ')' is stripped.
            skip = False
            for char in reverse_fragment:
                # Remove the closing parentheses if it has a matching
                # opening parentheses (they are balanced).
                if (char == ')' and
                        closing_parentheses < opening_parentheses and
                        not skip):
                    closing_parentheses += 1
                    continue
                # Do not remove ')' from the URL itself.
                elif char != ')':
                    skip = True
                newer_frag += char
            fragment = newer_frag[::-1]

        return fragment, opening_parentheses, closing_parentheses

    def linkify_nodes(tree, parse_text=True):
        """Walk the children of tree, linkifying text and updating <a> tags.

        When parse_text is False, text nodes that are direct children of
        tree are left as they are.
        """
        # NOTE(review): replace_nodes() inserts into and removes from
        # tree.childNodes while this loop iterates over it -- confirm that
        # the tree implementation in use tolerates this.
        for node in tree.childNodes:
            if node.type == NODE_TEXT and parse_text:
                new_frag = node.toxml()
                if parse_email:
                    new_frag = re.sub(email_re, email_repl, new_frag)
                    if new_frag != node.toxml():
                        # An email address was linked: swap the node and
                        # re-walk this level without parsing text again.
                        replace_nodes(tree, new_frag, node)
                        linkify_nodes(tree, False)
                        continue
                new_frag = re.sub(url_re, link_repl, new_frag)
                replace_nodes(tree, new_frag, node)
            elif node.name == 'a':
                # Existing link: only adjust its attributes; <a> elements
                # without an href are left untouched.
                if 'href' in node.attributes:
                    if nofollow:
                        node.attributes['rel'] = 'nofollow'
                    if target is not None:
                        node.attributes['target'] = target
                    href = node.attributes['href']
                    node.attributes['href'] = filter_url(href)
            elif skip_pre and node.name == 'pre':
                # NOTE(review): parse_text=False only applies to the direct
                # children of <pre>; elements nested inside it reach the
                # else branch below and are recursed into with the default
                # parse_text=True -- confirm this is intended.
                linkify_nodes(node, False)
            else:
                linkify_nodes(node)

    def email_repl(match):
        """Return a mailto: link for the matched email address."""
        repl = u'<a href="mailto:%(mail)s">%(mail)s</a>'
        return repl % {'mail': match.group(0).replace('"', '&quot;')}

    def link_repl(match):
        """Return the <a> markup that replaces a url_re match."""
        url = match.group(0)
        open_brackets = close_brackets = 0
        if url.startswith('('):
            url, open_brackets, close_brackets = (
                strip_wrapping_parentheses(url))
        end = u''
        # Text matched by punct_re is cut off the URL and re-emitted after
        # the closing </a>.
        m = re.search(punct_re, url)
        if m:
            end = m.group(0)
            url = url[0:m.start()]
        # Prefix http:// when proto_re does not match the URL.
        if re.search(proto_re, url):
            href = url
        else:
            href = u''.join([u'http://', url])

        repl = u'%s<a href="%s" %s>%s</a>%s%s'

        attribs = [rel]
        if target is not None:
            attribs.append('target="%s"' % target)

        # Stripped parentheses are put back outside the link.
        return repl % ('(' * open_brackets,
                       filter_url(href), ' '.join(attribs), filter_text(url),
                       end, ')' * close_brackets)

    linkify_nodes(forest)

    return _render(forest)
Exemple #50
0
    def _html_output(self, normal_row, error_row, row_ender, help_text_html, errors_on_separate_row):
        """Helper function for outputting HTML. Used by as_table(), as_ul(), as_p().

        normal_row is a %-format template filled from a mapping with the
        keys ``errors``, ``label``, ``field``, ``help_text`` and
        ``html_class_attr``. error_row and help_text_html are templates
        taking a single ``%s``. row_ender is the closing markup of a row;
        it is used to splice hidden fields into the last rendered row.
        When errors_on_separate_row is true, a field's errors are emitted
        as their own error_row just before the field's row.

        Rows for fields whose widget class is named "Textarea" are wrapped
        in a ``<div style="clear:both">``. The wrapper applies to that row
        only: normal_row itself is never modified, so later fields and the
        hidden-field fallback row use the caller's template.

        Returns the rendered rows joined with newlines.
        """
        top_errors = self.non_field_errors() # Errors that should be displayed above all fields.
        output, hidden_fields = [], []

        for name, field in self.fields.items():
            html_class_attr = ''
            bf = BoundField(self, field, name)
            # Cache the field's errors, wrapped in the form's error class.
            bf_errors = self.error_class(list(bf.errors))
            if bf.is_hidden:
                if bf_errors:
                    top_errors.extend([u'(Hidden field %s) %s' % (name, force_unicode(e)) for e in bf_errors])
                hidden_fields.append(unicode(bf))
            else:
                # Create a 'class="..."' attribute if the row should have any
                # CSS classes applied.
                css_classes = bf.css_classes()
                if css_classes:
                    html_class_attr = ' class="%s"' % css_classes

                if errors_on_separate_row and bf_errors:
                    output.append(error_row % force_unicode(bf_errors))

                if bf.label:
                    label = force_unicode(bf.label)
                    # Only add the suffix if the label does not end in
                    # punctuation.
                    if self.label_suffix:
                        if label[-1] not in ':?.!':
                            label += self.label_suffix
                    label = bf.label_tag(label) or ''
                else:
                    label = ''

                if field.help_text:
                    help_text = help_text_html % force_unicode(field.help_text)
                else:
                    help_text = u''

                # Use a per-field template: rebinding normal_row here would
                # wrap every following row (and nest the wrapper once per
                # Textarea) and would break the row_ender handling below.
                if field.widget.__class__.__name__ == "Textarea":
                    row_template = '''<div style="clear:both">%s</div>''' % normal_row
                else:
                    row_template = normal_row
                output.append(row_template % {
                    'errors': force_unicode(bf_errors),
                    'label': force_unicode(label),
                    'field': unicode(bf),
                    'help_text': help_text,
                    'html_class_attr': html_class_attr
                })

        if top_errors:
            output.insert(0, error_row % force_unicode(top_errors))

        if hidden_fields: # Insert any hidden fields in the last row.
            str_hidden = u''.join(hidden_fields)
            if output:
                last_row = output[-1]
                # Chop off the trailing row_ender (e.g. '</td></tr>') and
                # insert the hidden fields.
                if not last_row.endswith(row_ender):
                    # This can happen in the as_p() case (and possibly others
                    # that users write): if there are only top errors, we may
                    # not be able to conscript the last row for our purposes,
                    # so insert a new, empty row. A wrapped Textarea row also
                    # ends up here because it ends with '</div>'.
                    last_row = (normal_row % {'errors': '', 'label': '',
                                              'field': '', 'help_text':'',
                                              'html_class_attr': html_class_attr})
                    output.append(last_row)
                output[-1] = last_row[:-len(row_ender)] + str_hidden + row_ender
            else:
                # If there aren't any rows in the output, just append the
                # hidden fields.
                output.append(str_hidden)
        return u'\n'.join(output)
Exemple #51
0
 def render(self):
     """Build a <ul> whose <li> entries are the items yielded by self.

     Items are converted with force_unicode() and joined with newlines;
     the finished markup is passed through mark_safe().
     """
     rows = []
     for item in self:
         rows.append(u'<li>%s</li>' % force_unicode(item))
     joined_rows = u'\n'.join(rows)
     return mark_safe(u'<ul>\n%s\n</ul>' % joined_rows)
Exemple #52
0
 def as_ul(self):
     """Return the mapping as a <ul class="errorlist"> of <li> items.

     Each <li> holds a key immediately followed by force_unicode() of its
     value. A falsy (empty) mapping yields an empty unicode string.
     """
     if not self:
         return u''
     items = []
     for key, value in self.items():
         items.append(u'<li>%s%s</li>' % (key, force_unicode(value)))
     return u'<ul class="errorlist">%s</ul>' % ''.join(items)
Exemple #53
0
 def as_text(self):
     """Return the mapping as an indented plain-text bullet list.

     Every key becomes a '* key' line, followed by one '  * item' line
     for each item of its value (items go through force_unicode()).
     """
     sections = []
     for key, messages in self.items():
         bullets = []
         for message in messages:
             bullets.append(u'  * %s' % force_unicode(message))
         sections.append(u'* %s\n%s' % (key, u'\n'.join(bullets)))
     return u'\n'.join(sections)
Exemple #54
0
 def __repr__(self):
     """Return the repr() of a list of the force_unicode()-converted items."""
     converted = []
     for entry in self:
         converted.append(force_unicode(entry))
     return repr(converted)