def __init__(self, data="", mutable=False, encoding=None):
        self._json_values = {}
        self._data = data

        if not encoding:
            encoding = settings.DEFAULT_CHARSET
        self.encoding = encoding

        if not self._data:
            self.data_type = "empty"

        # JSON package
        if not self.data_type:
            try:
                self._json_values = simplejson.loads(self._data)
            except (JSONDecodeError, TypeError) as e:
                pass
            else:
                self.data_type = "json"

        # HTTP query string standard
        if not self.data_type:
            keys_and_values = parse_qsl(self._data)
            if keys_and_values:
                for key, value in keys_and_values:
                    key = force_unicode(key, encoding, errors="replace")
                    if key.endswith("[]"):
                        key = key[:-2]
                    value = force_unicode(value, encoding, errors="replace")
                    self.appendlist(key, value)
                self.data_type = "qsl"

        self._mutable = mutable
Exemple #2
0
def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks and most spaces in
    the text. Expects that existing line breaks are posix newlines.
    """
    text = force_unicode(text)
    def _generator():
        it = iter(text.split(' '))
        word = it.next()
        yield word
        pos = len(word) - word.rfind('\n') - 1
        for word in it:
            if "\n" in word:
                lines = word.split('\n')
            else:
                lines = (word,)
            pos += len(lines[0]) + 1
            if pos > width:
                yield '\n'
                pos = len(lines[-1])
            else:
                yield ' '
                if len(lines) > 1:
                    pos = len(lines[-1])
            yield word
    return unicode('').join(_generator())
 def clean_value(self, key, value):
     from london.db.models import PersistentModel, NestedQuerySet, ManyToManyQuerySet, QuerySet, RelatedQuerySet
     if key == '_id':
         return key, ObjectId(value)
     elif isinstance(value, (basestring, EncryptedString)) and not isinstance(value, unicode):
         return key, force_unicode(value, errors='replace')
     elif isinstance(value, (Decimal,Money)):
         return key, float(value)
     elif isinstance(value, PersistentModel):
         if value:
             value = {'_db_storage':value.__class__._meta.db_storage, 'pk':value['pk']}
         return key, value
     elif isinstance(value, NestedQuerySet):
         if value:
             value = [self.clean_values(item.get_values_to_save()) for item in value]
         return key, value
     elif isinstance(value, ManyToManyQuerySet):
         if value:
             value = [pk for pk in value._items] # FIXME bad code
         return key, value
     elif isinstance(value, datetime.date) and not isinstance(value, datetime.datetime):
         return key, datetime.datetime(value.year, value.month, value.day)
     elif isinstance(value, datetime.time):
         return key, value.strftime('%H:%M:%S')
     elif isinstance(value, (tuple, list, QuerySet, RelatedQuerySet)):
         return key, [self.clean_value(None, item)[1] for item in value]
     else:
         return key, value
Exemple #4
0
def clean_html(text):
    """
    Clean the given HTML.  Specifically, do the following:
        * Convert <b> and <i> to <strong> and <em>.
        * Encode all ampersands correctly.
        * Remove all "target" attributes from <a> tags.
        * Remove extraneous HTML, such as presentational tags that open and
          immediately close and <br clear="all">.
        * Convert hard-coded bullets into HTML unordered lists.
        * Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
          bottom of the text.
    """
    from london.utils.text import normalize_newlines
    text = normalize_newlines(force_unicode(text))
    text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
    text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
    text = fix_ampersands(text)
    # Remove all target="" attributes from <a> tags.
    text = link_target_attribute_re.sub('\\1', text)
    # Trim stupid HTML such as <br clear="all">.
    text = html_gunk_re.sub('', text)
    # Convert hard-coded bullets into HTML unordered lists.
    def replace_p_tags(match):
        s = match.group().replace(u'</p>', u'</li>')
        for d in DOTS:
            s = s.replace(u'<p>%s' % d, u'<li>')
        return u'<ul>\n%s\n</ul>' % s
    text = hard_coded_bullets_re.sub(replace_p_tags, text)
    # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom
    # of the text.
    text = trailing_empty_content_re.sub(u'', text)
    return text
Exemple #5
0
def urlquote_plus(url, safe=''):
    """
    A version of Python's urllib.quote_plus() function that can operate on
    unicode strings. The url is first UTF-8 encoded before quoting. The
    returned string can safely be used as part of an argument to a subsequent
    iri_to_uri() call without double-quoting occurring.
    """
    return force_unicode(urllib.quote_plus(smart_str(url), smart_str(safe)))
Exemple #6
0
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Converts any URLs in text into clickable links.

    Works on http://, https://, www. links and links ending in .org, .net or
    .com. Links can have trailing punctuation (periods, commas, close-parens)
    and leading punctuation (opening parens) and it'll still do the right
    thing.

    If trim_url_limit is not None, the URLs in link text longer than this limit
    will truncated to trim_url_limit-3 characters and appended with an elipsis.

    If nofollow is True, the URLs in link text will get a rel="nofollow"
    attribute.

    If autoescape is True, the link text and URLs will get autoescaped.
    """
    trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
    safe_input = isinstance(text, SafeData)
    words = word_split_re.split(force_unicode(text))
    nofollow_attr = nofollow and ' rel="nofollow"' or ''
    for i, word in enumerate(words):
        match = None
        if '.' in word or '@' in word or ':' in word:
            match = punctuation_re.match(word)
        if match:
            lead, middle, trail = match.groups()
            # Make URL we want to point to.
            url = None
            if middle.startswith('http://') or middle.startswith('https://'):
                url = urlquote(middle, safe='/&=:;#?+*')
            elif middle.startswith('www.') or ('@' not in middle and \
                    middle and middle[0] in string.ascii_letters + string.digits and \
                    (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
            elif '@' in middle and not ':' in middle and simple_email_re.match(middle):
                url = 'mailto:%s' % middle
                nofollow_attr = ''
            # Make link.
            if url:
                trimmed = trim_url(middle)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    url, trimmed = escape(url), escape(trimmed)
                middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
            else:
                if safe_input:
                    words[i] = mark_safe(word)
                elif autoescape:
                    words[i] = escape(word)
        elif safe_input:
            words[i] = mark_safe(word)
        elif autoescape:
            words[i] = escape(word)
    return u''.join(words)
Exemple #7
0
def get_text_list(list_, last_word=ugettext_lazy(unicode('or'))):
    """
    >>> get_text_list(['a', 'b', 'c', 'd'])
    u'a, b, c or d'
    >>> get_text_list(['a', 'b', 'c'], 'and')
    u'a, b and c'
    >>> get_text_list(['a', 'b'], 'and')
    u'a and b'
    >>> get_text_list(['a'])
    u'a'
    >>> get_text_list([])
    u''
    """
    if len(list_) == 0: return unicode('')
    if len(list_) == 1: return force_unicode(list_[0])
    return unicode('%s %s %s') % (
        # Translators: This string is used as a separator between list elements
        _(', ').join([force_unicode(i) for i in list_][:-1]),
        force_unicode(last_word), force_unicode(list_[-1]))
Exemple #8
0
def get_valid_filename(s):
    """
    Returns the given string converted to a string that can be used for a clean
    filename. Specifically, leading and trailing spaces are removed; other
    spaces are converted to underscores; and anything that is not a unicode
    alphanumeric, dash, underscore, or dot, is removed.
    >>> get_valid_filename("john's portrait in 2004.jpg")
    unicode('johns_portrait_in_2004.jpg')
    """
    s = force_unicode(s).strip().replace(' ', '_')
    return re.sub(r'(?u)[^-\w.]', '', s)
Exemple #9
0
def safe_join(base, *paths):
    """
    Joins one or more path components to the base path component intelligently.
    Returns a normalized, absolute version of the final path.

    The final path must be located inside of the base path component (otherwise
    a ValueError is raised).
    """
    base = force_unicode(base)
    paths = [force_unicode(p) for p in paths]
    final_path = abspathu(join(base, *paths))
    base_path = abspathu(base)
    base_path_len = len(base_path)
    # Ensure final_path starts with base_path (using normcase to ensure we
    # don't false-negative on case insensitive operating systems like Windows)
    # and that the next character after the final path is os.sep (or nothing,
    # in which case final_path must be equal to base_path).
    if not normcase(final_path).startswith(normcase(base_path)) \
       or final_path[base_path_len:base_path_len+1] not in ('', sep):
        raise ValueError('The joined path (%s) is located outside of the base '
                         'path component (%s)' % (final_path, base_path))
    return final_path
 def handle_file_complete(self, old_field_name, counters):
     """
     Handle all the signalling that takes place when a file is complete.
     """
     for i, handler in enumerate(self._upload_handlers):
         file_obj = handler.file_complete(counters[i])
         if file_obj:
             # If it returns a file object, then set the files dict.
             self._files.appendlist(force_unicode(old_field_name,
                                                  self._encoding,
                                                  errors='replace'),
                                    file_obj)
             break
    def save(self, name, content):
        """
        Saves new content to the file specified by name. The content should be a
        proper File object, ready to be read from the beginning.
        """
        # Get the proper name for the file, as it will actually be saved.
        if name is None:
            name = content.name

        name = self.get_available_name(name)
        name = self._save(name, content)

        # Store filenames with forward slashes, even on Windows
        return force_unicode(name.replace('\\', '/'))
Exemple #12
0
 def add_truncation_text(self, text, truncate=None):
     if truncate is None:
         truncate = pgettext(
             'String to return when truncating text',
             unicode('%(truncated_text)s...'))
     truncate = force_unicode(truncate)
     if '%(truncated_text)s' in truncate:
         return truncate % {'truncated_text': text}
     # The truncation text didn't contain the %(truncated_text)s string
     # replacement argument so just append it to the text.
     if text.endswith(truncate):
         # But don't append the truncation text if the current text already
         # ends in this.
         return text
     return '%s%s' % (text, truncate)
def sanitize_address(addr, encoding):
    if isinstance(addr, basestring):
        addr = parseaddr(force_unicode(addr))
    nm, addr = addr
    nm = str(Header(nm, encoding))
    try:
        addr = addr.encode('ascii')
    except UnicodeEncodeError:  # IDN
        if u'@' in addr:
            localpart, domain = addr.split(u'@', 1)
            localpart = str(Header(localpart, encoding))
            domain = domain.encode('idna')
            addr = '@'.join([localpart, domain])
        else:
            addr = str(Header(addr, encoding))
    return formataddr((nm, addr))
Exemple #14
0
def smart_split(text):
    r"""
    Generator that splits a string by spaces, leaving quoted phrases together.
    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks and escaped quotes will remain escaped (the results can then
    be further processed with unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    [u'This', u'is', u'"a person\\\'s"', u'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    [u'Another', u"'person\\'s'", u'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    [u'A', u'"\\"funky\\" style"', u'test.']
    """
    text = force_unicode(text)
    for bit in smart_split_re.finditer(text):
        yield bit.group(0)
def forbid_multi_line_headers(name, val, encoding):
    """Forbids multi-line headers, to prevent header injection."""
    encoding = encoding or settings.DEFAULT_CHARSET
    val = force_unicode(val)
    if '\n' in val or '\r' in val:
        raise BadHeaderError("Header values can't contain newlines (got %r for header %r)" % (val, name))
    try:
        val = val.encode('ascii')
    except UnicodeEncodeError:
        if name.lower() in ADDRESS_HEADERS:
            val = ', '.join(sanitize_address(addr, encoding)
                for addr in getaddresses((val,)))
        else:
            val = str(Header(val, encoding))
    else:
        if name.lower() == 'subject':
            val = Header(val)
    return name, val
    def render(self, name, value, attrs=None):
        substitutions = {
            'initial_text': self.initial_text,
            'input_text': self.input_text,
            'clear_template': '',
            'clear_checkbox_label': self.clear_checkbox_label,
        }
        template = u'%(input)s'
        substitutions['input'] = super(ClearableFileInput, self).render(name, value, attrs)

        if value and hasattr(value, "url"):
            template = self.template_with_initial
            substitutions['initial'] = (u'<a href="%s" rel="nohistory">%s</a>'
                                        % (escape(value.url), escape(force_unicode(value))))
            if not self.is_required:
                checkbox_name = self.clear_checkbox_name(name)
                checkbox_id = self.clear_checkbox_id(checkbox_name)
                substitutions['clear_checkbox_name'] = conditional_escape(checkbox_name)
                substitutions['clear_checkbox_id'] = conditional_escape(checkbox_id)
                substitutions['clear'] = CheckboxInput().render(checkbox_name, False, attrs={'id': checkbox_id})
                substitutions['clear_template'] = self.template_with_clear % substitutions

        return mark_safe(template % substitutions)
Exemple #17
0
def fix_ampersands(value):
    """Returns the given HTML with all unencoded ampersands encoded correctly."""
    return unencoded_ampersands_re.sub('&amp;', force_unicode(value))
Exemple #18
0
 def get_directory_name(self):
     return os.path.normpath(force_unicode(datetime.datetime.now().strftime(smart_str(self.upload_to))))
Exemple #19
0
def escape(html):
    """
    Returns the given HTML with ampersands, quotes and angle brackets encoded.
    """
    return mark_safe(force_unicode(html).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;'))
Exemple #20
0
def escapejs(value):
    """Hex encodes characters for use in JavaScript strings."""
    for bad, good in _js_escapes:
        value = mark_safe(force_unicode(value).replace(bad, good))
    return value
Exemple #21
0
def strip_tags(value):
    """Returns the given HTML with all tags stripped."""
    return re.sub(r'<[^>]*?>', '', force_unicode(value))
    def parse(self):
        """
        Parse the POST data and break it into a FILES MultiValueDict and a POST
        MultiValueDict.

        Returns a tuple containing the POST and FILES dictionary, respectively.
        """
        # We have to import QueryDict down here to avoid a circular import.
        from london.http import QueryDict

        encoding = self._encoding
        handlers = self._upload_handlers

        # HTTP spec says that Content-Length >= 0 is valid
        # handling content-length == 0 before continuing
        if self._content_length == 0:
            return QueryDict(MultiValueDict(), encoding=self._encoding), MultiValueDict()

        # See if the handler will want to take care of the parsing.
        # This allows overriding everything if somebody wants it.
        for handler in handlers:
            result = handler.handle_raw_input(self._input_data,
                                              self._meta,
                                              self._content_length,
                                              self._boundary,
                                              encoding)
            if result is not None:
                return result[0], result[1]

        # Create the data structures to be used later.
        self._post = QueryDict('', mutable=True)
        self._files = MultiValueDict()

        # Instantiate the parser and stream:
        stream = LazyStream(ChunkIter(self._input_data, self._chunk_size))

        # Whether or not to signal a file-completion at the beginning of the loop.
        old_field_name = None
        counters = [0] * len(handlers)

        try:
            for item_type, meta_data, field_stream in Parser(stream, self._boundary):
                if old_field_name:
                    # We run this at the beginning of the next loop
                    # since we cannot be sure a file is complete until
                    # we hit the next boundary/part of the multipart content.
                    self.handle_file_complete(old_field_name, counters)
                    old_field_name = None

                try:
                    disposition = meta_data['content-disposition'][1]
                    field_name = disposition['name'].strip()
                except (KeyError, IndexError, AttributeError):
                    continue

                transfer_encoding = meta_data.get('content-transfer-encoding')
                if transfer_encoding is not None:
                    transfer_encoding = transfer_encoding[0].strip()
                field_name = force_unicode(field_name, encoding, errors='replace')

                if item_type == FIELD:
                    # This is a post field, we can just set it in the post
                    if transfer_encoding == 'base64':
                        raw_data = field_stream.read()
                        try:
                            data = str(raw_data).decode('base64')
                        except:
                            data = raw_data
                    else:
                        data = field_stream.read()

                    self._post.appendlist(field_name,
                                          force_unicode(data, encoding, errors='replace'))
                elif item_type == FILE:
                    # This is a file, use the handler...
                    file_name = disposition.get('filename')
                    if not file_name:
                        continue
                    file_name = force_unicode(file_name, encoding, errors='replace')
                    file_name = self.IE_sanitize(unescape_entities(file_name))

                    content_type = meta_data.get('content-type', ('',))[0].strip()
                    try:
                        charset = meta_data.get('content-type', (0,{}))[1].get('charset', None)
                    except:
                        charset = None

                    try:
                        content_length = int(meta_data.get('content-length')[0])
                    except (IndexError, TypeError, ValueError):
                        content_length = None

                    counters = [0] * len(handlers)
                    try:
                        for handler in handlers:
                            try:
                                handler.new_file(field_name, file_name,
                                                 content_type, content_length,
                                                 charset)
                            except StopFutureHandlers:
                                break

                        for chunk in field_stream:
                            if transfer_encoding == 'base64':
                                # We only special-case base64 transfer encoding
                                try:
                                    chunk = str(chunk).decode('base64')
                                except BaseException as e:
                                    # Since this is only a chunk, any error is an unfixable error.
                                    raise MultiPartParserError("Could not decode base64 data: %r" % e)

                            for i, handler in enumerate(handlers):
                                chunk_length = len(chunk)
                                chunk = handler.receive_data_chunk(chunk, counters[i])
                                counters[i] += chunk_length
                                if chunk is None:
                                    # If the chunk received by the handler is None, then don't continue.
                                    break

                    except SkipFile as e:
                        # Just use up the rest of this file...
                        exhaust(field_stream)
                    else:
                        # Handle file upload completions on next iteration.
                        old_field_name = field_name
                else:
                    # If this is neither a FIELD or a FILE, just exhaust the stream.
                    exhaust(stream)
        except StopUpload as e:
            if not e.connection_reset:
                exhaust(self._input_data)
        else:
            # Make sure that the request data is all fed
            exhaust(self._input_data)

        # Signal that the upload has completed.
        for handler in handlers:
            retval = handler.upload_complete()
            if retval:
                break

        return self._post, self._files
Exemple #23
0
def strip_spaces_between_tags(value):
    """Returns the given HTML with spaces between tags removed."""
    return re.sub(r'>\s+<', '><', force_unicode(value))
Exemple #24
0
def strip_entities(value):
    """Returns the given HTML with all entities (&something;) stripped."""
    return re.sub(r'&(?:\w+|#\d+);', '', force_unicode(value))
Exemple #25
0
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

from london.utils.encoding import force_unicode
from london.utils.functional import allow_lazy, SimpleLazyObject
from london.utils.translation import ugettext_lazy, ugettext as _, pgettext

try:
    unicode
except NameError:
    unicode = str

# Capitalizes the first letter of a string.
capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:]
capfirst = allow_lazy(capfirst, unicode)

# Set up regular expressions
re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')


def wrap(text, width):
    """
    A word-wrap function that preserves existing line breaks and most spaces in
    the text. Expects that existing line breaks are posix newlines.
    """
    text = force_unicode(text)
    def _generator():
        it = iter(text.split(' '))
Exemple #26
0
 def __init__(self, text):
     super(Truncator, self).__init__(lambda: force_unicode(text))
Exemple #27
0
def normalize_newlines(text):
    return force_unicode(re.sub(r'\r\n|\r|\n', '\n', text))
Exemple #28
0
def recapitalize(text):
    "Recapitalizes text, placing caps after end-of-sentence punctuation."
    text = force_unicode(text).lower()
    capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
    text = capsRE.sub(lambda x: x.group(1).upper(), text)
    return text