__Y = r'(?P<year>\d{4})' __Y2 = r'(?P<year>\d{2})' __T = r'(?P<hour>\d{2}):(?P<min>\d{2}):(?P<sec>\d{2})' RFC1123_DATE = re.compile(r'^\w{3}, %s %s %s %s GMT$' % (__D, __M, __Y, __T)) RFC850_DATE = re.compile(r'^\w{6,9}, %s-%s-%s %s GMT$' % (__D, __M, __Y2, __T)) ASCTIME_DATE = re.compile(r'^\w{3} %s %s %s %s$' % (__M, __D2, __T, __Y)) def urlquote(url, safe='/'): """ A version of Python's urllib.quote() function that can operate on unicode strings. The url is first UTF-8 encoded before quoting. The returned string can safely be used as part of an argument to a subsequent iri_to_uri() call without double-quoting occurring. """ return force_text(urllib_parse.quote(force_str(url), force_str(safe))) urlquote = allow_lazy(urlquote, six.text_type) def urlquote_plus(url, safe=''): """ A version of Python's urllib.quote_plus() function that can operate on unicode strings. The url is first UTF-8 encoded before quoting. The returned string can safely be used as part of an argument to a subsequent iri_to_uri() call without double-quoting occurring. """ return force_text(urllib_parse.quote_plus(force_str(url), force_str(safe))) urlquote_plus = allow_lazy(urlquote_plus, six.text_type) def urlunquote(quoted_url): """ A wrapper for Python's urllib.unquote() function that can operate on the result of djangocg.utils.http.urlquote().
word_split_re = re.compile(r'(\s+)') simple_url_re = re.compile(r'^https?://\w', re.IGNORECASE) simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE) simple_email_re = re.compile(r'^\S+@\S+\.\S+$') link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+') html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z') def escape(text): """ Returns the given text with ampersands, quotes and angle brackets encoded for use in HTML. """ return mark_safe(force_text(text).replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''')) escape = allow_lazy(escape, six.text_type) _base_js_escapes = ( ('\\', '\\u005C'), ('\'', '\\u0027'), ('"', '\\u0022'), ('>', '\\u003E'), ('<', '\\u003C'), ('&', '\\u0026'), ('=', '\\u003D'), ('-', '\\u002D'), (';', '\\u003B'), ('\u2028', '\\u2028'), ('\u2029', '\\u2029') )
from djangocg.utils.encoding import force_text from djangocg.utils.functional import allow_lazy, SimpleLazyObject from djangocg.utils import six from djangocg.utils.six.moves import html_entities from djangocg.utils.translation import ugettext_lazy, ugettext as _, pgettext from djangocg.utils.safestring import mark_safe if not six.PY3: # Import force_unicode even though this module doesn't use it, because some # people rely on it being here. from djangocg.utils.encoding import force_unicode # Capitalizes the first letter of a string. capfirst = lambda x: x and force_text(x)[0].upper() + force_text(x)[1:] capfirst = allow_lazy(capfirst, six.text_type) # Set up regular expressions re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U|re.S) re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>', re.S) def wrap(text, width): """ A word-wrap function that preserves existing line breaks and most spaces in the text. Expects that existing line breaks are posix newlines. """ text = force_text(text) def _generator(): it = iter(text.split(' ')) word = next(it)