Example #1
0
    def test_json_decode(self):
        # json_decode accepts both bytes and unicode, but strings it returns
        # are always unicode.
        self.assertEqual(json_decode(b'"foo"'), u("foo"))
        self.assertEqual(json_decode(u('"foo"')), u("foo"))

        # Non-ascii bytes are interpreted as utf8
        self.assertEqual(json_decode(utf8(u('"\u00e9"'))), u("\u00e9"))
Example #2
0
 def test_json_encode(self):
     # json deals with strings, not bytes.  On python 2 byte strings will
     # convert automatically if they are utf8; on python 3 byte strings
     # are not allowed.
     self.assertEqual(json_decode(json_encode(u("\u00e9"))), u("\u00e9"))
     if bytes_type is str:
         self.assertEqual(json_decode(json_encode(utf8(u("\u00e9")))), u("\u00e9"))
         self.assertRaises(UnicodeDecodeError, json_encode, b"\xe9")
Example #3
0
    def test_url_escape_unicode(self):
        tests = [
            # byte strings are passed through as-is
            (u('\u00e9').encode('utf8'), '%C3%A9'),
            (u('\u00e9').encode('latin1'), '%E9'),

            # unicode strings become utf8
            (u('\u00e9'), '%C3%A9'),
        ]
        for unescaped, escaped in tests:
            self.assertEqual(url_escape(unescaped), escaped)
Example #4
0
 def test_utf8_logging(self):
     self.logger.error(u("\u00e9").encode("utf8"))
     if issubclass(bytes_type, basestring_type):
         # on python 2, utf8 byte strings (and by extension ascii byte
         # strings) are passed through as-is.
         self.assertEqual(self.get_output(), utf8(u("\u00e9")))
     else:
         # on python 3, byte strings always get repr'd even if
         # they're ascii-only, so this degenerates into another
         # copy of test_bytes_logging.
         self.assertEqual(self.get_output(), utf8(repr(utf8(u("\u00e9")))))
 def test_unicode_literal_expression(self):
     # Unicode literals should be usable in templates.  Note that this
     # test simulates unicode characters appearing directly in the
     # template file (with utf8 encoding), i.e. \u escapes would not
     # be used in the template file itself.
     if str is unicode_type:
         # python 3 needs a different version of this test since
         # 2to3 doesn't run on template internals
         template = Template(utf8(u('{{ "\u00e9" }}')))
     else:
         template = Template(utf8(u('{{ u"\u00e9" }}')))
     self.assertEqual(template.generate(), utf8(u("\u00e9")))
Example #6
0
 def test_url_unescape_unicode(self):
     tests = [
         ('%C3%A9', u('\u00e9'), 'utf8'),
         ('%C3%A9', u('\u00c3\u00a9'), 'latin1'),
         ('%C3%A9', utf8(u('\u00e9')), None),
     ]
     for escaped, unescaped, encoding in tests:
         # input strings to url_unescape should only contain ascii
         # characters, but make sure the function accepts both byte
         # and unicode strings.
         self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped)
         self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
Example #7
0
    def test_xhtml_escape(self):
        tests = [
            ("<foo>", "&lt;foo&gt;"),
            (u("<foo>"), u("&lt;foo&gt;")),
            (b"<foo>", b"&lt;foo&gt;"),

            ("<>&\"'", "&lt;&gt;&amp;&quot;&#39;"),
            ("&amp;", "&amp;amp;"),

            (u("<\u00e9>"), u("&lt;\u00e9&gt;")),
            (b"<\xc3\xa9>", b"&lt;\xc3\xa9&gt;"),
        ]
        for unescaped, escaped in tests:
            self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped))
            self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
Example #8
0
 def test_gettext(self):
     webalchemy.tornado.locale.load_gettext_translations(
         os.path.join(os.path.dirname(__file__), 'gettext_translations'),
         "tornado_test")
     locale = webalchemy.tornado.locale.get("fr_FR")
     self.assertTrue(isinstance(locale, webalchemy.tornado.locale.GettextLocale))
     self.assertEqual(locale.translate("school"), u("\u00e9cole"))
Example #9
0
    def list(self, parts):
        """Returns a comma-separated list for the given list of parts.

        The format is, e.g., "A, B and C", "A and B" or just "A" for lists
        of size 1.
        """
        _ = self.translate
        if len(parts) == 0:
            return ""
        if len(parts) == 1:
            return parts[0]
        comma = u(' \u0648 ') if self.code.startswith("fa") else u(", ")
        return _("%(commas)s and %(last)s") % {
            "commas": comma.join(parts[:-1]),
            "last": parts[len(parts) - 1],
        }
Example #10
0
 def setUp(self):
     self.formatter = LogFormatter(color=False)
     # Fake color support.  We can't guarantee anything about the $TERM
     # variable when the tests are run, so just patch in some values
     # for testing.  (testing with color off fails to expose some potential
     # encoding issues from the control characters)
     self.formatter._colors = {
         logging.ERROR: u("\u0001"),
     }
     self.formatter._normal = u("\u0002")
     # construct a Logger directly to bypass getLogger's caching
     self.logger = logging.Logger('LogFormatterTest')
     self.logger.propagate = False
     self.tempdir = tempfile.mkdtemp()
     self.filename = os.path.join(self.tempdir, 'log.out')
     self.handler = self.make_handler(self.filename)
     self.handler.setFormatter(self.formatter)
     self.logger.addHandler(self.handler)
Example #11
0
    def make_link(m):
        url = m.group(1)
        proto = m.group(2)
        if require_protocol and not proto:
            return url  # not protocol, no linkify

        if proto and proto not in permitted_protocols:
            return url  # bad protocol, no linkify

        href = m.group(1)
        if not proto:
            href = "http://" + href   # no proto specified, use http

        if callable(extra_params):
            params = " " + extra_params(href).strip()
        else:
            params = extra_params

        # clip long urls. max_len is just an approximation
        max_len = 30
        if shorten and len(url) > max_len:
            before_clip = url
            if proto:
                proto_len = len(proto) + 1 + len(m.group(3) or "")  # +1 for :
            else:
                proto_len = 0

            parts = url[proto_len:].split("/")
            if len(parts) > 1:
                # Grab the whole host part plus the first bit of the path
                # The path is usually not that interesting once shortened
                # (no more slug, etc), so it really just provides a little
                # extra indication of shortening.
                url = url[:proto_len] + parts[0] + "/" + \
                    parts[1][:8].split('?')[0].split('.')[0]

            if len(url) > max_len * 1.5:  # still too long
                url = url[:max_len]

            if url != before_clip:
                amp = url.rfind('&')
                # avoid splitting html char entities
                if amp > max_len - 5:
                    url = url[:amp]
                url += "..."

                if len(url) >= len(before_clip):
                    url = before_clip
                else:
                    # full url is visible on mouse-over (for those who don't
                    # have a status bar, such as Safari by default)
                    params += ' title="%s"' % href

        return u('<a href="%s"%s>%s</a>') % (href, params, url)
    def test_body_encoding(self):
        unicode_body = u("\xe9")
        byte_body = binascii.a2b_hex(b"e9")

        # unicode string in body gets converted to utf8
        response = self.fetch("/echopost", method="POST", body=unicode_body,
                              headers={"Content-Type": "application/blah"})
        self.assertEqual(response.headers["Content-Length"], "2")
        self.assertEqual(response.body, utf8(unicode_body))

        # byte strings pass through directly
        response = self.fetch("/echopost", method="POST",
                              body=byte_body,
                              headers={"Content-Type": "application/blah"})
        self.assertEqual(response.headers["Content-Length"], "1")
        self.assertEqual(response.body, byte_body)

        # Mixing unicode in headers and byte string bodies shouldn't
        # break anything
        response = self.fetch("/echopost", method="POST", body=byte_body,
                              headers={"Content-Type": "application/blah"},
                              user_agent=u("foo"))
        self.assertEqual(response.headers["Content-Length"], "1")
        self.assertEqual(response.body, byte_body)
Example #13
0
    def __init__(self, code, translations):
        self.code = code
        self.name = LOCALE_NAMES.get(code, {}).get("name", u("Unknown"))
        self.rtl = False
        for prefix in ["fa", "ar", "he"]:
            if self.code.startswith(prefix):
                self.rtl = True
                break
        self.translations = translations

        # Initialize strings for date formatting
        _ = self.translate
        self._months = [
            _("January"), _("February"), _("March"), _("April"),
            _("May"), _("June"), _("July"), _("August"),
            _("September"), _("October"), _("November"), _("December")]
        self._weekdays = [
            _("Monday"), _("Tuesday"), _("Wednesday"), _("Thursday"),
            _("Friday"), _("Saturday"), _("Sunday")]
 def test_multipart_form(self):
     # Encodings here are tricky:  Headers are latin1, bodies can be
     # anything (we use utf8 by default).
     response = self.raw_fetch([
         b"POST /multipart HTTP/1.0",
         b"Content-Type: multipart/form-data; boundary=1234567890",
         b"X-Header-encoding-test: \xe9",
     ],
         b"\r\n".join([
             b"Content-Disposition: form-data; name=argument",
             b"",
             u("\u00e1").encode("utf-8"),
             b"--1234567890",
             u('Content-Disposition: form-data; name="files"; filename="\u00f3"').encode("utf8"),
             b"",
             u("\u00fa").encode("utf-8"),
             b"--1234567890--",
             b"",
         ]))
     data = json_decode(response.body)
     self.assertEqual(u("\u00e9"), data["header"])
     self.assertEqual(u("\u00e1"), data["argument"])
     self.assertEqual(u("\u00f3"), data["filename"])
     self.assertEqual(u("\u00fa"), data["filebody"])
Example #15
0
 def test_escape_return_types(self):
     # On python2 the escape methods should generally return the same
     # type as their argument
     self.assertEqual(type(xhtml_escape("foo")), str)
     self.assertEqual(type(xhtml_escape(u("foo"))), unicode_type)
Example #16
0
 def test_twitter_get_user(self):
     response = self.fetch(
         '/twitter/client/login?oauth_token=zxcv',
         headers={'Cookie': '_oauth_request_token=enhjdg==|MTIzNA=='})
     response.rethrow()
     parsed = json_decode(response.body)
     self.assertEqual(parsed,
                      {u('access_token'): {u('key'): u('hjkl'),
                                           u('screen_name'): u('foo'),
                                           u('secret'): u('vbnm')},
                       u('name'): u('Foo'),
                       u('screen_name'): u('foo'),
                       u('username'): u('foo')})
Example #17
0
 def test_utf8_in_file(self):
     tmpl = self.loader.load("utf8.html")
     result = tmpl.generate()
     self.assertEqual(to_unicode(result).strip(), u("H\u00e9llo"))
Example #18
0
 def test_unicode_template(self):
     template = Template(utf8(u("\u00e9")))
     self.assertEqual(template.generate(), utf8(u("\u00e9")))
Example #19
0
#!/usr/bin/env python


from __future__ import absolute_import, division, print_function, with_statement
import webalchemy.tornado.escape

from webalchemy.tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode, json_encode
from webalchemy.tornado.util import u, unicode_type, bytes_type
from webalchemy.tornado.test.util import unittest

linkify_tests = [
    # (input, linkify_kwargs, expected_output)

    ("hello http://world.com/!", {},
     u('hello <a href="http://world.com/">http://world.com/</a>!')),

    ("hello http://world.com/with?param=true&stuff=yes", {},
     u('hello <a href="http://world.com/with?param=true&amp;stuff=yes">http://world.com/with?param=true&amp;stuff=yes</a>')),

    # an opened paren followed by many chars killed Gruber's regex
    ("http://url.com/w(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", {},
     u('<a href="http://url.com/w">http://url.com/w</a>(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')),

    # as did too many dots at the end
    ("http://url.com/withmany.......................................", {},
     u('<a href="http://url.com/withmany">http://url.com/withmany</a>.......................................')),

    ("http://url.com/withmany((((((((((((((((((((((((((((((((((a)", {},
     u('<a href="http://url.com/withmany">http://url.com/withmany</a>((((((((((((((((((((((((((((((((((a)')),

    # some examples from http://daringfireball.net/2009/11/liberal_regex_for_matching_urls
Example #20
0
 def test_unicode_apply(self):
     def upper(s):
         return to_unicode(s).upper()
     template = Template(utf8(u("{% apply upper %}foo \u00e9{% end %}")))
     self.assertEqual(template.generate(upper=upper), utf8(u("FOO \u00c9")))
 def test_empty_query_string(self):
     response = self.fetch("/echo?foo=&foo=")
     data = json_decode(response.body)
     self.assertEqual(data, {u("foo"): [u(""), u("")]})
Example #22
0
 def test_non_ascii_name(self):
     name = webalchemy.tornado.locale.LOCALE_NAMES['es_LA']['name']
     self.assertTrue(isinstance(name, unicode_type))
     self.assertEqual(name, u('Espa\u00f1ol'))
     self.assertEqual(utf8(name), b'Espa\xc3\xb1ol')
from __future__ import absolute_import, division, print_function, with_statement
from webalchemy.tornado.ioloop import IOLoop
from webalchemy.tornado.netutil import ThreadedResolver
from webalchemy.tornado.util import u

# When this module is imported, it runs getaddrinfo on a thread. Since
# the hostname is unicode, getaddrinfo attempts to import encodings.idna
# but blocks on the import lock. Verify that ThreadedResolver avoids
# this deadlock.

resolver = ThreadedResolver()
IOLoop.current().run_sync(lambda: resolver.resolve(u('localhost'), 80))
Example #24
0
 def test_csv(self):
     webalchemy.tornado.locale.load_translations(
         os.path.join(os.path.dirname(__file__), 'csv_translations'))
     locale = webalchemy.tornado.locale.get("fr_FR")
     self.assertTrue(isinstance(locale, webalchemy.tornado.locale.CSVLocale))
     self.assertEqual(locale.translate("school"), u("\u00e9cole"))
Example #25
0
 def test_unicode_logging(self):
     self.logger.error(u("\u00e9"))
     self.assertEqual(self.get_output(), utf8(u("\u00e9")))
 def test_empty_post_parameters(self):
     response = self.fetch("/echo", method="POST", body="foo=&bar=")
     data = json_decode(response.body)
     self.assertEqual(data, {u("foo"): [u("")], u("bar"): [u("")]})
Example #27
0
    def format_date(self, date, gmt_offset=0, relative=True, shorter=False,
                    full_format=False):
        """Formats the given date (which should be GMT).

        By default, we return a relative time (e.g., "2 minutes ago"). You
        can return an absolute date string with ``relative=False``.

        You can force a full format date ("July 10, 1980") with
        ``full_format=True``.

        This method is primarily intended for dates in the past.
        For dates in the future, we fall back to full format.
        """
        if isinstance(date, numbers.Real):
            date = datetime.datetime.utcfromtimestamp(date)
        now = datetime.datetime.utcnow()
        if date > now:
            if relative and (date - now).seconds < 60:
                # Due to click skew, things are some things slightly
                # in the future. Round timestamps in the immediate
                # future down to now in relative mode.
                date = now
            else:
                # Otherwise, future dates always use the full format.
                full_format = True
        local_date = date - datetime.timedelta(minutes=gmt_offset)
        local_now = now - datetime.timedelta(minutes=gmt_offset)
        local_yesterday = local_now - datetime.timedelta(hours=24)
        difference = now - date
        seconds = difference.seconds
        days = difference.days

        _ = self.translate
        format = None
        if not full_format:
            if relative and days == 0:
                if seconds < 50:
                    return _("1 second ago", "%(seconds)d seconds ago",
                             seconds) % {"seconds": seconds}

                if seconds < 50 * 60:
                    minutes = round(seconds / 60.0)
                    return _("1 minute ago", "%(minutes)d minutes ago",
                             minutes) % {"minutes": minutes}

                hours = round(seconds / (60.0 * 60))
                return _("1 hour ago", "%(hours)d hours ago",
                         hours) % {"hours": hours}

            if days == 0:
                format = _("%(time)s")
            elif days == 1 and local_date.day == local_yesterday.day and \
                    relative:
                format = _("yesterday") if shorter else \
                    _("yesterday at %(time)s")
            elif days < 5:
                format = _("%(weekday)s") if shorter else \
                    _("%(weekday)s at %(time)s")
            elif days < 334:  # 11mo, since confusing for same month last year
                format = _("%(month_name)s %(day)s") if shorter else \
                    _("%(month_name)s %(day)s at %(time)s")

        if format is None:
            format = _("%(month_name)s %(day)s, %(year)s") if shorter else \
                _("%(month_name)s %(day)s, %(year)s at %(time)s")

        tfhour_clock = self.code not in ("en", "en_US", "zh_CN")
        if tfhour_clock:
            str_time = "%d:%02d" % (local_date.hour, local_date.minute)
        elif self.code == "zh_CN":
            str_time = "%s%d:%02d" % (
                (u('\u4e0a\u5348'), u('\u4e0b\u5348'))[local_date.hour >= 12],
                local_date.hour % 12 or 12, local_date.minute)
        else:
            str_time = "%d:%02d %s" % (
                local_date.hour % 12 or 12, local_date.minute,
                ("am", "pm")[local_date.hour >= 12])

        return format % {
            "month_name": self._months[local_date.month - 1],
            "weekday": self._weekdays[local_date.weekday()],
            "day": str(local_date.day),
            "year": str(local_date.year),
            "time": str_time
        }
Example #28
0
from webalchemy.tornado.util import u, Configurable

if hasattr(ssl, 'match_hostname') and hasattr(ssl, 'CertificateError'):  # python 3.2+
    ssl_match_hostname = ssl.match_hostname
    SSLCertificateError = ssl.CertificateError
else:
    import backports.ssl_match_hostname
    ssl_match_hostname = backports.ssl_match_hostname.match_hostname
    SSLCertificateError = backports.ssl_match_hostname.CertificateError

# ThreadedResolver runs getaddrinfo on a thread. If the hostname is unicode,
# getaddrinfo attempts to import encodings.idna. If this is done at
# module-import time, the import lock is already held by the main thread,
# leading to deadlock. Avoid it by caching the idna encoder on the main
# thread now.
u('foo').encode('idna')


def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128, flags=None):
    """Creates listening sockets bound to the given port and address.

    Returns a list of socket objects (multiple sockets are returned if
    the given address maps to multiple IP addresses, which is most common
    for mixed IPv4 and IPv6 use).

    Address may be either an IP address or hostname.  If it's a hostname,
    the server will listen on all IP addresses associated with the
    name.  Address may be an empty string or None to listen on all
    available interfaces.  Family may be set to either `socket.AF_INET`
    or `socket.AF_INET6` to restrict to IPv4 or IPv6 addresses, otherwise
    both will be used if available.
Example #29
0
            # python 3
            self.ngettext = translations.ngettext
            self.gettext = translations.gettext
        # self.gettext must exist before __init__ is called, since it
        # calls into self.translate
        super(GettextLocale, self).__init__(code, translations)

    def translate(self, message, plural_message=None, count=None):
        if plural_message is not None:
            assert count is not None
            return self.ngettext(message, plural_message, count)
        else:
            return self.gettext(message)

LOCALE_NAMES = {
    "af_ZA": {"name_en": u("Afrikaans"), "name": u("Afrikaans")},
    "am_ET": {"name_en": u("Amharic"), "name": u('\u12a0\u121b\u122d\u129b')},
    "ar_AR": {"name_en": u("Arabic"), "name": u("\u0627\u0644\u0639\u0631\u0628\u064a\u0629")},
    "bg_BG": {"name_en": u("Bulgarian"), "name": u("\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438")},
    "bn_IN": {"name_en": u("Bengali"), "name": u("\u09ac\u09be\u0982\u09b2\u09be")},
    "bs_BA": {"name_en": u("Bosnian"), "name": u("Bosanski")},
    "ca_ES": {"name_en": u("Catalan"), "name": u("Catal\xe0")},
    "cs_CZ": {"name_en": u("Czech"), "name": u("\u010ce\u0161tina")},
    "cy_GB": {"name_en": u("Welsh"), "name": u("Cymraeg")},
    "da_DK": {"name_en": u("Danish"), "name": u("Dansk")},
    "de_DE": {"name_en": u("German"), "name": u("Deutsch")},
    "el_GR": {"name_en": u("Greek"), "name": u("\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac")},
    "en_GB": {"name_en": u("English (UK)"), "name": u("English (UK)")},
    "en_US": {"name_en": u("English (US)"), "name": u("English (US)")},
    "es_ES": {"name_en": u("Spanish (Spain)"), "name": u("Espa\xf1ol (Espa\xf1a)")},
    "es_LA": {"name_en": u("Spanish"), "name": u("Espa\xf1ol")},
 def test_query_string_encoding(self):
     response = self.fetch("/echo?foo=%C3%A9")
     data = json_decode(response.body)
     self.assertEqual(data, {u("foo"): [u("\u00e9")]})