def test_json_decode(self): # json_decode accepts both bytes and unicode, but strings it returns # are always unicode. self.assertEqual(json_decode(b'"foo"'), u("foo")) self.assertEqual(json_decode(u('"foo"')), u("foo")) # Non-ascii bytes are interpreted as utf8 self.assertEqual(json_decode(utf8(u('"\u00e9"'))), u("\u00e9"))
def test_json_encode(self): # json deals with strings, not bytes. On python 2 byte strings will # convert automatically if they are utf8; on python 3 byte strings # are not allowed. self.assertEqual(json_decode(json_encode(u("\u00e9"))), u("\u00e9")) if bytes_type is str: self.assertEqual(json_decode(json_encode(utf8(u("\u00e9")))), u("\u00e9")) self.assertRaises(UnicodeDecodeError, json_encode, b"\xe9")
def test_url_escape_unicode(self): tests = [ # byte strings are passed through as-is (u('\u00e9').encode('utf8'), '%C3%A9'), (u('\u00e9').encode('latin1'), '%E9'), # unicode strings become utf8 (u('\u00e9'), '%C3%A9'), ] for unescaped, escaped in tests: self.assertEqual(url_escape(unescaped), escaped)
def test_utf8_logging(self): self.logger.error(u("\u00e9").encode("utf8")) if issubclass(bytes_type, basestring_type): # on python 2, utf8 byte strings (and by extension ascii byte # strings) are passed through as-is. self.assertEqual(self.get_output(), utf8(u("\u00e9"))) else: # on python 3, byte strings always get repr'd even if # they're ascii-only, so this degenerates into another # copy of test_bytes_logging. self.assertEqual(self.get_output(), utf8(repr(utf8(u("\u00e9")))))
def test_unicode_literal_expression(self): # Unicode literals should be usable in templates. Note that this # test simulates unicode characters appearing directly in the # template file (with utf8 encoding), i.e. \u escapes would not # be used in the template file itself. if str is unicode_type: # python 3 needs a different version of this test since # 2to3 doesn't run on template internals template = Template(utf8(u('{{ "\u00e9" }}'))) else: template = Template(utf8(u('{{ u"\u00e9" }}'))) self.assertEqual(template.generate(), utf8(u("\u00e9")))
def test_url_unescape_unicode(self): tests = [ ('%C3%A9', u('\u00e9'), 'utf8'), ('%C3%A9', u('\u00c3\u00a9'), 'latin1'), ('%C3%A9', utf8(u('\u00e9')), None), ] for escaped, unescaped, encoding in tests: # input strings to url_unescape should only contain ascii # characters, but make sure the function accepts both byte # and unicode strings. self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped) self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
def test_xhtml_escape(self): tests = [ ("<foo>", "<foo>"), (u("<foo>"), u("<foo>")), (b"<foo>", b"<foo>"), ("<>&\"'", "<>&"'"), ("&", "&amp;"), (u("<\u00e9>"), u("<\u00e9>")), (b"<\xc3\xa9>", b"<\xc3\xa9>"), ] for unescaped, escaped in tests: self.assertEqual(utf8(xhtml_escape(unescaped)), utf8(escaped)) self.assertEqual(utf8(unescaped), utf8(xhtml_unescape(escaped)))
def test_gettext(self): webalchemy.tornado.locale.load_gettext_translations( os.path.join(os.path.dirname(__file__), 'gettext_translations'), "tornado_test") locale = webalchemy.tornado.locale.get("fr_FR") self.assertTrue(isinstance(locale, webalchemy.tornado.locale.GettextLocale)) self.assertEqual(locale.translate("school"), u("\u00e9cole"))
def list(self, parts): """Returns a comma-separated list for the given list of parts. The format is, e.g., "A, B and C", "A and B" or just "A" for lists of size 1. """ _ = self.translate if len(parts) == 0: return "" if len(parts) == 1: return parts[0] comma = u(' \u0648 ') if self.code.startswith("fa") else u(", ") return _("%(commas)s and %(last)s") % { "commas": comma.join(parts[:-1]), "last": parts[len(parts) - 1], }
def setUp(self): self.formatter = LogFormatter(color=False) # Fake color support. We can't guarantee anything about the $TERM # variable when the tests are run, so just patch in some values # for testing. (testing with color off fails to expose some potential # encoding issues from the control characters) self.formatter._colors = { logging.ERROR: u("\u0001"), } self.formatter._normal = u("\u0002") # construct a Logger directly to bypass getLogger's caching self.logger = logging.Logger('LogFormatterTest') self.logger.propagate = False self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'log.out') self.handler = self.make_handler(self.filename) self.handler.setFormatter(self.formatter) self.logger.addHandler(self.handler)
def make_link(m): url = m.group(1) proto = m.group(2) if require_protocol and not proto: return url # not protocol, no linkify if proto and proto not in permitted_protocols: return url # bad protocol, no linkify href = m.group(1) if not proto: href = "http://" + href # no proto specified, use http if callable(extra_params): params = " " + extra_params(href).strip() else: params = extra_params # clip long urls. max_len is just an approximation max_len = 30 if shorten and len(url) > max_len: before_clip = url if proto: proto_len = len(proto) + 1 + len(m.group(3) or "") # +1 for : else: proto_len = 0 parts = url[proto_len:].split("/") if len(parts) > 1: # Grab the whole host part plus the first bit of the path # The path is usually not that interesting once shortened # (no more slug, etc), so it really just provides a little # extra indication of shortening. url = url[:proto_len] + parts[0] + "/" + \ parts[1][:8].split('?')[0].split('.')[0] if len(url) > max_len * 1.5: # still too long url = url[:max_len] if url != before_clip: amp = url.rfind('&') # avoid splitting html char entities if amp > max_len - 5: url = url[:amp] url += "..." if len(url) >= len(before_clip): url = before_clip else: # full url is visible on mouse-over (for those who don't # have a status bar, such as Safari by default) params += ' title="%s"' % href return u('<a href="%s"%s>%s</a>') % (href, params, url)
def test_body_encoding(self): unicode_body = u("\xe9") byte_body = binascii.a2b_hex(b"e9") # unicode string in body gets converted to utf8 response = self.fetch("/echopost", method="POST", body=unicode_body, headers={"Content-Type": "application/blah"}) self.assertEqual(response.headers["Content-Length"], "2") self.assertEqual(response.body, utf8(unicode_body)) # byte strings pass through directly response = self.fetch("/echopost", method="POST", body=byte_body, headers={"Content-Type": "application/blah"}) self.assertEqual(response.headers["Content-Length"], "1") self.assertEqual(response.body, byte_body) # Mixing unicode in headers and byte string bodies shouldn't # break anything response = self.fetch("/echopost", method="POST", body=byte_body, headers={"Content-Type": "application/blah"}, user_agent=u("foo")) self.assertEqual(response.headers["Content-Length"], "1") self.assertEqual(response.body, byte_body)
def __init__(self, code, translations): self.code = code self.name = LOCALE_NAMES.get(code, {}).get("name", u("Unknown")) self.rtl = False for prefix in ["fa", "ar", "he"]: if self.code.startswith(prefix): self.rtl = True break self.translations = translations # Initialize strings for date formatting _ = self.translate self._months = [ _("January"), _("February"), _("March"), _("April"), _("May"), _("June"), _("July"), _("August"), _("September"), _("October"), _("November"), _("December")] self._weekdays = [ _("Monday"), _("Tuesday"), _("Wednesday"), _("Thursday"), _("Friday"), _("Saturday"), _("Sunday")]
def test_multipart_form(self): # Encodings here are tricky: Headers are latin1, bodies can be # anything (we use utf8 by default). response = self.raw_fetch([ b"POST /multipart HTTP/1.0", b"Content-Type: multipart/form-data; boundary=1234567890", b"X-Header-encoding-test: \xe9", ], b"\r\n".join([ b"Content-Disposition: form-data; name=argument", b"", u("\u00e1").encode("utf-8"), b"--1234567890", u('Content-Disposition: form-data; name="files"; filename="\u00f3"').encode("utf8"), b"", u("\u00fa").encode("utf-8"), b"--1234567890--", b"", ])) data = json_decode(response.body) self.assertEqual(u("\u00e9"), data["header"]) self.assertEqual(u("\u00e1"), data["argument"]) self.assertEqual(u("\u00f3"), data["filename"]) self.assertEqual(u("\u00fa"), data["filebody"])
def test_escape_return_types(self): # On python2 the escape methods should generally return the same # type as their argument self.assertEqual(type(xhtml_escape("foo")), str) self.assertEqual(type(xhtml_escape(u("foo"))), unicode_type)
def test_twitter_get_user(self): response = self.fetch( '/twitter/client/login?oauth_token=zxcv', headers={'Cookie': '_oauth_request_token=enhjdg==|MTIzNA=='}) response.rethrow() parsed = json_decode(response.body) self.assertEqual(parsed, {u('access_token'): {u('key'): u('hjkl'), u('screen_name'): u('foo'), u('secret'): u('vbnm')}, u('name'): u('Foo'), u('screen_name'): u('foo'), u('username'): u('foo')})
def test_utf8_in_file(self): tmpl = self.loader.load("utf8.html") result = tmpl.generate() self.assertEqual(to_unicode(result).strip(), u("H\u00e9llo"))
def test_unicode_template(self): template = Template(utf8(u("\u00e9"))) self.assertEqual(template.generate(), utf8(u("\u00e9")))
#!/usr/bin/env python from __future__ import absolute_import, division, print_function, with_statement import webalchemy.tornado.escape from webalchemy.tornado.escape import utf8, xhtml_escape, xhtml_unescape, url_escape, url_unescape, to_unicode, json_decode, json_encode from webalchemy.tornado.util import u, unicode_type, bytes_type from webalchemy.tornado.test.util import unittest linkify_tests = [ # (input, linkify_kwargs, expected_output) ("hello http://world.com/!", {}, u('hello <a href="http://world.com/">http://world.com/</a>!')), ("hello http://world.com/with?param=true&stuff=yes", {}, u('hello <a href="http://world.com/with?param=true&stuff=yes">http://world.com/with?param=true&stuff=yes</a>')), # an opened paren followed by many chars killed Gruber's regex ("http://url.com/w(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", {}, u('<a href="http://url.com/w">http://url.com/w</a>(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')), # as did too many dots at the end ("http://url.com/withmany.......................................", {}, u('<a href="http://url.com/withmany">http://url.com/withmany</a>.......................................')), ("http://url.com/withmany((((((((((((((((((((((((((((((((((a)", {}, u('<a href="http://url.com/withmany">http://url.com/withmany</a>((((((((((((((((((((((((((((((((((a)')), # some examples from http://daringfireball.net/2009/11/liberal_regex_for_matching_urls
def test_unicode_apply(self): def upper(s): return to_unicode(s).upper() template = Template(utf8(u("{% apply upper %}foo \u00e9{% end %}"))) self.assertEqual(template.generate(upper=upper), utf8(u("FOO \u00c9")))
def test_empty_query_string(self): response = self.fetch("/echo?foo=&foo=") data = json_decode(response.body) self.assertEqual(data, {u("foo"): [u(""), u("")]})
def test_non_ascii_name(self): name = webalchemy.tornado.locale.LOCALE_NAMES['es_LA']['name'] self.assertTrue(isinstance(name, unicode_type)) self.assertEqual(name, u('Espa\u00f1ol')) self.assertEqual(utf8(name), b'Espa\xc3\xb1ol')
from __future__ import absolute_import, division, print_function, with_statement from webalchemy.tornado.ioloop import IOLoop from webalchemy.tornado.netutil import ThreadedResolver from webalchemy.tornado.util import u # When this module is imported, it runs getaddrinfo on a thread. Since # the hostname is unicode, getaddrinfo attempts to import encodings.idna # but blocks on the import lock. Verify that ThreadedResolver avoids # this deadlock. resolver = ThreadedResolver() IOLoop.current().run_sync(lambda: resolver.resolve(u('localhost'), 80))
def test_csv(self): webalchemy.tornado.locale.load_translations( os.path.join(os.path.dirname(__file__), 'csv_translations')) locale = webalchemy.tornado.locale.get("fr_FR") self.assertTrue(isinstance(locale, webalchemy.tornado.locale.CSVLocale)) self.assertEqual(locale.translate("school"), u("\u00e9cole"))
def test_unicode_logging(self): self.logger.error(u("\u00e9")) self.assertEqual(self.get_output(), utf8(u("\u00e9")))
def test_empty_post_parameters(self): response = self.fetch("/echo", method="POST", body="foo=&bar=") data = json_decode(response.body) self.assertEqual(data, {u("foo"): [u("")], u("bar"): [u("")]})
def format_date(self, date, gmt_offset=0, relative=True, shorter=False, full_format=False): """Formats the given date (which should be GMT). By default, we return a relative time (e.g., "2 minutes ago"). You can return an absolute date string with ``relative=False``. You can force a full format date ("July 10, 1980") with ``full_format=True``. This method is primarily intended for dates in the past. For dates in the future, we fall back to full format. """ if isinstance(date, numbers.Real): date = datetime.datetime.utcfromtimestamp(date) now = datetime.datetime.utcnow() if date > now: if relative and (date - now).seconds < 60: # Due to click skew, things are some things slightly # in the future. Round timestamps in the immediate # future down to now in relative mode. date = now else: # Otherwise, future dates always use the full format. full_format = True local_date = date - datetime.timedelta(minutes=gmt_offset) local_now = now - datetime.timedelta(minutes=gmt_offset) local_yesterday = local_now - datetime.timedelta(hours=24) difference = now - date seconds = difference.seconds days = difference.days _ = self.translate format = None if not full_format: if relative and days == 0: if seconds < 50: return _("1 second ago", "%(seconds)d seconds ago", seconds) % {"seconds": seconds} if seconds < 50 * 60: minutes = round(seconds / 60.0) return _("1 minute ago", "%(minutes)d minutes ago", minutes) % {"minutes": minutes} hours = round(seconds / (60.0 * 60)) return _("1 hour ago", "%(hours)d hours ago", hours) % {"hours": hours} if days == 0: format = _("%(time)s") elif days == 1 and local_date.day == local_yesterday.day and \ relative: format = _("yesterday") if shorter else \ _("yesterday at %(time)s") elif days < 5: format = _("%(weekday)s") if shorter else \ _("%(weekday)s at %(time)s") elif days < 334: # 11mo, since confusing for same month last year format = _("%(month_name)s %(day)s") if shorter else \ _("%(month_name)s %(day)s at %(time)s") if format is None: format = _("%(month_name)s %(day)s, %(year)s") if shorter else \ _("%(month_name)s %(day)s, %(year)s at %(time)s") tfhour_clock = self.code not in ("en", "en_US", "zh_CN") if tfhour_clock: str_time = "%d:%02d" % (local_date.hour, local_date.minute) elif self.code == "zh_CN": str_time = "%s%d:%02d" % ( (u('\u4e0a\u5348'), u('\u4e0b\u5348'))[local_date.hour >= 12], local_date.hour % 12 or 12, local_date.minute) else: str_time = "%d:%02d %s" % ( local_date.hour % 12 or 12, local_date.minute, ("am", "pm")[local_date.hour >= 12]) return format % { "month_name": self._months[local_date.month - 1], "weekday": self._weekdays[local_date.weekday()], "day": str(local_date.day), "year": str(local_date.year), "time": str_time }
from webalchemy.tornado.util import u, Configurable if hasattr(ssl, 'match_hostname') and hasattr(ssl, 'CertificateError'): # python 3.2+ ssl_match_hostname = ssl.match_hostname SSLCertificateError = ssl.CertificateError else: import backports.ssl_match_hostname ssl_match_hostname = backports.ssl_match_hostname.match_hostname SSLCertificateError = backports.ssl_match_hostname.CertificateError # ThreadedResolver runs getaddrinfo on a thread. If the hostname is unicode, # getaddrinfo attempts to import encodings.idna. If this is done at # module-import time, the import lock is already held by the main thread, # leading to deadlock. Avoid it by caching the idna encoder on the main # thread now. u('foo').encode('idna') def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128, flags=None): """Creates listening sockets bound to the given port and address. Returns a list of socket objects (multiple sockets are returned if the given address maps to multiple IP addresses, which is most common for mixed IPv4 and IPv6 use). Address may be either an IP address or hostname. If it's a hostname, the server will listen on all IP addresses associated with the name. Address may be an empty string or None to listen on all available interfaces. Family may be set to either `socket.AF_INET` or `socket.AF_INET6` to restrict to IPv4 or IPv6 addresses, otherwise both will be used if available.
# python 3 self.ngettext = translations.ngettext self.gettext = translations.gettext # self.gettext must exist before __init__ is called, since it # calls into self.translate super(GettextLocale, self).__init__(code, translations) def translate(self, message, plural_message=None, count=None): if plural_message is not None: assert count is not None return self.ngettext(message, plural_message, count) else: return self.gettext(message) LOCALE_NAMES = { "af_ZA": {"name_en": u("Afrikaans"), "name": u("Afrikaans")}, "am_ET": {"name_en": u("Amharic"), "name": u('\u12a0\u121b\u122d\u129b')}, "ar_AR": {"name_en": u("Arabic"), "name": u("\u0627\u0644\u0639\u0631\u0628\u064a\u0629")}, "bg_BG": {"name_en": u("Bulgarian"), "name": u("\u0411\u044a\u043b\u0433\u0430\u0440\u0441\u043a\u0438")}, "bn_IN": {"name_en": u("Bengali"), "name": u("\u09ac\u09be\u0982\u09b2\u09be")}, "bs_BA": {"name_en": u("Bosnian"), "name": u("Bosanski")}, "ca_ES": {"name_en": u("Catalan"), "name": u("Catal\xe0")}, "cs_CZ": {"name_en": u("Czech"), "name": u("\u010ce\u0161tina")}, "cy_GB": {"name_en": u("Welsh"), "name": u("Cymraeg")}, "da_DK": {"name_en": u("Danish"), "name": u("Dansk")}, "de_DE": {"name_en": u("German"), "name": u("Deutsch")}, "el_GR": {"name_en": u("Greek"), "name": u("\u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ac")}, "en_GB": {"name_en": u("English (UK)"), "name": u("English (UK)")}, "en_US": {"name_en": u("English (US)"), "name": u("English (US)")}, "es_ES": {"name_en": u("Spanish (Spain)"), "name": u("Espa\xf1ol (Espa\xf1a)")}, "es_LA": {"name_en": u("Spanish"), "name": u("Espa\xf1ol")},
def test_query_string_encoding(self): response = self.fetch("/echo?foo=%C3%A9") data = json_decode(response.body) self.assertEqual(data, {u("foo"): [u("\u00e9")]})