def handle_read(self, data): logging.info("handle_read") data = to_unicode(data) if data == data.upper(): self.stream.write(b"error\talready capitalized\n") else: # data already has \n self.stream.write(utf8("ok\t%s" % data.upper())) self.stream.close()
def test_url_unescape_unicode(self): tests = [ ('%C3%A9', u'\u00e9', 'utf8'), ('%C3%A9', u'\u00c3\u00a9', 'latin1'), ('%C3%A9', utf8(u'\u00e9'), None), ] for escaped, unescaped, encoding in tests: # input strings to url_unescape should only contain ascii # characters, but make sure the function accepts both byte # and unicode strings. self.assertEqual(url_unescape(to_unicode(escaped), encoding), unescaped) self.assertEqual(url_unescape(utf8(escaped), encoding), unescaped)
def test_csv_bom(self): with open(os.path.join(os.path.dirname(__file__), 'csv_translations', 'fr_FR.csv'), 'rb') as f: char_data = to_unicode(f.read()) # Re-encode our input data (which is utf-8 without BOM) in # encodings that use the BOM and ensure that we can still load # it. Note that utf-16-le and utf-16-be do not write a BOM, # so we only test whichver variant is native to our platform. for encoding in ['utf-8-sig', 'utf-16']: tmpdir = tempfile.mkdtemp() try: with open(os.path.join(tmpdir, 'fr_FR.csv'), 'wb') as f: f.write(char_data.encode(encoding)) censiotornado.locale.load_translations(tmpdir) locale = censiotornado.locale.get('fr_FR') self.assertIsInstance(locale, censiotornado.locale.CSVLocale) self.assertEqual(locale.translate("school"), u"\u00e9cole") finally: shutil.rmtree(tmpdir)
def _handle_message(self, opcode, data): if self.client_terminated: return if self._frame_compressed: data = self._decompressor.decompress(data) if opcode == 0x1: # UTF-8 data self._message_bytes_in += len(data) try: decoded = data.decode("utf-8") except UnicodeDecodeError: self._abort() return self._run_callback(self.handler.on_message, decoded) elif opcode == 0x2: # Binary data self._message_bytes_in += len(data) self._run_callback(self.handler.on_message, data) elif opcode == 0x8: # Close self.client_terminated = True if len(data) >= 2: self.handler.close_code = struct.unpack('>H', data[:2])[0] if len(data) > 2: self.handler.close_reason = to_unicode(data[2:]) # Echo the received close code, if any (RFC 6455 section 5.5.1). self.close(self.handler.close_code) elif opcode == 0x9: # Ping self._write_frame(True, 0xA, data) elif opcode == 0xA: # Pong self._run_callback(self.handler.on_pong, data) else: self._abort()
def load_translations(directory, encoding=None): """Loads translations from CSV files in a directory. Translations are strings with optional Python-style named placeholders (e.g., ``My name is %(name)s``) and their associated translations. The directory should have translation files of the form ``LOCALE.csv``, e.g. ``es_GT.csv``. The CSV files should have two or three columns: string, translation, and an optional plural indicator. Plural indicators should be one of "plural" or "singular". A given string can have both singular and plural forms. For example ``%(name)s liked this`` may have a different verb conjugation depending on whether %(name)s is one name or a list of names. There should be two rows in the CSV file for that string, one with plural indicator "singular", and one "plural". For strings with no verbs that would change on translation, simply use "unknown" or the empty string (or don't include the column at all). The file is read using the `csv` module in the default "excel" dialect. In this format there should not be spaces after the commas. If no ``encoding`` parameter is given, the encoding will be detected automatically (among UTF-8 and UTF-16) if the file contains a byte-order marker (BOM), defaulting to UTF-8 if no BOM is present. Example translation ``es_LA.csv``:: "I love you","Te amo" "%(name)s liked this","A %(name)s les gustó esto","plural" "%(name)s liked this","A %(name)s le gustó esto","singular" .. versionchanged:: 4.3 Added ``encoding`` parameter. Added support for BOM-based encoding detection, UTF-16, and UTF-8-with-BOM. """ global _translations global _supported_locales _translations = {} for path in os.listdir(directory): if not path.endswith(".csv"): continue locale, extension = path.split(".") if not re.match("[a-z]+(_[A-Z]+)?$", locale): gen_log.error("Unrecognized locale %r (path: %s)", locale, os.path.join(directory, path)) continue full_path = os.path.join(directory, path) if encoding is None: # Try to autodetect encoding based on the BOM. with open(full_path, 'rb') as f: data = f.read(len(codecs.BOM_UTF16_LE)) if data in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE): encoding = 'utf-16' else: # utf-8-sig is "utf-8 with optional BOM". It's discouraged # in most cases but is common with CSV files because Excel # cannot read utf-8 files without a BOM. encoding = 'utf-8-sig' if PY3: # python 3: csv.reader requires a file open in text mode. # Force utf8 to avoid dependence on $LANG environment variable. f = open(full_path, "r", encoding=encoding) else: # python 2: csv can only handle byte strings (in ascii-compatible # encodings), which we decode below. Transcode everything into # utf8 before passing it to csv.reader. f = BytesIO() with codecs.open(full_path, "r", encoding=encoding) as infile: f.write(escape.utf8(infile.read())) f.seek(0) _translations[locale] = {} for i, row in enumerate(csv.reader(f)): if not row or len(row) < 2: continue row = [escape.to_unicode(c).strip() for c in row] english, translation = row[:2] if len(row) > 2: plural = row[2] or "unknown" else: plural = "unknown" if plural not in ("plural", "singular", "unknown"): gen_log.error("Unrecognized plural indicator %r in %s line %d", plural, path, i + 1) continue _translations[locale].setdefault(plural, {})[english] = translation f.close() _supported_locales = frozenset( list(_translations.keys()) + [_default_locale]) gen_log.debug("Supported locales: %s", sorted(_supported_locales))
def process_response(self, data): status, message = re.match('(.*)\t(.*)\n', to_unicode(data)).groups() if status == 'ok': return message else: raise CapError(message)
def __init__(self, template_string, name="<string>", loader=None, compress_whitespace=_UNSET, autoescape=_UNSET, whitespace=None): """Construct a Template. :arg str template_string: the contents of the template file. :arg str name: the filename from which the template was loaded (used for error message). :arg tornado.template.BaseLoader loader: the `~tornado.template.BaseLoader` responsible for this template, used to resolve ``{% include %}`` and ``{% extend %}`` directives. :arg bool compress_whitespace: Deprecated since Tornado 4.3. Equivalent to ``whitespace="single"`` if true and ``whitespace="all"`` if false. :arg str autoescape: The name of a function in the template namespace, or ``None`` to disable escaping by default. :arg str whitespace: A string specifying treatment of whitespace; see `filter_whitespace` for options. .. versionchanged:: 4.3 Added ``whitespace`` parameter; deprecated ``compress_whitespace``. """ self.name = escape.native_str(name) if compress_whitespace is not _UNSET: # Convert deprecated compress_whitespace (bool) to whitespace (str). if whitespace is not None: raise Exception( "cannot set both whitespace and compress_whitespace") whitespace = "single" if compress_whitespace else "all" if whitespace is None: if loader and loader.whitespace: whitespace = loader.whitespace else: # Whitespace defaults by filename. if name.endswith(".html") or name.endswith(".js"): whitespace = "single" else: whitespace = "all" # Validate the whitespace setting. filter_whitespace(whitespace, '') if autoescape is not _UNSET: self.autoescape = autoescape elif loader: self.autoescape = loader.autoescape else: self.autoescape = _DEFAULT_AUTOESCAPE self.namespace = loader.namespace if loader else {} reader = _TemplateReader(name, escape.native_str(template_string), whitespace) self.file = _File(self, _parse(reader, self)) self.code = self._generate_python(loader) self.loader = loader try: # Under python2.5, the fake filename used here must match # the module name used in __name__ below. # The dont_inherit flag prevents template.py's future imports # from being applied to the generated code. self.compiled = compile(escape.to_unicode(self.code), "%s.generated.py" % self.name.replace('.', '_'), "exec", dont_inherit=True) except Exception: formatted_code = _format_code(self.code).rstrip() app_log.error("%s code:\n%s", self.name, formatted_code) raise
def test_utf8_in_file(self): tmpl = self.loader.load("utf8.html") result = tmpl.generate() self.assertEqual(to_unicode(result).strip(), u"H\u00e9llo")
def upper(s): return utf8(to_unicode(s).upper())
def upper(s): return to_unicode(s).upper()