Python to_unicode Examples, censiotornado.escape.to_unicode Python Examples

Example #1

0

Show file

 def handle_read(self, data):
     """Reply to one chunk of received data, then close the stream.

     The data is decoded with ``to_unicode``.  If it is already equal to
     its own upper-cased form an error line is written back; otherwise
     the upper-cased text is returned behind an ``ok`` status field.
     """
     logging.info("handle_read")
     text = to_unicode(data)
     capitalized = text.upper()
     if text == capitalized:
         reply = b"error\talready capitalized\n"
     else:
         # The incoming data already ends with \n, so none is appended.
         reply = utf8("ok\t%s" % capitalized)
     self.stream.write(reply)
     self.stream.close()

Example #2

0

Show file

 def test_url_unescape_unicode(self):
     """Check ``url_unescape`` results for several ``encoding`` arguments.

     Each case is run twice: once with the escaped input converted by
     ``to_unicode`` and once converted by ``utf8``.
     """
     # (escaped input, expected result, encoding passed to url_unescape)
     cases = (
         ('%C3%A9', u'\u00e9', 'utf8'),
         ('%C3%A9', u'\u00c3\u00a9', 'latin1'),
         ('%C3%A9', utf8(u'\u00e9'), None),
     )
     for quoted, expected, codec in cases:
         # Input strings to url_unescape should only contain ascii
         # characters, but the function must accept both unicode and
         # byte strings, so feed it one of each.
         for convert in (to_unicode, utf8):
             self.assertEqual(url_unescape(convert(quoted), codec),
                              expected)

Example #3

0

Show file

 def test_csv_bom(self):
     """Translations must load from CSV files written in BOM encodings.

     The reference ``fr_FR.csv`` is read, re-encoded, written to a fresh
     temporary directory and loaded through ``load_translations``.
     """
     source_path = os.path.join(os.path.dirname(__file__),
                                'csv_translations', 'fr_FR.csv')
     with open(source_path, 'rb') as f:
         char_data = to_unicode(f.read())
     # The input data is utf-8 without a BOM; re-encode it in encodings
     # that use the BOM and ensure that we can still load it. Note that
     # utf-16-le and utf-16-be do not write a BOM, so we only test
     # whichever variant is native to our platform.
     for encoding in ['utf-8-sig', 'utf-16']:
         tmpdir = tempfile.mkdtemp()
         try:
             target_path = os.path.join(tmpdir, 'fr_FR.csv')
             with open(target_path, 'wb') as f:
                 f.write(char_data.encode(encoding))
             censiotornado.locale.load_translations(tmpdir)
             french = censiotornado.locale.get('fr_FR')
             self.assertIsInstance(french, censiotornado.locale.CSVLocale)
             self.assertEqual(french.translate("school"), u"\u00e9cole")
         finally:
             # Always remove the temporary directory, even on failure.
             shutil.rmtree(tmpdir)

Example #4

0

Show file

    def _handle_message(self, opcode, data):
        """Dispatch one received message according to its opcode.

        Nothing is done once ``client_terminated`` is set.  When the
        frame is flagged as compressed, ``data`` is decompressed before
        dispatch.  Unknown opcodes abort the connection.
        """
        if self.client_terminated:
            return

        if self._frame_compressed:
            data = self._decompressor.decompress(data)

        if opcode == 0x1:
            # Text message: the payload must be valid UTF-8.
            self._message_bytes_in += len(data)
            try:
                text = data.decode("utf-8")
            except UnicodeDecodeError:
                self._abort()
                return
            self._run_callback(self.handler.on_message, text)
            return

        if opcode == 0x2:
            # Binary message: hand the raw payload to the handler.
            self._message_bytes_in += len(data)
            self._run_callback(self.handler.on_message, data)
            return

        if opcode == 0x8:
            # Close: optional 2-byte big-endian code, then optional reason.
            self.client_terminated = True
            payload_size = len(data)
            if payload_size >= 2:
                self.handler.close_code = struct.unpack('>H', data[:2])[0]
                if payload_size > 2:
                    self.handler.close_reason = to_unicode(data[2:])
            # Echo the received close code, if any (RFC 6455 section 5.5.1).
            self.close(self.handler.close_code)
            return

        if opcode == 0x9:
            # Ping: answer with a pong frame (0xA) carrying the same payload.
            self._write_frame(True, 0xA, data)
            return

        if opcode == 0xA:
            # Pong
            self._run_callback(self.handler.on_pong, data)
            return

        # Any other opcode is not handled here.
        self._abort()

Example #5

0

Show file

def load_translations(directory, encoding=None):
    """Loads translations from CSV files in a directory.

    Translations are strings with optional Python-style named placeholders
    (e.g., ``My name is %(name)s``) and their associated translations.

    The directory should have translation files of the form ``LOCALE.csv``,
    e.g. ``es_GT.csv``. The CSV files should have two or three columns: string,
    translation, and an optional plural indicator. Plural indicators should
    be one of "plural" or "singular". A given string can have both singular
    and plural forms. For example ``%(name)s liked this`` may have a
    different verb conjugation depending on whether %(name)s is one
    name or a list of names. There should be two rows in the CSV file for
    that string, one with plural indicator "singular", and one "plural".
    For strings with no verbs that would change on translation, simply
    use "unknown" or the empty string (or don't include the column at all).

    The file is read using the `csv` module in the default "excel" dialect.
    In this format there should not be spaces after the commas.

    If no ``encoding`` parameter is given, the encoding will be
    detected automatically (among UTF-8 and UTF-16) if the file
    contains a byte-order marker (BOM), defaulting to UTF-8 if no BOM
    is present. Detection is performed separately for every file, so a
    directory may mix UTF-8 and UTF-16 files.

    Files whose name (minus the ``.csv`` suffix) does not look like a
    locale code, and rows with an unknown plural indicator, are logged
    and skipped. Rows with fewer than two columns are skipped silently.

    Example translation ``es_LA.csv``::

        "I love you","Te amo"
        "%(name)s liked this","A %(name)s les gustó esto","plural"
        "%(name)s liked this","A %(name)s le gustó esto","singular"

    :arg directory: path of the directory containing the CSV files.
    :arg encoding: encoding used to read every file, or ``None`` to
        autodetect per file as described above.

    .. versionchanged:: 4.3
       Added ``encoding`` parameter. Added support for BOM-based encoding
       detection, UTF-16, and UTF-8-with-BOM.
    """
    global _translations
    global _supported_locales
    _translations = {}
    for path in os.listdir(directory):
        if not path.endswith(".csv"):
            continue
        # Strip only the trailing ".csv".  A name with extra dots (e.g.
        # "es.old.csv") then fails the locale check below and is logged,
        # instead of raising ValueError from tuple unpacking.
        locale = path[:-len(".csv")]
        full_path = os.path.join(directory, path)
        if not re.match("[a-z]+(_[A-Z]+)?$", locale):
            gen_log.error("Unrecognized locale %r (path: %s)", locale,
                          full_path)
            continue
        # Keep the detected encoding in a per-file local.  Assigning to
        # ``encoding`` here would make the first file's detection result
        # apply to every later file in the directory.
        file_encoding = encoding
        if file_encoding is None:
            # Try to autodetect encoding based on the BOM.
            with open(full_path, 'rb') as bom_file:
                data = bom_file.read(len(codecs.BOM_UTF16_LE))
            if data in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
                file_encoding = 'utf-16'
            else:
                # utf-8-sig is "utf-8 with optional BOM". It's discouraged
                # in most cases but is common with CSV files because Excel
                # cannot read utf-8 files without a BOM.
                file_encoding = 'utf-8-sig'
        if PY3:
            # python 3: csv.reader requires a file open in text mode.
            # Pass the encoding explicitly to avoid dependence on the
            # $LANG environment variable.
            f = open(full_path, "r", encoding=file_encoding)
        else:
            # python 2: csv can only handle byte strings (in ascii-compatible
            # encodings), which we decode below. Transcode everything into
            # utf8 before passing it to csv.reader.
            f = BytesIO()
            with codecs.open(full_path, "r",
                             encoding=file_encoding) as infile:
                f.write(escape.utf8(infile.read()))
            f.seek(0)
        _translations[locale] = {}
        try:
            for i, row in enumerate(csv.reader(f)):
                if len(row) < 2:
                    continue
                row = [escape.to_unicode(c).strip() for c in row]
                english, translation = row[:2]
                if len(row) > 2:
                    plural = row[2] or "unknown"
                else:
                    plural = "unknown"
                if plural not in ("plural", "singular", "unknown"):
                    gen_log.error(
                        "Unrecognized plural indicator %r in %s line %d",
                        plural, path, i + 1)
                    continue
                _translations[locale].setdefault(
                    plural, {})[english] = translation
        finally:
            # Release the file even when decoding or CSV parsing fails.
            f.close()
    _supported_locales = frozenset(
        list(_translations.keys()) + [_default_locale])
    gen_log.debug("Supported locales: %s", sorted(_supported_locales))

Example #6

0

Show file

 def process_response(self, data):
     """Parse a ``status<TAB>message<NEWLINE>`` response.

     Returns the message when the status is ``ok``; any other status
     raises ``CapError`` carrying the message.
     """
     parsed = re.match('(.*)\t(.*)\n', to_unicode(data))
     status, message = parsed.groups()
     if status != 'ok':
         raise CapError(message)
     return message

Example #7

0

Show file

    def __init__(self, template_string, name="<string>", loader=None,
                 compress_whitespace=_UNSET, autoescape=_UNSET,
                 whitespace=None):
        """Parse ``template_string`` and compile it to Python code.

        :arg str template_string: the contents of the template file.
        :arg str name: the filename from which the template was loaded
            (used for error messages and to pick the default whitespace
            mode).
        :arg tornado.template.BaseLoader loader: the
            `~tornado.template.BaseLoader` responsible for this template,
            used to resolve ``{% include %}`` and ``{% extends %}``
            directives.
        :arg bool compress_whitespace: Deprecated since Tornado 4.3.
            Equivalent to ``whitespace="single"`` if true and
            ``whitespace="all"`` if false.
        :arg str autoescape: The name of a function in the template
            namespace, or ``None`` to disable escaping by default.
        :arg str whitespace: A string specifying treatment of whitespace;
            see `filter_whitespace` for options.

        Passing both ``whitespace`` and ``compress_whitespace`` raises an
        ``Exception``.  If compiling the generated code fails, the code is
        written to the application log and the error is re-raised.

        .. versionchanged:: 4.3
           Added ``whitespace`` parameter; deprecated ``compress_whitespace``.
        """
        self.name = escape.native_str(name)

        # Map the deprecated boolean flag onto the whitespace mode string.
        if compress_whitespace is not _UNSET:
            if whitespace is not None:
                raise Exception(
                    "cannot set both whitespace and compress_whitespace")
            whitespace = "single" if compress_whitespace else "all"
        if whitespace is None:
            if loader and loader.whitespace:
                whitespace = loader.whitespace
            elif name.endswith((".html", ".js")):
                # No explicit setting anywhere: default by filename.
                whitespace = "single"
            else:
                whitespace = "all"
        # Called only for validation of the mode; the result is discarded.
        filter_whitespace(whitespace, '')

        # Explicit argument first, then the loader, then the module default.
        if autoescape is not _UNSET:
            escaping = autoescape
        elif loader:
            escaping = loader.autoescape
        else:
            escaping = _DEFAULT_AUTOESCAPE
        self.autoescape = escaping

        if loader:
            self.namespace = loader.namespace
        else:
            self.namespace = {}
        reader = _TemplateReader(name, escape.native_str(template_string),
                                 whitespace)
        self.file = _File(self, _parse(reader, self))
        self.code = self._generate_python(loader)
        self.loader = loader
        try:
            # Under python2.5, the fake filename used here must match
            # the module name used in __name__ below.
            # The dont_inherit flag prevents template.py's future imports
            # from being applied to the generated code.
            self.compiled = compile(
                escape.to_unicode(self.code),
                "%s.generated.py" % self.name.replace('.', '_'),
                "exec",
                dont_inherit=True)
        except Exception:
            # Log the generated source to make the failure debuggable.
            formatted_code = _format_code(self.code).rstrip()
            app_log.error("%s code:\n%s", self.name, formatted_code)
            raise

Example #8

0

Show file

 def test_utf8_in_file(self):
     """Rendering ``utf8.html`` must produce the accented greeting."""
     rendered = self.loader.load("utf8.html").generate()
     text = to_unicode(rendered).strip()
     self.assertEqual(text, u"H\u00e9llo")

Example #9

0

Show file

 def upper(s):
     """Upper-case ``s`` and return the result passed through ``utf8``."""
     uppercased = to_unicode(s).upper()
     return utf8(uppercased)

Example #10

0

Show file

 def upper(s):
     """Convert ``s`` with ``to_unicode`` and return it upper-cased."""
     text = to_unicode(s)
     return text.upper()