Python to_unicode Exemples, pydal._compat.to_unicode Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

def sort_key(s):
    """Return a sort key for *s*, selecting the real implementation lazily.

    The Unicode Collation Algorithm (UCA)
    (http://www.unicode.org/reports/tr10/) is used for utf-8 and unicode
    strings sorting and for utf-8 strings comparison.

    Note:
        pyuca is a very memory cost module! It loads the whole
        "allkey.txt" file (~2mb!) into the memory. That is why it is
        imported only when a sort key is actually requested.

    This function is only a bootstrap: on its first call it tries to import
    the pyuca collator and then rebinds the module-level name ``sort_key``
    to the real key function. If the collator cannot be loaded, the
    replacement uses the lower-cased text as the key instead.

    Args:
        s: value to build a key for. ``str`` values are converted with
            ``to_unicode(s, 'utf-8')``; any other value is used as is.

    Returns:
        The key computed by the implementation selected during this call.
    """
    global sort_key
    try:
        from gluon.contrib.pyuca import unicode_collator
        unicode_sort_key = unicode_collator.sort_key
    except Exception:
        # Deliberate best-effort fallback. ``Exception`` (not a bare
        # ``except``) so KeyboardInterrupt/SystemExit still propagate.
        def _selected_key(value):
            text = (to_unicode(value, 'utf-8')
                    if isinstance(value, str) else value)
            return text.lower()
    else:
        def _selected_key(value):
            text = (to_unicode(value, 'utf-8')
                    if isinstance(value, str) else value)
            return unicode_sort_key(text)
    # Replace this bootstrap so later callers skip the import attempt.
    sort_key = _selected_key
    return sort_key(s)

Exemple #2

0

Afficher le fichier

def sort_key(s):
    """Return a sort key for *s*, selecting the real implementation lazily.

    The Unicode Collation Algorithm (UCA)
    (http://www.unicode.org/reports/tr10/) is used for utf-8 and unicode
    strings sorting and for utf-8 strings comparison.

    Note:
        pyuca is a very memory cost module! It loads the whole
        "allkey.txt" file (~2mb!) into the memory. That is why it is
        imported only when a sort key is actually requested.

    This function is only a bootstrap: on its first call it tries to import
    the pyuca collator and then rebinds the module-level name ``sort_key``
    to the real key function. If the collator cannot be loaded, the
    replacement uses the lower-cased text as the key instead.

    Args:
        s: value to build a key for. ``str`` values are converted with
            ``to_unicode(s, 'utf-8')``; any other value is used as is.

    Returns:
        The key computed by the implementation selected during this call.
    """
    global sort_key
    try:
        from gluon.contrib.pyuca import unicode_collator
        unicode_sort_key = unicode_collator.sort_key
    except Exception:
        # Deliberate best-effort fallback. ``Exception`` (not a bare
        # ``except``) so KeyboardInterrupt/SystemExit still propagate.
        def _selected_key(value):
            text = (to_unicode(value, 'utf-8')
                    if isinstance(value, str) else value)
            return text.lower()
    else:
        def _selected_key(value):
            text = (to_unicode(value, 'utf-8')
                    if isinstance(value, str) else value)
            return unicode_sort_key(text)
    # Replace this bootstrap so later callers skip the import attempt.
    sort_key = _selected_key
    return sort_key(s)

Exemple #3

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

    def __repr__(self):
        r'''Return a quoted, escaped form of the string that keeps utf8 text readable.

        This docstring is a raw string so that the doctests below do not need
        doubled back slashes.

        A value that contains single quotes but no double quotes is wrapped in
        double quotes and translated with ``repr_escape_tab``; every other
        value is wrapped in single quotes and translated with
        ``repr_escape_tab2``.

        NOTE! This function is a clone of web2py:gluon.languages.utf_repl() function::

            Utf8.__repr__() behaves like str.repr() for an ascii string
            >>> repr(Utf8('abc')) == repr(Utf8("abc")) == repr('abc') == repr("abc") == "'abc'"
            True
            >>> repr(Utf8('a"b"c')) == repr('a"b"c') == '\'a"b"c\''
            True
            >>> repr(Utf8("a'b'c")) == repr("a'b'c") == '"a\'b\'c"'
            True
            >>> repr(Utf8('a\'b"c')) == repr('a\'b"c') == repr(Utf8("a'b\"c")) == repr("a'b\"c") == '\'a\\\'b"c\''
            True
            >>> repr(Utf8('a\r\nb')) == repr('a\r\nb') == "'a\\r\\nb'" # Test for \r, \n
            True

        Unlike str.repr(), Utf8.__repr__() keeps the utf8 content when processing a utf8 string::

            >>> repr(Utf8('中文字')) == repr(Utf8("中文字")) == "'中文字'" != repr('中文字')
            True
            >>> repr(Utf8('中"文"字')) == "'中\"文\"字'" != repr('中"文"字')
            True
            >>> repr(Utf8("中'文'字")) == '"中\'文\'字"' != repr("中'文'字")
            True
            >>> repr(Utf8('中\'文"字')) == repr(Utf8("中'文\"字")) == '\'中\\\'文"字\'' != repr('中\'文"字') == repr("中'文\"字")
            True
            >>> repr(Utf8('中\r\n文')) == "'中\\r\\n文'" != repr('中\r\n文') # Test for \r, \n
            True
        '''
        has_single = str.find(self, "'") >= 0
        if has_single and str.find(self, '"') < 0:
            # Only single quotes present: double-quote the value so they
            # need no escaping.
            quote, table = '"', repr_escape_tab
        else:
            quote, table = "'", repr_escape_tab2
        escaped = to_unicode(self, 'utf-8').translate(table)
        return quote + to_native(escaped, 'utf-8') + quote

Exemple #4

0

Afficher le fichier

def truncate(string, length, dots='...'):
    """Return *string* shortened to at most *length* characters.

    Lengths are measured on the text returned by
    ``to_unicode(string, 'utf-8')``. Text longer than *length* is cut and
    *dots* is appended so that the result is *length* characters long;
    shorter text is returned unchanged.

    Args:
        string: text to shorten.
        length (int): max length of the returned string.
        dots (str or unicode): suffix appended when the string is cut.

    Returns:
        (utf8-str): original or cut string.
    """
    text = to_unicode(string, 'utf-8')
    dots = to_unicode(dots, 'utf-8') if isinstance(dots, str) else dots
    if len(text) > length:
        # Clamp the kept prefix at zero: a negative slice end would count
        # from the end of the text and give a result longer than *length*.
        keep = max(length - len(dots), 0)
        # Clip as well, so a suffix longer than *length* cannot overshoot.
        text = (text[:keep] + dots)[:max(length, 0)]
    # NOTE(review): sibling Utf8 methods build the value with to_native();
    # confirm that passing encoded bytes here is intended on Python 3.
    return str.__new__(Utf8, text.encode('utf-8'))

Exemple #5

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

def ord(char):
    """Return the unicode code point of *char*.

    *char* is SUPPOSED to be a single utf-8 or unicode character; anything
    that is not already ``unicodeT`` is first converted with
    ``to_unicode(char, 'utf-8')`` before the builtin ``ord`` is applied.
    """
    if not isinstance(char, unicodeT):
        char = to_unicode(char, 'utf-8')
    return __builtin__.ord(char)

Exemple #6

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

def truncate(string, length, dots='...'):
    """Return *string* shortened to at most *length* characters.

    Lengths are measured on the text returned by
    ``to_unicode(string, 'utf-8')``. Text longer than *length* is cut and
    *dots* is appended so that the result is *length* characters long;
    shorter text is returned unchanged.

    Args:
        string: text to shorten.
        length (int): max length of the returned string.
        dots (str or unicode): suffix appended when the string is cut.

    Returns:
        (utf8-str): original or cut string.
    """
    text = to_unicode(string, 'utf-8')
    dots = to_unicode(dots, 'utf-8') if isinstance(dots, str) else dots
    if len(text) > length:
        # Clamp the kept prefix at zero: a negative slice end would count
        # from the end of the text and give a result longer than *length*.
        keep = max(length - len(dots), 0)
        # Clip as well, so a suffix longer than *length* cannot overshoot.
        text = (text[:keep] + dots)[:max(length, 0)]
    # NOTE(review): sibling Utf8 methods build the value with to_native();
    # confirm that passing encoded bytes here is intended on Python 3.
    return str.__new__(Utf8, text.encode('utf-8'))

Exemple #7

0

Afficher le fichier

def ord(char):
    """Return the unicode code point of *char*.

    *char* is SUPPOSED to be a single utf-8 or unicode character; anything
    that is not already ``unicodeT`` is first converted with
    ``to_unicode(char, 'utf-8')`` before the builtin ``ord`` is applied.
    """
    if not isinstance(char, unicodeT):
        char = to_unicode(char, 'utf-8')
    return __builtin__.ord(char)

Exemple #8

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

 def __new__(cls, content='', codepage='utf-8'):
     """Create an instance of *cls* from *content*.

     Args:
         content: initial value. ``unicodeT`` values are converted with
             ``to_native(content, 'utf-8')``.
         codepage: encoding of a non-unicode *content*. For 'utf-8'/'utf8',
             or when *content* is already an instance of *cls*, the value
             is stored unchanged; otherwise it is converted through
             ``to_unicode(content, codepage)`` and ``to_native(..., 'utf-8')``.
     """
     if isinstance(content, unicodeT):
         native = to_native(content, 'utf-8')
     elif codepage in ('utf-8', 'utf8') or isinstance(content, cls):
         # Already in the target encoding: store as is.
         native = content
     else:
         native = to_native(to_unicode(content, codepage), 'utf-8')
     return str.__new__(cls, native)

Exemple #9

0

Afficher le fichier

 def __new__(cls, content='', codepage='utf-8'):
     """Create an instance of *cls* from *content*.

     Args:
         content: initial value. ``unicodeT`` values are converted with
             ``to_native(content, 'utf-8')``.
         codepage: encoding of a non-unicode *content*. For 'utf-8'/'utf8',
             or when *content* is already an instance of *cls*, the value
             is stored unchanged; otherwise it is converted through
             ``to_unicode(content, codepage)`` and ``to_native(..., 'utf-8')``.
     """
     if isinstance(content, unicodeT):
         native = to_native(content, 'utf-8')
     elif codepage in ('utf-8', 'utf8') or isinstance(content, cls):
         # Already in the target encoding: store as is.
         native = content
     else:
         native = to_native(to_unicode(content, codepage), 'utf-8')
     return str.__new__(cls, native)

Exemple #10

0

Afficher le fichier

    def __repr__(self):
        r'''Return a quoted, escaped form of the string that keeps utf8 text readable.

        This docstring is a raw string so that the doctests below do not need
        doubled back slashes.

        A value that contains single quotes but no double quotes is wrapped in
        double quotes and translated with ``repr_escape_tab``; every other
        value is wrapped in single quotes and translated with
        ``repr_escape_tab2``.

        NOTE! This function is a clone of web2py:gluon.languages.utf_repl() function::

            Utf8.__repr__() behaves like str.repr() for an ascii string
            >>> repr(Utf8('abc')) == repr(Utf8("abc")) == repr('abc') == repr("abc") == "'abc'"
            True
            >>> repr(Utf8('a"b"c')) == repr('a"b"c') == '\'a"b"c\''
            True
            >>> repr(Utf8("a'b'c")) == repr("a'b'c") == '"a\'b\'c"'
            True
            >>> repr(Utf8('a\'b"c')) == repr('a\'b"c') == repr(Utf8("a'b\"c")) == repr("a'b\"c") == '\'a\\\'b"c\''
            True
            >>> repr(Utf8('a\r\nb')) == repr('a\r\nb') == "'a\\r\\nb'" # Test for \r, \n
            True

        Unlike str.repr(), Utf8.__repr__() keeps the utf8 content when processing a utf8 string::

            >>> repr(Utf8('中文字')) == repr(Utf8("中文字")) == "'中文字'" != repr('中文字')
            True
            >>> repr(Utf8('中"文"字')) == "'中\"文\"字'" != repr('中"文"字')
            True
            >>> repr(Utf8("中'文'字")) == '"中\'文\'字"' != repr("中'文'字")
            True
            >>> repr(Utf8('中\'文"字')) == repr(Utf8("中'文\"字")) == '\'中\\\'文"字\'' != repr('中\'文"字') == repr("中'文\"字")
            True
            >>> repr(Utf8('中\r\n文')) == "'中\\r\\n文'" != repr('中\r\n文') # Test for \r, \n
            True
        '''
        has_single = str.find(self, "'") >= 0
        if has_single and str.find(self, '"') < 0:
            # Only single quotes present: double-quote the value so they
            # need no escaping.
            quote, table = '"', repr_escape_tab
        else:
            quote, table = "'", repr_escape_tab2
        escaped = to_unicode(self, 'utf-8').translate(table)
        return quote + to_native(escaped, 'utf-8') + quote

Exemple #11

0

Afficher le fichier

Fichier : languages.py Projet : Br1an20/GameEventWeb

def write_dict(filename, contents):
    """Write *contents* to *filename* as a Python dict literal.

    The file starts with a utf-8 coding line, then holds one
    ``key: value,`` line per entry (both rendered with ``repr(Utf8(...))``),
    ordered by the lower-cased unicode form of the key.

    Nothing is written when *contents* has a '__corrupted__' key.
    IOError/OSError is not propagated; a warning is logged only when
    ``is_writable()`` is true. The file is closed in every case.
    """
    if '__corrupted__' in contents:
        return
    handle = None
    try:
        handle = LockedFile(filename, 'w')
        handle.write('# -*- coding: utf-8 -*-\n{\n')
        ordered_keys = sorted(
            contents, key=lambda k: to_unicode(k, 'utf-8').lower())
        for key in ordered_keys:
            entry = (repr(Utf8(key)), repr(Utf8(contents[key])))
            handle.write('%s: %s,\n' % entry)
        handle.write('}\n')
    except (IOError, OSError):
        if is_writable():
            logging.warning('Unable to write to file %s' % filename)
    finally:
        if handle:
            handle.close()

Exemple #12

0

Afficher le fichier

Fichier : languages.py Projet : fc7/web2py

def write_dict(filename, contents):
    """Write *contents* to *filename* as a Python dict literal.

    The file starts with a utf-8 coding line, then holds one
    ``key: value,`` line per entry (both rendered with ``repr(Utf8(...))``),
    ordered by the lower-cased unicode form of the key.

    Nothing is written when *contents* has a '__corrupted__' key.
    IOError/OSError is not propagated; a warning is logged only when
    ``is_writable()`` is true. The file is closed in every case.
    """
    if '__corrupted__' in contents:
        return
    handle = None
    try:
        handle = LockedFile(filename, 'w')
        handle.write('# -*- coding: utf-8 -*-\n{\n')
        ordered_keys = sorted(
            contents, key=lambda k: to_unicode(k, 'utf-8').lower())
        for key in ordered_keys:
            entry = (repr(Utf8(key)), repr(Utf8(contents[key])))
            handle.write('%s: %s,\n' % entry)
        handle.write('}\n')
    except (IOError, OSError):
        if is_writable():
            logging.warning('Unable to write to file %s' % filename)
    finally:
        if handle:
            handle.close()

Exemple #13

0

Afficher le fichier

Fichier : languages.py Projet : leonelcamara/web2py

 def sub_dict(m):
     """Build the replacement text for one ``word(key or num)`` match.

     Recognized forms of the matched word::

         word(key or num)
         !word(key or num), !!word(key or num), !!!word(key or num)
         ?word1?word(key or num)
              ??word(key or num), ?word(key or num)
         ?word1?word?word0(key or num)
         ?word1?word?(key or num)
              ??word?word0(key or num)
         ?word1?word?(key or num)
              ??word?(key or num), ?word?(key or num)

     *m* is a match object with groups 'w' (the word) and 'n' (a number, or
     a key looked up in ``symbols``). A plain word is passed to
     ``self.plural``. A '?' word selects word1 for 1, word0 for 0 and word
     otherwise. '!', '!!' and '!!!' apply ``cap_fun``, ``title_fun`` and
     ``upper_fun`` to the pluralized word.
     """
     w, n = m.group('w', 'n')
     marker = w[0]
     n = int(n) if n.isdigit() else symbols[n]
     if marker not in '!?':
         return self.plural(w, n)
     if marker == '?':
         # ?[word1]?word[?word0](key or num), ?[word1]?word(key or num) or ?word(key or num)
         one_form, found, rest = w[1:].partition("?")
         if not found:
             # A single segment is the general word; there is no form for 1.
             rest, one_form = one_form, ""
         many_form, found, zero_form = rest.partition("?")
         if not found:
             # Without an explicit form for 0, reuse the general word.
             zero_form = many_form
         count = int(n)
         if count == 1:
             return one_form
         if count == 0:
             return zero_form
         return many_form
     # From here on the word starts with one to three '!' markers.
     if w.startswith('!!!'):
         word, fun = w[3:], upper_fun
     elif w.startswith('!!'):
         word, fun = w[2:], title_fun
     else:
         word, fun = w[1:], cap_fun
     s = fun(self.plural(word, n))
     if PY2:
         return s
     return to_unicode(s)

Exemple #14

0

Afficher le fichier

Fichier : languages.py Projet : tomodachi/web2py

 def sub_dict(m):
     """Build the replacement text for one ``word(key or num)`` match.

     Recognized forms of the matched word::

         word(key or num)
         !word(key or num), !!word(key or num), !!!word(key or num)
         ?word1?word(key or num)
              ??word(key or num), ?word(key or num)
         ?word1?word?word0(key or num)
         ?word1?word?(key or num)
              ??word?word0(key or num)
         ?word1?word?(key or num)
              ??word?(key or num), ?word?(key or num)

     *m* is a match object with groups 'w' (the word) and 'n' (a number, or
     a key looked up in ``symbols``). A plain word is passed to
     ``self.plural``. A '?' word selects word1 for 1, word0 for 0 and word
     otherwise. '!', '!!' and '!!!' apply ``cap_fun``, ``title_fun`` and
     ``upper_fun`` to the pluralized word.
     """
     w, n = m.group('w', 'n')
     marker = w[0]
     n = int(n) if n.isdigit() else symbols[n]
     if marker not in '!?':
         return self.plural(w, n)
     if marker == '?':
         # ?[word1]?word[?word0](key or num), ?[word1]?word(key or num) or ?word(key or num)
         one_form, found, rest = w[1:].partition("?")
         if not found:
             # A single segment is the general word; there is no form for 1.
             rest, one_form = one_form, ""
         many_form, found, zero_form = rest.partition("?")
         if not found:
             # Without an explicit form for 0, reuse the general word.
             zero_form = many_form
         count = int(n)
         if count == 1:
             return one_form
         if count == 0:
             return zero_form
         return many_form
     # From here on the word starts with one to three '!' markers.
     if w.startswith('!!!'):
         word, fun = w[3:], upper_fun
     elif w.startswith('!!'):
         word, fun = w[2:], title_fun
     else:
         word, fun = w[1:], cap_fun
     s = fun(self.plural(word, n))
     if PY2:
         return s
     return to_unicode(s)

Exemple #15

0

Afficher le fichier

| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
| Created by Vladyslav Kozlovskyy (Ukraine) <dbdevelop©gmail.com>
| for Web2py project

Utilities and class for UTF8 strings managing
----------------------------------------------
"""
from __future__ import print_function
from pydal._compat import builtin as __builtin__, unicodeT, iteritems, to_unicode, to_native

__all__ = ['Utf8']

# Translation tables keyed by code point, for use with ``translate()``.
# ``repr_escape_tab`` maps code points 1-31 to ``\xNN`` escapes, with the
# short escapes for 7-13, and maps the back slash to a doubled back slash.
# ``repr_escape_tab2`` is a copy that additionally escapes the single quote.
repr_escape_tab = {}
#FIXME PY3
for i in range(1, 32):
    repr_escape_tab[i] = to_unicode("\\x%02x" % i)
repr_escape_tab.update({
    7: u'\\a',
    8: u'\\b',
    9: u'\\t',
    10: u'\\n',
    11: u'\\v',
    12: u'\\f',
    13: u'\\r',
    ord('\\'): u'\\\\',
})
repr_escape_tab2 = dict(repr_escape_tab)
repr_escape_tab2[ord('\'')] = u"\\'"


def sort_key(s):
    """Unicode Collation Algorithm (UCA) (http://www.unicode.org/reports/tr10/)
    is used for utf-8 and unicode strings sorting and for utf-8 strings

Exemple #16

0

Afficher le fichier

Fichier : languages.py Projet : tomodachi/web2py

def upper_fun(s):
    """Upper-case *s* as unicode text and return it through ``to_bytes``."""
    uppercased = to_unicode(s).upper()
    return to_bytes(uppercased)

Exemple #17

0

Afficher le fichier

Fichier : languages.py Projet : tomodachi/web2py

def title_fun(s):
    """Title-case *s* as unicode text and return it through ``to_bytes``."""
    titled = to_unicode(s).title()
    return to_bytes(titled)

Exemple #18

0

Afficher le fichier

Fichier : languages.py Projet : leonelcamara/web2py

def sort_function(x):
    """Return *x* converted with ``to_unicode(x, 'utf-8')`` and lower-cased."""
    as_text = to_unicode(x, 'utf-8')
    return as_text.lower()

Exemple #19

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

 def __len__(self):
     """Return the length of the value after ``to_unicode(self, 'utf-8')``."""
     decoded = to_unicode(self, 'utf-8')
     return len(decoded)

Exemple #20

0

Afficher le fichier

Fichier : languages.py Projet : leonelcamara/web2py

 def encode(self, *a, **b):
     """Return ``str(self)``, re-encoded on Python 2 when requested.

     On Python 2 the text is converted with ``to_unicode`` and encoded with
     the given arguments, unless the first positional argument is 'utf8'.
     In every other case ``str(self)`` is returned unchanged.

     Args:
         *a: positional arguments forwarded to ``encode`` (encoding first).
         **b: keyword arguments forwarded to ``encode``.
     """
     text = str(self)
     # Check for missing positional arguments before reading ``a[0]``:
     # ``encode()`` or ``encode(encoding=...)`` used to raise IndexError
     # on Python 2.
     if PY2 and (not a or a[0] != 'utf8'):
         return to_unicode(text).encode(*a, **b)
     return text

Exemple #21

0

Afficher le fichier

Fichier : languages.py Projet : tomodachi/web2py

def cap_fun(s):
    """Capitalize *s* as unicode text and return it through ``to_bytes``."""
    capitalized = to_unicode(s).capitalize()
    return to_bytes(capitalized)

Exemple #22

0

Afficher le fichier

 def __len__(self):
     """Return the length of the value after ``to_unicode(self, 'utf-8')``."""
     decoded = to_unicode(self, 'utf-8')
     return len(decoded)

Exemple #23

0

Afficher le fichier

 def __getitem__(self, index):
     """Return ``self[index]`` as a new ``Utf8``.

     Indexing is applied to the ``to_unicode(self, 'utf-8')`` text, and the
     selection is converted back with ``to_native(..., 'utf-8')``.
     """
     selected = to_unicode(self, 'utf-8')[index]
     return str.__new__(Utf8, to_native(selected, 'utf-8'))

Exemple #24

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

| License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
| Created by Vladyslav Kozlovskyy (Ukraine) <dbdevelop©gmail.com>
| for Web2py project

Utilities and class for UTF8 strings managing
----------------------------------------------
"""
from __future__ import print_function
from pydal._compat import builtin as __builtin__, unicodeT, iteritems, to_unicode, to_native

__all__ = ['Utf8']

# Translation tables keyed by code point, for use with ``translate()``.
# ``repr_escape_tab`` maps code points 1-31 to ``\xNN`` escapes, with the
# short escapes for 7-13, and maps the back slash to a doubled back slash.
# ``repr_escape_tab2`` is a copy that additionally escapes the single quote.
repr_escape_tab = {}
#FIXME PY3
for i in range(1, 32):
    repr_escape_tab[i] = to_unicode("\\x%02x" % i)
repr_escape_tab.update({
    7: u'\\a',
    8: u'\\b',
    9: u'\\t',
    10: u'\\n',
    11: u'\\v',
    12: u'\\f',
    13: u'\\r',
    ord('\\'): u'\\\\',
})
repr_escape_tab2 = dict(repr_escape_tab)
repr_escape_tab2[ord('\'')] = u"\\'"


def sort_key(s):
    """Unicode Collation Algorithm (UCA) (http://www.unicode.org/reports/tr10/)
    is used for utf-8 and unicode strings sorting and for utf-8 strings

Exemple #25

0

Afficher le fichier

Fichier : languages.py Projet : tomodachi/web2py

 def encode(self, *a, **b):
     """Return ``str(self)``, re-encoded on Python 2 when requested.

     On Python 2 the text is converted with ``to_unicode`` and encoded with
     the given arguments, unless the first positional argument is 'utf8'.
     In every other case ``str(self)`` is returned unchanged.

     Args:
         *a: positional arguments forwarded to ``encode`` (encoding first).
         **b: keyword arguments forwarded to ``encode``.
     """
     text = str(self)
     # Check for missing positional arguments before reading ``a[0]``:
     # ``encode()`` or ``encode(encoding=...)`` used to raise IndexError
     # on Python 2.
     if PY2 and (not a or a[0] != 'utf8'):
         return to_unicode(text).encode(*a, **b)
     return text

Exemple #26

0

Afficher le fichier

Fichier : languages.py Projet : tomodachi/web2py

def sort_function(x):
    """Return *x* converted with ``to_unicode(x, 'utf-8')`` and lower-cased."""
    as_text = to_unicode(x, 'utf-8')
    return as_text.lower()

Exemple #27

0

Afficher le fichier

 def __getslice__(self, begin, end):
     """Return ``self[begin:end]`` as a new ``Utf8``.

     The slice is taken on the ``to_unicode(self, 'utf-8')`` text, and the
     result is converted back with ``to_native(..., 'utf-8')``.
     """
     piece = to_unicode(self, 'utf-8')[begin:end]
     return str.__new__(Utf8, to_native(piece, 'utf-8'))

Exemple #28

0

Afficher le fichier

Fichier : languages.py Projet : leonelcamara/web2py

def title_fun(s):
    """Title-case *s* as unicode text and return it through ``to_bytes``."""
    titled = to_unicode(s).title()
    return to_bytes(titled)

Exemple #29

0

Afficher le fichier

Fichier : languages.py Projet : leonelcamara/web2py

def cap_fun(s):
    """Capitalize *s* as unicode text and return it through ``to_bytes``."""
    capitalized = to_unicode(s).capitalize()
    return to_bytes(capitalized)

Exemple #30

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

 def __getitem__(self, index):
     """Return ``self[index]`` as a new ``Utf8``.

     Indexing is applied to the ``to_unicode(self, 'utf-8')`` text, and the
     selection is converted back with ``to_native(..., 'utf-8')``.
     """
     selected = to_unicode(self, 'utf-8')[index]
     return str.__new__(Utf8, to_native(selected, 'utf-8'))

Exemple #31

0

Afficher le fichier

Fichier : languages.py Projet : leonelcamara/web2py

def upper_fun(s):
    """Upper-case *s* as unicode text and return it through ``to_bytes``."""
    uppercased = to_unicode(s).upper()
    return to_bytes(uppercased)

Exemple #32

0

Afficher le fichier

Fichier : utf8.py Projet : fc7/web2py

 def __getslice__(self, begin, end):
     """Return ``self[begin:end]`` as a new ``Utf8``.

     The slice is taken on the ``to_unicode(self, 'utf-8')`` text, and the
     result is converted back with ``to_native(..., 'utf-8')``.
     """
     piece = to_unicode(self, 'utf-8')[begin:end]
     return str.__new__(Utf8, to_native(piece, 'utf-8'))