コード例 #1
0
 def test_mathematical_digits(self):
     # The 50 codepoints in [0x1d7ce, 0x1d800) form five back-to-back
     # runs of the digits 0-9; each one must transliterate to the plain
     # ASCII digit at the same position within its run.
     first = 0x1d7ce
     for offset in range(0x1d800 - first):
         actual = unidecode(chr(first + offset))
         expected = chr(ord('0') + offset % 10)
         self.assertEqual(actual, expected)
コード例 #2
0
 def test_circled_latin(self):
     # The 26 codepoints starting at U+24D0 are a single run that must
     # transliterate to the plain letters a-z, in order.
     for offset, letter in enumerate('abcdefghijklmnopqrstuvwxyz'):
         actual = unidecode(chr(0x24d0 + offset))
         self.assertEqual(actual, letter)
コード例 #3
0
    def to_python(self, *args, **kwargs):
        """
        Convert the value via the parent class, then normalise the result.

        All arguments are forwarded unchanged to the parent ``to_python``.
        A falsy result (``None``, empty string, ...) is returned as is.
        Otherwise the configured transformations are applied in this
        order: ASCII transliteration, case folding, whitespace handling.
        """
        value = super(EncryptedCharField, self).to_python(*args, **kwargs)
        if not value:
            # Nothing to normalise.
            return value

        if self.force_ascii:
            value = unidecode(value)

        # Lower-casing takes precedence when both case flags are set.
        if self.force_lower:
            value = value.lower()
        elif self.force_upper:
            value = value.upper()

        # Collapsing whitespace runs also trims the ends, so trim_spaces
        # is only consulted when normalize_spaces is off.
        if self.normalize_spaces:
            value = ' '.join(value.split())
        elif self.trim_spaces:
            value = value.strip()

        return value
コード例 #4
0
    def test_mathematical_latin(self):
        # [0x1d400, 0x1d6a4) holds 13 back-to-back alphabets, each laid
        # out as A-Z followed by a-z. Some codepoints in the range are
        # undefined and transliterate to an empty string; those are only
        # counted, their positions are not verified.
        #
        # 0x1d400 is an exact multiple of 52 (and therefore of 26), which
        # is why the raw codepoint can be reduced modulo 52 / 26 without
        # first subtracting the start of the range.
        undefined = 0
        for codepoint in range(0x1d400, 0x1d6a4):
            actual = unidecode(chr(codepoint))
            if not actual:
                undefined += 1
                continue

            base = 'A' if codepoint % 52 < 26 else 'a'
            expected = chr(ord(base) + codepoint % 26)
            self.assertEqual(actual, expected)

        self.assertEqual(undefined, 24)
コード例 #5
0
def slugify(string,
            extra_characters='',
            ascii=False,
            lower=True,
            spaces=False):
    """
    Build a slug from ``string``.

    The input is first coerced with ``force_text``. When ``ascii`` is true
    it is transliterated with ``unidecode()``; otherwise it is normalised
    to Unicode form NFKC.

    Each character is then handled according to its Unicode general
    category:

    * letters and numbers, plus anything listed in ``extra_characters``,
      are kept;
    * punctuation, symbols and separators become a single space
      placeholder;
    * every other character is dropped.

    The result is stripped and every match of ``SPACES_RE`` (presumably a
    run of whitespace -- the pattern is defined elsewhere in the module)
    is replaced by a space when ``spaces`` is true, or by a dash
    otherwise. The slug is lower-cased unless ``lower`` is false.

    This is a modified version of the eponymous function of the Mozilla
    Foundation, adapted to integrate with ``unidecode()``.
    """
    text = force_text(string)
    if ascii:
        text = unidecode(text)
    else:
        text = unicodedata.normalize('NFKC', text)

    kept = []
    for char in text:
        # Only the major class (first letter) of the category matters.
        major = unicodedata.category(char)[0]
        if major in 'LN' or char in extra_characters:
            # L = letter, N = number.
            kept.append(char)
        elif major in 'PSZ':
            # P = punctuation, S = symbol, Z = separator.
            kept.append(' ')

    separator = ' ' if spaces else '-'
    slug = SPACES_RE.sub(separator, ''.join(kept).strip())

    return slug.lower() if lower else slug
コード例 #6
0
    def to_python(self, value):
        """
        Coerce ``value`` to text and apply the configured normalisation.

        ``None`` is passed through untouched. Anything that is not already
        a string is converted with ``force_text``. Whitespace handling runs
        first; if that leaves an empty string it is returned immediately.
        Otherwise ASCII transliteration and case folding follow, in that
        order.
        """
        if value is None:
            return None

        text = value if isinstance(value, six.string_types) else force_text(value)

        # Collapsing whitespace runs also trims the ends, so trim_spaces
        # is only consulted when normalize_spaces is off.
        if self.normalize_spaces:
            text = ' '.join(text.split())
        elif self.trim_spaces:
            text = text.strip()

        if text:
            if self.force_ascii:
                text = unidecode(text)

            # Lower-casing takes precedence when both case flags are set.
            if self.force_lower:
                text = text.lower()
            elif self.force_upper:
                text = text.upper()

        return text
コード例 #7
0
 def test_bmp(self):
     # Smoke test: transliterating any single codepoint below 0x10000
     # must not raise. The output itself is not inspected.
     for char in map(chr, range(0x10000)):
         unidecode(char)
コード例 #8
0
 def test_ascii(self):
     # Each of the 128 ASCII characters must come back unchanged.
     for char in map(chr, range(128)):
         self.assertEqual(unidecode(char), char)
コード例 #9
0
 def checkUnidecode(self, tests):
     """
     Assert that every input in ``tests`` transliterates as expected.

     ``tests`` is an iterable of ``(input, expected_output)`` pairs.
     """
     for source, expected in tests:
         actual = unidecode(source)
         self.assertEqual(actual, expected)
コード例 #10
0
def toascii(value):
    """
    Return ``value`` passed through ``htmlentities.decode`` and then
    transliterated with ``unidecode``.
    """
    decoded = htmlentities.decode(value)
    return unidecode(decoded)