Example #1
0
def unormalize(ustring, ignorenonascii=None, substitute=None):
    """Strip diacritics from a unicode string, yielding ASCII characters.

    Every character is first looked up in MANUAL_UNICODE_MAP. When it has no
    entry there, it is put into normal form KD (NFKD, i.e. compatibility
    decomposition) and only the first character of the decomposed sequence
    is retained. If that first character is still outside the ASCII range,
    `substitute` is used in its place.

    :param ustring: the unicode string to convert
    :param ignorenonascii: deprecated boolean flag; passing any value other
      than None emits a DeprecationWarning, and a true value behaves like
      ``substitute=''``
    :type substitute: str
    :param substitute: replacement to use when a character has no ASCII
      base; when None (the default) a ValueError is raised instead
    :raise ValueError: a character has no ASCII base and no substitute
      was given

    :see: Another project about ASCII transliterations of Unicode text
          http://pypi.python.org/pypi/Unidecode
    """
    # legacy API: the old boolean flag is mapped onto `substitute`
    if ignorenonascii is not None:
        warn(
            "ignorenonascii is deprecated, use substitute named parameter instead",
            DeprecationWarning,
            stacklevel=2)
        if ignorenonascii:
            substitute = ''

    def _ascii_for(char):
        # hand-written mapping wins over unicode decomposition
        try:
            return MANUAL_UNICODE_MAP[char]
        except KeyError:
            base = _uninormalize('NFKD', char)[0]
            if ord(base) < 2**7:
                return base
            if substitute is None:
                raise ValueError(
                    "can't deal with non-ascii based characters")
            return substitute

    return u''.join([_ascii_for(char) for char in ustring[:]])
Example #2
0
def unormalize(ustring, ignorenonascii=None, substitute=None):
    """Return `ustring` with accented characters reduced to ASCII ones.

    A character found in MANUAL_UNICODE_MAP is replaced by its mapped value.
    Any other character is converted to normal form KD (NFKD, compatibility
    decomposition), which may expand it into several characters; only the
    first of them is kept. When even that one is not ASCII, `substitute`
    takes its place.

    :param ustring: the unicode string to convert
    :param ignorenonascii: deprecated boolean; a non-None value emits a
      DeprecationWarning and a true value is equivalent to ``substitute=''``
    :type substitute: str
    :param substitute: replacement character to use if decomposition fails;
      if left to None, a ValueError is raised in that case
    :raise ValueError: decomposition did not produce an ASCII character and
      `substitute` is None

    :see: Another project about ASCII transliterations of Unicode text
          http://pypi.python.org/pypi/Unidecode
    """
    # the boolean `ignorenonascii` predates `substitute`; translate it
    if ignorenonascii is not None:
        warn("ignorenonascii is deprecated, use substitute named parameter instead",
             DeprecationWarning, stacklevel=2)
        if ignorenonascii:
            substitute = ''
    chunks = []
    for char in ustring[:]:
        try:
            chunks.append(MANUAL_UNICODE_MAP[char])
        except KeyError:
            # no manual mapping: keep the leading char of the decomposition
            base = _uninormalize('NFKD', char)[0]
            if ord(base) < 2 ** 7:
                chunks.append(base)
            elif substitute is not None:
                chunks.append(substitute)
            else:
                raise ValueError("can't deal with non-ascii based characters")
    return u''.join(chunks)
Example #3
0
def unormalize(ustring, ignorenonascii=False):
    """Replace diacritical characters with their corresponding ascii characters.

    Each character is first looked up in MANUAL_UNICODE_MAP. On a miss it is
    decomposed with the NFD normal form and only the first character of the
    decomposition (the base character) is kept.

    The ASCII test is applied to the decomposed base character, not to the
    input character: a character above U+00FF that decomposes to an ASCII
    base is converted, and a character whose base is not ASCII is never
    returned as is.

    :param ustring: the unicode string to convert
    :param ignorenonascii: when true, characters without an ASCII base are
      silently dropped instead of raising
    :return: the converted unicode string
    :raise ValueError: a character has no ASCII base and `ignorenonascii`
      is false
    """
    res = []
    for letter in ustring:
        try:
            replacement = MANUAL_UNICODE_MAP[letter]
        except KeyError:
            # decompose first, then judge the result: checking the input
            # code point would reject decomposable characters and let
            # non-decomposable non-ascii ones through
            replacement = _uninormalize('NFD', letter)[0]
            if ord(replacement) >= 2**7:
                if ignorenonascii:
                    continue
                raise ValueError("can't deal with non-ascii based characters")
        res.append(replacement)
    return u''.join(res)
Example #4
0
def unormalize(ustring, ignorenonascii=False):
    """Replace diacritical characters with their corresponding ascii characters.

    A character present in MANUAL_UNICODE_MAP is replaced by its mapped
    value. Any other character is put into the NFD normal form and the
    first character of the result is used as its replacement.

    Whether a character can be handled is decided from that replacement:
    it must be in the ASCII range (below 2**7). The input code point itself
    is not tested, so characters above U+00FF with an ASCII base are
    accepted and non-ASCII characters are never copied to the output.

    :param ustring: the unicode string to convert
    :param ignorenonascii: when true, drop characters that have no ASCII
      base instead of raising
    :return: the converted unicode string
    :raise ValueError: a character has no ASCII base and `ignorenonascii`
      is false
    """
    res = []
    for letter in ustring:
        try:
            replacement = MANUAL_UNICODE_MAP[letter]
        except KeyError:
            replacement = _uninormalize('NFD', letter)[0]
            # test the decomposed base, not the original character
            if ord(replacement) >= 2**7:
                if ignorenonascii:
                    continue
                raise ValueError("can't deal with non-ascii based characters")
        res.append(replacement)
    return u''.join(res)
Example #5
0
def unormalize(ustring, ignorenonascii=False):
    """Replace diacritical characters with their corresponding ascii characters.

    Characters found in MANUAL_UNICODE_MAP are replaced by their mapped
    value. Every other character is converted to its long normalized form
    (one unicode character may be transformed into several characters) and
    only the first one is kept. The normal form KD (NFKD) applies the
    compatibility decomposition, i.e. replaces all compatibility characters
    with their equivalents.

    The ASCII test is applied to the character kept after decomposition,
    not to the input character: a character above U+00FF that decomposes
    to an ASCII base is converted, and a character whose base is not ASCII
    is never returned as is.

    :param ustring: the unicode string to convert
    :param ignorenonascii: when true, characters without an ASCII base are
      silently dropped instead of raising
    :return: the converted unicode string
    :raise ValueError: a character has no ASCII base and `ignorenonascii`
      is false

    :see: Another project about ASCII transliterations of Unicode text
          http://pypi.python.org/pypi/Unidecode
    """
    res = []
    for letter in ustring:
        try:
            replacement = MANUAL_UNICODE_MAP[letter]
        except KeyError:
            # decompose first, then judge the result: checking the input
            # code point would reject decomposable characters and let
            # non-decomposable non-ascii ones through
            replacement = _uninormalize('NFKD', letter)[0]
            if ord(replacement) >= 2**7:
                if ignorenonascii:
                    continue
                raise ValueError("can't deal with non-ascii based characters")
        res.append(replacement)
    return u''.join(res)
Example #6
0
def unormalize(ustring, ignorenonascii=False):
    """Replace diacritical characters with their corresponding ascii characters.

    A character present in MANUAL_UNICODE_MAP is replaced by its mapped
    value. Otherwise the character is converted to the normal form KD
    (NFKD, compatibility decomposition), which may turn it into several
    characters, and only the first of them is kept.

    Whether a character can be handled is decided from that kept character:
    it must be in the ASCII range (below 2**7). The input code point itself
    is not tested, so characters above U+00FF with an ASCII base are
    accepted and non-ASCII characters are never copied to the output.

    :param ustring: the unicode string to convert
    :param ignorenonascii: when true, drop characters that have no ASCII
      base instead of raising
    :return: the converted unicode string
    :raise ValueError: a character has no ASCII base and `ignorenonascii`
      is false

    :see: Another project about ASCII transliterations of Unicode text
          http://pypi.python.org/pypi/Unidecode
    """
    res = []
    for letter in ustring:
        try:
            replacement = MANUAL_UNICODE_MAP[letter]
        except KeyError:
            replacement = _uninormalize('NFKD', letter)[0]
            # test the decomposed base, not the original character
            if ord(replacement) >= 2**7:
                if ignorenonascii:
                    continue
                raise ValueError("can't deal with non-ascii based characters")
        res.append(replacement)
    return u''.join(res)