Exemple #1
0
def is_combining_remove(code_point):
    '''Tell whether a code point belongs in the section of the
    translit_combining file where combining characters are replaced
    by empty strings.

    The answer is True only when both conditions hold:

    * unicode_utils.is_combining() reports the code point as combining;
    * its Unicode name contains none of the markers listed below.

    The markers name scripts and character groups whose combining
    characters were not handled that way in the original
    translit_combining file.  Whether stripping all combining
    characters would be useful is still undecided, so for now the
    function stays close to the spirit of the original file.
    '''
    # Plain substring match against the character name; order is kept
    # as in the original list.
    excluded_markers = (
        'DEVANAGARI', 'BENGALI', 'CYRILLIC', 'SYRIAC', 'THAANA', 'NKO',
        'GURMUKHI', 'TAMIL', 'GUJARATI', 'ORIYA', 'TELUGU', 'KANNADA',
        'MALAYALAM', 'SINHALA', 'THAI', 'LAO', 'TIBETAN', 'MYANMAR',
        'ETHIOPIC', 'TAGALOG', 'HANUNOO', 'BUHID', 'TAGBANWA', 'KHMER',
        'MONGOLIAN', 'LIMBU', 'NEW TAI LUE', 'BUGINESE', 'BALINESE',
        'SUNDANESE', 'LEPCHA', 'IDEOGRAPHIC', 'HANGUL', 'SYLOTI',
        'SAURASHTRA', 'KAYAH', 'REJANG', 'CHAM', 'VARIATION SELECTOR',
        'KHAROSHTHI', 'MUSICAL SYMBOL', 'SAMARITAN', 'MANDAIC', 'TAI THAM',
        'BATAK', 'VEDIC', 'COPTIC', 'TIFINAGH', 'BAMUM', 'JAVANESE',
        'TAI VIET', 'MEETEI', 'MANICHAEAN', 'BRAHMI', 'KAITHI', 'CHAKMA',
        'MAHAJANI', 'SHARADA', 'KHOJKI', 'KHUDAWADI', 'GRANTHA', 'TIRHUTA',
        'SIDDHAM', 'MODI VOWEL', 'MODI SIGN', 'TAKRI', 'BASSA VAH',
        'PAHAWH HMONG', 'MIAO', 'DUPLOYAN', 'MENDE KIKAKUI', 'AHOM',
        'SIGNWRITING',
    )
    if not unicode_utils.is_combining(code_point):
        return False
    char_name = unicode_utils.UNICODE_ATTRIBUTES[code_point]['name']
    return not any(marker in char_name for marker in excluded_markers)
def is_combining_remove(code_point):
    '''Decide whether a combining character should be listed in the
    part of the translit_combining file that replaces combining
    characters by empty strings.

    Returns False when unicode_utils.is_combining() rejects the code
    point, and also False when the character name contains any of the
    script or group markers checked below.  Returns True otherwise.

    Combining characters from those scripts are skipped because the
    original translit_combining file did not cover them either, and
    it is not yet clear whether covering every combining character
    would be useful.  Until then this keeps close to the spirit of
    the original file.
    '''
    if not unicode_utils.is_combining(code_point):
        return False
    unicode_name = unicode_utils.UNICODE_ATTRIBUTES[code_point]['name']
    # True only if no marker occurs anywhere in the name.
    return all(
        script not in unicode_name
        for script in (
            'DEVANAGARI', 'BENGALI', 'CYRILLIC', 'SYRIAC',
            'THAANA', 'NKO', 'GURMUKHI', 'TAMIL',
            'GUJARATI', 'ORIYA', 'TELUGU', 'KANNADA',
            'MALAYALAM', 'SINHALA', 'THAI', 'LAO',
            'TIBETAN', 'MYANMAR', 'ETHIOPIC', 'TAGALOG',
            'HANUNOO', 'BUHID', 'TAGBANWA', 'KHMER',
            'MONGOLIAN', 'LIMBU', 'NEW TAI LUE', 'BUGINESE',
            'BALINESE', 'SUNDANESE', 'LEPCHA', 'IDEOGRAPHIC',
            'HANGUL', 'SYLOTI', 'SAURASHTRA', 'KAYAH',
            'REJANG', 'CHAM', 'VARIATION SELECTOR', 'KHAROSHTHI',
            'MUSICAL SYMBOL', 'SAMARITAN', 'MANDAIC', 'TAI THAM',
            'BATAK', 'VEDIC', 'COPTIC', 'TIFINAGH',
            'BAMUM', 'JAVANESE', 'TAI VIET', 'MEETEI',
            'MANICHAEAN', 'BRAHMI', 'KAITHI', 'CHAKMA',
            'MAHAJANI', 'SHARADA', 'KHOJKI', 'KHUDAWADI',
            'GRANTHA', 'TIRHUTA', 'SIDDHAM', 'MODI VOWEL',
            'MODI SIGN', 'TAKRI', 'BASSA VAH', 'PAHAWH HMONG',
            'MIAO', 'DUPLOYAN', 'MENDE KIKAKUI', 'AHOM',
            'SIGNWRITING',
        )
    )
def is_combining_remove(code_point):
    """Report whether a code point is a combining character that goes
    into the section of the translit_combining file where combining
    characters are replaced by empty strings.

    A code point qualifies when unicode_utils.is_combining() accepts
    it and its Unicode name contains none of the substrings tested
    below; in every other case the result is False.

    The excluded substrings identify scripts (and a few character
    groups) whose combining characters the original
    translit_combining file left alone.  It is not yet settled
    whether handling all combining characters would be useful, so
    this sticks close to the spirit of the original file for now.
    """
    if unicode_utils.is_combining(code_point):
        label = unicode_utils.UNICODE_ATTRIBUTES[code_point]["name"]
        # Any single hit on the exclusion list disqualifies the character.
        return not any(
            tag in label
            for tag in (
                "DEVANAGARI", "BENGALI", "CYRILLIC", "SYRIAC", "THAANA",
                "NKO", "GURMUKHI", "TAMIL", "GUJARATI", "ORIYA",
                "TELUGU", "KANNADA", "MALAYALAM", "SINHALA", "THAI",
                "LAO", "TIBETAN", "MYANMAR", "ETHIOPIC", "TAGALOG",
                "HANUNOO", "BUHID", "TAGBANWA", "KHMER", "MONGOLIAN",
                "LIMBU", "NEW TAI LUE", "BUGINESE", "BALINESE", "SUNDANESE",
                "LEPCHA", "IDEOGRAPHIC", "HANGUL", "SYLOTI", "SAURASHTRA",
                "KAYAH", "REJANG", "CHAM", "VARIATION SELECTOR", "KHAROSHTHI",
                "MUSICAL SYMBOL", "SAMARITAN", "MANDAIC", "TAI THAM", "BATAK",
                "VEDIC", "COPTIC", "TIFINAGH", "BAMUM", "JAVANESE",
                "TAI VIET", "MEETEI", "MANICHAEAN", "BRAHMI", "KAITHI",
                "CHAKMA", "MAHAJANI", "SHARADA", "KHOJKI", "KHUDAWADI",
                "GRANTHA", "TIRHUTA", "SIDDHAM", "MODI VOWEL", "MODI SIGN",
                "TAKRI", "BASSA VAH", "PAHAWH HMONG", "MIAO", "DUPLOYAN",
                "MENDE KIKAKUI", "AHOM", "SIGNWRITING",
            )
        )
    return False