def is_combining_remove(code_point):
    '''Decide whether a code point belongs in the "remove" section.

    The "remove" section is the part of the translit_combining file in
    which combining characters are replaced by empty strings.

    Returns False when unicode_utils.is_combining() rejects the code
    point, and also False when the character name contains any of the
    excluded name fragments listed below.  Returns True otherwise.

    The exclusion list exists because the original translit_combining
    file did not remove the combining characters of these scripts and
    symbol groups; whether removing all combining characters would be
    useful is still undecided, so this stays close to the spirit of the
    original file.
    '''
    # Name fragments whose combining characters are left untouched.
    excluded_fragments = (
        'DEVANAGARI', 'BENGALI', 'CYRILLIC', 'SYRIAC', 'THAANA', 'NKO',
        'GURMUKHI', 'TAMIL', 'GUJARATI', 'ORIYA', 'TELUGU', 'KANNADA',
        'MALAYALAM', 'SINHALA', 'THAI', 'LAO', 'TIBETAN', 'MYANMAR',
        'ETHIOPIC', 'TAGALOG', 'HANUNOO', 'BUHID', 'TAGBANWA', 'KHMER',
        'MONGOLIAN', 'LIMBU', 'NEW TAI LUE', 'BUGINESE', 'BALINESE',
        'SUNDANESE', 'LEPCHA', 'IDEOGRAPHIC', 'HANGUL', 'SYLOTI',
        'SAURASHTRA', 'KAYAH', 'REJANG', 'CHAM', 'VARIATION SELECTOR',
        'KHAROSHTHI', 'MUSICAL SYMBOL', 'SAMARITAN', 'MANDAIC',
        'TAI THAM', 'BATAK', 'VEDIC', 'COPTIC', 'TIFINAGH', 'BAMUM',
        'JAVANESE', 'TAI VIET', 'MEETEI', 'MANICHAEAN', 'BRAHMI',
        'KAITHI', 'CHAKMA', 'MAHAJANI', 'SHARADA', 'KHOJKI',
        'KHUDAWADI', 'GRANTHA', 'TIRHUTA', 'SIDDHAM', 'MODI VOWEL',
        'MODI SIGN', 'TAKRI', 'BASSA VAH', 'PAHAWH HMONG', 'MIAO',
        'DUPLOYAN', 'MENDE KIKAKUI', 'AHOM', 'SIGNWRITING',
    )
    if not unicode_utils.is_combining(code_point):
        return False
    char_name = unicode_utils.UNICODE_ATTRIBUTES[code_point]['name']
    # Plain substring match against the character name, as before.
    return not any(fragment in char_name for fragment in excluded_fragments)
def is_combining_remove(code_point):
    '''Tell whether this combining character should be mapped to nothing.

    A True result means the code point should be listed in the section
    of the translit_combining file where combining characters are
    replaced by empty strings.

    A code point qualifies only if unicode_utils.is_combining() accepts
    it and its character name contains none of the fragments in the skip
    list below.  Those entries are skipped because the original
    translit_combining file did not handle them either, and it is not yet
    clear whether doing so for every combining character would be
    useful; for now this keeps close to the spirit of the original file.
    '''
    combining = unicode_utils.is_combining(code_point)
    if not combining:
        return False

    attributes = unicode_utils.UNICODE_ATTRIBUTES[code_point]
    name = attributes['name']

    # Substrings of character names that are exempt from removal.
    skip_list = (
        'DEVANAGARI', 'BENGALI', 'CYRILLIC', 'SYRIAC', 'THAANA',
        'NKO', 'GURMUKHI', 'TAMIL', 'GUJARATI', 'ORIYA',
        'TELUGU', 'KANNADA', 'MALAYALAM', 'SINHALA', 'THAI',
        'LAO', 'TIBETAN', 'MYANMAR', 'ETHIOPIC', 'TAGALOG',
        'HANUNOO', 'BUHID', 'TAGBANWA', 'KHMER', 'MONGOLIAN',
        'LIMBU', 'NEW TAI LUE', 'BUGINESE', 'BALINESE', 'SUNDANESE',
        'LEPCHA', 'IDEOGRAPHIC', 'HANGUL', 'SYLOTI', 'SAURASHTRA',
        'KAYAH', 'REJANG', 'CHAM', 'VARIATION SELECTOR', 'KHAROSHTHI',
        'MUSICAL SYMBOL', 'SAMARITAN', 'MANDAIC', 'TAI THAM', 'BATAK',
        'VEDIC', 'COPTIC', 'TIFINAGH', 'BAMUM', 'JAVANESE',
        'TAI VIET', 'MEETEI', 'MANICHAEAN', 'BRAHMI', 'KAITHI',
        'CHAKMA', 'MAHAJANI', 'SHARADA', 'KHOJKI', 'KHUDAWADI',
        'GRANTHA', 'TIRHUTA', 'SIDDHAM', 'MODI VOWEL', 'MODI SIGN',
        'TAKRI', 'BASSA VAH', 'PAHAWH HMONG', 'MIAO', 'DUPLOYAN',
        'MENDE KIKAKUI', 'AHOM', 'SIGNWRITING',
    )
    # True only when no skip-list entry occurs anywhere in the name.
    return all(entry not in name for entry in skip_list)
def is_combining_remove(code_point):
    """Report whether a combining character should be replaced by "".

    This selects the code points listed in the section of the
    translit_combining file where combining characters are replaced by
    empty strings.

    The result is False if unicode_utils.is_combining() says the code
    point is not combining, or if its character name contains one of the
    exempt markers below; otherwise it is True.

    Many scripts are exempt because the original translit_combining file
    did not strip their combining characters, and it has not been decided
    whether stripping all combining characters would be useful.  Until
    then the behaviour follows the spirit of the original file.
    """
    if unicode_utils.is_combining(code_point):
        unicode_name = unicode_utils.UNICODE_ATTRIBUTES[code_point]["name"]
        # Markers are matched as substrings of the character name.
        exempt_markers = (
            "DEVANAGARI",
            "BENGALI",
            "CYRILLIC",
            "SYRIAC",
            "THAANA",
            "NKO",
            "GURMUKHI",
            "TAMIL",
            "GUJARATI",
            "ORIYA",
            "TELUGU",
            "KANNADA",
            "MALAYALAM",
            "SINHALA",
            "THAI",
            "LAO",
            "TIBETAN",
            "MYANMAR",
            "ETHIOPIC",
            "TAGALOG",
            "HANUNOO",
            "BUHID",
            "TAGBANWA",
            "KHMER",
            "MONGOLIAN",
            "LIMBU",
            "NEW TAI LUE",
            "BUGINESE",
            "BALINESE",
            "SUNDANESE",
            "LEPCHA",
            "IDEOGRAPHIC",
            "HANGUL",
            "SYLOTI",
            "SAURASHTRA",
            "KAYAH",
            "REJANG",
            "CHAM",
            "VARIATION SELECTOR",
            "KHAROSHTHI",
            "MUSICAL SYMBOL",
            "SAMARITAN",
            "MANDAIC",
            "TAI THAM",
            "BATAK",
            "VEDIC",
            "COPTIC",
            "TIFINAGH",
            "BAMUM",
            "JAVANESE",
            "TAI VIET",
            "MEETEI",
            "MANICHAEAN",
            "BRAHMI",
            "KAITHI",
            "CHAKMA",
            "MAHAJANI",
            "SHARADA",
            "KHOJKI",
            "KHUDAWADI",
            "GRANTHA",
            "TIRHUTA",
            "SIDDHAM",
            "MODI VOWEL",
            "MODI SIGN",
            "TAKRI",
            "BASSA VAH",
            "PAHAWH HMONG",
            "MIAO",
            "DUPLOYAN",
            "MENDE KIKAKUI",
            "AHOM",
            "SIGNWRITING",
        )
        for marker in exempt_markers:
            if marker in unicode_name:
                break
        else:
            # No exempt marker matched: this character gets removed.
            return True
    return False