def test_1(self):
    """
    TESTSDCharacterStringGRC.test_1

    Invert an empty dictionary with invertdict(), invert the result a
    second time, and check that the two inverted dictionaries compare
    equal.
    """
    empty = {}
    inverted_once = invertdict(empty)
    inverted_twice = invertdict(inverted_once)
    self.assertEqual(inverted_once, inverted_twice)
";": "?", "!": "!", "·": ";", '"': '"', "'": "'", "—": "_", ":": ":", "\n": "\n", "\r": "\r", "\t": "\t", "‘": "<2018>", "’": "<2019>", "᾽": "<1FBD>", } LOWER_CASE_INVERSED = invertdict(LOWER_CASE, accept_duplicated_values=True) LOWER_CASE_INVERSED["b"] = "β" LOWER_CASE_INVERSED["s"] = "σ" UPPER_CASE_INVERSED = invertdict(UPPER_CASE) OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS) PUNCTUATION_INVERSED = invertdict(PUNCTUATION) DIACRITICS = { "βαρεῖα": "\\", "ὀξεῖα": "/", "περισπωμένη": "/\\", "μακρόν": "_", "βραχύ": "-", "ψιλὸν": ")", "δασὺ": "(", "ὑπογεγραμμένη": "+i",
'MARK GUG RTAGS GYON' : "<", 'MARK GUG RTAGS GYAS' : ">", 'MARK ANG KHANG GYON' : "(", 'MARK ANG KHANG GYAS' : ")", # = Sanskrit avagraha (अवग्रह) = ऽ 'MARK PALUTA' : "&", } DIACRITICS = { # = Sanskrit visarga : 'SIGN RNAM BCAD' : 'H', # = srog med = Sanskrit virama 'MARK HALANTA' : '?', # = Sanskrit anusvara 'SIGN RJES SU NGA RO' : 'M', # = Sanskrit candrabindu 'SIGN NYI ZLA NAA DA' : '~M`', 'SIGN SNA LDAN' : '~M', } CONSONANTS_INVERSED = invertdict(CONSONANTS) VOWELS_INVERSED = invertdict(VOWELS) OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS) PUNCTUATION_INVERSED = invertdict(PUNCTUATION) DIACRITICS_INVERSED = invertdict(DIACRITICS)
' ' : ' ', '.' : '<.>', '\n' : '\n', '\r' : '\r', '\t' : '\t', } DIACRITICS = { 'DEVANAGARI STRESS SIGN UDATTA' : chr(0x0301), 'DEVANAGARI STRESS SIGN ANUDATTA' : chr(0x0331), 'DEVANAGARI SIGN CANDRABINDU' : 'm̐', # 006D 0310 'DEVANAGARI SIGN ANUSVARA' : 'ṁ', # 1E41 } CONSONANTS_INVERSED = invertdict(CONSONANTS) CONSONANTS_WITH_NUKTA_INVERSED = invertdict(CONSONANTS_WITH_NUKTA) CONSONANTS_WITH_NUKTA_TO_CONSONANT_INVERSED = invertdict(CONSONANTS_WITH_NUKTA_TO_CONSONANT) VOWELS_INVERSED = invertdict(VOWELS) VOWELS_IN_HIATUS_INVERSED = invertdict(VOWELS_IN_HIATUS) OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS) PUNCTUATION_INVERSED = invertdict(PUNCTUATION) DIACRITICS_INVERSED = invertdict(DIACRITICS) ################################################################################ # transliteration's patterns : # PATTERN is used to cut one complex characters into its elements. # PATTERN2 is used to cut several complex characters into a list of complex characters. ################################################################################ # in order to build the pattern strings for the regexes we have to SORT the
} # OTHER_SYMBOLS[base_char] = transliterated character OTHER_SYMBOLS = { '0' : '0', '1' : '1', '2' : '2', '3' : '3', '4' : '4', '5' : '5', '6' : '6', '7' : '7', '8' : '8', '9' : '9', } OTHER_SYMBOLS_INVERSED = invertdict( OTHER_SYMBOLS ) # PUNCTUATION[base_char] = transliterated character # # ABOUT 'weird characters' : some characters are defined in this table only in order to pass tests. PUNCTUATION = { ')' : ')', '(' : '(', '[' : '[', ']' : ']', '{' : '{', '}' : '}', ' ' : ' ', '\n' : '\n', '\r' : '\r', '\t' : '\t',
# 'DIGIT HALF HEIGHT' : "\\u0F33", # 'DIGIT HALF NINE' : "\\u0F34", # = Sanskrit avagraha (अवग्रह) = ऽ 'MARK PALUTA' : "ऽ", } PUNCTUATION = { 'MARK INTERSYLLABIC TSHEG' : " ", 'MARK SHAD' : chr(0x0964), # = Sanskrit danda } DIACRITICS = { 'SIGN RNAM BCAD' : chr(0x0903), 'MARK HALANTA' : chr(0x094D), 'SIGN RJES SU NGA RO' : chr(0x0902), # 'SIGN NYI ZLA NAA DA' : '???', 'SIGN SNA LDAN' : chr(0x0901), } CONSONANTS_INVERSED = invertdict(CONSONANTS) DEPENDENT_VOWELS_INVERSED = invertdict(DEPENDENT_VOWELS) INDEPENDENT_VOWELS_INVERSED = invertdict(INDEPENDENT_VOWELS) OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS) PUNCTUATION_INVERSED = invertdict(PUNCTUATION) DIACRITICS_INVERSED = invertdict(DIACRITICS)
'\n' : '\n', # (???) see above '\r' : '\r', # (???) see above '\t' : '\t', # (???) see above } DIACRITICS = { 'DEVANAGARI SIGN INVERTED CANDRABINDU' : '~.N', # (???) see above 'DEVANAGARI SIGN CANDRABINDU' : '.N', 'DEVANAGARI SIGN ANUSVARA' : 'M', 'DEVANAGARI STRESS SIGN UDATTA' : "\\'", 'DEVANAGARI STRESS SIGN ANUDATTA' : '\\_', 'DEVANAGARI GRAVE ACCENT' : "<GRAVE ACCENT>", # (???) see above 'DEVANAGARI ACUTE ACCENT' : "<ACUTE ACCENT>", # (???) see above } CONSONANTS_INVERSED = invertdict(CONSONANTS) CONSONANTS_URDU_INVERSED = invertdict(CONSONANTS_URDU) VOWELS_INVERSED = invertdict(VOWELS) VOWELS_IN_HIATUS_INVERSED = invertdict(VOWELS_IN_HIATUS) OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS) PUNCTUATION_INVERSED = invertdict(PUNCTUATION) DIACRITICS_INVERSED = invertdict(DIACRITICS) URDU_CONSONANT_2_CONSONANT_INVERSED = invertdict( URDU_CONSONANT_2_CONSONANT ) ################################################################################ # transliteration's patterns : # PATTERN is used to cut one complex characters into its elements. # PATTERN2 is used to cut several complex characters into a list of complex characters. ################################################################################ # in order to build the pattern strings for the regexes we have to SORT the
' ' : ' ', '.' : '.', ',' : ',', ';' : '?', '!' : '!', '·' : ';', '"' : '"', "'" : "'", "—" : "_", ":" : ":", '\n' : '\n', '\r' : '\r', '\t' : '\t', } CHOONPU_INVERSED = invertdict(CHOONPU) HIRAGANA_INVERSED = invertdict(HIRAGANA) HIRAGANA_DAKUTEN_INVERSED = invertdict(HIRAGANA_DAKUTEN) HIRAGANA_HANDAKUTEN_INVERSED = invertdict(HIRAGANA_HANDAKUTEN) KATAKANA_INVERSED = invertdict(KATAKANA) KATAKANA_DAKUTEN_INVERSED = invertdict(KATAKANA_DAKUTEN) KATAKANA_HANDAKUTEN_INVERSED = invertdict(KATAKANA_HANDAKUTEN) OTHER_SYMBOLS_INVERSED = invertdict(OTHER_SYMBOLS) PUNCTUATION_INVERSED = invertdict(PUNCTUATION) # Be carefull : order matters, hence the use of an OrderedDict object. COMPOSED_TRANSCRIPTIONS = OrderedDict(( ("shi[-]ya" , "sha"), ("shi[-]yu" , "shu"), ("shi[-]yo" , "sho"),