def expand_long_vowels(kana_string):
    """
    Replace long-vowel marks with the vowel they lengthen, where possible.

    The input is first passed through ``scripts.to_hiragana``. Every
    long-vowel mark found from index 1 onwards whose preceding character is
    classified as hiragana is then swapped for whatever the cached kana
    table's ``to_vowel_line`` returns for that preceding character. A mark
    at index 0, or one following a non-hiragana character, is left as is.

        >>> a = expand_long_vowels(unicode('すー', 'utf8'))
        >>> b = unicode('すう', 'utf8')
        >>> a == b
        True
    """
    long_mark = u'ー'
    text = scripts.to_hiragana(kana_string)
    table = kana_table.KanaTable.get_cached()

    # Searching starts at index 1: a mark in first position has no
    # preceding character to take a vowel from.
    pos = text.find(long_mark, 1)
    while pos >= 0:
        preceding = text[pos - 1]
        if scripts.script_type(preceding) == scripts.Script.Hiragana:
            # The substituted vowel can itself precede the next mark, so the
            # string is rebuilt before searching again.
            text = text[:pos] + table.to_vowel_line(preceding) + text[pos + 1:]
        pos = text.find(long_mark, pos + 1)

    return text
def test_fetch_scripts(self):
    """
    Check the hiragana and katakana character sets returned by
    ``scripts.get_script`` and the conversion between the two scripts.
    """
    expected_hiragana = u'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ'  # nopep8
    expected_katakana = u'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ'  # nopep8

    # Each script's full character listing must match the fixture exactly.
    self.assertEqual(scripts.get_script(Script.Hiragana), expected_hiragana)
    self.assertEqual(scripts.get_script(Script.Katakana), expected_katakana)

    # Converting one fixture must yield the other, in both directions.
    self.assertEqual(scripts.to_hiragana(expected_katakana),
                     expected_hiragana)
    self.assertEqual(scripts.to_katakana(expected_hiragana),
                     expected_katakana)