예제 #1
0
def expand_long_vowels(kana_string):
    """
    Replace long-vowel marks with an explicit vowel kana where possible.

    The input is first passed through ``scripts.to_hiragana``. Every long
    vowel mark found at index 1 or later whose preceding character is
    classified as hiragana is then replaced by whatever the cached kana
    table's ``to_vowel_line`` gives for that preceding character. Marks
    preceded by any other script, and a mark at index 0, are left as-is.

        >>> a = expand_long_vowels(unicode('すー', 'utf8'))
        >>> b = unicode('すう', 'utf8')
        >>> a == b
        True
    """
    long_mark = u'ー'
    text = scripts.to_hiragana(kana_string)
    vowel_lookup = kana_table.KanaTable.get_cached()

    # Start at index 1 so there is always a character before the mark.
    pos = text.find(long_mark, 1)
    while pos >= 0:
        preceding = text[pos - 1]
        if scripts.script_type(preceding) == scripts.Script.Hiragana:
            # The mark follows hiragana, so swap it for the matching vowel.
            head = text[:pos]
            tail = text[pos + 1:]
            text = head + vowel_lookup.to_vowel_line(preceding) + tail

        # Resume the search just past the position that was examined.
        pos = text.find(long_mark, pos + 1)

    return text
예제 #2
0
    def test_fetch_scripts(self):
        """
        Check the hiragana and katakana character sets and their conversion.

        Verifies that ``scripts.get_script`` returns the expected string for
        each of the two scripts, and that ``to_hiragana`` / ``to_katakana``
        map one expected string onto the other.
        """
        expected_hiragana = u'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ'  # nopep8
        expected_katakana = u'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ'  # nopep8

        # Fetching each script by its type must give the full character set.
        self.assertEqual(scripts.get_script(Script.Hiragana),
                         expected_hiragana)
        self.assertEqual(scripts.get_script(Script.Katakana),
                         expected_katakana)

        # Converting between the scripts must map the sets onto each other.
        self.assertEqual(scripts.to_hiragana(expected_katakana),
                         expected_hiragana)
        self.assertEqual(scripts.to_katakana(expected_hiragana),
                         expected_katakana)