Beispiel #1
0
 def allomorph(self,
               word,
               morph,
               tolerance_style=DEFAULT_TOLERANCE_STYLE,
               guess_coda=DEFAULT_GUESS_CODA):
     """Pick the form of ``morph`` that fits after ``word``.

     Everything ``self.I_PATTERN`` matches is first removed from ``morph``;
     the remainder is the suffix.  When the suffix's first letter has the
     onset "ㅇ", its nucleus may be swapped through ``self.J_INJECTIONS``
     (or its inverse) depending on the coda guessed for ``word``.

     :param word: the word the morph is attached to; only passed to
                  ``guess_coda``.
     :param morph: the morph to adapt.
     :param tolerance_style: forwarded to ``self.tolerance`` when the coda
                             of ``word`` could not be guessed.
     :param guess_coda: callable returning the coda of ``word``: ``u''``
                        is treated as "ends with a nucleus", ``None`` as
                        "unknown", anything else as a consonant coda.
     :returns: the bare suffix when it starts with a letter whose onset is
               "ㅇ" and whose nucleus is "ㅣ"; otherwise a prefix (from
               ``self.tolerance`` or ``self.rule``) followed by the
               possibly rewritten suffix.
     """
     suffix = self.I_PATTERN.sub(u'', morph)
     coda = guess_coda(word)
     head_onset, head_nucleus, head_coda = split_phonemes(suffix[0])
     if head_onset == u'ㅇ':
         if head_nucleus == u'ㅣ':
             # The suffix itself begins with an "이"-like letter: it is
             # returned unchanged and without any prefix.
             return suffix
         if coda == u'':
             # After a word which ends with a nucleus:
             # squeeze "이어" or "이에" to "여" or "예".
             table = self.J_INJECTIONS
         else:
             # After a word which ends with a consonant (or whose coda is
             # unknown): lengthen "여" or "예" to "이어" or "이에".
             table = self.J_INJECTIONS.inv
         if head_nucleus in table:
             rewritten = join_phonemes(
                 u'ㅇ', table[head_nucleus], head_coda)
             suffix = rewritten + suffix[1:]
     # An unknown coda falls back to the tolerant notation; a known one
     # selects the prefix through the regular rule.
     prefix = (self.tolerance(tolerance_style) if coda is None
               else self.rule(coda))
     return prefix + suffix
Beispiel #2
0
def test_join_phonemes():
    """Check the call styles accepted by ``join_phonemes``.

    Phonemes may be passed as separate arguments or packed into one tuple,
    an empty coda gives the same letter as an omitted one, and a fourth
    phoneme is rejected with ``TypeError``.
    """
    with_coda = (u'ㅅ', u'ㅓ', u'ㅂ')
    without_coda = (u'ㅊ', u'ㅠ')
    # Onset, nucleus and coda: positional arguments, then a single tuple.
    assert join_phonemes(*with_coda) == u'섭'
    assert join_phonemes(with_coda) == u'섭'
    # Coda omitted, explicitly empty, or missing from the tuple.
    assert join_phonemes(*without_coda) == u'츄'
    assert join_phonemes(u'ㅊ', u'ㅠ', u'') == u'츄'
    assert join_phonemes(without_coda) == u'츄'
    # More than three phonemes cannot form one letter.
    with pytest.raises(TypeError):
        join_phonemes(u'ㄷ', u'ㅏ', u'ㄹ', u'ㄱ')
Beispiel #3
0
def test_join_phonemes():
    """Check the call styles accepted by ``join_phonemes``.

    Phonemes may be passed as separate arguments or packed into one tuple,
    an empty coda gives the same letter as an omitted one, and a fourth
    phoneme is rejected with ``TypeError``.
    """
    with_coda = (u'ㅅ', u'ㅓ', u'ㅂ')
    without_coda = (u'ㅊ', u'ㅠ')
    # Onset, nucleus and coda: positional arguments, then a single tuple.
    assert join_phonemes(*with_coda) == u'섭'
    assert join_phonemes(with_coda) == u'섭'
    # Coda omitted, explicitly empty, or missing from the tuple.
    assert join_phonemes(*without_coda) == u'츄'
    assert join_phonemes(u'ㅊ', u'ㅠ', u'') == u'츄'
    assert join_phonemes(without_coda) == u'츄'
    # More than three phonemes cannot form one letter.
    with pytest.raises(TypeError):
        join_phonemes(u'ㄷ', u'ㅏ', u'ㄹ', u'ㄱ')
Beispiel #4
0
 def regex_pattern(self):
     """Return the source of a regex matching the forms in ``self.morphs``.

     When ``self.final`` is truthy the pattern is anchored on both ends and
     is a plain alternation of the escaped morphs.

     Otherwise the pattern is anchored at the start only and every morph
     gets its own capturing group.  A morph whose last letter splits into
     phonemes with an empty coda has that letter replaced by a character
     range running from the letter itself to the same onset and nucleus
     with the coda "ㅎ" (presumably so that the letter matches with any
     coda attached -- confirm against the Hangul code point ordering).
     A morph whose last character cannot be split (``split_phonemes``
     raises ``ValueError``) or already has a coda is matched literally.
     """
     if self.final:
         alternatives = u'|'.join(re.escape(f) for f in self.morphs)
         return u'^(?:%s)$' % alternatives
     groups = []
     for morph in self.morphs:
         try:
             onset, nucleus, coda = split_phonemes(morph[-1])
         except ValueError:
             # The last character could not be split into phonemes.
             ends_with_nucleus = False
         else:
             ends_with_nucleus = coda == u''
         if ends_with_nucleus:
             range_end = join_phonemes(onset, nucleus, u'ㅎ')
             letter_range = u'[%s-%s]' % (morph[-1], range_end)
             body = re.escape(morph[:-1]) + letter_range
         else:
             body = re.escape(morph)
         groups.append(u'(%s)' % body)
     return u'^(?:%s)' % u'|'.join(groups)