예제 #1
0
파일: phonetics.py 프로젝트: aliok/trnltk
    def application_matches(cls, word, applied_str, voicing_allowed):
        """
        Tells whether C{applied_str} can be the leading part of the surface C{word}.

        A direct prefix (or exact) match always succeeds. When C{voicing_allowed}
        is set, a match is also accepted if everything but the last letter of
        C{applied_str} is a prefix of C{word} and the voiced counterpart of that
        last letter equals the letter at the same position in C{word}.

            >>> Phonetics.application_matches(u'armudunu', u'armut', True)
            True
            >>> Phonetics.application_matches(u'armudunu', u'armut', False)
            False
            >>> Phonetics.application_matches(u'armudunu', u'armudu', True)
            True
            >>> Phonetics.application_matches(u'armudunu', u'armudu', False)
            True

        @param word: The full word (surface)
        @param applied_str: Suffix applied part of the word
        @param voicing_allowed: If voicing should be considered or ignored
        @type word: unicode
        @type applied_str: unicode
        @type voicing_allowed: bool
        @return: False for an empty C{applied_str} or one longer than C{word}
        @rtype: L{bool}
        """
        # Nothing applied, or more applied than the surface holds: no match.
        if not applied_str:
            return False
        if len(applied_str) > len(word):
            return False

        # Direct hit, no phonetic change involved.
        if word == applied_str:
            return True
        if word.startswith(applied_str):
            return True

        # Beyond this point only a voicing of the last applied letter can help.
        if not voicing_allowed or not word.startswith(applied_str[:-1]):
            return False

        applied_tail = TurkishAlphabet.get_letter_for_char(applied_str[-1])
        surface_letter = TurkishAlphabet.get_letter_for_char(word[len(applied_str) - 1])
        return TurkishAlphabet.voice(applied_tail) == surface_letter
예제 #2
0
    def _handle_phonetics(cls,
                          word,
                          phonetic_attributes,
                          form_str,
                          lexeme_attributes=None):
        """
        Applies the suffix form C{form_str} to C{word} and returns both parts.

        Steps, in order:
          1. If C{lexeme_attributes} lacks NoVoicing, C{phonetic_attributes}
             contains LastLetterVoicelessStop and the form starts with a vowel,
             the last letter of the word is swapped for its voiced counterpart
             (only when TurkishAlphabet.voice yields one).
          2. If C{phonetic_attributes} contains LastLetterVoiceless and the first
             form letter has a devoiced counterpart, that counterpart replaces
             the first form letter.
          3. The form is walked char by char: '!' is skipped, the upper-case
             vowels 'A', 'I' and 'O' are resolved through LastVowelBack and
             LastVowelUnrounded, every other char is copied as is.

        @param word: the word the form is attached to
        @param phonetic_attributes: phonetic attributes used for the decisions
                                    above; a falsy value is treated as empty
        @param form_str: suffix form; its first char is read unconditionally
        @param lexeme_attributes: lexeme attributes; a falsy value is treated
                                  as empty
        @return: tuple of (possibly voiced word, resolved form)
        """
        lexeme_attributes = lexeme_attributes or []
        phonetic_attributes = phonetic_attributes or []

        first_letter_of_form = TurkishAlphabet.get_letter_for_char(form_str[0])

        # Voicing of the word's last letter comes first.
        if (LexemeAttribute.NoVoicing not in lexeme_attributes
                and PhoneticAttributes.LastLetterVoicelessStop in phonetic_attributes
                and first_letter_of_form.vowel):
            last_letter_of_word = TurkishAlphabet.get_letter_for_char(word[-1])
            voiced_letter = TurkishAlphabet.voice(last_letter_of_word)
            if voiced_letter:
                word = word[:-1] + voiced_letter.char_value

        # Devoicing of the form's first letter comes second.
        if (PhoneticAttributes.LastLetterVoiceless in phonetic_attributes
                and TurkishAlphabet.devoice(first_letter_of_form)):
            devoiced_first = TurkishAlphabet.devoice(first_letter_of_form)
            form_str = devoiced_first.char_value + form_str[1:]

        back = PhoneticAttributes.LastVowelBack in phonetic_attributes
        unrounded = PhoneticAttributes.LastVowelUnrounded in phonetic_attributes

        pieces = []
        for idx, ch in enumerate(form_str):
            if ch == '!':
                continue

            letter = TurkishAlphabet.get_letter_for_char(ch)
            if not (letter.vowel and letter.upper_case_char_value == ch):
                # Not an upper-case vowel placeholder: copy verbatim.
                pieces.append(ch)
                continue

            if ch == u'A':
                pieces.append(u'a' if back else u'e')
            elif ch == u'I':
                # A directly following '!' forces the unrounded variant.
                keep_unrounded = unrounded or form_str[idx + 1:idx + 2] == '!'
                if back:
                    pieces.append(u'ı' if keep_unrounded else u'u')
                else:
                    pieces.append(u'i' if keep_unrounded else u'ü')
            elif ch == u'O':
                pieces.append(u'o' if back else u'ö')
            # Any other upper-case vowel contributes nothing to the result.

        return word, u''.join(pieces)
예제 #3
0
파일: phonetics.py 프로젝트: aliok/trnltk
    def _handle_phonetics(cls, word, phonetic_attributes, form_str, lexeme_attributes=None):
        """
        Resolves a suffix form against a word and returns the pair (word, form).

        The word may get its last letter voiced: this happens when NoVoicing is
        not among C{lexeme_attributes}, LastLetterVoicelessStop is among
        C{phonetic_attributes}, the form starts with a vowel and
        TurkishAlphabet.voice returns a letter for the word's last letter.

        The form may get its first letter devoiced: this happens when
        LastLetterVoiceless is among C{phonetic_attributes} and
        TurkishAlphabet.devoice returns a letter for the first form letter.

        Afterwards each form char is processed: '!' is dropped, the upper-case
        vowels 'A', 'I', 'O' are replaced according to LastVowelBack and
        LastVowelUnrounded, and every remaining char is kept unchanged.

        @param word: the word the form is attached to
        @param phonetic_attributes: attributes consulted for voicing, devoicing
                                    and vowel selection; falsy means empty
        @param form_str: suffix form; must have a first char
        @param lexeme_attributes: lexeme attributes; falsy means empty
        @return: tuple of (possibly voiced word, resolved form)
        """
        lexeme_attributes = lexeme_attributes or []
        phonetic_attributes = phonetic_attributes or []

        first_letter_of_form = TurkishAlphabet.get_letter_for_char(form_str[0])

        # Step 1: voicing on the word side.
        if LexemeAttribute.NoVoicing not in lexeme_attributes:
            if PhoneticAttributes.LastLetterVoicelessStop in phonetic_attributes:
                if first_letter_of_form.vowel:
                    voiced_letter = TurkishAlphabet.voice(TurkishAlphabet.get_letter_for_char(word[-1]))
                    if voiced_letter:
                        word = word[:-1] + voiced_letter.char_value

        # Step 2: devoicing on the form side.
        if PhoneticAttributes.LastLetterVoiceless in phonetic_attributes:
            if TurkishAlphabet.devoice(first_letter_of_form):
                form_str = TurkishAlphabet.devoice(first_letter_of_form).char_value + form_str[1:]

        # Step 3: resolve the vowel placeholders.
        last_vowel_back = PhoneticAttributes.LastVowelBack in phonetic_attributes
        last_vowel_unrounded = PhoneticAttributes.LastVowelUnrounded in phonetic_attributes
        last_index = len(form_str) - 1

        resolved_chars = []
        for position, symbol in enumerate(form_str):
            if symbol == '!':
                continue

            letter = TurkishAlphabet.get_letter_for_char(symbol)
            if letter.vowel and letter.upper_case_char_value == symbol:
                # Upper-case vowels other than A / I / O resolve to nothing.
                resolved = None
                if symbol == u'A':
                    resolved = u'a' if last_vowel_back else u'e'
                elif symbol == u'I':
                    followed_by_bang = position < last_index and form_str[position + 1] == '!'
                    if last_vowel_unrounded or followed_by_bang:
                        resolved = u'ı' if last_vowel_back else u'i'
                    else:
                        resolved = u'u' if last_vowel_back else u'ü'
                elif symbol == u'O':
                    resolved = u'o' if last_vowel_back else u'ö'

                if resolved is not None:
                    resolved_chars.append(resolved)
            else:
                resolved_chars.append(symbol)

        return word, u''.join(resolved_chars)
예제 #4
0
    def application_matches(cls, word, applied_str, voicing_allowed):
        """
        Decides whether the surface C{word} begins with the applied part.

        The applied part matches when it is a prefix of the word. If voicing is
        allowed it also matches when all but its last letter is a prefix of the
        word and voicing that last letter gives the letter the word has at that
        position. An empty applied part, or one longer than the word, never
        matches.

            >>> Phonetics.application_matches(u'armudunu', u'armut', True)
            True
            >>> Phonetics.application_matches(u'armudunu', u'armut', False)
            False
            >>> Phonetics.application_matches(u'armudunu', u'armudu', True)
            True
            >>> Phonetics.application_matches(u'armudunu', u'armudu', False)
            True

        @param word: The full word (surface)
        @param applied_str: Suffix applied part of the word
        @param voicing_allowed: If voicing should be considered or ignored
        @type word: unicode
        @type applied_str: unicode
        @type voicing_allowed: bool
        @rtype: L{bool}
        """
        is_match = False

        if applied_str and not len(applied_str) > len(word):
            if word == applied_str or word.startswith(applied_str):
                # Plain prefix match; voicing does not need to be looked at.
                is_match = True
            elif voicing_allowed and word.startswith(applied_str[:-1]):
                # Only the last applied letter differs: compare its voiced
                # form with the letter the surface has at the same index.
                position = len(applied_str) - 1
                letter_in_application = TurkishAlphabet.get_letter_for_char(
                    applied_str[-1])
                letter_in_word = TurkishAlphabet.get_letter_for_char(
                    word[position])
                is_match = TurkishAlphabet.voice(
                    letter_in_application) == letter_in_word

        return is_match
예제 #5
0
    def _generate_modified_root_nodes(cls, lexeme):
        """
        Builds the root nodes of a lexeme: the plain root plus, if it differs,
        a root altered according to the lexeme attributes.

        When the lexeme carries RootChange and C{_handle_special_roots} returns
        something truthy, that result is returned directly. Otherwise two Root
        objects are assembled from C{lexeme.root}; Voicing / VoicingOpt,
        Doubling, LastVowelDrop, InverseHarmony and ProgressiveVowelDrop are
        applied in that order to the altered sequence, its phonetic attributes
        and the phonetic expectations of both roots.

        @param lexeme: lexeme providing root, lemma, attributes and
                       syntactic_category
        @return: the special roots, or a list holding the original root and,
                 when it does not compare equal, the altered root
        """
        def swap_attribute(attribute_set, outgoing, incoming):
            # Drop `outgoing` if it is present, then make sure `incoming` is set.
            if outgoing in attribute_set:
                attribute_set.remove(outgoing)
            attribute_set.add(incoming)

        if LexemeAttribute.RootChange in lexeme.attributes:
            special_roots = cls._handle_special_roots(lexeme)
            if special_roots:
                return special_roots

        altered_root = lexeme.root

        # Computed twice on purpose: each root gets its own attribute
        # collection, and the two are mutated independently below.
        plain_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
            lexeme.root)
        altered_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
            lexeme.root)
        plain_expectations = set()
        altered_expectations = set()

        if LexemeAttribute.Voicing in lexeme.attributes or LexemeAttribute.VoicingOpt in lexeme.attributes:
            voiced = TurkishAlphabet.voice(
                TurkishAlphabet.get_letter_for_char(altered_root[-1]))
            assert voiced is not None
            if lexeme.lemma.endswith(u"nk"):
                # Lemmas ending in "nk" always take L_g as the replacement.
                voiced = TurkishAlphabet.L_g
            altered_root = altered_root[:-1] + voiced.char_value

            if PhoneticAttributes.LastLetterVoicelessStop in altered_attributes:
                altered_attributes.remove(
                    PhoneticAttributes.LastLetterVoicelessStop)

            if voiced.continuant:
                swap_attribute(altered_attributes,
                               PhoneticAttributes.LastLetterNotContinuant,
                               PhoneticAttributes.LastLetterContinuant)
            else:
                swap_attribute(altered_attributes,
                               PhoneticAttributes.LastLetterContinuant,
                               PhoneticAttributes.LastLetterNotContinuant)

            if LexemeAttribute.VoicingOpt not in lexeme.attributes:
                plain_expectations.add(PhoneticExpectation.ConsonantStart)
            altered_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.Doubling in lexeme.attributes:
            altered_root = altered_root + altered_root[-1]
            plain_expectations.add(PhoneticExpectation.ConsonantStart)
            altered_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.LastVowelDrop in lexeme.attributes:
            # Remove the second-to-last char, keep the last one.
            altered_root = altered_root[:-2] + altered_root[-1]
            if lexeme.syntactic_category != SyntacticCategory.VERB:
                plain_expectations.add(PhoneticExpectation.ConsonantStart)
            altered_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.InverseHarmony in lexeme.attributes:
            for attribute_set in (plain_attributes, altered_attributes):
                swap_attribute(attribute_set,
                               PhoneticAttributes.LastVowelBack,
                               PhoneticAttributes.LastVowelFrontal)

        if LexemeAttribute.ProgressiveVowelDrop in lexeme.attributes:
            altered_root = altered_root[:-1]
            # Attributes are recomputed only if a vowel is left in the root.
            if RootGenerator._has_vowel(altered_root):
                altered_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    altered_root)
            altered_expectations.add(PhoneticExpectation.VowelStart)

        # Empty expectation sets are handed over as None.
        plain_node = Root(lexeme.root, lexeme, plain_expectations or None,
                          plain_attributes)
        altered_node = Root(altered_root, lexeme, altered_expectations or None,
                            altered_attributes)

        return [plain_node] if plain_node == altered_node else [plain_node, altered_node]
예제 #6
0
    def _generate_modified_root_nodes(cls, lexeme):
        """
        Produces the root node list for a lexeme.

        If the lexeme has the RootChange attribute and C{_handle_special_roots}
        gives a truthy result, that result is returned as is. In every other
        case an unchanged Root and a changed Root are built from C{lexeme.root}.
        The changed one reflects, in this order, Voicing / VoicingOpt, Doubling,
        LastVowelDrop, InverseHarmony and ProgressiveVowelDrop; InverseHarmony
        also touches the attributes of the unchanged root.

        @param lexeme: lexeme providing root, lemma, attributes and
                       syntactic_category
        @return: the special roots, or C{[original]} when both roots compare
                 equal, else C{[original, modified]}
        """
        if LexemeAttribute.RootChange in lexeme.attributes:
            special_roots = cls._handle_special_roots(lexeme)
            if special_roots:
                return special_roots

        root_variant = lexeme.root

        # Two separate calls so that each root owns its attribute collection.
        base_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
        variant_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
        base_expectations = set()
        variant_expectations = set()

        if LexemeAttribute.Voicing in lexeme.attributes or LexemeAttribute.VoicingOpt in lexeme.attributes:
            final_letter = TurkishAlphabet.get_letter_for_char(root_variant[-1])
            replacement = TurkishAlphabet.voice(final_letter)
            assert replacement is not None
            if lexeme.lemma.endswith(u"nk"):
                # Lemmas ending in "nk" always use L_g instead.
                replacement = TurkishAlphabet.L_g
            root_variant = root_variant[:-1] + replacement.char_value

            if PhoneticAttributes.LastLetterVoicelessStop in variant_attrs:
                variant_attrs.remove(PhoneticAttributes.LastLetterVoicelessStop)

            # Keep the continuant flags in line with the replacement letter.
            if replacement.continuant:
                stale, fresh = PhoneticAttributes.LastLetterNotContinuant, PhoneticAttributes.LastLetterContinuant
            else:
                stale, fresh = PhoneticAttributes.LastLetterContinuant, PhoneticAttributes.LastLetterNotContinuant
            if stale in variant_attrs:
                variant_attrs.remove(stale)
            variant_attrs.add(fresh)

            if LexemeAttribute.VoicingOpt not in lexeme.attributes:
                base_expectations.add(PhoneticExpectation.ConsonantStart)
            variant_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.Doubling in lexeme.attributes:
            root_variant += root_variant[-1]
            base_expectations.add(PhoneticExpectation.ConsonantStart)
            variant_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.LastVowelDrop in lexeme.attributes:
            # Cut out the second-to-last char, keep the final one.
            root_variant = root_variant[:-2] + root_variant[-1]
            if lexeme.syntactic_category != SyntacticCategory.VERB:
                base_expectations.add(PhoneticExpectation.ConsonantStart)
            variant_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.InverseHarmony in lexeme.attributes:
            for attrs in (base_attrs, variant_attrs):
                attrs.add(PhoneticAttributes.LastVowelFrontal)
                if PhoneticAttributes.LastVowelBack in attrs:
                    attrs.remove(PhoneticAttributes.LastVowelBack)

        if LexemeAttribute.ProgressiveVowelDrop in lexeme.attributes:
            root_variant = root_variant[:-1]
            # Recompute the attributes only when the root still has a vowel.
            if RootGenerator._has_vowel(root_variant):
                variant_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(root_variant)
            variant_expectations.add(PhoneticExpectation.VowelStart)

        # Empty expectation sets are replaced by None before building the roots.
        if not base_expectations:
            base_expectations = None
        if not variant_expectations:
            variant_expectations = None

        original = Root(lexeme.root, lexeme, base_expectations, base_attrs)
        modified = Root(root_variant, lexeme, variant_expectations, variant_attrs)

        if original == modified:
            return [original]
        return [original, modified]