def application_matches(cls, word, applied_str, voicing_allowed):
    """
    Tells whether a surface form begins with a suffix-applied string,
    optionally accepting a voiced variant of that string's last letter.

    >>> Phonetics.application_matches(u'armudunu', u'armut', True)
    True
    >>> Phonetics.application_matches(u'armudunu', u'armut', False)
    False
    >>> Phonetics.application_matches(u'armudunu', u'armudu', True)
    True
    >>> Phonetics.application_matches(u'armudunu', u'armudu', False)
    True

    @param word: The full word (surface)
    @param applied_str: Suffix applied part of the word
    @param voicing_allowed: If voicing should be considered or ignored
    @type word: unicode
    @type applied_str: unicode
    @type voicing_allowed: bool
    @rtype: L{bool}
    """
    # An empty application, or one longer than the surface, never matches.
    if not applied_str or len(applied_str) > len(word):
        return False

    # Plain prefix match; this also covers the two strings being equal.
    if word.startswith(applied_str):
        return True

    # Nothing left to try when voicing is ignored, or when the surface
    # already differs before the last letter of the application.
    if not (voicing_allowed and word.startswith(applied_str[:-1])):
        return False

    # Compare the voiced form of the application's last letter against the
    # surface letter found at that same position.
    last_index = len(applied_str) - 1
    application_letter = TurkishAlphabet.get_letter_for_char(applied_str[last_index])
    surface_letter = TurkishAlphabet.get_letter_for_char(word[last_index])
    return TurkishAlphabet.voice(application_letter) == surface_letter
def _handle_phonetics(cls, word, phonetic_attributes, form_str, lexeme_attributes=None):
    """
    Applies voicing/devoicing between a word and a suffix form and resolves
    the upper-case vowel placeholders of the form into concrete vowels.

    The steps, in order:
      - when the lexeme attributes do not contain NoVoicing, the phonetic
        attributes contain LastLetterVoicelessStop and the first letter of
        the form is a vowel, the last letter of the word is replaced by its
        voiced counterpart (if TurkishAlphabet.voice returns one);
      - when the phonetic attributes contain LastLetterVoiceless and the
        first letter of the form has a devoiced counterpart, that
        counterpart replaces the first letter of the form;
      - every 'A', 'I' and 'O' placeholder is replaced by a vowel chosen
        from LastVowelBack / LastVowelUnrounded; '!' is never emitted, and
        an 'I' directly followed by '!' always takes its unrounded variant.

    @param word: The word the form is going to be attached to
    @param phonetic_attributes: Phonetic attributes of the word; a falsy
        value is treated as empty
    @param form_str: The suffix form template
    @param lexeme_attributes: Attributes of the lexeme; a falsy value is
        treated as empty
    @return: tuple of (possibly voiced word, resolved form)
    """
    lexeme_attributes = lexeme_attributes or []
    phonetic_attributes = phonetic_attributes or []

    form_head = TurkishAlphabet.get_letter_for_char(form_str[0])

    # first apply voicing if possible
    voicing_applicable = (
        LexemeAttribute.NoVoicing not in lexeme_attributes
        and PhoneticAttributes.LastLetterVoicelessStop in phonetic_attributes
        and form_head.vowel
    )
    if voicing_applicable:
        voiced = TurkishAlphabet.voice(TurkishAlphabet.get_letter_for_char(word[-1]))
        if voiced:
            word = word[:-1] + voiced.char_value

    # then try devoicing
    if PhoneticAttributes.LastLetterVoiceless in phonetic_attributes:
        devoiced_head = TurkishAlphabet.devoice(form_head)
        if devoiced_head:
            form_str = devoiced_head.char_value + form_str[1:]

    back = PhoneticAttributes.LastVowelBack in phonetic_attributes
    unrounded = PhoneticAttributes.LastVowelUnrounded in phonetic_attributes

    form_length = len(form_str)
    pieces = []
    for index, char in enumerate(form_str):
        if char == '!':
            continue

        letter = TurkishAlphabet.get_letter_for_char(char)
        if not (letter.vowel and letter.upper_case_char_value == char):
            # anything that is not an upper-case vowel is copied verbatim
            pieces.append(char)
            continue

        if char == u'A':
            pieces.append(u'a' if back else u'e')
        elif char == u'I':
            # a following '!' forces the unrounded variant
            forced = index + 1 < form_length and form_str[index + 1] == '!'
            if back:
                pieces.append(u'ı' if unrounded or forced else u'u')
            else:
                pieces.append(u'i' if unrounded or forced else u'ü')
        elif char == u'O':
            pieces.append(u'o' if back else u'ö')
        # any other upper-case vowel contributes nothing to the result

    return word, u''.join(pieces)
def _handle_phonetics(cls, word, phonetic_attributes, form_str, lexeme_attributes=None):
    """
    Applies voicing/devoicing between a word and a suffix form and resolves
    the upper-case vowel placeholders of the form into concrete vowels.

    The steps, in order:
      - when the lexeme attributes do not contain NoVoicing, the phonetic
        attributes contain LastLetterVoicelessStop and the first letter of
        the form is a vowel, the last letter of the word is replaced by its
        voiced counterpart (if TurkishAlphabet.voice returns one);
      - when the phonetic attributes contain LastLetterVoiceless and the
        first letter of the form has a devoiced counterpart, that
        counterpart replaces the first letter of the form;
      - every 'A', 'I' and 'O' placeholder is replaced by a vowel chosen
        from LastVowelBack / LastVowelUnrounded; '!' is never emitted, and
        an 'I' directly followed by '!' always takes its unrounded variant.

    @param word: The word the form is going to be attached to
    @param phonetic_attributes: Phonetic attributes of the word; a falsy
        value is treated as empty
    @param form_str: The suffix form template
    @param lexeme_attributes: Attributes of the lexeme; a falsy value is
        treated as empty
    @return: tuple of (possibly voiced word, resolved form)
    """
    lexeme_attributes = lexeme_attributes or []
    phonetic_attributes = phonetic_attributes or []

    form_head = TurkishAlphabet.get_letter_for_char(form_str[0])

    # first apply voicing if possible
    voicing_applicable = (
        LexemeAttribute.NoVoicing not in lexeme_attributes
        and PhoneticAttributes.LastLetterVoicelessStop in phonetic_attributes
        and form_head.vowel
    )
    if voicing_applicable:
        voiced = TurkishAlphabet.voice(TurkishAlphabet.get_letter_for_char(word[-1]))
        if voiced:
            word = word[:-1] + voiced.char_value

    # then try devoicing
    if PhoneticAttributes.LastLetterVoiceless in phonetic_attributes:
        devoiced_head = TurkishAlphabet.devoice(form_head)
        if devoiced_head:
            form_str = devoiced_head.char_value + form_str[1:]

    back = PhoneticAttributes.LastVowelBack in phonetic_attributes
    unrounded = PhoneticAttributes.LastVowelUnrounded in phonetic_attributes

    form_length = len(form_str)
    pieces = []
    for index, char in enumerate(form_str):
        if char == '!':
            continue

        letter = TurkishAlphabet.get_letter_for_char(char)
        if not (letter.vowel and letter.upper_case_char_value == char):
            # anything that is not an upper-case vowel is copied verbatim
            pieces.append(char)
            continue

        if char == u'A':
            pieces.append(u'a' if back else u'e')
        elif char == u'I':
            # a following '!' forces the unrounded variant
            forced = index + 1 < form_length and form_str[index + 1] == '!'
            if back:
                pieces.append(u'ı' if unrounded or forced else u'u')
            else:
                pieces.append(u'i' if unrounded or forced else u'ü')
        elif char == u'O':
            pieces.append(u'o' if back else u'ö')
        # any other upper-case vowel contributes nothing to the result

    return word, u''.join(pieces)
def application_matches(cls, word, applied_str, voicing_allowed):
    """
    Tells whether a surface form begins with a suffix-applied string,
    optionally accepting a voiced variant of that string's last letter.

    >>> Phonetics.application_matches(u'armudunu', u'armut', True)
    True
    >>> Phonetics.application_matches(u'armudunu', u'armut', False)
    False
    >>> Phonetics.application_matches(u'armudunu', u'armudu', True)
    True
    >>> Phonetics.application_matches(u'armudunu', u'armudu', False)
    True

    @param word: The full word (surface)
    @param applied_str: Suffix applied part of the word
    @param voicing_allowed: If voicing should be considered or ignored
    @type word: unicode
    @type applied_str: unicode
    @type voicing_allowed: bool
    @rtype: L{bool}
    """
    # An empty application, or one longer than the surface, never matches.
    if not applied_str or len(applied_str) > len(word):
        return False

    # Plain prefix match; this also covers the two strings being equal.
    if word.startswith(applied_str):
        return True

    # Nothing left to try when voicing is ignored, or when the surface
    # already differs before the last letter of the application.
    if not (voicing_allowed and word.startswith(applied_str[:-1])):
        return False

    # Compare the voiced form of the application's last letter against the
    # surface letter found at that same position.
    last_index = len(applied_str) - 1
    application_letter = TurkishAlphabet.get_letter_for_char(applied_str[last_index])
    surface_letter = TurkishAlphabet.get_letter_for_char(word[last_index])
    return TurkishAlphabet.voice(application_letter) == surface_letter
def _generate_modified_root_nodes(cls, lexeme):
    """
    Builds the plain root of a lexeme and, when the lexeme's attributes
    change the root sequence, its phonetic attributes or its phonetic
    expectations, a second modified root as well.

    Lexemes carrying RootChange are first handed to _handle_special_roots;
    a truthy result from it is returned as is. Otherwise the Voicing /
    VoicingOpt, Doubling, LastVowelDrop, InverseHarmony and
    ProgressiveVowelDrop attributes are applied in that order.

    @param lexeme: The lexeme to generate roots for; its root, lemma,
        attributes and syntactic_category are read
    @return: the special roots, or a list holding the plain root alone when
        the modified root compares equal to it, or both roots otherwise
    """
    if LexemeAttribute.RootChange in lexeme.attributes:
        special_roots = cls._handle_special_roots(lexeme)
        if special_roots:
            return special_roots

    changed_seq = lexeme.root

    # Computed twice on purpose: each root gets its own attribute collection.
    plain_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
    changed_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
    plain_expectations = set()
    changed_expectations = set()

    if LexemeAttribute.Voicing in lexeme.attributes or LexemeAttribute.VoicingOpt in lexeme.attributes:
        voiced = TurkishAlphabet.voice(TurkishAlphabet.get_letter_for_char(changed_seq[-1]))
        assert voiced is not None
        if lexeme.lemma.endswith(u"nk"):
            voiced = TurkishAlphabet.L_g
        changed_seq = changed_seq[:-1] + voiced.char_value

        if PhoneticAttributes.LastLetterVoicelessStop in changed_attrs:
            changed_attrs.remove(PhoneticAttributes.LastLetterVoicelessStop)

        # Swap the continuant marker so it describes the new last letter.
        if voiced.continuant:
            stale = PhoneticAttributes.LastLetterNotContinuant
            fresh = PhoneticAttributes.LastLetterContinuant
        else:
            stale = PhoneticAttributes.LastLetterContinuant
            fresh = PhoneticAttributes.LastLetterNotContinuant
        if stale in changed_attrs:
            changed_attrs.remove(stale)
        changed_attrs.add(fresh)

        # With optional voicing the plain root stays unrestricted.
        if LexemeAttribute.VoicingOpt not in lexeme.attributes:
            plain_expectations.add(PhoneticExpectation.ConsonantStart)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    if LexemeAttribute.Doubling in lexeme.attributes:
        changed_seq = changed_seq + changed_seq[-1]
        plain_expectations.add(PhoneticExpectation.ConsonantStart)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    if LexemeAttribute.LastVowelDrop in lexeme.attributes:
        # Drop the second-to-last character, keep the last one.
        changed_seq = changed_seq[:-2] + changed_seq[-1]
        if lexeme.syntactic_category != SyntacticCategory.VERB:
            plain_expectations.add(PhoneticExpectation.ConsonantStart)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    if LexemeAttribute.InverseHarmony in lexeme.attributes:
        # Both roots are marked frontal and lose any back marker.
        for attrs in (plain_attrs, changed_attrs):
            attrs.add(PhoneticAttributes.LastVowelFrontal)
            if PhoneticAttributes.LastVowelBack in attrs:
                attrs.remove(PhoneticAttributes.LastVowelBack)

    if LexemeAttribute.ProgressiveVowelDrop in lexeme.attributes:
        changed_seq = changed_seq[:-1]
        # Attributes are recalculated only when a vowel is left.
        if RootGenerator._has_vowel(changed_seq):
            changed_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(changed_seq)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    # Empty expectation sets are passed on as None.
    plain_root = Root(lexeme.root, lexeme, plain_expectations or None, plain_attrs)
    changed_root = Root(changed_seq, lexeme, changed_expectations or None, changed_attrs)

    if plain_root == changed_root:
        return [plain_root]
    return [plain_root, changed_root]
def _generate_modified_root_nodes(cls, lexeme):
    """
    Builds the plain root of a lexeme and, when the lexeme's attributes
    change the root sequence, its phonetic attributes or its phonetic
    expectations, a second modified root as well.

    Lexemes carrying RootChange are first handed to _handle_special_roots;
    a truthy result from it is returned as is. Otherwise the Voicing /
    VoicingOpt, Doubling, LastVowelDrop, InverseHarmony and
    ProgressiveVowelDrop attributes are applied in that order.

    @param lexeme: The lexeme to generate roots for; its root, lemma,
        attributes and syntactic_category are read
    @return: the special roots, or a list holding the plain root alone when
        the modified root compares equal to it, or both roots otherwise
    """
    if LexemeAttribute.RootChange in lexeme.attributes:
        special_roots = cls._handle_special_roots(lexeme)
        if special_roots:
            return special_roots

    changed_seq = lexeme.root

    # Computed twice on purpose: each root gets its own attribute collection.
    plain_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
    changed_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
    plain_expectations = set()
    changed_expectations = set()

    if LexemeAttribute.Voicing in lexeme.attributes or LexemeAttribute.VoicingOpt in lexeme.attributes:
        voiced = TurkishAlphabet.voice(TurkishAlphabet.get_letter_for_char(changed_seq[-1]))
        assert voiced is not None
        if lexeme.lemma.endswith(u"nk"):
            voiced = TurkishAlphabet.L_g
        changed_seq = changed_seq[:-1] + voiced.char_value

        if PhoneticAttributes.LastLetterVoicelessStop in changed_attrs:
            changed_attrs.remove(PhoneticAttributes.LastLetterVoicelessStop)

        # Swap the continuant marker so it describes the new last letter.
        if voiced.continuant:
            stale = PhoneticAttributes.LastLetterNotContinuant
            fresh = PhoneticAttributes.LastLetterContinuant
        else:
            stale = PhoneticAttributes.LastLetterContinuant
            fresh = PhoneticAttributes.LastLetterNotContinuant
        if stale in changed_attrs:
            changed_attrs.remove(stale)
        changed_attrs.add(fresh)

        # With optional voicing the plain root stays unrestricted.
        if LexemeAttribute.VoicingOpt not in lexeme.attributes:
            plain_expectations.add(PhoneticExpectation.ConsonantStart)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    if LexemeAttribute.Doubling in lexeme.attributes:
        changed_seq = changed_seq + changed_seq[-1]
        plain_expectations.add(PhoneticExpectation.ConsonantStart)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    if LexemeAttribute.LastVowelDrop in lexeme.attributes:
        # Drop the second-to-last character, keep the last one.
        changed_seq = changed_seq[:-2] + changed_seq[-1]
        if lexeme.syntactic_category != SyntacticCategory.VERB:
            plain_expectations.add(PhoneticExpectation.ConsonantStart)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    if LexemeAttribute.InverseHarmony in lexeme.attributes:
        # Both roots are marked frontal and lose any back marker.
        for attrs in (plain_attrs, changed_attrs):
            attrs.add(PhoneticAttributes.LastVowelFrontal)
            if PhoneticAttributes.LastVowelBack in attrs:
                attrs.remove(PhoneticAttributes.LastVowelBack)

    if LexemeAttribute.ProgressiveVowelDrop in lexeme.attributes:
        changed_seq = changed_seq[:-1]
        # Attributes are recalculated only when a vowel is left.
        if RootGenerator._has_vowel(changed_seq):
            changed_attrs = Phonetics.calculate_phonetic_attributes_of_plain_sequence(changed_seq)
        changed_expectations.add(PhoneticExpectation.VowelStart)

    # Empty expectation sets are passed on as None.
    plain_root = Root(lexeme.root, lexeme, plain_expectations or None, plain_attrs)
    changed_root = Root(changed_seq, lexeme, changed_expectations or None, changed_attrs)

    if plain_root == changed_root:
        return [plain_root]
    return [plain_root, changed_root]