def find_roots_for_partial_input(self, partial_input, whole_surface=None):
    """
    Propose noun roots for C{partial_input}, a leading part of C{whole_surface}.

    Despite the C{None} default, C{whole_surface} is required: the first
    assertion rejects a falsy value.

    Outcomes, in the order they are checked:
      - C{[]} when C{partial_input} is a single character and
        C{whole_surface} is longer.
      - A one-element list holding the plain root (text equal to
        C{partial_input}, category NOUN, no lexeme attributes) when
        C{partial_input} is the whole surface, when
        C{Phonetics.get_last_vowel} finds nothing in it, or when its last
        character or the character right after it is uppercase.
      - Otherwise the roots produced by C{_get_voicing_and_doubling_roots}.
        They are marked with C{LexemeAttribute.InverseHarmony} when the
        C{frontal} value of the last vowel of C{partial_input} differs from
        that of the first vowel found in the inspected remainder, and their
        phonetic attributes are recalculated afterwards.

    @type partial_input: unicode
    @type whole_surface: unicode
    @rtype: list of Root
    @raise AssertionError: if either argument is empty, or C{partial_input}
        is not a prefix of C{whole_surface}
    """
    assert partial_input and whole_surface
    assert len(partial_input) <= len(whole_surface)
    assert whole_surface.startswith(partial_input)
    if len(whole_surface) == len(partial_input):
        assert whole_surface == partial_input

    # A one-character prefix of a longer surface is never offered as a root.
    # Checked first so the rejected case does not build objects it discards.
    if len(partial_input) < 2 <= len(whole_surface):
        return []

    # The plain candidate: the prefix itself serves as root, lemma and lemma root.
    lexeme_attributes = set()
    lexeme = DynamicLexeme(partial_input, partial_input, SyntacticCategory.NOUN, None, lexeme_attributes)
    phonetic_expectations = set()
    phonetic_attributes = Phonetics.calculate_phonetic_attributes(partial_input, lexeme_attributes)
    no_orthographics_root = DynamicRoot(partial_input, lexeme, phonetic_expectations, phonetic_attributes)

    # Past the guard above, a one-character prefix implies a one-character
    # surface, so comparing the two strings covers that case as well.
    if whole_surface == partial_input:
        return [no_orthographics_root]

    last_vowel = Phonetics.get_last_vowel(partial_input)
    if not last_vowel:
        return [no_orthographics_root]

    last_char = partial_input[-1]
    first_char_after_partial_input = whole_surface[len(partial_input)]
    if last_char.isupper() or first_char_after_partial_input.isupper():
        return [no_orthographics_root]

    roots = self._get_voicing_and_doubling_roots(partial_input, last_char, first_char_after_partial_input,
                                                 no_orthographics_root)

    # NOTE(review): the slice starts at len(partial_input) - 1, so it still
    # includes the last character of partial_input even though the variable
    # name says "after". Left as found - confirm whether that is intended.
    first_vowel_letter_after_partial_input = self._get_first_vowel(whole_surface[len(partial_input) - 1:])
    if first_vowel_letter_after_partial_input \
            and last_vowel.frontal != first_vowel_letter_after_partial_input.frontal:
        for r in roots:
            # Rebind to a fresh set before adding, so the set object the
            # lexeme held before is not mutated.
            r.lexeme.attributes = set(r.lexeme.attributes)
            r.lexeme.attributes.add(LexemeAttribute.InverseHarmony)

    # Lexeme attributes may have changed above, so recompute for every root.
    for r in roots:
        r.phonetic_attributes = Phonetics.calculate_phonetic_attributes(r.str, r.lexeme.attributes)

    return roots
def find_roots_for_partial_input(self, partial_input, whole_surface=None):
    """
    Propose verb roots for C{partial_input}, a leading part of C{whole_surface}.

    Despite the C{None} default, C{whole_surface} is required: the first
    assertion rejects a falsy value.

    Outcomes, in the order they are checked:
      - C{[]} when C{partial_input} has fewer than two characters, or when
        C{Phonetics.get_last_vowel} finds nothing in it.
      - When C{partial_input} is the whole surface: a one-element list with
        the plain root (text equal to C{partial_input}, category VERB) if
        C{_seems_like_a_valid_verb_root} accepts it, otherwise C{[]}.
      - C{[]} when the character right after C{partial_input} is uppercase.
      - Otherwise the plain root plus whatever the progressive-vowel-drop,
        aorist A, aorist I, causative and passive helper methods return.
        When C{partial_input} ends in C{TurkishAlphabet.L_d} and a vowel
        follows, the result of C{_get_possible_voicing_root} is added for
        each of those roots as well. Lexeme/phonetic attributes and lemmas
        are then set on all candidates, and only those whose C{lexeme.root}
        passes C{_seems_like_a_valid_verb_root} are returned.

    @type partial_input: unicode
    @type whole_surface: unicode
    @rtype: list of Root
    @raise AssertionError: if either argument is empty, or C{partial_input}
        is not a prefix of C{whole_surface}
    """
    assert partial_input and whole_surface
    assert len(partial_input) <= len(whole_surface)
    assert whole_surface.startswith(partial_input)
    if len(whole_surface) == len(partial_input):
        assert whole_surface == partial_input

    if len(partial_input) < 2:
        # not possible except (d,diyor) and (y,yiyor). but they are already in the dictionary
        return []

    last_vowel = Phonetics.get_last_vowel(partial_input)
    if not last_vowel:
        return []

    # The plain candidate: the prefix itself serves as root, lemma and lemma root.
    lexeme = DynamicLexeme(partial_input, partial_input, SyntacticCategory.VERB, None, set())
    phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(partial_input)
    no_attr_root = DynamicRoot(partial_input, lexeme, set(), phonetic_attributes)

    self._set_lexeme_and_phonetic_attributes([no_attr_root])
    self._set_lemma([no_attr_root])

    last_letter = TurkishAlphabet.get_letter_for_char(partial_input[-1])

    if whole_surface == partial_input:
        # Nothing follows the prefix, so the plain root is the only candidate.
        if self._seems_like_a_valid_verb_root(partial_input):
            return [no_attr_root]
        return []

    suffix_start = len(partial_input)
    first_char_after_partial_input = whole_surface[suffix_start]
    if first_char_after_partial_input.isupper():
        return []

    first_letter_after_partial_input = TurkishAlphabet.get_letter_for_char(first_char_after_partial_input)

    # Every alternation below is only considered after a consonant-final prefix.
    ends_with_consonant = not last_letter.vowel

    # startswith(prefixes, start) tests the text right after partial_input
    # against each alternative without building concatenated strings.
    might_have_ProgressiveVowelDrop = ends_with_consonant and \
        whole_surface.startswith((u'iyor', u'ıyor', u'uyor', u'üyor'), suffix_start)

    might_have_Aorist_A = ends_with_consonant and whole_surface.startswith((u'ar', u'er'), suffix_start)

    # no Aorist_I for -ur, -ür
    might_have_Aorist_I = ends_with_consonant and whole_surface.startswith((u'ır', u'ir'), suffix_start)

    # for other letters, no voicing in verbs. {git+er->gider} vs {yapar, açar, diker}
    voicing_might_have_happened = last_letter == TurkishAlphabet.L_d and first_letter_after_partial_input.vowel

    possible_progressive_vowel_drop_roots = self._get_progressive_vowel_drop_roots(
        partial_input, whole_surface, no_attr_root, last_vowel) if might_have_ProgressiveVowelDrop else set()
    possible_aorist_A_roots = self._get_aorist_A_roots(no_attr_root) if might_have_Aorist_A else set()
    possible_aorist_I_roots = self._get_aorist_I_roots(no_attr_root) if might_have_Aorist_I else set()
    possible_causative_roots = self._get_possible_causative_roots(partial_input, whole_surface, no_attr_root)
    possible_passive_roots = self._get_possible_passive_roots(
        last_letter, partial_input, whole_surface, no_attr_root)

    def with_voicing_variants(roots):
        # New set holding the given roots plus the voicing root of each one.
        return roots.union({self._get_possible_voicing_root(r) for r in roots})

    if voicing_might_have_happened:
        possible_progressive_vowel_drop_roots = with_voicing_variants(possible_progressive_vowel_drop_roots)
        possible_aorist_A_roots = with_voicing_variants(possible_aorist_A_roots)
        possible_aorist_I_roots = with_voicing_variants(possible_aorist_I_roots)
        possible_causative_roots = with_voicing_variants(possible_causative_roots)
        possible_passive_roots = with_voicing_variants(possible_passive_roots)

    generated_roots = {no_attr_root}
    if voicing_might_have_happened:
        generated_roots.add(self._get_possible_voicing_root(no_attr_root))
    generated_roots.update(possible_progressive_vowel_drop_roots,
                           possible_aorist_A_roots,
                           possible_aorist_I_roots,
                           possible_causative_roots,
                           possible_passive_roots)

    self._set_lexeme_and_phonetic_attributes(generated_roots)
    self._set_lemma(generated_roots)

    # A list comprehension, not filter(): filter() is lazy on Python 3 and
    # the documented return type is a list.
    return [r for r in generated_roots if self._seems_like_a_valid_verb_root(r.lexeme.root)]
def find_roots_for_partial_input(self, partial_input, whole_surface=None):
    """
    Propose verb roots for C{partial_input}, a leading part of C{whole_surface}.

    Despite the C{None} default, C{whole_surface} is required: the first
    assertion rejects a falsy value.

    Outcomes, in the order they are checked:
      - C{[]} when C{partial_input} has fewer than two characters, or when
        C{Phonetics.get_last_vowel} finds nothing in it.
      - When C{partial_input} is the whole surface: a one-element list with
        the plain root (text equal to C{partial_input}, category VERB) if
        C{_seems_like_a_valid_verb_root} accepts it, otherwise C{[]}.
      - C{[]} when the character right after C{partial_input} is uppercase.
      - Otherwise the plain root plus whatever the progressive-vowel-drop,
        aorist A, aorist I, causative and passive helper methods return.
        When C{partial_input} ends in C{TurkishAlphabet.L_d} and a vowel
        follows, the result of C{_get_possible_voicing_root} is added for
        each of those roots as well. Lexeme/phonetic attributes and lemmas
        are then set on all candidates, and only those whose C{lexeme.root}
        passes C{_seems_like_a_valid_verb_root} are returned.

    @type partial_input: unicode
    @type whole_surface: unicode
    @rtype: list of Root
    @raise AssertionError: if either argument is empty, or C{partial_input}
        is not a prefix of C{whole_surface}
    """
    assert partial_input and whole_surface
    assert len(partial_input) <= len(whole_surface)
    assert whole_surface.startswith(partial_input)
    if len(whole_surface) == len(partial_input):
        assert whole_surface == partial_input

    if len(partial_input) < 2:
        # not possible except (d,diyor) and (y,yiyor). but they are already in the dictionary
        return []

    last_vowel = Phonetics.get_last_vowel(partial_input)
    if not last_vowel:
        return []

    # The plain candidate: the prefix itself serves as root, lemma and lemma root.
    lexeme = DynamicLexeme(partial_input, partial_input, SyntacticCategory.VERB, None, set())
    phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(partial_input)
    no_attr_root = DynamicRoot(partial_input, lexeme, set(), phonetic_attributes)

    self._set_lexeme_and_phonetic_attributes([no_attr_root])
    self._set_lemma([no_attr_root])

    last_letter = TurkishAlphabet.get_letter_for_char(partial_input[-1])

    if whole_surface == partial_input:
        # Nothing follows the prefix, so the plain root is the only candidate.
        if self._seems_like_a_valid_verb_root(partial_input):
            return [no_attr_root]
        return []

    suffix_start = len(partial_input)
    first_char_after_partial_input = whole_surface[suffix_start]
    if first_char_after_partial_input.isupper():
        return []

    first_letter_after_partial_input = TurkishAlphabet.get_letter_for_char(first_char_after_partial_input)

    # Every alternation below is only considered after a consonant-final prefix.
    ends_with_consonant = not last_letter.vowel

    # startswith(prefixes, start) tests the text right after partial_input
    # against each alternative without building concatenated strings.
    might_have_ProgressiveVowelDrop = ends_with_consonant and \
        whole_surface.startswith((u'iyor', u'ıyor', u'uyor', u'üyor'), suffix_start)

    might_have_Aorist_A = ends_with_consonant and whole_surface.startswith((u'ar', u'er'), suffix_start)

    # no Aorist_I for -ur, -ür
    might_have_Aorist_I = ends_with_consonant and whole_surface.startswith((u'ır', u'ir'), suffix_start)

    # for other letters, no voicing in verbs. {git+er->gider} vs {yapar, açar, diker}
    voicing_might_have_happened = last_letter == TurkishAlphabet.L_d and first_letter_after_partial_input.vowel

    possible_progressive_vowel_drop_roots = self._get_progressive_vowel_drop_roots(
        partial_input, whole_surface, no_attr_root, last_vowel) if might_have_ProgressiveVowelDrop else set()
    possible_aorist_A_roots = self._get_aorist_A_roots(no_attr_root) if might_have_Aorist_A else set()
    possible_aorist_I_roots = self._get_aorist_I_roots(no_attr_root) if might_have_Aorist_I else set()
    possible_causative_roots = self._get_possible_causative_roots(partial_input, whole_surface, no_attr_root)
    possible_passive_roots = self._get_possible_passive_roots(
        last_letter, partial_input, whole_surface, no_attr_root)

    def with_voicing_variants(roots):
        # New set holding the given roots plus the voicing root of each one.
        return roots.union({self._get_possible_voicing_root(r) for r in roots})

    if voicing_might_have_happened:
        possible_progressive_vowel_drop_roots = with_voicing_variants(possible_progressive_vowel_drop_roots)
        possible_aorist_A_roots = with_voicing_variants(possible_aorist_A_roots)
        possible_aorist_I_roots = with_voicing_variants(possible_aorist_I_roots)
        possible_causative_roots = with_voicing_variants(possible_causative_roots)
        possible_passive_roots = with_voicing_variants(possible_passive_roots)

    generated_roots = {no_attr_root}
    if voicing_might_have_happened:
        generated_roots.add(self._get_possible_voicing_root(no_attr_root))
    generated_roots.update(possible_progressive_vowel_drop_roots,
                           possible_aorist_A_roots,
                           possible_aorist_I_roots,
                           possible_causative_roots,
                           possible_passive_roots)

    self._set_lexeme_and_phonetic_attributes(generated_roots)
    self._set_lemma(generated_roots)

    # A list comprehension, not filter(): filter() is lazy on Python 3 and
    # the documented return type is a list.
    return [r for r in generated_roots if self._seems_like_a_valid_verb_root(r.lexeme.root)]