def find_roots_for_partial_input(self, partial_input, whole_surface=None):
        """
        Generate candidate noun roots for a prefix of a surface form.

        The starting point is a plain root whose lemma, lemma root and root
        string are all ``partial_input``, with category NOUN and no lexeme
        attributes. That plain root is returned alone when nothing follows the
        prefix in ``whole_surface``, when the prefix has no vowel, or when the
        last prefix character or the character following the prefix is
        uppercase. Otherwise the candidates come from
        ``_get_voicing_and_doubling_roots``; they are marked with
        ``LexemeAttribute.InverseHarmony`` when the frontness of the prefix's
        last vowel differs from that of the first vowel found in the surface
        from the prefix's last character onwards, and their phonetic
        attributes are recalculated.

        A one-character prefix of a longer surface yields no roots.

        Although ``whole_surface`` defaults to None, the assertions below
        require a non-empty value that starts with ``partial_input``.

        @type partial_input: unicode
        @type whole_surface: unicode
        @rtype: list of Root
        """
        assert partial_input and whole_surface
        assert len(partial_input) <= len(whole_surface)
        assert whole_surface.startswith(partial_input)
        if len(whole_surface) == len(partial_input):
            assert whole_surface == partial_input

        # Checked before anything is built: in this case the lexeme and root
        # would be constructed only to be thrown away.
        if len(partial_input) < 2 <= len(whole_surface):
            return []

        lexeme_attributes = set()
        lexeme = DynamicLexeme(partial_input, partial_input, SyntacticCategory.NOUN, None,
            lexeme_attributes)

        phonetic_expectations = set()
        phonetic_attributes = Phonetics.calculate_phonetic_attributes(partial_input, lexeme_attributes)

        no_orthographics_root = DynamicRoot(partial_input, lexeme, phonetic_expectations, phonetic_attributes)

        if whole_surface == partial_input or len(partial_input) < 2:
            return [no_orthographics_root]

        last_vowel = Phonetics.get_last_vowel(partial_input)
        if not last_vowel:
            return [no_orthographics_root]

        last_char = partial_input[-1]
        first_char_after_partial_input = whole_surface[len(partial_input)]
        if last_char.isupper() or first_char_after_partial_input.isupper():
            return [no_orthographics_root]

        roots = self._get_voicing_and_doubling_roots(partial_input, last_char, first_char_after_partial_input,
            no_orthographics_root)

        # NOTE(review): the slice starts at the LAST character of partial_input
        # (index len - 1), not at the first character after it, so a prefix
        # ending in a vowel is compared against its own last vowel.
        # Kept as in the original -- confirm this is intended.
        first_vowel_letter_after_partial_input = self._get_first_vowel(whole_surface[len(partial_input) - 1:])
        if first_vowel_letter_after_partial_input and \
                last_vowel.frontal != first_vowel_letter_after_partial_input.frontal:
            for r in roots:
                # Rebind to a fresh copy before adding (presumably so a set
                # shared with another lexeme is not mutated -- verify).
                r.lexeme.attributes = set(r.lexeme.attributes)
                r.lexeme.attributes.add(LexemeAttribute.InverseHarmony)

        # Attributes may have changed above, so phonetic attributes are
        # recalculated for every candidate.
        for r in roots:
            r.phonetic_attributes = Phonetics.calculate_phonetic_attributes(r.str, r.lexeme.attributes)

        return roots
# Example #2
0
    def find_roots_for_partial_input(self, partial_input, whole_surface=None):
        """
        Generate candidate verb roots for a prefix of a surface form.

        No roots are produced when the prefix is shorter than two characters,
        has no vowel, or is followed by an uppercase character. When the
        prefix is the whole surface, the plain root is returned only if
        ``_seems_like_a_valid_verb_root`` accepts the prefix. Otherwise the
        plain root is combined with the progressive-vowel-drop, aorist,
        causative and passive candidates (plus their voicing variants when
        the prefix ends in 'd' and a vowel follows), and only the candidates
        whose lexeme root passes ``_seems_like_a_valid_verb_root`` are kept.

        Although ``whole_surface`` defaults to None, the assertions below
        require a non-empty value that starts with ``partial_input``.

        @type partial_input: unicode
        @type whole_surface: unicode
        @rtype: list of Root
        """
        assert partial_input and whole_surface
        assert len(partial_input) <= len(whole_surface)
        assert whole_surface.startswith(partial_input)
        if len(whole_surface) == len(partial_input):
            assert whole_surface == partial_input

        # not possible except (d,diyor) and (y,yiyor). but they are already in the dictionary
        if len(partial_input) < 2:
            return []

        last_vowel = Phonetics.get_last_vowel(partial_input)
        if not last_vowel:
            return []

        lexeme = DynamicLexeme(partial_input, partial_input, SyntacticCategory.VERB, None, set())
        phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(partial_input)
        no_attr_root = DynamicRoot(partial_input, lexeme, set(), phonetic_attributes)

        self._set_lexeme_and_phonetic_attributes([no_attr_root])
        self._set_lemma([no_attr_root])

        last_letter = TurkishAlphabet.get_letter_for_char(partial_input[-1])

        if whole_surface == partial_input:
            if self._seems_like_a_valid_verb_root(partial_input):
                return [no_attr_root]
            return []

        first_char_after_partial_input = whole_surface[len(partial_input)]
        if first_char_after_partial_input.isupper():
            return []

        first_letter_after_partial_input = TurkishAlphabet.get_letter_for_char(first_char_after_partial_input)

        # whole_surface starts with partial_input (asserted above), so testing
        # what follows the prefix is the same as testing partial_input + suffix.
        remainder = whole_surface[len(partial_input):]
        ends_with_consonant = not last_letter.vowel

        might_have_ProgressiveVowelDrop = ends_with_consonant and \
            remainder.startswith((u'iyor', u'ıyor', u'uyor', u'üyor'))

        might_have_Aorist_A = ends_with_consonant and remainder.startswith((u'ar', u'er'))

        # no Aorist_I for -ur, -ür
        might_have_Aorist_I = ends_with_consonant and remainder.startswith((u'ır', u'ir'))

        # for other letters, no voicing in verbs. {git+er->gider} vs {yapar, açar, diker}
        voicing_might_have_happened = last_letter == TurkishAlphabet.L_d and first_letter_after_partial_input.vowel

        # One set of candidates per phenomenon, in the order they are looked up.
        candidate_groups = [
            self._get_progressive_vowel_drop_roots(partial_input, whole_surface, no_attr_root, last_vowel)
            if might_have_ProgressiveVowelDrop else set(),
            self._get_aorist_A_roots(no_attr_root) if might_have_Aorist_A else set(),
            self._get_aorist_I_roots(no_attr_root) if might_have_Aorist_I else set(),
            self._get_possible_causative_roots(partial_input, whole_surface, no_attr_root),
            self._get_possible_passive_roots(last_letter, partial_input, whole_surface, no_attr_root),
        ]

        generated_roots = set([no_attr_root])

        if voicing_might_have_happened:
            # Every candidate, including the plain root, also gets its voicing variant.
            candidate_groups = [
                group.union([self._get_possible_voicing_root(r) for r in group])
                for group in candidate_groups
            ]
            generated_roots.add(self._get_possible_voicing_root(no_attr_root))

        for group in candidate_groups:
            generated_roots.update(group)

        self._set_lexeme_and_phonetic_attributes(generated_roots)
        self._set_lemma(generated_roots)

        # A list comprehension rather than filter(): filter() returns a lazy
        # iterator on Python 3, while callers are promised a list.
        return [r for r in generated_roots if self._seems_like_a_valid_verb_root(r.lexeme.root)]
# Example #3
0
    def find_roots_for_partial_input(self, partial_input, whole_surface=None):
        """
        Generate candidate verb roots for a prefix of a surface form.

        No roots are produced when the prefix is shorter than two characters,
        has no vowel, or is followed by an uppercase character. When the
        prefix is the whole surface, the plain root is returned only if
        ``_seems_like_a_valid_verb_root`` accepts the prefix. Otherwise the
        plain root is combined with the progressive-vowel-drop, aorist,
        causative and passive candidates (plus their voicing variants when
        the prefix ends in 'd' and a vowel follows), and only the candidates
        whose lexeme root passes ``_seems_like_a_valid_verb_root`` are kept.

        Although ``whole_surface`` defaults to None, the assertions below
        require a non-empty value that starts with ``partial_input``.

        @type partial_input: unicode
        @type whole_surface: unicode
        @rtype: list of Root
        """
        assert partial_input and whole_surface
        assert len(partial_input) <= len(whole_surface)
        assert whole_surface.startswith(partial_input)
        if len(whole_surface) == len(partial_input):
            assert whole_surface == partial_input

        # not possible except (d,diyor) and (y,yiyor). but they are already in the dictionary
        if len(partial_input) < 2:
            return []

        last_vowel = Phonetics.get_last_vowel(partial_input)
        if not last_vowel:
            return []

        lexeme = DynamicLexeme(partial_input, partial_input, SyntacticCategory.VERB, None, set())
        phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(partial_input)
        no_attr_root = DynamicRoot(partial_input, lexeme, set(), phonetic_attributes)

        self._set_lexeme_and_phonetic_attributes([no_attr_root])
        self._set_lemma([no_attr_root])

        last_letter = TurkishAlphabet.get_letter_for_char(partial_input[-1])

        if whole_surface == partial_input:
            if self._seems_like_a_valid_verb_root(partial_input):
                return [no_attr_root]
            return []

        first_char_after_partial_input = whole_surface[len(partial_input)]
        if first_char_after_partial_input.isupper():
            return []

        first_letter_after_partial_input = TurkishAlphabet.get_letter_for_char(first_char_after_partial_input)

        # whole_surface starts with partial_input (asserted above), so testing
        # what follows the prefix is the same as testing partial_input + suffix.
        remainder = whole_surface[len(partial_input):]
        ends_with_consonant = not last_letter.vowel

        might_have_ProgressiveVowelDrop = ends_with_consonant and \
            remainder.startswith((u'iyor', u'ıyor', u'uyor', u'üyor'))

        might_have_Aorist_A = ends_with_consonant and remainder.startswith((u'ar', u'er'))

        # no Aorist_I for -ur, -ür
        might_have_Aorist_I = ends_with_consonant and remainder.startswith((u'ır', u'ir'))

        # for other letters, no voicing in verbs. {git+er->gider} vs {yapar, açar, diker}
        voicing_might_have_happened = last_letter == TurkishAlphabet.L_d and first_letter_after_partial_input.vowel

        # One set of candidates per phenomenon, in the order they are looked up.
        candidate_groups = [
            self._get_progressive_vowel_drop_roots(partial_input, whole_surface, no_attr_root, last_vowel)
            if might_have_ProgressiveVowelDrop else set(),
            self._get_aorist_A_roots(no_attr_root) if might_have_Aorist_A else set(),
            self._get_aorist_I_roots(no_attr_root) if might_have_Aorist_I else set(),
            self._get_possible_causative_roots(partial_input, whole_surface, no_attr_root),
            self._get_possible_passive_roots(last_letter, partial_input, whole_surface, no_attr_root),
        ]

        generated_roots = set([no_attr_root])

        if voicing_might_have_happened:
            # Every candidate, including the plain root, also gets its voicing variant.
            candidate_groups = [
                group.union([self._get_possible_voicing_root(r) for r in group])
                for group in candidate_groups
            ]
            generated_roots.add(self._get_possible_voicing_root(no_attr_root))

        for group in candidate_groups:
            generated_roots.update(group)

        self._set_lexeme_and_phonetic_attributes(generated_roots)
        self._set_lemma(generated_roots)

        # A list comprehension rather than filter(): filter() returns a lazy
        # iterator on Python 3, while callers are promised a list.
        return [r for r in generated_roots if self._seems_like_a_valid_verb_root(r.lexeme.root)]