def find_roots_for_partial_input(self, partial_input, whole_surface=None):
        """
        @type partial_input: unicode
        @type whole_surface: unicode
        @rtype: list of Root
        """
        assert partial_input and whole_surface
        assert len(partial_input) <= len(whole_surface)
        assert whole_surface.startswith(partial_input)
        if len(whole_surface) == len(partial_input):
            assert whole_surface == partial_input

        root = partial_input
        lemma = root
        lemma_root = lemma
        syntactic_category = SyntacticCategory.NOUN
        secondary_syntactic_category = None
        lexeme_attributes = set()

        lexeme = DynamicLexeme(lemma, lemma_root, syntactic_category, secondary_syntactic_category,
            lexeme_attributes)

        phonetic_expectations = set()
        phonetic_attributes = Phonetics.calculate_phonetic_attributes(partial_input, lexeme_attributes)

        no_orthographics_root = DynamicRoot(root, lexeme, phonetic_expectations, phonetic_attributes)

        if len(partial_input) < 2 <= len(whole_surface):
            return []

        if whole_surface == partial_input or len(partial_input) < 2:
            return [no_orthographics_root]

        last_vowel = Phonetics.get_last_vowel(partial_input)

        if not last_vowel:
            return [no_orthographics_root]

        last_char = partial_input[-1]
        first_char_after_partial_input = whole_surface[len(partial_input)]
        if last_char.isupper() or first_char_after_partial_input.isupper():
            return [no_orthographics_root]

        roots = self._get_voicing_and_doubling_roots(partial_input, last_char, first_char_after_partial_input,
            no_orthographics_root)

        first_vowel_letter_after_partial_input = self._get_first_vowel(whole_surface[len(partial_input) - 1:])
        if first_vowel_letter_after_partial_input:
            if last_vowel.frontal != first_vowel_letter_after_partial_input.frontal:
                for r in roots:
                    r.lexeme.attributes = set(r.lexeme.attributes)
                    r.lexeme.attributes.add(LexemeAttribute.InverseHarmony)

        for r in roots:
            phonetic_attributes = Phonetics.calculate_phonetic_attributes(r.str, r.lexeme.attributes)
            r.phonetic_attributes = phonetic_attributes

        return roots
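
A minimal usage sketch of the method above, assuming it lives on a brute-force noun root finder; the class name BruteForceNounRootFinder and its construction are assumptions, only the method call is taken from the code.

# Hypothetical usage sketch; the class name is an assumption, the method is
# the one defined above.
finder = BruteForceNounRootFinder()

# Propose noun roots for the first four characters of the surface u'masalar'.
for root in finder.find_roots_for_partial_input(u'masa', u'masalar'):
    print root.str, root.lexeme.syntactic_category, root.lexeme.attributes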
Example #2
    def __init__(self, abbr):
        root = abbr
        lexeme = DynamicLexeme(abbr, abbr, SyntacticCategory.NOUN, SecondarySyntacticCategory.ABBREVIATION, None)
        phonetic_attributes = None

        last_letter = TurkishAlphabet.get_letter_for_char(abbr[-1])
        if last_letter.vowel:
            phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(abbr)
        else:
            phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(abbr + u"E")

        phonetic_expectations = None
        super(AbbreviationRoot, self).__init__(root, lexeme, phonetic_expectations, phonetic_attributes)
Example #3
def transition_allowed_for_suffix_form(morpheme_container, suffix_form):
    if suffix_form.precondition and not suffix_form.precondition.is_satisfied_by(morpheme_container):
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('      Precondition "%s" of suffix form "%s" is not satisfied with transitions %s, skipping.', suffix_form.form, suffix_form.precondition, morpheme_container)
        return False

    if suffix_form.form and not Phonetics.expectations_satisfied(morpheme_container.get_phonetic_expectations(), suffix_form.form):
        logger.debug('      Suffix form "%s" does not satisfy phonetic expectations %s, skipping.', suffix_form.form, morpheme_container.get_phonetic_expectations())
        return False

    if not Phonetics.is_suffix_form_applicable(morpheme_container.get_surface_so_far(), suffix_form.form):
        logger.debug('      Suffix form "%s" is not phonetically applicable to "%s", skipping.', suffix_form.form, morpheme_container.get_surface_so_far())
        return False

    return True
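
A hedged sketch of how this predicate would typically be used: it filters candidate suffix forms before any phonetic application is attempted. The surrounding name candidate_suffix_forms is an assumption, not part of the code above.

# Hypothetical filtering step built on the predicate above; how the candidate
# suffix forms are enumerated (candidate_suffix_forms) is an assumption.
applicable_forms = [suffix_form for suffix_form in candidate_suffix_forms
                    if transition_allowed_for_suffix_form(morpheme_container, suffix_form)]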
Example #4
    def _get_possible_passive_roots(self, last_letter, partial_input, whole_surface, no_attr_root):
        might_have_Passive_Il = (not last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'il', u'ıl', u'ul', u'ül']])) or\
                                (last_letter.vowel and whole_surface.startswith(partial_input+ u'l'))

        might_have_Passive_In = (not last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'in', u'ın', u'un', u'ün']])) or\
                                (last_letter.vowel and whole_surface.startswith(partial_input+ u'n'))

        might_have_Passive_InIl = (not last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'inil', u'ınıl', u'unul', u'ünül']])) or\
                                  (last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'nil', u'nıl', u'nul', u'nül']]))

        might_have_passives = {(LexemeAttribute.Passive_Il, might_have_Passive_Il),
                               (LexemeAttribute.Passive_In, might_have_Passive_In),
                               (LexemeAttribute.Passive_InIl, might_have_Passive_InIl)}

        might_have_passives = filter(lambda t : t[1], might_have_passives)

        passive_roots = set()

        for passive_attr, might_have_happened in might_have_passives:
            # cannot have other passives at the same time
            # cannot have any other causative at the same time
            # cannot have progressive vowel drop at the same time
            # cannot have aorist_A or aorist_I at the same time
            generated_root = no_attr_root._clone(True)

            generated_root.lexeme.attributes = {passive_attr} if passive_attr else set()

            generated_root.lexeme.phonetic_attributes = Phonetics.calculate_phonetic_attributes(partial_input, generated_root.lexeme.attributes)

            passive_roots.add(generated_root)

        return passive_roots
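
A self-contained illustration of the startswith() candidate checks above: the passive reading is only considered when the remainder of the surface can begin with a passive allomorph, and a vowel-final partial input switches to the shorter allomorph. The example surfaces are ordinary Turkish forms chosen for illustration.

# Self-contained illustration of the -Il candidate check above;
# plain string operations only, no library calls.
def might_have_passive_il(partial_input, whole_surface, last_char_is_vowel):
    if last_char_is_vowel:
        return whole_surface.startswith(partial_input + u'l')
    return any(whole_surface.startswith(partial_input + s)
               for s in [u'il', u'ıl', u'ul', u'ül'])

print might_have_passive_il(u'yap', u'yapıldı', False)   # True: remainder starts with u'ıl'
print might_have_passive_il(u'oku', u'okundu', True)     # False: the -In passive applies here, not -Il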
Example #5
 def _set_lemma(self, generated_roots):
     for r in generated_roots:
         word, applied_suffix_form = Phonetics.apply(
             r.lexeme.root, r.phonetic_attributes, u'mAk',
             r.lexeme.attributes)
         assert word and applied_suffix_form
         r.lexeme.lemma = word + applied_suffix_form
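
The lemma is rebuilt by applying the infinitive template u'mAk' to the root; the meta-vowel A resolves by vowel harmony. A rough plain-Python stand-in for that resolution, not a call into Phonetics.apply:

# Rough stand-in for the u'mAk' application above: resolve the meta-vowel A
# to 'a' after a back vowel and 'e' otherwise. Illustration only; assumes the
# root contains a vowel.
def infinitive(root):
    last_vowel = [c for c in root if c in u'aıoueiöü'][-1]
    return root + (u'mak' if last_vowel in u'aıou' else u'mek')

print infinitive(u'yap')   # yapmak
print infinitive(u'gel')   # gelmek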
Example #6
 def __init__(self, numeral):
     root = numeral
     lexeme = DynamicLexeme(numeral, numeral, SyntacticCategory.NUMERAL, SecondarySyntacticCategory.DIGITS, None)
     phonetic_expectations = None
     phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
         DigitsToNumberConverter.convert_digits_to_words(numeral)
     )
     super(NumeralRoot, self).__init__(root, lexeme, phonetic_expectations, phonetic_attributes)
Example #7
 def get_phonetic_attributes(self):
     if self.has_transitions():
         suffix_so_far = self.get_surface_so_far()[len(self._root.str):]
         if not suffix_so_far or suffix_so_far.isspace() or not suffix_so_far.isalnum():
             return self._root.phonetic_attributes
         else:
             return Phonetics.calculate_phonetic_attributes(self.get_surface_so_far(), self.get_lexeme_attributes())
     else:
         return self._root.phonetic_attributes
Example #8
 def __init__(self, numeral):
     root = numeral
     lexeme = DynamicLexeme(numeral, numeral, SyntacticCategory.NUMERAL,
                            SecondarySyntacticCategory.DIGITS, None)
     phonetic_expectations = None
     phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
         DigitsToNumberConverter.convert_digits_to_words(numeral))
     super(NumeralRoot, self).__init__(root, lexeme, phonetic_expectations,
                                       phonetic_attributes)
Example #9
    def __init__(self, abbr):
        root = abbr
        lexeme = DynamicLexeme(abbr, abbr, SyntacticCategory.NOUN,
                               SecondarySyntacticCategory.ABBREVIATION, None)
        phonetic_attributes = None

        last_letter = TurkishAlphabet.get_letter_for_char(abbr[-1])
        if last_letter.vowel:
            phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                abbr)
        else:
            phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                abbr + u'E')

        phonetic_expectations = None
        super(AbbreviationRoot,
              self).__init__(root, lexeme, phonetic_expectations,
                             phonetic_attributes)
Example #10
 def __init__(self, noun):
     root = noun
     lexeme = DynamicLexeme(noun, noun, SyntacticCategory.NOUN,
                            SecondarySyntacticCategory.PROPER_NOUN, None)
     phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
         noun)
     phonetic_expectations = None
     super(ProperNounRoot,
           self).__init__(root, lexeme, phonetic_expectations,
                          phonetic_attributes)
Example #11
 def generate(cls, lexeme):
     if any(x in lexeme.attributes for x in RootGenerator._modifiers):
         try:
             return RootGenerator._generate_modified_root_nodes(lexeme)
         except:
             print u'Error generating roots for lexeme : {}'.format(lexeme)
             raise
     else:
         phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
         root = Root(lexeme.root, lexeme, None, phonetic_attributes)
         return [root]
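
A hedged usage sketch of the generator above. How the lexeme is obtained is not shown here (it would normally come from the dictionary loader), and root.str is assumed from the other examples in this listing.

# Hypothetical usage sketch; obtaining `lexeme` is assumed, and `root.str`
# is assumed from the other examples above.
for root in RootGenerator.generate(lexeme):
    print root.str, root.phonetic_attributes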
Example #12
 def _set_lexeme_and_phonetic_attributes(self, generated_roots):
     for r in generated_roots:
         r.phonetic_attributes = Phonetics.calculate_phonetic_attributes(r.str, r.lexeme.attributes)
         if r.str.endswith(u'd') and r.lexeme.root.endswith(u't'):
             if LexemeAttribute.NoVoicing in r.lexeme.attributes:
                 r.lexeme.attributes.remove(LexemeAttribute.NoVoicing)
             r.lexeme.attributes.add(LexemeAttribute.Voicing)
         else:
             if LexemeAttribute.Voicing in r.lexeme.attributes:
                 r.lexeme.attributes.remove(LexemeAttribute.Voicing)
             r.lexeme.attributes.add(LexemeAttribute.NoVoicing)
Example #13
 def get_phonetic_attributes(self):
     if self.has_transitions():
         suffix_so_far = self.get_surface_so_far()[len(self._root.str):]
         if not suffix_so_far or suffix_so_far.isspace() or not suffix_so_far.isalnum():
             return self._root.phonetic_attributes
         else:
             return Phonetics.calculate_phonetic_attributes(
                 self.get_surface_so_far(), self.get_lexeme_attributes())
     else:
         return self._root.phonetic_attributes
Example #14
 def generate(cls, lexeme):
     if any(x in lexeme.attributes for x in RootGenerator._modifiers):
         try:
             return RootGenerator._generate_modified_root_nodes(lexeme)
         except:
             print u'Error generating roots for lexeme : {}'.format(lexeme)
             raise
     else:
         phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
             lexeme.root)
         root = Root(lexeme.root, lexeme, None, phonetic_attributes)
         return [root]
Example #15
 def _set_lexeme_and_phonetic_attributes(self, generated_roots):
     for r in generated_roots:
         r.phonetic_attributes = Phonetics.calculate_phonetic_attributes(
             r.str, r.lexeme.attributes)
         if r.str.endswith(u'd') and r.lexeme.root.endswith(u't'):
             if LexemeAttribute.NoVoicing in r.lexeme.attributes:
                 r.lexeme.attributes.remove(LexemeAttribute.NoVoicing)
             r.lexeme.attributes.add(LexemeAttribute.Voicing)
         else:
             if LexemeAttribute.Voicing in r.lexeme.attributes:
                 r.lexeme.attributes.remove(LexemeAttribute.Voicing)
             r.lexeme.attributes.add(LexemeAttribute.NoVoicing)
Example #16
    def _get_possible_causative_roots(self, partial_input, whole_surface,
                                      no_attr_root):
        # no voicing can happen on causative_t
        might_have_Causative_t = whole_surface.startswith(partial_input + u't')

        might_have_Causative_Ir = any([
            whole_surface.startswith(partial_input + s)
            for s in [u'ir', u'ır', u'ur', u'ür']
        ])

        # no voicing can happen on causative_It
        might_have_Causative_It = any([
            whole_surface.startswith(partial_input + s)
            for s in [u'it', u'ıt', u'ut', u'üt']
        ])

        might_have_Causative_Ar = any([
            whole_surface.startswith(partial_input + s)
            for s in [u'ar', u'er']
        ])

        might_have_Causative_dIr = any([whole_surface.startswith(partial_input+s) for s in [u'dir', u'dır', u'dur', u'dür']]) or\
                                   any([whole_surface.startswith(partial_input+s) for s in [u'tir', u'tır', u'tur', u'tür']])

        might_have_causatives = {
            (LexemeAttribute.Causative_t, might_have_Causative_t),
            (LexemeAttribute.Causative_Ir, might_have_Causative_Ir),
            (LexemeAttribute.Causative_It, might_have_Causative_It),
            (LexemeAttribute.Causative_Ar, might_have_Causative_Ar),
            (LexemeAttribute.Causative_dIr, might_have_Causative_dIr)
        }

        might_have_causatives = filter(lambda t: t[1], might_have_causatives)

        causative_roots = set()

        for causative_attr, might_have_happened in might_have_causatives:
            # cannot have other causatives at the same time
            # cannot have any other passive at the same time
            # cannot have progressive vowel drop at the same time
            # cannot have aorist_A or aorist_I at the same time
            generated_root = no_attr_root._clone(True)

            generated_root.lexeme.attributes = {causative_attr} if causative_attr else set()

            generated_root.lexeme.phonetic_attributes = Phonetics.calculate_phonetic_attributes(
                partial_input, generated_root.lexeme.attributes)

            causative_roots.add(generated_root)

        return causative_roots
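
A self-contained illustration of the -DIr candidate check above, which accepts both the voiced and the devoiced allomorphs; plain string operations with illustrative Turkish surfaces.

# Self-contained illustration of the -DIr candidate check above;
# plain string operations only, no library calls.
def might_have_causative_dir(partial_input, whole_surface):
    voiced = [u'dir', u'dır', u'dur', u'dür']
    devoiced = [u'tir', u'tır', u'tur', u'tür']
    return any(whole_surface.startswith(partial_input + s)
               for s in voiced + devoiced)

print might_have_causative_dir(u'yap', u'yaptırdı')   # True: devoiced -tır
print might_have_causative_dir(u'gül', u'güldürdü')   # True: voiced -dür
print might_have_causative_dir(u'yap', u'yapıldı')    # False: passive -ıl, not -DIr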
Example #17
def try_suffix_form(morpheme_container, suffix_form, to_state, word):
    state_before_suffix_form_application = morpheme_container.get_last_state()

    if not transition_allowed_for_suffix_form(morpheme_container, suffix_form):
        return None

    so_far = morpheme_container.get_surface_so_far()
    morpheme_container_lexeme_attributes = morpheme_container.get_lexeme_attributes()

    morpheme_container_phonetic_attributes = morpheme_container.get_phonetic_attributes()

    modified_word, fitting_suffix_form = Phonetics.apply(so_far, morpheme_container_phonetic_attributes, suffix_form.form, morpheme_container_lexeme_attributes)
    applied_str =  modified_word + fitting_suffix_form
    if Phonetics.application_matches(word, applied_str, to_state.name!='VERB_ROOT'):
        actual_suffix_form_str = word[len(so_far):len(applied_str)]
        logger.debug('      Word "%s" starts with applied str "%s" (%s), adding to current morpheme container', word, applied_str, actual_suffix_form_str)
        clone = morpheme_container.clone()
        clone.add_transition(SuffixFormApplication(suffix_form, actual_suffix_form_str, fitting_suffix_form), to_state)

        if morpheme_container.has_transitions() and morpheme_container.get_last_transition().suffix_form_application.suffix_form.postcondition and not morpheme_container.get_last_transition().suffix_form_application.suffix_form.postcondition.is_satisfied_by(clone):
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug('      Suffix does not satisfy the postcondition "%s" of last transition suffix form "%s", skipping.', morpheme_container.get_last_transition().suffix_form_application.suffix_form.postcondition, formatter.format_transition(clone.get_last_transition()))
            return None

        if morpheme_container.has_transitions() and state_before_suffix_form_application.type==State.DERIVATIONAL:
            logger.debug('      Suffix is derivative, checking the post derivation conditions of suffixes from previous derivation.')
            for transition in morpheme_container.get_transitions_from_derivation_suffix():
                application_suffix_form = transition.suffix_form_application.suffix_form
                if application_suffix_form.post_derivation_condition:
                    matches = application_suffix_form.post_derivation_condition.is_satisfied_by(clone)
                    if not matches:
                        logger.debug('      Post derivation condition "%s" of suffix "%s" is not satisfied, skipping.', application_suffix_form.post_derivation_condition, application_suffix_form.suffix)
                        return None

        return clone

    else:
        logger.debug('      Word "%s" does not start with applied str "%s" (%s), skipping', word, applied_str, applied_str)
        return None
Example #18
def transition_allowed_for_suffix_form(morpheme_container, suffix_form):
    if suffix_form.precondition and not suffix_form.precondition.is_satisfied_by(
            morpheme_container):
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(
                '      Precondition "%s" of suffix form "%s" is not satisfied with transitions %s, skipping.',
                suffix_form.form, suffix_form.precondition, morpheme_container)
        return False

    if suffix_form.form and not Phonetics.expectations_satisfied(
            morpheme_container.get_phonetic_expectations(), suffix_form.form):
        logger.debug(
            '      Suffix form "%s" does not satisfy phonetic expectations %s, skipping.',
            suffix_form.form, morpheme_container.get_phonetic_expectations())
        return False

    if not Phonetics.is_suffix_form_applicable(
            morpheme_container.get_surface_so_far(), suffix_form.form):
        logger.debug(
            '      Suffix form "%s" is not phonetically applicable to "%s", skipping.',
            suffix_form.form, morpheme_container.get_surface_so_far())
        return False

    return True
Example #19
    def _get_possible_causative_roots(self, partial_input, whole_surface, no_attr_root):
        # no voicing can happen on causative_t
        might_have_Causative_t = whole_surface.startswith(partial_input + u't')

        might_have_Causative_Ir = any([whole_surface.startswith(partial_input+s) for s in [u'ir', u'ır', u'ur', u'ür']])

        # no voicing can happen on causative_It
        might_have_Causative_It = any([whole_surface.startswith(partial_input+s) for s in [u'it', u'ıt', u'ut', u'üt']])

        might_have_Causative_Ar = any([whole_surface.startswith(partial_input+s) for s in [u'ar', u'er']])

        might_have_Causative_dIr = any([whole_surface.startswith(partial_input+s) for s in [u'dir', u'dır', u'dur', u'dür']]) or\
                                   any([whole_surface.startswith(partial_input+s) for s in [u'tir', u'tır', u'tur', u'tür']])

        might_have_causatives = {(LexemeAttribute.Causative_t, might_have_Causative_t),
                                 (LexemeAttribute.Causative_Ir, might_have_Causative_Ir),
                                 (LexemeAttribute.Causative_It, might_have_Causative_It),
                                 (LexemeAttribute.Causative_Ar, might_have_Causative_Ar),
                                 (LexemeAttribute.Causative_dIr, might_have_Causative_dIr)}

        might_have_causatives = filter(lambda t : t[1], might_have_causatives)

        causative_roots = set()

        for causative_attr, might_have_happened in might_have_causatives:
            # cannot have other causatives at the same time
            # cannot have any other passive at the same time
            # cannot have progressive vowel drop at the same time
            # cannot have aorist_A or aorist_I at the same time
            generated_root = no_attr_root._clone(True)

            generated_root.lexeme.attributes = {causative_attr} if causative_attr else set()

            generated_root.lexeme.phonetic_attributes = Phonetics.calculate_phonetic_attributes(partial_input, generated_root.lexeme.attributes)

            causative_roots.add(generated_root)

        return causative_roots
Example #20
    def _get_possible_passive_roots(self, last_letter, partial_input,
                                    whole_surface, no_attr_root):
        might_have_Passive_Il = (not last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'il', u'ıl', u'ul', u'ül']])) or\
                                (last_letter.vowel and whole_surface.startswith(partial_input+ u'l'))

        might_have_Passive_In = (not last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'in', u'ın', u'un', u'ün']])) or\
                                (last_letter.vowel and whole_surface.startswith(partial_input+ u'n'))

        might_have_Passive_InIl = (not last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'inil', u'ınıl', u'unul', u'ünül']])) or\
                                  (last_letter.vowel and any([whole_surface.startswith(partial_input+s) for s in [u'nil', u'nıl', u'nul', u'nül']]))

        might_have_passives = {
            (LexemeAttribute.Passive_Il, might_have_Passive_Il),
            (LexemeAttribute.Passive_In, might_have_Passive_In),
            (LexemeAttribute.Passive_InIl, might_have_Passive_InIl)
        }

        might_have_passives = filter(lambda t: t[1], might_have_passives)

        passive_roots = set()

        for passive_attr, might_have_happened in might_have_passives:
            # cannot have other passives at the same time
            # cannot have any other causative at the same time
            # cannot have progressive vowel drop at the same time
            # cannot have aorist_A or aorist_I at the same time
            generated_root = no_attr_root._clone(True)

            generated_root.lexeme.attributes = {passive_attr} if passive_attr else set()

            generated_root.lexeme.phonetic_attributes = Phonetics.calculate_phonetic_attributes(
                partial_input, generated_root.lexeme.attributes)

            passive_roots.add(generated_root)

        return passive_roots
Example #21
def ap(word, form_str, lexeme_attributes=None):
    phonetic_attributes = Phonetics.calculate_phonetic_attributes(
        word, lexeme_attributes)
    word, application = Phonetics.apply(word, phonetic_attributes, form_str,
                                        lexeme_attributes)
    return word + application
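
A hedged usage sketch of the ap helper above. The suffix-form strings follow the meta-letter convention visible elsewhere in these examples (as in u'mAk'); the surfaces in the comments are ordinary Turkish forms given for orientation, not asserted library output.

# Hypothetical calls to ap(); the forms use the same meta-letter convention
# as u'mAk' above. Expected surfaces are shown as comments for orientation.
print ap(u'kitap', u'lAr')   # expected: u'kitaplar'
print ap(u'ev', u'lAr')      # expected: u'evler'
print ap(u'git', u'mAk')     # expected: u'gitmek'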
Example #22
def try_suffix_form(morpheme_container, suffix_form, to_state, word):
    state_before_suffix_form_application = morpheme_container.get_last_state()

    if not transition_allowed_for_suffix_form(morpheme_container, suffix_form):
        return None

    so_far = morpheme_container.get_surface_so_far()
    morpheme_container_lexeme_attributes = morpheme_container.get_lexeme_attributes()

    morpheme_container_phonetic_attributes = morpheme_container.get_phonetic_attributes()

    modified_word, fitting_suffix_form = Phonetics.apply(
        so_far, morpheme_container_phonetic_attributes, suffix_form.form,
        morpheme_container_lexeme_attributes)
    applied_str = modified_word + fitting_suffix_form
    if Phonetics.application_matches(word, applied_str,
                                     to_state.name != 'VERB_ROOT'):
        actual_suffix_form_str = word[len(so_far):len(applied_str)]
        logger.debug(
            '      Word "%s" starts with applied str "%s" (%s), adding to current morpheme container',
            word, applied_str, actual_suffix_form_str)
        clone = morpheme_container.clone()
        clone.add_transition(
            SuffixFormApplication(suffix_form, actual_suffix_form_str,
                                  fitting_suffix_form), to_state)

        if morpheme_container.has_transitions() and morpheme_container.get_last_transition().suffix_form_application.suffix_form.postcondition and not morpheme_container.get_last_transition().suffix_form_application.suffix_form.postcondition.is_satisfied_by(clone):
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    '      Suffix does not satisfy the postcondition "%s" of last transition suffix form "%s", skipping.',
                    morpheme_container.get_last_transition().
                    suffix_form_application.suffix_form.postcondition,
                    formatter.format_transition(clone.get_last_transition()))
            return None

        if morpheme_container.has_transitions() and state_before_suffix_form_application.type == State.DERIVATIONAL:
            logger.debug(
                '      Suffix is derivative, checking the post derivation conditions of suffixes from previous derivation.'
            )
            for transition in morpheme_container.get_transitions_from_derivation_suffix():
                application_suffix_form = transition.suffix_form_application.suffix_form
                if application_suffix_form.post_derivation_condition:
                    matches = application_suffix_form.post_derivation_condition.is_satisfied_by(
                        clone)
                    if not matches:
                        logger.debug(
                            '      Post derivation condition "%s" of suffix "%s" is not satisfied, skipping.',
                            application_suffix_form.post_derivation_condition,
                            application_suffix_form.suffix)
                        return None

        return clone

    else:
        logger.debug(
            '      Word "%s" does not start with applied str "%s" (%s), skipping',
            word, applied_str, applied_str)
        return None
Example #23
    def find_roots_for_partial_input(self, partial_input, whole_surface=None):
        """
        @type partial_input: unicode
        @type whole_surface: unicode
        @rtype: list of Root
        """
        assert partial_input and whole_surface
        assert len(partial_input) <= len(whole_surface)
        assert whole_surface.startswith(partial_input)
        if len(whole_surface) == len(partial_input):
            assert whole_surface == partial_input

        if len(partial_input) < 2:      # not possible except (d,diyor) and (y,yiyor). but they are already in the dictionary
            return []

        last_vowel = Phonetics.get_last_vowel(partial_input)

        if not last_vowel:
            return []

        root = partial_input
        lemma = root
        lemma_root = lemma
        syntactic_category = SyntacticCategory.VERB
        secondary_syntactic_category = None
        lexeme_attributes = set()

        lexeme = DynamicLexeme(lemma, lemma_root, syntactic_category, secondary_syntactic_category,
            lexeme_attributes)

        phonetic_expectations = set()
        phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(partial_input)

        no_attr_root = DynamicRoot(root, lexeme, phonetic_expectations, phonetic_attributes)

        self._set_lexeme_and_phonetic_attributes([no_attr_root])
        self._set_lemma([no_attr_root])

        last_char = partial_input[-1]
        last_letter = TurkishAlphabet.get_letter_for_char(last_char)

        partial_surface_can_be_root_of_a_verb = self._seems_like_a_valid_verb_root(partial_input)

        if whole_surface==partial_input:
            return [no_attr_root] if partial_surface_can_be_root_of_a_verb else []


        first_char_after_partial_input = whole_surface[len(partial_input)]

        if first_char_after_partial_input.isupper():
            return []

        first_letter_after_partial_input = TurkishAlphabet.get_letter_for_char(first_char_after_partial_input)


        might_have_ProgressiveVowelDrop = not last_letter.vowel and\
                                          any([whole_surface.startswith(partial_input+s) for s in [u'iyor', u'ıyor', u'uyor', u'üyor']])

        might_have_Aorist_A = not last_letter.vowel and \
                              (whole_surface.startswith(partial_input + u'ar') or whole_surface.startswith(partial_input + u'er'))

        # no Aorist_I for -ur, -ür
        might_have_Aorist_I = not last_letter.vowel and\
                              (whole_surface.startswith(partial_input + u'ır') or whole_surface.startswith(partial_input + u'ir'))

        # for other letters, no voicing in verbs. {git+er->gider} vs {yapar, açar, diker}
        voicing_might_have_happened = last_letter==TurkishAlphabet.L_d and first_letter_after_partial_input.vowel

        possible_progressive_vowel_drop_roots = self._get_progressive_vowel_drop_roots(partial_input, whole_surface, no_attr_root, last_vowel) if might_have_ProgressiveVowelDrop else set()
        possible_aorist_A_roots = self._get_aorist_A_roots(no_attr_root) if might_have_Aorist_A else set()
        possible_aorist_I_roots = self._get_aorist_I_roots(no_attr_root) if might_have_Aorist_I else set()
        possible_causative_roots = self._get_possible_causative_roots(partial_input, whole_surface, no_attr_root)
        possible_passive_roots = self._get_possible_passive_roots(last_letter, partial_input, whole_surface, no_attr_root)


        if voicing_might_have_happened:
            possible_progressive_vowel_drop_roots = possible_progressive_vowel_drop_roots.union(set([self._get_possible_voicing_root(r) for r in possible_progressive_vowel_drop_roots]))
            possible_aorist_A_roots = possible_aorist_A_roots.union(set([self._get_possible_voicing_root(r) for r in possible_aorist_A_roots]))
            possible_aorist_I_roots = possible_aorist_I_roots.union(set([self._get_possible_voicing_root(r) for r in possible_aorist_I_roots]))
            possible_causative_roots = possible_causative_roots.union(set([self._get_possible_voicing_root(r) for r in possible_causative_roots]))
            possible_passive_roots = possible_passive_roots.union(set([self._get_possible_voicing_root(r) for r in possible_passive_roots]))

        generated_roots = set()

        generated_roots.add(no_attr_root)

        if voicing_might_have_happened:
            generated_roots.add(self._get_possible_voicing_root(no_attr_root))

        generated_roots = generated_roots.union(possible_progressive_vowel_drop_roots)
        generated_roots = generated_roots.union(possible_aorist_A_roots)
        generated_roots = generated_roots.union(possible_aorist_I_roots)
        generated_roots = generated_roots.union(possible_causative_roots)
        generated_roots = generated_roots.union(possible_passive_roots)

        self._set_lexeme_and_phonetic_attributes(generated_roots)
        self._set_lemma(generated_roots)

        generated_roots = list(generated_roots)

        generated_roots = filter(lambda r: self._seems_like_a_valid_verb_root(r.lexeme.root), generated_roots)

        return generated_roots
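
A minimal usage sketch of the verb finder above; the class name BruteForceVerbRootFinder is an assumption. The input pair exercises the voicing branch mentioned in the comment (git+er -> gider).

# Hypothetical usage sketch; the class name is an assumption, the method is
# the one defined above. u'gid' / u'gider' is the voicing case noted in the
# comments (git+er -> gider).
finder = BruteForceVerbRootFinder()
for root in finder.find_roots_for_partial_input(u'gid', u'gider'):
    print root.str, root.lexeme.lemma, root.lexeme.attributes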
Example #24
 def _set_lemma(self, generated_roots):
     for r in generated_roots:
         word, applied_suffix_form = Phonetics.apply(r.lexeme.root, r.phonetic_attributes, u'mAk',
             r.lexeme.attributes)
         assert word and applied_suffix_form
         r.lexeme.lemma = word + applied_suffix_form
Example #25
    def _handle_special_roots(cls, lexeme):
        lexeme.attributes.remove(LexemeAttribute.RootChange)

        if lexeme.lemma==u'ben':
            root_ben = Root(u'ben', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'ben'))
            root_ban = Root(u'ban', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'ban'))
            return [root_ben, root_ban]
        elif lexeme.lemma==u'sen':
            root_sen = Root(u'sen', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'sen'))
            root_san = Root(u'san', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'san'))
            return [root_sen, root_san]
        elif lexeme.lemma==u'demek':
            root_di = Root(u'di', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'di'))
            root_de = Root(u'de', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'de'))
            return [root_di, root_de]
        elif lexeme.lemma==u'yemek':
            root_yi = Root(u'yi', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'yi'))
            root_ye = Root(u'ye', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'ye'))
            return [root_yi, root_ye]
        elif lexeme.lemma==u'hepsi':
            root_hep = Root(u'hep', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'hep'))
            root_hepsi = Root(u'hepsi', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'hepsi'))
            return [root_hep, root_hepsi]
        elif lexeme.lemma==u'ora':
            root_or = Root(u'or', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'or'))
            root_ora = Root(u'ora', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'ora'))
            return [root_or, root_ora]
        elif lexeme.lemma==u'bura':
            root_bur = Root(u'bur', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'bur'))
            root_bura = Root(u'bura', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'bura'))
            return [root_bur, root_bura]
        elif lexeme.lemma==u'şura':
            root_sur = Root(u'şur', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'şur'))
            root_sura = Root(u'şura', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'şura'))
            return [root_sur, root_sura]
        elif lexeme.lemma==u'nere':
            root_ner = Root(u'ner', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'ner'))
            root_nere = Root(u'nere', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'nere'))
            return [root_ner, root_nere]
        elif lexeme.lemma==u'içeri':
            root_icer = Root(u'içer', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'içer'))
            root_iceri = Root(u'içeri', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'içeri'))
            return [root_icer, root_iceri]
        elif lexeme.lemma==u'dışarı':
            root_disar = Root(u'dışar', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'dışar'))
            root_disari = Root(u'dışarı', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'dışarı'))
            return [root_disar, root_disari]
        elif lexeme.lemma==u'birbiri':
            root_birbir = Root(u'birbir', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'birbir'))
            root_birbiri = Root(u'birbiri', lexeme, None, Phonetics.calculate_phonetic_attributes_of_plain_sequence(u'birbiri'))
            return [root_birbir, root_birbiri]
        else:
            raise Exception('Unhandled root change : {} !'.format(lexeme))
Example #26
    def _generate_modified_root_nodes(cls, lexeme):
        if LexemeAttribute.RootChange in lexeme.attributes:
            special_roots = cls._handle_special_roots(lexeme)
            if special_roots:
                return special_roots

        modified_seq = lexeme.root

        original_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
        modified_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(lexeme.root)
        original_phonetic_expectations = set()
        modified_phonetic_expectations = set()

        if LexemeAttribute.Voicing in lexeme.attributes or LexemeAttribute.VoicingOpt in lexeme.attributes:
            last_letter = TurkishAlphabet.get_letter_for_char(modified_seq[-1])
            modified_letter = TurkishAlphabet.voice(last_letter)
            assert modified_letter is not None
            if lexeme.lemma.endswith(u"nk"):
                modified_letter = TurkishAlphabet.L_g
            modified_seq = modified_seq[:-1] + modified_letter.char_value
            if PhoneticAttributes.LastLetterVoicelessStop in modified_attributes:
                modified_attributes.remove(PhoneticAttributes.LastLetterVoicelessStop)
            if modified_letter.continuant:
                if PhoneticAttributes.LastLetterNotContinuant in modified_attributes :
                    modified_attributes.remove(PhoneticAttributes.LastLetterNotContinuant)
                modified_attributes.add(PhoneticAttributes.LastLetterContinuant)
            else:
                if PhoneticAttributes.LastLetterContinuant in modified_attributes:
                    modified_attributes.remove(PhoneticAttributes.LastLetterContinuant)
                modified_attributes.add(PhoneticAttributes.LastLetterNotContinuant)
            if LexemeAttribute.VoicingOpt not in lexeme.attributes:
                original_phonetic_expectations.add(PhoneticExpectation.ConsonantStart)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.Doubling in lexeme.attributes:
            modified_seq = modified_seq + modified_seq[-1]
            original_phonetic_expectations.add(PhoneticExpectation.ConsonantStart)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.LastVowelDrop in lexeme.attributes:
            modified_seq = modified_seq[:-2] + modified_seq[-1]
            if lexeme.syntactic_category!=SyntacticCategory.VERB:
                original_phonetic_expectations.add(PhoneticExpectation.ConsonantStart)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.InverseHarmony in lexeme.attributes:
            original_attributes.add(PhoneticAttributes.LastVowelFrontal)
            if PhoneticAttributes.LastVowelBack in original_attributes:
                original_attributes.remove(PhoneticAttributes.LastVowelBack)
            modified_attributes.add(PhoneticAttributes.LastVowelFrontal)
            if PhoneticAttributes.LastVowelBack in modified_attributes:
                modified_attributes.remove(PhoneticAttributes.LastVowelBack)

        if LexemeAttribute.ProgressiveVowelDrop in lexeme.attributes:
            modified_seq = modified_seq[:-1]
            if RootGenerator._has_vowel(modified_seq):
                modified_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(modified_seq)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)


        original_phonetic_expectations = original_phonetic_expectations or None
        modified_phonetic_expectations = modified_phonetic_expectations or None

        original = Root(lexeme.root, lexeme, original_phonetic_expectations, original_attributes)
        modified = Root(modified_seq, lexeme, modified_phonetic_expectations, modified_attributes)

        if original==modified:
            return [original]
        else:
            return [original, modified]
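
A plain-string sketch of the final-consonant voicing performed above (p->b, ç->c, t->d, k->ğ, with the nk -> ng exception); this is an illustration, not the TurkishAlphabet API.

# Plain-string illustration of the voicing substitutions above;
# not the TurkishAlphabet API.
VOICING = {u'p': u'b', u'ç': u'c', u't': u'd', u'k': u'ğ'}

def voice_final(seq):
    if seq.endswith(u'nk'):   # the nk -> ng exception handled above
        return seq[:-1] + u'g'
    return seq[:-1] + VOICING.get(seq[-1], seq[-1])

print voice_final(u'kitap')   # kitab
print voice_final(u'renk')    # reng
print voice_final(u'ağaç')    # ağac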
Example #27
def ap(word, form_str, lexeme_attributes=None):
    phonetic_attributes = Phonetics.calculate_phonetic_attributes(word, lexeme_attributes)
    word, application = Phonetics.apply(word, phonetic_attributes, form_str, lexeme_attributes)
    return word + application
Example #28
    def _handle_special_roots(cls, lexeme):
        lexeme.attributes.remove(LexemeAttribute.RootChange)

        if lexeme.lemma == u'ben':
            root_ben = Root(
                u'ben', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'ben'))
            root_ban = Root(
                u'ban', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'ban'))
            return [root_ben, root_ban]
        elif lexeme.lemma == u'sen':
            root_sen = Root(
                u'sen', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'sen'))
            root_san = Root(
                u'san', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'san'))
            return [root_sen, root_san]
        elif lexeme.lemma == u'demek':
            root_di = Root(
                u'di', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'di'))
            root_de = Root(
                u'de', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'de'))
            return [root_di, root_de]
        elif lexeme.lemma == u'yemek':
            root_yi = Root(
                u'yi', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'yi'))
            root_ye = Root(
                u'ye', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'ye'))
            return [root_yi, root_ye]
        elif lexeme.lemma == u'hepsi':
            root_hep = Root(
                u'hep', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'hep'))
            root_hepsi = Root(
                u'hepsi', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'hepsi'))
            return [root_hep, root_hepsi]
        elif lexeme.lemma == u'ora':
            root_or = Root(
                u'or', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'or'))
            root_ora = Root(
                u'ora', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'ora'))
            return [root_or, root_ora]
        elif lexeme.lemma == u'bura':
            root_bur = Root(
                u'bur', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'bur'))
            root_bura = Root(
                u'bura', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'bura'))
            return [root_bur, root_bura]
        elif lexeme.lemma == u'şura':
            root_sur = Root(
                u'şur', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'şur'))
            root_sura = Root(
                u'şura', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'şura'))
            return [root_sur, root_sura]
        elif lexeme.lemma == u'nere':
            root_ner = Root(
                u'ner', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'ner'))
            root_nere = Root(
                u'nere', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'nere'))
            return [root_ner, root_nere]
        elif lexeme.lemma == u'içeri':
            root_icer = Root(
                u'içer', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'içer'))
            root_iceri = Root(
                u'içeri', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'içeri'))
            return [root_icer, root_iceri]
        elif lexeme.lemma == u'dışarı':
            root_disar = Root(
                u'dışar', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'dışar'))
            root_disari = Root(
                u'dışarı', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'dışarı'))
            return [root_disar, root_disari]
        elif lexeme.lemma == u'birbiri':
            root_birbir = Root(
                u'birbir', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'birbir'))
            root_birbiri = Root(
                u'birbiri', lexeme, None,
                Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    u'birbiri'))
            return [root_birbir, root_birbiri]
        else:
            raise Exception('Unhandled root change : {} !'.format(lexeme))
Example #29
    def _generate_modified_root_nodes(cls, lexeme):
        if LexemeAttribute.RootChange in lexeme.attributes:
            special_roots = cls._handle_special_roots(lexeme)
            if special_roots:
                return special_roots

        modified_seq = lexeme.root

        original_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
            lexeme.root)
        modified_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
            lexeme.root)
        original_phonetic_expectations = set()
        modified_phonetic_expectations = set()

        if LexemeAttribute.Voicing in lexeme.attributes or LexemeAttribute.VoicingOpt in lexeme.attributes:
            last_letter = TurkishAlphabet.get_letter_for_char(modified_seq[-1])
            modified_letter = TurkishAlphabet.voice(last_letter)
            assert modified_letter is not None
            if lexeme.lemma.endswith(u"nk"):
                modified_letter = TurkishAlphabet.L_g
            modified_seq = modified_seq[:-1] + modified_letter.char_value
            if PhoneticAttributes.LastLetterVoicelessStop in modified_attributes:
                modified_attributes.remove(
                    PhoneticAttributes.LastLetterVoicelessStop)
            if modified_letter.continuant:
                if PhoneticAttributes.LastLetterNotContinuant in modified_attributes:
                    modified_attributes.remove(
                        PhoneticAttributes.LastLetterNotContinuant)
                modified_attributes.add(
                    PhoneticAttributes.LastLetterContinuant)
            else:
                if PhoneticAttributes.LastLetterContinuant in modified_attributes:
                    modified_attributes.remove(
                        PhoneticAttributes.LastLetterContinuant)
                modified_attributes.add(
                    PhoneticAttributes.LastLetterNotContinuant)
            if LexemeAttribute.VoicingOpt not in lexeme.attributes:
                original_phonetic_expectations.add(
                    PhoneticExpectation.ConsonantStart)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.Doubling in lexeme.attributes:
            modified_seq = modified_seq + modified_seq[-1]
            original_phonetic_expectations.add(
                PhoneticExpectation.ConsonantStart)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.LastVowelDrop in lexeme.attributes:
            modified_seq = modified_seq[:-2] + modified_seq[-1]
            if lexeme.syntactic_category != SyntacticCategory.VERB:
                original_phonetic_expectations.add(
                    PhoneticExpectation.ConsonantStart)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)

        if LexemeAttribute.InverseHarmony in lexeme.attributes:
            original_attributes.add(PhoneticAttributes.LastVowelFrontal)
            if PhoneticAttributes.LastVowelBack in original_attributes:
                original_attributes.remove(PhoneticAttributes.LastVowelBack)
            modified_attributes.add(PhoneticAttributes.LastVowelFrontal)
            if PhoneticAttributes.LastVowelBack in modified_attributes:
                modified_attributes.remove(PhoneticAttributes.LastVowelBack)

        if LexemeAttribute.ProgressiveVowelDrop in lexeme.attributes:
            modified_seq = modified_seq[:-1]
            if RootGenerator._has_vowel(modified_seq):
                modified_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
                    modified_seq)
            modified_phonetic_expectations.add(PhoneticExpectation.VowelStart)

        original_phonetic_expectations = original_phonetic_expectations or None
        modified_phonetic_expectations = modified_phonetic_expectations or None

        original = Root(lexeme.root, lexeme, original_phonetic_expectations,
                        original_attributes)
        modified = Root(modified_seq, lexeme, modified_phonetic_expectations,
                        modified_attributes)

        if original == modified:
            return [original]
        else:
            return [original, modified]
Example #30
    def find_roots_for_partial_input(self, partial_input, whole_surface=None):
        """
        @type partial_input: unicode
        @type whole_surface: unicode
        @rtype: list of Root
        """
        assert partial_input and whole_surface
        assert len(partial_input) <= len(whole_surface)
        assert whole_surface.startswith(partial_input)
        if len(whole_surface) == len(partial_input):
            assert whole_surface == partial_input

        if len(partial_input) < 2:      # not possible except (d,diyor) and (y,yiyor). but they are already in the dictionary
            return []

        last_vowel = Phonetics.get_last_vowel(partial_input)

        if not last_vowel:
            return []

        root = partial_input
        lemma = root
        lemma_root = lemma
        syntactic_category = SyntacticCategory.VERB
        secondary_syntactic_category = None
        lexeme_attributes = set()

        lexeme = DynamicLexeme(lemma, lemma_root, syntactic_category,
                               secondary_syntactic_category, lexeme_attributes)

        phonetic_expectations = set()
        phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(
            partial_input)

        no_attr_root = DynamicRoot(root, lexeme, phonetic_expectations,
                                   phonetic_attributes)

        self._set_lexeme_and_phonetic_attributes([no_attr_root])
        self._set_lemma([no_attr_root])

        last_char = partial_input[-1]
        last_letter = TurkishAlphabet.get_letter_for_char(last_char)

        partial_surface_can_be_root_of_a_verb = self._seems_like_a_valid_verb_root(
            partial_input)

        if whole_surface == partial_input:
            return [no_attr_root] if partial_surface_can_be_root_of_a_verb else []

        first_char_after_partial_input = whole_surface[len(partial_input)]

        if first_char_after_partial_input.isupper():
            return []

        first_letter_after_partial_input = TurkishAlphabet.get_letter_for_char(
            first_char_after_partial_input)


        might_have_ProgressiveVowelDrop = not last_letter.vowel and\
                                          any([whole_surface.startswith(partial_input+s) for s in [u'iyor', u'ıyor', u'uyor', u'üyor']])

        might_have_Aorist_A = not last_letter.vowel and \
                              (whole_surface.startswith(partial_input + u'ar') or whole_surface.startswith(partial_input + u'er'))

        # no Aorist_I for -ur, -ür
        might_have_Aorist_I = not last_letter.vowel and\
                              (whole_surface.startswith(partial_input + u'ır') or whole_surface.startswith(partial_input + u'ir'))

        # for other letters, no voicing in verbs. {git+er->gider} vs {yapar, açar, diker}
        voicing_might_have_happened = last_letter == TurkishAlphabet.L_d and first_letter_after_partial_input.vowel

        possible_progressive_vowel_drop_roots = self._get_progressive_vowel_drop_roots(
            partial_input, whole_surface, no_attr_root,
            last_vowel) if might_have_ProgressiveVowelDrop else set()
        possible_aorist_A_roots = self._get_aorist_A_roots(
            no_attr_root) if might_have_Aorist_A else set()
        possible_aorist_I_roots = self._get_aorist_I_roots(
            no_attr_root) if might_have_Aorist_I else set()
        possible_causative_roots = self._get_possible_causative_roots(
            partial_input, whole_surface, no_attr_root)
        possible_passive_roots = self._get_possible_passive_roots(
            last_letter, partial_input, whole_surface, no_attr_root)

        if voicing_might_have_happened:
            possible_progressive_vowel_drop_roots = possible_progressive_vowel_drop_roots.union(
                set([
                    self._get_possible_voicing_root(r)
                    for r in possible_progressive_vowel_drop_roots
                ]))
            possible_aorist_A_roots = possible_aorist_A_roots.union(
                set([
                    self._get_possible_voicing_root(r)
                    for r in possible_aorist_A_roots
                ]))
            possible_aorist_I_roots = possible_aorist_I_roots.union(
                set([
                    self._get_possible_voicing_root(r)
                    for r in possible_aorist_I_roots
                ]))
            possible_causative_roots = possible_causative_roots.union(
                set([
                    self._get_possible_voicing_root(r)
                    for r in possible_causative_roots
                ]))
            possible_passive_roots = possible_passive_roots.union(
                set([
                    self._get_possible_voicing_root(r)
                    for r in possible_passive_roots
                ]))

        generated_roots = set()

        generated_roots.add(no_attr_root)

        if voicing_might_have_happened:
            generated_roots.add(self._get_possible_voicing_root(no_attr_root))

        generated_roots = generated_roots.union(
            possible_progressive_vowel_drop_roots)
        generated_roots = generated_roots.union(possible_aorist_A_roots)
        generated_roots = generated_roots.union(possible_aorist_I_roots)
        generated_roots = generated_roots.union(possible_causative_roots)
        generated_roots = generated_roots.union(possible_passive_roots)

        self._set_lexeme_and_phonetic_attributes(generated_roots)
        self._set_lemma(generated_roots)

        generated_roots = list(generated_roots)

        generated_roots = filter(
            lambda r: self._seems_like_a_valid_verb_root(r.lexeme.root),
            generated_roots)

        return generated_roots
Example #31
 def __init__(self, noun):
     root = noun
     lexeme = DynamicLexeme(noun, noun, SyntacticCategory.NOUN, SecondarySyntacticCategory.PROPER_NOUN, None)
     phonetic_attributes = Phonetics.calculate_phonetic_attributes_of_plain_sequence(noun)
     phonetic_expectations = None
     super(ProperNounRoot, self).__init__(root, lexeme, phonetic_expectations, phonetic_attributes)