Example #1
0
    def create_from_string(morph, string, tech_vocabulary={}):
        '''
        Create a word object for ``string`` with the constructor that matches
        its grammatical class.

        A string containing a space is always handed to the NOUN_GROUP
        constructor. Otherwise the class reported by ``get_gram_info`` for the
        upper-cased, ``efication``-processed string selects the entry of
        ``WORD_CONSTRUCTORS`` to delegate to.

        Raises TextgenException if the reported class is not handled here.
        '''
        normalized = efication(string.upper())

        def build(word_type):
            # every constructor receives the original, non-normalized string
            return WORD_CONSTRUCTORS[word_type].create_from_baseword(morph, string, tech_vocabulary)

        if ' ' in string:
            return build(WORD_TYPE.NOUN_GROUP)

        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)

        if class_ == u'С':
            return build(WORD_TYPE.NOUN)
        # three different classes all map to the ADJECTIVE constructor
        if class_ in (u'П', u'КР_ПРИЛ', u'МС-П'):
            return build(WORD_TYPE.ADJECTIVE)
        if class_ == u'Г':
            return build(WORD_TYPE.VERB)
        if class_ == u'ПРИЧАСТИЕ':
            return build(WORD_TYPE.PARTICIPLE)
        if class_ == u'КР_ПРИЧАСТИЕ':
            return build(WORD_TYPE.SHORT_PARTICIPLE)
        if class_ == u'МС':
            return build(WORD_TYPE.PRONOUN)

        raise TextgenException(u'unknown word type: %s of word: %s' % (class_, string) )
Example #2
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance for a space-separated phrase ``src``.

        Each non-empty word is analysed with ``get_gram_info``. A noun
        (class 'С') in case 'им' whose number is 'ед' or whose gender is 'мн'
        is treated as a main noun; the last such noun supplies the gender
        stored in the result. Any other noun is kept unchanged (lower-cased)
        in every form, while all remaining words are inflected for each
        number/case combination.

        If any word raises NoGrammarFound, ``cls(normalized=src)`` is returned
        without forms. NormalFormNeeded is raised when no main noun is found.

        Known limitation noted by the original author: nouns given in plural
        (e.g. "рога") are problematic.
        '''
        head_found = False
        head_properties = None

        # entries are (class, upper-cased word, keep-as-is flag)
        tokens = []

        for raw_word in src.split(' '):
            if not raw_word:
                continue

            try:
                class_, properties = get_gram_info(morph,
                                                   efication(raw_word.upper()),
                                                   tech_vocabulary)
            except NoGrammarFound:
                return cls(normalized=src)

            is_noun = class_ == u'С'
            is_head = is_noun and u'им' == properties.case and (
                u'ед' == properties.number or properties.gender == u'мн')

            if is_head:
                head_found = True
                head_properties = properties

            # only nouns that are not in the main-noun form stay constant
            tokens.append((class_, efication(raw_word).upper(), is_noun and not is_head))

        if not head_found:
            raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

        forms = []

        for number in PROPERTIES.NUMBERS:
            for case in PROPERTIES.CASES:
                tags = u','.join([case, number])
                rendered = []
                for class_, word, constant in tokens:
                    if constant:
                        rendered.append(word.lower())
                        continue
                    rendered.append(morph.inflect_ru(word, tags, class_).lower())
                forms.append(' '.join(rendered))

        return cls(normalized=src,
                   forms=forms,
                   properties=[head_properties.gender])
Example #3
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance holding sixteen inflected forms of ``src``.

        ``src`` must be analysed by ``get_gram_info`` as time 'прш', number
        'ед' and gender 'мр'; otherwise NormalFormNeeded is raised. If the
        analysis raises NoGrammarFound, ``cls(normalized=src)`` is returned
        without forms.

        The forms are produced in a fixed order: four 'прш' forms, then six
        'нст' forms and six 'буд' forms (person by number).
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        in_normal_form = (u'прш' == properties.time
                          and u'ед' == properties.number
                          and u'мр' == properties.gender)
        if not in_normal_form:
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        base = morph.inflect_ru(normalized, u'ед,мр', u'Г')

        # the position of each form in the result follows this tuple
        form_tags = (
            u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн',
            u'нст,1л,ед', u'нст,1л,мн',
            u'нст,2л,ед', u'нст,2л,мн',
            u'нст,3л,ед', u'нст,3л,мн',
            u'буд,1л,ед', u'буд,1л,мн',
            u'буд,2л,ед', u'буд,2л,мн',
            u'буд,3л,ед', u'буд,3л,мн',
        )
        forms = [morph.inflect_ru(base, tags).lower() for tags in form_tags]

        return cls(normalized=src, forms=forms, properties=[])
Example #4
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance with the forms of ``src`` for times 'прш' and 'нст'.

        For each of the two times, one singular form per entry of
        ``PROPERTIES.GENDERS`` is generated, followed by one 'мн' form.

        ``src`` must be analysed as 'прш' / 'ед' / 'мр', otherwise
        NormalFormNeeded is raised. If ``get_gram_info`` raises
        NoGrammarFound, ``cls(normalized=src)`` is returned without forms.
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        in_normal_form = (u'прш' == properties.time
                          and u'ед' == properties.number
                          and u'мр' == properties.gender)
        if not in_normal_form:
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                   (src, properties))

        forms = []

        for time in (u'прш', u'нст'):
            # singular tags for every gender first, then the plural tag
            tags_for_time = [u'%s,%s,ед' % (time, gender)
                             for gender in PROPERTIES.GENDERS]
            tags_for_time.append(u'%s,мн' % (time, ))

            for tags in tags_for_time:
                forms.append(morph.inflect_ru(normalized, tags, class_).lower())

        return cls(normalized=src, forms=forms, properties=[])
Example #5
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance holding sixteen inflected forms of ``src``.

        The word is first checked to be in the form 'прш' / 'ед' / 'мр'
        (NormalFormNeeded is raised otherwise), then re-inflected with tags
        'ед,мр' and class 'Г' to obtain the base used for every form.

        Result order: 'прш' for 'мр', 'жр', 'ср' singular and 'мн'; then, for
        'нст' and for 'буд', persons '1л'..'3л' each in 'ед' and 'мн'.

        If ``get_gram_info`` raises NoGrammarFound, ``cls(normalized=src)``
        is returned without forms.
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'прш' == properties.time and u'ед' == properties.number
                and u'мр' == properties.gender):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                   (src, properties))

        base = morph.inflect_ru(normalized, u'ед,мр', u'Г')

        form_tags = [u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн']

        # remaining tags vary by time, then person, then number
        for time in (u'нст', u'буд'):
            for person in (u'1л', u'2л', u'3л'):
                for number in (u'ед', u'мн'):
                    form_tags.append(u','.join((time, person, number)))

        forms = []
        for tags in form_tags:
            forms.append(morph.inflect_ru(base, tags).lower())

        return cls(normalized=src, forms=forms, properties=[])
Example #6
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance with forms of ``src`` for every gender and case.

        Singular forms come first, grouped by ``PROPERTIES.GENDERS`` and
        ordered by ``PROPERTIES.CASES`` inside each gender; one 'мн' form per
        case follows.

        ``src`` must be analysed as case 'им' and number 'ед', otherwise
        NormalFormNeeded is raised. If ``get_gram_info`` raises
        NoGrammarFound, ``cls(normalized=src)`` is returned without forms.
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'им' == properties.case and u'ед' == properties.number):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                   (src, properties))

        # singular: gender is the outer loop, case the inner one
        singular_tags = [u'%s,%s,ед' % (case, gender)
                         for gender in PROPERTIES.GENDERS
                         for case in PROPERTIES.CASES]

        # plural: no gender in the tag string
        plural_tags = [u'%s,%s' % (case, u'мн') for case in PROPERTIES.CASES]

        forms = [morph.inflect_ru(normalized, tags, class_).lower()
                 for tags in singular_tags + plural_tags]

        return cls(normalized=src, forms=forms, properties=[])
Example #7
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance for the space-separated phrase ``src``.

        At least one noun (class 'С') has to be in case 'им' with number 'ед'
        or gender 'мн'; the last such noun provides the gender stored in the
        result. Nouns in any other form are copied into every phrase form
        unchanged (lower-cased); every other word is inflected by case and
        number.

        Returns ``cls(normalized=src)`` without forms as soon as one word
        raises NoGrammarFound. Raises NormalFormNeeded when no main noun is
        present.

        Known limitation noted by the original author: nouns given in plural
        (e.g. "рога") are problematic.
        '''
        has_main_noun = False
        main_properties = None
        parts = []

        for piece in src.split(' '):
            if not piece:
                continue

            try:
                class_, properties = get_gram_info(morph, efication(piece.upper()), tech_vocabulary)
            except NoGrammarFound:
                return cls(normalized=src)

            constant = False
            if class_ == u'С':
                if u'им' == properties.case and (u'ед' == properties.number or properties.gender == u'мн'):
                    has_main_noun = True
                    main_properties = properties
                else:
                    # a noun in another form is not inflected later
                    constant = True

            parts.append((class_, efication(piece).upper(), constant))

        if not has_main_noun:
            raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

        def render(case, number):
            # one phrase form: constant words as-is, the rest inflected
            tags = u','.join([case, number])
            rendered = []
            for class_, word, constant in parts:
                if constant:
                    rendered.append(word.lower())
                else:
                    rendered.append(morph.inflect_ru(word, tags, class_).lower())
            return ' '.join(rendered)

        forms = []
        for number in PROPERTIES.NUMBERS:
            for case in PROPERTIES.CASES:
                forms.append(render(case, number))

        return cls(normalized=src, forms=forms, properties=[main_properties.gender])
Example #8
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance with one form of ``src`` per number and case.

        ``src`` must be in case 'им'. A number other than 'ed' is rejected
        only when the gender is one of 'мр', 'ср', 'жр'; in both failure
        cases NormalFormNeeded is raised. If ``get_gram_info`` raises
        NoGrammarFound, ``cls(normalized=src)`` is returned without forms.

        The gender reported for ``src`` is stored as the only property.
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        wrong_case = u'им' != properties.case
        # a non-singular word is rejected only for the three listed genders
        if wrong_case or (u'ед' != properties.number and properties.gender in (u'мр', u'ср', u'жр')):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        forms = [morph.inflect_ru(normalized, u'%s,%s' % (case, number), class_).lower()
                 for number in PROPERTIES.NUMBERS
                 for case in PROPERTIES.CASES]

        return cls(normalized=src, forms=forms, properties=[properties.gender])
Example #9
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance with the forms of ``src`` for times 'прш' and 'нст'.

        Per time, the result contains one singular form for each entry of
        ``PROPERTIES.GENDERS`` and then a single 'мн' form.

        Raises NormalFormNeeded unless ``src`` is analysed as
        'прш' / 'ед' / 'мр'. Returns ``cls(normalized=src)`` without forms if
        ``get_gram_info`` raises NoGrammarFound.
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'прш' == properties.time and u'ед' == properties.number and u'мр' == properties.gender):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        # collect the tag strings first; their order defines the form order
        all_tags = []
        for time in (u'прш', u'нст'):
            for gender in PROPERTIES.GENDERS:
                all_tags.append(u'%s,%s,ед' % (time, gender))
            all_tags.append(u'%s,мн' % (time, ))

        forms = [morph.inflect_ru(normalized, tags, class_).lower() for tags in all_tags]

        return cls(normalized=src, forms=forms, properties=[])
Example #10
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance with forms of ``src`` for every gender and case.

        Singular forms (each gender of ``PROPERTIES.GENDERS``, each case of
        ``PROPERTIES.CASES``) are followed by one 'мн' form per case.

        Raises NormalFormNeeded unless ``src`` is analysed as case 'им' and
        number 'ед'. Returns ``cls(normalized=src)`` without forms if
        ``get_gram_info`` raises NoGrammarFound.
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        if not (u'им' == properties.case and u'ед' == properties.number):
            raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

        def inflect(tags):
            # all forms share the same source word and grammatical class
            return morph.inflect_ru(normalized, tags, class_).lower()

        forms = []

        # singular forms, grouped by gender
        for gender in PROPERTIES.GENDERS:
            forms.extend([inflect(u'%s,%s,ед' % (case, gender)) for case in PROPERTIES.CASES])

        # plural forms
        forms.extend([inflect(u'%s,%s' % (case, u'мн')) for case in PROPERTIES.CASES])

        return cls(normalized=src, forms=forms, properties=[])
Example #11
0
    def create_from_string(morph, string, tech_vocabulary={}):
        '''
        Create a word object for ``string`` by delegating to the matching
        entry of ``WORD_CONSTRUCTORS``.

        Strings containing a space go to the NOUN_GROUP constructor. For a
        single word, the class returned by ``get_gram_info`` is looked up in
        a class-to-word-type table.

        Raises TextgenException when the class has no entry in that table.
        '''
        normalized = efication(string.upper())

        if ' ' in string:
            group_constructor = WORD_CONSTRUCTORS[WORD_TYPE.NOUN_GROUP]
            return group_constructor.create_from_baseword(
                morph, string, tech_vocabulary)

        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)

        # several classes share the ADJECTIVE constructor
        word_type_by_class = {
            u'С': WORD_TYPE.NOUN,
            u'П': WORD_TYPE.ADJECTIVE,
            u'КР_ПРИЛ': WORD_TYPE.ADJECTIVE,
            u'Г': WORD_TYPE.VERB,
            u'ПРИЧАСТИЕ': WORD_TYPE.PARTICIPLE,
            u'КР_ПРИЧАСТИЕ': WORD_TYPE.SHORT_PARTICIPLE,
            u'МС': WORD_TYPE.PRONOUN,
            u'МС-П': WORD_TYPE.ADJECTIVE,
        }

        if class_ not in word_type_by_class:
            raise TextgenException(u'unknown word type: %s of word: %s' %
                                   (class_, string))

        constructor = WORD_CONSTRUCTORS[word_type_by_class[class_]]
        return constructor.create_from_baseword(morph, string,
                                                tech_vocabulary)
Example #12
0
    def create_from_baseword(cls, morph, src, tech_vocabulary={}):
        '''
        Build an instance with one form of ``src`` per number and case.

        Raises NormalFormNeeded when ``src`` is not in case 'им', or when its
        number is not 'ед' while its gender is one of 'мр', 'ср', 'жр'.
        Returns ``cls(normalized=src)`` without forms if ``get_gram_info``
        raises NoGrammarFound.

        The gender reported for ``src`` is stored as the only property.
        '''
        normalized = efication(src.upper())
        try:
            class_, properties = get_gram_info(morph, normalized,
                                               tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        def not_normal():
            return NormalFormNeeded(u'word "%s" not in normal form: %s' %
                                    (src, properties))

        if u'им' != properties.case:
            raise not_normal()

        # a non-singular word is rejected only for the three listed genders
        if u'ед' != properties.number and properties.gender in (u'мр', u'ср',
                                                                u'жр'):
            raise not_normal()

        forms = []

        for number in PROPERTIES.NUMBERS:
            tags_for_number = [u'%s,%s' % (case, number)
                               for case in PROPERTIES.CASES]
            for tags in tags_for_number:
                forms.append(
                    morph.inflect_ru(normalized, tags, class_).lower())

        return cls(normalized=src, forms=forms, properties=[properties.gender])