def create_from_string(morph, string, tech_vocabulary={}):
    '''Create a word object from ``string``, choosing the constructor by grammar class.

    A string containing a space is handed to the NOUN_GROUP constructor
    without looking up its grammar class.  Otherwise the class reported by
    ``get_gram_info`` for the upper-cased, ``efication``-processed string
    selects the entry of ``WORD_CONSTRUCTORS``.  In both cases the original
    ``string`` (not the normalized one) is passed to ``create_from_baseword``.

    Raises ``TextgenException`` when the grammar class is not one of the
    handled ones.  Exceptions raised by ``get_gram_info`` are not caught here.
    '''
    normalized = efication(string.upper())

    if ' ' in string:
        constructor = WORD_CONSTRUCTORS[WORD_TYPE.NOUN_GROUP]
        return constructor.create_from_baseword(morph, string, tech_vocabulary)

    # Only the grammar class matters for dispatching; the properties are unused.
    class_, _ = get_gram_info(morph, normalized, tech_vocabulary)

    # Three classes (full adjective, short adjective, adjectival pronoun)
    # share the ADJECTIVE constructor.
    word_type_by_class = {
        u'С': WORD_TYPE.NOUN,
        u'П': WORD_TYPE.ADJECTIVE,
        u'КР_ПРИЛ': WORD_TYPE.ADJECTIVE,
        u'Г': WORD_TYPE.VERB,
        u'ПРИЧАСТИЕ': WORD_TYPE.PARTICIPLE,
        u'КР_ПРИЧАСТИЕ': WORD_TYPE.SHORT_PARTICIPLE,
        u'МС': WORD_TYPE.PRONOUN,
        u'МС-П': WORD_TYPE.ADJECTIVE,
    }

    if class_ not in word_type_by_class:
        raise TextgenException(u'unknown word type: %s of word: %s' % (class_, string))

    constructor = WORD_CONSTRUCTORS[word_type_by_class[class_]]
    return constructor.create_from_baseword(morph, string, tech_vocabulary)
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a multi-word phrase object from the space-separated phrase ``src``.

    One noun of the phrase MUST be in the nominative case (``им``); it is
    treated as the main noun and supplies the gender stored in the result.
    Known limitation (from the original note): nouns given in the plural,
    e.g. "рога", are problematic.

    Returns ``cls(normalized=src)`` without forms as soon as any word has no
    grammar info.  Raises ``NormalFormNeeded`` when no main noun is found.
    '''
    head_word = None
    head_properties = None
    parts = []  # (grammar class, upper-cased word, keep-as-is flag)

    for token in src.split(' '):
        if not token:
            # Consecutive spaces produce empty tokens; ignore them.
            continue

        try:
            class_, properties = get_gram_info(
                morph, efication(token.upper()), tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        is_noun = class_ == u'С'
        is_head = is_noun and (
            u'им' == properties.case
            and (u'ед' == properties.number or properties.gender == u'мн'))

        if is_head:
            # If several nouns qualify, the last one wins.
            head_word = token
            head_properties = properties

        # Nouns that are not the head keep the form they were given in;
        # every other word is inflected together with the head noun.
        keep_as_is = is_noun and not is_head
        parts.append((class_, efication(token).upper(), keep_as_is))

    if not head_word:
        raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

    forms = []
    for number in PROPERTIES.NUMBERS:
        for case in PROPERTIES.CASES:
            tags = u','.join([case, number])
            forms.append(' '.join(
                text.lower() if keep_as_is
                else morph.inflect_ru(text, tags, part_class).lower()
                for part_class, text, keep_as_is in parts))

    return cls(normalized=src, forms=forms, properties=[head_properties.gender])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with 16 inflected forms from the base word ``src``.

    ``src`` must be reported by ``get_gram_info`` as past tense (``прш``),
    singular (``ед``), masculine (``мр``); otherwise ``NormalFormNeeded`` is
    raised.  When no grammar info is found, ``cls(normalized=src)`` is
    returned without forms.

    Forms are stored lower-cased in this order: four past-tense forms
    (three genders, then plural), then present and future tense for
    persons 1-3, singular before plural.
    '''
    normalized = efication(src.upper())

    try:
        _, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    in_normal_form = (u'прш' == properties.time
                      and u'ед' == properties.number
                      and u'мр' == properties.gender)
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # Every form is derived from this re-inflected base, not from ``normalized``.
    base = morph.inflect_ru(normalized, u'ед,мр', u'Г')

    form_tags = (
        u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн',
        u'нст,1л,ед', u'нст,1л,мн',
        u'нст,2л,ед', u'нст,2л,мн',
        u'нст,3л,ед', u'нст,3л,мн',
        u'буд,1л,ед', u'буд,1л,мн',
        u'буд,2л,ед', u'буд,2л,мн',
        u'буд,3л,ед', u'буд,3л,мн',
    )
    forms = [morph.inflect_ru(base, tag).lower() for tag in form_tags]

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with per-tense gender and plural forms from ``src``.

    ``src`` must be reported by ``get_gram_info`` as past tense (``прш``),
    singular (``ед``), masculine (``мр``); otherwise ``NormalFormNeeded`` is
    raised.  When no grammar info is found, ``cls(normalized=src)`` is
    returned without forms.

    For each of the tenses ``прш`` and ``нст`` (in that order) the result
    holds one singular form per entry of ``PROPERTIES.GENDERS`` followed by
    one plural form, all lower-cased.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    in_normal_form = (u'прш' == properties.time
                      and u'ed' != u'ед' and False) if False else (
                          u'прш' == properties.time
                          and u'ед' == properties.number
                          and u'мр' == properties.gender)
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    forms = []
    for tense in (u'прш', u'нст'):
        # Singular forms for every gender first, the plural form last.
        tags = [u'%s,%s,ед' % (tense, gender) for gender in PROPERTIES.GENDERS]
        tags.append(u'%s,мн' % (tense, ))
        forms.extend(
            morph.inflect_ru(normalized, tag, class_).lower() for tag in tags)

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with 16 inflected forms from the base word ``src``.

    ``src`` must be reported by ``get_gram_info`` as past tense (``прш``),
    singular (``ед``), masculine (``мр``); otherwise ``NormalFormNeeded`` is
    raised.  When no grammar info is found, ``cls(normalized=src)`` is
    returned without forms.

    Forms are stored lower-cased in this order: four past-tense forms
    (three genders, then plural), then present and future tense for
    persons 1-3, singular before plural.
    '''
    normalized = efication(src.upper())

    try:
        _, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    in_normal_form = (u'прш' == properties.time
                      and u'ед' == properties.number
                      and u'мр' == properties.gender)
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # Every form is derived from this re-inflected base, not from ``normalized``.
    base = morph.inflect_ru(normalized, u'ед,мр', u'Г')

    form_tags = (
        u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн',
        u'нст,1л,ед', u'нст,1л,мн',
        u'нст,2л,ед', u'нст,2л,мн',
        u'нст,3л,ед', u'нст,3л,мн',
        u'буд,1л,ед', u'буд,1л,мн',
        u'буд,2л,ед', u'буд,2л,мн',
        u'буд,3л,ед', u'буд,3л,мн',
    )
    forms = [morph.inflect_ru(base, tag).lower() for tag in form_tags]

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with gender/case and plural case forms from ``src``.

    ``src`` must be reported by ``get_gram_info`` as nominative (``им``)
    singular (``ед``); otherwise ``NormalFormNeeded`` is raised.  When no
    grammar info is found, ``cls(normalized=src)`` is returned without forms.

    The forms list holds, lower-cased: singular forms for every gender in
    ``PROPERTIES.GENDERS`` (each across all ``PROPERTIES.CASES``), followed
    by the plural forms across all cases.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    in_normal_form = u'им' == properties.case and u'ед' == properties.number
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # Singular: gender is the outer loop, case the inner one.
    singular_forms = [
        morph.inflect_ru(normalized, u'%s,%s,ед' % (case, gender), class_).lower()
        for gender in PROPERTIES.GENDERS
        for case in PROPERTIES.CASES]

    # Plural: one form per case, no gender distinction.
    plural_forms = [
        morph.inflect_ru(normalized, u'%s,%s' % (case, u'мн'), class_).lower()
        for case in PROPERTIES.CASES]

    return cls(normalized=src, forms=singular_forms + plural_forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a multi-word phrase object from the space-separated phrase ``src``.

    One noun of the phrase MUST be in the nominative case (``им``); it is
    treated as the main noun and supplies the gender stored in the result.
    Known limitation (from the original note): nouns given in the plural,
    e.g. "рога", are problematic.

    Returns ``cls(normalized=src)`` without forms as soon as any word has no
    grammar info.  Raises ``NormalFormNeeded`` when no main noun is found.
    '''
    head_word = None
    head_properties = None
    parts = []  # (grammar class, upper-cased word, keep-as-is flag)

    for token in src.split(' '):
        if not token:
            # Consecutive spaces produce empty tokens; ignore them.
            continue

        try:
            class_, properties = get_gram_info(
                morph, efication(token.upper()), tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        is_noun = class_ == u'С'
        is_head = is_noun and (
            u'им' == properties.case
            and (u'ед' == properties.number or properties.gender == u'мн'))

        if is_head:
            # If several nouns qualify, the last one wins.
            head_word = token
            head_properties = properties

        # Nouns that are not the head keep the form they were given in;
        # every other word is inflected together with the head noun.
        keep_as_is = is_noun and not is_head
        parts.append((class_, efication(token).upper(), keep_as_is))

    if not head_word:
        raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

    forms = []
    for number in PROPERTIES.NUMBERS:
        for case in PROPERTIES.CASES:
            tags = u','.join([case, number])
            forms.append(' '.join(
                text.lower() if keep_as_is
                else morph.inflect_ru(text, tags, part_class).lower()
                for part_class, text, keep_as_is in parts))

    return cls(normalized=src, forms=forms, properties=[head_properties.gender])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with number/case forms from the base word ``src``.

    ``src`` must be reported by ``get_gram_info`` as nominative (``им``).
    It must also be singular (``ед``), unless its gender is none of
    ``мр``/``ср``/``жр``.  Otherwise ``NormalFormNeeded`` is raised.  When no
    grammar info is found, ``cls(normalized=src)`` is returned without forms.

    The forms list holds one lower-cased form per (number, case) pair, with
    ``PROPERTIES.NUMBERS`` as the outer loop and ``PROPERTIES.CASES`` as the
    inner one.  The detected gender is stored as the only property.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    # A non-singular word is accepted only when its gender is not one of
    # the three regular genders.
    in_normal_form = u'им' == properties.case and (
        u'ед' == properties.number
        or properties.gender not in (u'мр', u'ср', u'жр'))
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    forms = [
        morph.inflect_ru(normalized, u'%s,%s' % (case, number), class_).lower()
        for number in PROPERTIES.NUMBERS
        for case in PROPERTIES.CASES]

    return cls(normalized=src, forms=forms, properties=[properties.gender])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with per-tense gender and plural forms from ``src``.

    ``src`` must be reported by ``get_gram_info`` as past tense (``прш``),
    singular (``ед``), masculine (``мр``); otherwise ``NormalFormNeeded`` is
    raised.  When no grammar info is found, ``cls(normalized=src)`` is
    returned without forms.

    For each of the tenses ``прш`` and ``нст`` (in that order) the result
    holds one singular form per entry of ``PROPERTIES.GENDERS`` followed by
    one plural form, all lower-cased.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    in_normal_form = (u'прш' == properties.time
                      and u'ед' == properties.number
                      and u'мр' == properties.gender)
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    forms = []
    for tense in (u'прш', u'нст'):
        # Singular forms for every gender first, the plural form last.
        tags = [u'%s,%s,ед' % (tense, gender) for gender in PROPERTIES.GENDERS]
        tags.append(u'%s,мн' % (tense, ))
        forms.extend(
            morph.inflect_ru(normalized, tag, class_).lower() for tag in tags)

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with gender/case and plural case forms from ``src``.

    ``src`` must be reported by ``get_gram_info`` as nominative (``им``)
    singular (``ед``); otherwise ``NormalFormNeeded`` is raised.  When no
    grammar info is found, ``cls(normalized=src)`` is returned without forms.

    The forms list holds, lower-cased: singular forms for every gender in
    ``PROPERTIES.GENDERS`` (each across all ``PROPERTIES.CASES``), followed
    by the plural forms across all cases.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    in_normal_form = u'им' == properties.case and u'ед' == properties.number
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # Singular: gender is the outer loop, case the inner one.
    singular_forms = [
        morph.inflect_ru(normalized, u'%s,%s,ед' % (case, gender), class_).lower()
        for gender in PROPERTIES.GENDERS
        for case in PROPERTIES.CASES]

    # Plural: one form per case, no gender distinction.
    plural_forms = [
        morph.inflect_ru(normalized, u'%s,%s' % (case, u'мн'), class_).lower()
        for case in PROPERTIES.CASES]

    return cls(normalized=src, forms=singular_forms + plural_forms, properties=[])
def create_from_string(morph, string, tech_vocabulary={}):
    '''Create a word object from ``string``, choosing the constructor by grammar class.

    A string containing a space is handed to the NOUN_GROUP constructor
    without looking up its grammar class.  Otherwise the class reported by
    ``get_gram_info`` for the upper-cased, ``efication``-processed string
    selects the entry of ``WORD_CONSTRUCTORS``.  In both cases the original
    ``string`` (not the normalized one) is passed to ``create_from_baseword``.

    Raises ``TextgenException`` when the grammar class is not one of the
    handled ones.  Exceptions raised by ``get_gram_info`` are not caught here.
    '''
    normalized = efication(string.upper())

    if ' ' in string:
        constructor = WORD_CONSTRUCTORS[WORD_TYPE.NOUN_GROUP]
        return constructor.create_from_baseword(morph, string, tech_vocabulary)

    # Only the grammar class matters for dispatching; the properties are unused.
    class_, _ = get_gram_info(morph, normalized, tech_vocabulary)

    # Three classes (full adjective, short adjective, adjectival pronoun)
    # share the ADJECTIVE constructor.
    word_type_by_class = {
        u'С': WORD_TYPE.NOUN,
        u'П': WORD_TYPE.ADJECTIVE,
        u'КР_ПРИЛ': WORD_TYPE.ADJECTIVE,
        u'Г': WORD_TYPE.VERB,
        u'ПРИЧАСТИЕ': WORD_TYPE.PARTICIPLE,
        u'КР_ПРИЧАСТИЕ': WORD_TYPE.SHORT_PARTICIPLE,
        u'МС': WORD_TYPE.PRONOUN,
        u'МС-П': WORD_TYPE.ADJECTIVE,
    }

    if class_ not in word_type_by_class:
        raise TextgenException(u'unknown word type: %s of word: %s' % (class_, string))

    constructor = WORD_CONSTRUCTORS[word_type_by_class[class_]]
    return constructor.create_from_baseword(morph, string, tech_vocabulary)
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''Build a word object with number/case forms from the base word ``src``.

    ``src`` must be reported by ``get_gram_info`` as nominative (``им``).
    It must also be singular (``ед``), unless its gender is none of
    ``мр``/``ср``/``жр``.  Otherwise ``NormalFormNeeded`` is raised.  When no
    grammar info is found, ``cls(normalized=src)`` is returned without forms.

    The forms list holds one lower-cased form per (number, case) pair, with
    ``PROPERTIES.NUMBERS`` as the outer loop and ``PROPERTIES.CASES`` as the
    inner one.  The detected gender is stored as the only property.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    # A non-singular word is accepted only when its gender is not one of
    # the three regular genders.
    in_normal_form = u'им' == properties.case and (
        u'ед' == properties.number
        or properties.gender not in (u'мр', u'ср', u'жр'))
    if not in_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    forms = [
        morph.inflect_ru(normalized, u'%s,%s' % (case, number), class_).lower()
        for number in PROPERTIES.NUMBERS
        for case in PROPERTIES.CASES]

    return cls(normalized=src, forms=forms, properties=[properties.gender])