def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Build a word group of type ``cls`` from the space-separated phrase ``src``.

    At least one noun of the phrase MUST be in the nominative case (u'им').
    Nouns written in the plural are known to be problematic.

    Returns ``cls(normalized=src)`` without forms as soon as
    ``get_gram_info`` raises NoGrammarFound for any word of the phrase.
    Raises NormalFormNeeded when the phrase contains no main noun.
    '''
    head_noun = None
    head_properties = None
    parts = []  # (class, upper-cased word, is_constant) for every non-empty token

    for token in src.split(' '):
        if not token:
            # consecutive spaces produce empty tokens; ignore them
            continue

        try:
            class_, properties = get_gram_info(morph, efication(token.upper()), tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        is_constant = False
        if class_ == u'С':
            in_base_form = (u'им' == properties.case and
                            (u'ед' == properties.number or properties.gender == u'мн'))
            if in_base_form:
                # the last noun found in base form supplies the group's gender
                head_noun = token
                head_properties = properties
            else:
                # nouns outside the base form are copied unchanged into every form
                is_constant = True

        parts.append((class_, efication(token).upper(), is_constant))

    if not head_noun:
        raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

    forms = []
    for number in PROPERTIES.NUMBERS:
        for case in PROPERTIES.CASES:
            tags = u','.join([case, number])
            inflected = []
            for class_, upper_word, is_constant in parts:
                if is_constant:
                    inflected.append(upper_word.lower())
                else:
                    inflected.append(morph.inflect_ru(upper_word, tags, class_).lower())
            forms.append(' '.join(inflected))

    return cls(normalized=src, forms=forms, properties=[head_properties.gender])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Create a ``cls`` word group from ``src``, a phrase of space-separated words.

    One noun of the phrase MUST be in the nominative case (u'им').
    There are known problems with nouns given in the plural.

    If ``get_gram_info`` raises NoGrammarFound for any word, the result is
    ``cls(normalized=src)`` with no forms. NormalFormNeeded is raised when
    no main noun could be found.
    '''
    main_properties = None
    main_found = False
    chunks = []  # (class, upper-cased text, frozen) triples in phrase order

    for raw in src.split(' '):
        if not raw:
            continue

        try:
            class_, properties = get_gram_info(morph, efication(raw.upper()), tech_vocabulary)
        except NoGrammarFound:
            return cls(normalized=src)

        frozen = False
        if class_ == u'С':
            if u'им' == properties.case and (u'ед' == properties.number or properties.gender == u'мн'):
                main_found = True
                main_properties = properties
            else:
                # a noun that is not in base form stays as written in all forms
                frozen = True

        chunks.append((class_, efication(raw).upper(), frozen))

    if not main_found:
        raise NormalFormNeeded('no main noun found in phrase "%s"' % src)

    # one phrase per (number, case) pair, numbers being the outer loop
    forms = [
        ' '.join(
            text.lower() if frozen
            else morph.inflect_ru(text, u','.join([case, number]), class_).lower()
            for class_, text, frozen in chunks)
        for number in PROPERTIES.NUMBERS
        for case in PROPERTIES.CASES
    ]

    return cls(normalized=src, forms=forms, properties=[main_properties.gender])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Build a ``cls`` word from ``src``, which must be given in the past
    tense (u'прш'), singular (u'ед'), masculine (u'мр') form.

    Returns ``cls(normalized=src)`` without forms when ``get_gram_info``
    raises NoGrammarFound. Raises NormalFormNeeded when ``src`` is in any
    other form.
    '''
    base = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, base, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    is_normal_form = (properties.time == u'прш' and
                      properties.number == u'ед' and
                      properties.gender == u'мр')
    if not is_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    def inflect(tags):
        return morph.inflect_ru(base, tags, class_).lower()

    forms = []
    for time in (u'прш', u'нст'):
        # per tense: one singular form for each gender, then the plural form
        forms.extend(inflect(u'%s,%s,ед' % (time, gender)) for gender in PROPERTIES.GENDERS)
        forms.append(inflect(u'%s,мн' % (time, )))

    return cls(normalized=src, forms=forms, properties=[])
def _preprocess_externals(self, dictionary, externals): processed_externals = {} for external_id, external in externals.items(): additional_args = () if isinstance(external, tuple): normalized, additional_args = external additional_args = additional_args.split(u',') if isinstance( additional_args, basestring) else additional_args else: normalized = external if isinstance(normalized, numbers.Number): word = Numeral(normalized) arguments = Args() elif isinstance(normalized, WordBase): word = normalized arguments = Args(*word.properties) else: word = dictionary.get_word(efication(normalized)) arguments = Args(*word.properties) arguments.update(*additional_args) processed_externals[external_id] = (word, arguments) return processed_externals
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Build a ``cls`` word holding 16 inflected forms of the verb ``src``.

    ``src`` must be given in the past tense (u'прш'), singular (u'ед'),
    masculine (u'мр') form, otherwise NormalFormNeeded is raised. When
    ``get_gram_info`` raises NoGrammarFound, ``cls(normalized=src)`` is
    returned without forms.

    Forms are ordered: past (three genders, then plural), then present
    and future, each by person 1-3 with singular before plural.
    '''
    upper_src = efication(src.upper())

    try:
        _class, properties = get_gram_info(morph, upper_src, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    is_normal_form = (properties.time == u'прш' and
                      properties.number == u'ед' and
                      properties.gender == u'мр')
    if not is_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # every form below is derived from this singular masculine verb (u'Г') form
    base = morph.inflect_ru(upper_src, u'ед,мр', u'Г')

    tag_sets = (u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн',
                u'нст,1л,ед', u'нст,1л,мн',
                u'нст,2л,ед', u'нст,2л,мн',
                u'нст,3л,ед', u'нст,3л,мн',
                u'буд,1л,ед', u'буд,1л,мн',
                u'буд,2л,ед', u'буд,2л,мн',
                u'буд,3л,ед', u'буд,3л,мн')

    forms = [morph.inflect_ru(base, tags).lower() for tags in tag_sets]

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Build a ``cls`` word from ``src``, which must be given in the
    nominative case (u'им') and singular number (u'ед').

    Forms are ordered: for every gender all cases in the singular,
    followed by all cases in the plural.

    Returns ``cls(normalized=src)`` without forms when ``get_gram_info``
    raises NoGrammarFound. Raises NormalFormNeeded when ``src`` is in any
    other form.
    '''
    base = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, base, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    if not (properties.case == u'им' and properties.number == u'ед'):
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    def inflect(tags):
        return morph.inflect_ru(base, tags, class_).lower()

    # singular: gender-major, case-minor
    forms = [inflect(u'%s,%s,ед' % (case, gender))
             for gender in PROPERTIES.GENDERS
             for case in PROPERTIES.CASES]

    # plural: one form per case
    forms += [inflect(u'%s,%s' % (case, u'мн')) for case in PROPERTIES.CASES]

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Create a ``cls`` word with the past, present and future forms of the
    verb ``src``.

    ``src`` is expected in the past tense (u'прш'), singular (u'ед'),
    masculine (u'мр') form. Any other form raises NormalFormNeeded.
    If ``get_gram_info`` raises NoGrammarFound the word is returned as
    ``cls(normalized=src)`` with no forms.
    '''
    normalized = efication(src.upper())

    try:
        _class, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    if properties.time != u'прш' or properties.number != u'ед' or properties.gender != u'мр':
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # re-inflect as a verb (u'Г') to get the stem form all other forms derive from
    base = morph.inflect_ru(normalized, u'ед,мр', u'Г')

    past = (u'прш,мр,ед', u'прш,жр,ед', u'прш,ср,ед', u'прш,мн')
    present = (u'нст,1л,ед', u'нст,1л,мн',
               u'нст,2л,ед', u'нст,2л,мн',
               u'нст,3л,ед', u'нст,3л,мн')
    future = (u'буд,1л,ед', u'буд,1л,мн',
              u'буд,2л,ед', u'буд,2л,мн',
              u'буд,3л,ед', u'буд,3л,мн')

    forms = []
    for tags in past + present + future:
        forms.append(morph.inflect_ru(base, tags).lower())

    return cls(normalized=src, forms=forms, properties=[])
def _preprocess_externals(self, dictionary, externals): processed_externals = {} for external_id, external in externals.items(): additional_args = () if isinstance(external, tuple): normalized, additional_args = external additional_args = additional_args.split(u',') if isinstance(additional_args, basestring) else additional_args else: normalized = external if isinstance(normalized, numbers.Number): word = Numeral(normalized) arguments = Args() elif isinstance(normalized, WordBase): word = normalized arguments = Args(*word.properties) else: word = dictionary.get_word(efication(normalized)) arguments = Args(*word.properties) arguments.update(*additional_args) processed_externals[external_id] = (word, arguments) return processed_externals
def create_from_string(morph, string, tech_vocabulary={}):
    '''
    Build a word object from ``string``, choosing the constructor by the
    grammatical class that ``get_gram_info`` reports for it.

    A string containing a space is handed to the NOUN_GROUP constructor
    without being classified. Raises TextgenException when the reported
    class has no constructor mapped to it.
    '''
    normalized = efication(string.upper())

    if ' ' in string:
        word_type = WORD_TYPE.NOUN_GROUP
    else:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)

        # several grammatical classes share the ADJECTIVE constructor
        type_by_class = {
            u'С': WORD_TYPE.NOUN,
            u'П': WORD_TYPE.ADJECTIVE,
            u'КР_ПРИЛ': WORD_TYPE.ADJECTIVE,
            u'Г': WORD_TYPE.VERB,
            u'ПРИЧАСТИЕ': WORD_TYPE.PARTICIPLE,
            u'КР_ПРИЧАСТИЕ': WORD_TYPE.SHORT_PARTICIPLE,
            u'МС': WORD_TYPE.PRONOUN,
            u'МС-П': WORD_TYPE.ADJECTIVE,
        }

        if class_ not in type_by_class:
            raise TextgenException(u'unknown word type: %s of word: %s' % (class_, string))

        word_type = type_by_class[class_]

    return WORD_CONSTRUCTORS[word_type].create_from_baseword(morph, string, tech_vocabulary)
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Build a ``cls`` word from ``src`` with one form per (number, case)
    pair and the gender of ``src`` as its only property.

    ``src`` must be in the nominative case (u'им'). It must also be
    singular (u'ед') if its gender is masculine, neuter or feminine.
    Otherwise NormalFormNeeded is raised. When ``get_gram_info`` raises
    NoGrammarFound, ``cls(normalized=src)`` is returned without forms.
    '''
    base = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, base, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    wrong_case = properties.case != u'им'
    if wrong_case or (properties.number != u'ед' and properties.gender in (u'мр', u'ср', u'жр')):
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # numbers are the outer loop, cases the inner one
    forms = [morph.inflect_ru(base, u'%s,%s' % (case, number), class_).lower()
             for number in PROPERTIES.NUMBERS
             for case in PROPERTIES.CASES]

    return cls(normalized=src, forms=forms, properties=[properties.gender])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Create a ``cls`` word from ``src`` with past (u'прш') and present
    (u'нст') forms: for each tense one singular form per gender followed
    by the plural form.

    ``src`` is expected in the past tense, singular, masculine form.
    Anything else raises NormalFormNeeded. If ``get_gram_info`` raises
    NoGrammarFound the word is returned as ``cls(normalized=src)`` with
    no forms.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    if properties.time != u'прш' or properties.number != u'ед' or properties.gender != u'мр':
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    forms = []
    for tense in (u'прш', u'нст'):
        tag_sets = [u'%s,%s,ед' % (tense, gender) for gender in PROPERTIES.GENDERS]
        tag_sets.append(u'%s,мн' % (tense, ))

        for tags in tag_sets:
            forms.append(morph.inflect_ru(normalized, tags, class_).lower())

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Build a ``cls`` word from the pronoun ``src``.

    The forms of two pronouns are hardcoded. Every other word is
    delegated to ``Adjective.create_from_baseword``.

    The hardcoded tables hold four rows of six forms each: masculine,
    feminine, neuter, plural.
    '''
    normalized = efication(src.upper())

    # pymorphy does not change the gender of pronouns, and some of these
    # words are always needed, so their forms are hardcoded
    if normalized == u'ОН':
        # The last form of the neuter row used to be spelled with "ё" while
        # the identical masculine form, and every other form in the table,
        # is spelled with "е". Both rows now use the same spelling.
        return cls(normalized=src,
                   forms=(u'он', u'его', u'ему', u'его', u'им', u'нем',
                          u'она', u'ее', u'ей', u'ее', u'ей', u'ней',
                          u'оно', u'его', u'ему', u'его', u'им', u'нем',
                          u'они', u'их', u'им', u'их', u'ими', u'них'),
                   properties=[])

    if normalized == u'Я':
        # this pronoun does not vary by gender or number here: same row four times
        return cls(normalized=src,
                   forms=(u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                          u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                          u'я', u'меня', u'мне', u'меня', u'мной', u'мне',
                          u'я', u'меня', u'мне', u'меня', u'мной', u'мне'),
                   properties=[])

    # NOTE(review): the fallback is built by Adjective.create_from_baseword
    # rather than through ``cls`` - confirm that this is intended
    return Adjective.create_from_baseword(morph, src, tech_vocabulary)
def create_from_string(morph, string, tech_vocabulary={}):
    '''
    Create a word object for ``string`` using the constructor registered
    in WORD_CONSTRUCTORS for its grammatical class.

    Phrases (strings with a space in them) always go to the NOUN_GROUP
    constructor. For single words the class comes from ``get_gram_info``.
    An unmapped class raises TextgenException.
    '''
    normalized = efication(string.upper())

    if ' ' in string:
        group_constructor = WORD_CONSTRUCTORS[WORD_TYPE.NOUN_GROUP]
        return group_constructor.create_from_baseword(morph, string, tech_vocabulary)

    class_, properties = get_gram_info(morph, normalized, tech_vocabulary)

    # (grammatical class, word type) pairs, checked in this order
    dispatch = (
        (u'С', WORD_TYPE.NOUN),
        (u'П', WORD_TYPE.ADJECTIVE),
        (u'КР_ПРИЛ', WORD_TYPE.ADJECTIVE),
        (u'Г', WORD_TYPE.VERB),
        (u'ПРИЧАСТИЕ', WORD_TYPE.PARTICIPLE),
        (u'КР_ПРИЧАСТИЕ', WORD_TYPE.SHORT_PARTICIPLE),
        (u'МС', WORD_TYPE.PRONOUN),
        (u'МС-П', WORD_TYPE.ADJECTIVE),
    )

    for known_class, word_type in dispatch:
        if class_ == known_class:
            constructor = WORD_CONSTRUCTORS[word_type]
            return constructor.create_from_baseword(morph, string, tech_vocabulary)

    raise TextgenException(u'unknown word type: %s of word: %s' % (class_, string))
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Create a ``cls`` word from ``src`` given in the nominative case
    (u'им'), singular number (u'ед').

    The resulting forms list holds the singular forms (every case for
    each gender in turn) followed by the plural forms (every case).

    If ``get_gram_info`` raises NoGrammarFound the word is returned as
    ``cls(normalized=src)`` with no forms. NormalFormNeeded is raised
    when ``src`` is not in the expected form.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    if properties.case != u'им' or properties.number != u'ед':
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    # singular tags first (gender-major), plural tags after them
    tag_sets = []
    for gender in PROPERTIES.GENDERS:
        tag_sets.extend(u'%s,%s,ед' % (case, gender) for case in PROPERTIES.CASES)
    tag_sets.extend(u'%s,%s' % (case, u'мн') for case in PROPERTIES.CASES)

    forms = []
    for tags in tag_sets:
        forms.append(morph.inflect_ru(normalized, tags, class_).lower())

    return cls(normalized=src, forms=forms, properties=[])
def create_from_baseword(cls, morph, src, tech_vocabulary={}):
    '''
    Create a ``cls`` word from ``src``: one form for every case in every
    number, with the gender of ``src`` stored as the single property.

    ``src`` counts as being in normal form when it is in the nominative
    case (u'им') and is either singular (u'ед') or has a gender other
    than masculine, neuter or feminine. Any other form raises
    NormalFormNeeded.

    If ``get_gram_info`` raises NoGrammarFound the word is returned as
    ``cls(normalized=src)`` with no forms.
    '''
    normalized = efication(src.upper())

    try:
        class_, properties = get_gram_info(morph, normalized, tech_vocabulary)
    except NoGrammarFound:
        return cls(normalized=src)

    is_normal_form = (properties.case == u'им' and
                      (properties.number == u'ед' or
                       properties.gender not in (u'мр', u'ср', u'жр')))
    if not is_normal_form:
        raise NormalFormNeeded(u'word "%s" not in normal form: %s' % (src, properties))

    forms = []
    for number in PROPERTIES.NUMBERS:
        forms.extend(morph.inflect_ru(normalized, u'%s,%s' % (case, number), class_).lower()
                     for case in PROPERTIES.CASES)

    return cls(normalized=src, forms=forms, properties=[properties.gender])
def get_word(self, normalized):
    '''
    Return the word stored under ``efication(normalized)``.

    When no such word is stored, a ``Fake`` carrying a
    "word not found" message is returned instead.
    '''
    key = efication(normalized)

    if key not in self.data:
        return Fake(u'<word not found: %s>' % key)

    return self.data[key]
def add_word(self, word, overwrite=False):
    '''
    Store ``word`` under the key ``efication(word.normalized)``.

    A word already stored under that key is kept unless ``overwrite``
    is true.
    '''
    key = efication(word.normalized)

    # TODO: add test for the "already stored and not overwriting" case
    if overwrite or key not in self.data:
        self.data[key] = word