Python MorphCollection Examples

Programming Language: Python

Namespace/Package Name: pullenti.ner.MorphCollection

Class/Type: MorphCollection

Examples at hotexamples.com: 20

Python MorphCollection - 20 examples found. These are the top rated real world Python examples of pullenti.ner.MorphCollection.MorphCollection extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

MorphCollection(16)

_deserialize(2)

_serialize(2)

addItem(2)

add_item(2)

getIndexerItem(1)

get_indexer_item(1)

removeItems(1)

remove_items(1)

remove_items_list_cla(1)

Example #1

Show file

 def morph(self) -> 'MorphCollection':
     """ Морфологическая информация
     
     """
     if (self.__m_morph is None):
         self.__m_morph = MorphCollection()
     return self.__m_morph

Example #2

Show file

File: Token.py Project: MihaJjDa/APCLtask

 def _serialize(self, stream: io.IOBase) -> None:
     from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
     SerializerHelper.serializeInt(stream, self.begin_char)
     SerializerHelper.serializeInt(stream, self.end_char)
     SerializerHelper.serializeInt(stream, self.__m_attrs)
     SerializerHelper.serializeInt(stream, self.chars.value)
     if (self.__m_morph is None):
         self.__m_morph = MorphCollection()
     self.__m_morph._serialize(stream)

Example #3

Show file

 def __init__(self,
              source: 'MorphToken',
              kit_: 'AnalysisKit',
              bchar: int = -1,
              echar: int = -1) -> None:
     super().__init__(kit_, (bchar if bchar >= 0 else
                             (0 if source is None else source.begin_char)),
                      (echar if echar >= 0 else
                       (0 if source is None else source.end_char)))
     self.term = None
     self.lemma = None
     self.term0 = None
     self.invariant_prefix_length_of_morph_vars = 0
     self.max_length_of_morph_vars = 0
     if (source is None):
         return
     self.chars = source.char_info
     self.term = source.term
     self.lemma = (Utils.ifNotNull(source.get_lemma(), self.term))
     self.max_length_of_morph_vars = (len(self.term))
     self.morph = MorphCollection()
     if (source.word_forms is not None):
         for wf in source.word_forms:
             self.morph.add_item(wf)
             if (wf.normal_case is not None and
                 (self.max_length_of_morph_vars < len(wf.normal_case))):
                 self.max_length_of_morph_vars = (len(wf.normal_case))
             if (wf.normal_full is not None and
                 (self.max_length_of_morph_vars < len(wf.normal_full))):
                 self.max_length_of_morph_vars = (len(wf.normal_full))
     i = 0
     while i < len(self.term):
         ch = self.term[i]
         j = 0
         j = 0
         while j < self.morph.items_count:
             wf = Utils.asObjectOrNull(self.morph.get_indexer_item(j),
                                       MorphWordForm)
             if (wf.normal_case is not None):
                 if (i >= len(wf.normal_case)):
                     break
                 if (wf.normal_case[i] != ch):
                     break
             if (wf.normal_full is not None):
                 if (i >= len(wf.normal_full)):
                     break
                 if (wf.normal_full[i] != ch):
                     break
             j += 1
         if (j < self.morph.items_count):
             break
         self.invariant_prefix_length_of_morph_vars = ((i + 1))
         i += 1
     if (self.morph.language.is_undefined
             and not source.language.is_undefined):
         self.morph.language = source.language

Example #4

Show file

File: Token.py Project: MihaJjDa/APCLtask

 def _deserialize(self, stream: io.IOBase, kit_: 'AnalysisKit',
                  vers: int) -> None:
     from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
     self.kit = kit_
     self.begin_char = SerializerHelper.deserializeInt(stream)
     self.end_char = SerializerHelper.deserializeInt(stream)
     self.__m_attrs = (SerializerHelper.deserializeInt(stream))
     self.chars = CharsInfo._new2656(
         SerializerHelper.deserializeInt(stream))
     self.__m_morph = MorphCollection()
     self.__m_morph._deserialize(stream)

Example #5

Show file

 def __tryParseRu(t: 'Token') -> 'VerbPhraseToken':
     res = None
     t0 = t
     not0_ = None
     has_verb = False
     first_pass2814 = True
     while True:
         if first_pass2814: first_pass2814 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (not ((isinstance(t, TextToken)))):
             break
         tt = Utils.asObjectOrNull(t, TextToken)
         if (tt.term == "НЕ"):
             not0_ = t
             continue
         ty = 0
         mc = tt.getMorphClassInDictionary()
         if (tt.term == "НЕТ"):
             ty = 1
         elif (mc.is_adverb):
             ty = 2
         elif (tt.is_pure_verb or tt.is_verb_be):
             ty = 1
         elif (mc.is_verb):
             if (mc.is_preposition or mc.is_misc):
                 pass
             elif (mc.is_noun):
                 if (tt.term == "СТАЛИ"):
                     ty = 1
                 elif (not tt.chars.is_all_lower
                       and not MiscHelper.canBeStartOfSentence(tt)):
                     ty = 1
             elif (mc.is_proper):
                 if (tt.chars.is_all_lower):
                     ty = 1
             else:
                 ty = 1
         if (ty == 0):
             break
         if (res is None):
             res = VerbPhraseToken(t0, t)
         res.end_token = t
         it = VerbPhraseItemToken._new638(t, t, MorphCollection(t.morph))
         if (not0_ is not None):
             it.begin_token = not0_
             it.not0_ = True
             not0_ = (None)
         it.is_adverb = ty == 2
         it.normal = t.getNormalCaseText(
             (MorphClass.ADVERB if ty == 2 else MorphClass.VERB), False,
             MorphGender.UNDEFINED, False)
         res.items.append(it)
         if (not has_verb and ty == 1):
             res.morph = it.morph
             has_verb = True
     if (not has_verb):
         return None
     return res

Example #6

Show file

File: ReferentToken.py Project: MihaJjDa/APCLtask

 def __init__(self, entity : 'Referent', begin : 'Token', end : 'Token', kit_ : 'AnalysisKit'=None) -> None:
     super().__init__(begin, end, kit_)
     self.referent = None;
     self.data = None;
     self.misc_attrs = 0
     self.referent = entity
     if (self.morph is None): 
         self.morph = MorphCollection()

Example #7

Show file

File: _NounPraseHelperInt.py Project: pullenti/PullentiPython

 def __try_parse_ru(first: 'Token',
                    typ: 'NounPhraseParseAttr',
                    max_char_pos: int,
                    def_noun: 'NounPhraseItem' = None) -> 'NounPhraseToken':
     if (first is None):
         return None
     items = None
     adverbs = None
     prep = None
     kak = False
     t0 = first
     if ((((typ) & (NounPhraseParseAttr.PARSEPREPOSITION))) !=
         (NounPhraseParseAttr.NO) and t0.is_value("КАК", None)):
         t0 = t0.next0_
         prep = PrepositionHelper.try_parse(t0)
         if (prep is not None):
             t0 = prep.end_token.next0_
         kak = True
     internal_noun_prase = None
     conj_before = False
     t = t0
     first_pass3041 = True
     while True:
         if first_pass3041: first_pass3041 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char_pos > 0 and t.begin_char > max_char_pos):
             break
         if ((t.morph.class0_.is_conjunction
              and not t.morph.class0_.is_adjective
              and not t.morph.class0_.is_pronoun)
                 and not t.morph.class0_.is_noun):
             if (conj_before):
                 break
             if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) !=
                 (NounPhraseParseAttr.NO)):
                 break
             if (items is not None and ((t.is_and or t.is_or))):
                 conj_before = True
                 if ((t.next0_ is not None and t.next0_.is_char_of("\\/")
                      and t.next0_.next0_ is not None)
                         and t.next0_.next0_.is_or):
                     t = t.next0_.next0_
                 if (((t.next0_ is not None and t.next0_.is_char('(')
                       and t.next0_.next0_ is not None)
                      and t.next0_.next0_.is_or
                      and t.next0_.next0_.next0_ is not None)
                         and t.next0_.next0_.next0_.is_char(')')):
                     t = t.next0_.next0_.next0_
                 continue
             break
         elif (t.is_comma):
             if (conj_before or items is None):
                 break
             if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) !=
                 (NounPhraseParseAttr.NO)):
                 break
             mc = t.previous.get_morph_class_in_dictionary()
             if (mc.is_proper_surname or mc.is_proper_secname):
                 break
             conj_before = True
             if (kak and t.next0_ is not None
                     and t.next0_.is_value("ТАК", None)):
                 t = t.next0_
                 if (t.next0_ is not None and t.next0_.is_and):
                     t = t.next0_
                 pr = PrepositionHelper.try_parse(t.next0_)
                 if (pr is not None):
                     t = pr.end_token
             if (items[len(items) - 1].can_be_noun
                     and items[len(items) -
                               1].end_token.morph.class0_.is_pronoun):
                 break
             continue
         elif (t.is_char('(')):
             if (items is None):
                 return None
             brr = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
             if (brr is None):
                 break
             if (brr.length_char > 100):
                 break
             t = brr.end_token
             continue
         if (isinstance(t, ReferentToken)):
             if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (
                     NounPhraseParseAttr.NO)):
                 break
         elif (t.chars.is_latin_letter):
             break
         it = NounPhraseItem.try_parse(t, items, typ)
         if (it is None or ((not it.can_be_adj and not it.can_be_noun))):
             if (((it is not None and items is not None
                   and t.chars.is_capital_upper) and
                  (t.whitespaces_before_count < 3) and t.length_char > 3)
                     and not t.get_morph_class_in_dictionary().is_noun and
                     not t.get_morph_class_in_dictionary().is_adjective):
                 it.can_be_noun = True
                 items.append(it)
                 break
             if ((((typ) & (NounPhraseParseAttr.PARSEADVERBS))) !=
                 (NounPhraseParseAttr.NO) and (isinstance(t, TextToken))
                     and t.morph.class0_.is_adverb):
                 if (adverbs is None):
                     adverbs = list()
                 adverbs.append(Utils.asObjectOrNull(t, TextToken))
                 continue
             break
         it.conj_before = conj_before
         conj_before = False
         if (not it.can_be_adj and not it.can_be_noun):
             break
         if (t.is_newline_before and t != first):
             if ((((typ) & (NounPhraseParseAttr.MULTILINES))) !=
                 (NounPhraseParseAttr.NO)):
                 pass
             elif (items is not None
                   and t.chars != items[len(items) - 1].chars):
                 if (t.chars.is_all_lower
                         and items[len(items) - 1].chars.is_capital_upper):
                     pass
                 else:
                     break
         if (items is None):
             items = list()
         else:
             it0 = items[len(items) - 1]
             if (it0.can_be_noun and it0.is_personal_pronoun):
                 if (it.is_pronoun):
                     break
                 if ((it0.begin_token.previous is not None
                      and it0.begin_token.previous.
                      get_morph_class_in_dictionary().is_verb
                      and not it0.begin_token.previous.
                      get_morph_class_in_dictionary().is_adjective)
                         and not it0.begin_token.previous.
                         get_morph_class_in_dictionary().is_preposition):
                     if (t.morph.case_.is_nominative
                             or t.morph.case_.is_accusative):
                         pass
                     else:
                         break
                 if (it.can_be_noun and it.is_verb):
                     if (it0.previous is None):
                         pass
                     elif ((isinstance(it0.previous, TextToken))
                           and not it0.previous.chars.is_letter):
                         pass
                     else:
                         break
         items.append(it)
         t = it.end_token
         if (t.is_newline_after and not t.chars.is_all_lower):
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_proper_surname):
                 break
             if (t.morph.class0_.is_proper_surname and mc.is_undefined):
                 break
     if (items is None):
         return None
     tt1 = None
     if (len(items) == 1 and items[0].can_be_adj):
         and0_ = False
         tt1 = items[0].end_token.next0_
         first_pass3042 = True
         while True:
             if first_pass3042: first_pass3042 = False
             else: tt1 = tt1.next0_
             if (not (tt1 is not None)): break
             if (tt1.is_and or tt1.is_or):
                 and0_ = True
                 break
             if (tt1.is_comma or tt1.is_value("НО", None)
                     or tt1.is_value("ТАК", None)):
                 continue
             break
         if (and0_):
             if (items[0].can_be_noun and items[0].is_personal_pronoun):
                 and0_ = False
         if (and0_):
             tt2 = tt1.next0_
             if (tt2 is not None and tt2.morph.class0_.is_preposition):
                 tt2 = tt2.next0_
             npt1 = _NounPraseHelperInt.__try_parse_ru(
                 tt2, typ, max_char_pos, None)
             if (npt1 is not None and len(npt1.adjectives) > 0):
                 ok1 = False
                 for av in items[0].adj_morph:
                     for v in npt1.noun.noun_morph:
                         if (v.check_accord(av, False, False)):
                             items[0].morph.add_item(av)
                             ok1 = True
                 if (ok1):
                     npt1.begin_token = items[0].begin_token
                     npt1.end_token = tt1.previous
                     npt1.adjectives.clear()
                     npt1.adjectives.append(items[0])
                     return npt1
     if (def_noun is not None):
         items.append(def_noun)
     last1 = items[len(items) - 1]
     check = True
     for it in items:
         if (not it.can_be_adj):
             check = False
             break
         elif (it.can_be_noun and it.is_personal_pronoun):
             check = False
             break
     tt1 = last1.end_token.next0_
     if ((tt1 is not None and check and
          ((tt1.morph.class0_.is_preposition
            or tt1.morph.case_.is_instrumental)))
             and (tt1.whitespaces_before_count < 2)):
         inp = NounPhraseHelper.try_parse(
             tt1,
             Utils.valToEnum((typ) | (NounPhraseParseAttr.PARSEPREPOSITION),
                             NounPhraseParseAttr), max_char_pos, None)
         if (inp is not None):
             tt1 = inp.end_token.next0_
             npt1 = _NounPraseHelperInt.__try_parse_ru(
                 tt1, typ, max_char_pos, None)
             if (npt1 is not None):
                 ok = True
                 ii = 0
                 first_pass3043 = True
                 while True:
                     if first_pass3043: first_pass3043 = False
                     else: ii += 1
                     if (not (ii < len(items))): break
                     it = items[ii]
                     if (NounPhraseItem.try_accord_adj_and_noun(
                             it,
                             Utils.asObjectOrNull(npt1.noun,
                                                  NounPhraseItem))):
                         continue
                     if (ii > 0):
                         inp2 = NounPhraseHelper.try_parse(
                             it.begin_token, typ, max_char_pos, None)
                         if (inp2 is not None
                                 and inp2.end_token == inp.end_token):
                             del items[ii:ii + len(items) - ii]
                             inp = inp2
                             break
                     ok = False
                     break
                 if (ok):
                     if (npt1.morph.case_.is_genitive
                             and not inp.morph.case_.is_instrumental):
                         ok = False
                 if (ok):
                     i = 0
                     while i < len(items):
                         npt1.adjectives.insert(i, items[i])
                         i += 1
                     npt1.internal_noun = inp
                     mmm = MorphCollection(npt1.morph)
                     for it in items:
                         mmm.remove_items(it.adj_morph[0], False)
                     if (mmm.gender != MorphGender.UNDEFINED
                             or mmm.number != MorphNumber.UNDEFINED
                             or not mmm.case_.is_undefined):
                         npt1.morph = mmm
                     if (adverbs is not None):
                         if (npt1.adverbs is None):
                             npt1.adverbs = adverbs
                         else:
                             npt1.adverbs[0:0] = adverbs
                     npt1.begin_token = first
                     return npt1
             if (tt1 is not None and tt1.morph.class0_.is_noun
                     and not tt1.morph.case_.is_genitive):
                 it = NounPhraseItem.try_parse(tt1, items, typ)
                 if (it is not None and it.can_be_noun):
                     internal_noun_prase = inp
                     inp.begin_token = items[0].end_token.next0_
                     items.append(it)
     i = 0
     first_pass3044 = True
     while True:
         if first_pass3044: first_pass3044 = False
         else: i += 1
         if (not (i < len(items))): break
         if (items[i].can_be_adj
                 and items[i].begin_token.morph.class0_.is_verb):
             it = items[i].begin_token
             if (not it.get_morph_class_in_dictionary().is_verb):
                 continue
             if (it.is_value("УПОЛНОМОЧЕННЫЙ", None)):
                 continue
             if ((((typ) & (NounPhraseParseAttr.PARSEVERBS))) == (
                     NounPhraseParseAttr.NO)):
                 continue
             inp = _NounPraseHelperInt.__try_parse_ru(
                 items[i].end_token.next0_, NounPhraseParseAttr.NO,
                 max_char_pos, None)
             if (inp is None):
                 continue
             if (inp.anafor is not None and i == (len(items) - 1)
                     and NounPhraseItem.try_accord_adj_and_noun(
                         items[i],
                         Utils.asObjectOrNull(inp.noun, NounPhraseItem))):
                 inp.begin_token = first
                 ii = 0
                 while ii < len(items):
                     inp.adjectives.insert(ii, items[ii])
                     ii += 1
                 return inp
             if (inp.end_token.whitespaces_after_count > 3):
                 continue
             npt1 = _NounPraseHelperInt.__try_parse_ru(
                 inp.end_token.next0_, NounPhraseParseAttr.NO, max_char_pos,
                 None)
             if (npt1 is None):
                 continue
             ok = True
             j = 0
             while j <= i:
                 if (not NounPhraseItem.try_accord_adj_and_noun(
                         items[j],
                         Utils.asObjectOrNull(npt1.noun, NounPhraseItem))):
                     ok = False
                     break
                 j += 1
             if (not ok):
                 continue
             verb = VerbPhraseHelper.try_parse(it, True, False, False)
             if (verb is None):
                 continue
             vlinks = SemanticHelper.try_create_links(verb, inp, None)
             nlinks = SemanticHelper.try_create_links(inp, npt1, None)
             if (len(vlinks) == 0 and len(nlinks) > 0):
                 continue
             j = 0
             while j <= i:
                 npt1.adjectives.insert(j, items[j])
                 j += 1
             items[i].end_token = inp.end_token
             mmm = MorphCollection(npt1.morph)
             bil = list()
             j = 0
             while j <= i:
                 bil.clear()
                 for m in items[j].adj_morph:
                     bil.append(m)
                 mmm.remove_items_list_cla(bil, None)
                 j += 1
             if (mmm.gender != MorphGender.UNDEFINED
                     or mmm.number != MorphNumber.UNDEFINED
                     or not mmm.case_.is_undefined):
                 npt1.morph = mmm
             if (adverbs is not None):
                 if (npt1.adverbs is None):
                     npt1.adverbs = adverbs
                 else:
                     npt1.adverbs[0:0] = adverbs
             npt1.begin_token = first
             return npt1
     ok2 = False
     if ((len(items) == 1 and
          (((typ) & (NounPhraseParseAttr.ADJECTIVECANBELAST))) !=
          (NounPhraseParseAttr.NO) and
          (items[0].whitespaces_after_count < 3))
             and not items[0].is_adverb):
         if (not items[0].can_be_adj):
             ok2 = True
         elif (items[0].is_personal_pronoun and items[0].can_be_noun):
             ok2 = True
     if (ok2):
         it = NounPhraseItem.try_parse(items[0].end_token.next0_, None, typ)
         if (it is not None and it.can_be_adj
                 and it.begin_token.chars.is_all_lower):
             ok2 = True
             if (it.is_adverb or it.is_verb):
                 ok2 = False
             if (it.is_pronoun and items[0].is_pronoun):
                 ok2 = False
                 if (it.can_be_adj_for_personal_pronoun
                         and items[0].is_personal_pronoun):
                     ok2 = True
             if (ok2 and NounPhraseItem.try_accord_adj_and_noun(
                     it, items[0])):
                 npt1 = _NounPraseHelperInt.__try_parse_ru(
                     it.begin_token, typ, max_char_pos, None)
                 if (npt1 is not None and ((npt1.end_char > it.end_char
                                            or len(npt1.adjectives) > 0))):
                     pass
                 else:
                     items.insert(0, it)
     noun = None
     adj_after = None
     for i in range(len(items) - 1, -1, -1):
         if (items[i].can_be_noun):
             if (items[i].conj_before):
                 continue
             if (i > 0 and not items[i - 1].can_be_adj):
                 continue
             if (i > 0 and items[i - 1].can_be_noun):
                 if (items[i - 1].is_doubt_adjective):
                     continue
                 if (items[i - 1].is_pronoun and items[i].is_pronoun):
                     if (items[i].is_pronoun and
                             items[i - 1].can_be_adj_for_personal_pronoun):
                         pass
                     else:
                         continue
             noun = items[i]
             del items[i:i + len(items) - i]
             if (adj_after is not None):
                 items.append(adj_after)
             elif (len(items) > 0 and items[0].can_be_noun
                   and not items[0].can_be_adj):
                 noun = items[0]
                 items.clear()
             break
     if (noun is None):
         return None
     res = NounPhraseToken._new466(first, noun.end_token, prep)
     if (adverbs is not None):
         for a in adverbs:
             if (a.begin_char < noun.begin_char):
                 if (len(items) == 0 and prep is None):
                     return None
                 if (res.adverbs is None):
                     res.adverbs = list()
                 res.adverbs.append(a)
     res.noun = (noun)
     res.multi_nouns = noun.multi_nouns
     if (kak):
         res.multi_nouns = True
     res.internal_noun = internal_noun_prase
     for v in noun.noun_morph:
         noun.morph.add_item(v)
     res.morph = noun.morph
     if (res.morph.case_.is_nominative and first.previous is not None
             and first.previous.morph.class0_.is_preposition):
         res.morph.case_ = (res.morph.case_) ^ MorphCase.NOMINATIVE
     if ((((typ) &
           (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)
             and ((res.morph.class0_.is_pronoun
                   or res.morph.class0_.is_personal_pronoun))):
         return None
     stat = None
     if (len(items) > 1):
         stat = dict()
     need_update_morph = False
     if (len(items) > 0):
         ok_list = list()
         is_num_not = False
         for vv in noun.noun_morph:
             i = 0
             v = vv
             i = 0
             while i < len(items):
                 ok = False
                 for av in items[i].adj_morph:
                     if (v.check_accord(av, False, False)):
                         ok = True
                         if (not ((av.case_) & v.case_).is_undefined
                                 and av.case_ != v.case_):
                             v.case_ = av.case_ = (av.case_) & v.case_
                         break
                 if (not ok):
                     if (items[i].can_be_numeric_adj
                             and items[i].try_accord_var(v, False)):
                         ok = True
                         v1 = NounPhraseItemTextVar()
                         v1.copy_from_item(v)
                         v1.number = MorphNumber.PLURAL
                         is_num_not = True
                         v1.case_ = MorphCase()
                         for a in items[i].adj_morph:
                             v1.case_ = (v1.case_) | a.case_
                         v = v1
                     else:
                         break
                 i += 1
             if (i >= len(items)):
                 ok_list.append(v)
         if (len(ok_list) > 0 and
             (((len(ok_list) < res.morph.items_count) or is_num_not))):
             res.morph = MorphCollection()
             for v in ok_list:
                 res.morph.add_item(v)
             if (not is_num_not):
                 noun.morph = res.morph
     i = 0
     first_pass3045 = True
     while True:
         if first_pass3045: first_pass3045 = False
         else: i += 1
         if (not (i < len(items))): break
         for av in items[i].adj_morph:
             for v in noun.noun_morph:
                 if (v.check_accord(av, False, False)):
                     if (not ((av.case_) & v.case_).is_undefined
                             and av.case_ != v.case_):
                         v.case_ = av.case_ = (av.case_) & v.case_
                         need_update_morph = True
                     items[i].morph.add_item(av)
                     if (stat is not None and av.normal_value is not None
                             and len(av.normal_value) > 1):
                         last = av.normal_value[len(av.normal_value) - 1]
                         if (not last in stat):
                             stat[last] = 1
                         else:
                             stat[last] += 1
         if (items[i].is_pronoun or items[i].is_personal_pronoun):
             res.anafor = items[i].begin_token
             if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (
                     NounPhraseParseAttr.NO)):
                 continue
         tt = Utils.asObjectOrNull(items[i].begin_token, TextToken)
         if (tt is not None and not tt.term.startswith("ВЫСШ")):
             err = False
             for wf in tt.morph.items:
                 if (wf.class0_.is_adjective):
                     if (wf.contains_attr("прев.", None)):
                         if ((((typ) &
                               (NounPhraseParseAttr.IGNOREADJBEST))) !=
                             (NounPhraseParseAttr.NO)):
                             err = True
                     if (wf.contains_attr("к.ф.", None)
                             and tt.morph.class0_.is_personal_pronoun):
                         return None
             if (err):
                 continue
         if (res.morph.case_.is_nominative):
             v = MiscHelper.get_text_value_of_meta_token(
                 items[i], GetTextAttr.KEEPQUOTES)
             if (not Utils.isNullOrEmpty(v)):
                 if (items[i].get_normal_case_text(
                         None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED,
                         False) != v):
                     wf = NounPhraseItemTextVar(items[i].morph, None)
                     wf.normal_value = v
                     wf.class0_ = MorphClass.ADJECTIVE
                     wf.case_ = res.morph.case_
                     if (res.morph.case_.is_prepositional
                             or res.morph.gender == MorphGender.NEUTER
                             or res.morph.gender == MorphGender.FEMINIE):
                         items[i].morph.add_item(wf)
                     else:
                         items[i].morph.insert_item(0, wf)
         res.adjectives.append(items[i])
         if (items[i].end_char > res.end_char):
             res.end_token = items[i].end_token
     i = 0
     first_pass3046 = True
     while True:
         if first_pass3046: first_pass3046 = False
         else: i += 1
         if (not (i < (len(res.adjectives) - 1))): break
         if (res.adjectives[i].whitespaces_after_count > 5):
             if (res.adjectives[i].chars != res.adjectives[i + 1].chars):
                 if (not res.adjectives[i + 1].chars.is_all_lower):
                     return None
                 if (res.adjectives[i].chars.is_all_upper
                         and res.adjectives[i + 1].chars.is_capital_upper):
                     return None
                 if (res.adjectives[i].chars.is_capital_upper
                         and res.adjectives[i + 1].chars.is_all_upper):
                     return None
             if (res.adjectives[i].whitespaces_after_count > 10):
                 if (res.adjectives[i].newlines_after_count == 1):
                     if (res.adjectives[i].chars.is_capital_upper and i == 0
                             and res.adjectives[i + 1].chars.is_all_lower):
                         continue
                     if (res.adjectives[i].chars == res.adjectives[
                             i + 1].chars):
                         continue
                 return None
     if (need_update_morph):
         noun.morph = MorphCollection()
         for v in noun.noun_morph:
             noun.morph.add_item(v)
         res.morph = noun.morph
     if (len(res.adjectives) > 0):
         if (noun.begin_token.previous is not None):
             if (noun.begin_token.previous.is_comma_and):
                 if (res.adjectives[0].begin_char > noun.begin_char):
                     pass
                 else:
                     return None
         zap = 0
         and0_ = 0
         cou = 0
         last_and = False
         i = 0
         while i < (len(res.adjectives) - 1):
             te = res.adjectives[i].end_token.next0_
             if (te is None):
                 return None
             if (te.is_char('(')):
                 pass
             elif (te.is_comma):
                 zap += 1
                 last_and = False
             elif (te.is_and or te.is_or):
                 and0_ += 1
                 last_and = True
             if (not res.adjectives[i].begin_token.morph.class0_.is_pronoun
                 ):
                 cou += 1
             i += 1
         if ((zap + and0_) > 0):
             if (and0_ > 1):
                 return None
             elif (and0_ == 1 and not last_and):
                 return None
             if ((zap + and0_) != cou):
                 if (and0_ == 1):
                     pass
                 else:
                     return None
             last = Utils.asObjectOrNull(
                 res.adjectives[len(res.adjectives) - 1], NounPhraseItem)
             if (last.is_pronoun and not last_and):
                 return None
     if (stat is not None):
         for adj in items:
             if (adj.morph.items_count > 1):
                 w1 = Utils.asObjectOrNull(adj.morph.get_indexer_item(0),
                                           NounPhraseItemTextVar)
                 w2 = Utils.asObjectOrNull(adj.morph.get_indexer_item(1),
                                           NounPhraseItemTextVar)
                 if ((len(w1.normal_value) < 2)
                         or (len(w2.normal_value) < 2)):
                     break
                 l1 = w1.normal_value[len(w1.normal_value) - 1]
                 l2 = w2.normal_value[len(w2.normal_value) - 1]
                 i1 = 0
                 i2 = 0
                 wrapi1468 = RefOutArgWrapper(0)
                 Utils.tryGetValue(stat, l1, wrapi1468)
                 i1 = wrapi1468.value
                 wrapi2467 = RefOutArgWrapper(0)
                 Utils.tryGetValue(stat, l2, wrapi2467)
                 i2 = wrapi2467.value
                 if (i1 < i2):
                     adj.morph.remove_item(1)
                     adj.morph.insert_item(0, w2)
     if (res.begin_token.get_morph_class_in_dictionary().is_verb
             and len(items) > 0):
         if (not res.begin_token.chars.is_all_lower
                 or res.begin_token.previous is None):
             pass
         elif (res.begin_token.previous.morph.class0_.is_preposition):
             pass
         else:
             comma = False
             tt = res.begin_token.previous
             first_pass3047 = True
             while True:
                 if first_pass3047: first_pass3047 = False
                 else: tt = tt.previous
                 if (not (tt is not None and tt.end_char <= res.end_char)):
                     break
                 if (tt.morph.class0_.is_adverb):
                     continue
                 if (tt.is_char_of(".;")):
                     break
                 if (tt.is_comma):
                     comma = True
                     continue
                 if (tt.is_value("НЕ", None)):
                     continue
                 if (((tt.morph.class0_.is_noun
                       or tt.morph.class0_.is_proper)) and comma):
                     for it in res.begin_token.morph.items:
                         if (it.class0_.is_verb
                                 and (isinstance(it, MorphWordForm))):
                             if (tt.morph.check_accord(it, False, False)):
                                 if (res.morph.case_.is_instrumental):
                                     return None
                 break
     if (res.begin_token == res.end_token):
         mc = res.begin_token.get_morph_class_in_dictionary()
         if (mc.is_adverb):
             if (res.begin_token.previous is not None and
                     res.begin_token.previous.morph.class0_.is_preposition):
                 pass
             elif (mc.is_noun and not mc.is_preposition
                   and not mc.is_conjunction):
                 pass
             elif (res.begin_token.is_value("ВЕСЬ", None)):
                 pass
             else:
                 return None
     if (def_noun is not None and def_noun.end_token == res.end_token
             and len(res.adjectives) > 0):
         res.end_token = res.adjectives[len(res.adjectives) - 1].end_token
     return res

Example #8

Show file

File: _NounPraseHelperInt.py Project: pullenti/PullentiPython

 def __try_parse_en(first: 'Token', typ: 'NounPhraseParseAttr',
                    max_char_pos: int) -> 'NounPhraseToken':
     if (first is None):
         return None
     items = None
     has_article = False
     has_prop = False
     has_misc = False
     if (first.previous is not None
             and first.previous.morph.class0_.is_preposition
             and (first.whitespaces_before_count < 3)):
         has_prop = True
     t = first
     first_pass3048 = True
     while True:
         if first_pass3048: first_pass3048 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char_pos > 0 and t.begin_char > max_char_pos):
             break
         if (not t.chars.is_latin_letter):
             break
         if (t != first and t.whitespaces_before_count > 2):
             if ((((typ) & (NounPhraseParseAttr.MULTILINES))) !=
                 (NounPhraseParseAttr.NO)):
                 pass
             elif (MiscHelper.is_eng_article(t.previous)):
                 pass
             else:
                 break
         tt = Utils.asObjectOrNull(t, TextToken)
         if (t == first and tt is not None):
             if (MiscHelper.is_eng_article(tt)):
                 has_article = True
                 continue
         if (isinstance(t, ReferentToken)):
             if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (
                     NounPhraseParseAttr.NO)):
                 break
         elif (tt is None):
             break
         if ((t.is_value("SO", None) and t.next0_ is not None
              and t.next0_.is_hiphen) and t.next0_.next0_ is not None):
             if (t.next0_.next0_.is_value("CALL", None)):
                 t = t.next0_.next0_
                 continue
         mc = t.get_morph_class_in_dictionary()
         if (mc.is_conjunction or mc.is_preposition):
             break
         if (mc.is_pronoun or mc.is_personal_pronoun):
             if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (
                     NounPhraseParseAttr.NO)):
                 break
         elif (mc.is_misc):
             if (t.is_value("THIS", None) or t.is_value("THAT", None)):
                 has_misc = True
                 if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (
                         NounPhraseParseAttr.NO)):
                     break
         is_adj = False
         if (((has_article or has_prop or has_misc)) and items is None):
             pass
         elif (isinstance(t, ReferentToken)):
             pass
         else:
             if (not mc.is_noun and not mc.is_adjective):
                 if (mc.is_undefined and has_article):
                     pass
                 elif (items is None and mc.is_undefined
                       and t.chars.is_capital_upper):
                     pass
                 elif (mc.is_pronoun):
                     pass
                 elif (tt.term.endswith("EAN")):
                     is_adj = True
                 elif (MiscHelper.is_eng_adj_suffix(tt.next0_)):
                     pass
                 else:
                     break
             if (mc.is_verb):
                 if (t.next0_ is not None and t.next0_.morph.class0_.is_verb
                         and (t.whitespaces_after_count < 2)):
                     pass
                 elif (t.chars.is_capital_upper
                       and not MiscHelper.can_be_start_of_sentence(t)):
                     pass
                 elif ((t.chars.is_capital_upper and mc.is_noun and
                        (isinstance(t.next0_, TextToken)))
                       and t.next0_.chars.is_capital_upper):
                     pass
                 elif (isinstance(t, ReferentToken)):
                     pass
                 else:
                     break
         if (items is None):
             items = list()
         it = NounPhraseItem(t, t)
         if (mc.is_noun):
             it.can_be_noun = True
         if (mc.is_adjective or mc.is_pronoun or is_adj):
             it.can_be_adj = True
         items.append(it)
         t = it.end_token
         if (len(items) == 1):
             if (MiscHelper.is_eng_adj_suffix(t.next0_)):
                 mc.is_noun = False
                 mc.is_adjective = True
                 t = t.next0_.next0_
     if (items is None):
         return None
     noun = items[len(items) - 1]
     res = NounPhraseToken(first, noun.end_token)
     res.noun = (noun)
     res.morph = MorphCollection()
     for v in noun.end_token.morph.items:
         if (v.class0_.is_verb):
             continue
         if (v.class0_.is_proper and noun.begin_token.chars.is_all_lower):
             continue
         if (isinstance(v, MorphWordForm)):
             wf = MorphWordForm()
             wf.copy_from_word_form(Utils.asObjectOrNull(v, MorphWordForm))
             if (has_article and v.number != MorphNumber.SINGULAR):
                 wf.number = MorphNumber.SINGULAR
             res.morph.add_item(wf)
         else:
             bi = MorphBaseInfo()
             bi.copy_from(v)
             if (has_article and v.number != MorphNumber.SINGULAR):
                 bi.number = MorphNumber.SINGULAR
             res.morph.add_item(bi)
     if (res.morph.items_count == 0 and has_article):
         res.morph.add_item(
             MorphBaseInfo._new192(MorphClass.NOUN, MorphNumber.SINGULAR))
     i = 0
     while i < (len(items) - 1):
         res.adjectives.append(items[i])
         i += 1
     return res

Example #9

Show file

 def try_parse(t: 'Token', items: typing.List['NounPhraseItem'],
               attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
     if (t is None):
         return None
     t0 = t
     _can_be_surname = False
     _is_doubt_adj = False
     rt = Utils.asObjectOrNull(t, ReferentToken)
     if (rt is not None and rt.begin_token == rt.end_token
             and (isinstance(rt.begin_token, TextToken))):
         res = NounPhraseItem.try_parse(rt.begin_token, items, attrs)
         if (res is not None):
             res.begin_token = res.end_token = t
             res.can_be_noun = True
             return res
     if (rt is not None):
         res = NounPhraseItem(t, t)
         for m in t.morph.items:
             v = NounPhraseItemTextVar(m, None)
             v.normal_value = str(t.get_referent())
             res.noun_morph.append(v)
         res.can_be_noun = True
         return res
     if (isinstance(t, NumberToken)):
         pass
     has_legal_verb = False
     if (isinstance(t, TextToken)):
         if (not t.chars.is_letter):
             return None
         str0_ = t.term
         if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
             for wf in t.morph.items:
                 if ((isinstance(wf, MorphWordForm))
                         and wf.is_in_dictionary):
                     if (wf.class0_.is_verb):
                         mc = t.get_morph_class_in_dictionary()
                         if (not mc.is_noun and
                             (((attrs) &
                               (NounPhraseParseAttr.IGNOREPARTICIPLES)))
                                 == (NounPhraseParseAttr.NO)):
                             if (not LanguageHelper.ends_with_ex(
                                     str0_, "ОГО", "ЕГО", None, None)):
                                 return None
                         has_legal_verb = True
                     if (wf.class0_.is_adverb):
                         if (t.next0_ is None or not t.next0_.is_hiphen):
                             if ((str0_ == "ВСЕГО" or str0_ == "ДОМА"
                                  or str0_ == "НЕСКОЛЬКО")
                                     or str0_ == "МНОГО"
                                     or str0_ == "ПОРЯДКА"):
                                 pass
                             else:
                                 return None
                     if (wf.class0_.is_adjective):
                         if (wf.contains_attr("к.ф.", None)):
                             if (t.get_morph_class_in_dictionary() ==
                                     MorphClass.ADJECTIVE):
                                 pass
                             else:
                                 _is_doubt_adj = True
         mc0 = t.morph.class0_
         if (mc0.is_proper_surname and not t.chars.is_all_lower):
             for wf in t.morph.items:
                 if (wf.class0_.is_proper_surname
                         and wf.number != MorphNumber.PLURAL):
                     wff = Utils.asObjectOrNull(wf, MorphWordForm)
                     if (wff is None):
                         continue
                     s = Utils.ifNotNull((Utils.ifNotNull(
                         wff.normal_full, wff.normal_case)), "")
                     if (LanguageHelper.ends_with_ex(
                             s, "ИН", "ЕН", "ЫН", None)):
                         if (not wff.is_in_dictionary):
                             _can_be_surname = True
                         else:
                             return None
                     if (wff.is_in_dictionary
                             and LanguageHelper.ends_with(s, "ОВ")):
                         _can_be_surname = True
         if (mc0.is_proper_name and not t.chars.is_all_lower):
             for wff in t.morph.items:
                 wf = Utils.asObjectOrNull(wff, MorphWordForm)
                 if (wf is None):
                     continue
                 if (wf.normal_case == "ГОР"):
                     continue
                 if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                     if (wf.normal_case is None
                             or not wf.normal_case.startswith("ЛЮБ")):
                         if (mc0.is_adjective
                                 and t.morph.contains_attr("неизм.", None)):
                             pass
                         elif (
                             (((attrs) &
                               (NounPhraseParseAttr.REFERENTCANBENOUN))
                              ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                             pass
                         else:
                             if (items is None or (len(items) < 1)):
                                 return None
                             if (not items[0].is_std_adjective):
                                 return None
         if (mc0.is_adjective and t.morph.items_count == 1):
             if (t.morph.get_indexer_item(0).contains_attr(
                     "в.ср.ст.", None)):
                 return None
         mc1 = t.get_morph_class_in_dictionary()
         if (mc1 == MorphClass.VERB and t.morph.case_.is_undefined):
             return None
         if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES)))
              == (NounPhraseParseAttr.IGNOREPARTICIPLES)
              and t.morph.class0_.is_verb and not t.morph.class0_.is_noun)
                 and not t.morph.class0_.is_proper):
             for wf in t.morph.items:
                 if (wf.class0_.is_verb):
                     if (wf.contains_attr("дейст.з.", None)):
                         if (LanguageHelper.ends_with(t.term, "СЯ")):
                             pass
                         else:
                             return None
     t1 = None
     for k in range(2):
         t = (Utils.ifNotNull(t1, t0))
         if (k == 0):
             if (((isinstance(t0, TextToken)) and t0.next0_ is not None
                  and t0.next0_.is_hiphen)
                     and t0.next0_.next0_ is not None):
                 if (not t0.is_whitespace_after
                         and not t0.morph.class0_.is_pronoun and
                         not (isinstance(t0.next0_.next0_, NumberToken))):
                     if (not t0.next0_.is_whitespace_after):
                         t = t0.next0_.next0_
                     elif (t0.next0_.next0_.chars.is_all_lower
                           and LanguageHelper.ends_with(t0.term, "О")):
                         t = t0.next0_.next0_
         it = NounPhraseItem._new404(t0, t, _can_be_surname)
         if (t0 == t and (isinstance(t0, ReferentToken))):
             it.can_be_noun = True
             it.morph = MorphCollection(t0.morph)
         can_be_prepos = False
         for v in t.morph.items:
             wf = Utils.asObjectOrNull(v, MorphWordForm)
             if (v.class0_.is_verb and not v.case_.is_undefined):
                 it.can_be_adj = True
                 it.adj_morph.append(NounPhraseItemTextVar(v, t))
                 continue
             if (v.class0_.is_preposition):
                 can_be_prepos = True
             if (v.class0_.is_adjective
                     or ((v.class0_.is_pronoun
                          and not v.class0_.is_personal_pronoun
                          and not v.contains_attr("неизм.", None))) or
                 ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                 if (NounPhraseItem.try_accord_variant(
                         items, (0 if items is None else len(items)), v,
                         False)):
                     is_doub = False
                     if (v.contains_attr("к.ф.", None)):
                         continue
                     if (v.contains_attr("собир.", None)
                             and not (isinstance(t, NumberToken))):
                         if (wf is not None and wf.is_in_dictionary):
                             return None
                         continue
                     if (v.contains_attr("сравн.", None)):
                         continue
                     ok = True
                     if (isinstance(t, TextToken)):
                         s = t.term
                         if (s == "ПРАВО" or s == "ПРАВА"):
                             ok = False
                         elif (LanguageHelper.ends_with(s, "ОВ") and
                               t.get_morph_class_in_dictionary().is_noun):
                             ok = False
                     elif (isinstance(t, NumberToken)):
                         if (v.class0_.is_noun
                                 and t.morph.class0_.is_adjective):
                             ok = False
                         elif (t.morph.class0_.is_noun and ((
                             (attrs) &
                             (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)))
                               == (NounPhraseParseAttr.NO)):
                             ok = False
                     if (ok):
                         it.adj_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_adj = True
                         if (_is_doubt_adj and t0 == t):
                             it.is_doubt_adjective = True
                         if (has_legal_verb and wf is not None
                                 and wf.is_in_dictionary):
                             it.can_be_noun = True
                         if (wf is not None and wf.class0_.is_pronoun):
                             it.can_be_noun = True
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
             can_be_noun_ = False
             if (isinstance(t, NumberToken)):
                 pass
             elif (v.class0_.is_noun
                   or ((wf is not None and wf.normal_case == "САМ"))):
                 can_be_noun_ = True
             elif (v.class0_.is_personal_pronoun):
                 if (items is None or len(items) == 0):
                     can_be_noun_ = True
                 else:
                     for it1 in items:
                         if (it1.is_verb):
                             if (len(items) == 1
                                     and not v.case_.is_nominative):
                                 can_be_noun_ = True
                             else:
                                 return None
                     if (len(items) == 1):
                         if (items[0].can_be_adj_for_personal_pronoun):
                             can_be_noun_ = True
             elif (
                 (v.class0_.is_pronoun and
                  ((items is None or len(items) == 0 or
                    ((len(items) == 1
                      and items[0].can_be_adj_for_personal_pronoun))))
                  and wf is not None) and
                 (((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО"
                      or wf.normal_case == "ТО") or wf.normal_case == "ЭТО"
                     or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО"
                    or wf.normal_case == "КТО") or wf.normal_full
                   == "КОТОРЫЙ" or wf.normal_case == "КОТОРЫЙ"))):
                 if (wf.normal_case == "ВСЕ"):
                     if (t.next0_ is not None
                             and t.next0_.is_value("РАВНО", None)):
                         return None
                 can_be_noun_ = True
             elif (wf is not None and ((Utils.ifNotNull(
                     wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"
                   and (((attrs) & (NounPhraseParseAttr.PARSEPRONOUNS)))
                   == (NounPhraseParseAttr.NO)):
                 return None
             elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                 if (t.length_char > 4 or v.class0_.is_proper_name):
                     can_be_noun_ = True
             if (can_be_noun_):
                 added = False
                 if (items is not None and len(items) > 1 and
                     (((attrs) & (NounPhraseParseAttr.MULTINOUNS))) !=
                     (NounPhraseParseAttr.NO)):
                     ok1 = True
                     ii = 1
                     while ii < len(items):
                         if (not items[ii].conj_before):
                             ok1 = False
                             break
                         ii += 1
                     if (ok1):
                         if (NounPhraseItem.try_accord_variant(
                                 items,
                             (0 if items is None else len(items)), v,
                                 True)):
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
                             it.can_be_noun = True
                             it.multi_nouns = True
                             added = True
                 if (not added):
                     if (NounPhraseItem.try_accord_variant(
                             items, (0 if items is None else len(items)), v,
                             False)):
                         it.noun_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_noun = True
                         if (v.class0_.is_personal_pronoun
                                 and t.morph.contains_attr("неизм.", None)
                                 and not it.can_be_adj):
                             itt = NounPhraseItemTextVar(v, t)
                             itt.case_ = MorphCase.ALL_CASES
                             itt.number = MorphNumber.UNDEFINED
                             if (itt.normal_value is None):
                                 pass
                             it.adj_morph.append(itt)
                             it.can_be_adj = True
                     elif ((len(items) > 0 and len(items[0].adj_morph) > 0
                            and items[0].adj_morph[0].number
                            == MorphNumber.PLURAL)
                           and not ((items[0].adj_morph[0].case_)
                                    & v.case_).is_undefined
                           and not items[0].adj_morph[0].class0_.is_verb):
                         if (t.next0_ is not None and t.next0_.is_comma_and
                                 and
                             (isinstance(t.next0_.next0_, TextToken))):
                             npt2 = NounPhraseHelper.try_parse(
                                 t.next0_.next0_, attrs, 0, None)
                             if (npt2 is not None
                                     and npt2.preposition is None
                                     and not ((npt2.morph.case_) & v.case_
                                              & items[0].adj_morph[0].case_
                                              ).is_undefined):
                                 it.noun_morph.append(
                                     NounPhraseItemTextVar(v, t))
                                 it.can_be_noun = True
         if (t0 != t):
             for v in it.adj_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken),
                                  False)
             for v in it.noun_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), True)
         if (k == 1 and it.can_be_noun and not it.can_be_adj):
             if (t1 is not None):
                 it.end_token = t1
             else:
                 it.end_token = t0.next0_.next0_
             for v in it.noun_morph:
                 if (v.normal_value is not None
                         and (v.normal_value.find('-') < 0)):
                     v.normal_value = "{0}-{1}".format(
                         v.normal_value,
                         it.end_token.get_normal_case_text(
                             None, MorphNumber.UNDEFINED,
                             MorphGender.UNDEFINED, False))
         if (it.can_be_adj):
             if (NounPhraseItem.__m_std_adjectives.try_parse(
                     it.begin_token, TerminParseAttr.NO) is not None):
                 it.is_std_adjective = True
         if (can_be_prepos and it.can_be_noun):
             if (items is not None and len(items) > 0):
                 npt1 = NounPhraseHelper.try_parse(
                     t,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION)
                                     | (NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None and npt1.end_char > t.end_char):
                     return None
             else:
                 npt1 = NounPhraseHelper.try_parse(
                     t.next0_,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None):
                     mc = LanguageHelper.get_case_after_preposition(t.lemma)
                     if (not ((mc) & npt1.morph.case_).is_undefined):
                         return None
         if (it.can_be_noun or it.can_be_adj or k == 1):
             if (it.begin_token.morph.class0_.is_pronoun):
                 tt2 = it.end_token.next0_
                 if ((tt2 is not None and tt2.is_hiphen
                      and not tt2.is_whitespace_after)
                         and not tt2.is_whitespace_before):
                     tt2 = tt2.next0_
                 if (isinstance(tt2, TextToken)):
                     ss = tt2.term
                     if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ")
                             or ss == "Ж"):
                         it.end_token = tt2
                     elif (ss == "НИБУДЬ" or ss == "ЛИБО"
                           or (((ss == "ТО" and tt2.previous.is_hiphen))
                               and it.can_be_adj)):
                         it.end_token = tt2
                         for m in it.adj_morph:
                             m.normal_value = "{0}-{1}".format(
                                 m.normal_value, ss)
                             if (m.single_number_value is not None):
                                 m.single_number_value = "{0}-{1}".format(
                                     m.single_number_value, ss)
             return it
         if (t0 == t):
             if (t0.is_value("БИЗНЕС", None) and t0.next0_ is not None
                     and t0.next0_.chars == t0.chars):
                 t1 = t0.next0_
                 continue
             return it
     return None

Example #10

Show file

class TextToken(Token):
    """ Входной токен (после морфанализа) """
    def __init__(self, source: 'MorphToken', kit_: 'AnalysisKit') -> None:
        super().__init__(kit_, (0 if source is None else source.begin_char),
                         (0 if source is None else source.end_char))
        self.term = None
        self.lemma = None
        self.term0 = None
        self.invariant_prefix_length = 0
        self.max_length = 0
        if (source is None):
            return
        self.chars = source.char_info
        self.term = source.term
        self.lemma = (Utils.ifNotNull(source.lemma, self.term))
        self.max_length = (len(self.term))
        self.morph = MorphCollection()
        if (source.word_forms is not None):
            for wf in source.word_forms:
                self.morph.addItem(wf)
                if (wf.normal_case is not None
                        and (self.max_length < len(wf.normal_case))):
                    self.max_length = (len(wf.normal_case))
                if (wf.normal_full is not None
                        and (self.max_length < len(wf.normal_full))):
                    self.max_length = (len(wf.normal_full))
        i = 0
        while i < len(self.term):
            ch = self.term[i]
            j = 0
            while j < self.morph.items_count:
                wf = Utils.asObjectOrNull(self.morph.getIndexerItem(j),
                                          MorphWordForm)
                if (wf.normal_case is not None):
                    if (i >= len(wf.normal_case)):
                        break
                    if (wf.normal_case[i] != ch):
                        break
                if (wf.normal_full is not None):
                    if (i >= len(wf.normal_full)):
                        break
                    if (wf.normal_full[i] != ch):
                        break
                j += 1
            if (j < self.morph.items_count):
                break
            self.invariant_prefix_length = ((i + 1))
            i += 1
        if (self.morph.language.is_undefined
                and not source.language.is_undefined):
            self.morph.language = source.language

    def getLemma(self) -> str:
        """ Получить лемму (устарело, используйте Lemma)
        
        """
        return self.lemma

    def __str__(self) -> str:
        res = Utils.newStringIO(self.term)
        for l_ in self.morph.items:
            print(", {0}".format(str(l_)), end="", file=res, flush=True)
        return Utils.toStringStringIO(res)

    def checkValue(self, dict0_: typing.List[tuple]) -> object:
        """ Попробовать привязать словарь
        
        Args:
            dict0_(typing.List[tuple]): 
        
        """
        if (dict0_ is None):
            return None
        wrapres2699 = RefOutArgWrapper(None)
        inoutres2700 = Utils.tryGetValue(dict0_, self.term, wrapres2699)
        res = wrapres2699.value
        if (inoutres2700):
            return res
        if (self.morph is not None):
            for it in self.morph.items:
                mf = Utils.asObjectOrNull(it, MorphWordForm)
                if (mf is not None):
                    if (mf.normal_case is not None):
                        wrapres2695 = RefOutArgWrapper(None)
                        inoutres2696 = Utils.tryGetValue(
                            dict0_, mf.normal_case, wrapres2695)
                        res = wrapres2695.value
                        if (inoutres2696):
                            return res
                    if (mf.normal_full is not None
                            and mf.normal_case != mf.normal_full):
                        wrapres2697 = RefOutArgWrapper(None)
                        inoutres2698 = Utils.tryGetValue(
                            dict0_, mf.normal_full, wrapres2697)
                        res = wrapres2697.value
                        if (inoutres2698):
                            return res
        return None

    def getSourceText(self) -> str:
        return super().getSourceText()

    def isValue(self, term_: str, termua: str = None) -> bool:
        if (termua is not None and self.morph.language.is_ua):
            if (self.isValue(termua, None)):
                return True
        if (term_ is None):
            return False
        if (self.invariant_prefix_length > len(term_)):
            return False
        if (self.max_length >= len(self.term)
                and (self.max_length < len(term_))):
            return False
        if (term_ == self.term):
            return True
        for wf in self.morph.items:
            if ((wf).normal_case == term_ or (wf).normal_full == term_):
                return True
        return False

    @property
    def is_and(self) -> bool:
        """ Это соединительный союз И (на всех языках) """
        if (not self.morph.class0_.is_conjunction):
            if (self.length_char == 1 and self.isChar('&')):
                return True
            return False
        val = self.term
        if (val == "И" or val == "AND" or val == "UND"):
            return True
        if (self.morph.language.is_ua):
            if (val == "І" or val == "ТА"):
                return True
        return False

    @property
    def is_or(self) -> bool:
        """ Это соединительный союз ИЛИ (на всех языках) """
        if (not self.morph.class0_.is_conjunction):
            return False
        val = self.term
        if (val == "ИЛИ" or val == "OR"):
            return True
        if (self.morph.language.is_ua):
            if (val == "АБО"):
                return True
        return False

    @property
    def is_letters(self) -> bool:
        return str.isalpha(self.term[0])

    def getMorphClassInDictionary(self) -> 'MorphClass':
        res = MorphClass()
        for wf in self.morph.items:
            if ((isinstance(wf, MorphWordForm)) and (wf).is_in_dictionary):
                res |= wf.class0_
        return res

    def getNormalCaseText(self,
                          mc: 'MorphClass' = None,
                          single_number: bool = False,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
        from pullenti.ner.core.MiscHelper import MiscHelper
        empty = True
        if (mc is not None and mc.is_preposition):
            return LanguageHelper.normalizePreposition(self.term)
        for it in self.morph.items:
            if (mc is not None and not mc.is_undefined):
                cc = (it.class0_.value) & (mc.value)
                if (cc == 0):
                    continue
                if (MorphClass.isMiscInt(cc) and not MorphClass.isProperInt(cc)
                        and mc.value != it.class0_.value):
                    continue
            wf = Utils.asObjectOrNull(it, MorphWordForm)
            normal_full = False
            if (gender != MorphGender.UNDEFINED):
                if ((((it.gender) & (gender))) == (MorphGender.UNDEFINED)):
                    if ((gender == MorphGender.MASCULINE and
                         ((it.gender != MorphGender.UNDEFINED or it.number
                           == MorphNumber.PLURAL)) and wf is not None)
                            and wf.normal_full is not None):
                        normal_full = True
                    elif (gender == MorphGender.MASCULINE
                          and it.class0_.is_personal_pronoun):
                        pass
                    else:
                        continue
            if (not it.case_.is_undefined):
                empty = False
            if (wf is not None):
                if (single_number and it.number == MorphNumber.PLURAL
                        and wf.normal_full is not None):
                    le = len(wf.normal_case)
                    if ((le == (len(wf.normal_full) + 2) and le > 4
                         and wf.normal_case[le - 2] == 'С')
                            and wf.normal_case[le - 1] == 'Я'):
                        res = wf.normal_case
                    else:
                        res = (wf.normal_full
                               if normal_full else wf.normal_full)
                else:
                    res = (wf.normal_full if normal_full else
                           (Utils.ifNotNull(wf.normal_case, self.term)))
                if (single_number and mc is not None
                        and mc == MorphClass.NOUN):
                    if (res == "ДЕТИ"):
                        res = "РЕБЕНОК"
                if (keep_chars):
                    if (self.chars.is_all_lower):
                        res = res.lower()
                    elif (self.chars.is_capital_upper):
                        res = MiscHelper.convertFirstCharUpperAndOtherLower(
                            res)
                return res
        if (not empty):
            return None
        te = None
        if (single_number and mc is not None):
            bi = MorphBaseInfo._new549(MorphClass(mc), gender,
                                       MorphNumber.SINGULAR,
                                       self.morph.language)
            vars0_ = Morphology.getWordform(self.term, bi)
            if (vars0_ is not None):
                te = vars0_
        if (self.chars.is_cyrillic_letter and te is None
                and len(self.term) > 3):
            ch0 = self.term[len(self.term) - 1]
            ch1 = self.term[len(self.term) - 2]
            if (ch0 == 'М' and ((ch1 == 'О' or ch1 == 'А'))):
                te = self.term[0:0 + len(self.term) - 2]
            elif (not LanguageHelper.isCyrillicVowel(ch1)
                  and LanguageHelper.isCyrillicVowel(ch0)):
                te = self.term[0:0 + len(self.term) - 1]
        if (te is None):
            te = self.term
        if (keep_chars):
            if (self.chars.is_all_lower):
                return te.lower()
            elif (self.chars.is_capital_upper):
                return MiscHelper.convertFirstCharUpperAndOtherLower(te)
        return te

    @staticmethod
    def getSourceTextTokens(begin: 'Token',
                            end: 'Token') -> typing.List['TextToken']:
        from pullenti.ner.MetaToken import MetaToken
        res = list()
        t = begin
        while t is not None and t != end.next0_ and t.end_char <= end.end_char:
            if (isinstance(t, TextToken)):
                res.append(Utils.asObjectOrNull(t, TextToken))
            elif (isinstance(t, MetaToken)):
                res.extend(
                    TextToken.getSourceTextTokens((t).begin_token,
                                                  (t).end_token))
            t = t.next0_
        return res

    @property
    def is_pure_verb(self) -> bool:
        """ Признак того, что это чистый глагол """
        ret = False
        if ((self.isValue("МОЖНО", None) or self.isValue("МОЖЕТ", None)
             or self.isValue("ДОЛЖНЫЙ", None)) or self.isValue("НУЖНО", None)):
            return True
        for it in self.morph.items:
            if ((isinstance(it, MorphWordForm)) and (it).is_in_dictionary):
                if (it.class0_.is_verb and it.case_.is_undefined):
                    ret = True
                elif (not it.class0_.is_verb):
                    if (it.class0_.is_adjective
                            and it.containsAttr("к.ф.", None)):
                        pass
                    else:
                        return False
        return ret

    @property
    def is_verb_be(self) -> bool:
        """ Проверка, что это глагол типа БЫТЬ, ЯВЛЯТЬСЯ и т.п. """
        if ((self.isValue("БЫТЬ", None) or self.isValue("ЕСТЬ", None)
             or self.isValue("ЯВЛЯТЬ", None)) or self.isValue("BE", None)):
            return True
        if (self.term == "IS" or self.term == "WAS" or self.term == "BECAME"):
            return True
        if (self.term == "Є"):
            return True
        return False

    def _serialize(self, stream: io.IOBase) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        super()._serialize(stream)
        SerializerHelper.serializeString(stream, self.term)
        SerializerHelper.serializeString(stream, self.lemma)
        SerializerHelper.serializeShort(stream, self.invariant_prefix_length)
        SerializerHelper.serializeShort(stream, self.max_length)

    def _deserialize(self, stream: io.IOBase, kit_: 'AnalysisKit',
                     vers: int) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        super()._deserialize(stream, kit_, vers)
        self.term = SerializerHelper.deserializeString(stream)
        self.lemma = SerializerHelper.deserializeString(stream)
        self.invariant_prefix_length = SerializerHelper.deserializeShort(
            stream)
        self.max_length = SerializerHelper.deserializeShort(stream)

    @staticmethod
    def _new538(_arg1: 'MorphToken', _arg2: 'AnalysisKit',
                _arg3: str) -> 'TextToken':
        res = TextToken(_arg1, _arg2)
        res.term0 = _arg3
        return res

    @staticmethod
    def _new541(_arg1: 'MorphToken', _arg2: 'AnalysisKit', _arg3: 'CharsInfo',
                _arg4: int, _arg5: int, _arg6: str) -> 'TextToken':
        res = TextToken(_arg1, _arg2)
        res.chars = _arg3
        res.begin_char = _arg4
        res.end_char = _arg5
        res.term0 = _arg6
        return res

Example #11

Show file

File: Termin.py Project: MihaJjDa/APCLtask

 def tryParse(
         self,
         t0: 'Token',
         pars: 'TerminParseAttr' = TerminParseAttr.NO) -> 'TerminToken':
     """ Попробовать привязать термин
     
     Args:
         t0(Token): 
         fullWordsOnly: 
     
     """
     from pullenti.ner.core.MiscHelper import MiscHelper
     from pullenti.ner.core.BracketHelper import BracketHelper
     if (t0 is None):
         return None
     term = None
     if (isinstance(t0, TextToken)):
         term = (t0).term
     if (self.acronym_smart is not None
             and (((pars) &
                   (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)
             and term is not None):
         if (self.acronym_smart == term):
             if (t0.next0_ is not None and t0.next0_.isChar('.')
                     and not t0.is_whitespace_after):
                 return TerminToken._new606(t0, t0.next0_, self)
             else:
                 return TerminToken._new606(t0, t0, self)
         t1 = Utils.asObjectOrNull(t0, TextToken)
         tt = Utils.asObjectOrNull(t0, TextToken)
         i = 0
         while i < len(self.acronym):
             if (tt is None):
                 break
             term1 = tt.term
             if (len(term1) != 1 or tt.is_whitespace_after):
                 break
             if (i > 0 and tt.is_whitespace_before):
                 break
             if (term1[0] != self.acronym[i]):
                 break
             if (tt.next0_ is None or not tt.next0_.isChar('.')):
                 break
             t1 = (Utils.asObjectOrNull(tt.next0_, TextToken))
             tt = (Utils.asObjectOrNull(tt.next0_.next0_, TextToken))
             i += 1
         if (i >= len(self.acronym)):
             return TerminToken._new606(t0, t1, self)
     if (self.acronym is not None and term is not None
             and self.acronym == term):
         if (t0.chars.is_all_upper or self.acronym_can_be_lower
                 or ((not t0.chars.is_all_lower and len(term) >= 3))):
             return TerminToken._new606(t0, t0, self)
     if (self.acronym is not None and t0.chars.is_last_lower
             and t0.length_char > 3):
         if (t0.isValue(self.acronym, None)):
             return TerminToken._new606(t0, t0, self)
     cou = 0
     i = 0
     while i < len(self.terms):
         if (self.terms[i].is_hiphen):
             cou -= 1
         else:
             cou += 1
         i += 1
     if (len(self.terms) > 0
             and ((not self.ignore_terms_order or cou == 1))):
         t1 = t0
         tt = t0
         e0_ = None
         eup = None
         ok = True
         mc = None
         dont_change_mc = False
         i = 0
         first_pass2812 = True
         while True:
             if first_pass2812: first_pass2812 = False
             else: i += 1
             if (not (i < len(self.terms))): break
             if (self.terms[i].is_hiphen):
                 continue
             if (tt is not None and tt.is_hiphen and i > 0):
                 tt = tt.next0_
             if (i > 0 and tt is not None):
                 if ((((pars) & (TerminParseAttr.IGNOREBRACKETS))) !=
                     (TerminParseAttr.NO) and not tt.chars.is_letter
                         and BracketHelper.isBracket(tt, False)):
                     tt = tt.next0_
             if (((((pars) & (TerminParseAttr.CANBEGEOOBJECT))) !=
                  (TerminParseAttr.NO) and i > 0 and
                  (isinstance(tt, ReferentToken)))
                     and tt.getReferent().type_name == "GEO"):
                 tt = tt.next0_
             if ((isinstance(tt, ReferentToken)) and e0_ is None):
                 eup = tt
                 e0_ = (tt).end_token
                 tt = (tt).begin_token
             if (tt is None):
                 ok = False
                 break
             if (not self.terms[i].checkByToken(tt)):
                 if (tt.next0_ is not None and tt.isChar('.')
                         and self.terms[i].checkByToken(tt.next0_)):
                     tt = tt.next0_
                 elif (((i > 0 and tt.next0_ is not None and
                         (isinstance(tt, TextToken))) and
                        ((tt.morph.class0_.is_preposition
                          or MiscHelper.isEngArticle(tt)))
                        and self.terms[i].checkByToken(tt.next0_))
                       and not self.terms[i - 1].is_pattern_any):
                     tt = tt.next0_
                 else:
                     ok = False
                     if (((i + 2) < len(self.terms))
                             and self.terms[i + 1].is_hiphen
                             and self.terms[i + 2].checkByPrefToken(
                                 self.terms[i],
                                 Utils.asObjectOrNull(tt, TextToken))):
                         i += 2
                         ok = True
                     elif (((not tt.is_whitespace_after
                             and tt.next0_ is not None and
                             (isinstance(tt, TextToken))) and
                            (tt).length_char == 1
                            and tt.next0_.isCharOf("\"'`’“”"))
                           and not tt.next0_.is_whitespace_after
                           and (isinstance(tt.next0_.next0_, TextToken))):
                         if (self.terms[i].checkByStrPrefToken(
                             (tt).term,
                                 Utils.asObjectOrNull(
                                     tt.next0_.next0_, TextToken))):
                             ok = True
                             tt = tt.next0_.next0_
                     if (not ok):
                         if (i > 0 and
                             (((pars) &
                               (TerminParseAttr.IGNORESTOPWORDS))) !=
                             (TerminParseAttr.NO)):
                             if (isinstance(tt, TextToken)):
                                 if (not tt.chars.is_letter):
                                     tt = tt.next0_
                                     i -= 1
                                     continue
                                 mc1 = tt.getMorphClassInDictionary()
                                 if (mc1.is_conjunction
                                         or mc1.is_preposition):
                                     tt = tt.next0_
                                     i -= 1
                                     continue
                             if (isinstance(tt, NumberToken)):
                                 tt = tt.next0_
                                 i -= 1
                                 continue
                         break
             if (tt.morph.items_count > 0 and not dont_change_mc):
                 mc = MorphCollection(tt.morph)
                 if (((mc.class0_.is_noun or mc.class0_.is_verb))
                         and not mc.class0_.is_adjective):
                     if (((i + 1) < len(self.terms))
                             and self.terms[i + 1].is_hiphen):
                         pass
                     else:
                         dont_change_mc = True
             if (tt.morph.class0_.is_preposition
                     or tt.morph.class0_.is_conjunction):
                 dont_change_mc = True
             if (tt == e0_):
                 tt = eup
                 eup = (None)
                 e0_ = (None)
             if (e0_ is None):
                 t1 = tt
             tt = tt.next0_
         if (ok and i >= len(self.terms)):
             if (t1.next0_ is not None and t1.next0_.isChar('.')
                     and self.abridges is not None):
                 for a in self.abridges:
                     if (a.tryAttach(t0) is not None):
                         t1 = t1.next0_
                         break
             if (t0 != t1 and t0.morph.class0_.is_adjective):
                 npt = NounPhraseHelper.tryParse(t0, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None and npt.end_char <= t1.end_char):
                     mc = npt.morph
             return TerminToken._new611(t0, t1, mc)
     if (len(self.terms) > 1 and self.ignore_terms_order):
         terms_ = list(self.terms)
         t1 = t0
         tt = t0
         while len(terms_) > 0:
             if (tt != t0 and tt is not None and tt.is_hiphen):
                 tt = tt.next0_
             if (tt is None):
                 break
             j = 0
             while j < len(terms_):
                 if (terms_[j].checkByToken(tt)):
                     break
                 j += 1
             if (j >= len(terms_)):
                 if (tt != t0 and (((pars) &
                                    (TerminParseAttr.IGNORESTOPWORDS))) !=
                     (TerminParseAttr.NO)):
                     if (isinstance(tt, TextToken)):
                         if (not tt.chars.is_letter):
                             tt = tt.next0_
                             continue
                         mc1 = tt.getMorphClassInDictionary()
                         if (mc1.is_conjunction or mc1.is_preposition):
                             tt = tt.next0_
                             continue
                     if (isinstance(tt, NumberToken)):
                         tt = tt.next0_
                         continue
                 break
             del terms_[j]
             t1 = tt
             tt = tt.next0_
         for i in range(len(terms_) - 1, -1, -1):
             if (terms_[i].is_hiphen):
                 del terms_[i]
         if (len(terms_) == 0):
             return TerminToken(t0, t1)
     if (self.abridges is not None and
         (((pars) &
           (TerminParseAttr.FULLWORDSONLY))) == (TerminParseAttr.NO)):
         res = None
         for a in self.abridges:
             r = a.tryAttach(t0)
             if (r is None):
                 continue
             if (r.abridge_without_point and len(self.terms) > 0):
                 if (not ((isinstance(t0, TextToken)))):
                     continue
                 if (a.parts[0].value != (t0).term):
                     continue
             if (res is None or (res.length_char < r.length_char)):
                 res = r
         if (res is not None):
             return res
     return None

Example #12

Show file

class Token:
    """ Базовый класс для всех токенов. Наследные классы - TextToken (конечная словоформа) и MetaToken (связный фрагмент других токенов).
    
    Токен
    """
    def __init__(self, kit_: 'AnalysisKit', begin: int, end: int) -> None:
        self.kit = None
        self.__m_begin_char = 0
        self.__m_end_char = 0
        self.tag = None
        self._m_previous = None
        self._m_next = None
        self.__m_morph = None
        self.chars = None
        self.__m_attrs = 0
        self.kit = kit_
        self.__m_begin_char = begin
        self.__m_end_char = end

    @property
    def begin_char(self) -> int:
        """ Позиция в тексте начального символа
        
        """
        return self.__m_begin_char

    @property
    def end_char(self) -> int:
        """ Позиция в тексте конечного символа
        
        """
        return self.__m_end_char

    @property
    def length_char(self) -> int:
        """ Длина в текстовых символах """
        return (self.end_char - self.begin_char) + 1

    @property
    def previous(self) -> 'Token':
        """ Предыдущий токен в цепочке токенов
        
        """
        return self._m_previous

    @previous.setter
    def previous(self, value) -> 'Token':
        self._m_previous = value
        if (value is not None):
            value._m_next = self
        self.__m_attrs = (0)
        return value

    @property
    def next0_(self) -> 'Token':
        """ Следующий токен в цепочке токенов
        
        """
        return self._m_next

    @next0_.setter
    def next0_(self, value) -> 'Token':
        self._m_next = value
        if (value is not None):
            value._m_previous = self
        self.__m_attrs = (0)
        return value

    @property
    def morph(self) -> 'MorphCollection':
        """ Морфологическая информация
        
        """
        if (self.__m_morph is None):
            self.__m_morph = MorphCollection()
        return self.__m_morph

    @morph.setter
    def morph(self, value) -> 'MorphCollection':
        self.__m_morph = value
        return value

    def __str__(self) -> str:
        return self.kit.sofa.text[self.begin_char:self.begin_char +
                                  (self.end_char + 1) - self.begin_char]

    def __get_attr(self, i: int) -> bool:
        ch = '\x00'
        if ((((self.__m_attrs) & 1)) == 0):
            self.__m_attrs = (1)
            if (self._m_previous is None):
                self._set_attr(1, True)
                self._set_attr(3, True)
            else:
                j = self._m_previous.end_char + 1
                while j < self.begin_char:
                    ch = self.kit.sofa.text[j]
                    if (Utils.isWhitespace((ch))):
                        self._set_attr(1, True)
                        if ((ord(ch)) == 0xD or (ord(ch)) == 0xA
                                or ch == '\f'):
                            self._set_attr(3, True)
                    j += 1
            if (self._m_next is None):
                self._set_attr(2, True)
                self._set_attr(4, True)
            else:
                j = self.end_char + 1
                while j < self._m_next.begin_char:
                    ch = self.kit.sofa.text[j]
                    if (Utils.isWhitespace(ch)):
                        self._set_attr(2, True)
                        if ((ord(ch)) == 0xD or (ord(ch)) == 0xA
                                or ch == '\f'):
                            self._set_attr(4, True)
                    j += 1
        return (((((self.__m_attrs) >> i)) & 1)) != 0

    def _set_attr(self, i: int, val: bool) -> None:
        if (val):
            self.__m_attrs |= ((1 << i))
        else:
            self.__m_attrs &= (~((1 << i)))

    @property
    def is_whitespace_before(self) -> bool:
        """ Наличие пробельных символов перед """
        return self.__get_attr(1)

    @is_whitespace_before.setter
    def is_whitespace_before(self, value) -> bool:
        self._set_attr(1, value)
        return value

    @property
    def is_whitespace_after(self) -> bool:
        """ Наличие пробельных символов после """
        return self.__get_attr(2)

    @is_whitespace_after.setter
    def is_whitespace_after(self, value) -> bool:
        self._set_attr(2, value)
        return value

    @property
    def is_newline_before(self) -> bool:
        """ Элемент начинается с новой строки.
        Для 1-го элемента всегда true. """
        return self.__get_attr(3)

    @is_newline_before.setter
    def is_newline_before(self, value) -> bool:
        self._set_attr(3, value)
        return value

    @property
    def is_newline_after(self) -> bool:
        """ Элемент заканчивает строку.
        Для последнего элемента всегда true. """
        return self.__get_attr(4)

    @is_newline_after.setter
    def is_newline_after(self, value) -> bool:
        self._set_attr(4, value)
        return value

    @property
    def inner_bool(self) -> bool:
        # Это используется внутренним образом
        return self.__get_attr(5)

    @inner_bool.setter
    def inner_bool(self, value) -> bool:
        self._set_attr(5, value)
        return value

    @property
    def not_noun_phrase(self) -> bool:
        # Это используется внутренним образом
        # (признак того, что здесь не начинается именная группа, чтобы повторно не пытаться выделять)
        return self.__get_attr(6)

    @not_noun_phrase.setter
    def not_noun_phrase(self, value) -> bool:
        self._set_attr(6, value)
        return value

    @property
    def whitespaces_before_count(self) -> int:
        """ Количество пробелов перед, переход на новую строку = 10, табуляция = 5 """
        if (self.previous is None):
            return 100
        if ((self.previous.end_char + 1) == self.begin_char):
            return 0
        return self.__calc_whitespaces(self.previous.end_char + 1,
                                       self.begin_char - 1)

    @property
    def newlines_before_count(self) -> int:
        """ Количество переходов на новую строку перед """
        ch0 = chr(0)
        res = 0
        txt = self.kit.sofa.text
        for p in range(self.begin_char - 1, -1, -1):
            ch = txt[p]
            if ((ord(ch)) == 0xA):
                res += 1
            elif ((ord(ch)) == 0xD and (ord(ch0)) != 0xA):
                res += 1
            elif (ch == '\f'):
                res += 10
            elif (not Utils.isWhitespace(ch)):
                break
            ch0 = ch
        return res

    @property
    def newlines_after_count(self) -> int:
        """ Количество переходов на новую строку перед """
        ch0 = chr(0)
        res = 0
        txt = self.kit.sofa.text
        p = self.end_char + 1
        while p < len(txt):
            ch = txt[p]
            if ((ord(ch)) == 0xD):
                res += 1
            elif ((ord(ch)) == 0xA and (ord(ch0)) != 0xD):
                res += 1
            elif (ch == '\f'):
                res += 10
            elif (not Utils.isWhitespace(ch)):
                break
            ch0 = ch
            p += 1
        return res

    @property
    def whitespaces_after_count(self) -> int:
        """ Количество пробелов перед, переход на новую строку = 10, табуляция = 5 """
        if (self.next0_ is None):
            return 100
        if ((self.end_char + 1) == self.next0_.begin_char):
            return 0
        return self.__calc_whitespaces(self.end_char + 1,
                                       self.next0_.begin_char - 1)

    def __calc_whitespaces(self, p0: int, p1: int) -> int:
        if ((p0 < 0) or p0 > p1 or p1 >= len(self.kit.sofa.text)):
            return -1
        res = 0
        i = p0
        while i <= p1:
            ch = self.kit.get_text_character(i)
            if (ch == '\r' or ch == '\n'):
                res += 10
                ch1 = self.kit.get_text_character(i + 1)
                if (ch != ch1 and ((ch1 == '\r' or ch1 == '\n'))):
                    i += 1
            elif (ch == '\t'):
                res += 5
            elif (ch == '\u0007'):
                res += 100
            elif (ch == '\f'):
                res += 100
            else:
                res += 1
            i += 1
        return res

    @property
    def is_hiphen(self) -> bool:
        """ Это символ переноса """
        ch = self.kit.sofa.text[self.begin_char]
        return LanguageHelper.is_hiphen(ch)

    @property
    def is_table_control_char(self) -> bool:
        """ Это спец-символы для табличных элементов (7h, 1Eh, 1Fh) """
        ch = self.kit.sofa.text[self.begin_char]
        return (ord(ch)) == 7 or (ord(ch)) == 0x1F or (ord(ch)) == 0x1E

    @property
    def is_and(self) -> bool:
        """ Это соединительный союз И (на всех языках) """
        return False

    @property
    def is_or(self) -> bool:
        """ Это соединительный союз ИЛИ (на всех языках) """
        return False

    @property
    def is_comma(self) -> bool:
        """ Это запятая """
        return self.is_char(',')

    @property
    def is_comma_and(self) -> bool:
        """ Это запятая или союз И """
        return self.is_comma or self.is_and

    def is_char(self, ch: 'char') -> bool:
        """ Токен состоит из конкретного символа
        
        Args:
            ch('char'): проверяемый символ
        
        """
        if (self.begin_char != self.end_char):
            return False
        return self.kit.sofa.text[self.begin_char] == ch

    def is_char_of(self, chars_: str) -> bool:
        """ Токен состоит из одного символа, который есть в указанной строке
        
        Args:
            chars_(str): строка возможных символов
        
        """
        if (self.begin_char != self.end_char):
            return False
        return chars_.find(self.kit.sofa.text[self.begin_char]) >= 0

    def is_value(self, term: str, termua: str = None) -> bool:
        """ Проверка конкретного значения слова
        
        Args:
            term(str): слово (проверяется значение TextToken.Term)
            termua(str): слово для проверки на украинском языке
        
        Returns:
            bool: да-нет
        """
        return False

    @property
    def is_letters(self) -> bool:
        """ Признак того, что это буквенный текстовой токен (TextToken) """
        return False

    def get_referent(self) -> 'Referent':
        """ Получить ссылку на сущность (не null только для ReferentToken)
        
        """
        return None

    def get_referents(self) -> typing.List['Referent']:
        """ Получить список ссылок на все сущности, скрывающиеся под элементом.
        Дело в том, что одни сущности могут накрывать другие (например, адрес накроет город).
        
        """
        return None

    def get_normal_case_text(self,
                             mc: 'MorphClass' = None,
                             num: 'MorphNumber' = MorphNumber.UNDEFINED,
                             gender: 'MorphGender' = MorphGender.UNDEFINED,
                             keep_chars: bool = False) -> str:
        """ Получить связанный с токеном текст в именительном падеже
        
        Args:
            mc(MorphClass): желательная часть речи
            num(MorphNumber): желательное число
            gender(MorphGender): желательный пол
            keep_chars(bool): сохранять регистр символов (по умолчанию, всё в верхний)
        
        Returns:
            str: строка текста
        """
        return str(self)

    def get_source_text(self) -> str:
        """ Получить фрагмент исходного текста, связанный с токеном
        
        Returns:
            str: фрагмент исходного текста
        """
        len0_ = (self.end_char + 1) - self.begin_char
        if ((len0_ < 1) or (self.begin_char < 0)):
            return None
        if ((self.begin_char + len0_) > len(self.kit.sofa.text)):
            return None
        return self.kit.sofa.text[self.begin_char:self.begin_char + len0_]

    def get_morph_class_in_dictionary(self) -> 'MorphClass':
        """ Проверка, что слово есть в словаре соответствующего языка
        
        Returns:
            MorphClass: части речи, если не из словаря, то IsUndefined
        """
        return self.morph.class0_

    def _serialize(self, stream: Stream) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        SerializerHelper.serialize_int(stream, self.begin_char)
        SerializerHelper.serialize_int(stream, self.end_char)
        SerializerHelper.serialize_int(stream, self.__m_attrs)
        SerializerHelper.serialize_int(stream, self.chars.value)
        if (self.__m_morph is None):
            self.__m_morph = MorphCollection()
        self.__m_morph._serialize(stream)

    def _deserialize(self, stream: Stream, kit_: 'AnalysisKit',
                     vers: int) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        self.kit = kit_
        self.__m_begin_char = SerializerHelper.deserialize_int(stream)
        self.__m_end_char = SerializerHelper.deserialize_int(stream)
        self.__m_attrs = (SerializerHelper.deserialize_int(stream))
        self.chars = CharsInfo._new2561(
            SerializerHelper.deserialize_int(stream))
        self.__m_morph = MorphCollection()
        self.__m_morph._deserialize(stream)

Example #13

Show file

class TextToken(Token):
    """ Входной токен (после морфанализа)
    Текстовой токен
    """
    def __init__(self,
                 source: 'MorphToken',
                 kit_: 'AnalysisKit',
                 bchar: int = -1,
                 echar: int = -1) -> None:
        super().__init__(kit_, (bchar if bchar >= 0 else
                                (0 if source is None else source.begin_char)),
                         (echar if echar >= 0 else
                          (0 if source is None else source.end_char)))
        self.term = None
        self.lemma = None
        self.term0 = None
        self.invariant_prefix_length_of_morph_vars = 0
        self.max_length_of_morph_vars = 0
        if (source is None):
            return
        self.chars = source.char_info
        self.term = source.term
        self.lemma = (Utils.ifNotNull(source.get_lemma(), self.term))
        self.max_length_of_morph_vars = (len(self.term))
        self.morph = MorphCollection()
        if (source.word_forms is not None):
            for wf in source.word_forms:
                self.morph.add_item(wf)
                if (wf.normal_case is not None and
                    (self.max_length_of_morph_vars < len(wf.normal_case))):
                    self.max_length_of_morph_vars = (len(wf.normal_case))
                if (wf.normal_full is not None and
                    (self.max_length_of_morph_vars < len(wf.normal_full))):
                    self.max_length_of_morph_vars = (len(wf.normal_full))
        i = 0
        while i < len(self.term):
            ch = self.term[i]
            j = 0
            j = 0
            while j < self.morph.items_count:
                wf = Utils.asObjectOrNull(self.morph.get_indexer_item(j),
                                          MorphWordForm)
                if (wf.normal_case is not None):
                    if (i >= len(wf.normal_case)):
                        break
                    if (wf.normal_case[i] != ch):
                        break
                if (wf.normal_full is not None):
                    if (i >= len(wf.normal_full)):
                        break
                    if (wf.normal_full[i] != ch):
                        break
                j += 1
            if (j < self.morph.items_count):
                break
            self.invariant_prefix_length_of_morph_vars = ((i + 1))
            i += 1
        if (self.morph.language.is_undefined
                and not source.language.is_undefined):
            self.morph.language = source.language

    def __str__(self) -> str:
        res = Utils.newStringIO(self.term)
        for l_ in self.morph.items:
            print(", {0}".format(str(l_)), end="", file=res, flush=True)
        return Utils.toStringStringIO(res)

    def check_value(self, dict0_: typing.List[tuple]) -> object:
        """ Попробовать привязать словарь
        
        Args:
            dict0_(typing.List[tuple]): 
        
        """
        if (dict0_ is None):
            return None
        res = None
        wrapres2868 = RefOutArgWrapper(None)
        inoutres2869 = Utils.tryGetValue(dict0_, self.term, wrapres2868)
        res = wrapres2868.value
        if (inoutres2869):
            return res
        if (self.morph is not None):
            for it in self.morph.items:
                mf = Utils.asObjectOrNull(it, MorphWordForm)
                if (mf is not None):
                    if (mf.normal_case is not None):
                        wrapres2864 = RefOutArgWrapper(None)
                        inoutres2865 = Utils.tryGetValue(
                            dict0_, mf.normal_case, wrapres2864)
                        res = wrapres2864.value
                        if (inoutres2865):
                            return res
                    if (mf.normal_full is not None
                            and mf.normal_case != mf.normal_full):
                        wrapres2866 = RefOutArgWrapper(None)
                        inoutres2867 = Utils.tryGetValue(
                            dict0_, mf.normal_full, wrapres2866)
                        res = wrapres2866.value
                        if (inoutres2867):
                            return res
        return None

    def get_source_text(self) -> str:
        return super().get_source_text()

    def is_value(self, term_: str, termua: str = None) -> bool:
        if (termua is not None and self.morph.language.is_ua):
            if (self.is_value(termua, None)):
                return True
        if (term_ is None):
            return False
        if (self.invariant_prefix_length_of_morph_vars > len(term_)):
            return False
        if (self.max_length_of_morph_vars >= len(self.term)
                and (self.max_length_of_morph_vars < len(term_))):
            return False
        if (term_ == self.term):
            return True
        for wf in self.morph.items:
            if ((isinstance(wf, MorphWordForm)) and
                ((wf.normal_case == term_ or wf.normal_full == term_))):
                return True
        return False

    @property
    def is_and(self) -> bool:
        """ Это соединительный союз И (на всех языках) """
        if (not self.morph.class0_.is_conjunction):
            if (self.length_char == 1 and self.is_char('&')):
                return True
            return False
        val = self.term
        if (val == "И" or val == "AND" or val == "UND"):
            return True
        if (self.morph.language.is_ua):
            if (val == "І" or val == "ТА"):
                return True
        return False

    @property
    def is_or(self) -> bool:
        """ Это соединительный союз ИЛИ (на всех языках) """
        if (not self.morph.class0_.is_conjunction):
            return False
        val = self.term
        if (val == "ИЛИ" or val == "ЛИБО" or val == "OR"):
            return True
        if (self.morph.language.is_ua):
            if (val == "АБО"):
                return True
        return False

    @property
    def is_letters(self) -> bool:
        return str.isalpha(self.term[0])

    def get_morph_class_in_dictionary(self) -> 'MorphClass':
        res = MorphClass()
        for wf in self.morph.items:
            if ((isinstance(wf, MorphWordForm)) and wf.is_in_dictionary):
                res |= wf.class0_
        return res

    def get_normal_case_text(self,
                             mc: 'MorphClass' = None,
                             num: 'MorphNumber' = MorphNumber.UNDEFINED,
                             gender: 'MorphGender' = MorphGender.UNDEFINED,
                             keep_chars: bool = False) -> str:
        from pullenti.ner.core.MiscHelper import MiscHelper
        empty = True
        if (mc is not None and mc.is_preposition):
            return LanguageHelper.normalize_preposition(self.term)
        for it in self.morph.items:
            if (mc is not None and not mc.is_undefined):
                cc = (it.class0_) & mc
                if (cc.is_undefined):
                    continue
                if (cc.is_misc and not cc.is_proper and mc != it.class0_):
                    continue
            wf = Utils.asObjectOrNull(it, MorphWordForm)
            normal_full = False
            if (gender != MorphGender.UNDEFINED):
                if (((it.gender) & (gender)) == (MorphGender.UNDEFINED)):
                    if ((gender == MorphGender.MASCULINE and
                         ((it.gender != MorphGender.UNDEFINED or it.number
                           == MorphNumber.PLURAL)) and wf is not None)
                            and wf.normal_full is not None):
                        normal_full = True
                    elif (gender == MorphGender.MASCULINE
                          and it.class0_.is_personal_pronoun):
                        pass
                    else:
                        continue
            if (not it.case_.is_undefined):
                empty = False
            if (wf is not None):
                res = None
                if (num == MorphNumber.SINGULAR
                        and it.number == MorphNumber.PLURAL
                        and wf.normal_full is not None):
                    le = len(wf.normal_case)
                    if ((le == (len(wf.normal_full) + 2) and le > 4
                         and wf.normal_case[le - 2] == 'С')
                            and wf.normal_case[le - 1] == 'Я'):
                        res = wf.normal_case
                    else:
                        res = (wf.normal_full
                               if normal_full else wf.normal_full)
                else:
                    res = (wf.normal_full if normal_full else
                           (Utils.ifNotNull(wf.normal_case, self.term)))
                if (num == MorphNumber.SINGULAR and mc is not None
                        and mc == MorphClass.NOUN):
                    if (res == "ДЕТИ"):
                        res = "РЕБЕНОК"
                if (keep_chars):
                    if (self.chars.is_all_lower):
                        res = res.lower()
                    elif (self.chars.is_capital_upper):
                        res = MiscHelper.convert_first_char_upper_and_other_lower(
                            res)
                return res
        if (not empty):
            return None
        te = None
        if (num == MorphNumber.SINGULAR and mc is not None):
            bi = MorphBaseInfo._new492(MorphClass._new53(mc.value), gender,
                                       MorphNumber.SINGULAR,
                                       self.morph.language)
            vars0_ = MorphologyService.get_wordform(self.term, bi)
            if (vars0_ is not None):
                te = vars0_
        if (te is None):
            te = self.term
        if (keep_chars):
            if (self.chars.is_all_lower):
                return te.lower()
            elif (self.chars.is_capital_upper):
                return MiscHelper.convert_first_char_upper_and_other_lower(te)
        return te

    @staticmethod
    def get_source_text_tokens(begin: 'Token',
                               end: 'Token') -> typing.List['TextToken']:
        from pullenti.ner.MetaToken import MetaToken
        res = list()
        t = begin
        while t is not None and t != end.next0_ and t.end_char <= end.end_char:
            if (isinstance(t, TextToken)):
                res.append(Utils.asObjectOrNull(t, TextToken))
            elif (isinstance(t, MetaToken)):
                res.extend(
                    TextToken.get_source_text_tokens(t.begin_token,
                                                     t.end_token))
            t = t.next0_
        return res

    @property
    def is_pure_verb(self) -> bool:
        """ Признак того, что это чистый глагол """
        ret = False
        if ((self.is_value("МОЖНО", None) or self.is_value("МОЖЕТ", None)
             or self.is_value("ДОЛЖНЫЙ", None))
                or self.is_value("НУЖНО", None)):
            return True
        for it in self.morph.items:
            if ((isinstance(it, MorphWordForm)) and it.is_in_dictionary):
                if (it.class0_.is_verb and it.case_.is_undefined):
                    ret = True
                elif (not it.class0_.is_verb):
                    if (it.class0_.is_adjective
                            and it.contains_attr("к.ф.", None)):
                        pass
                    else:
                        return False
        return ret

    @property
    def is_verb_be(self) -> bool:
        """ Проверка, что это глагол типа БЫТЬ, ЯВЛЯТЬСЯ и т.п. """
        if ((self.is_value("БЫТЬ", None) or self.is_value("ЕСТЬ", None)
             or self.is_value("ЯВЛЯТЬ", None)) or self.is_value("BE", None)):
            return True
        if (self.term == "IS" or self.term == "WAS" or self.term == "BECAME"):
            return True
        if (self.term == "Є"):
            return True
        return False

    def _serialize(self, stream: Stream) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        super()._serialize(stream)
        SerializerHelper.serialize_string(stream, self.term)
        SerializerHelper.serialize_string(stream, self.lemma)
        SerializerHelper.serialize_short(
            stream, self.invariant_prefix_length_of_morph_vars)
        SerializerHelper.serialize_short(stream, self.max_length_of_morph_vars)

    def _deserialize(self, stream: Stream, kit_: 'AnalysisKit',
                     vers: int) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        super()._deserialize(stream, kit_, vers)
        self.term = SerializerHelper.deserialize_string(stream)
        self.lemma = SerializerHelper.deserialize_string(stream)
        self.invariant_prefix_length_of_morph_vars = SerializerHelper.deserialize_short(
            stream)
        self.max_length_of_morph_vars = SerializerHelper.deserialize_short(
            stream)

    @staticmethod
    def _new470(_arg1: 'MorphToken', _arg2: 'AnalysisKit', _arg3: int,
                _arg4: int, _arg5: str) -> 'TextToken':
        res = TextToken(_arg1, _arg2, _arg3, _arg4)
        res.term0 = _arg5
        return res

    @staticmethod
    def _new473(_arg1: 'MorphToken', _arg2: 'AnalysisKit', _arg3: int,
                _arg4: int, _arg5: 'CharsInfo', _arg6: str) -> 'TextToken':
        res = TextToken(_arg1, _arg2, _arg3, _arg4)
        res.chars = _arg5
        res.term0 = _arg6
        return res

Example #14

Show file

File: _NounPraseHelperInt.py Project: MihaJjDa/APCLtask

 def __tryParseRu(first: 'Token', typ: 'NounPhraseParseAttr',
                  max_char_pos: int) -> 'NounPhraseToken':
     if (first is None):
         return None
     items = None
     adverbs = None
     internal_noun_prase = None
     conj_before = False
     t = first
     first_pass2788 = True
     while True:
         if first_pass2788: first_pass2788 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char_pos > 0 and t.begin_char > max_char_pos):
             break
         if ((t.morph.class0_.is_conjunction
              and not t.morph.class0_.is_adjective
              and not t.morph.class0_.is_pronoun)
                 and not t.morph.class0_.is_noun):
             if (conj_before):
                 break
             if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) !=
                 (NounPhraseParseAttr.NO)):
                 break
             if (items is not None and t.is_and):
                 conj_before = True
                 if ((t.next0_ is not None and t.next0_.isCharOf("\\/")
                      and t.next0_.next0_ is not None)
                         and t.next0_.next0_.is_or):
                     t = t.next0_.next0_
                 continue
             break
         elif (t.is_comma):
             if (conj_before or items is None):
                 break
             if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) !=
                 (NounPhraseParseAttr.NO)):
                 break
             mc = t.previous.getMorphClassInDictionary()
             if (mc.is_proper_surname or mc.is_proper_secname):
                 break
             conj_before = True
             continue
         elif (t.isChar('(')):
             if (items is None):
                 return None
             if ((((typ) & (NounPhraseParseAttr.IGNOREBRACKETS))) !=
                 (NounPhraseParseAttr.IGNOREBRACKETS)):
                 break
             brr = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
             if (brr is None):
                 break
             if (brr.length_char > 100):
                 break
             t = brr.end_token
             continue
         if (isinstance(t, ReferentToken)):
             if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (
                     NounPhraseParseAttr.NO)):
                 break
         elif (t.chars.is_latin_letter):
             break
         it = NounPhraseItem.tryParse(t, items, typ)
         if (it is None or ((not it.can_be_adj and not it.can_be_noun))):
             if ((((typ) & (NounPhraseParseAttr.PARSEADVERBS))) !=
                 (NounPhraseParseAttr.NO) and (isinstance(t, TextToken))
                     and t.morph.class0_.is_adverb):
                 if (items is None):
                     if (t.previous is not None
                             and t.previous.morph.class0_.is_preposition):
                         pass
                     else:
                         return None
                 if (adverbs is None):
                     adverbs = list()
                 adverbs.append(Utils.asObjectOrNull(t, TextToken))
                 continue
             break
         it.conj_before = conj_before
         conj_before = False
         if (not it.can_be_adj and not it.can_be_noun):
             break
         if (t.is_newline_before and t != first):
             if ((((typ) & (NounPhraseParseAttr.MULTILINES))) !=
                 (NounPhraseParseAttr.NO)):
                 pass
             elif (items is not None
                   and t.chars != items[len(items) - 1].chars):
                 if (t.chars.is_all_lower
                         and items[len(items) - 1].chars.is_capital_upper):
                     pass
                 else:
                     break
         if (items is None):
             items = list()
         else:
             it0 = items[len(items) - 1]
             if (it0.can_be_noun and it0.is_personal_pronoun):
                 if (it.is_pronoun):
                     break
                 if ((it0.begin_token.previous is not None and
                      it0.begin_token.previous.getMorphClassInDictionary(
                      ).is_verb and not it0.begin_token.previous.
                      getMorphClassInDictionary().is_adjective)
                         and not it0.begin_token.previous.
                         getMorphClassInDictionary().is_preposition):
                     if (t.morph.case_.is_nominative
                             or t.morph.case_.is_accusative):
                         pass
                     else:
                         break
                 if (it.can_be_noun and it.is_verb):
                     break
         items.append(it)
         t = it.end_token
         if (t.is_newline_after and not t.chars.is_all_lower):
             mc = t.getMorphClassInDictionary()
             if (mc.is_proper_surname):
                 break
             if (t.morph.class0_.is_proper_surname and mc.is_undefined):
                 break
     if (items is None):
         return None
     if (len(items) == 1 and items[0].can_be_adj):
         and0_ = False
         tt1 = items[0].end_token.next0_
         first_pass2789 = True
         while True:
             if first_pass2789: first_pass2789 = False
             else: tt1 = tt1.next0_
             if (not (tt1 is not None)): break
             if (tt1.is_and or tt1.is_or):
                 and0_ = True
                 break
             if (tt1.is_comma or tt1.isValue("НО", None)
                     or tt1.isValue("ТАК", None)):
                 continue
             break
         if (and0_):
             if (items[0].can_be_noun and items[0].is_personal_pronoun):
                 and0_ = False
         if (and0_):
             tt2 = tt1.next0_
             if (tt2 is not None and tt2.morph.class0_.is_preposition):
                 tt2 = tt2.next0_
             npt1 = _NounPraseHelperInt.__tryParseRu(tt2, typ, max_char_pos)
             if (npt1 is not None and len(npt1.adjectives) > 0):
                 ok1 = False
                 for av in items[0].adj_morph:
                     for v in (npt1.noun).noun_morph:
                         if (v.checkAccord(av, False)):
                             items[0].morph.addItem(av)
                             ok1 = True
                 if (ok1):
                     npt1.begin_token = items[0].begin_token
                     npt1.end_token = tt1.previous
                     npt1.adjectives.clear()
                     npt1.adjectives.append(items[0])
                     return npt1
     last1 = items[len(items) - 1]
     check = True
     for it in items:
         if (not it.can_be_adj):
             check = False
             break
         elif (it.can_be_noun and it.is_personal_pronoun):
             check = False
             break
     tt1 = last1.end_token.next0_
     if ((tt1 is not None and check and
          ((tt1.morph.class0_.is_preposition
            or tt1.morph.case_.is_instrumental)))
             and (tt1.whitespaces_before_count < 2)):
         inp = NounPhraseHelper.tryParse(
             tt1,
             Utils.valToEnum((typ) | (NounPhraseParseAttr.PARSEPREPOSITION),
                             NounPhraseParseAttr), max_char_pos)
         if (inp is not None):
             tt1 = inp.end_token.next0_
             npt1 = _NounPraseHelperInt.__tryParseRu(tt1, typ, max_char_pos)
             if (npt1 is not None):
                 ok = True
                 for it in items:
                     if (not NounPhraseItem.tryAccordAdjAndNoun(
                             it,
                             Utils.asObjectOrNull(npt1.noun,
                                                  NounPhraseItem))):
                         ok = False
                         break
                 if (ok):
                     i = 0
                     while i < len(items):
                         npt1.adjectives.insert(i, items[i])
                         i += 1
                     npt1.internal_noun = inp
                     mmm = MorphCollection(npt1.morph)
                     for it in items:
                         mmm.removeItems(it.adj_morph[0], False)
                     if (mmm.gender != MorphGender.UNDEFINED
                             or mmm.number != MorphNumber.UNDEFINED
                             or not mmm.case_.is_undefined):
                         npt1.morph = mmm
                     if (adverbs is not None):
                         if (npt1.adverbs is None):
                             npt1.adverbs = adverbs
                         else:
                             npt1.adverbs[0:0] = adverbs
                     return npt1
             if (tt1 is not None and tt1.morph.class0_.is_noun):
                 it = NounPhraseItem.tryParse(tt1, items, typ)
                 if (it is not None and it.can_be_noun):
                     internal_noun_prase = inp
                     inp.begin_token = items[0].end_token.next0_
                     items.append(it)
     ok2 = False
     if ((len(items) == 1 and
          (((typ) & (NounPhraseParseAttr.ADJECTIVECANBELAST))) !=
          (NounPhraseParseAttr.NO) and
          (items[0].whitespaces_after_count < 3))
             and not items[0].is_adverb):
         if (not items[0].can_be_adj):
             ok2 = True
         elif (items[0].is_personal_pronoun and items[0].can_be_noun):
             ok2 = True
     if (ok2):
         it = NounPhraseItem.tryParse(items[0].end_token.next0_, None, typ)
         if (it is not None and it.can_be_adj
                 and it.begin_token.chars.is_all_lower):
             ok2 = True
             if (it.is_adverb or it.is_verb):
                 ok2 = False
             if (it.is_pronoun and items[0].is_pronoun):
                 ok2 = False
                 if (it.can_be_adj_for_personal_pronoun
                         and items[0].is_personal_pronoun):
                     ok2 = True
             if (ok2 and NounPhraseItem.tryAccordAdjAndNoun(it, items[0])):
                 npt1 = _NounPraseHelperInt.__tryParseRu(
                     it.begin_token, typ, max_char_pos)
                 if (npt1 is not None and ((npt1.end_char > it.end_char
                                            or len(npt1.adjectives) > 0))):
                     pass
                 else:
                     items.insert(0, it)
     noun = None
     adj_after = None
     for i in range(len(items) - 1, -1, -1):
         if (items[i].can_be_noun):
             if (items[i].conj_before):
                 continue
             if (i > 0 and not items[i - 1].can_be_adj):
                 continue
             if (i > 0 and items[i - 1].can_be_noun):
                 if (items[i - 1].is_doubt_adjective):
                     continue
                 if (items[i - 1].is_pronoun and items[i].is_pronoun):
                     if (items[i].is_pronoun and
                             items[i - 1].can_be_adj_for_personal_pronoun):
                         pass
                     else:
                         continue
             noun = items[i]
             del items[i:i + len(items) - i]
             if (adj_after is not None):
                 items.append(adj_after)
             break
     if (noun is None):
         return None
     res = NounPhraseToken(first, noun.end_token)
     if (adverbs is not None):
         for a in adverbs:
             if (a.begin_char < noun.begin_char):
                 if (res.adverbs is None):
                     res.adverbs = list()
                 res.adverbs.append(a)
     res.noun = (noun)
     res.internal_noun = internal_noun_prase
     for v in noun.noun_morph:
         noun.morph.addItem(v)
     res.morph = noun.morph
     if (res.morph.case_.is_nominative and first.previous is not None
             and first.previous.morph.class0_.is_preposition):
         res.morph.case_ = (res.morph.case_) ^ MorphCase.NOMINATIVE
     if ((((typ) &
           (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)
             and ((res.morph.class0_.is_pronoun
                   or res.morph.class0_.is_personal_pronoun))):
         return None
     stat = None
     if (len(items) > 1):
         stat = dict()
     need_update_morph = False
     if (len(items) > 0):
         ok_list = list()
         is_num_not = False
         for vv in noun.noun_morph:
             v = vv
             i = 0
             while i < len(items):
                 ok = False
                 for av in items[i].adj_morph:
                     if (v.checkAccord(av, False)):
                         ok = True
                         if (not ((av.case_) & v.case_).is_undefined
                                 and av.case_ != v.case_):
                             v.case_ = av.case_ = (av.case_) & v.case_
                         break
                 if (not ok):
                     if (items[i].can_be_numeric_adj
                             and items[i].tryAccordVar(v)):
                         ok = True
                         v = (Utils.asObjectOrNull(v.clone(),
                                                   NounPhraseItemTextVar))
                         v.number = MorphNumber.PLURAL
                         is_num_not = True
                         v.case_ = MorphCase()
                         for a in items[i].adj_morph:
                             v.case_ = (v.case_) | a.case_
                     else:
                         break
                 i += 1
             if (i >= len(items)):
                 ok_list.append(v)
         if (len(ok_list) > 0 and
             (((len(ok_list) < res.morph.items_count) or is_num_not))):
             res.morph = MorphCollection()
             for v in ok_list:
                 res.morph.addItem(v)
             if (not is_num_not):
                 noun.morph = res.morph
     i = 0
     first_pass2790 = True
     while True:
         if first_pass2790: first_pass2790 = False
         else: i += 1
         if (not (i < len(items))): break
         for av in items[i].adj_morph:
             for v in noun.noun_morph:
                 if (v.checkAccord(av, False)):
                     if (not ((av.case_) & v.case_).is_undefined
                             and av.case_ != v.case_):
                         v.case_ = av.case_ = (av.case_) & v.case_
                         need_update_morph = True
                     items[i].morph.addItem(av)
                     if (stat is not None and len(av.normal_value) > 1):
                         last = av.normal_value[len(av.normal_value) - 1]
                         if (not last in stat):
                             stat[last] = 1
                         else:
                             stat[last] += 1
         if (items[i].is_pronoun or items[i].is_personal_pronoun):
             res.anafor = items[i].begin_token
             if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (
                     NounPhraseParseAttr.NO)):
                 continue
         tt = Utils.asObjectOrNull(items[i].begin_token, TextToken)
         if (tt is not None and not tt.term.startswith("ВЫСШ")):
             err = False
             for wf in tt.morph.items:
                 if (wf.class0_.is_adjective):
                     if (wf.containsAttr("прев.", None)):
                         if ((((typ) &
                               (NounPhraseParseAttr.IGNOREADJBEST))) !=
                             (NounPhraseParseAttr.NO)):
                             err = True
                     if (wf.containsAttr("к.ф.", None)
                             and tt.morph.class0_.is_personal_pronoun):
                         return None
             if (err):
                 continue
         if (res.morph.case_.is_nominative):
             v = MiscHelper.getTextValueOfMetaToken(items[i],
                                                    GetTextAttr.KEEPQUOTES)
             if (not Utils.isNullOrEmpty(v)):
                 if (items[i].getNormalCaseText(
                         None, False, MorphGender.UNDEFINED, False) != v):
                     wf = NounPhraseItemTextVar(items[i].morph, None)
                     wf.normal_value = v
                     wf.class0_ = MorphClass.ADJECTIVE
                     wf.case_ = res.morph.case_
                     if (res.morph.case_.is_prepositional
                             or res.morph.gender == MorphGender.NEUTER
                             or res.morph.gender == MorphGender.FEMINIE):
                         items[i].morph.addItem(wf)
                     else:
                         items[i].morph.insertItem(0, wf)
         res.adjectives.append(items[i])
         if (items[i].end_char > res.end_char):
             res.end_token = items[i].end_token
     i = 0
     first_pass2791 = True
     while True:
         if first_pass2791: first_pass2791 = False
         else: i += 1
         if (not (i < (len(res.adjectives) - 1))): break
         if (res.adjectives[i].whitespaces_after_count > 5):
             if (res.adjectives[i].chars != res.adjectives[i + 1].chars):
                 if (not res.adjectives[i + 1].chars.is_all_lower):
                     return None
                 if (res.adjectives[i].chars.is_all_upper
                         and res.adjectives[i + 1].chars.is_capital_upper):
                     return None
                 if (res.adjectives[i].chars.is_capital_upper
                         and res.adjectives[i + 1].chars.is_all_upper):
                     return None
             if (res.adjectives[i].whitespaces_after_count > 10):
                 if (res.adjectives[i].newlines_after_count == 1):
                     if (res.adjectives[i].chars.is_capital_upper and i == 0
                             and res.adjectives[i + 1].chars.is_all_lower):
                         continue
                     if (res.adjectives[i].chars == res.adjectives[
                             i + 1].chars):
                         continue
                 return None
     if (need_update_morph):
         noun.morph = MorphCollection()
         for v in noun.noun_morph:
             noun.morph.addItem(v)
         res.morph = noun.morph
     if (len(res.adjectives) > 0):
         if (noun.begin_token.previous is not None):
             if (noun.begin_token.previous.is_comma_and):
                 if (res.adjectives[0].begin_char > noun.begin_char):
                     pass
                 else:
                     return None
         zap = 0
         and0_ = 0
         cou = 0
         last_and = False
         i = 0
         while i < (len(res.adjectives) - 1):
             te = res.adjectives[i].end_token.next0_
             if (te is None):
                 return None
             if (te.isChar('(')):
                 pass
             elif (te.is_comma):
                 zap += 1
             elif (te.is_and):
                 and0_ += 1
                 if (i == (len(res.adjectives) - 2)):
                     last_and = True
             if (not res.adjectives[i].begin_token.morph.class0_.is_pronoun
                 ):
                 cou += 1
             i += 1
         if ((zap + and0_) > 0):
             if (and0_ > 1):
                 return None
             elif (and0_ == 1 and not last_and):
                 return None
             if ((zap + and0_) != cou):
                 if (and0_ == 1):
                     pass
                 else:
                     return None
             last = Utils.asObjectOrNull(
                 res.adjectives[len(res.adjectives) - 1], NounPhraseItem)
             if (last.is_pronoun and not last_and):
                 return None
     if (stat is not None):
         for adj in items:
             if (adj.morph.items_count > 1):
                 w1 = Utils.asObjectOrNull(adj.morph.getIndexerItem(0),
                                           NounPhraseItemTextVar)
                 w2 = Utils.asObjectOrNull(adj.morph.getIndexerItem(1),
                                           NounPhraseItemTextVar)
                 if ((len(w1.normal_value) < 2)
                         or (len(w2.normal_value) < 2)):
                     break
                 l1 = w1.normal_value[len(w1.normal_value) - 1]
                 l2 = w2.normal_value[len(w2.normal_value) - 1]
                 i1 = 0
                 i2 = 0
                 wrapi1534 = RefOutArgWrapper(0)
                 Utils.tryGetValue(stat, l1, wrapi1534)
                 i1 = wrapi1534.value
                 wrapi2533 = RefOutArgWrapper(0)
                 Utils.tryGetValue(stat, l2, wrapi2533)
                 i2 = wrapi2533.value
                 if (i1 < i2):
                     adj.morph.removeItem(1)
                     adj.morph.insertItem(0, w2)
     if (res.begin_token.getMorphClassInDictionary().is_verb
             and len(items) > 0):
         if (not res.begin_token.chars.is_all_lower
                 or res.begin_token.previous is None):
             pass
         elif (res.begin_token.previous.morph.class0_.is_preposition):
             pass
         else:
             comma = False
             tt = res.begin_token.previous
             first_pass2792 = True
             while True:
                 if first_pass2792: first_pass2792 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.morph.class0_.is_adverb):
                     continue
                 if (tt.isCharOf(".;")):
                     break
                 if (tt.is_comma):
                     comma = True
                     continue
                 if (tt.isValue("НЕ", None)):
                     continue
                 if (((tt.morph.class0_.is_noun
                       or tt.morph.class0_.is_proper)) and comma):
                     for it in res.begin_token.morph.items:
                         if (it.class0_.is_verb
                                 and (isinstance(it, MorphWordForm))):
                             if (tt.morph.checkAccord(it, False)):
                                 if (res.morph.case_.is_instrumental):
                                     return None
                                 ews = Explanatory.findDerivates(
                                     (it).normal_case, True,
                                     tt.morph.language)
                                 if (ews is not None):
                                     for ew in ews:
                                         if (ew.transitive > 0):
                                             if (res.morph.case_.is_genitive
                                                 ):
                                                 return None
                                         if (ew.nexts is not None):
                                             wrapcm535 = RefOutArgWrapper(
                                                 None)
                                             inoutres536 = Utils.tryGetValue(
                                                 ew.nexts, "", wrapcm535)
                                             cm = wrapcm535.value
                                             if (inoutres536):
                                                 if (not (
                                                     (cm) & res.morph.case_
                                                 ).is_undefined):
                                                     return None
                 break
     if (res.begin_token == res.end_token):
         mc = res.begin_token.getMorphClassInDictionary()
         if (mc.is_adverb):
             if (res.begin_token.previous is not None and
                     res.begin_token.previous.morph.class0_.is_preposition):
                 pass
             elif (mc.is_noun and not mc.is_preposition
                   and not mc.is_conjunction):
                 pass
             elif (res.begin_token.isValue("ВЕСЬ", None)):
                 pass
             else:
                 return None
     return res

Example #15

Show file

File: SentItem.py Project: pullenti/PullentiPython

 def parse_near_items(t : 'Token', t1 : 'Token', lev : int, prev : typing.List['SentItem']) -> typing.List['SentItem']:
     if (lev > 100): 
         return None
     if (t is None or t.begin_char > t1.end_char): 
         return None
     res = list()
     if (isinstance(t, ReferentToken)): 
         res.append(SentItem(Utils.asObjectOrNull(t, MetaToken)))
         return res
     delim = DelimToken.try_parse(t)
     if (delim is not None): 
         res.append(SentItem(delim))
         return res
     conj = ConjunctionHelper.try_parse(t)
     if (conj is not None): 
         res.append(SentItem(conj))
         return res
     prep_ = PrepositionHelper.try_parse(t)
     t111 = (t if prep_ is None else prep_.end_token.next0_)
     if ((isinstance(t111, NumberToken)) and ((t111.morph.class0_.is_adjective and not t111.morph.class0_.is_noun))): 
         t111 = (None)
     num = (None if t111 is None else NumbersWithUnitToken.try_parse(t111, None, False, False, False, False))
     if (num is not None): 
         if (len(num.units) == 0): 
             npt1 = NounPhraseHelper.try_parse(num.end_token.next0_, SentItem.__m_npt_attrs, 0, None)
             if (npt1 is None and num.end_token.next0_ is not None and num.end_token.next0_.is_value("РАЗ", None)): 
                 npt1 = NounPhraseToken(num.end_token.next0_, num.end_token.next0_)
                 npt1.noun = MetaToken(num.end_token.next0_, num.end_token.next0_)
             if (npt1 is not None and prep_ is not None): 
                 if (npt1.noun.end_token.is_value("РАЗ", None)): 
                     npt1.morph.remove_items(prep_.next_case, False)
                 elif (((npt1.morph.case_) & prep_.next_case).is_undefined): 
                     npt1 = (None)
                 else: 
                     npt1.morph.remove_items(prep_.next_case, False)
             if ((npt1 is not None and npt1.end_token.is_value("ОНИ", None) and npt1.preposition is not None) and npt1.preposition.normal == "ИЗ"): 
                 npt1.morph = MorphCollection(num.end_token.morph)
                 npt1.preposition = (None)
                 nn = str(num)
                 si1 = SentItem(npt1)
                 if (nn == "1" and (isinstance(num.end_token, NumberToken)) and num.end_token.end_token.is_value("ОДИН", None)): 
                     a = SemAttribute._new2946(SemAttributeType.ONEOF, num.end_token.end_token.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False))
                     aex = SemAttributeEx._new2945(num, a)
                     si1.attrs = list()
                     si1.attrs.append(aex)
                 else: 
                     si1.quant = SemQuantity(nn, num.begin_token, num.end_token)
                 if (prep_ is not None): 
                     si1.prep = prep_.normal
                 res.append(si1)
                 return res
             if (npt1 is not None): 
                 si1 = SentItem._new2948(npt1, SemQuantity(str(num), num.begin_token, num.end_token))
                 if (prep_ is not None): 
                     si1.prep = prep_.normal
                 if (npt1.end_token.is_value("РАЗ", None)): 
                     si1.typ = SentItemType.FORMULA
                 if (((npt1.morph.number) & (MorphNumber.PLURAL)) == (MorphNumber.UNDEFINED) and si1.quant.spelling != "1"): 
                     ok = False
                     if (si1.quant.spelling.endswith("1")): 
                         ok = True
                     elif (si1.typ == SentItemType.FORMULA): 
                         ok = True
                     elif (si1.quant.spelling.endswith("2") and npt1.morph.case_.is_genitive): 
                         ok = True
                     elif (si1.quant.spelling.endswith("3") and npt1.morph.case_.is_genitive): 
                         ok = True
                     elif (si1.quant.spelling.endswith("4") and npt1.morph.case_.is_genitive): 
                         ok = True
                     if (ok): 
                         npt1.morph = MorphCollection()
                         npt1.morph.number = MorphNumber.PLURAL
                 res.append(si1)
                 return res
         num.begin_token = t
         num.morph = MorphCollection(num.end_token.morph)
         si = SentItem(num)
         if (prep_ is not None): 
             si.prep = prep_.normal
         res.append(si)
         if (si.prep == "НА"): 
             aa = AdverbToken.try_parse(si.end_token.next0_)
             if (aa is not None and ((aa.typ == SemAttributeType.LESS or aa.typ == SemAttributeType.GREAT))): 
                 si.add_attr(aa)
                 si.end_token = aa.end_token
         return res
     mc = t.get_morph_class_in_dictionary()
     adv = AdverbToken.try_parse(t)
     npt = NounPhraseHelper.try_parse(t, SentItem.__m_npt_attrs, 0, None)
     if (npt is not None and (isinstance(npt.end_token, TextToken)) and npt.end_token.term == "БЫЛИ"): 
         npt = (None)
     if (npt is not None and adv is not None): 
         if (adv.end_char > npt.end_char): 
             npt = (None)
         elif (adv.end_char == npt.end_char): 
             res.append(SentItem(npt))
             res.append(SentItem(adv))
             return res
     if (npt is not None and len(npt.adjectives) == 0): 
         if (npt.end_token.is_value("КОТОРЫЙ", None) and t.previous is not None and t.previous.is_comma_and): 
             res1 = SentItem.__parse_subsent(npt, t1, lev + 1, prev)
             if (res1 is not None): 
                 return res1
         if (npt.end_token.is_value("СКОЛЬКО", None)): 
             tt1 = npt.end_token.next0_
             if (tt1 is not None and tt1.is_value("ВСЕГО", None)): 
                 tt1 = tt1.next0_
             npt1 = NounPhraseHelper.try_parse(tt1, NounPhraseParseAttr.NO, 0, None)
             if (npt1 is not None and not npt1.morph.case_.is_undefined and prep_ is not None): 
                 if (((prep_.next_case) & npt1.morph.case_).is_undefined): 
                     npt1 = (None)
                 else: 
                     npt1.morph.remove_items(prep_.next_case, False)
             if (npt1 is not None): 
                 npt1.begin_token = npt.begin_token
                 npt1.preposition = npt.preposition
                 npt1.adjectives.append(MetaToken(npt.end_token, npt.end_token))
                 npt = npt1
         if (npt.end_token.morph.class0_.is_adjective): 
             if (VerbPhraseHelper.try_parse(t, True, False, False) is not None): 
                 npt = (None)
     vrb = None
     if (npt is not None and len(npt.adjectives) > 0): 
         vrb = VerbPhraseHelper.try_parse(t, True, False, False)
         if (vrb is not None and vrb.first_verb.is_participle): 
             npt = (None)
     elif (adv is None or npt is not None): 
         vrb = VerbPhraseHelper.try_parse(t, True, False, False)
     if (npt is not None): 
         res.append(SentItem(npt))
     if (vrb is not None and not vrb.first_verb.is_participle and not vrb.first_verb.is_dee_participle): 
         vars0_ = list()
         for wf in vrb.first_verb.morph.items: 
             if (wf.class0_.is_verb and (isinstance(wf, MorphWordForm)) and wf.is_in_dictionary): 
                 vars0_.append(Utils.asObjectOrNull(wf, MorphWordForm))
         if (len(vars0_) < 2): 
             res.append(SentItem(vrb))
         else: 
             vrb.first_verb.verb_morph = vars0_[0]
             res.append(SentItem(vrb))
             i = 1
             while i < len(vars0_): 
                 vrb = VerbPhraseHelper.try_parse(t, False, False, False)
                 if (vrb is None): 
                     break
                 vrb.first_verb.verb_morph = vars0_[i]
                 res.append(SentItem(vrb))
                 i += 1
             if (vars0_[0].misc.mood == MorphMood.IMPERATIVE and vars0_[1].misc.mood != MorphMood.IMPERATIVE): 
                 rr = res[0]
                 res[0] = res[1]
                 res[1] = rr
         return res
     if (vrb is not None): 
         res1 = SentItem.__parse_participles(vrb, t1, lev + 1)
         if (res1 is not None): 
             res.extend(res1)
     if (len(res) > 0): 
         return res
     if (adv is not None): 
         if (adv.typ == SemAttributeType.OTHER): 
             npt1 = NounPhraseHelper.try_parse(adv.end_token.next0_, SentItem.__m_npt_attrs, 0, None)
             if (npt1 is not None and npt1.end_token.is_value("ОНИ", None) and npt1.preposition is not None): 
                 si1 = SentItem(npt1)
                 a = SemAttribute._new2946(SemAttributeType.OTHER, adv.end_token.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
                 aex = SemAttributeEx._new2945(num, a)
                 si1.attrs = list()
                 si1.attrs.append(aex)
                 if (prep_ is not None): 
                     si1.prep = prep_.normal
                 res.append(si1)
                 return res
             for i in range(len(prev) - 1, -1, -1):
                 if (prev[i].attrs is not None): 
                     for a in prev[i].attrs: 
                         if (a.attr.typ == SemAttributeType.ONEOF): 
                             si1 = SentItem(prev[i].source)
                             aa = SemAttribute._new2946(SemAttributeType.OTHER, adv.end_token.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
                             aex = SemAttributeEx._new2945(adv, aa)
                             si1.attrs = list()
                             si1.attrs.append(aex)
                             if (prep_ is not None): 
                                 si1.prep = prep_.normal
                             si1.begin_token = adv.begin_token
                             si1.end_token = adv.end_token
                             res.append(si1)
                             return res
         res.append(SentItem(adv))
         return res
     if (mc.is_adjective): 
         npt = NounPhraseToken._new2953(t, t, MorphCollection(t.morph))
         npt.noun = MetaToken(t, t)
         res.append(SentItem(npt))
         return res
     return None

Example #16

Show file

 def try_attach_territory(
         li: typing.List['TerrItemToken'],
         ad: 'AnalyzerData',
         attach_always: bool = False,
         cits: typing.List['CityItemToken'] = None,
         exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
     if (li is None or len(li) == 0):
         return None
     ex_obj = None
     new_name = None
     adj_list = list()
     noun = None
     add_noun = None
     rt = TerrAttachHelper.__try_attach_moscowao(li, ad)
     if (rt is not None):
         return rt
     if (li[0].termin_item is not None
             and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
         res2 = TerrAttachHelper.__try_attach_pure_terr(li, ad)
         return res2
     if (len(li) == 2):
         if (li[0].rzd is not None and li[1].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._add_name(li[1].rzd_dir)
             rzd._add_typ_ter(li[0].kit.base_language)
             rzd.add_slot(GeoReferent.ATTR_REF, li[0].rzd.referent, False,
                          0)
             rzd.add_ext_referent(li[0].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
         if (li[1].rzd is not None and li[0].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._add_name(li[0].rzd_dir)
             rzd._add_typ_ter(li[0].kit.base_language)
             rzd.add_slot(GeoReferent.ATTR_REF, li[1].rzd.referent, False,
                          0)
             rzd.add_ext_referent(li[1].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
     can_be_city_before = False
     adj_terr_before = False
     if (cits is not None):
         if (cits[0].typ == CityItemToken.ItemType.CITY):
             can_be_city_before = True
         elif (cits[0].typ == CityItemToken.ItemType.NOUN
               and len(cits) > 1):
             can_be_city_before = True
     k = 0
     k = 0
     while k < len(li):
         if (li[k].onto_item is not None):
             if (ex_obj is not None or new_name is not None):
                 break
             if (noun is not None):
                 if (k == 1):
                     if (noun.termin_item.canonic_text == "РАЙОН"
                             or noun.termin_item.canonic_text == "ОБЛАСТЬ"
                             or noun.termin_item.canonic_text == "СОЮЗ"):
                         if (isinstance(li[k].onto_item.referent,
                                        GeoReferent)):
                             if (li[k].onto_item.referent.is_state):
                                 break
                         ok = False
                         tt = li[k].end_token.next0_
                         if (tt is None):
                             ok = True
                         elif (tt.is_char_of(",.")):
                             ok = True
                         if (not ok):
                             ok = MiscLocationHelper.check_geo_object_before(
                                 li[0].begin_token)
                         if (not ok):
                             adr = AddressItemToken.try_parse(
                                 tt, None, False, False, None)
                             if (adr is not None):
                                 if (adr.typ ==
                                         AddressItemToken.ItemType.STREET):
                                     ok = True
                         if (not ok):
                             break
                     if (li[k].onto_item is not None):
                         if (noun.begin_token.is_value("МО", None)
                                 or noun.begin_token.is_value("ЛО", None)):
                             return None
             ex_obj = li[k]
         elif (li[k].termin_item is not None):
             if (noun is not None):
                 break
             if (li[k].termin_item.is_always_prefix and k > 0):
                 break
             if (k > 0 and li[k].is_doubt):
                 if (li[k].begin_token == li[k].end_token
                         and li[k].begin_token.is_value("ЗАО", None)):
                     break
             if (li[k].termin_item.is_adjective
                     or li[k].is_geo_in_dictionary):
                 adj_list.append(li[k])
             else:
                 if (ex_obj is not None):
                     geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                                 GeoReferent)
                     if (geo_ is None):
                         break
                     if (ex_obj.is_adjective and
                         ((li[k].termin_item.canonic_text == "СОЮЗ" or
                           li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                         str0_ = str(ex_obj.onto_item)
                         if (not li[k].termin_item.canonic_text in str0_):
                             return None
                     if (li[k].termin_item.canonic_text == "РАЙОН"
                             or li[k].termin_item.canonic_text == "ОКРУГ"
                             or li[k].termin_item.canonic_text == "КРАЙ"):
                         tmp = io.StringIO()
                         for s in geo_.slots:
                             if (s.type_name == GeoReferent.ATTR_TYPE):
                                 print("{0};".format(s.value),
                                       end="",
                                       file=tmp,
                                       flush=True)
                         if (not li[k].termin_item.canonic_text
                                 in Utils.toStringStringIO(tmp).upper()):
                             if (k != 1 or new_name is not None):
                                 break
                             new_name = li[0]
                             new_name.is_adjective = True
                             new_name.onto_item = (None)
                             ex_obj = (None)
                 noun = li[k]
                 if (k == 0):
                     tt = TerrItemToken.try_parse(
                         li[k].begin_token.previous, None, True, False,
                         None)
                     if (tt is not None and tt.morph.class0_.is_adjective):
                         adj_terr_before = True
         else:
             if (ex_obj is not None):
                 break
             if (new_name is not None):
                 break
             new_name = li[k]
         k += 1
     name = None
     alt_name = None
     full_name = None
     morph_ = None
     if (ex_obj is not None):
         if (ex_obj.is_adjective and not ex_obj.morph.language.is_en
                 and noun is None):
             if (attach_always and ex_obj.end_token.next0_ is not None):
                 npt = NounPhraseHelper.try_parse(ex_obj.begin_token,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (ex_obj.end_token.next0_.is_comma_and):
                     pass
                 elif (npt is None):
                     pass
                 else:
                     str0_ = StreetItemToken.try_parse(
                         ex_obj.end_token.next0_, None, False, None, False)
                     if (str0_ is not None):
                         if (str0_.typ == StreetItemType.NOUN
                                 and str0_.end_token == npt.end_token):
                             return None
             else:
                 cit = CityItemToken.try_parse(ex_obj.end_token.next0_,
                                               None, False, None)
                 if (cit is not None
                         and ((cit.typ == CityItemToken.ItemType.NOUN
                               or cit.typ == CityItemToken.ItemType.CITY))):
                     npt = NounPhraseHelper.try_parse(
                         ex_obj.begin_token, NounPhraseParseAttr.NO, 0,
                         None)
                     if (npt is not None
                             and npt.end_token == cit.end_token):
                         pass
                     else:
                         return None
                 elif (ex_obj.begin_token.is_value("ПОДНЕБЕСНЫЙ", None)):
                     pass
                 else:
                     return None
         if (noun is None and ex_obj.can_be_city):
             cit0 = CityItemToken.try_parse_back(
                 ex_obj.begin_token.previous)
             if (cit0 is not None
                     and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                 return None
         if (ex_obj.is_doubt and noun is None):
             ok2 = False
             if (TerrAttachHelper.__can_be_geo_after(
                     ex_obj.end_token.next0_)):
                 ok2 = True
             elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                 if ((ex_obj.end_token.next0_ is not None
                      and ex_obj.end_token.next0_.is_char(')')
                      and ex_obj.begin_token.previous is not None)
                         and ex_obj.begin_token.previous.is_char('(')):
                     ok2 = True
                 elif (ex_obj.chars.is_latin_letter
                       and ex_obj.begin_token.previous is not None):
                     if (ex_obj.begin_token.previous.is_value("IN", None)):
                         ok2 = True
                     elif (ex_obj.begin_token.previous.is_value(
                             "THE", None) and
                           ex_obj.begin_token.previous.previous is not None
                           and
                           ex_obj.begin_token.previous.previous.is_value(
                               "IN", None)):
                         ok2 = True
             if (not ok2):
                 cit0 = CityItemToken.try_parse_back(
                     ex_obj.begin_token.previous)
                 if (cit0 is not None
                         and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                     pass
                 elif (MiscLocationHelper.check_geo_object_before(
                         ex_obj.begin_token.previous)):
                     pass
                 else:
                     return None
         name = ex_obj.onto_item.canonic_text
         morph_ = ex_obj.morph
     elif (new_name is not None):
         if (noun is None):
             return None
         j = 1
         while j < k:
             if (li[j].is_newline_before and not li[0].is_newline_before):
                 if (BracketHelper.can_be_start_of_sequence(
                         li[j].begin_token, False, False)):
                     pass
                 else:
                     return None
             j += 1
         morph_ = noun.morph
         if (new_name.is_adjective):
             if (noun.termin_item.acronym == "АО"):
                 if (noun.begin_token != noun.end_token):
                     return None
                 if (new_name.morph.gender != MorphGender.FEMINIE):
                     return None
             geo_before = None
             tt0 = li[0].begin_token.previous
             if (tt0 is not None and tt0.is_comma_and):
                 tt0 = tt0.previous
             if (not li[0].is_newline_before and tt0 is not None):
                 geo_before = (Utils.asObjectOrNull(tt0.get_referent(),
                                                    GeoReferent))
             if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                     li, new_name, 0)):
                 if (noun.termin_item.is_state):
                     return None
                 if (new_name.can_be_surname and geo_before is None):
                     if (((noun.morph.case_)
                          & new_name.morph.case_).is_undefined):
                         return None
                 if (MiscHelper.is_exists_in_dictionary(
                         new_name.begin_token, new_name.end_token,
                     (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                         | MorphClass.VERB)):
                     if (noun.begin_token != new_name.begin_token):
                         if (geo_before is None):
                             if (len(li) == 2 and
                                     TerrAttachHelper.__can_be_geo_after(
                                         li[1].end_token.next0_)):
                                 pass
                             elif (len(li) == 3
                                   and li[2].termin_item is not None
                                   and TerrAttachHelper.__can_be_geo_after(
                                       li[2].end_token.next0_)):
                                 pass
                             elif (new_name.is_geo_in_dictionary):
                                 pass
                             elif (new_name.end_token.is_newline_after):
                                 pass
                             else:
                                 return None
                 npt = NounPhraseHelper.try_parse(
                     new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS,
                     0, None)
                 if (npt is not None
                         and npt.end_token != new_name.end_token):
                     if (len(li) >= 3 and li[2].termin_item is not None
                             and npt.end_token == li[2].end_token):
                         add_noun = li[2]
                     else:
                         return None
                 rtp = new_name.kit.process_referent(
                     "PERSON", new_name.begin_token)
                 if (rtp is not None):
                     return None
                 name = ProperNameHelper.get_name_ex(
                     new_name.begin_token, new_name.end_token,
                     MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                     noun.termin_item.gender, False, False)
             else:
                 ok = False
                 if (((k + 1) < len(li)) and li[k].termin_item is None
                         and li[k + 1].termin_item is not None):
                     ok = True
                 elif ((k < len(li)) and li[k].onto_item is not None):
                     ok = True
                 elif (k == len(li) and not new_name.is_adj_in_dictionary):
                     ok = True
                 elif (MiscLocationHelper.check_geo_object_before(
                         li[0].begin_token) or can_be_city_before):
                     ok = True
                 elif (MiscLocationHelper.check_geo_object_after(
                         li[k - 1].end_token, False)):
                     ok = True
                 elif (len(li) == 3 and k == 2):
                     cit = CityItemToken.try_parse(li[2].begin_token, None,
                                                   False, None)
                     if (cit is not None):
                         if (cit.typ == CityItemToken.ItemType.CITY
                                 or cit.typ == CityItemToken.ItemType.NOUN):
                             ok = True
                 elif (len(li) == 2):
                     ok = TerrAttachHelper.__can_be_geo_after(
                         li[len(li) - 1].end_token.next0_)
                 if (not ok and not li[0].is_newline_before
                         and not li[0].chars.is_all_lower):
                     rt00 = li[0].kit.process_referent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt00 is not None):
                         ok = True
                 if (noun.termin_item is not None
                         and noun.termin_item.is_strong
                         and new_name.is_adjective):
                     ok = True
                 if (noun.is_doubt and len(adj_list) == 0
                         and geo_before is None):
                     return None
                 name = ProperNameHelper.get_name_ex(
                     new_name.begin_token, new_name.end_token,
                     MorphClass.ADJECTIVE, MorphCase.UNDEFINED,
                     noun.termin_item.gender, False, False)
                 if (not ok and not attach_always):
                     if (MiscHelper.is_exists_in_dictionary(
                             new_name.begin_token, new_name.end_token,
                         (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                             | MorphClass.VERB)):
                         if (exists is not None):
                             for e0_ in exists:
                                 if (e0_.find_slot(GeoReferent.ATTR_NAME,
                                                   name, True) is not None):
                                     ok = True
                                     break
                         if (not ok):
                             return None
                 full_name = "{0} {1}".format(
                     ProperNameHelper.get_name_ex(li[0].begin_token,
                                                  noun.begin_token.previous,
                                                  MorphClass.ADJECTIVE,
                                                  MorphCase.UNDEFINED,
                                                  noun.termin_item.gender,
                                                  False, False),
                     noun.termin_item.canonic_text)
         else:
             if (not attach_always or
                 ((noun.termin_item is not None
                   and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                 is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                 if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                         li, new_name, 0)):
                     if (not is_latin):
                         return None
                 if (not new_name.is_district_name
                         and not BracketHelper.can_be_start_of_sequence(
                             new_name.begin_token, False, False)):
                     if (len(adj_list) == 0
                             and MiscHelper.is_exists_in_dictionary(
                                 new_name.begin_token, new_name.end_token,
                                 (MorphClass.NOUN) | MorphClass.PRONOUN)):
                         if (len(li) == 2 and noun.is_city_region
                                 and (noun.whitespaces_after_count < 2)):
                             pass
                         else:
                             return None
                     if (not is_latin):
                         if ((noun.termin_item.is_region
                              and not attach_always and
                              ((not adj_terr_before or new_name.is_doubt)))
                                 and not noun.is_city_region and
                                 not noun.termin_item.is_specific_prefix):
                             if (not MiscLocationHelper.
                                     check_geo_object_before(
                                         noun.begin_token)):
                                 if (not noun.is_doubt and noun.begin_token
                                         != noun.end_token):
                                     pass
                                 elif ((noun.termin_item.is_always_prefix
                                        and len(li) == 2 and li[0] == noun)
                                       and li[1] == new_name):
                                     pass
                                 else:
                                     return None
                         if (noun.is_doubt and len(adj_list) == 0):
                             if (noun.termin_item.acronym == "МО"
                                     or noun.termin_item.acronym == "ЛО"):
                                 if (k == (len(li) - 1)
                                         and li[k].termin_item is not None):
                                     add_noun = li[k]
                                     k += 1
                                 elif (len(li) == 2 and noun == li[0]
                                       and str(new_name).endswith("совет")):
                                     pass
                                 else:
                                     return None
                             else:
                                 return None
                         pers = new_name.kit.process_referent(
                             "PERSON", new_name.begin_token)
                         if (pers is not None):
                             return None
             name = MiscHelper.get_text_value(new_name.begin_token,
                                              new_name.end_token,
                                              GetTextAttr.NO)
             if (new_name.begin_token != new_name.end_token):
                 ttt = new_name.begin_token.next0_
                 while ttt is not None and ttt.end_char <= new_name.end_char:
                     if (ttt.chars.is_letter):
                         ty = TerrItemToken.try_parse(
                             ttt, None, False, False, None)
                         if ((ty is not None and ty.termin_item is not None
                              and noun is not None)
                                 and ((noun.termin_item.canonic_text
                                       in ty.termin_item.canonic_text
                                       or ty.termin_item.canonic_text
                                       in noun.termin_item.canonic_text))):
                             name = MiscHelper.get_text_value(
                                 new_name.begin_token, ttt.previous,
                                 GetTextAttr.NO)
                             break
                     ttt = ttt.next0_
             if (len(adj_list) > 0):
                 npt = NounPhraseHelper.try_parse(adj_list[0].begin_token,
                                                  NounPhraseParseAttr.NO, 0,
                                                  None)
                 if (npt is not None and npt.end_token == noun.end_token):
                     alt_name = "{0} {1}".format(
                         npt.get_normal_case_text(None,
                                                  MorphNumber.UNDEFINED,
                                                  MorphGender.UNDEFINED,
                                                  False), name)
     else:
         if ((len(li) == 1 and noun is not None
              and noun.end_token.next0_ is not None) and (isinstance(
                  noun.end_token.next0_.get_referent(), GeoReferent))):
             g = Utils.asObjectOrNull(noun.end_token.next0_.get_referent(),
                                      GeoReferent)
             if (noun.termin_item is not None):
                 tyy = noun.termin_item.canonic_text.lower()
                 ooo = False
                 if (g.find_slot(GeoReferent.ATTR_TYPE, tyy, True)
                         is not None):
                     ooo = True
                 elif (tyy.endswith("район") and g.find_slot(
                         GeoReferent.ATTR_TYPE, "район", True) is not None):
                     ooo = True
                 if (ooo):
                     return ReferentToken._new734(g, noun.begin_token,
                                                  noun.end_token.next0_,
                                                  noun.begin_token.morph)
         if ((len(li) == 1 and noun == li[0]
              and li[0].termin_item is not None)
                 and TerrItemToken.try_parse(li[0].end_token.next0_, None,
                                             True, False, None) is None and
                 TerrItemToken.try_parse(li[0].begin_token.previous, None,
                                         True, False, None) is None):
             if (li[0].morph.number == MorphNumber.PLURAL):
                 return None
             cou = 0
             str0_ = li[0].termin_item.canonic_text.lower()
             tt = li[0].begin_token.previous
             first_pass3158 = True
             while True:
                 if first_pass3158: first_pass3158 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after):
                     cou += 10
                 else:
                     cou += 1
                 if (cou > 500):
                     break
                 g = Utils.asObjectOrNull(tt.get_referent(), GeoReferent)
                 if (g is None):
                     continue
                 ok = True
                 cou = 0
                 tt = li[0].end_token.next0_
                 first_pass3159 = True
                 while True:
                     if first_pass3159: first_pass3159 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_newline_before):
                         cou += 10
                     else:
                         cou += 1
                     if (cou > 500):
                         break
                     tee = TerrItemToken.try_parse(tt, None, True, False,
                                                   None)
                     if (tee is None):
                         continue
                     ok = False
                     break
                 if (ok):
                     ii = 0
                     while g is not None and (ii < 3):
                         if (g.find_slot(GeoReferent.ATTR_TYPE, str0_, True)
                                 is not None):
                             return ReferentToken._new734(
                                 g, li[0].begin_token, li[0].end_token,
                                 noun.begin_token.morph)
                         g = g.higher
                         ii += 1
                 break
         return None
     ter = None
     if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
         ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
     else:
         ter = GeoReferent()
         if (ex_obj is not None):
             geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                         GeoReferent)
             if (geo_ is not None and not geo_.is_city):
                 ter._merge_slots2(geo_, li[0].kit.base_language)
             else:
                 ter._add_name(name)
             if (noun is None and ex_obj.can_be_city):
                 ter._add_typ_city(li[0].kit.base_language)
             else:
                 pass
         elif (new_name is not None):
             ter._add_name(name)
             if (alt_name is not None):
                 ter._add_name(alt_name)
         if (noun is not None):
             if (noun.termin_item.canonic_text == "АО"):
                 ter._add_typ(
                     ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua
                      else "АВТОНОМНЫЙ ОКРУГ"))
             elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ"
                   or noun.termin_item.canonic_text
                   == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                 ter._add_typ(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ"
                               if li[0].kit.base_language.is_ua else
                               "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
             elif (noun.termin_item.acronym == "МО"
                   and add_noun is not None):
                 ter._add_typ(add_noun.termin_item.canonic_text)
             else:
                 if (noun.termin_item.canonic_text == "СОЮЗ"
                         and ex_obj is not None
                         and ex_obj.end_char > noun.end_char):
                     return ReferentToken._new734(ter, ex_obj.begin_token,
                                                  ex_obj.end_token,
                                                  ex_obj.morph)
                 ter._add_typ(noun.termin_item.canonic_text)
                 if (noun.termin_item.is_region and ter.is_state):
                     ter._add_typ_reg(li[0].kit.base_language)
         if (ter.is_state and ter.is_region):
             for a in adj_list:
                 if (a.termin_item.is_region):
                     ter._add_typ_reg(li[0].kit.base_language)
                     break
         if (ter.is_state):
             if (full_name is not None):
                 ter._add_name(full_name)
     res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
     if (noun is not None and noun.morph.class0_.is_noun):
         res.morph = noun.morph
     else:
         res.morph = MorphCollection()
         ii = 0
         while ii < k:
             for v in li[ii].morph.items:
                 bi = MorphBaseInfo()
                 bi.copy_from(v)
                 if (noun is not None):
                     if (bi.class0_.is_adjective):
                         bi.class0_ = MorphClass.NOUN
                 res.morph.add_item(bi)
             ii += 1
     if (li[0].termin_item is not None
             and li[0].termin_item.is_specific_prefix):
         res.begin_token = li[0].end_token.next0_
     if (add_noun is not None and add_noun.end_char > res.end_char):
         res.end_token = add_noun.end_token
     if ((isinstance(res.begin_token.previous, TextToken))
             and (res.whitespaces_before_count < 2)):
         tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
         if (tt.term == "АР"):
             for ty in ter.typs:
                 if ("республика" in ty or "республіка" in ty):
                     res.begin_token = tt
                     break
     return res

Example #17

Show file

File: Termin.py Project: MihaJjDa/APCLtask

 def tryAttach(self, t0: 'Token') -> 'TerminToken':
     from pullenti.ner.Token import Token
     from pullenti.ner.TextToken import TextToken
     from pullenti.ner.MetaToken import MetaToken
     from pullenti.ner.MorphCollection import MorphCollection
     from pullenti.ner.core.TerminToken import TerminToken
     t1 = Utils.asObjectOrNull(t0, TextToken)
     if (t1 is None):
         return None
     if (t1.term != self.parts[0].value):
         if (len(self.parts) != 1
                 or not t1.isValue(self.parts[0].value, None)):
             return None
     if (self.tail is None):
         te = t1
         point = False
         if (te.next0_ is not None):
             if (te.next0_.isChar('.')):
                 te = te.next0_
                 point = True
             elif (len(self.parts) > 1):
                 while te.next0_ is not None:
                     if (te.next0_.isCharOf("\\/.")
                             or te.next0_.is_hiphen):
                         te = te.next0_
                         point = True
                     else:
                         break
         if (te is None):
             return None
         tt = te.next0_
         i = 1
         while i < len(self.parts):
             if (tt is not None and tt.whitespaces_before_count > 2):
                 return None
             if (tt is not None
                     and ((tt.is_hiphen or tt.isCharOf("\\/.")))):
                 tt = tt.next0_
             elif (not point and self.parts[i - 1].has_delim):
                 return None
             if (tt is None):
                 return None
             if (isinstance(tt, TextToken)):
                 tet = Utils.asObjectOrNull(tt, TextToken)
                 if (tet.term != self.parts[i].value):
                     if (not tet.isValue(self.parts[i].value, None)):
                         return None
             elif (isinstance(tt, MetaToken)):
                 mt = Utils.asObjectOrNull(tt, MetaToken)
                 if (mt.begin_token != mt.end_token):
                     return None
                 if (not mt.begin_token.isValue(self.parts[i].value,
                                                None)):
                     return None
             te = tt
             if (tt.next0_ is not None and
                 ((tt.next0_.isCharOf(".\\/") or tt.next0_.is_hiphen))):
                 tt = tt.next0_
                 point = True
                 if (tt is not None):
                     te = tt
             else:
                 point = False
             tt = tt.next0_
             i += 1
         res = TerminToken._new603(t0, te, t0 == te)
         if (point):
             res.morph = MorphCollection()
         return res
     t1 = (Utils.asObjectOrNull(t1.next0_, TextToken))
     if (t1 is None or not t1.isCharOf("-\\/")):
         return None
     t1 = (Utils.asObjectOrNull(t1.next0_, TextToken))
     if (t1 is None):
         return None
     if (t1.term[0] != self.tail[0]):
         return None
     return TerminToken(t0, t1)

Example #18

Show file

File: Token.py Project: MihaJjDa/APCLtask

class Token:
    """ Базовый класс для всех токенов """
    def __init__(self, kit_: 'AnalysisKit', begin: int, end: int) -> None:
        self.kit = None
        self.begin_char = 0
        self.end_char = 0
        self.tag = None
        self._m_previous = None
        self._m_next = None
        self.__m_morph = None
        self.chars = None
        self.__m_attrs = 0
        self.kit = kit_
        self.begin_char = begin
        self.end_char = end

    @property
    def length_char(self) -> int:
        """ Длина в исходных символах """
        return (self.end_char - self.begin_char) + 1

    @property
    def previous(self) -> 'Token':
        """ Предыдущий токен """
        return self._m_previous

    @previous.setter
    def previous(self, value) -> 'Token':
        self._m_previous = value
        if (value is not None):
            value._m_next = self
        self.__m_attrs = (0)
        return value

    @property
    def next0_(self) -> 'Token':
        """ Следующий токен """
        return self._m_next

    @next0_.setter
    def next0_(self, value) -> 'Token':
        self._m_next = value
        if (value is not None):
            value._m_previous = self
        self.__m_attrs = (0)
        return value

    @property
    def morph(self) -> 'MorphCollection':
        """ Морфологическая информация """
        if (self.__m_morph is None):
            self.__m_morph = MorphCollection()
        return self.__m_morph

    @morph.setter
    def morph(self, value) -> 'MorphCollection':
        self.__m_morph = value
        return value

    def __str__(self) -> str:
        return self.kit.sofa.text[self.begin_char:self.begin_char +
                                  (self.end_char + 1) - self.begin_char]

    def __getAttr(self, i: int) -> bool:
        if ((((self.__m_attrs) & 1)) == 0):
            self.__m_attrs = (1)
            if (self._m_previous is None):
                self._setAttr(1, True)
                self._setAttr(3, True)
            else:
                j = self._m_previous.end_char + 1
                while j < self.begin_char:
                    ch = self.kit.sofa.text[j]
                    if (Utils.isWhitespace((ch))):
                        self._setAttr(1, True)
                        if ((ord(ch)) == 0xD or (ord(ch)) == 0xA
                                or ch == '\f'):
                            self._setAttr(3, True)
                    j += 1
            if (self._m_next is None):
                self._setAttr(2, True)
                self._setAttr(4, True)
            else:
                j = self.end_char + 1
                while j < self._m_next.begin_char:
                    ch = self.kit.sofa.text[j]
                    if (Utils.isWhitespace(ch)):
                        self._setAttr(2, True)
                        if ((ord(ch)) == 0xD or (ord(ch)) == 0xA
                                or ch == '\f'):
                            self._setAttr(4, True)
                    j += 1
        return (((((self.__m_attrs) >> i)) & 1)) != 0

    def _setAttr(self, i: int, val: bool) -> None:
        if (val):
            self.__m_attrs |= ((1 << i))
        else:
            self.__m_attrs &= (~((1 << i)))

    @property
    def is_whitespace_before(self) -> bool:
        """ Наличие пробельных символов перед """
        return self.__getAttr(1)

    @is_whitespace_before.setter
    def is_whitespace_before(self, value) -> bool:
        self._setAttr(1, value)
        return value

    @property
    def is_whitespace_after(self) -> bool:
        """ Наличие пробельных символов после """
        return self.__getAttr(2)

    @is_whitespace_after.setter
    def is_whitespace_after(self, value) -> bool:
        self._setAttr(2, value)
        return value

    @property
    def is_newline_before(self) -> bool:
        """ Элемент начинается с новой строки.
         Для 1-го элемента всегда true. """
        return self.__getAttr(3)

    @is_newline_before.setter
    def is_newline_before(self, value) -> bool:
        self._setAttr(3, value)
        return value

    @property
    def is_newline_after(self) -> bool:
        """ Элемент заканчивает строку.
         Для последнего элемента всегда true. """
        return self.__getAttr(4)

    @is_newline_after.setter
    def is_newline_after(self, value) -> bool:
        self._setAttr(4, value)
        return value

    @property
    def inner_bool(self) -> bool:
        """ Это используется внутренним образом """
        return self.__getAttr(5)

    @inner_bool.setter
    def inner_bool(self, value) -> bool:
        self._setAttr(5, value)
        return value

    @property
    def not_noun_phrase(self) -> bool:
        """ Это используется внутренним образом 
         (признак того, что здесь не начинается именная группа, чтобы повторно не пытаться выделять) """
        return self.__getAttr(6)

    @not_noun_phrase.setter
    def not_noun_phrase(self, value) -> bool:
        self._setAttr(6, value)
        return value

    @property
    def whitespaces_before_count(self) -> int:
        """ Количество пробелов перед, переход на новую строку = 10, табуляция = 5 """
        if (self.previous is None):
            return 100
        if ((self.previous.end_char + 1) == self.begin_char):
            return 0
        return self.__calcWhitespaces(self.previous.end_char + 1,
                                      self.begin_char - 1)

    @property
    def newlines_before_count(self) -> int:
        """ Количество переходов на новую строку перед """
        ch0 = chr(0)
        res = 0
        txt = self.kit.sofa.text
        for p in range(self.begin_char - 1, -1, -1):
            ch = txt[p]
            if ((ord(ch)) == 0xA):
                res += 1
            elif ((ord(ch)) == 0xD and (ord(ch0)) != 0xA):
                res += 1
            elif (ch == '\f'):
                res += 10
            elif (not Utils.isWhitespace(ch)):
                break
            ch0 = ch
        return res

    @property
    def newlines_after_count(self) -> int:
        """ Количество переходов на новую строку перед """
        ch0 = chr(0)
        res = 0
        txt = self.kit.sofa.text
        p = self.end_char + 1
        while p < len(txt):
            ch = txt[p]
            if ((ord(ch)) == 0xD):
                res += 1
            elif ((ord(ch)) == 0xA and (ord(ch0)) != 0xD):
                res += 1
            elif (ch == '\f'):
                res += 10
            elif (not Utils.isWhitespace(ch)):
                break
            ch0 = ch
            p += 1
        return res

    @property
    def whitespaces_after_count(self) -> int:
        """ Количество пробелов перед, переход на новую строку = 10, табуляция = 5 """
        if (self.next0_ is None):
            return 100
        if ((self.end_char + 1) == self.next0_.begin_char):
            return 0
        return self.__calcWhitespaces(self.end_char + 1,
                                      self.next0_.begin_char - 1)

    def __calcWhitespaces(self, p0: int, p1: int) -> int:
        if ((p0 < 0) or p0 > p1 or p1 >= len(self.kit.sofa.text)):
            return -1
        res = 0
        i = p0
        while i <= p1:
            ch = self.kit.getTextCharacter(i)
            if (ch == '\r' or ch == '\n'):
                res += 10
                ch1 = self.kit.getTextCharacter(i + 1)
                if (ch != ch1 and ((ch1 == '\r' or ch1 == '\n'))):
                    i += 1
            elif (ch == '\t'):
                res += 5
            elif (ch == '\u0007'):
                res += 100
            elif (ch == '\f'):
                res += 100
            else:
                res += 1
            i += 1
        return res

    @property
    def is_hiphen(self) -> bool:
        """ Это символ переноса """
        ch = self.kit.sofa.text[self.begin_char]
        return LanguageHelper.isHiphen(ch)

    @property
    def is_table_control_char(self) -> bool:
        """ Это спец-символы для табличных элементов (7h, 1Eh, 1Fh) """
        ch = self.kit.sofa.text[self.begin_char]
        return (ord(ch)) == 7 or (ord(ch)) == 0x1F or (ord(ch)) == 0x1E

    @property
    def is_and(self) -> bool:
        """ Это соединительный союз И (на всех языках) """
        return False

    @property
    def is_or(self) -> bool:
        """ Это соединительный союз ИЛИ (на всех языках) """
        return False

    @property
    def is_comma(self) -> bool:
        """ Это запятая """
        return self.isChar(',')

    @property
    def is_comma_and(self) -> bool:
        """ Это запятая или союз И """
        return self.is_comma or self.is_and

    def isChar(self, ch: 'char') -> bool:
        """ Токен состоит из символа
        
        Args:
            ch('char'): проверяемый символ
        
        """
        if (self.begin_char != self.end_char):
            return False
        return self.kit.sofa.text[self.begin_char] == ch

    def isCharOf(self, chars_: str) -> bool:
        """ Токен состоит из одного символа, который есть в указанной строке
        
        Args:
            chars_(str): строка возможных символов
        
        """
        if (self.begin_char != self.end_char):
            return False
        return chars_.find(self.kit.sofa.text[self.begin_char]) >= 0

    def isValue(self, term: str, termua: str = None) -> bool:
        return False

    @property
    def is_letters(self) -> bool:
        """ Признак того, что это буквенный текстовой токен (TextToken) """
        return False

    @property
    def is_number(self) -> bool:
        """ Это число (в различных вариантах задания) """
        return False

    @property
    def is_referent(self) -> bool:
        """ Это сущность (Referent) """
        return False

    def getReferent(self) -> 'Referent':
        """ Ссылка на сущность (для ReferentToken) """
        return None

    def getReferents(self) -> typing.List['Referent']:
        """ Получить список ссылок на все сущности, скрывающиеся под элементом
         (дело в том, что одни сущности могут поглощать дркгие, например, адрес поглотит город)
        
        """
        return None

    def getNormalCaseText(self,
                          mc: 'MorphClass' = None,
                          single_number: bool = False,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
        """ Получить связанный с токеном текст в именительном падеже
        
        Args:
            mc(MorphClass): 
            single_number(bool): переводить ли в единственное число
        
        """
        return str(self)

    def getSourceText(self) -> str:
        """ Получить чистый фрагмент исходного текста
        
        """
        len0_ = (self.end_char + 1) - self.begin_char
        if ((len0_ < 1) or (self.begin_char < 0)):
            return None
        if ((self.begin_char + len0_) > len(self.kit.sofa.text)):
            return None
        return self.kit.sofa.text[self.begin_char:self.begin_char + len0_]

    def getMorphClassInDictionary(self) -> 'MorphClass':
        """ Проверка, что это текстовый токен и есть в словаре соотв. тип
        
        Args:
            cla: 
        
        """
        return self.morph.class0_

    def _serialize(self, stream: io.IOBase) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        SerializerHelper.serializeInt(stream, self.begin_char)
        SerializerHelper.serializeInt(stream, self.end_char)
        SerializerHelper.serializeInt(stream, self.__m_attrs)
        SerializerHelper.serializeInt(stream, self.chars.value)
        if (self.__m_morph is None):
            self.__m_morph = MorphCollection()
        self.__m_morph._serialize(stream)

    def _deserialize(self, stream: io.IOBase, kit_: 'AnalysisKit',
                     vers: int) -> None:
        from pullenti.ner.core.internal.SerializerHelper import SerializerHelper
        self.kit = kit_
        self.begin_char = SerializerHelper.deserializeInt(stream)
        self.end_char = SerializerHelper.deserializeInt(stream)
        self.__m_attrs = (SerializerHelper.deserializeInt(stream))
        self.chars = CharsInfo._new2656(
            SerializerHelper.deserializeInt(stream))
        self.__m_morph = MorphCollection()
        self.__m_morph._deserialize(stream)

Example #19

Show file

 def _createReferentToken(p : 'PersonReferent', begin : 'Token', end : 'Token', morph_ : 'MorphCollection', attrs : typing.List['PersonAttrToken'], ad : 'PersonAnalyzerData', for_attribute : bool, after_be_predicate : bool) -> 'ReferentToken':
     from pullenti.ner.person.internal.PersonIdentityToken import PersonIdentityToken
     if (p is None): 
         return None
     has_prefix = False
     if (attrs is not None): 
         for a in attrs: 
             if (a.typ == PersonAttrTerminType.BESTREGARDS): 
                 has_prefix = True
             else: 
                 if (a.begin_char < begin.begin_char): 
                     begin = a.begin_token
                 if (a.typ != PersonAttrTerminType.PREFIX): 
                     if (a.age is not None): 
                         p.addSlot(PersonReferent.ATTR_AGE, a.age, False, 0)
                     if (a.prop_ref is None): 
                         p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                     else: 
                         p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                 elif (a.gender == MorphGender.FEMINIE and not p.is_female): 
                     p.is_female = True
                 elif (a.gender == MorphGender.MASCULINE and not p.is_male): 
                     p.is_male = True
     elif ((isinstance(begin.previous, TextToken)) and (begin.whitespaces_before_count < 3)): 
         if ((begin.previous).term == "ИП"): 
             a = PersonAttrToken(begin.previous, begin.previous)
             a.prop_ref = PersonPropertyReferent()
             a.prop_ref.name = "индивидуальный предприниматель"
             p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
             begin = begin.previous
     m0 = MorphCollection()
     for it in morph_.items: 
         bi = MorphBaseInfo(it)
         bi.number = MorphNumber.SINGULAR
         if (bi.gender == MorphGender.UNDEFINED): 
             if (p.is_male and not p.is_female): 
                 bi.gender = MorphGender.MASCULINE
             if (not p.is_male and p.is_female): 
                 bi.gender = MorphGender.FEMINIE
         m0.addItem(bi)
     morph_ = m0
     if ((attrs is not None and len(attrs) > 0 and not attrs[0].morph.case_.is_undefined) and morph_.case_.is_undefined): 
         morph_.case_ = attrs[0].morph.case_
         if (attrs[0].morph.number == MorphNumber.SINGULAR): 
             morph_.number = MorphNumber.SINGULAR
         if (p.is_male and not p.is_female): 
             morph_.gender = MorphGender.MASCULINE
         elif (p.is_female): 
             morph_.gender = MorphGender.FEMINIE
     if (begin.previous is not None): 
         ttt = begin.previous
         if (ttt.isValue("ИМЕНИ", "ІМЕНІ")): 
             for_attribute = True
         else: 
             if (ttt.isChar('.') and ttt.previous is not None): 
                 ttt = ttt.previous
             if (ttt.whitespaces_after_count < 3): 
                 if (ttt.isValue("ИМ", "ІМ")): 
                     for_attribute = True
     if (for_attribute): 
         return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
     if ((begin.previous is not None and begin.previous.is_comma_and and (isinstance(begin.previous.previous, ReferentToken))) and (isinstance(begin.previous.previous.getReferent(), PersonReferent))): 
         rt00 = Utils.asObjectOrNull(begin.previous.previous, ReferentToken)
         ttt = rt00
         while ttt is not None: 
             if (ttt.previous is None or not ((isinstance(ttt.previous.previous, ReferentToken)))): 
                 break
             if (not ttt.previous.is_comma_and or not ((isinstance(ttt.previous.previous.getReferent(), PersonReferent)))): 
                 break
             rt00 = (Utils.asObjectOrNull(ttt.previous.previous, ReferentToken))
             ttt = (rt00)
         if (isinstance(rt00.begin_token.getReferent(), PersonPropertyReferent)): 
             ok = False
             if ((rt00.begin_token).end_token.next0_ is not None and (rt00.begin_token).end_token.next0_.isChar(':')): 
                 ok = True
             elif (rt00.begin_token.morph.number == MorphNumber.PLURAL): 
                 ok = True
             if (ok): 
                 p.addSlot(PersonReferent.ATTR_ATTR, rt00.begin_token.getReferent(), False, 0)
     if (ad is not None): 
         if (ad.overflow_level > 10): 
             return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)
         ad.overflow_level += 1
     attrs1 = None
     has_position = False
     open_br = False
     t = end.next0_
     first_pass3095 = True
     while True:
         if first_pass3095: first_pass3095 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char): 
             break
         if (t.is_newline_before): 
             if (t.newlines_before_count > 2): 
                 break
             if (attrs1 is not None and len(attrs1) > 0): 
                 break
             ml = MailLine.parse(t, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM): 
                 break
             if (t.chars.is_capital_upper): 
                 attr1 = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
                 ok1 = False
                 if (attr1 is not None): 
                     if (has_prefix or attr1.is_newline_after or ((attr1.end_token.next0_ is not None and attr1.end_token.next0_.is_table_control_char))): 
                         ok1 = True
                     else: 
                         tt2 = t.next0_
                         while tt2 is not None and tt2.end_char <= attr1.end_char: 
                             if (tt2.is_whitespace_before): 
                                 ok1 = True
                             tt2 = tt2.next0_
                 else: 
                     ttt = PersonHelper.__correctTailAttributes(p, t)
                     if (ttt is not None and ttt != t): 
                         t = ttt
                         end = t
                         continue
                 if (not ok1): 
                     break
         if (t.is_hiphen or t.isCharOf("_>|")): 
             continue
         if (t.isValue("МОДЕЛЬ", None)): 
             break
         tt = PersonHelper.__correctTailAttributes(p, t)
         if (tt != t and tt is not None): 
             t = tt
             end = t
             continue
         is_be = False
         if (t.isChar('(') and t == end.next0_): 
             open_br = True
             t = t.next0_
             if (t is None): 
                 break
             pit1 = PersonItemToken.tryAttach(t, None, PersonItemToken.ParseAttr.NO, None)
             if ((pit1 is not None and t.chars.is_capital_upper and pit1.end_token.next0_ is not None) and (isinstance(t, TextToken)) and pit1.end_token.next0_.isChar(')')): 
                 if (pit1.lastname is not None): 
                     inf = MorphBaseInfo._new2321(MorphCase.NOMINATIVE)
                     if (p.is_male): 
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.MASCULINE), MorphGender)
                     if (p.is_female): 
                         inf.gender = Utils.valToEnum((inf.gender) | (MorphGender.FEMINIE), MorphGender)
                     sur = PersonIdentityToken.createLastname(pit1, inf)
                     if (sur is not None): 
                         p._addFioIdentity(sur, None, None)
                         t = pit1.end_token.next0_
                         end = t
                         continue
         elif (t.is_comma): 
             t = t.next0_
             if ((isinstance(t, TextToken)) and (t).isValue("WHO", None)): 
                 continue
         elif ((isinstance(t, TextToken)) and (t).is_verb_be): 
             t = t.next0_
         elif (t.is_and and t.is_whitespace_after and not t.is_newline_after): 
             if (t == end.next0_): 
                 break
             t = t.next0_
         elif (t.is_hiphen and t == end.next0_): 
             t = t.next0_
         elif (t.isChar('.') and t == end.next0_ and has_prefix): 
             t = t.next0_
         ttt2 = PersonHelper.createNickname(p, t)
         if (ttt2 is not None): 
             end = ttt2
             t = end
             continue
         if (t is None): 
             break
         attr = None
         attr = PersonAttrToken.tryAttach(t, (None if ad is None else ad.local_ontology), PersonAttrToken.PersonAttrAttachAttrs.NO)
         if (attr is None): 
             if ((t is not None and t.getReferent() is not None and t.getReferent().type_name == "GEO") and attrs1 is not None and open_br): 
                 continue
             if ((t.chars.is_capital_upper and open_br and t.next0_ is not None) and t.next0_.isChar(')')): 
                 if (p.findSlot(PersonReferent.ATTR_LASTNAME, None, True) is None): 
                     p.addSlot(PersonReferent.ATTR_LASTNAME, t.getSourceText().upper(), False, 0)
                     t = t.next0_
                     end = t
             if (t is not None and t.isValue("КОТОРЫЙ", None) and t.morph.number == MorphNumber.SINGULAR): 
                 if (not p.is_female and t.morph.gender == MorphGender.FEMINIE): 
                     p.is_female = True
                     p._correctData()
                 elif (not p.is_male and t.morph.gender == MorphGender.MASCULINE): 
                     p.is_male = True
                     p._correctData()
             break
         if (attr.morph.number == MorphNumber.PLURAL): 
             break
         if (attr.typ == PersonAttrTerminType.BESTREGARDS): 
             break
         if (attr.is_doubt): 
             if (has_prefix): 
                 pass
             elif (t.is_newline_before and attr.is_newline_after): 
                 pass
             elif (t.previous is not None and ((t.previous.is_hiphen or t.previous.isChar(':')))): 
                 pass
             else: 
                 break
         if (not morph_.case_.is_undefined and not attr.morph.case_.is_undefined): 
             if (((morph_.case_) & attr.morph.case_).is_undefined and not is_be): 
                 break
         if (open_br): 
             if (PersonAnalyzer._tryAttachPerson(t, ad, False, 0, True) is not None): 
                 break
         if (attrs1 is None): 
             if (t.previous.is_comma and t.previous == end.next0_): 
                 ttt = attr.end_token.next0_
                 if (ttt is not None): 
                     if (ttt.morph.class0_.is_verb): 
                         if (MiscHelper.canBeStartOfSentence(begin)): 
                             pass
                         else: 
                             break
             attrs1 = list()
         attrs1.append(attr)
         if (attr.typ == PersonAttrTerminType.POSITION or attr.typ == PersonAttrTerminType.KING): 
             if (not is_be): 
                 has_position = True
         elif (attr.typ != PersonAttrTerminType.PREFIX): 
             if (attr.typ == PersonAttrTerminType.OTHER and attr.age is not None): 
                 pass
             else: 
                 attrs1 = (None)
                 break
         t = attr.end_token
     if (attrs1 is not None and has_position and attrs is not None): 
         te1 = attrs[len(attrs) - 1].end_token.next0_
         te2 = attrs1[0].begin_token
         if (te1.whitespaces_after_count > te2.whitespaces_before_count and (te2.whitespaces_before_count < 2)): 
             pass
         elif (attrs1[0].age is not None): 
             pass
         elif (((te1.is_hiphen or te1.isChar(':'))) and not attrs1[0].is_newline_before and ((te2.previous.is_comma or te2.previous == end))): 
             pass
         else: 
             for a in attrs: 
                 if (a.typ == PersonAttrTerminType.POSITION): 
                     te = attrs1[len(attrs1) - 1].end_token
                     if (te.next0_ is not None): 
                         if (not te.next0_.isChar('.')): 
                             attrs1 = (None)
                             break
     if (attrs1 is not None and not has_prefix): 
         attr = attrs1[len(attrs1) - 1]
         ok = False
         if (attr.end_token.next0_ is not None and attr.end_token.next0_.chars.is_capital_upper): 
             ok = True
         else: 
             rt = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
             if (rt is not None and (isinstance(rt.referent, PersonReferent))): 
                 ok = True
         if (ok): 
             if (attr.begin_token.whitespaces_before_count > attr.end_token.whitespaces_after_count): 
                 attrs1 = (None)
             elif (attr.begin_token.whitespaces_before_count == attr.end_token.whitespaces_after_count): 
                 rt1 = PersonAnalyzer._tryAttachPerson(attr.begin_token, ad, False, -1, False)
                 if (rt1 is not None): 
                     attrs1 = (None)
     if (attrs1 is not None): 
         for a in attrs1: 
             if (a.typ != PersonAttrTerminType.PREFIX): 
                 if (a.age is not None): 
                     p.addSlot(PersonReferent.ATTR_AGE, a.age, True, 0)
                 elif (a.prop_ref is None): 
                     p.addSlot(PersonReferent.ATTR_ATTR, a.value, False, 0)
                 else: 
                     p.addSlot(PersonReferent.ATTR_ATTR, a, False, 0)
                 end = a.end_token
                 if (a.gender != MorphGender.UNDEFINED and not p.is_female and not p.is_male): 
                     if (a.gender == MorphGender.MASCULINE and not p.is_male): 
                         p.is_male = True
                         p._correctData()
                     elif (a.gender == MorphGender.FEMINIE and not p.is_female): 
                         p.is_female = True
                         p._correctData()
         if (open_br): 
             if (end.next0_ is not None and end.next0_.isChar(')')): 
                 end = end.next0_
     crlf_cou = 0
     t = end.next0_
     first_pass3096 = True
     while True:
         if first_pass3096: first_pass3096 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char): 
             break
         if (t.is_newline_before): 
             ml = MailLine.parse(t, 0)
             if (ml is not None and ml.typ == MailLine.Types.FROM): 
                 break
             crlf_cou += 1
         if (t.isCharOf(":,(") or t.is_hiphen): 
             continue
         if (t.isChar('.') and t == end.next0_): 
             continue
         r = t.getReferent()
         if (r is not None): 
             if (r.type_name == "PHONE" or r.type_name == "URI" or r.type_name == "ADDRESS"): 
                 ty = r.getStringValue("SCHEME")
                 if (r.type_name == "URI"): 
                     if ((ty != "mailto" and ty != "skype" and ty != "ICQ") and ty != "http"): 
                         break
                 p._addContact(r)
                 end = t
                 crlf_cou = 0
                 continue
         if (isinstance(r, PersonIdentityReferent)): 
             p.addSlot(PersonReferent.ATTR_IDDOC, r, False, 0)
             end = t
             crlf_cou = 0
             continue
         if (r is not None and r.type_name == "ORGANIZATION"): 
             if (t.next0_ is not None and t.next0_.morph.class0_.is_verb): 
                 break
             if (begin.previous is not None and begin.previous.morph.class0_.is_verb): 
                 break
             if (t.whitespaces_after_count == 1): 
                 break
             exist = False
             for s in p.slots: 
                 if (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonPropertyReferent))): 
                     pr = Utils.asObjectOrNull(s.value, PersonPropertyReferent)
                     if (pr.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None): 
                         exist = True
                         break
                 elif (s.type_name == PersonReferent.ATTR_ATTR and (isinstance(s.value, PersonAttrToken))): 
                     pr = Utils.asObjectOrNull(s.value, PersonAttrToken)
                     if (pr.referent.findSlot(PersonPropertyReferent.ATTR_REF, r, True) is not None): 
                         exist = True
                         break
             if (not exist): 
                 pat = PersonAttrToken(t, t)
                 pat.prop_ref = PersonPropertyReferent._new2291("сотрудник")
                 pat.prop_ref.addSlot(PersonPropertyReferent.ATTR_REF, r, False, 0)
                 p.addSlot(PersonReferent.ATTR_ATTR, pat, False, 0)
             continue
         if (r is not None): 
             break
         if (not has_prefix or crlf_cou >= 2): 
             break
         rt = t.kit.processReferent("PERSON", t)
         if (rt is not None): 
             break
     if (ad is not None): 
         ad.overflow_level -= 1
     return ReferentToken._new2329(p, begin, end, morph_, p._m_person_identity_typ)

Example #20

Show file

 def __try_parse_ru(t: 'Token', can_be_partition: bool,
                    can_be_adj_partition: bool,
                    force_parse: bool) -> 'VerbPhraseToken':
     res = None
     t0 = t
     not0_ = None
     has_verb = False
     verb_be_before = False
     prep = None
     first_pass3070 = True
     while True:
         if first_pass3070: first_pass3070 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (not (isinstance(t, TextToken))):
             break
         tt = Utils.asObjectOrNull(t, TextToken)
         is_participle = False
         if (tt.term == "НЕ"):
             not0_ = t
             continue
         ty = 0
         norm = None
         mc = tt.get_morph_class_in_dictionary()
         if (tt.term == "НЕТ"):
             if (has_verb):
                 break
             ty = 1
         elif (tt.term == "ДОПУСТИМО"):
             ty = 3
         elif (mc.is_adverb and not mc.is_verb):
             ty = 2
         elif (tt.is_pure_verb or tt.is_verb_be):
             ty = 1
             if (has_verb):
                 if (not tt.morph.contains_attr("инф.", None)):
                     if (verb_be_before):
                         pass
                     else:
                         break
         elif (mc.is_verb):
             if (mc.is_preposition or mc.is_misc or mc.is_pronoun):
                 pass
             elif (mc.is_noun):
                 if (tt.term == "СТАЛИ" or tt.term == "СТЕКЛО"
                         or tt.term == "БЫЛИ"):
                     ty = 1
                 elif (not tt.chars.is_all_lower
                       and not MiscHelper.can_be_start_of_sentence(tt)):
                     ty = 1
                 elif (mc.is_adjective and can_be_partition):
                     ty = 1
                 elif (force_parse):
                     ty = 1
             elif (mc.is_proper):
                 if (tt.chars.is_all_lower):
                     ty = 1
             else:
                 ty = 1
             if (mc.is_adjective):
                 is_participle = True
             if (not tt.morph.case_.is_undefined):
                 is_participle = True
             if (not can_be_partition and is_participle):
                 break
             if (has_verb):
                 if (tt.morph.contains_attr("инф.", None)):
                     pass
                 elif (not is_participle):
                     pass
                 else:
                     break
         elif ((mc.is_adjective and tt.morph.contains_attr("к.ф.", None)
                and tt.term.endswith("О")) and NounPhraseHelper.try_parse(
                    tt, NounPhraseParseAttr.NO, 0, None) is None):
             ty = 2
         elif (mc.is_adjective
               and ((can_be_partition or can_be_adj_partition))):
             if (tt.morph.contains_attr("к.ф.", None)
                     and not can_be_adj_partition):
                 break
             norm = tt.get_normal_case_text(MorphClass.ADJECTIVE,
                                            MorphNumber.SINGULAR,
                                            MorphGender.MASCULINE, False)
             if (norm.endswith("ЙШИЙ")):
                 pass
             else:
                 grs = DerivateService.find_derivates(norm, True, None)
                 if (grs is not None and len(grs) > 0):
                     hverb = False
                     hpart = False
                     for gr in grs:
                         for w in gr.words:
                             if (w.class0_.is_adjective
                                     and w.class0_.is_verb):
                                 if (w.spelling == norm):
                                     hpart = True
                             elif (w.class0_.is_verb):
                                 hverb = True
                     if (hpart and hverb):
                         ty = 3
                     elif (can_be_adj_partition):
                         ty = 3
                     if (ty != 3 and not Utils.isNullOrEmpty(grs[0].prefix)
                             and norm.startswith(grs[0].prefix)):
                         hverb = False
                         hpart = False
                         norm1 = norm[len(grs[0].prefix):]
                         grs = DerivateService.find_derivates(
                             norm1, True, None)
                         if (grs is not None and len(grs) > 0):
                             for gr in grs:
                                 for w in gr.words:
                                     if (w.class0_.is_adjective
                                             and w.class0_.is_verb):
                                         if (w.spelling == norm1):
                                             hpart = True
                                     elif (w.class0_.is_verb):
                                         hverb = True
                         if (hpart and hverb):
                             ty = 3
         if (ty == 0 and t == t0 and can_be_partition):
             prep = PrepositionHelper.try_parse(t)
             if (prep is not None):
                 t = prep.end_token
                 continue
         if (ty == 0):
             break
         if (res is None):
             res = VerbPhraseToken(t0, t)
         res.end_token = t
         it = VerbPhraseItemToken._new603(t, t, MorphCollection(t.morph))
         if (not0_ is not None):
             it.begin_token = not0_
             it.not0_ = True
             not0_ = (None)
         it.is_adverb = ty == 2
         if (prep is not None and not t.morph.case_.is_undefined
                 and len(res.items) == 0):
             if (((prep.next_case) & t.morph.case_).is_undefined):
                 return None
             it.morph.remove_items(prep.next_case, False)
             res.preposition = prep
         if (norm is None):
             norm = t.get_normal_case_text(
                 (MorphClass.ADJECTIVE if ty == 3 else
                  (MorphClass.ADVERB if ty == 2 else MorphClass.VERB)),
                 MorphNumber.SINGULAR, MorphGender.MASCULINE, False)
             if (ty == 1 and not tt.morph.case_.is_undefined):
                 mi = MorphWordForm._new604(MorphCase.NOMINATIVE,
                                            MorphNumber.SINGULAR,
                                            MorphGender.MASCULINE)
                 for mit in tt.morph.items:
                     if (isinstance(mit, MorphWordForm)):
                         mi.misc = mit.misc
                         break
                 nnn = MorphologyService.get_wordform("КК" + t.term, mi)
                 if (nnn is not None):
                     norm = nnn[2:]
         it.normal = norm
         res.items.append(it)
         if (not has_verb and ((ty == 1 or ty == 3))):
             res.morph = it.morph
             has_verb = True
         if (ty == 1 or ty == 3):
             if (ty == 1 and tt.is_verb_be):
                 verb_be_before = True
             else:
                 verb_be_before = False
     if (not has_verb):
         return None
     for i in range(len(res.items) - 1, 0, -1):
         if (res.items[i].is_adverb):
             del res.items[i]
             res.end_token = res.items[i - 1].end_token
         else:
             break
     return res