def __calc_newline_between_coef(fr1: 'FragToken', fr2: 'FragToken') -> int:
    """Score how strongly the newline between fragments fr1 and fr2 separates them.

    Returns 1 when the break looks like a real fragment boundary, -1 when the
    two fragments appear to be one logical unit, 0 when neutral.
    """
    # More than one newline (a blank line) is a strong separator.
    if (fr1.newlines_after_count > 1):
        return 1
    # If a bracket/quote sequence opened inside fr1 runs on into fr2
    # (parsed with CANBEMANYLINES), the fragments belong together.
    tt = fr1.begin_token
    while tt is not None and tt.end_char <= fr1.end_char:
        if (BracketHelper.can_be_start_of_sequence(tt, False, False)):
            br = BracketHelper.try_parse(tt, BracketParseAttr.CANBEMANYLINES, 100)
            if (br is not None and br.end_char >= fr2.begin_char):
                return -1
        tt = tt.next0_
    t = fr1.end_token
    # Terminal punctuation at the end of fr1 suggests a genuine break.
    if (t.is_char_of(":;.")):
        return 1
    # A trailing preposition/conjunction means the sentence continues in fr2.
    if ((isinstance(t, TextToken)) and ((t.morph.class0_.is_preposition or t.morph.class0_.is_conjunction))):
        return -1
    t1 = fr2.begin_token
    # NOTE(review): the nesting below is reconstructed from a flattened source;
    # the bracket-start check is assumed to apply only when t1 is a TextToken,
    # with the NumberToken case as the alternative branch — verify against the
    # original Pullenti source.
    if (isinstance(t1, TextToken)):
        # fr2 starting in lowercase reads as a continuation of fr1.
        if (t1.chars.is_all_lower):
            return -1
        if (BracketHelper.can_be_start_of_sequence(t1, False, False)):
            if (t.chars.is_all_lower):
                return -1
    elif (isinstance(t1, NumberToken)):
        if (t.chars.is_all_lower):
            return -1
    # Lowercase ending joined to a fragment terminated by ';' — list item style.
    if (t.chars.is_all_lower):
        if (fr2.end_token.is_char(';')):
            return -1
    return 0
def try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
    """Public entry point: parse a weapon item starting at token t.

    First tries a direct parse; on failure re-parses from the head noun of a
    noun phrase covering t. For a successfully parsed NAME item, a bracketed
    alias immediately after it may be attached as alt_value.
    """
    res = WeaponItemToken.__try_parse(t, prev, after_conj, attach_high)
    if (res is None):
        # Direct parse failed — try from the noun of a noun phrase whose
        # noun starts strictly after the phrase start (i.e. has modifiers).
        npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
        if (npt is not None and npt.noun.begin_char > npt.begin_char):
            res = WeaponItemToken.__try_parse(npt.noun.begin_token, prev, after_conj, attach_high)
            if (res is not None):
                if (res.typ == WeaponItemToken.Typs.NOUN):
                    str0_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                    # Fix a normalization artefact of the Russian morphology.
                    if (str0_ == "РУЧНОЙ ГРАНАТ"):
                        str0_ = "РУЧНАЯ ГРАНАТА"
                    # NOTE(review): nesting reconstructed from flattened source —
                    # the re-parse is assumed to be accepted (begin extended to t)
                    # only when the normalized phrase ends with the parsed value;
                    # verify against the original Pullenti source.
                    if ((Utils.ifNotNull(str0_, "")).endswith(res.value)):
                        if (res.alt_value is None):
                            res.alt_value = str0_
                        else:
                            # Keep only the modifier part of the phrase in front
                            # of the existing alternative value.
                            str0_ = str0_[0:0+len(str0_) - len(res.value)].strip()
                            res.alt_value = "{0} {1}".format(str0_, res.alt_value)
                        res.begin_token = t
                        return res
        return None
    if (res.typ == WeaponItemToken.Typs.NAME):
        # A parenthesised Cyrillic/Latin transliteration right after the name
        # becomes its alternative value.
        br = BracketHelper.try_parse(res.end_token.next0_, BracketParseAttr.NO, 100)
        if (br is not None and br.is_char('(')):
            alt = MiscHelper.get_text_value_of_meta_token(br, GetTextAttr.NO)
            if (MiscHelper.can_be_equal_cyr_and_latss(res.value, alt)):
                res.alt_value = alt
                res.end_token = br.end_token
    return res
def __try_attach_speciality(t: 'Token', key_word_before: bool) -> 'TitleItemToken':
    """Try to read a speciality code (e.g. "01.02.03") starting at t.

    key_word_before indicates that a speciality keyword was already seen;
    without it, the match is treated as "suspicious" (susp) and only a strict
    NN.NN.NN digit layout is accepted. Returns a SPECIALITY TitleItemToken
    or None.
    """
    if (t is None):
        return None
    susp = False
    if (not key_word_before):
        # No keyword and not at line start — demand the strict layout.
        if (not t.is_newline_before):
            susp = True
    val = None
    t0 = t
    dig_count = 0
    # Collect up to three digit groups separated by '.', ',' or hyphen.
    for i in range(3):
        nt = Utils.asObjectOrNull(t, NumberToken)
        if (nt is None):
            break
        if (nt.typ != NumberSpellingType.DIGIT or nt.morph.class0_.is_adjective):
            break
        if (val is None):
            val = io.StringIO()
        # Suspicious mode: every group must be exactly two characters.
        if (susp and t.length_char != 2):
            return None
        digs = nt.get_source_text()
        dig_count += len(digs)
        print(digs, end="", file=val)
        if (t.next0_ is None):
            break
        t = t.next0_
        if (t.is_char_of(".,") or t.is_hiphen):
            if (susp and (i < 2)):
                # Suspicious mode: only a tightly attached '.' may separate groups.
                if (not t.is_char('.') or t.is_whitespace_after or t.is_whitespace_before):
                    return None
            if (t.next0_ is not None):
                t = t.next0_
    # Need at least 5 digits overall to look like a speciality code.
    if (val is None or (dig_count < 5)):
        return None
    if (dig_count != 6):
        # Non-canonical length is only accepted after an explicit keyword.
        if (not key_word_before):
            return None
    else:
        # Canonical 6 digits: re-insert the dots to form NN.NN.NN.
        Utils.insertStringIO(val, 4, '.')
        Utils.insertStringIO(val, 2, '.')
    # Extend the token span to the end of the line, skipping bracketed text.
    # (first_pass emulates a C# for-loop: increment runs before each
    # iteration except the first.)
    tt = t.next0_
    first_pass3395 = True
    while True:
        if first_pass3395:
            first_pass3395 = False
        else:
            tt = tt.next0_
        if (not (tt is not None)):
            break
        if (tt.is_newline_before):
            break
        br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
        if (br is not None):
            tt = br.end_token
            t = tt
            continue
        t = tt
    return TitleItemToken._new2655(t0, t, TitleItemToken.Types.SPECIALITY, Utils.toStringStringIO(val))
def __check_detail(rt: 'ReferentToken') -> None:
    """If a parenthesised remark immediately follows rt, store its text in
    rt.referent.detail and extend rt over the closing bracket."""
    tail = rt.end_token.next0_
    # Nothing follows, or the next token is separated by a wide gap — bail out.
    if tail is None or rt.end_token.whitespaces_after_count > 2:
        return
    if not tail.is_char('('):
        return
    br = BracketHelper.try_parse(tail, BracketParseAttr.NO, 100)
    if br is None:
        return
    # Text strictly between the brackets becomes the detail attribute.
    rt.referent.detail = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
    rt.end_token = br.end_token
def _add_name(self, begin: 'Token', end: 'Token') -> 'Termin':
    """Record the text between *begin* and *end* as the title-page NAME slot
    and return an upper-cased Termin for it, or None if the span is empty."""
    # Peel off a quote/bracket pair that encloses the entire span.
    if BracketHelper.can_be_start_of_sequence(begin, True, False):
        quoted = BracketHelper.try_parse(begin, BracketParseAttr.NO, 100)
        if quoted is not None and quoted.end_token == end:
            begin = begin.next0_
            end = end.previous
    attrs = Utils.valToEnum((GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES), GetTextAttr)
    val = MiscHelper.get_text_value(begin, end, attrs)
    if val is None:
        return None
    # Trim one trailing period, but leave an ellipsis ("..") untouched.
    if val.endswith(".") and not val.endswith(".."):
        val = val[:len(val) - 1].strip()
    self.add_slot(TitlePageReferent.ATTR_NAME, val, False, 0)
    return Termin(val.upper())
def try_attach_org(t: 'Token', can_be_cyr: bool = False) -> 'ReferentToken':
    """Try to parse an English-style organization ("Xxx Yyy Ltd", "Bank of ...").

    Scans capitalized words until an organization-type item (OrgItemEngItem)
    is found, then builds an OrganizationReferent with name, type(s) and an
    optional attached geo object. Returns a ReferentToken or None.

    NOTE(review): the statement nesting in this function was reconstructed
    from a flattened (single-line) source — verify the marked spots against
    the original Pullenti source.
    """
    from pullenti.ner.org.internal.OrgItemNameToken import OrgItemNameToken
    if (t is None):
        return None
    br = False
    # Allow the whole organization to be enclosed in parentheses.
    if (t.is_char('(') and t.next0_ is not None):
        t = t.next0_
        br = True
    if (isinstance(t, NumberToken)):
        # A number may start the name only as a capitalized adjective word
        # ("First National ...").
        if (t.typ == NumberSpellingType.WORDS and t.morph.class0_.is_adjective and t.chars.is_capital_upper):
            pass
        else:
            return None
    else:
        if (t.chars.is_all_lower):
            return None
        if ((t.length_char < 3) and not t.chars.is_letter):
            return None
        if (not t.chars.is_latin_letter):
            if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                return None
    t0 = t
    t1 = t0
    nam_wo = 0  # count of plain word tokens in the name
    tok = None
    geo_ = None
    add_typ = None
    # Scan forward for the org-type item; first_pass emulates a C# for-loop
    # (the increment runs before every iteration except the first).
    first_pass3312 = True
    while True:
        if first_pass3312:
            first_pass3312 = False
        else:
            t = t.next0_
        if (not (t is not None)):
            break
        if (t != t0 and t.whitespaces_before_count > 1):
            break
        if (t.is_char(')')):
            break
        if (t.is_char('(') and t.next0_ is not None):
            # "(GeoName)" — remember the geo object and skip it.
            if ((isinstance(t.next0_.get_referent(), GeoReferent)) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')):
                geo_ = (Utils.asObjectOrNull(t.next0_.get_referent(), GeoReferent))
                t = t.next0_.next0_
                continue
            # "(org type)" — remember it as an additional type.
            typ = OrgItemTypeToken.try_attach(t.next0_, True, None)
            if ((typ is not None and typ.end_token.next0_ is not None and typ.end_token.next0_.is_char(')')) and typ.chars.is_latin_letter):
                add_typ = typ
                t = typ.end_token.next0_
                continue
            # "(Word)" — a single capitalized word stays part of the name.
            if (((isinstance(t.next0_, TextToken)) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')) and t.next0_.chars.is_capital_upper):
                t = t.next0_.next0_
                t1 = t
                continue
            break
        # Try the org-type item here, optionally skipping one or two
        # punctuation tokens ("Xxx, Ltd.").
        tok = OrgItemEngItem.try_attach(t, can_be_cyr)
        if (tok is None and t.is_char_of(".,") and t.next0_ is not None):
            tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
            if (tok is None and t.next0_.is_char_of(",.")):
                tok = OrgItemEngItem.try_attach(t.next0_.next0_, can_be_cyr)
        if (tok is not None):
            # A one-letter type after a Cyrillic start is too unreliable.
            if (tok.length_char == 1 and t0.chars.is_cyrillic_letter):
                return None
            break
        if (t.is_hiphen and not t.is_whitespace_after and not t.is_whitespace_before):
            continue
        if (t.is_char_of("&+") or t.is_and):
            continue
        if (t.is_char('.')):
            if (t.previous is not None and t.previous.length_char == 1):
                continue  # abbreviation dot ("I. B. M.")
            elif (MiscHelper.can_be_start_of_sentence(t.next0_)):
                break
        if (not t.chars.is_latin_letter):
            if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                break
        if (t.chars.is_all_lower):
            if (t.morph.class0_.is_preposition or t.morph.class0_.is_conjunction):
                continue
            if (br):
                continue  # inside brackets lowercase words are tolerated
            break
        mc = t.get_morph_class_in_dictionary()
        # NOTE(review): the "OF" check is assumed to sit at loop level, not
        # nested under the verb check — verify.
        if (mc.is_verb):
            if (t.next0_ is not None and t.next0_.morph.class0_.is_preposition):
                break
        if (t.next0_ is not None and t.next0_.is_value("OF", None)):
            break
        if (isinstance(t, TextToken)):
            nam_wo += 1
        t1 = t
    if (tok is None):
        return None
    if (t0 == tok.begin_token):
        # The type itself starts the span: accept only "TYPE (Name)" form.
        br2 = BracketHelper.try_parse(tok.end_token.next0_, BracketParseAttr.NO, 100)
        if (br2 is not None):
            org1 = OrganizationReferent()
            if (tok.short_value is not None):
                org1.add_type_str(tok.short_value)
            org1.add_type_str(tok.full_value)
            nam1 = MiscHelper.get_text_value(br2.begin_token, br2.end_token, GetTextAttr.NO)
            if (nam1 is not None):
                org1.add_name(nam1, True, None)
            return ReferentToken(org1, t0, br2.end_token)
        return None
    org0_ = OrganizationReferent()
    te = tok.end_token
    if (tok.is_bank):
        t1 = tok.end_token
    # "... Company Ltd" — a second type item directly after "company".
    if (tok.full_value == "company" and (tok.whitespaces_after_count < 3)):
        tok1 = OrgItemEngItem.try_attach(tok.end_token.next0_, can_be_cyr)
        if (tok1 is not None):
            t1 = tok.end_token
            tok = tok1
            te = tok.end_token
    if (tok.full_value == "company"):
        if (nam_wo == 0):
            return None
    nam = MiscHelper.get_text_value(t0, t1, GetTextAttr.IGNOREARTICLES)
    if (nam == "STOCK" and tok.full_value == "company"):
        return None
    alt_nam = None
    if (Utils.isNullOrEmpty(nam)):
        return None
    # Name containing "(...)": keep the full text as alternative name and
    # collapse the bracketed part out of the main name.
    if (nam.find('(') > 0):
        i1 = nam.find('(')
        i2 = nam.find(')')
        if (i1 < i2):
            alt_nam = nam
            tai = None
            if ((i2 + 1) < len(nam)):
                tai = nam[i2:].strip()
            nam = nam[0:0 + i1].strip()
            if (tai is not None):
                nam = "{0} {1}".format(nam, tai)
    if (tok.is_bank):
        org0_.add_type_str(("bank" if tok.kit.base_language.is_en else "банк"))
        org0_.add_profile(OrgProfile.FINANCE)
        # "Bank OF Xxx" — pull the post-"OF" words into the name.
        if ((t1.next0_ is not None and t1.next0_.is_value("OF", None) and t1.next0_.next0_ is not None) and t1.next0_.next0_.chars.is_latin_letter):
            nam0 = OrgItemNameToken.try_attach(t1.next0_, None, False, False)
            if (nam0 is not None):
                te = nam0.end_token
            else:
                te = t1.next0_.next0_
            nam = MiscHelper.get_text_value(t0, te, GetTextAttr.NO)
            if (isinstance(te.get_referent(), GeoReferent)):
                org0_._add_geo_object(Utils.asObjectOrNull(te.get_referent(), GeoReferent))
        elif (t0 == t1):
            return None
    else:
        if (tok.short_value is not None):
            org0_.add_type_str(tok.short_value)
        org0_.add_type_str(tok.full_value)
    if (Utils.isNullOrEmpty(nam)):
        return None
    org0_.add_name(nam, True, None)
    if (alt_nam is not None):
        org0_.add_name(alt_nam, True, None)
    res = ReferentToken(org0_, t0, te)
    # Absorb a further type item after trailing punctuation ("Xxx Ltd, Inc.").
    t = te
    while t.next0_ is not None:
        if (t.next0_.is_char_of(",.")):
            t = t.next0_
        else:
            break
    if (t.whitespaces_after_count < 2):
        tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
        if (tok is not None):
            if (tok.short_value is not None):
                org0_.add_type_str(tok.short_value)
            org0_.add_type_str(tok.full_value)
            res.end_token = tok.end_token
    if (geo_ is not None):
        org0_._add_geo_object(geo_)
    if (add_typ is not None):
        org0_.add_type(add_typ, False)
    if (not br):
        return res
    # The span was opened with '(' — require (and include) the closing ')'.
    t = res.end_token
    if (t.next0_ is None or t.next0_.is_char(')')):
        res.end_token = t.next0_
    else:
        return None
    return res
def __try_parse(t: 'Token', lev: int) -> 'BookLinkToken':
    """Parse one bibliographic-reference element at token t.

    Recognizes, in order: a bracketed element "[...]", a quoted number,
    referents (person/geo/date/press-org/uri), dictionary terms, the "//"
    delimiter, page/volume numbers, place abbreviations ("М.", "СПб" ...),
    translation marks, "там же", "см." cross-references and "(year)".
    *lev* limits recursion depth (max 3). Returns a BookLinkToken or None.

    NOTE(review): nesting reconstructed from a flattened source — verify the
    marked spots against the original Pullenti source.
    """
    if (t is None or lev > 3):
        return None
    if (t.is_char('[')):
        # "[...]": parse the inside and absorb the brackets.
        re = BookLinkToken.__try_parse(t.next0_, lev + 1)
        if (re is not None and re.end_token.next0_ is not None and re.end_token.next0_.is_char(']')):
            re.begin_token = t
            re.end_token = re.end_token.next0_
            return re
        if (re is not None and re.end_token.is_char(']')):
            re.begin_token = t
            return re
        if (re is not None):
            if (re.typ == BookLinkTyp.SOSTAVITEL or re.typ == BookLinkTyp.EDITORS):
                return re
    # A short quoted sequence ending with a number -> a NUMBER element.
    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
    if (br is not None):
        if ((isinstance(br.end_token.previous, NumberToken)) and (br.length_char < 30)):
            return BookLinkToken._new329(t, br.end_token, BookLinkTyp.NUMBER, MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO))
    t0 = t
    if (isinstance(t, ReferentToken)):
        if (isinstance(t.get_referent(), PersonReferent)):
            return BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED)
        if (isinstance(t.get_referent(), GeoReferent)):
            return BookLinkToken._new326(t, t, BookLinkTyp.GEO, t.get_referent())
        if (isinstance(t.get_referent(), DateReferent)):
            dr = Utils.asObjectOrNull(t.get_referent(), DateReferent)
            # A bare year (single slot), or a year after a comma, is a YEAR element.
            if (len(dr.slots) == 1 and dr.year > 0):
                return BookLinkToken._new329(t, t, BookLinkTyp.YEAR, str(dr.year))
            if (dr.year > 0 and t.previous is not None and t.previous.is_comma):
                return BookLinkToken._new329(t, t, BookLinkTyp.YEAR, str(dr.year))
        if (isinstance(t.get_referent(), OrganizationReferent)):
            org0_ = Utils.asObjectOrNull(t.get_referent(), OrganizationReferent)
            if (org0_.kind == OrganizationKind.PRESS):
                return BookLinkToken._new326(t, t, BookLinkTyp.PRESS, org0_)
        if (isinstance(t.get_referent(), UriReferent)):
            uri = Utils.asObjectOrNull(t.get_referent(), UriReferent)
            if ((uri.scheme == "http" or uri.scheme == "https" or uri.scheme == "ftp") or uri.scheme is None):
                return BookLinkToken._new326(t, t, BookLinkTyp.URL, uri)
    # Dictionary terms (type markers, "электронный ресурс", etc.).
    tok_ = BookLinkToken.__m_termins.try_parse(t, TerminParseAttr.NO)
    if (tok_ is not None):
        typ_ = Utils.valToEnum(tok_.termin.tag, BookLinkTyp)
        ok = True
        if (typ_ == BookLinkTyp.TYPE or typ_ == BookLinkTyp.NAMETAIL or typ_ == BookLinkTyp.ELECTRONRES):
            # These markers are only valid right after '.', ':', '[' or a hyphen.
            if (t.previous is not None and ((t.previous.is_char_of(".:[") or t.previous.is_hiphen))):
                pass
            else:
                ok = False
        if (ok):
            return BookLinkToken._new329(t, tok_.end_token, typ_, tok_.termin.canonic_text)
        if (typ_ == BookLinkTyp.ELECTRONRES):
            # Look ahead for an URL belonging to the electronic-resource mark.
            tt = tok_.end_token.next0_
            first_pass3019 = True
            while True:
                if first_pass3019:
                    first_pass3019 = False
                else:
                    tt = tt.next0_
                if (not (tt is not None)):
                    break
                if ((isinstance(tt, TextToken)) and not tt.chars.is_letter):
                    continue
                if (isinstance(tt.get_referent(), UriReferent)):
                    return BookLinkToken._new326(t, tt, BookLinkTyp.ELECTRONRES, tt.get_referent())
                break
    if (t.is_char('/')):
        res = BookLinkToken._new329(t, t, BookLinkTyp.DELIMETER, "/")
        if (t.next0_ is not None and t.next0_.is_char('/')):
            res.end_token = t.next0_
            res.value = "//"
        # NOTE(review): this whitespace check is assumed to apply to the
        # whole '/' branch (not only to "//") — verify.
        if (not t.is_whitespace_before and not t.is_whitespace_after):
            # A tight '/' must be followed within 3 tokens by a real
            # reference element, otherwise it is not a delimiter.
            coo = 3
            no = True
            tt = t.next0_
            while tt is not None and coo > 0:
                vvv = BookLinkToken.try_parse(tt, lev + 1)
                if (vvv is not None and vvv.typ != BookLinkTyp.NUMBER):
                    no = False
                    break
                tt = tt.next0_
                coo -= 1
            if (no):
                return None
        return res
    if ((isinstance(t, NumberToken)) and t.int_value is not None and t.typ == NumberSpellingType.DIGIT):
        res = BookLinkToken._new329(t, t, BookLinkTyp.NUMBER, str(t.value))
        val = t.int_value
        # A plausible publication year.
        if (val >= 1930 and (val < 2030)):
            res.typ = BookLinkTyp.YEAR
        if (t.next0_ is not None and t.next0_.is_char('.')):
            res.end_token = t.next0_
        elif ((t.next0_ is not None and t.next0_.length_char == 1 and not t.next0_.chars.is_letter) and t.next0_.is_whitespace_after):
            res.end_token = t.next0_
        elif (isinstance(t.next0_, TextToken)):
            term = t.next0_.term
            # "123 с." / "123 p." — a page count.
            if (((term == "СТР" or term == "C" or term == "С") or term == "P" or term == "S") or term == "PAGES"):
                res.end_token = t.next0_
                res.typ = BookLinkTyp.PAGES
                res.value = str(t.value)
        return res
    if (isinstance(t, TextToken)):
        term = t.term
        # "с. 12-15", "т. 3", "vol. 2" etc. — a page/volume range marker.
        if ((((( ((term == "СТР" or term == "C" or term == "С") or term == "ТОМ" or term == "T") or term == "Т" or term == "P") or term == "PP" or term == "V") or term == "VOL" or term == "S") or term == "СТОР" or t.is_value("PAGE", None)) or t.is_value("СТРАНИЦА", "СТОРІНКА")):
            tt = t.next0_
            while tt is not None:
                if (tt.is_char_of(".:~")):
                    tt = tt.next0_
                else:
                    break
            if (isinstance(tt, NumberToken)):
                res = BookLinkToken._new328(t, tt, BookLinkTyp.PAGERANGE)
                tt0 = tt
                tt1 = tt
                # Absorb "N, N" / "N-N" continuations.
                tt = tt.next0_
                first_pass3020 = True
                while True:
                    if first_pass3020:
                        first_pass3020 = False
                    else:
                        tt = tt.next0_
                    if (not (tt is not None)):
                        break
                    if (tt.is_char_of(",") or tt.is_hiphen):
                        if (isinstance(tt.next0_, NumberToken)):
                            tt = tt.next0_
                            res.end_token = tt
                            tt1 = tt
                            continue
                    break
                res.value = MiscHelper.get_text_value(tt0, tt1, GetTextAttr.NO)
                return res
        # Place-of-publication abbreviations: "М.:", "СПб.", "К.:", ...
        if ((term == "M" or term == "М" or term == "СПБ") or term == "K" or term == "К"):
            if (t.next0_ is not None and t.next0_.is_char_of(":;")):
                re = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                return re
            if (t.next0_ is not None and t.next0_.is_char_of(".")):
                res = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                if (t.next0_.next0_ is not None and t.next0_.next0_.is_char_of(":;")):
                    res.end_token = t.next0_.next0_
                elif (t.next0_.next0_ is not None and (isinstance(t.next0_.next0_, NumberToken))):
                    pass
                elif (t.next0_.next0_ is not None and t.next0_.next0_.is_comma and (isinstance(t.next0_.next0_.next0_, NumberToken))):
                    pass
                else:
                    return None
                return res
        # "пер. с ..." — translation mark.
        if (term == "ПЕР" or term == "ПЕРЕВ" or term == "ПЕРЕВОД"):
            tt = t
            if (tt.next0_ is not None and tt.next0_.is_char('.')):
                tt = tt.next0_
            if (tt.next0_ is not None and ((tt.next0_.is_value("C", None) or tt.next0_.is_value("С", None)))):
                tt = tt.next0_
            if (tt.next0_ is None or tt.whitespaces_after_count > 2):
                return None
            re = BookLinkToken._new328(t, tt.next0_, BookLinkTyp.TRANSLATE)
            return re
        # "там же" — ibid.
        if (term == "ТАМ" or term == "ТАМЖЕ"):
            res = BookLinkToken._new328(t, t, BookLinkTyp.TAMZE)
            if (t.next0_ is not None and t.next0_.is_value("ЖЕ", None)):
                res.end_token = t.next0_
            return res
        # "см.", "напр.", "see" — cross-reference introducer.
        if (((term == "СМ" or term == "CM" or term == "НАПР") or term == "НАПРИМЕР" or term == "SEE") or term == "ПОДРОБНЕЕ" or term == "ПОДРОБНО"):
            res = BookLinkToken._new328(t, t, BookLinkTyp.SEE)
            t = t.next0_
            first_pass3021 = True
            while True:
                if first_pass3021:
                    first_pass3021 = False
                else:
                    t = t.next0_
                if (not (t is not None)):
                    break
                if (t.is_char_of(".:") or t.is_value("ALSO", None)):
                    res.end_token = t
                    continue
                if (t.is_value("В", None) or t.is_value("IN", None)):
                    res.end_token = t
                    continue
                vvv = BookLinkToken.__try_parse(t, lev + 1)
                if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                    res.end_token = vvv.end_token
                    break
                break
            return res
        if (term == "БОЛЕЕ"):
            vvv = BookLinkToken.__try_parse(t.next0_, lev + 1)
            if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                vvv.begin_token = t
                return vvv
        # "№ 5" — a numbered element.
        no = MiscHelper.check_number_prefix(t)
        if (isinstance(no, NumberToken)):
            return BookLinkToken._new328(t, no, BookLinkTyp.N)
        # "в 3 т." — volume count.
        if (((term == "B" or term == "В")) and (isinstance(t.next0_, NumberToken)) and (isinstance(t.next0_.next0_, TextToken))):
            term2 = t.next0_.next0_.term
            if (((term2 == "Т" or term2 == "T" or term2.startswith("ТОМ")) or term2 == "TT" or term2 == "ТТ") or term2 == "КН" or term2.startswith("КНИГ")):
                return BookLinkToken._new328(t, t.next0_.next0_, BookLinkTyp.VOLUME)
    if (t.is_char('(')):
        # "(1987)" — a year in parentheses, not in the future.
        if (((isinstance(t.next0_, NumberToken)) and t.next0_.int_value is not None and t.next0_.next0_ is not None) and t.next0_.next0_.is_char(')')):
            num = t.next0_.int_value
            if (num > 1900 and num <= 2040):
                if (num <= datetime.datetime.now().year):
                    return BookLinkToken._new329(t, t.next0_.next0_, BookLinkTyp.YEAR, str(num))
        if (((isinstance(t.next0_, ReferentToken)) and (isinstance(t.next0_.get_referent(), DateReferent)) and t.next0_.next0_ is not None) and t.next0_.next0_.is_char(')')):
            num = t.next0_.get_referent().year
            if (num > 0):
                return BookLinkToken._new329(t, t.next0_.next0_, BookLinkTyp.YEAR, str(num))
    return None
def try_create_canonic_decree_ref_uri(t: 'Token') -> 'CanonicDecreeRefUri':
    """Build the canonical character span (CanonicDecreeRefUri) for a decree or
    decree-part reference held in ReferentToken t.

    For a DecreeReferent the span is trimmed to the essential type/number/name
    part (dropping brackets, "(далее ...)" tails, "утв. ..." wrappers). For a
    DecreePartReferent the span is narrowed to the most significant part item
    (article/clause/appendix), optionally extended over a "N-M" range.
    Returns None when no canonical span can be produced.

    NOTE(review): the statement nesting in this function was reconstructed
    from a flattened (single-line) source — verify the marked spots against
    the original Pullenti source.
    """
    if (not (isinstance(t, ReferentToken))):
        return None
    dr = Utils.asObjectOrNull(t.get_referent(), DecreeReferent)
    res = None
    if (dr is not None):
        if (dr.kind == DecreeKind.PUBLISHER):
            return None
        res = CanonicDecreeRefUri._new833(t.kit.sofa.text, dr, t.begin_char, t.end_char)
        # Fully parenthesised reference — keep as is.
        if ((t.previous is not None and t.previous.is_char('(') and t.next0_ is not None) and t.next0_.is_char(')')):
            return res
        if (t.misc_attrs != 0):
            return res
        rt = Utils.asObjectOrNull(t, ReferentToken)
        # Brackets inside the token itself — take the inner span.
        if (rt.begin_token.is_char('(') and rt.end_token.is_char(')')):
            res = CanonicDecreeRefUri._new833(t.kit.sofa.text, dr, rt.begin_token.next0_.begin_char, rt.end_token.previous.end_char)
            return res
        # When followed by ", and <another decree>", pre-parse its items to
        # align this reference's span with the enumeration pattern.
        next_decree_items = None
        if ((t.next0_ is not None and t.next0_.is_comma_and and (isinstance(t.next0_.next0_, ReferentToken))) and (isinstance(t.next0_.next0_.get_referent(), DecreeReferent))):
            next_decree_items = DecreeToken.try_attach_list(t.next0_.next0_.begin_token, None, 10, False)
            if (next_decree_items is not None and len(next_decree_items) > 1):
                # Cut the item list at the first line break.
                i = 0
                while i < (len(next_decree_items) - 1):
                    if (next_decree_items[i].is_newline_after):
                        del next_decree_items[i + 1:i + 1 + len(next_decree_items) - i - 1]
                        break
                    i += 1
        was_typ = False
        was_num = False
        # Walk the tokens of the reference; first_pass emulates a C# for-loop.
        tt = t.begin_token
        first_pass3090 = True
        while True:
            if first_pass3090:
                first_pass3090 = False
            else:
                tt = tt.next0_
            if (not (tt is not None and tt.end_char <= t.end_char)):
                break
            # Leading '(' — shift the span start past it.
            if (tt.begin_char == t.begin_char and tt.is_char('(') and tt.next0_ is not None):
                res.begin_char = tt.next0_.begin_char
            # "(далее ..." — drop the alias tail and stop.
            if (tt.is_char('(') and tt.next0_ is not None and tt.next0_.is_value("ДАЛЕЕ", None)):
                if (res.end_char >= tt.begin_char):
                    res.end_char = tt.previous.end_char
                break
            # Trailing ')' — trim it (and an unbalanced inner '(' tail).
            if (tt.end_char == t.end_char and tt.is_char(')')):
                res.end_char = tt.previous.end_char
                tt1 = tt.previous
                while tt1 is not None and tt1.begin_char >= res.begin_char:
                    if (tt1.is_char('(') and tt1.previous is not None):
                        if (res.begin_char < tt1.previous.begin_char):
                            res.end_char = tt1.previous.end_char
                    tt1 = tt1.previous
            li = DecreeToken.try_attach_list(tt, None, 10, False)
            if (li is not None and len(li) > 0):
                # "TYP TERR" pair gives the geo-qualified type text.
                ii = 0
                while ii < (len(li) - 1):
                    if (li[ii].typ == DecreeToken.ItemType.TYP and li[ii + 1].typ == DecreeToken.ItemType.TERR):
                        res.type_with_geo = MiscHelper.get_text_value(li[ii].begin_token, li[ii + 1].end_token, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVESINGLE)
                    ii += 1
                # Align with the following enumerated decree when its item
                # pattern matches a suffix of this list.
                if ((next_decree_items is not None and len(next_decree_items) > 1 and (len(next_decree_items) < len(li))) and next_decree_items[0].typ != DecreeToken.ItemType.TYP):
                    d = len(li) - len(next_decree_items)
                    j = 0
                    j = 0
                    while j < len(next_decree_items):
                        if (next_decree_items[j].typ != li[d + j].typ):
                            break
                        j += 1
                    if (j >= len(next_decree_items)):
                        del li[0:0 + d]
                        res.begin_char = li[0].begin_char
                elif ((next_decree_items is not None and len(next_decree_items) == 1 and next_decree_items[0].typ == DecreeToken.ItemType.NAME) and len(li) == 2 and li[1].typ == DecreeToken.ItemType.NAME):
                    res.begin_char = li[1].begin_char
                    res.end_char = li[1].end_char
                    break
                elif ((next_decree_items is not None and len(next_decree_items) == 1 and next_decree_items[0].typ == DecreeToken.ItemType.NUMBER) and li[len(li) - 1].typ == DecreeToken.ItemType.NUMBER):
                    res.begin_char = li[len(li) - 1].begin_char
                    res.end_char = li[len(li) - 1].end_char
                # Scan the items, tracking type/number flags and trimming at a
                # free-standing NAME item.
                i = 0
                first_pass3091 = True
                while True:
                    if first_pass3091:
                        first_pass3091 = False
                    else:
                        i += 1
                    if (not (i < len(li))):
                        break
                    l_ = li[i]
                    if (l_.begin_char > t.end_char):
                        del li[i:i + len(li) - i]
                        break
                    if (l_.typ == DecreeToken.ItemType.NAME):
                        if (not was_num):
                            if (dr.kind == DecreeKind.CONTRACT):
                                continue
                            if (((i + 1) < len(li)) and ((li[i + 1].typ == DecreeToken.ItemType.DATE or li[i + 1].typ == DecreeToken.ItemType.NUMBER))):
                                continue
                            ee = l_.begin_token.previous.end_char
                            if (ee > res.begin_char and (ee < res.end_char)):
                                res.end_char = ee
                        break
                    if (l_.typ == DecreeToken.ItemType.NUMBER):
                        was_num = True
                    if (i == 0):
                        if (l_.typ == DecreeToken.ItemType.TYP):
                            was_typ = True
                        elif (l_.typ == DecreeToken.ItemType.OWNER or l_.typ == DecreeToken.ItemType.ORG):
                            if (((i + 1) < len(li)) and ((li[1].typ == DecreeToken.ItemType.DATE or li[1].typ == DecreeToken.ItemType.NUMBER))):
                                was_typ = True
                    if (was_typ):
                        # "утв./утвержденный <item>" — mark adopted and start
                        # the span at the item itself.
                        tt0 = l_.begin_token.previous
                        if (tt0 is not None and tt0.is_char('.')):
                            tt0 = tt0.previous
                        if (tt0 is not None and ((tt0.is_value("УТВЕРЖДЕННЫЙ", None) or tt0.is_value("УТВЕРДИТЬ", None) or tt0.is_value("УТВ", None)))):
                            if (l_.begin_char > res.begin_char):
                                res.begin_char = l_.begin_char
                                if (res.end_char < res.begin_char):
                                    res.end_char = t.end_char
                            res.is_adopted = True
                if (len(li) > 0):
                    tt = li[len(li) - 1].end_token
                    if (tt.is_char(')')):
                        tt = tt.previous
                continue
            if (was_typ):
                na = DecreeToken.try_attach_name(tt, dr.typ0, True, False)
                if (na is not None and tt.begin_char > t.begin_char):
                    tt1 = na.end_token.next0_
                    if (tt1 is not None and tt1.is_char_of(",()")):
                        tt1 = tt1.next0_
                    if (tt1 is not None and (tt1.end_char < t.end_char)):
                        if (tt1.is_value("УТВЕРЖДЕННЫЙ", None) or tt1.is_value("УТВЕРДИТЬ", None) or tt1.is_value("УТВ", None)):
                            tt = tt1
                            continue
                    if (tt.previous is not None and tt.previous.is_char(':') and na.end_char <= res.end_char):
                        res.begin_char = tt.begin_char
                        break
                if (tt.previous.end_char > res.begin_char):
                    res.end_char = tt.previous.end_char
                break
        return res
    dpr = Utils.asObjectOrNull(t.get_referent(), DecreePartReferent)
    if (dpr is None):
        return None
    # Second element of a "part - part" range: already covered by the first.
    if ((t.previous is not None and t.previous.is_hiphen and (isinstance(t.previous.previous, ReferentToken))) and (isinstance(t.previous.previous.get_referent(), DecreePartReferent))):
        if (DecreePartReferent.create_range_referent(Utils.asObjectOrNull(t.previous.previous.get_referent(), DecreePartReferent), dpr) is not None):
            return None
    t1 = t
    has_diap = False
    diap_ref = None
    # "part - part" range starting here — merge into one range referent.
    if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, ReferentToken))) and (isinstance(t.next0_.next0_.get_referent(), DecreePartReferent))):
        diap = DecreePartReferent.create_range_referent(Utils.asObjectOrNull(dpr, DecreePartReferent), Utils.asObjectOrNull(t.next0_.next0_.get_referent(), DecreePartReferent))
        if (diap is not None):
            dpr = diap
            has_diap = True
            t1 = t.next0_.next0_
            diap_ref = (Utils.asObjectOrNull(t1, ReferentToken))
    res = CanonicDecreeRefUri._new835(t.kit.sofa.text, dpr, t.begin_char, t1.end_char, has_diap)
    if ((t.previous is not None and t.previous.is_char('(') and t1.next0_ is not None) and t1.next0_.is_char(')')):
        return res
    # Trim the span around an embedded decree reference.
    tt = t.begin_token
    while tt is not None and tt.end_char <= t.end_char:
        if (isinstance(tt.get_referent(), DecreeReferent)):
            if (tt.begin_char > t.begin_char):
                res.end_char = tt.previous.end_char
                if (tt.previous.morph.class0_.is_preposition and tt.previous.previous is not None):
                    res.end_char = tt.previous.previous.end_char
            elif (tt.end_char < t.end_char):
                res.begin_char = tt.begin_char
            break
        tt = tt.next0_
    has_same_before = DecreeHelper.__has_same_decree(t, dpr, True)
    has_same_after = DecreeHelper.__has_same_decree(t, dpr, False)
    # Find the two highest-ranked part types present in the part referent.
    ptmin = PartToken.ItemType.PREFIX
    ptmin2 = PartToken.ItemType.PREFIX
    max0_ = 0
    max2 = 0
    for s in dpr.slots:
        pt = PartToken._get_type_by_attr_name(s.type_name)
        if (pt == PartToken.ItemType.PREFIX):
            continue
        co = PartToken._get_rank(pt)
        if (co < 1):
            # PART combined with CLAUSE ranks like PARAGRAPH.
            if (pt == PartToken.ItemType.PART and dpr.find_slot(DecreePartReferent.ATTR_CLAUSE, None, True) is not None):
                co = PartToken._get_rank(PartToken.ItemType.PARAGRAPH)
            else:
                continue
        if (co > max0_):
            max2 = max0_
            ptmin2 = ptmin
            max0_ = co
            ptmin = pt
        elif (co > max2):
            max2 = co
            ptmin2 = pt
    # Phase 1: narrow the span to the top-ranked part item.
    if (ptmin != PartToken.ItemType.PREFIX):
        tt = t.begin_token
        while tt is not None and tt.end_char <= res.end_char:
            if (tt.begin_char >= res.begin_char):
                pt = PartToken.try_attach(tt, None, False, False)
                if (pt is not None and pt.typ == ptmin):
                    res.begin_char = pt.begin_char
                    res.end_char = pt.end_char
                    # "приложение ... к" — drop the dangling "к".
                    if (pt.typ == PartToken.ItemType.APPENDIX and pt.end_token.is_value("К", None) and pt.begin_token != pt.end_token):
                        res.end_char = pt.end_token.previous.end_char
                    if (pt.end_char == t.end_char):
                        # Followed by ", and <another part>": narrow to the value only.
                        if ((t.next0_ is not None and t.next0_.is_comma_and and (isinstance(t.next0_.next0_, ReferentToken))) and (isinstance(t.next0_.next0_.get_referent(), DecreePartReferent))):
                            tt1 = t.next0_.next0_.begin_token
                            ok = True
                            if (tt1.chars.is_letter):
                                ok = False
                            if (ok):
                                for v in pt.values:
                                    res.begin_char = v.begin_char
                                    res.end_char = v.end_char
                                    break
                    if (not has_diap):
                        return res
                    break
            tt = tt.next0_
        # Extend the span over the second half of a range.
        if (has_diap and diap_ref is not None):
            tt = diap_ref.begin_token
            while tt is not None and tt.end_char <= diap_ref.end_char:
                if (tt.is_char(',')):
                    break
                if (tt != diap_ref.begin_token and tt.is_whitespace_before):
                    break
                res.end_char = tt.end_char
                tt = tt.next0_
            # NOTE(review): this return is assumed to be inside the has_diap
            # branch (otherwise the phases below would be unreachable) — verify.
            return res
    # Phase 2: a sibling reference to the same decree exists nearby.
    if (((has_same_before or has_same_after)) and ptmin != PartToken.ItemType.PREFIX):
        tt = t.begin_token
        first_pass3092 = True
        while True:
            if first_pass3092:
                first_pass3092 = False
            else:
                tt = tt.next0_
            if (not (tt is not None and tt.end_char <= res.end_char)):
                break
            if (tt.begin_char >= res.begin_char):
                pt = (PartToken.try_attach(tt, None, False, False) if not has_same_before else None)
                if (pt is not None):
                    if (pt.typ == ptmin):
                        for v in pt.values:
                            res.begin_char = v.begin_char
                            res.end_char = v.end_char
                            return res
                    tt = pt.end_token
                    continue
                # A bare number at the span start: absorb dotted sub-numbers
                # ("5.1.2") and a numeric range tail ("5.1-5.4").
                if ((isinstance(tt, NumberToken)) and tt.begin_char == res.begin_char):
                    res.end_char = tt.end_char
                    while tt is not None and tt.next0_ is not None:
                        if (not tt.next0_.is_char('.') or tt.is_whitespace_after or tt.next0_.is_whitespace_after):
                            break
                        if (not (isinstance(tt.next0_.next0_, NumberToken))):
                            break
                        tt = tt.next0_.next0_
                        res.end_char = tt.end_char
                    if (tt.next0_ is not None and tt.next0_.is_hiphen):
                        if (isinstance(tt.next0_.next0_, NumberToken)):
                            tt = tt.next0_.next0_
                            res.end_char = tt.end_char
                            while tt is not None and tt.next0_ is not None:
                                if (not tt.next0_.is_char('.') or tt.is_whitespace_after or tt.next0_.is_whitespace_after):
                                    break
                                if (not (isinstance(tt.next0_.next0_, NumberToken))):
                                    break
                                tt = tt.next0_.next0_
                                res.end_char = tt.end_char
                        elif (tt.next0_.next0_ is not None and (isinstance(tt.next0_.next0_.get_referent(), DecreePartReferent)) and has_diap):
                            res.end_char = tt.next0_.next0_.begin_token.end_char
                    return res
                # A short quoted name at the span start.
                if (BracketHelper.can_be_start_of_sequence(tt, True, False) and tt.begin_char == res.begin_char and has_same_before):
                    br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
                    if (br is not None and br.end_token.previous == tt.next0_):
                        res.end_char = br.end_char
                        return res
        return res
    # Phase 3: no sibling reference — use the full part-item list.
    if (not has_same_before and not has_same_after and ptmin != PartToken.ItemType.PREFIX):
        tt = t.begin_token
        while tt is not None and tt.end_char <= res.end_char:
            if (tt.begin_char >= res.begin_char):
                pts = PartToken.try_attach_list(tt, False, 40)
                if (pts is None or len(pts) == 0):
                    break
                i = 0
                while i < len(pts):
                    if (pts[i].typ == ptmin):
                        res.begin_char = pts[i].begin_char
                        res.end_char = pts[i].end_char
                        tt = pts[i].end_token
                        if (tt.next0_ is not None and tt.next0_.is_hiphen):
                            if (isinstance(tt.next0_.next0_, NumberToken)):
                                res.end_char = tt.next0_.next0_.end_char
                            elif (tt.next0_.next0_ is not None and (isinstance(tt.next0_.next0_.get_referent(), DecreePartReferent)) and has_diap):
                                res.end_char = tt.next0_.next0_.begin_token.end_char
                        return res
                    i += 1
            tt = tt.next0_
    return res
def __try_parse(t : 'Token', prev : 'WeaponItemToken', after_conj : bool, attach_high : bool=False) -> 'WeaponItemToken':
    """Try to recognize a single weapon-related item starting at token *t*.

    Tries, in order: a bracketed recursive parse, the static weapon ontology
    (M_ONTOLOGY), a numeric model prefix, short all-uppercase model codes,
    capitalized name/brand words following a previous weapon item, and the
    keyword-driven branches for brand ("МАРКА"), caliber ("КАЛИБР"),
    and producer ("ПРОИЗВОДСТВО").

    :param t: first token of the candidate item (may be None).
    :param prev: previously recognized item, used as left context
        (e.g. a NOUN before a capitalized word suggests BRAND).
    :param after_conj: passed through recursion; not read directly here.
    :param attach_high: passed through recursion; not read directly here.
    :return: a WeaponItemToken covering the matched span, or None.

    NOTE(review): auto-generated (transpiled) code; comments added, logic
    untouched. The ``first_passNNNN`` flag emulates a do-while loop.
    """
    if (t is None): 
        return None
    # An opening bracket/quote: parse the inside and absorb the brackets.
    if (BracketHelper.is_bracket(t, True)): 
        wit = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, attach_high)
        if (wit is not None): 
            if (wit.end_token.next0_ is None): 
                wit.begin_token = t
                return wit
            if (BracketHelper.is_bracket(wit.end_token.next0_, True)): 
                wit.begin_token = t
                wit.end_token = wit.end_token.next0_
                return wit
    # Main path: match against the static weapon terminology ontology.
    tok = WeaponItemToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
    if (tok is not None): 
        res = WeaponItemToken(t, tok.end_token)
        res.typ = (Utils.valToEnum(tok.termin.tag, WeaponItemToken.Typs))
        if (res.typ == WeaponItemToken.Typs.NOUN): 
            res.value = tok.termin.canonic_text
            if (tok.termin.tag2 is not None): 
                # tag2 marks ambiguous terms — presumably doubtful matches; TODO confirm.
                res.is_doubt = True
            # Greedily attach following BRAND items and dictionary adjectives
            # to the noun (do-while over tt).
            tt = res.end_token.next0_
            first_pass3426 = True
            while True:
                if first_pass3426: first_pass3426 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.whitespaces_before_count > 2): 
                    break
                wit = WeaponItemToken.__try_parse(tt, None, False, False)
                if (wit is not None): 
                    if (wit.typ == WeaponItemToken.Typs.BRAND): 
                        res.__inner_tokens.append(wit)
                        tt = wit.end_token
                        res.end_token = tt
                        continue
                    break
                if (not (isinstance(tt, TextToken))): 
                    break
                mc = tt.get_morph_class_in_dictionary()
                if (mc == MorphClass.ADJECTIVE): 
                    # Fold a trailing adjective into an alternative value
                    # ("<adjectives> <noun>").
                    if (res.alt_value is None): 
                        res.alt_value = res.value
                    if (res.alt_value.endswith(res.value)): 
                        res.alt_value = res.alt_value[0:0+len(res.alt_value) - len(res.value)]
                    res.alt_value = "{0}{1} {2}".format(res.alt_value, tt.term, res.value)
                    res.end_token = tt
                    continue
                break
            return res
        if (res.typ == WeaponItemToken.Typs.BRAND or res.typ == WeaponItemToken.Typs.NAME): 
            res.value = tok.termin.canonic_text
            return res
        if (res.typ == WeaponItemToken.Typs.MODEL): 
            res.value = tok.termin.canonic_text
            # tag2 may carry a list of extra termins describing the model's
            # sub-items; attach each as an inner token.
            if (isinstance(tok.termin.tag2, list)): 
                li = Utils.asObjectOrNull(tok.termin.tag2, list)
                for to in li: 
                    wit = WeaponItemToken._new2758(t, tok.end_token, Utils.valToEnum(to.tag, WeaponItemToken.Typs), to.canonic_text, tok.begin_token == tok.end_token)
                    res.__inner_tokens.append(wit)
                    if (to.additional_vars is not None and len(to.additional_vars) > 0): 
                        wit.alt_value = to.additional_vars[0].canonic_text
            res.__correct_model()
            return res
    # "№ 123"-style prefix → NUMBER item.
    nnn = MiscHelper.check_number_prefix(t)
    if (nnn is not None): 
        tit = TransItemToken._attach_number(nnn, True)
        if (tit is not None): 
            res = WeaponItemToken._new2759(t, tit.end_token, WeaponItemToken.Typs.NUMBER)
            res.value = tit.value
            res.alt_value = tit.alt_value
            return res
    # Short all-uppercase letter code (e.g. "АК") — candidate MODEL.
    if (((isinstance(t, TextToken)) and t.chars.is_letter and t.chars.is_all_upper) and (t.length_char < 4)): 
        if ((t.next0_ is not None and ((t.next0_.is_hiphen or t.next0_.is_char('.'))) and (t.next0_.whitespaces_after_count < 2)) and (isinstance(t.next0_.next0_, NumberToken)): 
            res = WeaponItemToken._new2760(t, t.next0_, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        if ((isinstance(t.next0_, NumberToken)) and not t.is_whitespace_after): 
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.MODEL, True)
            res.value = t.term
            res.__correct_model()
            return res
        # "СП" followed by a model/brand — expand to "service pistol" noun.
        if (t.term == "СП" and (t.whitespaces_after_count < 3) and (isinstance(t.next0_, TextToken))): 
            pp = WeaponItemToken.__try_parse(t.next0_, None, False, False)
            if (pp is not None and ((pp.typ == WeaponItemToken.Typs.MODEL or pp.typ == WeaponItemToken.Typs.BRAND))): 
                res = WeaponItemToken._new2759(t, t, WeaponItemToken.Typs.NOUN)
                res.value = "ПИСТОЛЕТ"
                res.alt_value = "СЛУЖЕБНЫЙ ПИСТОЛЕТ"
                return res
    # Capitalized word after a previous weapon item (or after a comma/and
    # in a list) — NAME, possibly reclassified as BRAND/MODEL below.
    if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower) and t.length_char > 2): 
        ok = False
        if (prev is not None and ((prev.typ == WeaponItemToken.Typs.NOUN or prev.typ == WeaponItemToken.Typs.MODEL or prev.typ == WeaponItemToken.Typs.BRAND))): 
            ok = True
        elif (prev is None and t.previous is not None and t.previous.is_comma_and): 
            ok = True
        if (ok): 
            res = WeaponItemToken._new2760(t, t, WeaponItemToken.Typs.NAME, True)
            res.value = t.term
            # Hyphenated continuation with matching capitalization: "X-Y".
            if ((t.next0_ is not None and t.next0_.is_hiphen and (isinstance(t.next0_.next0_, TextToken))) and t.next0_.next0_.chars == t.chars): 
                res.value = "{0}-{1}".format(res.value, t.next0_.next0_.term)
                res.end_token = t.next0_.next0_
            if (prev is not None and prev.typ == WeaponItemToken.Typs.NOUN): 
                res.typ = WeaponItemToken.Typs.BRAND
            # A trailing "-<number>" (or glued number) makes it a MODEL.
            if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen and (isinstance(res.end_token.next0_.next0_, NumberToken))): 
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            elif (not res.end_token.is_whitespace_after and (isinstance(res.end_token.next0_, NumberToken))): 
                res.typ = WeaponItemToken.Typs.MODEL
                res.__correct_model()
            return res
    # Keyword "МАРКА" (brand): recurse, or take a quoted/capitalized value.
    if (t.is_value("МАРКА", None)): 
        res = WeaponItemToken.__try_parse(t.next0_, prev, after_conj, False)
        if (res is not None and res.typ == WeaponItemToken.Typs.BRAND): 
            res.begin_token = t
            return res
        if (BracketHelper.can_be_start_of_sequence(t.next0_, True, False)): 
            br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
            if (br is not None): 
                return WeaponItemToken._new2764(t, br.end_token, WeaponItemToken.Typs.BRAND, MiscHelper.get_text_value(br.begin_token, br.end_token, GetTextAttr.NO))
        if (((isinstance(t, TextToken)) and (isinstance(t.next0_, TextToken)) and t.next0_.length_char > 1) and not t.next0_.chars.is_all_lower): 
            return WeaponItemToken._new2764(t, t.next0_, WeaponItemToken.Typs.BRAND, t.term)
    # Keyword "КАЛИБР" (caliber, RU/UA variants) followed by a number+unit.
    if (t.is_value("КАЛИБР", "КАЛІБР")): 
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))): 
            tt1 = tt1.next0_
        num = NumbersWithUnitToken.try_parse(tt1, None, False, False, False, False)
        if (num is not None and num.single_val is not None): 
            return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    # Bare number: caliber if the unit is "мм" or "КАЛИБР" follows.
    if (isinstance(t, NumberToken)): 
        num = NumbersWithUnitToken.try_parse(t, None, False, False, False, False)
        if (num is not None and num.single_val is not None): 
            if (len(num.units) == 1 and num.units[0].unit is not None and num.units[0].unit.name_cyr == "мм"): 
                return WeaponItemToken._new2764(t, num.end_token, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
            if (num.end_token.next0_ is not None and num.end_token.next0_.is_value("КАЛИБР", "КАЛІБР")): 
                return WeaponItemToken._new2764(t, num.end_token.next0_, WeaponItemToken.Typs.CALIBER, NumberHelper.double_to_string(num.single_val))
    # Keyword "ПРОИЗВОДСТВО" (producer): an org or geo referent follows.
    if (t.is_value("ПРОИЗВОДСТВО", "ВИРОБНИЦТВО")): 
        tt1 = t.next0_
        if (tt1 is not None and ((tt1.is_hiphen or tt1.is_char(':')))): 
            tt1 = tt1.next0_
        if (isinstance(tt1, ReferentToken)): 
            if ((isinstance(tt1.get_referent(), OrganizationReferent)) or (isinstance(tt1.get_referent(), GeoReferent))): 
                return WeaponItemToken._new2769(t, tt1, WeaponItemToken.Typs.DEVELOPER, tt1.get_referent())
    return None
def __try_parse_ru(first: 'Token', typ: 'NounPhraseParseAttr', max_char_pos: int, def_noun: 'NounPhraseItem' = None) -> 'NounPhraseToken':
    """Parse a Russian noun phrase starting at *first*.

    Collects a run of NounPhraseItem candidates (adjectives and noun
    candidates, optionally joined by commas/conjunctions), chooses the head
    noun, reconciles the morphology of adjectives with the noun, and returns
    a NounPhraseToken — or None if no acceptable phrase is found.

    :param first: first token of the candidate phrase.
    :param typ: bit flags (NounPhraseParseAttr) controlling optional
        behaviors: prepositions, adverbs, pronouns, verbs, multi-line, etc.
    :param max_char_pos: hard right boundary (0 = unlimited).
    :param def_noun: optional externally supplied head-noun item appended
        to the collected items.
    :return: parsed NounPhraseToken or None.

    NOTE(review): auto-generated (transpiled) code; comments added, logic
    untouched. ``first_passNNNN`` flags emulate C# do-while loops; slice
    deletes like ``del items[i:i + len(items) - i]`` truncate the list at i.
    """
    if (first is None): 
        return None
    items = None
    adverbs = None
    prep = None
    kak = False
    t0 = first
    # Optional "КАК <prep> ..." ("such as ...") prefix when preposition
    # parsing is enabled.
    if ((((typ) & (NounPhraseParseAttr.PARSEPREPOSITION))) != (NounPhraseParseAttr.NO) and t0.is_value("КАК", None)): 
        t0 = t0.next0_
        prep = PrepositionHelper.try_parse(t0)
        if (prep is not None): 
            t0 = prep.end_token.next0_
        kak = True
    internal_noun_prase = None
    conj_before = False
    t = t0
    # --- Phase 1: scan tokens, collecting candidate items -----------------
    first_pass3041 = True
    while True:
        if first_pass3041: first_pass3041 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (max_char_pos > 0 and t.begin_char > max_char_pos): 
            break
        # Pure conjunction ("и"/"или"): allow one between items.
        if ((t.morph.class0_.is_conjunction and not t.morph.class0_.is_adjective and not t.morph.class0_.is_pronoun) and not t.morph.class0_.is_noun): 
            if (conj_before): 
                break
            if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)): 
                break
            if (items is not None and ((t.is_and or t.is_or))): 
                conj_before = True
                # Skip "и\\/или" and "и(или)" composite connectives.
                if ((t.next0_ is not None and t.next0_.is_char_of("\\/") and t.next0_.next0_ is not None) and t.next0_.next0_.is_or): 
                    t = t.next0_.next0_
                if (((t.next0_ is not None and t.next0_.is_char('(') and t.next0_.next0_ is not None) and t.next0_.next0_.is_or and t.next0_.next0_.next0_ is not None) and t.next0_.next0_.next0_.is_char(')')): 
                    t = t.next0_.next0_.next0_
                continue
            break
        elif (t.is_comma): 
            if (conj_before or items is None): 
                break
            if ((((typ) & (NounPhraseParseAttr.CANNOTHASCOMMAAND))) != (NounPhraseParseAttr.NO)): 
                break
            mc = t.previous.get_morph_class_in_dictionary()
            # Don't join items across a surname boundary.
            if (mc.is_proper_surname or mc.is_proper_secname): 
                break
            conj_before = True
            # "КАК ..., ТАК И <prep>" correlative construction.
            if (kak and t.next0_ is not None and t.next0_.is_value("ТАК", None)): 
                t = t.next0_
                if (t.next0_ is not None and t.next0_.is_and): 
                    t = t.next0_
                pr = PrepositionHelper.try_parse(t.next0_)
                if (pr is not None): 
                    t = pr.end_token
            if (items[len(items) - 1].can_be_noun and items[len(items) - 1].end_token.morph.class0_.is_pronoun): 
                break
            continue
        elif (t.is_char('(')): 
            # A bracketed insertion inside the phrase is skipped whole.
            if (items is None): 
                return None
            brr = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (brr is None): 
                break
            if (brr.length_char > 100): 
                break
            t = brr.end_token
            continue
        if (isinstance(t, ReferentToken)): 
            if ((((typ) & (NounPhraseParseAttr.REFERENTCANBENOUN))) == (NounPhraseParseAttr.NO)): 
                break
        elif (t.chars.is_latin_letter): 
            break
        it = NounPhraseItem.try_parse(t, items, typ)
        if (it is None or ((not it.can_be_adj and not it.can_be_noun))): 
            # Unknown capitalized long word right after items: accept as noun.
            if (((it is not None and items is not None and t.chars.is_capital_upper) and (t.whitespaces_before_count < 3) and t.length_char > 3) and not t.get_morph_class_in_dictionary().is_noun and not t.get_morph_class_in_dictionary().is_adjective): 
                it.can_be_noun = True
                items.append(it)
                break
            if ((((typ) & (NounPhraseParseAttr.PARSEADVERBS))) != (NounPhraseParseAttr.NO) and (isinstance(t, TextToken)) and t.morph.class0_.is_adverb): 
                if (adverbs is None): 
                    adverbs = list()
                adverbs.append(Utils.asObjectOrNull(t, TextToken))
                continue
            break
        it.conj_before = conj_before
        conj_before = False
        if (not it.can_be_adj and not it.can_be_noun): 
            break
        # Newline inside the phrase: allowed only with MULTILINES or when
        # capitalization is consistent with a wrapped phrase.
        if (t.is_newline_before and t != first): 
            if ((((typ) & (NounPhraseParseAttr.MULTILINES))) != (NounPhraseParseAttr.NO)): 
                pass
            elif (items is not None and t.chars != items[len(items) - 1].chars): 
                if (t.chars.is_all_lower and items[len(items) - 1].chars.is_capital_upper): 
                    pass
                else: 
                    break
        if (items is None): 
            items = list()
        else: 
            it0 = items[len(items) - 1]
            # Personal pronoun followed by another pronoun, or after a verb:
            # restrict what may continue the phrase.
            if (it0.can_be_noun and it0.is_personal_pronoun): 
                if (it.is_pronoun): 
                    break
                if ((it0.begin_token.previous is not None and it0.begin_token.previous.get_morph_class_in_dictionary().is_verb and not it0.begin_token.previous.get_morph_class_in_dictionary().is_adjective) and not it0.begin_token.previous.get_morph_class_in_dictionary().is_preposition): 
                    if (t.morph.case_.is_nominative or t.morph.case_.is_accusative): 
                        pass
                    else: 
                        break
            if (it.can_be_noun and it.is_verb): 
                if (it0.previous is None): 
                    pass
                elif ((isinstance(it0.previous, TextToken)) and not it0.previous.chars.is_letter): 
                    pass
                else: 
                    break
        items.append(it)
        t = it.end_token
        # Don't continue past a line break onto what looks like a surname.
        if (t.is_newline_after and not t.chars.is_all_lower): 
            mc = t.get_morph_class_in_dictionary()
            if (mc.is_proper_surname): 
                break
            if (t.morph.class0_.is_proper_surname and mc.is_undefined): 
                break
    if (items is None): 
        return None
    # --- Phase 2: "adj AND <other phrase>" — share the noun ----------------
    tt1 = None
    if (len(items) == 1 and items[0].can_be_adj): 
        and0_ = False
        tt1 = items[0].end_token.next0_
        first_pass3042 = True
        while True:
            if first_pass3042: first_pass3042 = False
            else: tt1 = tt1.next0_
            if (not (tt1 is not None)): break
            if (tt1.is_and or tt1.is_or): 
                and0_ = True
                break
            if (tt1.is_comma or tt1.is_value("НО", None) or tt1.is_value("ТАК", None)): 
                continue
            break
        if (and0_): 
            if (items[0].can_be_noun and items[0].is_personal_pronoun): 
                and0_ = False
        if (and0_): 
            tt2 = tt1.next0_
            if (tt2 is not None and tt2.morph.class0_.is_preposition): 
                tt2 = tt2.next0_
            npt1 = _NounPraseHelperInt.__try_parse_ru(tt2, typ, max_char_pos, None)
            if (npt1 is not None and len(npt1.adjectives) > 0): 
                ok1 = False
                # The lone adjective must agree with the right phrase's noun.
                for av in items[0].adj_morph: 
                    for v in npt1.noun.noun_morph: 
                        if (v.check_accord(av, False, False)): 
                            items[0].morph.add_item(av)
                            ok1 = True
                if (ok1): 
                    npt1.begin_token = items[0].begin_token
                    npt1.end_token = tt1.previous
                    npt1.adjectives.clear()
                    npt1.adjectives.append(items[0])
                    return npt1
    if (def_noun is not None): 
        items.append(def_noun)
    # --- Phase 3: internal phrase after the items ("<adjs> <prep-phrase> noun")
    last1 = items[len(items) - 1]
    check = True
    for it in items: 
        if (not it.can_be_adj): 
            check = False
            break
        elif (it.can_be_noun and it.is_personal_pronoun): 
            check = False
            break
    tt1 = last1.end_token.next0_
    if ((tt1 is not None and check and ((tt1.morph.class0_.is_preposition or tt1.morph.case_.is_instrumental))) and (tt1.whitespaces_before_count < 2)): 
        inp = NounPhraseHelper.try_parse(tt1, Utils.valToEnum((typ) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), max_char_pos, None)
        if (inp is not None): 
            tt1 = inp.end_token.next0_
            npt1 = _NounPraseHelperInt.__try_parse_ru(tt1, typ, max_char_pos, None)
            if (npt1 is not None): 
                ok = True
                ii = 0
                first_pass3043 = True
                while True:
                    if first_pass3043: first_pass3043 = False
                    else: ii += 1
                    if (not (ii < len(items))): break
                    it = items[ii]
                    if (NounPhraseItem.try_accord_adj_and_noun(it, Utils.asObjectOrNull(npt1.noun, NounPhraseItem))): 
                        continue
                    if (ii > 0): 
                        inp2 = NounPhraseHelper.try_parse(it.begin_token, typ, max_char_pos, None)
                        if (inp2 is not None and inp2.end_token == inp.end_token): 
                            # Re-anchor: items from ii belong to inp2, drop them.
                            del items[ii:ii + len(items) - ii]
                            inp = inp2
                            break
                    ok = False
                    break
                if (ok): 
                    if (npt1.morph.case_.is_genitive and not inp.morph.case_.is_instrumental): 
                        ok = False
                if (ok): 
                    i = 0
                    while i < len(items): 
                        npt1.adjectives.insert(i, items[i])
                        i += 1
                    npt1.internal_noun = inp
                    # Narrow npt1's morphology by removing the adjectives' variants.
                    mmm = MorphCollection(npt1.morph)
                    for it in items: 
                        mmm.remove_items(it.adj_morph[0], False)
                    if (mmm.gender != MorphGender.UNDEFINED or mmm.number != MorphNumber.UNDEFINED or not mmm.case_.is_undefined): 
                        npt1.morph = mmm
                    if (adverbs is not None): 
                        if (npt1.adverbs is None): 
                            npt1.adverbs = adverbs
                        else: 
                            npt1.adverbs[0:0] = adverbs
                    npt1.begin_token = first
                    return npt1
            if (tt1 is not None and tt1.morph.class0_.is_noun and not tt1.morph.case_.is_genitive): 
                it = NounPhraseItem.try_parse(tt1, items, typ)
                if (it is not None and it.can_be_noun): 
                    internal_noun_prase = inp
                    inp.begin_token = items[0].end_token.next0_
                    items.append(it)
    # --- Phase 4: participle (verb-adjective) with its own arguments -------
    i = 0
    first_pass3044 = True
    while True:
        if first_pass3044: first_pass3044 = False
        else: i += 1
        if (not (i < len(items))): break
        if (items[i].can_be_adj and items[i].begin_token.morph.class0_.is_verb): 
            it = items[i].begin_token
            if (not it.get_morph_class_in_dictionary().is_verb): 
                continue
            if (it.is_value("УПОЛНОМОЧЕННЫЙ", None)): 
                continue
            if ((((typ) & (NounPhraseParseAttr.PARSEVERBS))) == (NounPhraseParseAttr.NO)): 
                continue
            inp = _NounPraseHelperInt.__try_parse_ru(items[i].end_token.next0_, NounPhraseParseAttr.NO, max_char_pos, None)
            if (inp is None): 
                continue
            if (inp.anafor is not None and i == (len(items) - 1) and NounPhraseItem.try_accord_adj_and_noun(items[i], Utils.asObjectOrNull(inp.noun, NounPhraseItem))): 
                inp.begin_token = first
                ii = 0
                while ii < len(items): 
                    inp.adjectives.insert(ii, items[ii])
                    ii += 1
                return inp
            if (inp.end_token.whitespaces_after_count > 3): 
                continue
            npt1 = _NounPraseHelperInt.__try_parse_ru(inp.end_token.next0_, NounPhraseParseAttr.NO, max_char_pos, None)
            if (npt1 is None): 
                continue
            ok = True
            j = 0
            while j <= i: 
                if (not NounPhraseItem.try_accord_adj_and_noun(items[j], Utils.asObjectOrNull(npt1.noun, NounPhraseItem))): 
                    ok = False
                    break
                j += 1
            if (not ok): 
                continue
            verb = VerbPhraseHelper.try_parse(it, True, False, False)
            if (verb is None): 
                continue
            # Prefer the reading where the inner phrase attaches to the verb.
            vlinks = SemanticHelper.try_create_links(verb, inp, None)
            nlinks = SemanticHelper.try_create_links(inp, npt1, None)
            if (len(vlinks) == 0 and len(nlinks) > 0): 
                continue
            j = 0
            while j <= i: 
                npt1.adjectives.insert(j, items[j])
                j += 1
            items[i].end_token = inp.end_token
            mmm = MorphCollection(npt1.morph)
            bil = list()
            j = 0
            while j <= i: 
                bil.clear()
                for m in items[j].adj_morph: 
                    bil.append(m)
                mmm.remove_items_list_cla(bil, None)
                j += 1
            if (mmm.gender != MorphGender.UNDEFINED or mmm.number != MorphNumber.UNDEFINED or not mmm.case_.is_undefined): 
                npt1.morph = mmm
            if (adverbs is not None): 
                if (npt1.adverbs is None): 
                    npt1.adverbs = adverbs
                else: 
                    npt1.adverbs[0:0] = adverbs
            npt1.begin_token = first
            return npt1
    # --- Phase 5: adjective AFTER the noun (ADJECTIVECANBELAST) ------------
    ok2 = False
    if ((len(items) == 1 and (((typ) & (NounPhraseParseAttr.ADJECTIVECANBELAST))) != (NounPhraseParseAttr.NO) and (items[0].whitespaces_after_count < 3)) and not items[0].is_adverb): 
        if (not items[0].can_be_adj): 
            ok2 = True
        elif (items[0].is_personal_pronoun and items[0].can_be_noun): 
            ok2 = True
    if (ok2): 
        it = NounPhraseItem.try_parse(items[0].end_token.next0_, None, typ)
        if (it is not None and it.can_be_adj and it.begin_token.chars.is_all_lower): 
            ok2 = True
            if (it.is_adverb or it.is_verb): 
                ok2 = False
            if (it.is_pronoun and items[0].is_pronoun): 
                ok2 = False
            if (it.can_be_adj_for_personal_pronoun and items[0].is_personal_pronoun): 
                ok2 = True
            if (ok2 and NounPhraseItem.try_accord_adj_and_noun(it, items[0])): 
                npt1 = _NounPraseHelperInt.__try_parse_ru(it.begin_token, typ, max_char_pos, None)
                if (npt1 is not None and ((npt1.end_char > it.end_char or len(npt1.adjectives) > 0))): 
                    pass
                else: 
                    items.insert(0, it)
    # --- Phase 6: choose the head noun (rightmost viable candidate) --------
    noun = None
    adj_after = None
    for i in range(len(items) - 1, -1, -1): 
        if (items[i].can_be_noun): 
            if (items[i].conj_before): 
                continue
            if (i > 0 and not items[i - 1].can_be_adj): 
                continue
            if (i > 0 and items[i - 1].can_be_noun): 
                if (items[i - 1].is_doubt_adjective): 
                    continue
                if (items[i - 1].is_pronoun and items[i].is_pronoun): 
                    if (items[i].is_pronoun and items[i - 1].can_be_adj_for_personal_pronoun): 
                        pass
                    else: 
                        continue
            noun = items[i]
            # Truncate: items left of the noun become its adjectives.
            del items[i:i + len(items) - i]
            if (adj_after is not None): 
                items.append(adj_after)
            elif (len(items) > 0 and items[0].can_be_noun and not items[0].can_be_adj): 
                noun = items[0]
                items.clear()
            break
    if (noun is None): 
        return None
    # --- Phase 7: build the result and reconcile morphology ----------------
    res = NounPhraseToken._new466(first, noun.end_token, prep)
    if (adverbs is not None): 
        for a in adverbs: 
            if (a.begin_char < noun.begin_char): 
                if (len(items) == 0 and prep is None): 
                    return None
                if (res.adverbs is None): 
                    res.adverbs = list()
                res.adverbs.append(a)
    res.noun = (noun)
    res.multi_nouns = noun.multi_nouns
    if (kak): 
        res.multi_nouns = True
    res.internal_noun = internal_noun_prase
    for v in noun.noun_morph: 
        noun.morph.add_item(v)
    res.morph = noun.morph
    # A nominative reading directly after a preposition is impossible; drop it.
    if (res.morph.case_.is_nominative and first.previous is not None and first.previous.morph.class0_.is_preposition): 
        res.morph.case_ = (res.morph.case_) ^ MorphCase.NOMINATIVE
    if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO) and ((res.morph.class0_.is_pronoun or res.morph.class0_.is_personal_pronoun))): 
        return None
    stat = None
    if (len(items) > 1): 
        stat = dict()
    need_update_morph = False
    # Keep only noun morphology variants that every adjective agrees with.
    if (len(items) > 0): 
        ok_list = list()
        is_num_not = False
        for vv in noun.noun_morph: 
            i = 0
            v = vv
            i = 0
            while i < len(items): 
                ok = False
                for av in items[i].adj_morph: 
                    if (v.check_accord(av, False, False)): 
                        ok = True
                        if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_): 
                            v.case_ = av.case_ = (av.case_) & v.case_
                        break
                if (not ok): 
                    # Numeric adjective ("пять ...") forces plural reading.
                    if (items[i].can_be_numeric_adj and items[i].try_accord_var(v, False)): 
                        ok = True
                        v1 = NounPhraseItemTextVar()
                        v1.copy_from_item(v)
                        v1.number = MorphNumber.PLURAL
                        is_num_not = True
                        v1.case_ = MorphCase()
                        for a in items[i].adj_morph: 
                            v1.case_ = (v1.case_) | a.case_
                        v = v1
                    else: 
                        break
                i += 1
            if (i >= len(items)): 
                ok_list.append(v)
        if (len(ok_list) > 0 and (((len(ok_list) < res.morph.items_count) or is_num_not))): 
            res.morph = MorphCollection()
            for v in ok_list: 
                res.morph.add_item(v)
            if (not is_num_not): 
                noun.morph = res.morph
    # Attach each adjective, record normal-form-ending statistics in `stat`.
    i = 0
    first_pass3045 = True
    while True:
        if first_pass3045: first_pass3045 = False
        else: i += 1
        if (not (i < len(items))): break
        for av in items[i].adj_morph: 
            for v in noun.noun_morph: 
                if (v.check_accord(av, False, False)): 
                    if (not ((av.case_) & v.case_).is_undefined and av.case_ != v.case_): 
                        v.case_ = av.case_ = (av.case_) & v.case_
                        need_update_morph = True
                    items[i].morph.add_item(av)
                    if (stat is not None and av.normal_value is not None and len(av.normal_value) > 1): 
                        last = av.normal_value[len(av.normal_value) - 1]
                        if (not last in stat): 
                            stat[last] = 1
                        else: 
                            stat[last] += 1
        if (items[i].is_pronoun or items[i].is_personal_pronoun): 
            res.anafor = items[i].begin_token
            if ((((typ) & (NounPhraseParseAttr.PARSEPRONOUNS))) == (NounPhraseParseAttr.NO)): 
                continue
        tt = Utils.asObjectOrNull(items[i].begin_token, TextToken)
        if (tt is not None and not tt.term.startswith("ВЫСШ")): 
            err = False
            for wf in tt.morph.items: 
                if (wf.class0_.is_adjective): 
                    # "прев." = superlative attribute; "к.ф." = short form.
                    if (wf.contains_attr("прев.", None)): 
                        if ((((typ) & (NounPhraseParseAttr.IGNOREADJBEST))) != (NounPhraseParseAttr.NO)): 
                            err = True
                    if (wf.contains_attr("к.ф.", None) and tt.morph.class0_.is_personal_pronoun): 
                        return None
            if (err): 
                continue
        if (res.morph.case_.is_nominative): 
            v = MiscHelper.get_text_value_of_meta_token(items[i], GetTextAttr.KEEPQUOTES)
            if (not Utils.isNullOrEmpty(v)): 
                if (items[i].get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False) != v): 
                    wf = NounPhraseItemTextVar(items[i].morph, None)
                    wf.normal_value = v
                    wf.class0_ = MorphClass.ADJECTIVE
                    wf.case_ = res.morph.case_
                    if (res.morph.case_.is_prepositional or res.morph.gender == MorphGender.NEUTER or res.morph.gender == MorphGender.FEMINIE): 
                        items[i].morph.add_item(wf)
                    else: 
                        items[i].morph.insert_item(0, wf)
        res.adjectives.append(items[i])
        if (items[i].end_char > res.end_char): 
            res.end_token = items[i].end_token
    # Reject phrases whose adjectives are separated by large gaps with
    # inconsistent capitalization.
    i = 0
    first_pass3046 = True
    while True:
        if first_pass3046: first_pass3046 = False
        else: i += 1
        if (not (i < (len(res.adjectives) - 1))): break
        if (res.adjectives[i].whitespaces_after_count > 5): 
            if (res.adjectives[i].chars != res.adjectives[i + 1].chars): 
                if (not res.adjectives[i + 1].chars.is_all_lower): 
                    return None
                if (res.adjectives[i].chars.is_all_upper and res.adjectives[i + 1].chars.is_capital_upper): 
                    return None
                if (res.adjectives[i].chars.is_capital_upper and res.adjectives[i + 1].chars.is_all_upper): 
                    return None
            if (res.adjectives[i].whitespaces_after_count > 10): 
                if (res.adjectives[i].newlines_after_count == 1): 
                    if (res.adjectives[i].chars.is_capital_upper and i == 0 and res.adjectives[i + 1].chars.is_all_lower): 
                        continue
                    if (res.adjectives[i].chars == res.adjectives[i + 1].chars): 
                        continue
                return None
    if (need_update_morph): 
        noun.morph = MorphCollection()
        for v in noun.noun_morph: 
            noun.morph.add_item(v)
        res.morph = noun.morph
    # Validate punctuation pattern between listed adjectives (commas/ands).
    if (len(res.adjectives) > 0): 
        if (noun.begin_token.previous is not None): 
            if (noun.begin_token.previous.is_comma_and): 
                if (res.adjectives[0].begin_char > noun.begin_char): 
                    pass
                else: 
                    return None
        zap = 0
        and0_ = 0
        cou = 0
        last_and = False
        i = 0
        while i < (len(res.adjectives) - 1): 
            te = res.adjectives[i].end_token.next0_
            if (te is None): 
                return None
            if (te.is_char('(')): 
                pass
            elif (te.is_comma): 
                zap += 1
                last_and = False
            elif (te.is_and or te.is_or): 
                and0_ += 1
                last_and = True
            if (not res.adjectives[i].begin_token.morph.class0_.is_pronoun): 
                cou += 1
            i += 1
        if ((zap + and0_) > 0): 
            if (and0_ > 1): 
                return None
            elif (and0_ == 1 and not last_and): 
                return None
            if ((zap + and0_) != cou): 
                if (and0_ == 1): 
                    pass
                else: 
                    return None
            last = Utils.asObjectOrNull(res.adjectives[len(res.adjectives) - 1], NounPhraseItem)
            if (last.is_pronoun and not last_and): 
                return None
    # Reorder ambiguous adjective normal forms by ending-letter frequency.
    if (stat is not None): 
        for adj in items: 
            if (adj.morph.items_count > 1): 
                w1 = Utils.asObjectOrNull(adj.morph.get_indexer_item(0), NounPhraseItemTextVar)
                w2 = Utils.asObjectOrNull(adj.morph.get_indexer_item(1), NounPhraseItemTextVar)
                if ((len(w1.normal_value) < 2) or (len(w2.normal_value) < 2)): 
                    break
                l1 = w1.normal_value[len(w1.normal_value) - 1]
                l2 = w2.normal_value[len(w2.normal_value) - 1]
                i1 = 0
                i2 = 0
                wrapi1468 = RefOutArgWrapper(0)
                Utils.tryGetValue(stat, l1, wrapi1468)
                i1 = wrapi1468.value
                wrapi2467 = RefOutArgWrapper(0)
                Utils.tryGetValue(stat, l2, wrapi2467)
                i2 = wrapi2467.value
                if (i1 < i2): 
                    adj.morph.remove_item(1)
                    adj.morph.insert_item(0, w2)
    # Reject a phrase starting with a verb form that actually continues
    # a preceding clause ("..., <verb-form> <noun>" in instrumental).
    if (res.begin_token.get_morph_class_in_dictionary().is_verb and len(items) > 0): 
        if (not res.begin_token.chars.is_all_lower or res.begin_token.previous is None): 
            pass
        elif (res.begin_token.previous.morph.class0_.is_preposition): 
            pass
        else: 
            comma = False
            tt = res.begin_token.previous
            first_pass3047 = True
            while True:
                if first_pass3047: first_pass3047 = False
                else: tt = tt.previous
                if (not (tt is not None and tt.end_char <= res.end_char)): break
                if (tt.morph.class0_.is_adverb): 
                    continue
                if (tt.is_char_of(".;")): 
                    break
                if (tt.is_comma): 
                    comma = True
                    continue
                if (tt.is_value("НЕ", None)): 
                    continue
                if (((tt.morph.class0_.is_noun or tt.morph.class0_.is_proper)) and comma): 
                    for it in res.begin_token.morph.items: 
                        if (it.class0_.is_verb and (isinstance(it, MorphWordForm))): 
                            if (tt.morph.check_accord(it, False, False)): 
                                if (res.morph.case_.is_instrumental): 
                                    return None
                    break
    # A single-token phrase that is only an adverb in the dictionary is
    # rejected unless context makes a noun reading plausible.
    if (res.begin_token == res.end_token): 
        mc = res.begin_token.get_morph_class_in_dictionary()
        if (mc.is_adverb): 
            if (res.begin_token.previous is not None and res.begin_token.previous.morph.class0_.is_preposition): 
                pass
            elif (mc.is_noun and not mc.is_preposition and not mc.is_conjunction): 
                pass
            elif (res.begin_token.is_value("ВЕСЬ", None)): 
                pass
            else: 
                return None
    if (def_noun is not None and def_noun.end_token == res.end_token and len(res.adjectives) > 0): 
        res.end_token = res.adjectives[len(res.adjectives) - 1].end_token
    return res
def __try_parse(t: 'Token', is_in_lit: bool, max_char: int = 0) -> typing.List['ReferentToken']: if (t is None): return None is_bracket_regime = False if (t.previous is not None and t.previous.is_char('(')): is_bracket_regime = True blt = BookLinkToken.try_parse(t, 0) if (blt is None): blt = BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED) if (blt is None and not is_bracket_regime): return None t0 = t coef = 0 is_electr_res = False decree = None regtyp = BookLinkAnalyzer.RegionTyp.UNDEFINED num = None spec_see = None book_prev = None if (is_bracket_regime): regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS elif (blt.typ == BookLinkTyp.PERSON): if (not is_in_lit): return None regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS elif (blt.typ == BookLinkTyp.NUMBER): num = blt.value t = blt.end_token.next0_ if (t is None or t.is_newline_before): return None if (not t.is_whitespace_before): if (isinstance(t, NumberToken)): n = t.value if ((((n == "3" or n == "0")) and not t.is_whitespace_after and (isinstance(t.next0_, TextToken))) and t.next0_.chars.is_all_lower): pass else: return None elif (not (isinstance(t, TextToken)) or t.chars.is_all_lower): r = t.get_referent() if (isinstance(r, PersonReferent)): pass elif (is_in_lit and r is not None and r.type_name == "DECREE"): pass else: return None first_pass3025 = True while True: if first_pass3025: first_pass3025 = False else: t = t.next0_ if (not (t is not None)): break if (isinstance(t, NumberToken)): break if (not (isinstance(t, TextToken))): break if (BracketHelper.can_be_start_of_sequence(t, True, False)): break if (not t.chars.is_letter): continue bbb = BookLinkToken.try_parse(t, 0) if (bbb is not None): if (bbb.typ == BookLinkTyp.TAMZE): spec_see = bbb t = bbb.end_token.next0_ break if (bbb.typ == BookLinkTyp.SEE): t = bbb.end_token continue break if (spec_see is not None and spec_see.typ == BookLinkTyp.TAMZE): coef += 1 max0_ = 1000 tt = t0 while tt is not None and max0_ > 0: if (isinstance(tt.get_referent(), 
BookLinkRefReferent)): book_prev = tt.get_referent().book break tt = tt.previous max0_ -= 1 blt1 = BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED) if (blt1 is not None and blt1.typ == BookLinkTyp.PERSON): regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS else: ok = False tt = t first_pass3026 = True while True: if first_pass3026: first_pass3026 = False else: tt = (None if tt is None else tt.next0_) if (not (tt is not None)): break if (tt.is_newline_before): break if (is_in_lit and tt.get_referent() is not None and tt.get_referent().type_name == "DECREE"): ok = True decree = tt break bbb = BookLinkToken.try_parse(tt, 0) if (bbb is None): continue if (bbb.typ == BookLinkTyp.ELECTRONRES): is_electr_res = True ok = True break if (bbb.typ == BookLinkTyp.DELIMETER): tt = bbb.end_token.next0_ if (BookLinkToken.try_parse_author( tt, FioTemplateType.UNDEFINED) is not None): ok = True break bbb = BookLinkToken.try_parse(tt, 0) if (bbb is not None): if (bbb.typ == BookLinkTyp.EDITORS or bbb.typ == BookLinkTyp.TRANSLATE or bbb.typ == BookLinkTyp.SOSTAVITEL): ok = True break if (not ok and not is_in_lit): if (BookLinkToken.check_link_before(t0, num)): pass else: return None regtyp = BookLinkAnalyzer.RegionTyp.NAME else: return None res = BookLinkReferent() corr_authors = list() t00 = t blt00 = None start_of_name = None prev_pers_templ = FioTemplateType.UNDEFINED if (regtyp == BookLinkAnalyzer.RegionTyp.AUTHORS): first_pass3027 = True while True: if first_pass3027: first_pass3027 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (t.is_char_of(".;") or t.is_comma_and): continue if (t.is_char('/')): break if ((t.is_char('(') and t.next0_ is not None and t.next0_.is_value("EDS", None)) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')): t = t.next0_.next0_.next0_ break blt = BookLinkToken.try_parse_author(t, prev_pers_templ) if (blt is None and t.previous is not None and t.previous.is_and): blt 
= BookLinkToken.try_parse_author( t.previous, FioTemplateType.UNDEFINED) if (blt is None): if ((isinstance(t.get_referent(), OrganizationReferent)) and blt00 is not None): bbb2 = BookLinkToken.try_parse(t.next0_, 0) if (bbb2 is not None): if (bbb2.typ == BookLinkTyp.YEAR): res.add_slot(BookLinkReferent.ATTR_AUTHOR, t.get_referent(), False, 0) res.year = int(bbb2.value) coef += 0.5 t = bbb2.end_token.next0_ break if (blt.typ == BookLinkTyp.PERSON): tt2 = blt.end_token.next0_ bbb2 = BookLinkToken.try_parse(tt2, 0) if (bbb2 is not None): if (bbb2.typ == BookLinkTyp.YEAR): res.year = int(bbb2.value) coef += 0.5 blt.end_token = bbb2.end_token blt00 = (None) if (blt00 is not None and ((blt00.end_token.next0_ == blt.begin_token or blt.begin_token.previous.is_char('.')))): tt11 = blt.end_token.next0_ nex = BookLinkToken.try_parse(tt11, 0) if (nex is not None and nex.typ == BookLinkTyp.ANDOTHERS): pass else: if (tt11 is None): break if (tt11.is_char('/') and tt11.next0_ is not None and tt11.next0_.is_char('/')): break if (tt11.is_char(':')): break if ((str(blt).find('.') < 0) and str(blt00).find('.') > 0): break if ((isinstance(tt11, TextToken)) and tt11.chars.is_all_lower): break if (tt11.is_char_of(",.;") and tt11.next0_ is not None): tt11 = tt11.next0_ nex = BookLinkToken.try_parse(tt11, 0) if (nex is not None and nex.typ != BookLinkTyp.PERSON and nex.typ != BookLinkTyp.ANDOTHERS): break elif ( (blt00 is not None and blt00.person_template != FioTemplateType.UNDEFINED and blt.person_template != blt00.person_template) and blt.person_template == FioTemplateType.NAMESURNAME): if (blt.end_token.next0_ is None or not blt.end_token.next0_.is_comma_and): break if (BookLinkToken.try_parse_author( blt.end_token.next0_.next0_, FioTemplateType.UNDEFINED) is not None): pass else: break if (blt00 is None and blt.person_template == FioTemplateType.NAMESURNAME): tt = blt.end_token.next0_ if (tt is not None and tt.is_hiphen): tt = tt.next0_ if (isinstance(tt, NumberToken)): break 
BookLinkAnalyzer.__add_author(res, blt) coef += 1 t = blt.end_token if (isinstance(t.get_referent(), PersonReferent)): corr_authors.append( Utils.asObjectOrNull(t, ReferentToken)) blt00 = blt prev_pers_templ = blt.person_template start_of_name = blt.start_of_name if ((start_of_name) is not None): t = t.next0_ break continue if (blt.typ == BookLinkTyp.ANDOTHERS): coef += 0.5 t = blt.end_token.next0_ res.authors_and_other = True break break if (t is None): return None if ((t.is_newline_before and t != t0 and num is None) and res.find_slot(BookLinkReferent.ATTR_AUTHOR, None, True) is None): return None if (start_of_name is None): if (t.chars.is_all_lower): coef -= (1) if (t.chars.is_latin_letter and not is_electr_res and num is None): if (res.get_slot_value(BookLinkReferent.ATTR_AUTHOR) is None): return None tn0 = t tn1 = None uri = None next_num = None nn = 0 wrapnn376 = RefOutArgWrapper(0) inoutres377 = Utils.tryParseInt(Utils.ifNotNull(num, ""), wrapnn376) nn = wrapnn376.value if (inoutres377): next_num = str((nn + 1)) br = (BracketHelper.try_parse( t, Utils.valToEnum( (BracketParseAttr.CANCONTAINSVERBS) | (BracketParseAttr.CANBEMANYLINES), BracketParseAttr), 100) if BracketHelper.can_be_start_of_sequence(t, True, False) else None) if (br is not None): t = t.next0_ pages = None first_pass3028 = True while True: if first_pass3028: first_pass3028 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (br is not None and br.end_token == t): tn1 = t break tit = TitleItemToken.try_attach(t) if (tit is not None): if ((tit.typ == TitleItemToken.Types.TYP and tn0 == t and br is None) and BracketHelper.can_be_start_of_sequence( tit.end_token.next0_, True, False)): br = BracketHelper.try_parse(tit.end_token.next0_, BracketParseAttr.NO, 100) if (br is not None): coef += (1) if (num is not None): coef += 1 tn0 = br.begin_token tn1 = br.end_token res.typ = tit.value.lower() t = br.end_token.next0_ break if 
(t.is_newline_before and t != tn0): if (br is not None and (t.end_char < br.end_char)): pass elif (not MiscHelper.can_be_start_of_sentence(t)): pass else: if (t.newlines_before_count > 1): break if ((isinstance(t, NumberToken)) and num is not None and t.int_value is not None): if (num == str((t.int_value - 1))): break elif (num is not None): pass else: nnn = NounPhraseHelper.try_parse( t.previous, Utils.valToEnum( ((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.PARSEADVERBS) | (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)) | (NounPhraseParseAttr.MULTILINES), NounPhraseParseAttr), 0, None) if (nnn is not None and nnn.end_char >= t.end_char): pass else: break if (t.is_char_of(".;") and t.whitespaces_after_count > 0): tit = TitleItemToken.try_attach(t.next0_) if ((tit) is not None): if (tit.typ == TitleItemToken.Types.TYP): break stop = True words = 0 notwords = 0 tt = t.next0_ first_pass3029 = True while True: if first_pass3029: first_pass3029 = False else: tt = tt.next0_ if (not (tt is not None)): break blt0 = BookLinkToken.try_parse(tt, 0) if (blt0 is None): if (tt.is_newline_before): break if ((isinstance(tt, TextToken)) and not tt. 
get_morph_class_in_dictionary().is_undefined): words += 1 else: notwords += 1 if (words > 6 and words > (notwords * 4)): stop = False break continue if ((blt0.typ == BookLinkTyp.DELIMETER or blt0.typ == BookLinkTyp.TRANSLATE or blt0.typ == BookLinkTyp.TYPE) or blt0.typ == BookLinkTyp.GEO or blt0.typ == BookLinkTyp.PRESS): stop = False break if (br is not None and br.end_token.previous.end_char > t.end_char): stop = False if (stop): break if (t == decree): t = t.next0_ break blt = BookLinkToken.try_parse(t, 0) if (blt is None): tn1 = t continue if (blt.typ == BookLinkTyp.DELIMETER): break if (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TRANSLATE or blt.typ == BookLinkTyp.NAMETAIL) or blt.typ == BookLinkTyp.TYPE or blt.typ == BookLinkTyp.VOLUME) or blt.typ == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES): coef += 1 break if (blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS): if (t.previous.is_hiphen or t.previous.is_char_of(".;") or blt.add_coef > 0): break if (blt.typ == BookLinkTyp.YEAR): if (t.previous is not None and t.previous.is_comma): break if (blt.typ == BookLinkTyp.ELECTRONRES): is_electr_res = True break if (blt.typ == BookLinkTyp.URL): if (t == tn0 or t.previous.is_char_of(":.")): is_electr_res = True break tn1 = t if (tn1 is None and start_of_name is None): if (is_electr_res): uri_re = BookLinkReferent() rt0 = ReferentToken(uri_re, t00, t) rts0 = list() bref0 = BookLinkRefReferent._new372(uri_re) if (num is not None): bref0.number = num rt01 = ReferentToken(bref0, t0, rt0.end_token) ok = False while t is not None: if (t.is_newline_before): break blt0 = BookLinkToken.try_parse(t, 0) if (blt0 is not None): if (isinstance(blt0.ref, UriReferent)): uri_re.add_slot( BookLinkReferent.ATTR_URL, Utils.asObjectOrNull(blt0.ref, UriReferent), False, 0) ok = True t = blt0.end_token rt0.end_token = rt01.end_token = t t = t.next0_ if (ok): rts0.append(rt01) rts0.append(rt0) return rts0 if (decree is not None and num is not None): rts0 = 
list() bref0 = BookLinkRefReferent._new372(decree.get_referent()) if (num is not None): bref0.number = num rt01 = ReferentToken(bref0, t0, decree) t = decree.next0_ while t is not None: if (t.is_newline_before): break if (isinstance(t, TextToken)): if (t.is_pure_verb): return None rt01.end_token = t t = t.next0_ rts0.append(rt01) return rts0 if (book_prev is not None): tt = t while tt is not None and ((tt.is_char_of(",.") or tt.is_hiphen)): tt = tt.next0_ blt0 = BookLinkToken.try_parse(tt, 0) if (blt0 is not None and blt0.typ == BookLinkTyp.PAGERANGE): rts0 = list() bref0 = BookLinkRefReferent._new372(book_prev) if (num is not None): bref0.number = num bref0.pages = blt0.value rt00 = ReferentToken(bref0, t0, blt0.end_token) rts0.append(rt00) return rts0 return None if (br is not None and ((tn1 == br.end_token or tn1 == br.end_token.previous))): tn0 = tn0.next0_ tn1 = tn1.previous if (start_of_name is None): while tn0 is not None: if (tn0.is_char_of(":,~")): tn0 = tn0.next0_ else: break while tn1 is not None and tn1.begin_char > tn0.begin_char: if (tn1.is_char_of(".;,:(~") or tn1.is_hiphen or tn1.is_value("РЕД", None)): pass else: break tn1 = tn1.previous nam = MiscHelper.get_text_value( tn0, tn1, Utils.valToEnum( (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)) if (start_of_name is not None): if (nam is None or (len(nam) < 3)): nam = start_of_name else: nam = "{0}{1}{2}".format( start_of_name, (" " if tn0.is_whitespace_before else ""), nam) if (nam is None): return None res.name = nam if (num is None and not is_in_lit): if (len(nam) < 20): return None coef -= (2) if (len(nam) > 500): coef -= (math.floor(len(nam) / 500)) if (is_bracket_regime): coef -= 1 if (len(nam) > 200): if (num is None): return None if (res.find_slot(BookLinkReferent.ATTR_AUTHOR, None, True) is None and not BookLinkToken.check_link_before(t0, num)): return None en = 0 ru = 0 ua = 0 cha = 0 nocha = 0 chalen = 0 lt0 = tn0 lt1 = tn1 if (tn1 is None): if (t is None): return None 
lt0 = t0 lt1 = t tn1 = t.previous tt = lt0 while tt is not None and tt.end_char <= lt1.end_char: if ((isinstance(tt, TextToken)) and tt.chars.is_letter): if (tt.chars.is_latin_letter): en += 1 elif (tt.morph.language.is_ua): ua += 1 elif (tt.morph.language.is_ru): ru += 1 if (tt.length_char > 2): cha += 1 chalen += tt.length_char elif (not (isinstance(tt, ReferentToken))): nocha += 1 tt = tt.next0_ if (ru > (ua + en)): res.lang = "RU" elif (ua > (ru + en)): res.lang = "UA" elif (en > (ru + ua)): res.lang = "EN" if (nocha > 3 and nocha > cha and start_of_name is None): if (nocha > (math.floor(chalen / 3))): coef -= (2) if (res.lang == "EN"): tt = tn0.next0_ first_pass3030 = True while True: if first_pass3030: first_pass3030 = False else: tt = tt.next0_ if (not (tt is not None and (tt.end_char < tn1.end_char))): break if (tt.is_comma and tt.next0_ is not None and ((not tt.next0_.chars.is_all_lower or (isinstance(tt.next0_, ReferentToken))))): if (tt.next0_.next0_ is not None and tt.next0_.next0_.is_comma_and): if (isinstance(tt.next0_, ReferentToken)): pass else: continue nam = MiscHelper.get_text_value( tn0, tt.previous, Utils.valToEnum((GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER), GetTextAttr)) if (nam is not None and len(nam) > 15): res.name = nam break rt = ReferentToken(res, t00, tn1) authors = True edits = False br = (None) first_pass3031 = True while True: if first_pass3031: first_pass3031 = False else: t = t.next0_ if (not (t is not None)): break if (max_char > 0 and t.begin_char >= max_char): break if (BracketHelper.can_be_start_of_sequence(t, False, False)): br = BracketHelper.try_parse(t, BracketParseAttr.CANBEMANYLINES, 100) if (br is not None and br.length_char > 300): br = (None) blt = BookLinkToken.try_parse(t, 0) if (t.is_newline_before and not t.is_char('/') and not t.previous.is_char('/')): if (blt is not None and blt.typ == BookLinkTyp.NUMBER): break if (t.previous.is_char_of(":")): pass elif (blt is not None and (( ((blt.typ == 
BookLinkTyp.DELIMETER or blt.typ == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES) or blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS) or blt.typ == BookLinkTyp.N))): pass elif (num is not None and BookLinkToken.try_parse_author( t, FioTemplateType.UNDEFINED) is not None): pass elif (num is not None and blt is not None and blt.typ != BookLinkTyp.NUMBER): pass elif (br is not None and (t.end_char < br.end_char) and t.begin_char > br.begin_char): pass else: ok = False mmm = 50 tt = t.next0_ while tt is not None and mmm > 0: if (tt.is_newline_before): blt2 = BookLinkToken.try_parse(tt, 0) if (blt2 is not None and blt2.typ == BookLinkTyp.NUMBER and blt2.value == next_num): ok = True break if (blt2 is not None): if (blt2.typ == BookLinkTyp.PAGES or blt2.typ == BookLinkTyp.GEO or blt2.typ == BookLinkTyp.PRESS): ok = True break tt = tt.next0_ mmm -= 1 if (not ok): npt = NounPhraseHelper.try_parse( t.previous, Utils.valToEnum( ((NounPhraseParseAttr.MULTILINES) | (NounPhraseParseAttr.PARSEADVERBS) | (NounPhraseParseAttr.PARSEPREPOSITION)) | (NounPhraseParseAttr.PARSEVERBS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None) if (npt is not None and npt.end_char >= t.end_char): ok = True if (not ok): break rt.end_token = t if (blt is not None): rt.end_token = blt.end_token if (t.is_char_of(".,") or t.is_hiphen): continue if (t.is_value("С", None)): pass if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None and blt.typ == BookLinkTyp.EDITORS): edits = True t = blt.end_token coef += 1 continue if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None and blt.typ == BookLinkTyp.SOSTAVITEL): edits = False t = blt.end_token coef += 1 continue if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and authors): blt2 = BookLinkToken.try_parse_author(t, prev_pers_templ) if (blt2 is not None and blt2.typ == BookLinkTyp.PERSON): prev_pers_templ = blt2.person_template if (not edits): BookLinkAnalyzer.__add_author(res, blt2) coef += 1 t 
= blt2.end_token continue if (blt2 is not None and blt2.typ == BookLinkTyp.ANDOTHERS): if (not edits): res.authors_and_other = True coef += 1 t = blt2.end_token continue authors = False if (blt is None): continue if (blt.typ == BookLinkTyp.ELECTRONRES or blt.typ == BookLinkTyp.URL): is_electr_res = True if (blt.typ == BookLinkTyp.ELECTRONRES): coef += 1.5 else: coef += 0.5 if (isinstance(blt.ref, UriReferent)): res.add_slot(BookLinkReferent.ATTR_URL, Utils.asObjectOrNull(blt.ref, UriReferent), False, 0) elif (blt.typ == BookLinkTyp.YEAR): if (res.year == 0): res.year = int(blt.value) coef += 0.5 elif (blt.typ == BookLinkTyp.DELIMETER): coef += 1 if (blt.length_char == 2): regtyp = BookLinkAnalyzer.RegionTyp.SECOND else: regtyp = BookLinkAnalyzer.RegionTyp.FIRST elif ( (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TYPE or blt.typ == BookLinkTyp.PAGES) or blt.typ == BookLinkTyp.NAMETAIL or blt.typ == BookLinkTyp.TRANSLATE) or blt.typ == BookLinkTyp.PRESS or blt.typ == BookLinkTyp.VOLUME) or blt.typ == BookLinkTyp.N): coef += 1 elif (blt.typ == BookLinkTyp.PAGERANGE): pages = blt coef += 1 if (is_bracket_regime and blt.end_token.next0_ is not None and blt.end_token.next0_.is_char(')')): coef += (2) if (res.name is not None and res.find_slot(BookLinkReferent.ATTR_AUTHOR, None, True) is not None): coef = (10) elif (blt.typ == BookLinkTyp.GEO and ((regtyp == BookLinkAnalyzer.RegionTyp.SECOND or regtyp == BookLinkAnalyzer.RegionTyp.FIRST))): coef += 1 elif (blt.typ == BookLinkTyp.GEO and t.previous is not None and t.previous.is_char('.')): coef += 1 elif (blt.typ == BookLinkTyp.ANDOTHERS): coef += 1 if (authors): res.authors_and_other = True coef += blt.add_coef t = blt.end_token if ((coef < 2.5) and num is not None): if (BookLinkToken.check_link_before(t0, num)): coef += (2) elif (BookLinkToken.check_link_after(rt.end_token, num)): coef += (1) if (rt.length_char > 500): return None if (is_in_lit): coef += 1 if (coef < 2.5): if (is_electr_res and uri is not 
None): pass elif (coef >= 2 and is_in_lit): pass else: return None for rr in corr_authors: pits0 = PersonItemToken.try_attach_list( rr.begin_token, None, PersonItemToken.ParseAttr.CANINITIALBEDIGIT, 10) if (pits0 is None or (len(pits0) < 2)): continue if (pits0[0].typ == PersonItemToken.ItemType.VALUE): exi = False for i in range(len(rr.referent.slots) - 1, -1, -1): s = rr.referent.slots[i] if (s.type_name == PersonReferent.ATTR_LASTNAME): ln = Utils.asObjectOrNull(s.value, str) if (ln is None): continue if (ln == pits0[0].value): exi = True continue if (ln.find('-') > 0): ln = ln[0:0 + ln.find('-')] if (pits0[0].begin_token.is_value(ln, None)): del rr.referent.slots[i] if (not exi): rr.referent.add_slot(PersonReferent.ATTR_LASTNAME, pits0[0].value, False, 0) rts = list() bref = BookLinkRefReferent._new372(res) if (num is not None): bref.number = num rt1 = ReferentToken(bref, t0, rt.end_token) if (pages is not None): if (pages.value is not None): bref.pages = pages.value rt.end_token = pages.begin_token.previous rts.append(rt1) rts.append(rt) return rts
def create_nickname(pr : 'PersonReferent', t : 'Token') -> 'Token':
    """Attach a nickname/alias found at token `t` to person referent `pr`.

    Skips leading punctuation and prepositions, requires one of the keyword
    tokens ("ПРОЗВИЩЕ"/"КЛИЧКА"/"ПСЕВДОНИМ"/"ПСЕВДО"/"ПОЗЫВНОЙ" and their
    Ukrainian variants), then reads the nickname value either from quoted
    fragments or from a short sequence of person-like items, adding each
    value as an ATTR_NICKNAME slot on `pr`.

    :param pr: person referent to receive ATTR_NICKNAME slots
    :param t: token at (or just before) the nickname keyword
    :return: the last consumed token, or None when nothing was attached
    """
    has_keyw = False
    is_br = False  # saw an opening '(' before the keyword
    # first_passNNNN emulates a C#-style for-loop: advance t only after
    # the first iteration
    first_pass3367 = True
    while True:
        if first_pass3367: first_pass3367 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_hiphen or t.is_comma or t.is_char_of(".:;")):
            continue
        if (t.morph.class0_.is_preposition):
            continue
        if (t.is_char('(')):
            is_br = True
            continue
        if ((t.is_value("ПРОЗВИЩЕ", "ПРІЗВИСЬКО") or t.is_value("КЛИЧКА", None) or t.is_value("ПСЕВДОНИМ", "ПСЕВДОНІМ")) or t.is_value("ПСЕВДО", None) or t.is_value("ПОЗЫВНОЙ", "ПОЗИВНИЙ")):
            has_keyw = True
            continue
        break
    if (not has_keyw or t is None):
        return None
    if (BracketHelper.is_bracket(t, True)):
        # quoted form: «Nick» («Nick2», «Nick3» ...)
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (br is not None):
            ni = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
            if (ni is not None):
                pr.add_slot(PersonReferent.ATTR_NICKNAME, ni, False, 0)
                t = br.end_token
                # consume further comma/and-separated quoted nicknames
                tt = t.next0_
                first_pass3368 = True
                while True:
                    if first_pass3368: first_pass3368 = False
                    else: tt = tt.next0_
                    if (not (tt is not None)): break
                    if (tt.is_comma_and):
                        continue
                    if (not BracketHelper.is_bracket(tt, True)):
                        break
                    br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
                    if (br is None):
                        break
                    ni = MiscHelper.get_text_value(br.begin_token.next0_, br.end_token.previous, GetTextAttr.NO)
                    if (ni is not None):
                        pr.add_slot(PersonReferent.ATTR_NICKNAME, ni, False, 0)
                    tt = br.end_token
                    t = tt
                # close the surrounding '(' if one was opened
                if (is_br and t.next0_ is not None and t.next0_.is_char(')')):
                    t = t.next0_
                return t
    else:
        # unquoted form: one or two person-item tokens, or a single
        # capitalized referent token
        ret = None
        first_pass3369 = True
        while True:
            if first_pass3369: first_pass3369 = False
            else: t = t.next0_
            if (not (t is not None)): break
            if (t.is_comma_and):
                continue
            if (ret is not None and t.chars.is_all_lower):
                break
            if (t.whitespaces_before_count > 2):
                break
            pli = PersonItemToken.try_attach_list(t, None, PersonItemToken.ParseAttr.NO, 10)
            if (pli is not None and ((len(pli) == 1 or len(pli) == 2))):
                ni = MiscHelper.get_text_value(pli[0].begin_token, pli[len(pli) - 1].end_token, GetTextAttr.NO)
                if (ni is not None):
                    pr.add_slot(PersonReferent.ATTR_NICKNAME, ni, False, 0)
                    t = pli[len(pli) - 1].end_token
                    if (is_br and t.next0_ is not None and t.next0_.is_char(')')):
                        t = t.next0_
                    ret = t
                continue
            if ((isinstance(t, ReferentToken)) and not t.chars.is_all_lower and t.begin_token == t.end_token):
                val = MiscHelper.get_text_value_of_meta_token(Utils.asObjectOrNull(t, ReferentToken), GetTextAttr.NO)
                pr.add_slot(PersonReferent.ATTR_NICKNAME, val, False, 0)
                if (is_br and t.next0_ is not None and t.next0_.is_char(')')):
                    t = t.next0_
                ret = t
                continue
            break
        return ret
    return None
def process(self, kit: 'AnalysisKit') -> None:
    """Analyzer pass: find weapon mentions in `kit`, register them as
    referents, then embed further occurrences matched by the collected
    model strings and names.

    Stage 1 walks the token chain, parses weapon item sequences and
    attaches them via self.__try_attach, registering/embedding each
    referent while filling:
      * models / objs_by_model - model string (and a "BRAND MODEL"
        variant) -> list of referents carrying that model;
      * obj_by_names - NAME slot value -> referent.
    Stage 2 re-walks the chain and embeds ReferentTokens for repeated
    mentions of those models/names (including bracket-quoted names).

    :param kit: the analysis kit whose token chain is processed in place
    """
    ad = kit.get_analyzer_data(self)
    models = TerminCollection()
    # model string -> list of weapon referents having that model
    objs_by_model = dict()
    obj_by_names = TerminCollection()
    t = kit.first_token
    # first_passNNNN emulates a C#-style for-loop: advance t only after
    # the first iteration
    first_pass3428 = True
    while True:
        if first_pass3428: first_pass3428 = False
        else: t = t.next0_
        if (not (t is not None)): break
        its = WeaponItemToken.try_parse_list(t, 10)
        if (its is None):
            continue
        rts = self.__try_attach(its, False)
        if (rts is not None):
            for rt in rts:
                rt.referent = ad.register_referent(rt.referent)
                kit.embed_token(rt)
                t = (rt)
                for s in rt.referent.slots:
                    if (s.type_name == WeaponReferent.ATTR_MODEL):
                        mod = str(s.value)
                        # k == 0: bare model; k == 1: brand-prefixed model
                        for k in range(2):
                            # skip models starting with a digit
                            if (not str.isdigit(mod[0])):
                                li = [ ]
                                wrapli2804 = RefOutArgWrapper(None)
                                inoutres2805 = Utils.tryGetValue(objs_by_model, mod, wrapli2804)
                                li = wrapli2804.value
                                if (not inoutres2805):
                                    li = list()
                                    objs_by_model[mod] = li
                                if (not rt.referent in li):
                                    li.append(rt.referent)
                                models.add_string(mod, li, None, False)
                            if (k > 0):
                                break
                            brand = rt.referent.get_string_value(WeaponReferent.ATTR_BRAND)
                            if (brand is None):
                                break
                            mod = "{0} {1}".format(brand, mod)
                    elif (s.type_name == WeaponReferent.ATTR_NAME):
                        obj_by_names.add(Termin._new100(str(s.value), rt.referent))
    if (len(objs_by_model) == 0 and len(obj_by_names.termins) == 0):
        return
    # second sweep: embed repeated mentions of collected models/names
    t = kit.first_token
    first_pass3429 = True
    while True:
        if first_pass3429: first_pass3429 = False
        else: t = t.next0_
        if (not (t is not None)): break
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 10)
        if (br is not None):
            # a known name fully enclosed in brackets/quotes
            toks = obj_by_names.try_parse(t.next0_, TerminParseAttr.NO)
            if (toks is not None and toks.end_token.next0_ == br.end_token):
                rt0 = ReferentToken(Utils.asObjectOrNull(toks.termin.tag, Referent), br.begin_token, br.end_token)
                kit.embed_token(rt0)
                t = (rt0)
                continue
        if (not (isinstance(t, TextToken))):
            continue
        if (not t.chars.is_letter):
            continue
        tok = models.try_parse(t, TerminParseAttr.NO)
        if (tok is None):
            if (not t.chars.is_all_lower):
                tok = obj_by_names.try_parse(t, TerminParseAttr.NO)
        if (tok is None):
            continue
        # a match glued to following text is accepted only before , . )
        # or a closing bracket
        if (not tok.is_whitespace_after):
            if (tok.end_token.next0_ is None or not tok.end_token.next0_.is_char_of(",.)")):
                if (not BracketHelper.is_bracket(tok.end_token.next0_, False)):
                    continue
        tr = None
        li = Utils.asObjectOrNull(tok.termin.tag, list)
        if (li is not None and len(li) == 1):
            # model is unambiguous only when exactly one referent has it
            tr = li[0]
        else:
            tr = (Utils.asObjectOrNull(tok.termin.tag, Referent))
        if (tr is not None):
            # pull in a preceding brand token, if any
            tit = WeaponItemToken.try_parse(tok.begin_token.previous, None, False, True)
            if (tit is not None and tit.typ == WeaponItemToken.Typs.BRAND):
                tr.add_slot(WeaponReferent.ATTR_BRAND, tit.value, False, 0)
                tok.begin_token = tit.begin_token
            rt0 = ReferentToken(tr, tok.begin_token, tok.end_token)
            kit.embed_token(rt0)
            t = (rt0)
            continue
def parse(t : 'Token', max_char : int=0, prev : 'InstrToken'=None) -> 'InstrToken':
    """Parse one structural fragment (heading/line) of a legal-document
    token stream starting at `t` and classify it as an InstrToken.

    The returned token's `typ` is an ILTypes value set by the branches
    below: PERSON, TYP, REGNUMBER, ORGANIZATION, GEO, DIRECTIVE, APPENDIX,
    APPROVED, DATE, QUESTION or UNDEFINED.

    :param t: first token of the fragment (None -> None)
    :param max_char: hard right boundary by character position (0 = none)
    :param prev: previously parsed fragment, used as context (e.g. a
        preceding PERSON or APPENDIX influences classification)
    :return: an InstrToken covering the fragment, or None
    """
    from pullenti.ner.instrument.internal.InstrToken1 import InstrToken1
    is_start_of_line = False
    t00 = t
    if (t is not None):
        is_start_of_line = t00.is_newline_before
    # skip table control characters (except 0x1F), tracking line starts
    while t is not None:
        if (t.is_table_control_char and not t.is_char(chr(0x1F))):
            if (t.is_newline_after and not is_start_of_line):
                is_start_of_line = True
            t = t.next0_
        else:
            break
    if (t is None):
        return None
    if (t.is_newline_before):
        is_start_of_line = True
    if (is_start_of_line):
        # a table-of-contents heading at the start of a line
        if ((t.is_value("СОДЕРЖИМОЕ", "ВМІСТ") or t.is_value("СОДЕРЖАНИЕ", "ЗМІСТ") or t.is_value("ОГЛАВЛЕНИЕ", "ЗМІСТ")) or ((t.is_value("СПИСОК", None) and t.next0_ is not None and t.next0_.is_value("РАЗДЕЛ", None)))):
            cont = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
            if (cont is not None and cont.typ == InstrToken1.Types.INDEX):
                return InstrToken(t, cont.end_token)
    t0 = t
    t1 = None
    has_word = False
    # main scan over the fragment; first_passNNNN emulates a C#-style
    # for-loop (advance t only after the first iteration)
    first_pass3255 = True
    while True:
        if first_pass3255: first_pass3255 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_newline_before and t != t0):
            break
        if (max_char > 0 and t.begin_char > max_char):
            break
        if (is_start_of_line and t == t0):
            # "ГЛАВА <person>" heading
            if (t.is_value("ГЛАВА", None)):
                next0__ = InstrToken.parse(t.next0_, 0, None)
                if (next0__ is not None and next0__.typ == ILTypes.PERSON):
                    next0__.begin_token = t
                    return next0__
            tt = None
            if ((isinstance(t.get_referent(), PersonReferent)) or (isinstance(t.get_referent(), PersonPropertyReferent)) or (isinstance(t.get_referent(), InstrumentParticipantReferent))):
                return InstrToken.__correct_person(InstrToken._new1511(t00, t, ILTypes.PERSON, t))
            is_ref = False
            if (isinstance(t.get_referent(), PersonPropertyReferent)):
                tt = t.next0_
                is_ref = True
            elif (prev is not None and prev.typ == ILTypes.PERSON):
                rt = t.kit.process_referent(PersonAnalyzer.ANALYZER_NAME, t)
                if (rt is not None):
                    if (isinstance(rt.referent, PersonReferent)):
                        return InstrToken._new1512(t00, rt.end_token, ILTypes.PERSON)
                    tt = rt.end_token.next0_
            # look ahead (a few lines) for a person referent
            cou = 0
            t11 = (None if tt is None else tt.previous)
            first_pass3256 = True
            while True:
                if first_pass3256: first_pass3256 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_table_control_char):
                    continue
                re = tt.get_referent()
                if (isinstance(re, PersonReferent)):
                    return InstrToken._new1511(t00, tt, ILTypes.PERSON, tt)
                if (isinstance(re, GeoReferent)):
                    t11 = tt
                    continue
                if (re is not None):
                    break
                if (DecreeToken.is_keyword(tt, False) is not None):
                    break
                if (tt.is_newline_before):
                    cou += 1
                    if (cou > 4):
                        break
            if (tt is None and is_ref):
                return InstrToken._new1511(t00, Utils.ifNotNull(t11, t), ILTypes.PERSON, t)
            # decree-related items at the start of the fragment
            dt = DecreeToken.try_attach(t, None, False)
            if (dt is not None):
                if (dt.typ == DecreeToken.ItemType.TYP and not t.chars.is_all_lower):
                    if (t != t0):
                        break
                    # a document-type heading counts only when no verb
                    # follows on the same line
                    has_verb_ = False
                    tt = dt.end_token
                    while tt is not None:
                        if (tt.is_newline_before):
                            break
                        elif ((isinstance(tt, TextToken)) and tt.is_pure_verb):
                            has_verb_ = True
                            break
                        tt = tt.next0_
                    if (not has_verb_):
                        res2 = InstrToken._new1515(t0, dt.end_token, ILTypes.TYP, Utils.ifNotNull(dt.full_value, dt.value))
                        if (res2.value == "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ" or res2.value == "ДОДАТКОВА УГОДА"):
                            if (res2.begin_char > 500 and res2.newlines_before_count > 1):
                                res2.typ = ILTypes.APPENDIX
                        return res2
                if (dt.typ == DecreeToken.ItemType.NUMBER):
                    if (t != t0):
                        break
                    return InstrToken._new1515(t0, dt.end_token, ILTypes.REGNUMBER, dt.value)
                if (dt.typ == DecreeToken.ItemType.ORG):
                    if (t != t0):
                        break
                    return InstrToken._new1517(t0, dt.end_token, ILTypes.ORGANIZATION, dt.ref, dt.value)
                if (dt.typ == DecreeToken.ItemType.TERR):
                    if (t != t0):
                        break
                    re = InstrToken._new1517(t0, dt.end_token, ILTypes.GEO, dt.ref, dt.value)
                    # absorb trailing "КРЕМЛЬ" / "ДОМ СОВЕТОВ" location tails
                    t1 = re.end_token.next0_
                    if (t1 is not None and t1.is_char(',')):
                        t1 = t1.next0_
                    if (t1 is not None and t1.is_value("КРЕМЛЬ", None)):
                        re.end_token = t1
                    elif ((t1 is not None and t1.is_value("ДОМ", "БУДИНОК") and t1.next0_ is not None) and t1.next0_.is_value("СОВЕТ", "РАД")):
                        re.end_token = t1.next0_
                        if (t1.next0_.next0_ is not None and (isinstance(t1.next0_.next0_.get_referent(), GeoReferent))):
                            re.end_token = t1.next0_.next0_
                    return re
                if (dt.typ == DecreeToken.ItemType.OWNER):
                    if (t != t0):
                        break
                    if (dt.ref is not None and str(dt.ref.referent).startswith("агент")):
                        dt = (None)
                    if (dt is not None):
                        res1 = InstrToken._new1517(t0, dt.end_token, ILTypes.PERSON, dt.ref, dt.value)
                        return InstrToken.__correct_person(res1)
        # skip bracketed sequences wholesale
        if (BracketHelper.can_be_start_of_sequence(t, False, False)):
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t1 = br.end_token
                t = t1
                continue
        if (t.next0_ is not None and BracketHelper.can_be_end_of_sequence(t.next0_, False, None, False)):
            t1 = t.next0_
            t = t1
            continue
        if (isinstance(t, TextToken)):
            if (t.is_char('_')):
                t1 = t
                continue
        r = t.get_referent()
        if (isinstance(r, DateReferent)):
            tt = t
            if (tt.next0_ is not None and tt.next0_.is_char_of(",;")):
                tt = tt.next0_
            if (not t.is_newline_before and not tt.is_newline_after):
                t1 = tt
                continue
            if (not has_word):
                return InstrToken._new1511(t, tt, ILTypes.DATE, t)
            if (t != t0):
                break
        # NOTE(review): placement at loop level inferred from statement
        # order; has_word is only read in the DATE branch above
        has_word = True
        if (isinstance(r, InstrumentParticipantReferent)):
            # a participant containing an org/bank/uri is not a person line
            tt = t.begin_token
            first_pass3257 = True
            while True:
                if first_pass3257: first_pass3257 = False
                else: tt = tt.next0_
                if (not (tt is not None and (tt.end_char < t.end_char))): break
                rr = tt.get_referent()
                if (rr is None):
                    continue
                if ((isinstance(rr, OrganizationReferent)) or (isinstance(rr, BankDataReferent)) or (isinstance(rr, UriReferent))):
                    r = (None)
                    break
        if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent)) or (isinstance(r, InstrumentParticipantReferent))):
            if (t != t0):
                break
            if (isinstance(r, InstrumentParticipantReferent)):
                pass
            res1 = InstrToken._new1511(t, t, ILTypes.PERSON, t)
            return InstrToken.__correct_person(res1)
        if (isinstance(r, OrganizationReferent)):
            if (t != t0):
                break
            return InstrToken._new1511(t, t, ILTypes.ORGANIZATION, t)
        if (isinstance(r, DecreePartReferent)):
            dpr = Utils.asObjectOrNull(r, DecreePartReferent)
            if (dpr.appendix is not None):
                if (t.is_newline_before or is_start_of_line):
                    if (t.is_newline_after or t.whitespaces_before_count > 30):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
                    # the rest of the line must be noun phrases only
                    ok = True
                    tt = t.next0_
                    first_pass3258 = True
                    while True:
                        if first_pass3258: first_pass3258 = False
                        else: tt = tt.next0_
                        if (not (tt is not None)): break
                        if (tt.is_newline_before):
                            break
                        npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                        if (npt is not None):
                            tt = npt.end_token
                            continue
                        ok = False
                        break
                    if (ok):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, "ПРИЛОЖЕНИЕ")
        if ((isinstance(r, DecreeReferent)) and r.kind == DecreeKind.PUBLISHER and t == t0):
            res1 = InstrToken._new1512(t, t, ILTypes.APPROVED)
            tt = t.next0_
            first_pass3259 = True
            while True:
                if first_pass3259: first_pass3259 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_char_of(",;")):
                    continue
                if ((isinstance(tt.get_referent(), DecreeReferent)) and tt.get_referent().kind == DecreeKind.PUBLISHER):
                    # NOTE(review): assigns t, not tt - verify upstream
                    res1.end_token = t
                else:
                    break
            return res1
        # "ЗА <person>" at line start
        if (t.is_value("ЗА", None) and t.next0_ is not None and t.is_newline_before):
            rr = t.next0_.get_referent()
            if ((isinstance(rr, PersonReferent)) or (isinstance(rr, PersonPropertyReferent)) or (isinstance(rr, InstrumentParticipantReferent))):
                if (t != t0):
                    break
                res1 = InstrToken._new1511(t, t.next0_, ILTypes.PERSON, t.next0_)
                t = t.next0_.next0_
                if ((isinstance(rr, InstrumentParticipantReferent)) and t is not None):
                    r = t.get_referent()
                    if ((r) is not None):
                        if ((isinstance(r, PersonReferent)) or (isinstance(r, PersonPropertyReferent))):
                            res1.end_token = t
                            res1.ref = (t)
                return res1
        # directive keywords ("ПРИКАЗ", "ПОСТАНОВЛЕНИЕ", ...)
        ii = 0
        while ii < len(InstrToken._m_directives):
            if (t.is_value(InstrToken._m_directives[ii], None)):
                if (t.next0_ is not None and t.next0_.is_value("СЛЕДУЮЩЕЕ", "НАСТУПНЕ")):
                    if (t != t0):
                        break
                    t11 = t.next0_
                    ok = False
                    if (t11.next0_ is not None and t11.next0_.is_char_of(":.") and t11.next0_.is_newline_after):
                        ok = True
                        t11 = t11.next0_
                    if (ok):
                        return InstrToken._new1515(t, t11, ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                if (t.is_newline_after or ((t.next0_ is not None and t.next0_.is_char(':') and t.next0_.is_newline_after))):
                    if (t != t0):
                        break
                    if (not t.is_newline_before):
                        if ((InstrToken._m_directives_norm[ii] != "ПРИКАЗ" and InstrToken._m_directives_norm[ii] != "ПОСТАНОВЛЕНИЕ" and InstrToken._m_directives_norm[ii] != "НАКАЗ") and InstrToken._m_directives_norm[ii] != "ПОСТАНОВУ"):
                            break
                    return InstrToken._new1515(t, (t if t.is_newline_after else t.next0_), ILTypes.DIRECTIVE, InstrToken._m_directives_norm[ii])
                break
            ii += 1
        # directive spelled out letter by letter ("П Р И К А З")
        if (t.is_newline_before and t.chars.is_letter and t.length_char == 1):
            for d in InstrToken._m_directives:
                t11 = MiscHelper.try_attach_word_by_letters(d, t, True)
                if (t11 is not None):
                    if (t11.next0_ is not None and t11.next0_.is_char(':')):
                        t11 = t11.next0_
                    return InstrToken._new1512(t, t11, ILTypes.DIRECTIVE)
        tte = (t.begin_token if isinstance(t, MetaToken) else t)
        term = (tte.term if isinstance(tte, TextToken) else None)
        # appendix-like headings ("ПРИЛОЖЕНИЕ", "МНЕНИЕ", "АКТ", ...)
        if (is_start_of_line and not tte.chars.is_all_lower and t == t0):
            npt = NounPhraseHelper.try_parse(tte, NounPhraseParseAttr.NO, 0, None)
            if (npt is not None and ((term == "ПРИЛОЖЕНИЯ" or term == "ДОДАТКИ"))):
                # if (tte.Next != null && tte.Next.IsChar(':'))
                npt = (None)
            if (npt is not None and npt.morph.case_.is_nominative and (isinstance(npt.end_token, TextToken))):
                term1 = npt.end_token.term
                if (((term1 == "ПРИЛОЖЕНИЕ" or term1 == "ДОДАТОК" or term1 == "МНЕНИЕ") or term1 == "ДУМКА" or term1 == "АКТ") or term1 == "ФОРМА" or term == "ЗАЯВКА"):
                    # skip a following number / single letter
                    tt1 = npt.end_token.next0_
                    dt1 = DecreeToken.try_attach(tt1, None, False)
                    if (dt1 is not None and dt1.typ == DecreeToken.ItemType.NUMBER):
                        tt1 = dt1.end_token.next0_
                    elif (isinstance(tt1, NumberToken)):
                        tt1 = tt1.next0_
                    elif ((isinstance(tt1, TextToken)) and tt1.length_char == 1 and tt1.chars.is_letter):
                        tt1 = tt1.next0_
                    ok = True
                    if (tt1 is None):
                        ok = False
                    elif (tt1.is_value("В", "У")):
                        ok = False
                    elif (tt1.is_value("К", None) and tt1.is_newline_before):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
                    elif (not tt1.is_newline_before and InstrToken._check_entered(tt1) is not None):
                        ok = False
                    elif (tt1 == t.next0_ and ((tt1.is_char(':') or ((tt1.is_value("НА", None) and term1 != "ЗАЯВКА"))))):
                        ok = False
                    if (ok):
                        br = BracketHelper.try_parse(tt1, BracketParseAttr.NO, 100)
                        if (br is not None):
                            tt1 = br.end_token.next0_
                            if (br.end_token.next0_ is None or not br.end_token.is_newline_after or br.end_token.next0_.is_char_of(";,")):
                                ok = False
                    if (tt1 is not None and tt1.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                        ok = False
                    if (prev is not None and prev.typ == ILTypes.APPENDIX):
                        ok = False
                    if (ok):
                        # no table control chars may precede the heading
                        cou = 0
                        ttt = tte.previous
                        while ttt is not None and (cou < 300):
                            if (ttt.is_table_control_char):
                                if (not ttt.is_char(chr(0x1F))):
                                    if (ttt == tte.previous and ttt.is_char(chr(0x1E))):
                                        pass
                                    else:
                                        ok = False
                                        break
                            ttt = ttt.previous; cou += 1
                    if (ok):
                        it1 = InstrToken1.parse(t, True, None, 0, None, False, 0, False, False)
                        if (it1 is not None):
                            if (it1.has_verb):
                                ok = False
                    if (ok and t.previous is not None):
                        # the nearest preceding significant token must not
                        # be ';' or ':'
                        ttp = t.previous
                        first_pass3260 = True
                        while True:
                            if first_pass3260: first_pass3260 = False
                            else: ttp = ttp.previous
                            if (not (ttp is not None)): break
                            if (ttp.is_table_control_char and not ttp.is_char(chr(0x1F))):
                                continue
                            if (BracketHelper.is_bracket(ttp, False) and not BracketHelper.can_be_end_of_sequence(ttp, False, None, False)):
                                continue
                            if (ttp.is_char_of(";:")):
                                ok = False
                            break
                    if ((ok and t.previous is not None and (t.newlines_before_count < 3)) and not t.is_newline_after):
                        # inspect the previous line(s) for another
                        # "ПРИЛОЖЕНИЕ" heading
                        lines = 0
                        ttp = t.previous
                        first_pass3261 = True
                        while True:
                            if first_pass3261: first_pass3261 = False
                            else: ttp = ttp.previous
                            if (not (ttp is not None)): break
                            if (not ttp.is_newline_before):
                                continue
                            while ttp is not None and (ttp.end_char < t.begin_char):
                                if (isinstance(ttp, NumberToken)):
                                    pass
                                elif ((isinstance(ttp, TextToken)) and ttp.length_char > 1):
                                    if (ttp.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                                        ok = False
                                    break
                                else:
                                    break
                                ttp = ttp.next0_
                            lines += 1
                            if (lines > 1):
                                break
                    if (ok and ((term1 != "ПРИЛОЖЕНИЕ" and term1 != "ДОДАТОК" and term1 != "МНЕНИЕ"))):
                        if (t.newlines_before_count < 3):
                            ok = False
                    if (ok):
                        return InstrToken._new1515(t, t, ILTypes.APPENDIX, term1)
        # "ОСОБОЕ МНЕНИЕ" / "ДОПОЛНИТЕЛЬНОЕ СОГЛАШЕНИЕ" headings
        app = False
        if ((((term == "ОСОБОЕ" or term == "ОСОБЛИВЕ")) and t.next0_ is not None and t.next0_.is_value("МНЕНИЕ", "ДУМКА")) and t == t0 and is_start_of_line):
            app = True
        if ((((term == "ДОПОЛНИТЕЛЬНОЕ" or term == "ДОДАТКОВА")) and t.next0_ is not None and t.next0_.is_value("СОГЛАШЕНИЕ", "УГОДА")) and t == t0 and is_start_of_line):
            app = True
        if (app):
            tt = t.next0_
            while tt is not None:
                if (tt.is_newline_before):
                    break
                elif (tt.get_morph_class_in_dictionary() == MorphClass.VERB):
                    app = False
                    break
                tt = tt.next0_
            if (app):
                return InstrToken._new1512(t, t.next0_, ILTypes.APPENDIX)
        # "УТВЕРЖДЕНО ..." approval lines
        if (not t.chars.is_all_lower and t == t0):
            tt = InstrToken._check_approved(t)
            if (tt is not None):
                if (tt.next0_ is not None and (isinstance(tt.next0_.get_referent(), DecreeReferent))):
                    return InstrToken._new1511(t, tt, ILTypes.APPROVED, tt.next0_.get_referent())
                dt1 = DecreeToken.try_attach(tt.next0_, None, False)
                if (dt1 is not None and dt1.typ == DecreeToken.ItemType.TYP):
                    return InstrToken._new1512(t, tt, ILTypes.APPROVED)
        t1 = t
        is_start_of_line = False
    if (t1 is None):
        return None
    # no specific class matched: build an UNDEFINED token and compute
    # word/verb statistics over its span
    res = InstrToken._new1512(t00, t1, ILTypes.UNDEFINED)
    res.no_words = True
    t = t0
    first_pass3262 = True
    while True:
        if first_pass3262: first_pass3262 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= t1.end_char)): break
        if (not (isinstance(t, TextToken))):
            if (isinstance(t, ReferentToken)):
                res.no_words = False
            continue
        if (not t.chars.is_letter):
            continue
        res.no_words = False
        if (t.is_pure_verb):
            res.has_verb = True
    if (t0.is_value("ВОПРОС", "ПИТАННЯ") and t0.next0_ is not None and t0.next0_.is_char_of(":.")):
        res.typ = ILTypes.QUESTION
    return res
def __correct_person(res : 'InstrToken') -> 'InstrToken':
    """Post-process an InstrToken that may denote a person / signature area.

    Extends ``res`` rightwards over tokens that plausibly belong to the same
    signature block (repeated referents, ontology terms, runs of ``_ / \\``
    used as signature lines), then either keeps the typing or downgrades it
    to ``ILTypes.UNDEFINED``.  ``res`` is mutated in place and returned.
    """
    # Count of "signature filler" characters (_ / \) seen so far.
    spec_chars = 0
    if (not res.is_pure_person):
        # Not a pure person fragment - drop the typing and stop.
        res.typ = ILTypes.UNDEFINED
        return res
    t = res.end_token.next0_
    # Generated do-while emulation: first iteration keeps t, later ones advance.
    first_pass3254 = True
    while True:
        if first_pass3254: first_pass3254 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if ((isinstance(t, ReferentToken)) and (isinstance(res.ref, ReferentToken))):
            # A repeated occurrence of the same referent (or one contained in
            # the participant referent) lets us extend the span.
            ok = False
            if (t.get_referent() == res.ref.referent): ok = True
            ip = Utils.asObjectOrNull(res.ref.referent, InstrumentParticipantReferent)
            if (ip is not None and ip._contains_ref(t.get_referent())): ok = True
            if (not ok and t.previous is not None and t.previous.is_table_control_char):
                # After a table-cell separator a PERSON may follow a person property.
                if ((isinstance(res.ref.referent, PersonPropertyReferent)) and (isinstance(t.get_referent(), PersonReferent))):
                    ok = True
                    res.ref = (t)
            if (ok):
                res.end_token = t
                continue
        tok = InstrToken.__m_ontology.try_parse(t, TerminParseAttr.NO)
        if (tok is not None):
            # Stop before a new line starting with "ПОДПИСЬ/ПІДПИС СТОРОНА"
            # (a new signature block for the other party).
            if ((((tok.termin.canonic_text == "ПОДПИСЬ" or tok.termin.canonic_text == "ПІДПИС")) and t.is_newline_before and t.next0_ is not None) and t.next0_.is_value("СТОРОНА", None)):
                break
            t = tok.end_token
            res.end_token = t
            continue
        if (t.is_char(',')): continue
        # Table control chars inside a line are transparent.
        if (t.is_table_control_char and not t.is_newline_before): continue
        if (t.is_char_of("_/\\")):
            # Signature underline characters.
            res.end_token = t
            spec_chars += 1
            continue
        if (t.is_char('(') and t.next0_ is not None):
            # A bracketed ontology term - swallow the whole bracket sequence.
            tok = InstrToken.__m_ontology.try_parse(t.next0_, TerminParseAttr.NO)
            if ((tok) is not None):
                br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                if (br is not None):
                    t = br.end_token
                    res.end_token = t
                    continue
        break
    rt0 = Utils.asObjectOrNull(res.ref, ReferentToken)
    if (rt0 is not None and (isinstance(rt0.referent, InstrumentParticipantReferent))):
        # For a participant referent, re-anchor res.ref on the first
        # person / person-property token found inside the span.
        tt = res.begin_token
        while tt is not None and tt.end_char <= res.end_char:
            if ((isinstance(tt.get_referent(), PersonReferent)) or (isinstance(tt.get_referent(), PersonPropertyReferent))):
                res.ref = (tt)
                return res
            elif ((isinstance(tt, TextToken)) and tt.is_char_of("_/\\")):
                spec_chars += 1
            elif (isinstance(tt, MetaToken)):
                # Look inside composite tokens as well.
                ttt = tt.begin_token
                while ttt is not None and ttt.end_char <= tt.end_char:
                    if ((isinstance(ttt.get_referent(), PersonReferent)) or (isinstance(ttt.get_referent(), PersonPropertyReferent))):
                        res.ref = (ttt)
                        return res
                    elif ((isinstance(ttt, TextToken)) and ttt.is_char_of("_/\\")):
                        spec_chars += 1
                    ttt = ttt.next0_
            tt = tt.next0_
    # Too few signature characters: presumably not a signature line, so the
    # typing is dropped.  NOTE(review): threshold 10 looks empirical - confirm.
    if (spec_chars < 10): res.typ = ILTypes.UNDEFINED
    return res
def try_parse(t : 'Token', loc_onto : 'IntOntologyCollection') -> 'NamedItemToken':
    """Try to read a named-entity item (type word and/or proper name) at ``t``.

    Tries, in order: an already-attached referent token; a known type term
    (``__m_types``); a known name term (``__m_names``); a "north/west"-style
    location adjective; an adjective+noun phrase; a bracketed name; finally a
    bare capitalized word.  Returns ``None`` when nothing matches.
    """
    if (t is None): return None
    if (isinstance(t, ReferentToken)):
        # Reuse referents produced by other analyzers.
        r = t.get_referent()
        if ((r.type_name == "PERSON" or r.type_name == "PERSONPROPERTY" or (isinstance(r, GeoReferent))) or r.type_name == "ORGANIZATION"):
            return NamedItemToken._new1758(t, t, r, t.morph)
        return None
    typ = NamedItemToken.__m_types.try_parse(t, TerminParseAttr.NO)
    nam = NamedItemToken.__m_names.try_parse(t, TerminParseAttr.NO)
    if (typ is not None):
        # A known type word ("monument", "square", ...) starts the item.
        if (not (isinstance(t, TextToken))): return None
        res = NamedItemToken._new1759(typ.begin_token, typ.end_token, typ.morph, typ.chars)
        res.kind = (Utils.valToEnum(typ.termin.tag, NamedEntityKind))
        res.type_value = typ.termin.canonic_text
        # The same span may also be a well-known name of the same kind.
        if ((nam is not None and nam.end_token == typ.end_token and not t.chars.is_all_lower) and (Utils.valToEnum(nam.termin.tag, NamedEntityKind)) == res.kind):
            res.name_value = nam.termin.canonic_text
            res.is_wellknown = True
        return res
    if (nam is not None):
        # A known name without a preceding type word.
        if (nam.begin_token.chars.is_all_lower): return None
        res = NamedItemToken._new1759(nam.begin_token, nam.end_token, nam.morph, nam.chars)
        res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
        res.name_value = nam.termin.canonic_text
        # Only trust it when the term is not glued to neighbouring text.
        ok = True
        if (not t.is_whitespace_before and t.previous is not None): ok = False
        elif (not t.is_whitespace_after and t.next0_ is not None):
            if (t.next0_.is_char_of(",.;!?") and t.next0_.is_whitespace_after): pass
            else: ok = False
        if (ok):
            res.is_wellknown = True
            res.type_value = (Utils.asObjectOrNull(nam.termin.tag2, str))
        return res
    # Compass-point adjective ("северо-западный ...") introducing a location.
    adj = MiscLocationHelper.try_attach_nord_west(t)
    if (adj is not None):
        if (adj.morph.class0_.is_noun):
            # Noun form is only accepted for "... ВОСТОК" with a qualifier.
            if (adj.end_token.is_value("ВОСТОК", None)):
                if (adj.begin_token == adj.end_token): return None
                re = NamedItemToken._new1761(t, adj.end_token, adj.morph)
                re.kind = NamedEntityKind.LOCATION
                re.name_value = MiscHelper.get_text_value(t, adj.end_token, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                re.is_wellknown = True
                return re
            return None
        if (adj.whitespaces_after_count > 2): return None
        # Adjective directly before an attached GEO referent.
        if ((isinstance(adj.end_token.next0_, ReferentToken)) and (isinstance(adj.end_token.next0_.get_referent(), GeoReferent))):
            re = NamedItemToken._new1761(t, adj.end_token.next0_, adj.end_token.next0_.morph)
            re.kind = NamedEntityKind.LOCATION
            re.name_value = MiscHelper.get_text_value(t, adj.end_token.next0_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
            re.is_wellknown = True
            re.ref = adj.end_token.next0_.get_referent()
            return re
        # Otherwise recurse after the adjective and prepend its normal form.
        res = NamedItemToken.try_parse(adj.end_token.next0_, loc_onto)
        if (res is not None and res.kind == NamedEntityKind.LOCATION):
            s = adj.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, res.morph.gender, False)
            if (s is not None):
                if (res.name_value is None): res.name_value = s.upper()
                else: res.name_value = "{0} {1}".format(s.upper(), res.name_value)
                res.type_value = (None)
                res.begin_token = t
                res.chars = t.chars
                res.is_wellknown = True
                return res
    if (t.chars.is_capital_upper and not MiscHelper.can_be_start_of_sentence(t)):
        # Capitalized adjective(s) + recognizable noun: build the name from
        # the normalized adjectives.
        npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
        if (npt is not None and len(npt.adjectives) > 0):
            test = NamedItemToken.try_parse(npt.noun.begin_token, loc_onto)
            if (test is not None and test.end_token == npt.end_token and test.type_value is not None):
                test.begin_token = t
                tmp = io.StringIO()
                for a in npt.adjectives:
                    s = a.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.SINGULAR, test.morph.gender, False)
                    if (tmp.tell() > 0): print(' ', end="", file=tmp)
                    print(s, end="", file=tmp)
                test.name_value = Utils.toStringStringIO(tmp)
                test.chars = t.chars
                if (test.kind == NamedEntityKind.LOCATION): test.is_wellknown = True
                return test
    if ((BracketHelper.is_bracket(t, True) and t.next0_ is not None and t.next0_.chars.is_letter) and not t.next0_.chars.is_all_lower):
        # Quoted/bracketed name: «Name» - take the bracket content as the name.
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (br is not None):
            res = NamedItemToken(t, br.end_token)
            res.is_in_bracket = True
            res.name_value = MiscHelper.get_text_value(t, br.end_token, GetTextAttr.NO)
            nam = NamedItemToken.__m_names.try_parse(t.next0_, TerminParseAttr.NO)
            if (nam is not None and nam.end_token == br.end_token.previous):
                res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
                res.is_wellknown = True
                res.name_value = nam.termin.canonic_text
            return res
    if (((isinstance(t, TextToken)) and t.chars.is_letter and not t.chars.is_all_lower) and t.length_char > 2):
        # Bare capitalized word: keep it verbatim when it ends like an
        # indeclinable form (О/И/Ы), otherwise normalize the case.
        res = NamedItemToken._new1761(t, t, t.morph)
        str0_ = t.term
        if (str0_.endswith("О") or str0_.endswith("И") or str0_.endswith("Ы")):
            res.name_value = str0_
        else:
            res.name_value = t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        res.chars = t.chars
        # Tightly hyphenated pair of same-alphabet words forms one name.
        if (((not t.is_whitespace_after and t.next0_ is not None and t.next0_.is_hiphen) and (isinstance(t.next0_.next0_, TextToken)) and not t.next0_.next0_.is_whitespace_after) and t.chars.is_cyrillic_letter == t.next0_.next0_.chars.is_cyrillic_letter):
            res.end_token = t.next0_.next0_
            t = res.end_token
            res.name_value = "{0}-{1}".format(res.name_value, t.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False))
        return res
    return None
def __try_parse_thesis(t: 'Token') -> 'ReferentToken':
    """Try to parse an assertion-style thesis: "[preamble,] SUBJECT verb ... .".

    On success returns a ReferentToken wrapping a DefinitionReferent of kind
    ASSERTATION with the subject in ATTR_TERMIN and the statement text in
    ATTR_VALUE; otherwise returns ``None``.
    """
    if (t is None): return None
    t0 = t
    tt = t
    mc = tt.get_morph_class_in_dictionary()
    # Optional preamble span before the subject (set below).
    preamb = None
    if (mc.is_conjunction): return None
    if (t.is_value("LET", None)): return None
    if (mc.is_preposition or mc.is_misc or mc.is_adverb):
        # Sentence starts with a preposition/adverb (but not an English
        # article): skip a preamble up to the first comma.
        if (not MiscHelper.is_eng_article(tt)):
            tt = tt.next0_
            first_pass3131 = True
            while True:
                if first_pass3131: first_pass3131 = False
                else: tt = tt.next0_
                if (not (tt is not None)): break
                if (tt.is_comma): break
                if (tt.is_char('(')):
                    br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
                    if (br is not None):
                        tt = br.end_token
                        continue
                if (MiscHelper.can_be_start_of_sentence(tt)): break
                npt0 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.REFERENTCANBENOUN), NounPhraseParseAttr), 0, None)
                if (npt0 is not None):
                    tt = npt0.end_token
                    continue
                if (tt.get_morph_class_in_dictionary().is_verb): break
            # Preamble must end exactly on a comma with something after it.
            if (tt is None or not tt.is_comma or tt.next0_ is None): return None
            preamb = MetaToken(t0, tt.previous)
            tt = tt.next0_
    # t1 = start of the subject noun phrase.
    t1 = tt
    mc = tt.get_morph_class_in_dictionary()
    npt = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.REFERENTCANBENOUN) | (NounPhraseParseAttr.PARSEADVERBS), NounPhraseParseAttr), 0, None)
    if (npt is None and (isinstance(tt, TextToken))):
        # Fall back to a single token subject for ALL-CAPS or proper words.
        if (tt.chars.is_all_upper): npt = NounPhraseToken(tt, tt)
        elif (not tt.chars.is_all_lower):
            if (mc.is_proper or preamb is not None): npt = NounPhraseToken(tt, tt)
    if (npt is None): return None
    if (mc.is_personal_pronoun): return None
    # t2 must be a verb right after the subject.
    t2 = npt.end_token.next0_
    if (t2 is None or MiscHelper.can_be_start_of_sentence(t2) or not (isinstance(t2, TextToken))): return None
    if (not t2.get_morph_class_in_dictionary().is_verb): return None
    t3 = t2
    # Skip the full verb group.
    tt = t2.next0_
    while tt is not None:
        if (not tt.get_morph_class_in_dictionary().is_verb): break
        tt = tt.next0_
    # Scan forward to the end of the sentence (t3).
    first_pass3132 = True
    while True:
        if first_pass3132: first_pass3132 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.next0_ is None):
            t3 = tt
            break
        if (tt.is_char_of(".;!?")):
            if (MiscHelper.can_be_start_of_sentence(tt.next0_)):
                t3 = tt
                break
        if (not (isinstance(tt, TextToken))): continue
        if (BracketHelper.can_be_start_of_sequence(tt, False, False)):
            br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
            if (br is not None):
                tt = br.end_token
                continue
    # Statement text = [t2 .. t3] without the trailing punctuation.
    tt = t3
    if (t3.is_char_of(";.!?")): tt = tt.previous
    txt = MiscHelper.get_text_value(t2, tt, Utils.valToEnum((GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES), GetTextAttr))
    if (txt is None or (len(txt) < 15)): return None
    if (t0 != t1):
        # Append the preamble text to the statement, lower-casing its first
        # letter when it was only capitalized as sentence start.
        tt = t1.previous
        if (tt.is_comma): tt = tt.previous
        txt0 = MiscHelper.get_text_value(t0, tt, Utils.valToEnum((GetTextAttr.KEEPREGISTER) | (GetTextAttr.KEEPQUOTES), GetTextAttr))
        if (txt0 is not None and len(txt0) > 10):
            if (t0.chars.is_capital_upper): txt0 = ((str.lower(txt0[0])) + txt0[1:])
            txt = "{0}, {1}".format(txt, txt0)
    tt = t1
    if (MiscHelper.is_eng_article(tt)): tt = tt.next0_
    nam = MiscHelper.get_text_value(tt, t2.previous, GetTextAttr.KEEPQUOTES)
    # Strip the "SO-CALLED " prefix (9 chars) from the subject.
    if (nam.startswith("SO-CALLED")): nam = nam[9:].strip()
    dr = DefinitionReferent()
    dr.kind = DefinitionKind.ASSERTATION
    dr.add_slot(DefinitionReferent.ATTR_TERMIN, nam, False, 0)
    dr.add_slot(DefinitionReferent.ATTR_VALUE, txt, False, 0)
    return ReferentToken(dr, t0, t3)
def parse(t : 'Token', max_char : int, prev : 'LineToken') -> 'LineToken':
    """Cut off one logical line (possibly a list item) starting at ``t``.

    Scans up to ``max_char``, stopping at ``:`` after "ПРИЛОЖЕНИЕ/ДОДАТОК"
    (list head), at ``;`` (list item) and at hard line breaks; afterwards
    classifies the line as a list item by its leading bullet/number/letter.
    ``prev`` is the previously parsed line, used for list continuation.
    """
    # Local imports are kept at function scope to avoid import cycles.
    from pullenti.morph.LanguageHelper import LanguageHelper
    from pullenti.ner.NumberToken import NumberToken
    from pullenti.ner.TextToken import TextToken
    from pullenti.ner.core.BracketHelper import BracketHelper
    from pullenti.ner.core.BracketParseAttr import BracketParseAttr
    from pullenti.ner.decree.DecreeReferent import DecreeReferent
    if (t is None or t.end_char > max_char): return None
    res = ListHelper.LineToken(t, t)
    # Generated do-while: first iteration keeps t, later ones advance it.
    first_pass3272 = True
    while True:
        if first_pass3272: first_pass3272 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= max_char)): break
        if (t.is_char(':')):
            # "ПРИЛОЖЕНИЕ:" on its own line introduces a list.
            if (res.is_newline_before and res.begin_token.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                res.is_list_head = True
                res.end_token = t
                break
        if (t.is_char(';')):
            if (not t.is_whitespace_after): pass
            # A ';' between two decree referents does not end the item.
            if (t.previous is not None and (isinstance(t.previous.get_referent(), DecreeReferent))):
                if (not t.is_whitespace_after): continue
                if (t.next0_ is not None and (isinstance(t.next0_.get_referent(), DecreeReferent))): continue
            res.is_list_item = True
            res.end_token = t
            break
        if (t.is_char('(')):
            # Swallow complete bracket sequences.
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t = br.end_token
                res.end_token = t
                continue
        if (t.is_newline_before and t != res.begin_token):
            # Decide whether the newline really starts a new logical line.
            next0__ = True
            if (t.previous.is_comma or t.previous.is_and or t.is_char_of("(")): next0__ = False
            elif (t.chars.is_letter or (isinstance(t, NumberToken))):
                if (t.chars.is_all_lower): next0__ = False
                elif (t.previous.chars.is_letter): next0__ = False
            if (next0__): break
        res.end_token = t
    # Classify the leading bullet / number / single letter.
    if (res.begin_token.is_hiphen):
        res.is_list_item = (res.begin_token.next0_ is not None and not res.begin_token.next0_.is_hiphen)
    elif (res.begin_token.is_char_of("·")):
        res.is_list_item = True
        res.begin_token = res.begin_token.next0_
    elif (res.begin_token.next0_ is not None and ((res.begin_token.next0_.is_char(')') or ((prev is not None and ((prev.is_list_item or prev.is_list_head))))))):
        if (res.begin_token.length_char == 1 or (isinstance(res.begin_token, NumberToken))):
            res.is_list_item = True
            if ((isinstance(res.begin_token, NumberToken)) and res.begin_token.int_value is not None):
                res.number = res.begin_token.int_value
            elif ((isinstance(res.begin_token, TextToken)) and res.begin_token.length_char == 1):
                # Single letter bullets: number = 0-based offset from 'А'/'A'.
                te = res.begin_token.term
                if (LanguageHelper.is_cyrillic_char(te[0])):
                    res.number = ((ord(te[0])) - (ord('А')))
                elif (LanguageHelper.is_latin_char(te[0])):
                    res.number = ((ord(te[0])) - (ord('A')))
    return res
def __calc_rank_and_value(self, min_newlines_count: int) -> bool:
    """Score this candidate title span and locate its name boundaries.

    Walks the tokens of the span, adjusting ``self.rank`` with heuristic
    bonuses/penalties (title keywords, book-link markers, referents, person
    attributes, verbs, punctuation), counting words vs non-words, and
    tracking the name sub-span in ``tstart``/``tend``.  Returns False when
    the span is rejected outright; on True sets ``begin_name_token`` /
    ``end_name_token``.
    """
    self.rank = 0
    if (self.begin_token.chars.is_all_lower): self.rank -= 30
    words = 0
    up_words = 0          # words written in ALL CAPS
    notwords = 0          # non-letter tokens (penalized at the end)
    line_number = 0       # newlines seen inside the span (max 4 allowed)
    tstart = self.begin_token
    tend = self.end_token
    t = self.begin_token
    # Generated do-while over the span's tokens.
    first_pass3396 = True
    while True:
        if first_pass3396: first_pass3396 = False
        else: t = t.next0_
        if (not (t != self.end_token.next0_ and t is not None and t.end_char <= self.end_token.end_char)): break
        if (t.is_newline_before): pass
        tit = TitleItemToken.try_attach(t)
        if (tit is not None):
            if (tit.typ == TitleItemToken.Types.THEME or tit.typ == TitleItemToken.Types.TYPANDTHEME):
                # Explicit theme marker: restart word counters after it.
                if (t != self.begin_token):
                    if (line_number > 0): return False
                    notwords = 0
                    up_words = notwords
                    words = up_words
                    tstart = tit.end_token.next0_
                t = tit.end_token
                if (t.next0_ is None): return False
                if (t.next0_.chars.is_letter and t.next0_.chars.is_all_lower): self.rank += 20
                else: self.rank += 100
                tstart = t.next0_
                if (tit.typ == TitleItemToken.Types.TYPANDTHEME): self.type_value = tit.value
                continue
            if (tit.typ == TitleItemToken.Types.TYP):
                if (t == self.begin_token):
                    if (tit.end_token.is_newline_after):
                        self.type_value = tit.value
                        self.rank += 5
                        tstart = tit.end_token.next0_
                t = tit.end_token
                words += 1
                if (tit.begin_token != tit.end_token): words += 1
                if (tit.chars.is_all_upper): up_words += 1
                continue
            if (tit.typ == TitleItemToken.Types.DUST or tit.typ == TitleItemToken.Types.SPECIALITY):
                if (t == self.begin_token): return False
                self.rank -= 20
                if (tit.typ == TitleItemToken.Types.SPECIALITY): self.speciality = tit.value
                t = tit.end_token
                continue
            if (tit.typ == TitleItemToken.Types.CONSULTANT or tit.typ == TitleItemToken.Types.BOSS or tit.typ == TitleItemToken.Types.EDITOR):
                t = tit.end_token
                if (t.next0_ is not None and ((t.next0_.is_char_of(":") or t.next0_.is_hiphen or t.whitespaces_after_count > 4))): self.rank -= 10
                else: self.rank -= 2
                continue
            return False
        blt = BookLinkToken.try_parse(t, 0)
        if (blt is not None):
            # Bibliographic markers penalize a title candidate.
            # NOTE(review): BookLinkTyp.N is tested in both arms, so the
            # elif can never match on N - looks like a translation artifact.
            if (blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.N or blt.typ == BookLinkTyp.PAGES): self.rank -= 10
            elif (blt.typ == BookLinkTyp.N or blt.typ == BookLinkTyp.PAGERANGE): self.rank -= 20
        if (t == self.begin_token and BookLinkToken.try_parse_author(t, FioTemplateType.UNDEFINED) is not None): self.rank -= 20
        if (t.is_newline_before and t != self.begin_token):
            line_number += 1
            if (line_number > 4): return False
            # Reward line wraps that look like a continuing phrase.
            if (t.chars.is_all_lower): self.rank += 10
            elif (t.previous.is_char('.')): self.rank -= 10
            elif (t.previous.is_char_of(",-")): self.rank += 10
            else:
                npt = NounPhraseHelper.try_parse(t.previous, NounPhraseParseAttr.NO, 0, None)
                if (npt is not None and npt.end_char >= t.end_char): self.rank += 10
        if (t != self.begin_token and t.newlines_before_count > min_newlines_count):
            # Penalize unusually large vertical gaps.
            self.rank -= (t.newlines_before_count - min_newlines_count)
        bst = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (bst is not None and bst.is_quote_type and bst.end_token.end_char <= self.end_token.end_char):
            # A leading quoted sequence is a strong title signal.
            if (words == 0):
                tstart = bst.begin_token
                self.rank += 10
                if (bst.end_token == self.end_token):
                    tend = self.end_token
                    self.rank += 10
        rli = t.get_referents()
        if (rli is not None):
            # Attached referents mostly count against a title.
            for r in rli:
                if (isinstance(r, OrganizationReferent)):
                    if (t.is_newline_before): self.rank -= 10
                    else: self.rank -= 4
                    continue
                if ((isinstance(r, GeoReferent)) or (isinstance(r, PersonReferent))):
                    if (t.is_newline_before): self.rank -= 5
                    if (t.is_newline_after or t.next0_ is None): self.rank -= 20
                    elif (t.next0_.is_hiphen or (isinstance(t.next0_, NumberToken)) or (isinstance(t.next0_.get_referent(), DateReferent))): self.rank -= 20
                    elif (t != self.begin_token): self.rank -= 20
                    continue
                # NOTE(review): GeoReferent is already handled above, so this
                # branch effectively fires only for DenominationReferent.
                if ((isinstance(r, GeoReferent)) or (isinstance(r, DenominationReferent))): continue
                if ((isinstance(r, UriReferent)) or (isinstance(r, PhoneReferent))): return False
                if (t.is_newline_before): self.rank -= 4
                else: self.rank -= 2
                if (t == self.begin_token and (isinstance(self.end_token.get_referent(), PersonReferent))): self.rank -= 10
            words += 1
            if (t.chars.is_all_upper): up_words += 1
            if (t == self.begin_token):
                if (t.is_newline_after): self.rank -= 10
                elif (t.next0_ is not None and t.next0_.is_char('.') and t.next0_.is_newline_after): self.rank -= 10
            continue
        if (isinstance(t, NumberToken)):
            # Spelled-out numbers count as words, digits as non-words.
            if (t.typ == NumberSpellingType.WORDS):
                words += 1
                if (t.chars.is_all_upper): up_words += 1
            else: notwords += 1
            continue
        pat = PersonAttrToken.try_attach(t, None, PersonAttrToken.PersonAttrAttachAttrs.NO)
        if (pat is not None):
            # A person attribute (job title etc.) at line start is suspicious
            # unless it is in an oblique case or ALL CAPS.
            if (t.is_newline_before):
                if (not pat.morph.case_.is_undefined and not pat.morph.case_.is_nominative): pass
                elif (pat.chars.is_all_upper): pass
                else: self.rank -= 20
            elif (t.chars.is_all_lower): self.rank -= 1
            while t is not None:
                words += 1
                if (t.chars.is_all_upper): up_words += 1
                if (t == pat.end_token): break
                t = t.next0_
            continue
        oitt = OrgItemTypeToken.try_attach(t, True, None)
        if (oitt is not None):
            if (oitt.morph.number != MorphNumber.PLURAL and not oitt.is_doubt_root_word):
                if (not oitt.morph.case_.is_undefined and not oitt.morph.case_.is_nominative):
                    words += 1
                    if (t.chars.is_all_upper): up_words += 1
                else:
                    # Nominative organization type word - likely a header, not a title.
                    self.rank -= 4
                    if (t == self.begin_token): self.rank -= 5
            else:
                words += 1
                if (t.chars.is_all_upper): up_words += 1
            t = oitt.end_token
            continue
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is not None):
            if (tt.is_char('©')): self.rank -= 10
            if (tt.is_char('_')): self.rank -= 1
            if (tt.chars.is_letter):
                if (tt.length_char > 2):
                    words += 1
                    if (t.chars.is_all_upper): up_words += 1
            elif (not tt.is_char(',')): notwords += 1
            if (tt.is_pure_verb):
                # A finite verb means this is a sentence, not a title.
                self.rank -= 30
                words -= 1
                break
            if (tt == self.end_token):
                if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): self.rank -= 10
                elif (tt.is_char('.')): self.rank += 5
            elif (tt.is_char_of("._")): self.rank -= 5
    self.rank += words
    self.rank -= notwords
    if ((words < 1) and (self.rank < 50)): return False
    if (tstart is None or tend is None): return False
    if (tstart.end_char > tend.end_char): return False
    # A type/speciality marker right after the span supports the candidate.
    tit1 = TitleItemToken.try_attach(self.end_token.next0_)
    if (tit1 is not None and ((tit1.typ == TitleItemToken.Types.TYP or tit1.typ == TitleItemToken.Types.SPECIALITY))):
        if (tit1.end_token.is_newline_after): self.rank += 15
        else: self.rank += 10
        if (tit1.typ == TitleItemToken.Types.SPECIALITY): self.speciality = tit1.value
    # Mostly ALL-CAPS text right after a person referent: boost.
    if (up_words > 4 and up_words > (math.floor((0.8 * (words))))):
        if (tstart.previous is not None and (isinstance(tstart.previous.get_referent(), PersonReferent))):
            self.rank += (5 + up_words)
    self.begin_name_token = tstart
    self.end_name_token = tend
    return True
def __try_attach_contract_ground(t : 'Token', ip : 'InstrumentParticipantReferent', can_be_passport : bool=False) -> 'Token':
    """Parse the "acting on the basis of ..." clause after a contract party.

    Recognizes grounds such as a decree referent, a charter ("УСТАВ"), an
    identity document, or a power of attorney ("ДОВЕРЕННОСТЬ"), stores the
    result into ``ip.ground`` and returns the last consumed token (a
    ReferentToken for an embedded power-of-attorney decree), or ``None``.
    """
    ok = False
    # Generated do-while over following tokens.
    first_pass3289 = True
    while True:
        if first_pass3289: first_pass3289 = False
        else: t = t.next0_
        if (not (t is not None)): break
        # Skip connective punctuation and prepositions.
        if (t.is_char(',') or t.morph.class0_.is_preposition): continue
        if (t.is_char('(')):
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None):
                t = br.end_token
                continue
        if (t.is_value("ОСНОВАНИЕ", None) or t.is_value("ДЕЙСТВОВАТЬ", None) or t.is_value("ДЕЙСТВУЮЩИЙ", None)):
            # "на основании / действующий ..." - the ground should follow.
            ok = True
            if (t.next0_ is not None and t.next0_.is_char('(')):
                br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
                # Only skip short parentheticals here.
                if (br is not None and (br.length_char < 10)): t = br.end_token
            continue
        dr = Utils.asObjectOrNull(t.get_referent(), DecreeReferent)
        if (dr is not None):
            # A decree referent is itself the ground.
            ip.ground = dr
            return t
        pir = Utils.asObjectOrNull(t.get_referent(), PersonIdentityReferent)
        if (pir is not None and can_be_passport):
            # NOTE(review): the condition accepts identity documents whose
            # type does NOT contain "паспорт" - confirm this inversion is
            # intended.
            if (pir.typ is not None and not "паспорт" in pir.typ):
                ip.ground = pir
                return t
        if (t.is_value("УСТАВ", None)):
            # Charter: store its normalized noun form as a string ground.
            ip.ground = t.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
            return t
        if (t.is_value("ДОВЕРЕННОСТЬ", None)):
            # Power of attorney: try to pick up its date/number tokens.
            dts = DecreeToken.try_attach_list(t.next0_, None, 10, False)
            if (dts is None):
                # No direct attributes: scan ahead (max ~200 chars) past
                # notary wording for a date/number.
                has_spec = False
                ttt = t.next0_
                first_pass3290 = True
                while True:
                    if first_pass3290: first_pass3290 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None and ((ttt.end_char - t.end_char) < 200))): break
                    if (ttt.is_comma): continue
                    if (ttt.is_value("УДОСТОВЕРИТЬ", None) or ttt.is_value("УДОСТОВЕРЯТЬ", None)):
                        has_spec = True
                        continue
                    dt = DecreeToken.try_attach(ttt, None, False)
                    if (dt is not None):
                        if (dt.typ == DecreeToken.ItemType.DATE or dt.typ == DecreeToken.ItemType.NUMBER):
                            dts = DecreeToken.try_attach_list(ttt, None, 10, False)
                        break
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None):
                        if (npt.end_token.is_value("НОТАРИУС", None)):
                            ttt = npt.end_token
                            has_spec = True
                            continue
                    if (ttt.get_referent() is not None):
                        if (has_spec): continue
                    break
            if (dts is not None and len(dts) > 0):
                # Build a synthetic DecreeReferent for the power of attorney,
                # register it and embed it as a new token.
                t0 = t
                dr = DecreeReferent()
                dr.typ = "ДОВЕРЕННОСТЬ"
                for d in dts:
                    if (d.typ == DecreeToken.ItemType.DATE):
                        dr._add_date(d)
                        t = d.end_token
                    elif (d.typ == DecreeToken.ItemType.NUMBER):
                        dr._add_number(d)
                        t = d.end_token
                    else: break
                ad = t.kit.get_analyzer_data_by_analyzer_name(InstrumentAnalyzer.ANALYZER_NAME)
                ip.ground = ad.register_referent(dr)
                rt = ReferentToken(Utils.asObjectOrNull(ip.ground, Referent), t0, t)
                t.kit.embed_token(rt)
                return rt
            # No attributes found: keep the plain string ground.
            ip.ground = "ДОВЕРЕННОСТЬ"
            return t
        # Any other token terminates the scan.
        break
    return None
def __correct_tail_attributes(p : 'PersonReferent', t0 : 'Token') -> 'Token':
    """Attach trailing birth/death attributes to a person referent.

    Handles patterns after a person mention such as ", родился <date>",
    "умер <date>", "дата рождения <date>", an age for a deceased person, and
    a parenthesized date or date-range "(1920-1985)".  ``p`` may be ``None``
    (dry run); slots are added only when ``p`` is given.  Returns the last
    token consumed (``t0`` when nothing matched).
    """
    res = t0
    t = t0
    if (t is not None and t.is_char(',')): t = t.next0_
    born = False
    die = False
    # Explicit born/died verbs (Russian/Ukrainian/English).
    if (t is not None and ((t.is_value("РОДИТЬСЯ", "НАРОДИТИСЯ") or t.is_value("BORN", None)))):
        t = t.next0_
        born = True
    elif (t is not None and ((t.is_value("УМЕРЕТЬ", "ПОМЕРТИ") or t.is_value("СКОНЧАТЬСЯ", None) or t.is_value("DIED", None)))):
        t = t.next0_
        die = True
    elif ((t is not None and t.is_value("ДАТА", None) and t.next0_ is not None) and t.next0_.is_value("РОЖДЕНИЕ", "НАРОДЖЕННЯ")):
        t = t.next0_.next0_
        born = True
    # Skip connective prepositions / hyphens / colon before the date.
    while t is not None:
        if (t.morph.class0_.is_preposition or t.is_hiphen or t.is_char(':')): t = t.next0_
        else: break
    if (t is not None and t.get_referent() is not None):
        r = t.get_referent()
        if (r.type_name == "DATE"):
            t1 = t
            # "<date> р." / "<date> рождения" marks a birth date.
            if (t.next0_ is not None and ((t.next0_.is_value("Р", None) or t.next0_.is_value("РОЖДЕНИЕ", "НАРОДЖЕННЯ")))):
                born = True
                t1 = t.next0_
                if (t1.next0_ is not None and t1.next0_.is_char('.')): t1 = t1.next0_
            if (born):
                if (p is not None): p.add_slot(PersonReferent.ATTR_BORN, r, False, 0)
                res = t1
                t = t1
            elif (die):
                if (p is not None): p.add_slot(PersonReferent.ATTR_DIE, r, False, 0)
                res = t1
                t = t1
    if (die and t is not None):
        # "... умер в возрасте N лет" - attach the age.
        ag = NumberHelper.try_parse_age(t.next0_)
        if (ag is not None):
            if (p is not None): p.add_slot(PersonReferent.ATTR_AGE, str(ag.value), False, 0)
            t = ag.end_token.next0_
            res = ag.end_token
    if (t is None): return res
    if (t.is_char('(')):
        # Parenthesized "(род. <date>)" or "(<daterange>)" after the name.
        br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
        if (br is not None):
            t1 = t.next0_
            born = False
            if (t1.is_value("РОД", None)):
                born = True
                t1 = t1.next0_
                if (t1 is not None and t1.is_char('.')): t1 = t1.next0_
            if (isinstance(t1, ReferentToken)):
                r = t1.get_referent()
                if (r.type_name == "DATERANGE" and t1.next0_ == br.end_token):
                    # Life span: FROM = birth, TO = death.
                    bd = Utils.asObjectOrNull(r.get_slot_value("FROM"), Referent)
                    to = Utils.asObjectOrNull(r.get_slot_value("TO"), Referent)
                    if (bd is not None and to is not None):
                        if (p is not None):
                            p.add_slot(PersonReferent.ATTR_BORN, bd, False, 0)
                            p.add_slot(PersonReferent.ATTR_DIE, to, False, 0)
                        res = br.end_token
                        t = res
                elif (r.type_name == "DATE" and t1.next0_ == br.end_token):
                    if (p is not None): p.add_slot(PersonReferent.ATTR_BORN, r, False, 0)
                    res = br.end_token
                    t = res
    return res
def process(self, kit: 'AnalysisKit') -> None:
    """Run the book-link analyzer over the whole token chain of ``kit``.

    First pass: finds inline parenthesized references and line-start
    bibliography entries, registering BookLink/BookLinkRef referents and
    embedding them as tokens; numbered entries are indexed in
    ``refs_by_num``.  Second pass: resolves short inline references like
    "[3]" against that index.
    """
    ad = kit.get_analyzer_data(self)
    # Countdown heuristic: >0 means we are inside a literature list.
    is_lit_block = 0
    # Maps an entry number (str) to the list of entries carrying it.
    refs_by_num = dict()
    rts = []
    t = kit.first_token
    # Pass 1: full references.
    first_pass3022 = True
    while True:
        if first_pass3022: first_pass3022 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.is_char('(')):
            # Inline reference in parentheses (70..400 chars, line/sentence end).
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None and br.length_char > 70 and (br.length_char < 400)):
                if (br.is_newline_after or ((br.end_token.next0_ is not None and br.end_token.next0_.is_char_of(".;")))):
                    rts = BookLinkAnalyzer.__try_parse(t.next0_, False, br.end_char)
                    if (rts is not None and len(rts) >= 1):
                        if (len(rts) > 1):
                            # rts[1] is the book itself; register and embed it first.
                            rts[1].referent = ad.register_referent(rts[1].referent)
                            kit.embed_token(rts[1])
                            rts[0].referent.book = Utils.asObjectOrNull(rts[1].referent, BookLinkReferent)
                            if (rts[0].begin_char == rts[1].begin_char): rts[0].begin_token = rts[1]
                            if (rts[0].end_char == rts[1].end_char): rts[0].end_token = rts[1]
                        # rts[0] is the reference; widen it to the whole bracket.
                        rts[0].begin_token = t
                        rts[0].end_token = br.end_token
                        rts[0].referent.typ = BookLinkRefType.INLINE
                        rts[0].referent = ad.register_referent(rts[0].referent)
                        kit.embed_token(rts[0])
                        t = (rts[0])
                        # NOTE(review): placement of this 'continue' is
                        # reconstructed from flattened source; on a failed
                        # parse the token falls through to the checks below.
                        continue
        if (not t.is_newline_before): continue
        if (is_lit_block <= 0):
            # Detect a "Литература:"-style heading starting a list.
            tt = BookLinkToken.parse_start_of_lit_block(t)
            if (tt is not None):
                is_lit_block = 5
                t = tt
                continue
        rts = BookLinkAnalyzer.__try_parse(t, is_lit_block > 0, 0)
        if (rts is None or (len(rts) < 1)):
            # A line that is not an entry weakens the in-list evidence.
            is_lit_block -= 1
            if (is_lit_block < 0): is_lit_block = 0
            continue
        is_lit_block += 1
        if (is_lit_block > 5): is_lit_block = 5
        if (len(rts) > 1):
            rts[1].referent = ad.register_referent(rts[1].referent)
            kit.embed_token(rts[1])
            rts[0].referent.book = Utils.asObjectOrNull(rts[1].referent, BookLinkReferent)
            if (rts[0].begin_char == rts[1].begin_char): rts[0].begin_token = rts[1]
            if (rts[0].end_char == rts[1].end_char): rts[0].end_token = rts[1]
        re = Utils.asObjectOrNull(rts[0].referent, BookLinkRefReferent)
        re = (Utils.asObjectOrNull(ad.register_referent(re), BookLinkRefReferent))
        rts[0].referent = (re)
        kit.embed_token(rts[0])
        t = (rts[0])
        if (re.number is not None):
            # Index the entry under its list number for pass 2.
            li = []
            wrapli368 = RefOutArgWrapper(None)
            inoutres369 = Utils.tryGetValue(refs_by_num, re.number, wrapli368)
            li = wrapli368.value
            if (not inoutres369):
                li = list()
                refs_by_num[re.number] = li
            li.append(re)
    # Pass 2: short inline references resolved via refs_by_num.
    t = kit.first_token
    first_pass3023 = True
    while True:
        if first_pass3023: first_pass3023 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (not (isinstance(t, TextToken))): continue
        rt = BookLinkAnalyzer.__try_parse_short_inline(t)
        if (rt is None): continue
        re = Utils.asObjectOrNull(rt.referent, BookLinkRefReferent)
        li = []
        wrapli370 = RefOutArgWrapper(None)
        inoutres371 = Utils.tryGetValue(refs_by_num, Utils.ifNotNull(re.number, ""), wrapli370)
        li = wrapli370.value
        if (not inoutres371): continue
        # Pick the last entry with this number occurring before t.
        i = 0
        i = 0
        while i < len(li):
            if (t.begin_char < li[i].occurrence[0].begin_char): break
            i += 1
        if (i >= len(li)): continue
        re.book = li[i].book
        if (re.pages is None): re.pages = li[i].pages
        re.typ = BookLinkRefType.INLINE
        re = (Utils.asObjectOrNull(ad.register_referent(re), BookLinkRefReferent))
        rt.referent = (re)
        kit.embed_token(rt)
        t = (rt)
def try_attach_requisites(t : 'Token', cur : 'InstrumentParticipantReferent', other : 'InstrumentParticipantReferent', cant_be_empty : bool=False) -> 'ReferentToken':
    """Collect the requisites block (addresses, banks, IDs, signatures) of a
    contract party.

    Starting at ``t``, extends a ReferentToken for ``cur`` over tokens that
    look like requisites, adding encountered referents to ``cur``; stops on
    requisites belonging to ``other``, table boundaries, appendix headers or
    too much unrecognized text.  Returns the span or ``None``.
    """
    if (t is None or cur is None): return None
    if (t.is_table_control_char): return None
    err = 0          # consecutive unrecognized text tokens
    spec_chars = 0   # signature filler chars (_ / \)
    rt = None
    t0 = t
    # Detect whether the block lives inside a table cell (look ahead max 300).
    is_in_tab_cell = False
    cou = 0
    tt = t.next0_
    while tt is not None and (cou < 300):
        if (tt.is_table_control_char):
            is_in_tab_cell = True
            break
        tt = tt.next0_; cou += 1
    # Generated do-while over the requisites tokens.
    first_pass3286 = True
    while True:
        if first_pass3286: first_pass3286 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (t.begin_char == 8923): pass
        if (t.is_table_control_char):
            # Table boundary: close the span (possibly creating an empty one).
            if (t != t0):
                if (rt is not None): rt.end_token = t.previous
                elif (not cant_be_empty): rt = ReferentToken(cur, t0, t.previous)
                break
            else: continue
        # "М.П." (seal mark) and punctuation extend the span.
        if ((t.is_char_of(":.") or t.is_value("М", None) or t.is_value("M", None)) or t.is_value("П", None)):
            if (rt is not None): rt.end_token = t
            continue
        pp = ParticipantToken.try_attach_to_exist(t, cur, other)
        if (pp is not None):
            # A participant mention: ours extends the span, theirs ends it.
            if (pp.referent != cur): break
            if (rt is None): rt = ReferentToken(cur, t, t)
            rt.end_token = pp.end_token
            err = 0
            continue
        if (t.is_newline_before):
            # An appendix header terminates the requisites block.
            iii = InstrToken.parse(t, 0, None)
            if (iii is not None):
                if (iii.typ == ILTypes.APPENDIX): break
        if (t.whitespaces_before_count > 25 and not is_in_tab_cell):
            # Large gaps outside a table usually separate blocks.
            if (t != t0):
                if (t.previous is not None and t.previous.is_char_of(",;")): pass
                elif (t.newlines_before_count > 1): break
        if ((isinstance(t.get_referent(), PersonReferent)) or (isinstance(t.get_referent(), OrganizationReferent))):
            # A person/org not belonging to cur means the next party started.
            if (not cur._contains_ref(t.get_referent())): break
        if ((t.is_char_of(";:,.") or t.is_hiphen or t.morph.class0_.is_preposition) or t.morph.class0_.is_conjunction): continue
        if (t.is_char_of("_/\\")):
            # Signature lines; enough of them alone can form a span.
            spec_chars += 1
            if (spec_chars > 10 and rt is None): rt = ReferentToken(cur, t0, t)
            if (rt is not None): rt.end_token = t
            continue
        if (t.is_newline_before and (isinstance(t, NumberToken))): break
        if (t.is_value("ОФИС", None)):
            # Office designation: quoted value or a single capitalized word.
            if (BracketHelper.can_be_start_of_sequence(t.next0_, True, False)):
                br = BracketHelper.try_parse(t.next0_, BracketParseAttr.NO, 100)
                if (br is not None):
                    t = br.end_token
                    continue
            if ((isinstance(t.next0_, TextToken)) and not t.next0_.chars.is_all_lower):
                t = t.next0_
                continue
        r = t.get_referent()
        if ((((isinstance(r, PersonReferent)) or (isinstance(r, AddressReferent)) or (isinstance(r, UriReferent))) or (isinstance(r, OrganizationReferent)) or (isinstance(r, PhoneReferent))) or (isinstance(r, PersonIdentityReferent)) or (isinstance(r, BankDataReferent))):
            # A requisite referent: attach to cur unless it belongs to other.
            if (other is not None and other.find_slot(None, r, True) is not None):
                if (not (isinstance(r, UriReferent))): break
            if (rt is None): rt = ReferentToken(cur, t, t)
            if (cur.find_slot(InstrumentParticipantReferent.ATTR_DELEGATE, r, True) is not None): pass
            else: cur.add_slot(InstrumentParticipantReferent.ATTR_REF, r, False, 0)
            rt.end_token = t
            err = 0
        else:
            # Unrecognized text: tolerate much more noise inside a table cell.
            # NOTE(review): nesting of the thresholds reconstructed from
            # flattened source (300 in-cell vs 4 otherwise).
            if ((isinstance(t, TextToken)) and t.length_char > 1):
                err += 1
                if (is_in_tab_cell and rt is not None):
                    if (err > 300): break
                elif (err > 4): break
    return rt
def try_attach(t: 'Token') -> 'TitleItemToken':
    """Try to recognize a title-page item (theme, type, translation mark,
    section header, speciality code, dissertation type, ...) starting at ``t``.

    Returns a TitleItemToken describing the matched span and its Types value,
    or None when nothing matches.  The literals compared against are Russian/
    Ukrainian words ("ТЕМА" = theme, "ПЕРЕВОД" = translation, "СЕКЦИЯ" =
    section, "ДИССЕРТАЦИЯ" = dissertation, etc.).
    """
    tt = Utils.asObjectOrNull(t, TextToken)
    if (tt is not None): 
        t1 = tt
        if (tt.term == "ТЕМА"): 
            # "ТЕМА" may be followed by a type item -> combined TYPANDTHEME
            tit = TitleItemToken.try_attach(tt.next0_)
            if (tit is not None and tit.typ == TitleItemToken.Types.TYP): 
                t1 = tit.end_token
                if (t1.next0_ is not None and t1.next0_.is_char(':')): 
                    t1 = t1.next0_
                return TitleItemToken._new2655(t, t1, TitleItemToken.Types.TYPANDTHEME, tit.value)
            if (tt.next0_ is not None and tt.next0_.is_char(':')): 
                t1 = tt.next0_
            return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
        if (tt.term == "ПО" or tt.term == "НА"): 
            # prepositional form: "по теме"/"на тему" (= "on the theme")
            if (tt.next0_ is not None and tt.next0_.is_value("ТЕМА", None)): 
                t1 = tt.next0_
                if (t1.next0_ is not None and t1.next0_.is_char(':')): 
                    t1 = t1.next0_
                return TitleItemToken(tt, t1, TitleItemToken.Types.THEME)
        if (tt.term == "ПЕРЕВОД" or tt.term == "ПЕР"): 
            # "перевод с ..." (= "translation from ...")
            tt2 = tt.next0_
            if (tt2 is not None and tt2.is_char('.')): 
                tt2 = tt2.next0_
            if (isinstance(tt2, TextToken)): 
                if (tt2.term == "C" or tt2.term == "С"): 
                    tt2 = tt2.next0_
                    if (isinstance(tt2, TextToken)): 
                        return TitleItemToken(t, tt2, TitleItemToken.Types.TRANSLATE)
        if (tt.term == "СЕКЦИЯ" or tt.term == "SECTION" or tt.term == "СЕКЦІЯ"): 
            # section header: swallow the quoted name or the rest of the line
            t1 = tt.next0_
            if (t1 is not None and t1.is_char(':')): 
                t1 = t1.next0_
            br = BracketHelper.try_parse(t1, BracketParseAttr.NO, 100)
            if (br is not None): 
                t1 = br.end_token
            elif (t1 != tt.next0_): 
                while t1 is not None: 
                    if (t1.is_newline_after): 
                        break
                    t1 = t1.next0_
                if (t1 is None): 
                    return None
            if (t1 != tt.next0_): 
                return TitleItemToken(tt, t1, TitleItemToken.Types.DUST)
        t1 = (None)
        # speciality: "специальность ..." possibly behind a preposition,
        # or a line-initial '/'
        if (tt.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")): 
            t1 = tt.next0_
        elif (tt.morph.class0_.is_preposition and tt.next0_ is not None and tt.next0_.is_value("СПЕЦИАЛЬНОСТЬ", "СПЕЦІАЛЬНІСТЬ")): 
            t1 = tt.next0_.next0_
        elif (tt.is_char('/') and tt.is_newline_before): 
            t1 = tt.next0_
        if (t1 is not None): 
            if (t1.is_char_of(":") or t1.is_hiphen): 
                t1 = t1.next0_
            spec = TitleItemToken.__try_attach_speciality(t1, True)
            if (spec is not None): 
                spec.begin_token = t
                return spec
    # speciality without an introducing keyword
    sss = TitleItemToken.__try_attach_speciality(t, False)
    if (sss is not None): 
        return sss
    if (isinstance(t, ReferentToken)): 
        return None
    npt = NounPhraseHelper.try_parse(t, NounPhraseParseAttr.NO, 0, None)
    if (npt is not None): 
        s = npt.get_normal_case_text(None, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
        tok = TitleItemToken.M_TERMINS.try_parse(npt.end_token, TerminParseAttr.NO)
        if (tok is not None): 
            ty = Utils.valToEnum(tok.termin.tag, TitleItemToken.Types)
            if (ty == TitleItemToken.Types.TYP): 
                tit = TitleItemToken.try_attach(tok.end_token.next0_)
                if (tit is not None and tit.typ == TitleItemToken.Types.THEME): 
                    return TitleItemToken._new2655(npt.begin_token, tit.end_token, TitleItemToken.Types.TYPANDTHEME, s)
                # bare "РАБОТА"/"ПРОЕКТ" ("work"/"project") is too generic
                if (s == "РАБОТА" or s == "РОБОТА" or s == "ПРОЕКТ"): 
                    return None
                t1 = tok.end_token
                if (s == "ДИССЕРТАЦИЯ" or s == "ДИСЕРТАЦІЯ"): 
                    # refine the dissertation type from the following degree
                    # description ("доктор/кандидат наук", "магистр", ...)
                    err = 0
                    ttt = t1.next0_
                    first_pass3394 = True
                    while True:
                        if first_pass3394: first_pass3394 = False
                        else: ttt = ttt.next0_
                        if (not (ttt is not None)): break
                        if (ttt.morph.class0_.is_preposition): 
                            continue
                        if (ttt.is_value("СОИСКАНИЕ", "")): 
                            continue
                        npt1 = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                        if (npt1 is not None and npt1.noun.is_value("СТЕПЕНЬ", "СТУПІНЬ")): 
                            ttt = npt1.end_token
                            t1 = ttt
                            continue
                        rt = t1.kit.process_referent("PERSON", ttt)
                        if (rt is not None and (isinstance(rt.referent, PersonPropertyReferent))): 
                            ppr = Utils.asObjectOrNull(rt.referent, PersonPropertyReferent)
                            if (ppr.name == "доктор наук"): 
                                t1 = rt.end_token
                                s = "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"
                                break
                            elif (ppr.name == "кандидат наук"): 
                                t1 = rt.end_token
                                s = "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"
                                break
                            elif (ppr.name == "магистр"): 
                                t1 = rt.end_token
                                s = "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"
                                break
                        if (ttt.is_value("ДОКТОР", None) or ttt.is_value("КАНДИДАТ", None) or ttt.is_value("МАГИСТР", "МАГІСТР")): 
                            t1 = ttt
                            npt1 = NounPhraseHelper.try_parse(ttt.next0_, NounPhraseParseAttr.NO, 0, None)
                            if (npt1 is not None and npt1.end_token.is_value("НАУК", None)): 
                                t1 = npt1.end_token
                                s = ("МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ" if ttt.is_value("МАГИСТР", "МАГІСТР") else ("ДОКТОРСКАЯ ДИССЕРТАЦИЯ" if ttt.is_value("ДОКТОР", None) else "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"))
                            break
                        err += 1
                        if (err > 3): 
                            break
                if (t1.next0_ is not None and t1.next0_.is_char('.')): 
                    t1 = t1.next0_
                if (s.endswith("ОТЧЕТ") and t1.next0_ is not None and t1.next0_.is_value("О", None)): 
                    # "отчет о ..." (= "report on ...") — absorb the object
                    npt1 = NounPhraseHelper.try_parse(t1.next0_, NounPhraseParseAttr.PARSEPREPOSITION, 0, None)
                    if (npt1 is not None and npt1.morph.case_.is_prepositional): 
                        t1 = npt1.end_token
                return TitleItemToken._new2655(npt.begin_token, t1, ty, s)
    # a dictionary term directly at t
    tok1 = TitleItemToken.M_TERMINS.try_parse(t, TerminParseAttr.NO)
    if (tok1 is not None): 
        t1 = tok1.end_token
        re = TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
        return re
    # the same term wrapped in brackets/quotes
    if (BracketHelper.can_be_start_of_sequence(t, False, False)): 
        tok1 = TitleItemToken.M_TERMINS.try_parse(t.next0_, TerminParseAttr.NO)
        if (tok1 is not None and BracketHelper.can_be_end_of_sequence(tok1.end_token.next0_, False, None, False)): 
            t1 = tok1.end_token.next0_
            return TitleItemToken(tok1.begin_token, t1, Utils.valToEnum(tok1.termin.tag, TitleItemToken.Types))
    return None
def try_attach(t : 'Token', p1 : 'InstrumentParticipantReferent'=None, p2 : 'InstrumentParticipantReferent'=None, is_contract : bool=False) -> 'ParticipantToken':
    """Try to recognize a contract-participant designation starting at ``t``.

    Handles several surface forms: a list of same-typed referents jointly
    named "СТОРОНЫ" ("the parties") -> NAMEDASPARTS; a person/organization
    referent followed by "именуемый в дальнейшем ..." ("hereinafter referred
    to as ...") -> NAMEDAS; and a bare role word ("СТОРОНА 1", purchaser/
    seller-style terms from M_ONTOLOGY, or the types of ``p1``/``p2``)
    -> PURE or NAMEDAS.

    p1, p2: already-known participants whose ``typ`` strings are accepted
        (with tolerance of one spelling error) as role words.
    is_contract: enables contract-specific heuristics.
    Returns a ParticipantToken or None.
    """
    if (t is None): 
        return None
    tt = t
    br = False
    if (p1 is None and p2 is None and is_contract): 
        r1 = t.get_referent()
        # case: "<ref>, <ref> (and <ref> ...) ... именуемые ... СТОРОНЫ"
        if ((r1 is not None and t.next0_ is not None and t.next0_.is_comma_and) and (isinstance(t.next0_.next0_, ReferentToken))): 
            r2 = t.next0_.next0_.get_referent()
            if (r1.type_name == r2.type_name): 
                ttt = t.next0_.next0_.next0_
                refs = list()
                refs.append(r1)
                refs.append(r2)
                # collect further same-typed referents in the enumeration
                first_pass3282 = True
                while True:
                    if first_pass3282: first_pass3282 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if ((ttt.is_comma_and and ttt.next0_ is not None and ttt.next0_.get_referent() is not None) and ttt.next0_.get_referent().type_name == r1.type_name): 
                        ttt = ttt.next0_
                        if (not ttt.get_referent() in refs): 
                            refs.append(ttt.get_referent())
                        continue
                    break
                # now expect a plural "СТОРОНА" noun phrase closing the list
                first_pass3283 = True
                while True:
                    if first_pass3283: first_pass3283 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if (ttt.is_comma or ttt.morph.class0_.is_preposition): 
                        continue
                    if ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                        continue
                    if (ttt.is_value("ДОГОВАРИВАТЬСЯ", None)): 
                        continue
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None and npt.noun.is_value("СТОРОНА", None) and npt.morph.number != MorphNumber.SINGULAR): 
                        re = ParticipantToken._new1573(t, npt.end_token, ParticipantToken.Kinds.NAMEDASPARTS)
                        re.parts = refs
                        return re
                    break
        if ((isinstance(r1, OrganizationReferent)) or (isinstance(r1, PersonReferent))): 
            has_br = False
            has_named = False
            if (isinstance(r1, PersonReferent)): 
                # skip persons introduced as "лицо" or after "выдать/выдавать"
                if (t.previous is not None and t.previous.is_value("ЛИЦО", None)): 
                    return None
                elif (t.previous is not None and ((t.previous.is_value("ВЫДАВАТЬ", None) or t.previous.is_value("ВЫДАТЬ", None)))): 
                    return None
            # look INSIDE the referent token for "(... именуемый ... <role>)"
            ttt = t.begin_token
            while ttt is not None and (ttt.end_char < t.end_char): 
                if (ttt.is_char('(')): 
                    has_br = True
                elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')): 
                    pass
                elif (isinstance(ttt, ReferentToken)): 
                    pass
                elif (has_br or has_named): 
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                    if (npt is None): 
                        break
                    if (has_br): 
                        if (npt.end_token.next0_ is None or not npt.end_token.next0_.is_char(')')): 
                            break
                    if (not has_named): 
                        if (ParticipantToken.M_ONTOLOGY.try_parse(ttt, TerminParseAttr.NO) is None): 
                            break
                    re = ParticipantToken._new1573(t, t, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                ttt = ttt.next0_
            # look AFTER the referent for "именуемый ... <role>" / "... СТОРОНЫ"
            has_br = False
            has_named = False
            end_side = None
            brr = None
            add_refs = None
            ttt = t.next0_
            first_pass3284 = True
            while True:
                if first_pass3284: first_pass3284 = False
                else: ttt = ttt.next0_
                if (not (ttt is not None)): break
                # "<number> СТОРОНЫ" closes the participant description
                if ((isinstance(ttt, NumberToken)) and (isinstance(ttt.next0_, TextToken)) and ttt.next0_.term == "СТОРОНЫ"): 
                    ttt = ttt.next0_
                    end_side = ttt
                    if (ttt.next0_ is not None and ttt.next0_.is_comma): 
                        ttt = ttt.next0_
                    if (ttt.next0_ is not None and ttt.next0_.is_and): 
                        break
                if (brr is not None and ttt.begin_char > brr.end_char): 
                    brr = (None)
                if (BracketHelper.can_be_start_of_sequence(ttt, False, False)): 
                    brr = BracketHelper.try_parse(ttt, BracketParseAttr.NO, 100)
                    # very short parenthesized inserts are skipped entirely
                    if (brr is not None and (brr.length_char < 7) and ttt.is_char('(')): 
                        ttt = brr.end_token
                        brr = (None)
                        continue
                elif ((ttt.is_value("ИМЕНОВАТЬ", None) or ttt.is_value("ДАЛЬНЕЙШИЙ", None) or ttt.is_value("ДАЛЕЕ", None)) or ttt.is_value("ТЕКСТ", None)): 
                    has_named = True
                elif ((ttt.is_comma or ttt.morph.class0_.is_preposition or ttt.is_hiphen) or ttt.is_char(':')): 
                    pass
                elif (brr is not None or has_named): 
                    # inside brackets or after "именуемый": extract the role
                    if (BracketHelper.can_be_start_of_sequence(ttt, True, False)): 
                        ttt = ttt.next0_
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.REFERENTCANBENOUN, 0, None)
                    typ22 = None
                    if (npt is not None): 
                        ttt = npt.end_token
                        if (npt.end_token.is_value("ДОГОВОР", None)): 
                            continue
                    else: 
                        ttok = None
                        if (isinstance(ttt, MetaToken)): 
                            ttok = ParticipantToken.M_ONTOLOGY.try_parse(ttt.begin_token, TerminParseAttr.NO)
                        if (ttok is not None): 
                            typ22 = ttok.termin.canonic_text
                        elif (has_named and ttt.morph.class0_.is_adjective): 
                            typ22 = ttt.get_normal_case_text(MorphClass.ADJECTIVE, MorphNumber.UNDEFINED, MorphGender.UNDEFINED, False)
                        elif (brr is not None): 
                            continue
                        else: 
                            break
                    if (BracketHelper.can_be_end_of_sequence(ttt.next0_, True, None, False)): 
                        ttt = ttt.next0_
                    if (brr is not None): 
                        if (ttt.next0_ is None): 
                            ttt = brr.end_token
                            continue
                        ttt = ttt.next0_
                    if (not has_named and typ22 is None): 
                        if (ParticipantToken.M_ONTOLOGY.try_parse(npt.begin_token, TerminParseAttr.NO) is None): 
                            break
                    re = ParticipantToken._new1573(t, ttt, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = (Utils.ifNotNull(typ22, npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)))
                    re.parts = list()
                    re.parts.append(r1)
                    return re
                elif ((ttt.is_value("ЗАРЕГИСТРИРОВАННЫЙ", None) or ttt.is_value("КАЧЕСТВО", None) or ttt.is_value("ПРОЖИВАЮЩИЙ", None)) or ttt.is_value("ЗАРЕГ", None)): 
                    # registration/residence boilerplate — keep scanning
                    pass
                elif (ttt.get_referent() == r1): 
                    pass
                elif ((isinstance(ttt.get_referent(), PersonIdentityReferent)) or (isinstance(ttt.get_referent(), AddressReferent))): 
                    # identity documents / addresses become extra parts
                    if (add_refs is None): 
                        add_refs = list()
                    add_refs.append(ttt.get_referent())
                else: 
                    prr = ttt.kit.process_referent("PERSONPROPERTY", ttt)
                    if (prr is not None): 
                        ttt = prr.end_token
                        continue
                    if (isinstance(ttt.get_referent(), GeoReferent)): 
                        continue
                    npt = NounPhraseHelper.try_parse(ttt, NounPhraseParseAttr.NO, 0, None)
                    if (npt is not None): 
                        if ((npt.noun.is_value("МЕСТО", None) or npt.noun.is_value("ЖИТЕЛЬСТВО", None) or npt.noun.is_value("ПРЕДПРИНИМАТЕЛЬ", None)) or npt.noun.is_value("ПОЛ", None) or npt.noun.is_value("РОЖДЕНИЕ", None)): 
                            ttt = npt.end_token
                            continue
                    if (ttt.is_newline_before): 
                        break
                    if (ttt.length_char < 3): 
                        continue
                    mc = ttt.get_morph_class_in_dictionary()
                    if (mc.is_adverb or mc.is_adjective): 
                        continue
                    if (ttt.chars.is_all_upper): 
                        continue
                    break
            if (end_side is not None or ((add_refs is not None and t.previous is not None and t.previous.is_and))): 
                re = ParticipantToken._new1573(t, Utils.ifNotNull(end_side, t), ParticipantToken.Kinds.NAMEDAS)
                re.typ = (None)
                re.parts = list()
                re.parts.append(r1)
                if (add_refs is not None): 
                    re.parts.extend(add_refs)
                return re
    # case: "<role term> [-|:] <person/org referent>"
    too = ParticipantToken.M_ONTOLOGY.try_parse(t, TerminParseAttr.NO)
    if (too is not None): 
        if ((isinstance(t.previous, TextToken)) and t.previous.is_value("ЛИЦО", None)): 
            too = (None)
    if (too is not None and too.termin.tag is not None and too.termin.canonic_text != "СТОРОНА"): 
        tt1 = too.end_token.next0_
        if (tt1 is not None): 
            if (tt1.is_hiphen or tt1.is_char(':')): 
                tt1 = tt1.next0_
            if (isinstance(tt1, ReferentToken)): 
                r1 = tt1.get_referent()
                if ((isinstance(r1, PersonReferent)) or (isinstance(r1, OrganizationReferent))): 
                    re = ParticipantToken._new1573(t, tt1, ParticipantToken.Kinds.NAMEDAS)
                    re.typ = too.termin.canonic_text
                    re.parts = list()
                    re.parts.append(r1)
                    return re
    # case: a bare role word, matched against the ontology and against the
    # known types of p1/p2 (with one-error tolerance)
    add_typ1 = (None if p1 is None else p1.typ)
    add_typ2 = (None if p2 is None else p2.typ)
    if (BracketHelper.can_be_start_of_sequence(tt, False, False) and tt.next0_ is not None): 
        br = True
        tt = tt.next0_
    term1 = None
    term2 = None
    if (add_typ1 is not None and add_typ1.find(' ') > 0 and not add_typ1.startswith("СТОРОНА")): 
        term1 = Termin(add_typ1)
    if (add_typ2 is not None and add_typ2.find(' ') > 0 and not add_typ2.startswith("СТОРОНА")): 
        term2 = Termin(add_typ2)
    named = False
    typ_ = None
    t1 = None
    t0 = tt
    first_pass3285 = True
    while True:
        if first_pass3285: first_pass3285 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.morph.class0_.is_preposition and typ_ is not None): 
            continue
        if (tt.is_char_of("(:)") or tt.is_hiphen): 
            continue
        if (tt.is_table_control_char): 
            break
        if (tt.is_newline_before and tt != t0): 
            if (isinstance(tt, NumberToken)): 
                break
            if ((isinstance(tt, TextToken)) and (isinstance(tt.previous, TextToken))): 
                # a duplicated word across a line break ends the scan
                if (tt.previous.is_value(tt.term, None)): 
                    break
        if (BracketHelper.is_bracket(tt, False)): 
            continue
        tok = (ParticipantToken.M_ONTOLOGY.try_parse(tt, TerminParseAttr.NO) if ParticipantToken.M_ONTOLOGY is not None else None)
        if (tok is not None and (isinstance(tt.previous, TextToken))): 
            if (tt.previous.is_value("ЛИЦО", None)): 
                return None
        if (tok is None): 
            # no ontology hit: try the externally supplied types
            if (add_typ1 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ1, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ1, None)))))): 
                if (typ_ is not None): 
                    if (not ParticipantToken.__is_types_equal(add_typ1, typ_)): 
                        break
                typ_ = add_typ1
                t1 = tt
                continue
            if (add_typ2 is not None and ((MiscHelper.is_not_more_than_one_error(add_typ2, tt) or (((isinstance(tt, MetaToken)) and tt.begin_token.is_value(add_typ2, None)))))): 
                if (typ_ is not None): 
                    if (not ParticipantToken.__is_types_equal(add_typ2, typ_)): 
                        break
                typ_ = add_typ2
                t1 = tt
                continue
            if (tt.chars.is_letter): 
                if (term1 is not None): 
                    tok1 = term1.try_parse(tt, TerminParseAttr.NO)
                    if (tok1 is not None): 
                        if (typ_ is not None): 
                            if (not ParticipantToken.__is_types_equal(add_typ1, typ_)): 
                                break
                        typ_ = add_typ1
                        tt = tok1.end_token
                        t1 = tt
                        continue
                if (term2 is not None): 
                    tok2 = term2.try_parse(tt, TerminParseAttr.NO)
                    if (tok2 is not None): 
                        if (typ_ is not None): 
                            if (not ParticipantToken.__is_types_equal(add_typ2, typ_)): 
                                break
                        typ_ = add_typ2
                        tt = tok2.end_token
                        t1 = tt
                        continue
                # after "именуемый": accept a capitalized dictionary noun
                if (named and tt.get_morph_class_in_dictionary().is_noun): 
                    if (not tt.chars.is_all_lower or BracketHelper.is_bracket(tt.previous, True)): 
                        if (DecreeToken.is_keyword(tt, False) is None): 
                            val = tt.get_normal_case_text(MorphClass.NOUN, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                            if (typ_ is not None): 
                                if (not ParticipantToken.__is_types_equal(typ_, val)): 
                                    break
                            typ_ = val
                            t1 = tt
                            continue
                if (named and typ_ is None and is_contract): 
                    # contract-specific fallback: capitalized Cyrillic word
                    if ((isinstance(tt, TextToken)) and tt.chars.is_cyrillic_letter and tt.chars.is_capital_upper): 
                        dc = tt.get_morph_class_in_dictionary()
                        if (dc.is_undefined or dc.is_noun): 
                            dt = DecreeToken.try_attach(tt, None, False)
                            ok = True
                            if (dt is not None): 
                                ok = False
                            elif (tt.is_value("СТОРОНА", None)): 
                                ok = False
                            if (ok): 
                                typ_ = tt.lemma
                                t1 = tt
                                continue
                        if (dc.is_adjective): 
                            npt = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
                            if (npt is not None and len(npt.adjectives) > 0 and npt.noun.get_morph_class_in_dictionary().is_noun): 
                                typ_ = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                                t1 = npt.end_token
                                continue
            if (tt == t): 
                break
            if ((isinstance(tt, NumberToken)) or tt.is_char('.')): 
                break
            # NOTE(review): reconstructed dedent — short tokens are tolerated
            # once a type is known; any other unmatched token stops the scan
            if (tt.length_char < 4): 
                if (typ_ is not None): 
                    continue
            break
        if (tok.termin.tag is None): 
            # a "named" marker term ("именуемый"-like) from the ontology
            named = True
        else: 
            if (typ_ is not None): 
                break
            if (tok.termin.canonic_text == "СТОРОНА"): 
                # "СТОРОНА <n>" ("party N") must carry a number
                tt1 = tt.next0_
                if (tt1 is not None and tt1.is_hiphen): 
                    tt1 = tt1.next0_
                if (not (isinstance(tt1, NumberToken))): 
                    break
                if (tt1.is_newline_before): 
                    break
                typ_ = "{0} {1}".format(tok.termin.canonic_text, tt1.value)
                t1 = tt1
            else: 
                typ_ = tok.termin.canonic_text
                t1 = tok.end_token
                break
        tt = tok.end_token
    if (typ_ is None): 
        return None
    if (not named and t1 != t and not typ_.startswith("СТОРОНА")): 
        if (not ParticipantToken.__is_types_equal(typ_, add_typ1) and not ParticipantToken.__is_types_equal(typ_, add_typ2)): 
            return None
    # absorb a closing bracket/quote around the role word
    if (BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
        t1 = t1.next0_
        if (not t.is_whitespace_before and BracketHelper.can_be_start_of_sequence(t.previous, False, False)): 
            t = t.previous
    elif (BracketHelper.can_be_start_of_sequence(t, False, False) and BracketHelper.can_be_end_of_sequence(t1.next0_, True, t, True)): 
        t1 = t1.next0_
    if (br and t1.next0_ is not None and BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
        t1 = t1.next0_
    res = ParticipantToken._new1578(t, t1, (ParticipantToken.Kinds.NAMEDAS if named else ParticipantToken.Kinds.PURE), typ_)
    if (t.is_char(':')): 
        res.begin_token = t.next0_
    return res
def try_parse_number_with_postfix(t: 'Token') -> 'NumberExToken':
    """Parse a number followed by a unit/currency postfix starting at ``t``.

    Recognizes, among others: a currency sign before the number; "(NN) <money
    postfix>"; adjective words that fuse a numeral and a unit; "N с половиной"
    (= "N and a half"); a parenthesized verification copy of the number
    ("1000 (одна тысяча) ..."), including the "(NN целых MM сотых)" fraction
    spelling; the bare 'р'/'р.' ruble abbreviation when money context is
    nearby; and glued dimension postfixes like "СМХ"/"MX"/"ММХ".

    Returns a NumberExToken carrying value, type and (possibly) an alternative
    real value from the verification copy, or None.
    """
    if (t is None): 
        return None
    t0 = t
    # currency sign immediately before the number (e.g. "$100")
    is_dollar = None
    if (t.length_char == 1 and t.next0_ is not None): 
        is_dollar = NumberHelper._is_money_char(t)
        if ((is_dollar) is not None): 
            t = t.next0_
    nt = Utils.asObjectOrNull(t, NumberToken)
    if (nt is None): 
        # "(NN) <money postfix>" without a preceding number
        if ((not (isinstance(t.previous, NumberToken)) and t.is_char('(') and (isinstance(t.next0_, NumberToken))) and t.next0_.next0_ is not None and t.next0_.next0_.is_char(')')): 
            toks1 = NumberExHelper._m_postfixes.try_parse(t.next0_.next0_.next0_, TerminParseAttr.NO)
            if (toks1 is not None and (Utils.valToEnum(toks1.termin.tag, NumberExType)) == NumberExType.MONEY): 
                nt0 = Utils.asObjectOrNull(t.next0_, NumberToken)
                res = NumberExToken._new405(t, toks1.end_token, nt0.value, nt0.typ, NumberExType.MONEY, nt0.real_value, toks1.begin_token.morph)
                return NumberExHelper.__correct_money(res, toks1.begin_token)
        # adjective fusing numeral + unit in a single word: split the term at
        # every position and look both halves up in the dictionaries
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is None or not tt.morph.class0_.is_adjective): 
            return None
        val = tt.term
        i = 4
        first_pass3038 = True
        while True:
            if first_pass3038: first_pass3038 = False
            else: i += 1
            if (not (i < (len(val) - 5))): break
            v = val[0:0 + i]
            li = NumberHelper._m_nums.find_termins_by_string(v, tt.morph.language)
            if (li is None): 
                continue
            vv = val[i:]
            lii = NumberExHelper._m_postfixes.find_termins_by_string(vv, tt.morph.language)
            if (lii is not None and len(lii) > 0): 
                re = NumberExToken._new406(t, t, str(li[0].tag), NumberSpellingType.WORDS, Utils.valToEnum(lii[0].tag, NumberExType), t.morph)
                NumberExHelper.__correct_ext_types(re)
                return re
            break
        return None
    if (t.next0_ is None and is_dollar is None): 
        return None
    f = nt.real_value
    if (math.isnan(f)): 
        return None
    t1 = nt.next0_
    # extend to a real number when a decimal separator / digit group follows
    if (((t1 is not None and t1.is_char_of(",."))) or (((isinstance(t1, NumberToken)) and (t1.whitespaces_before_count < 3)))): 
        d = 0
        tt11 = NumberHelper.try_parse_real_number(nt, False, False)
        if (tt11 is not None): 
            t1 = tt11.end_token.next0_
            f = tt11.real_value
    if (t1 is None): 
        if (is_dollar is None): 
            return None
    elif ((t1.next0_ is not None and t1.next0_.is_value("С", "З") and t1.next0_.next0_ is not None) and t1.next0_.next0_.is_value("ПОЛОВИНА", None)): 
        # "N с половиной" (= "N and a half")
        f += 0.5
        t1 = t1.next0_.next0_
    if (t1 is not None and t1.is_hiphen and t1.next0_ is not None): 
        t1 = t1.next0_
    det = False
    altf = f
    # skip a "-00" kopeck-style tail
    if (((isinstance(t1, NumberToken)) and t1.previous is not None and t1.previous.is_hiphen) and t1.int_value == 0 and t1.length_char == 2): 
        t1 = t1.next0_
    # parenthesized verification copy: "1000 (одна тысяча ...)"
    if ((t1 is not None and t1.next0_ is not None and t1.is_char('(')) and (((isinstance(t1.next0_, NumberToken)) or t1.next0_.is_value("НОЛЬ", None))) and t1.next0_.next0_ is not None): 
        nt1 = Utils.asObjectOrNull(t1.next0_, NumberToken)
        val = 0
        if (nt1 is not None): 
            val = nt1.real_value
        if (math.floor(f) == math.floor(val)): 
            ttt = t1.next0_.next0_
            if (ttt.is_char(')')): 
                t1 = ttt.next0_
                det = True
                if ((isinstance(t1, NumberToken)) and t1.int_value is not None and t1.int_value == 0): 
                    t1 = t1.next0_
            elif (((((isinstance(ttt, NumberToken)) and (ttt.real_value < 100) and ttt.next0_ is not None) and ttt.next0_.is_char('/') and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.get_source_text() == "100" and ttt.next0_.next0_.next0_ is not None) and ttt.next0_.next0_.next0_.is_char(')')): 
                # "(NN MM/100)" fraction form: MM must match the decimal rest
                rest = NumberExHelper.__get_decimal_rest100(f)
                if (ttt.int_value is not None and rest == ttt.int_value): 
                    t1 = ttt.next0_.next0_.next0_.next0_
                    det = True
            elif ((ttt.is_value("ЦЕЛЫХ", None) and (isinstance(ttt.next0_, NumberToken)) and ttt.next0_.next0_ is not None) and ttt.next0_.next0_.next0_ is not None and ttt.next0_.next0_.next0_.is_char(')')): 
                # "(NN целых MM десятых/сотых/...)" spelled-out fraction
                num2 = Utils.asObjectOrNull(ttt.next0_, NumberToken)
                altf = num2.real_value
                if (ttt.next0_.next0_.is_value("ДЕСЯТЫЙ", None)): 
                    altf /= (10)
                elif (ttt.next0_.next0_.is_value("СОТЫЙ", None)): 
                    altf /= (100)
                elif (ttt.next0_.next0_.is_value("ТЫСЯЧНЫЙ", None)): 
                    altf /= (1000)
                elif (ttt.next0_.next0_.is_value("ДЕСЯТИТЫСЯЧНЫЙ", None)): 
                    altf /= (10000)
                elif (ttt.next0_.next0_.is_value("СТОТЫСЯЧНЫЙ", None)): 
                    altf /= (100000)
                elif (ttt.next0_.next0_.is_value("МИЛЛИОННЫЙ", None)): 
                    altf /= (1000000)
                if (altf < 1): 
                    altf += val
                t1 = ttt.next0_.next0_.next0_.next0_
                det = True
            else: 
                toks1 = NumberExHelper._m_postfixes.try_parse(ttt, TerminParseAttr.NO)
                if (toks1 is not None): 
                    if ((Utils.valToEnum(toks1.termin.tag, NumberExType)) == NumberExType.MONEY): 
                        if (toks1.end_token.next0_ is not None and toks1.end_token.next0_.is_char(')')): 
                            res = NumberExToken._new407(t, toks1.end_token.next0_, nt.value, nt.typ, NumberExType.MONEY, f, altf, toks1.begin_token.morph)
                            return NumberExHelper.__correct_money(res, toks1.begin_token)
                # the whole bracketed part may itself be a number+postfix
                res2 = NumberExHelper.try_parse_number_with_postfix(t1.next0_)
                if (res2 is not None and res2.end_token.next0_ is not None and res2.end_token.next0_.is_char(')')): 
                    res2.begin_token = t
                    res2.end_token = res2.end_token.next0_
                    res2.alt_real_value = res2.real_value
                    res2.real_value = f
                    NumberExHelper.__correct_ext_types(res2)
                    if (res2.whitespaces_after_count < 2): 
                        toks2 = NumberExHelper._m_postfixes.try_parse(res2.end_token.next0_, TerminParseAttr.NO)
                        if (toks2 is not None): 
                            if ((Utils.valToEnum(toks2.termin.tag, NumberExType)) == NumberExType.MONEY): 
                                res2.end_token = toks2.end_token
                    return res2
        elif (nt1 is not None and nt1.typ == NumberSpellingType.WORDS and nt.typ == NumberSpellingType.DIGIT): 
            # the copy is spelled in words but does not match numerically —
            # keep it as the alternative value
            altf = nt1.real_value
            ttt = t1.next0_.next0_
            if (ttt.is_char(')')): 
                t1 = ttt.next0_
                det = True
    if (not det): 
        altf = f
    # skip a "(сумма ...)" parenthesized remark
    if ((t1 is not None and t1.is_char('(') and t1.next0_ is not None) and t1.next0_.is_value("СУММА", None)): 
        br = BracketHelper.try_parse(t1, BracketParseAttr.NO, 100)
        if (br is not None): 
            t1 = br.end_token.next0_
    if (is_dollar is not None): 
        # currency-sign form: result type is MONEY; handle "-миллионный",
        # glued "M"/"BN" magnitude suffixes
        te = None
        if (t1 is not None): 
            te = t1.previous
        else: 
            t1 = t0
            while t1 is not None: 
                if (t1.next0_ is None): 
                    te = t1
                t1 = t1.next0_
        if (te is None): 
            return None
        if (te.is_hiphen and te.next0_ is not None): 
            if (te.next0_.is_value("МИЛЛИОННЫЙ", None)): 
                f *= (1000000)
                altf *= (1000000)
                te = te.next0_
            elif (te.next0_.is_value("МИЛЛИАРДНЫЙ", None)): 
                f *= (1000000000)
                altf *= (1000000000)
                te = te.next0_
        if (not te.is_whitespace_after and (isinstance(te.next0_, TextToken))): 
            if (te.next0_.is_value("M", None)): 
                f *= (1000000)
                altf *= (1000000)
                te = te.next0_
            elif (te.next0_.is_value("BN", None)): 
                f *= (1000000000)
                altf *= (1000000000)
                te = te.next0_
        return NumberExToken._new408(t0, te, "", nt.typ, NumberExType.MONEY, f, altf, is_dollar)
    if (t1 is None or ((t1.is_newline_before and not det))): 
        return None
    toks = NumberExHelper._m_postfixes.try_parse(t1, TerminParseAttr.NO)
    if ((toks is None and det and (isinstance(t1, NumberToken))) and t1.value == "0"): 
        toks = NumberExHelper._m_postfixes.try_parse(t1.next0_, TerminParseAttr.NO)
    if (toks is None and t1.is_char('р')): 
        # bare 'р' ruble abbreviation: accept only when money context
        # ("сумма", "наличный", "баланс" or a MONEY referent) is nearby
        cou = 10
        ttt = t0.previous
        first_pass3039 = True
        while True:
            if first_pass3039: first_pass3039 = False
            else: 
                ttt = ttt.previous
                cou -= 1
            if (not (ttt is not None and cou > 0)): break
            if (ttt.is_value("СУММА", None) or ttt.is_value("НАЛИЧНЫЙ", None) or ttt.is_value("БАЛАНС", None)): 
                pass
            elif (ttt.get_referent() is not None and ttt.get_referent().type_name == "MONEY"): 
                pass
            else: 
                continue
            toks = TerminToken._new409(t1, t1, NumberExHelper._m_postfixes.find_termins_by_canonic_text("RUB")[0])
            if (t1.next0_ is not None and t1.next0_.is_char('.')): 
                toks.end_token = t1.next0_
            ty = Utils.valToEnum(toks.termin.tag, NumberExType)
            return NumberExToken._new410(t, toks.end_token, nt.value, nt.typ, ty, f, altf, toks.begin_token.morph, "RUB")
    if (toks is not None): 
        t1 = toks.end_token
        # absorb an abbreviating dot after the postfix
        if (not t1.is_char('.') and t1.next0_ is not None and t1.next0_.is_char('.')): 
            if ((isinstance(t1, TextToken)) and t1.is_value(toks.termin.terms[0].canonical_text, None)): 
                pass
            elif (not t1.chars.is_letter): 
                pass
            else: 
                t1 = t1.next0_
        if (toks.termin.canonic_text == "LTL"): 
            return None
        if (toks.begin_token == t1): 
            # a free-standing preposition/conjunction is not a real postfix
            if (t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction): 
                if (t1.is_whitespace_before and t1.is_whitespace_after): 
                    return None
        ty = Utils.valToEnum(toks.termin.tag, NumberExType)
        res = NumberExToken._new407(t, t1, nt.value, nt.typ, ty, f, altf, toks.begin_token.morph)
        if (ty != NumberExType.MONEY): 
            NumberExHelper.__correct_ext_types(res)
            return res
        return NumberExHelper.__correct_money(res, toks.begin_token)
    pfx = NumberExHelper.__attach_spec_postfix(t1)
    if (pfx is not None): 
        pfx.begin_token = t
        pfx.value = nt.value
        pfx.typ = nt.typ
        pfx.real_value = f
        pfx.alt_real_value = altf
        return pfx
    # "N <preposition> <number+postfix>" — inherit the unit without extending
    if (t1.next0_ is not None and ((t1.morph.class0_.is_preposition or t1.morph.class0_.is_conjunction))): 
        if (t1.is_value("НА", None)): 
            pass
        else: 
            nn = NumberExHelper.try_parse_number_with_postfix(t1.next0_)
            if (nn is not None): 
                return NumberExToken._new412(t, t, nt.value, nt.typ, nn.ex_typ, f, altf, nn.ex_typ2, nn.ex_typ_param)
    # glued dimension postfixes like "10смх20" (cm/m/mm followed by 'х')
    if (not t1.is_whitespace_after and (isinstance(t1.next0_, NumberToken)) and (isinstance(t1, TextToken))): 
        term = t1.term
        ty = NumberExType.UNDEFINED
        if (term == "СМХ" or term == "CMX"): 
            ty = NumberExType.SANTIMETER
        elif (term == "MX" or term == "МХ"): 
            ty = NumberExType.METER
        elif (term == "MMX" or term == "ММХ"): 
            ty = NumberExType.MILLIMETER
        if (ty != NumberExType.UNDEFINED): 
            return NumberExToken._new413(t, t1, nt.value, nt.typ, ty, f, altf, True)
    return None
def get_name_ex(begin: 'Token', end: 'Token', cla: 'MorphClass', mc: 'MorphCase', gender: 'MorphGender'=MorphGender.UNDEFINED, ignore_brackets_and_hiphens: bool=False, ignore_geo_referent: bool=False) -> str:
    """Build the text of the token span ``begin..end``, optionally normalizing
    each word to the requested morphological class/case/gender.

    cla/mc/gender: when non-empty, each TextToken is replaced by the normal
        form of a word form matching these constraints (falling back to the
        raw term).
    ignore_brackets_and_hiphens: when True, bracketed fragments are rendered
        recursively and non-spaced hyphens are dropped; when False, hyphenated
        pairs are joined with '-' via the ``prefix`` accumulator.
    ignore_geo_referent: when True, embedded GEO referent tokens (except at
        the very start) are skipped.
    Returns the assembled string, or None for an empty/invalid span.
    """
    if (end is None or begin is None): 
        return None
    if (begin.end_char > end.begin_char and begin != end): 
        return None
    res = io.StringIO()
    prefix = None
    t = begin
    first_pass3064 = True
    while True:
        if first_pass3064: first_pass3064 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= end.end_char)): break
        # hard cap on output size
        if (res.tell() > 1000): 
            break
        if (t.is_table_control_char): 
            continue
        if (ignore_brackets_and_hiphens): 
            if (BracketHelper.is_bracket(t, False)): 
                if (t == end): 
                    break
                if (t.is_char_of("(<[")): 
                    # render the bracketed fragment recursively, keeping the
                    # original bracket characters around it
                    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                    if (br is not None and br.end_char <= end.end_char): 
                        tmp = ProperNameHelper.get_name_ex(br.begin_token.next0_, br.end_token.previous, MorphClass.UNDEFINED, MorphCase.UNDEFINED, MorphGender.UNDEFINED, ignore_brackets_and_hiphens, False)
                        if (tmp is not None): 
                            # skip a trailing one-token non-letter insert
                            if ((br.end_char == end.end_char and br.begin_token.next0_ == br.end_token.previous and not br.begin_token.next0_.chars.is_letter) and not (isinstance(br.begin_token.next0_, ReferentToken))): 
                                pass
                            else: 
                                print(" {0}{1}{2}".format(t.get_source_text(), tmp, br.end_token.get_source_text()), end="", file=res, flush=True)
                        t = br.end_token
                continue
            if (t.is_hiphen): 
                if (t == end): 
                    break
                elif (t.is_whitespace_before or t.is_whitespace_after): 
                    continue
        tt = Utils.asObjectOrNull(t, TextToken)
        if (tt is not None): 
            if (not ignore_brackets_and_hiphens): 
                # accumulate the left parts of hyphenated compounds in
                # ``prefix`` and join them to the final part below
                if ((tt.next0_ is not None and tt.next0_.is_hiphen and (isinstance(tt.next0_.next0_, TextToken))) and tt != end and tt.next0_ != end): 
                    if (prefix is None): 
                        prefix = tt.term
                    else: 
                        prefix = "{0}-{1}".format(prefix, tt.term)
                    t = tt.next0_
                    if (t == end): 
                        break
                    else: 
                        continue
            s = None
            if (cla.value != (0) or not mc.is_undefined or gender != MorphGender.UNDEFINED): 
                # pick a normal form satisfying class/case/gender, preferring
                # one whose normal form equals the surface term
                for wff in tt.morph.items: 
                    wf = Utils.asObjectOrNull(wff, MorphWordForm)
                    if (wf is None): 
                        continue
                    if (cla.value != (0)): 
                        if ((((wf.class0_.value) & (cla.value))) == 0): 
                            continue
                    if (not mc.is_undefined): 
                        if (((wf.case_) & mc).is_undefined): 
                            continue
                    if (gender != MorphGender.UNDEFINED): 
                        if (((wf.gender) & (gender)) == (MorphGender.UNDEFINED)): 
                            continue
                    if (s is None or wf.normal_case == tt.term): 
                        s = wf.normal_case
                if (s is None and gender != MorphGender.UNDEFINED): 
                    # retry without the gender constraint
                    for wff in tt.morph.items: 
                        wf = Utils.asObjectOrNull(wff, MorphWordForm)
                        if (wf is None): 
                            continue
                        if (cla.value != (0)): 
                            if ((((wf.class0_.value) & (cla.value))) == 0): 
                                continue
                        if (not mc.is_undefined): 
                            if (((wf.case_) & mc).is_undefined): 
                                continue
                        if (s is None or wf.normal_case == tt.term): 
                            s = wf.normal_case
            if (s is None): 
                s = tt.term
                if (tt.chars.is_last_lower and tt.length_char > 2): 
                    # mixed-case token (e.g. an abbreviation with a lowercase
                    # tail): cut at the last uppercase letter of the source
                    s = tt.get_source_text()
                    for i in range(len(s) - 1, -1, -1): 
                        if (str.isupper(s[i])): 
                            s = s[0:0 + i + 1]
                            break
            if (prefix is not None): 
                delim = "-"
                if (ignore_brackets_and_hiphens): 
                    delim = " "
                s = "{0}{1}{2}".format(prefix, delim, s)
            prefix = (None)
            if (res.tell() > 0 and len(s) > 0): 
                # insert a separating space unless gluing to a hyphen
                if (str.isalnum(s[0])): 
                    ch0 = Utils.getCharAtStringIO(res, res.tell() - 1)
                    if (ch0 == '-'): 
                        pass
                    else: 
                        print(' ', end="", file=res)
                elif (not ignore_brackets_and_hiphens and BracketHelper.can_be_start_of_sequence(tt, False, False)): 
                    print(' ', end="", file=res)
            print(s, end="", file=res)
        elif (isinstance(t, NumberToken)): 
            if (res.tell() > 0): 
                if (not t.is_whitespace_before and Utils.getCharAtStringIO(res, res.tell() - 1) == '-'): 
                    pass
                else: 
                    print(' ', end="", file=res)
            nt = Utils.asObjectOrNull(t, NumberToken)
            # keep the original word for single-word adjectival numerals
            if ((t.morph.class0_.is_adjective and nt.typ == NumberSpellingType.WORDS and nt.begin_token == nt.end_token) and (isinstance(nt.begin_token, TextToken))): 
                print(nt.begin_token.term, end="", file=res)
            else: 
                print(nt.value, end="", file=res)
        elif (isinstance(t, MetaToken)): 
            if ((ignore_geo_referent and t != begin and t.get_referent() is not None) and t.get_referent().type_name == "GEO"): 
                continue
            # recurse into composite tokens with the same constraints
            s = ProperNameHelper.get_name_ex(t.begin_token, t.end_token, cla, mc, gender, ignore_brackets_and_hiphens, ignore_geo_referent)
            if (not Utils.isNullOrEmpty(s)): 
                if (res.tell() > 0): 
                    if (not t.is_whitespace_before and Utils.getCharAtStringIO(res, res.tell() - 1) == '-'): 
                        pass
                    else: 
                        print(' ', end="", file=res)
                print(s, end="", file=res)
        if (t == end): 
            break
    if (res.tell() == 0): 
        return None
    return Utils.toStringStringIO(res)
def try_parse(t : 'Token', add_units : 'TerminCollection', can_be_set : bool=True, can_units_absent : bool=False, is_resctriction : bool=False, is_subval : bool=False) -> 'MeasureToken':
    """Try to parse a measured value ("name ... number(s) unit(s)") starting at *t*.

    Parameters:
        t: first token of the candidate fragment; must be a plain TextToken.
        add_units: user-supplied unit terms forwarded to the number/unit parsers.
        can_be_set: if True, a second number+unit group after a comma/"and" may
            turn the result into a set of two internal measures.
        can_units_absent: if True, a number without any explicit unit is still accepted.
        is_resctriction: (spelling kept from the original generator) restriction
            mode; hitting a ':'/','/'_' separator then aborts with None.
        is_subval: parsing a sub-value inside an enclosing measure; changes how
            the name text is extracted and allows bare word-run names.

    Returns:
        A MeasureToken covering name, numbers and units, or None.

    NOTE(review): machine-converted (C# -> Python) Pullenti code.  The source
    arrived with its line structure collapsed; the indentation below was
    reconstructed from statement order and the converter's idioms and should be
    verified against the original generated file before relying on exact
    control flow.
    """
    # Only text tokens can start a measure.
    if (not (isinstance(t, TextToken))): 
        return None
    if (t.is_table_control_char): 
        return None
    t0 = t
    whd = None
    minmax = 0
    # Optional leading "min"/"max" marker; RefOutArgWrapper emulates a C# ref/out arg.
    wrapminmax1625 = RefOutArgWrapper(minmax)
    tt = NumbersWithUnitToken._is_min_or_max(t0, wrapminmax1625)
    minmax = wrapminmax1625.value
    if (tt is not None): 
        t = tt.next0_
    # The measure name is normally a noun phrase.
    npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS), NounPhraseParseAttr), 0, None)
    if (npt is None): 
        # No noun phrase: accept a few special name forms instead.
        whd = NumbersWithUnitToken._try_parsewhl(t)
        if (whd is not None): 
            # Width/height/length-style header.
            npt = NounPhraseToken(t0, whd.end_token)
        elif (t0.is_value("КПД", None)): 
            # "КПД" = efficiency (Russian abbreviation).
            npt = NounPhraseToken(t0, t0)
        elif ((isinstance(t0, TextToken)) and t0.length_char > 3 and t0.get_morph_class_in_dictionary().is_undefined): 
            # Unknown long word: usable as a name.
            npt = NounPhraseToken(t0, t0)
        elif (t0.is_value("T", None) and t0.chars.is_all_lower): 
            # Lower-case "t" (e.g. temperature variable), possibly followed by '='.
            npt = NounPhraseToken(t0, t0)
            t = t0
            if (t.next0_ is not None and t.next0_.is_char('=')): 
                npt.end_token = t.next0_
        elif ((isinstance(t0, TextToken)) and t0.chars.is_letter and is_subval): 
            # Sub-value mode: a bare run of words (and bracketed fragments)
            # may serve as the name, up to the first number-with-unit.
            if (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                return None
            npt = NounPhraseToken(t0, t0)
            t = t0.next0_
            while t is not None:
                if (t.whitespaces_before_count > 2): 
                    break
                elif (not (isinstance(t, TextToken))): 
                    break
                elif (not t.chars.is_letter): 
                    br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
                    if (br is not None): 
                        t = br.end_token
                        npt.end_token = t
                    else: 
                        break
                elif (NumbersWithUnitToken.try_parse(t, add_units, False, False, False, False) is not None): 
                    break
                else: 
                    npt.end_token = t
                t = t.next0_
        else: 
            return None
    elif (NumberHelper.try_parse_real_number(t, True, False) is not None): 
        # A bare real number here cannot be a measure name.
        return None
    else: 
        # Nor can a date fragment.
        dtok = DateItemToken.try_attach(t, None, False)
        if (dtok is not None): 
            return None
    t1 = npt.end_token
    t = npt.end_token
    name_ = MetaToken._new509(npt.begin_token, npt.end_token, npt.morph)
    units = None
    units2 = None
    internals_ = list()
    not0_ = False
    # Scan forward from the name: extend it, collect internal measures
    # ("ПРИ ..." = "at/under ..."), bracketed unit lists, W/H/L headers, etc.
    # (first_passNNNN emulates a C# do/while loop.)
    tt = t1.next0_
    first_pass3305 = True
    while True:
        if first_pass3305: first_pass3305 = False
        else: tt = tt.next0_
        if (not (tt is not None)): break
        if (tt.is_newline_before): 
            break
        if (tt.is_table_control_char): 
            break
        # Embedded "min"/"max" marker inside the name.
        wrapminmax1617 = RefOutArgWrapper(minmax)
        tt2 = NumbersWithUnitToken._is_min_or_max(tt, wrapminmax1617)
        minmax = wrapminmax1617.value
        if (tt2 is not None): 
            tt = tt2
            t = tt
            t1 = t
            continue
        # Modal/copula verbs ("быть"=be, "должен/должный"=must, "может"=may,
        # "составлять"=amount to) are skipped; a preceding "НЕ" (= not) negates.
        if ((tt.is_value("БЫТЬ", None) or tt.is_value("ДОЛЖЕН", None) or tt.is_value("ДОЛЖНЫЙ", None)) or tt.is_value("МОЖЕТ", None) or ((tt.is_value("СОСТАВЛЯТЬ", None) and not tt.get_morph_class_in_dictionary().is_adjective))): 
            t = tt
            t1 = t
            if (tt.previous.is_value("НЕ", None)): 
                not0_ = True
            continue
        # Width/height/length construction directly after the name.
        www = NumbersWithUnitToken._try_parsewhl(tt)
        if (www is not None): 
            whd = www
            tt = www.end_token
            t = tt
            t1 = t
            continue
        if (tt.is_value("ПРИ", None)): 
            # "ПРИ" = "at/under": a condition measure, stored as internal.
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None): 
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
            n1 = NumbersWithUnitToken.try_parse(tt.next0_, add_units, False, False, False, False)
            if (n1 is not None and len(n1.units) > 0): 
                mt1 = MeasureToken._new1612(n1.begin_token, n1.end_token, n1)
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        # "ПО U" (= "over U", presumably voltage context — TODO confirm).
        if (tt.is_value("ПО", None) and tt.next0_ is not None and tt.next0_.is_value("U", None)): 
            tt = tt.next0_
            t = tt
            t1 = t
            continue
        if (len(internals_) > 0): 
            # Once internals exist, ':' terminates the scan; otherwise try to
            # collect further reliable internal measures.
            if (tt.is_char(':')): 
                break
            mt1 = MeasureToken.try_parse(tt.next0_, add_units, False, False, True, False)
            if (mt1 is not None and mt1.reliable): 
                internals_.append(mt1)
                tt = mt1.end_token
                t = tt
                t1 = t
                continue
        # Number spelled in words acting as an adjective extends the name.
        if ((isinstance(tt, NumberToken)) and tt.typ == NumberSpellingType.WORDS): 
            npt3 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.PARSENUMERICASADJECTIVE, 0, None)
            if (npt3 is not None): 
                tt = npt3.end_token
                t1 = tt
                if (len(internals_) == 0): 
                    name_.end_token = t1
                continue
        # "ABC-123"-style designation glued to an all-upper word.
        if (((tt.is_hiphen and not tt.is_whitespace_before and not tt.is_whitespace_after) and (isinstance(tt.next0_, NumberToken)) and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
            t = tt.next0_
            tt = t
            t1 = tt
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        # Number glued directly to an all-upper word ("ABC123").
        if (((isinstance(tt, NumberToken)) and not tt.is_whitespace_before and (isinstance(tt.previous, TextToken))) and tt.previous.chars.is_all_upper): 
            t = tt
            t1 = t
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        # "123-word" designation: jump over the hyphenated tail.
        if ((((isinstance(tt, NumberToken)) and not tt.is_whitespace_after and tt.next0_.is_hiphen) and not tt.next0_.is_whitespace_after and (isinstance(tt.next0_.next0_, TextToken))) and tt.next0_.next0_.length_char > 2): 
            tt = tt.next0_.next0_
            t = tt
            t1 = t
            npt1 = NounPhraseHelper.try_parse(tt, NounPhraseParseAttr.NO, 0, None)
            if (npt1 is not None and npt1.end_char > tt.end_char): 
                tt = npt1.end_token
                t = tt
                t1 = t
            if (len(internals_) == 0): 
                name_.end_token = t1
            continue
        if ((isinstance(tt, NumberToken)) and tt.previous is not None): 
            if (tt.previous.is_value("USB", None)): 
                # "USB 2.0"-like tail: absorb the glued token run into the name.
                t = tt
                t1 = t
                if (len(internals_) == 0): 
                    name_.end_token = t1
                ttt = tt.next0_
                while ttt is not None:
                    if (ttt.is_whitespace_before): 
                        break
                    if (ttt.is_char_of(",:")): 
                        break
                    tt = ttt
                    t = tt
                    t1 = t
                    if (len(internals_) == 0): 
                        name_.end_token = t1
                    ttt = ttt.next0_
                continue
            mt0 = NumbersWithUnitToken.try_parse(tt, add_units, False, False, False, False)
            if (mt0 is not None): 
                # A real number+unit begins here: the name scan is over unless
                # a larger noun phrase still swallows it.
                npt1 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSENUMERICASADJECTIVE) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
                if (npt1 is not None and npt1.end_char > mt0.end_char): 
                    tt = npt1.end_token
                    t = tt
                    t1 = t
                    if (len(internals_) == 0): 
                        name_.end_token = t1
                    continue
                break
        if (((tt.is_comma or tt.is_char('('))) and tt.next0_ is not None): 
            # After a comma or '(': W/H/L header or an explicit unit list.
            www = NumbersWithUnitToken._try_parsewhl(tt.next0_)
            if (www is not None): 
                whd = www
                tt = www.end_token
                t = tt
                t1 = t
                if (tt.next0_ is not None and tt.next0_.is_comma): 
                    tt = tt.next0_
                    t1 = tt
                if (tt.next0_ is not None and tt.next0_.is_char(')')): 
                    tt = tt.next0_
                    t1 = tt
                continue
            uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
            if (uu is not None): 
                t = uu[len(uu) - 1].end_token
                t1 = t
                units = uu
                if (tt.is_char('(') and t1.next0_ is not None and t1.next0_.is_char(')')): 
                    tt = t1.next0_
                    t = tt
                    t1 = t
                    continue
                elif (t1.next0_ is not None and t1.next0_.is_char('(')): 
                    # A second unit list in parentheses (alternative units).
                    uu = UnitToken.try_parse_list(t1.next0_.next0_, add_units, False)
                    if (uu is not None and uu[len(uu) - 1].end_token.next0_ is not None and uu[len(uu) - 1].end_token.next0_.is_char(')')): 
                        units2 = uu
                        tt = uu[len(uu) - 1].end_token.next0_
                        t = tt
                        t1 = t
                        continue
                    www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
                    if (www is not None): 
                        whd = www
                        tt = www.end_token
                        t = tt
                        t1 = t
                        continue
                if (uu is not None and len(uu) > 0 and not uu[0].is_doubt): 
                    break
                if (t1.next0_ is not None): 
                    if (t1.next0_.is_table_control_char or t1.is_newline_after): 
                        break
                # Doubtful units: discard and keep scanning.
                units = (None)
        # Bracketed fragment (not starting a number) is absorbed into the name.
        if (BracketHelper.can_be_start_of_sequence(tt, False, False) and not (isinstance(tt.next0_, NumberToken))): 
            br = BracketHelper.try_parse(tt, BracketParseAttr.NO, 100)
            if (br is not None): 
                tt = br.end_token
                t = tt
                t1 = t
                continue
        # "НЕ" (= not) followed by an adverb/misc word terminates the scan.
        if (tt.is_value("НЕ", None) and tt.next0_ is not None): 
            mc = tt.next0_.get_morph_class_in_dictionary()
            if (mc.is_adverb or mc.is_misc): 
                break
            continue
        if (tt.is_value("ЯМЗ", None)): 
            # "ЯМЗ" — special-cased engine brand; deliberately no action here.
            pass
        npt2 = NounPhraseHelper.try_parse(tt, Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION) | (NounPhraseParseAttr.IGNOREBRACKETS) | (NounPhraseParseAttr.PARSEPRONOUNS), NounPhraseParseAttr), 0, None)
        if (npt2 is None): 
            if (tt.morph.class0_.is_preposition or tt.morph.class0_.is_conjunction): 
                # A known range term ("не менее" etc.) right after a preposition
                # ends the name; otherwise skip the function word.
                to = NumbersWithUnitToken.M_TERMINS.try_parse(tt, TerminParseAttr.NO)
                if (to is not None): 
                    if ((isinstance(to.end_token.next0_, TextToken)) and to.end_token.next0_.is_letters): 
                        pass
                    else: 
                        break
                t1 = tt
                continue
            mc = tt.get_morph_class_in_dictionary()
            if (((isinstance(tt, TextToken)) and tt.chars.is_letter and tt.length_char > 1) and (((tt.chars.is_all_upper or mc.is_adverb or mc.is_undefined) or mc.is_adjective))): 
                # A lone word may itself be the unit list; otherwise extend the name.
                uu = UnitToken.try_parse_list(tt, add_units, False)
                if (uu is not None): 
                    if (uu[0].length_char > 1 or len(uu) > 1): 
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
                t = tt
                t1 = t
                if (len(internals_) == 0): 
                    name_.end_token = tt
                continue
            if (tt.is_comma): 
                continue
            if (tt.is_char('.')): 
                # Abbreviation dot vs sentence end; a following unit list stops the scan.
                if (not MiscHelper.can_be_start_of_sentence(tt.next0_)): 
                    continue
                uu = UnitToken.try_parse_list(tt.next0_, add_units, False)
                if (uu is not None): 
                    if (uu[0].length_char > 2 or len(uu) > 1): 
                        units = uu
                        t = uu[len(uu) - 1].end_token
                        t1 = t
                        break
            break
        # Noun phrase found: extend the name over it.
        tt = npt2.end_token
        t = tt
        t1 = t
        if (len(internals_) > 0): 
            pass
        elif (t.is_value("ПРЕДЕЛ", None) or t.is_value("ГРАНИЦА", None) or t.is_value("ДИАПАЗОН", None)): 
            # "предел"=limit, "граница"=boundary, "диапазон"=range — keep name as is.
            pass
        elif (t.chars.is_letter): 
            name_.end_token = t1
    # Skip separators between the name and the numeric part, possibly
    # harvesting units, W/H/L headers, or a ':'-introduced sub-value list.
    t11 = t1
    t1 = t1.next0_
    first_pass3306 = True
    while True:
        if first_pass3306: first_pass3306 = False
        else: t1 = t1.next0_
        if (not (t1 is not None)): break
        if (t1.is_table_control_char): 
            pass
        elif (t1.is_char_of(":,_")): 
            if (is_resctriction): 
                return None
            www = NumbersWithUnitToken._try_parsewhl(t1.next0_)
            if (www is not None): 
                whd = www
                t = www.end_token
                t1 = t
                continue
            uu = UnitToken.try_parse_list(t1.next0_, add_units, False)
            if (uu is not None): 
                if (uu[0].length_char > 1 or len(uu) > 1): 
                    units = uu
                    t = uu[len(uu) - 1].end_token
                    t1 = t
                    continue
            if (t1.is_char(':')): 
                # "name: sub1; sub2; ..." — parse semicolon-separated sub-measures.
                li = list()
                ttt = t1.next0_
                first_pass3307 = True
                while True:
                    if first_pass3307: first_pass3307 = False
                    else: ttt = ttt.next0_
                    if (not (ttt is not None)): break
                    if (ttt.is_hiphen or ttt.is_table_control_char): 
                        continue
                    if ((isinstance(ttt, TextToken)) and not ttt.chars.is_letter): 
                        continue
                    mt1 = MeasureToken.try_parse(ttt, add_units, True, True, False, True)
                    if (mt1 is None): 
                        break
                    li.append(mt1)
                    ttt = mt1.end_token
                    if (ttt.next0_ is not None and ttt.next0_.is_char(';')): 
                        ttt = ttt.next0_
                    if (ttt.is_char(';')): 
                        pass
                    elif (ttt.is_newline_after and mt1.is_newline_before): 
                        pass
                    else: 
                        break
                if (len(li) > 1): 
                    # Wrap the sub-measures; each gets "<name> (<subname>)".
                    res0 = MeasureToken._new1618(t0, li[len(li) - 1].end_token, li, True)
                    if (internals_ is not None and len(internals_) > 0): 
                        res0.internal_ex = internals_[0]
                    nam = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                    li[0].begin_token = t0
                    for v in li: 
                        v.name = "{0} ({1})".format(nam, Utils.ifNotNull(v.name, "")).strip()
                        if (v.nums is not None and len(v.nums.units) == 0 and units is not None): 
                            v.nums.units = units
                    return res0
        elif (t1.is_hiphen and t1.is_whitespace_after and t1.is_whitespace_before): 
            pass
        elif (t1.is_hiphen and t1.next0_ is not None and t1.next0_.is_char('(')): 
            pass
        else: 
            break
    if (t1 is None): 
        return None
    # Parse the actual number(s); fall back to NaN when only units were seen.
    mts = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, not0_, True, is_resctriction)
    if (mts is None): 
        if (units is not None and len(units) > 0): 
            if (t1 is None or t1.previous.is_char(':')): 
                mts = list()
                if (t1 is None): 
                    # Unreachable after the None check above — kept from the
                    # original converted code.
                    t1 = t11
                    while t1 is not None and t1.next0_ is not None:
                        pass
                        t1 = t1.next0_
                else: 
                    t1 = t1.previous
                mts.append(NumbersWithUnitToken._new1619(t0, t1, math.nan))
        if (mts is None): 
            return None
    mt = mts[0]
    if (mt.begin_token == mt.end_token and not (isinstance(mt.begin_token, NumberToken))): 
        return None
    if (not is_subval and name_.begin_token.morph.class0_.is_preposition): 
        name_.begin_token = name_.begin_token.next0_
    if (mt.whl is not None): 
        whd = mt.whl
    # Trim trailing W/H/L headers and unit lists off the name (bounded loop).
    for kk in range(10):
        if (whd is not None and whd.end_token == name_.end_token): 
            name_.end_token = whd.begin_token.previous
            continue
        if (units is not None): 
            if (units[len(units) - 1].end_token == name_.end_token): 
                name_.end_token = units[0].begin_token.previous
                continue
        break
    if (len(mts) > 1 and len(internals_) == 0): 
        # Several number groups: emit one wrapper with per-group internals.
        if (len(mt.units) == 0): 
            if (units is not None): 
                for m in mts: 
                    m.units = units
        res1 = MeasureToken._new1620(t0, mts[len(mts) - 1].end_token, name_.morph, True)
        res1.name = MiscHelper.get_text_value_of_meta_token(name_, GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
        k = 0
        while k < len(mts):
            ttt = MeasureToken._new1612(mts[k].begin_token, mts[k].end_token, mts[k])
            if (whd is not None): 
                # whd.tag presumably holds per-dimension names (W/H/L) — TODO confirm.
                nams = Utils.asObjectOrNull(whd.tag, list)
                if (k < len(nams)): 
                    ttt.name = nams[k]
            res1.internals.append(ttt)
            k += 1
        tt1 = res1.end_token.next0_
        # Optional trailing tolerance: "± value".
        if (tt1 is not None and tt1.is_char('±')): 
            nn = NumbersWithUnitToken._try_parse(tt1, add_units, True, False, False)
            if (nn is not None and nn.plus_minus_percent): 
                res1.end_token = nn.end_token
                res1.nums = nn
                if (len(nn.units) > 0 and units is None and len(mt.units) == 0): 
                    for m in mts: 
                        m.units = nn.units
        return res1
    if (not mt.is_whitespace_before): 
        # Number glued to the previous char is only valid after a few separators.
        if (mt.begin_token.previous is None): 
            return None
        if (mt.begin_token.previous.is_char_of(":),") or mt.begin_token.previous.is_table_control_char or mt.begin_token.previous.is_value("IP", None)): 
            pass
        elif (mt.begin_token.is_hiphen and len(mt.units) > 0 and not mt.units[0].is_doubt): 
            pass
        else: 
            return None
    if (len(mt.units) == 0 and units is not None): 
        mt.units = units
    # Distribute negative-power units (denominator) onto the divisor number.
    if (mt.div_num is not None and len(units) > 1 and len(mt.div_num.units) == 0): 
        i = 1
        while i < len(units):
            if (units[i].pow0_ == -1): 
                j = i
                while j < len(units):
                    mt.div_num.units.append(units[j])
                    units[j].pow0_ = (- units[j].pow0_)
                    j += 1
                del mt.units[i:i+len(units) - i]
                break
            i += 1
    # "min X" -> lower bound, "max X" -> upper bound.
    if ((minmax < 0) and mt.single_val is not None): 
        mt.from_val = mt.single_val
        mt.from_include = True
        mt.single_val = (None)
    if (minmax > 0 and mt.single_val is not None): 
        mt.to_val = mt.single_val
        mt.to_include = True
        mt.single_val = (None)
    if (len(mt.units) == 0): 
        # Last chance: units right after the number; otherwise honor can_units_absent.
        units = UnitToken.try_parse_list(mt.end_token.next0_, add_units, True)
        if (units is None): 
            if (can_units_absent): 
                pass
            else: 
                return None
        else: 
            mt.units = units
    res = MeasureToken._new1622(t0, mt.end_token, name_.morph, internals_)
    # Re-attach a hyphen-glued prefix word to the name ("Т-образный ...").
    if (((not t0.is_whitespace_before and t0.previous is not None and t0 == name_.begin_token) and t0.previous.is_hiphen and not t0.previous.is_whitespace_before) and (isinstance(t0.previous.previous, TextToken))): 
        name_.begin_token = res.begin_token = name_.begin_token.previous.previous
    res.name = MiscHelper.get_text_value_of_meta_token(name_, (GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE if not is_subval else GetTextAttr.NO))
    res.nums = mt
    for u in res.nums.units: 
        if (u.keyword is not None): 
            if (u.keyword.begin_char >= res.begin_char): 
                res.reliable = True
    res.__parse_internals(add_units)
    if (len(res.internals) > 0 or not can_be_set): 
        return res
    # Optionally recognize a set "X unit1, Y unit2" with differing units.
    t1 = res.end_token.next0_
    if (t1 is not None and t1.is_comma_and): 
        t1 = t1.next0_
    mts1 = NumbersWithUnitToken.try_parse_multi(t1, add_units, False, False, False, False)
    if ((mts1 is not None and len(mts1) == 1 and (t1.whitespaces_before_count < 3)) and len(mts1[0].units) > 0 and not UnitToken.can_be_equals(mts[0].units, mts1[0].units)): 
        res.is_set = True
        res.nums = (None)
        res.internals.append(MeasureToken._new1612(mt.begin_token, mt.end_token, mt))
        res.internals.append(MeasureToken._new1612(mts1[0].begin_token, mts1[0].end_token, mts1[0]))
        res.end_token = mts1[0].end_token
    return res
def try_parse_list(t : 'Token', max_count : int=10) -> typing.List['WeaponItemToken']:
    """Parse a chain of related weapon items starting at *t*.

    Starting from a first WeaponItemToken, repeatedly tries to attach follow-up
    items (models, brands, numbers, ...) across hyphens, commas, brackets and
    wrapped ReferentTokens, then merges adjacent MODEL items into one value.

    Parameters:
        t: first token of the candidate chain.
        max_count: soft cap on the number of collected items (<=0 disables it).
    Returns:
        List of WeaponItemToken, or None if no valid chain starts at *t*.

    NOTE(review): machine-converted Pullenti code; indentation reconstructed
    from a whitespace-collapsed source — verify against the generated original.
    """
    tr = WeaponItemToken.try_parse(t, None, False, False)
    if (tr is None): 
        return None
    # A chain cannot start with a bare class or date item.
    if (tr.typ == WeaponItemToken.Typs.CLASS or tr.typ == WeaponItemToken.Typs.DATE): 
        return None
    tr0 = tr
    res = list()
    if (len(tr.__inner_tokens) > 0): 
        res.extend(tr.__inner_tokens)
        if (res[0].begin_char > tr.begin_char): 
            res[0].begin_token = tr.begin_token
    res.append(tr)
    t = tr.end_token.next0_
    if (tr.typ == WeaponItemToken.Typs.NOUN): 
        # After a noun item, skip ':' and hyphens before the detail items.
        while t is not None:
            if (t.is_char(':') or t.is_hiphen): 
                pass
            else: 
                break
            t = t.next0_
    # NOTE(review): and_conj is never set True in this block — presumably a
    # vestige of the C#->Python conversion; the break below is dead code.
    and_conj = False
    first_pass3425 = True
    while True:
        if first_pass3425: first_pass3425 = False
        else: t = t.next0_
        if (not (t is not None)): break
        if (max_count > 0 and len(res) >= max_count): 
            break
        if (t.is_char(':')): 
            continue
        if (tr0.typ == WeaponItemToken.Typs.NOUN): 
            if (t.is_hiphen and t.next0_ is not None): 
                t = t.next0_
        tr = WeaponItemToken.try_parse(t, tr0, False, False)
        # Fallback 1: after a closing bracket (for MODEL/BRAND chains).
        if (tr is None): 
            if (BracketHelper.can_be_end_of_sequence(t, True, None, False) and t.next0_ is not None): 
                if (tr0.typ == WeaponItemToken.Typs.MODEL or tr0.typ == WeaponItemToken.Typs.BRAND): 
                    tt1 = t.next0_
                    if (tt1 is not None and tt1.is_comma): 
                        tt1 = tt1.next0_
                    tr = WeaponItemToken.try_parse(tt1, tr0, False, False)
        # Fallback 2: look inside a single-token ReferentToken wrapper.
        if (tr is None and (isinstance(t, ReferentToken))): 
            rt = Utils.asObjectOrNull(t, ReferentToken)
            if (rt.begin_token == rt.end_token and (isinstance(rt.begin_token, TextToken))): 
                tr = WeaponItemToken.try_parse(rt.begin_token, tr0, False, False)
                if (tr is not None and tr.begin_token == tr.end_token): 
                    # Re-anchor the result on the wrapper token itself.
                    tr.begin_token = tr.end_token = t
        # Fallback 3: skip a parenthesized group; only a NUMBER may follow it.
        if (tr is None and t.is_char('(')): 
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None): 
                tt = br.end_token.next0_
                if (tt is not None and tt.is_comma): 
                    tt = tt.next0_
                tr = WeaponItemToken.try_parse(tt, tr0, False, False)
                if (tr is not None and tr.typ == WeaponItemToken.Typs.NUMBER): 
                    pass
                else: 
                    tr = (None)
        # Fallback 4: hyphen continuation of a brand/model.
        if (tr is None and t.is_hiphen): 
            if (tr0.typ == WeaponItemToken.Typs.BRAND or tr0.typ == WeaponItemToken.Typs.MODEL): 
                tr = WeaponItemToken.try_parse(t.next0_, tr0, False, False)
        # Fallback 5: comma continuation; only a NUMBER item may be attached.
        if (tr is None and t.is_comma): 
            if ((tr0.typ == WeaponItemToken.Typs.NAME or tr0.typ == WeaponItemToken.Typs.BRAND or tr0.typ == WeaponItemToken.Typs.MODEL) or tr0.typ == WeaponItemToken.Typs.CLASS or tr0.typ == WeaponItemToken.Typs.DATE): 
                tr = WeaponItemToken.try_parse(t.next0_, tr0, True, False)
                if (tr is not None): 
                    if (tr.typ == WeaponItemToken.Typs.NUMBER): 
                        pass
                    else: 
                        tr = (None)
        if (tr is None): 
            break
        # Across a line break only NUMBER items may continue the chain.
        if (t.is_newline_before): 
            if (tr.typ != WeaponItemToken.Typs.NUMBER): 
                break
        if (len(tr.__inner_tokens) > 0): 
            res.extend(tr.__inner_tokens)
        res.append(tr)
        tr0 = tr
        t = tr.end_token
        if (and_conj): 
            break
    # Merge adjacent MODEL items into a single hyphen/space-joined value.
    i = 0
    while i < (len(res) - 1):
        if (res[i].typ == WeaponItemToken.Typs.MODEL and res[i + 1].typ == WeaponItemToken.Typs.MODEL): 
            res[i].end_token = res[i + 1].end_token
            res[i].value = "{0}{1}{2}".format(res[i].value, ('-' if res[i].end_token.next0_ is not None and res[i].end_token.next0_.is_hiphen else ' '), res[i + 1].value)
            del res[i + 1]
            i -= 1
        i += 1
    return res
def parse_variants(t0 : 'Token', t1 : 'Token', lev : int, max_count : int=0, regime : 'SentItemType'=SentItemType.UNDEFINED) -> typing.List['Sentence']:
    """Enumerate alternative readings of the token span [t0..t1] as Sentences.

    Walks the span collecting SentItems; when exactly one item is possible at a
    position it is appended to the running sentence, and when several are
    possible the function recurses once per alternative (memoizing the suffix
    variants by end position) and returns the cross product.

    Parameters:
        t0, t1: inclusive token span to analyze.
        lev: recursion depth guard (hard cap 100).
        max_count: when > 0, stop branching once len(res) exceeds it.
        regime: context hint; DELIM/VERB items may terminate the walk.
    Returns:
        List of Sentence variants, or None if nothing could be parsed.

    NOTE(review): machine-converted Pullenti code; indentation reconstructed
    from a whitespace-collapsed source — verify against the generated original.
    """
    from pullenti.semantic.internal.SentItem import SentItem
    if ((t0 is None or t1 is None or t0.end_char > t1.end_char) or lev > 100): 
        return None
    res = list()
    sent = Sentence()
    t = t0
    # first_passNNNN emulates a C# do/while loop.
    first_pass3463 = True
    while True:
        if first_pass3463: first_pass3463 = False
        else: t = t.next0_
        if (not (t is not None and t.end_char <= t1.end_char)): break
        # Skip parenthesized fragments wholesale.
        if (t.is_char('(')): 
            br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
            if (br is not None): 
                t = br.end_token
                continue
        items_ = SentItem.parse_near_items(t, t1, lev + 1, sent.items)
        if (items_ is None or len(items_) == 0): 
            continue
        if (len(items_) == 1 or ((max_count > 0 and len(res) > max_count))): 
            # Unambiguous (or branching budget exhausted): take the first item.
            sent.items.append(items_[0])
            t = items_[0].end_token
            if (regime != SentItemType.UNDEFINED): 
                it = items_[0]
                if (it.can_be_noun): 
                    pass
                elif (it.typ == SentItemType.DELIM): 
                    break
                elif (it.typ == SentItemType.VERB): 
                    if (regime == SentItemType.PARTBEFORE): 
                        break
            continue
        # Ambiguity: recurse for each alternative's suffix, memoized by the
        # alternative's end position (tryGetValue emulates C# TryGetValue).
        m_nexts = dict()
        for it in items_: 
            nexts = None
            wrapnexts2942 = RefOutArgWrapper(None)
            inoutres2943 = Utils.tryGetValue(m_nexts, it.end_token.end_char, wrapnexts2942)
            nexts = wrapnexts2942.value
            if (not inoutres2943): 
                nexts = Sentence.parse_variants(it.end_token.next0_, t1, lev + 1, max_count, SentItemType.UNDEFINED)
                m_nexts[it.end_token.end_char] = nexts
            if (nexts is None or len(nexts) == 0): 
                # No suffix variants: the sentence ends with this alternative.
                se = Sentence()
                for itt in sent.items: 
                    itt1 = SentItem(None)
                    itt1.copy_from(itt)
                    se.items.append(itt1)
                itt0 = SentItem(None)
                itt0.copy_from(it)
                se.items.append(itt0)
                res.append(se)
            else: 
                # Cross product: prefix copy + this alternative + each suffix.
                for sn in nexts: 
                    se = Sentence()
                    for itt in sent.items: 
                        itt1 = SentItem(None)
                        itt1.copy_from(itt)
                        se.items.append(itt1)
                    itt0 = SentItem(None)
                    itt0.copy_from(it)
                    se.items.append(itt0)
                    for itt in sn.items: 
                        itt1 = SentItem(None)
                        itt1.copy_from(itt)
                        se.items.append(itt1)
                    res.append(se)
        # Branching handled everything to the right: done.
        return res
    if (len(sent.items) == 0): 
        return None
    res.append(sent)
    return res