Python Utils.asObjectOrNull Beispiele

Programmiersprache: Python

Namespace / Paketname: pullenti.unisharp.Utils

Klasse / Typ: Utils

Methode / Funktion: asObjectOrNull

Beispiele auf hotexamples.com: 30

Python Utils.asObjectOrNull – 30 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Methode pullenti.unisharp.Utils.Utils.asObjectOrNull, die aus Open-Source-Projekten extrahiert wurden. Sie können die Beispiele bewerten und so helfen, ihre Qualität zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

asObjectOrNull(30)

tryParseInt(30)

toStringStringIO(30)

isNullOrEmpty(30)

tryGetValue(30)

ifNotNull(30)

valToEnum(30)

enumToString(30)

setLengthStringIO(28)

getCharAtStringIO(23)

splitString(18)

newException(18)

isWhitespace(15)

newStringIO(14)

compareStrings(12)

indexOfList(10)

insertStringIO(10)

newArrayOfBytes(9)

endsWithString(8)

getResourceStream(7)

lastDayOfMonth(7)

getResourcesNames(7)

getResourceInfo(7)

writeByteIO(7)

removeStringIO(6)

setCharAtStringIO(6)

tryParseFloat(4)

readIO(3)

trimEndString(2)

writeIO(2)

replaceStringIO(2)

parseXmlFromString(2)

getXmlLocalName(2)

getXmlInnerText(2)

getDate(2)

readByteIO(1)

parseXmlFromStream(1)

startsWithString(1)

joinStrings(1)

getXmlName(1)

getXmlAttrByName(1)

getLengthIO(1)

newArray(1)

Beispiel #1

Datei anzeigen

 def get_normal_case_text(self,
                          mc: 'MorphClass' = None,
                          num: 'MorphNumber' = MorphNumber.UNDEFINED,
                          gender: 'MorphGender' = MorphGender.UNDEFINED,
                          keep_chars: bool = False) -> str:
     """Pick the normalized text of this phrase from its morphological variants.

     A phrase that consists of a single ``ReferentToken`` delegates to that
     token.  Otherwise every ``NounPhraseItemTextVar`` in ``self.morph.items``
     is inspected and one candidate text is selected, preferring variants
     whose ``undef_coef`` is zero.  When no variant yields a text, the result
     is assembled from the begin token's own normalized text plus the raw
     text of the remaining tokens.

     Args:
         mc: requested morphological class; only its ``is_adjective`` flag and
             ``value`` are read here.
         num: requested grammatical number.
         gender: requested grammatical gender.
         keep_chars: forwarded to ``__corr_chars`` and to the token helpers;
             also selects ``GetTextAttr.KEEPREGISTER`` over ``GetTextAttr.NO``.

     Returns:
         The chosen text, or whatever ``Utils.ifNotNull(..., "?")`` yields
         when nothing could be produced (presumably ``"?"``).
     """
     first = self.begin_token
     if isinstance(first, ReferentToken) and first == self.end_token:
         return first.get_normal_case_text(mc, num, gender, keep_chars)

     # Plural-only nouns that get a dedicated singular form.
     singular_of = {"ДЕТИ": "РЕБЕНОК", "ЛЮДИ": "ЧЕЛОВЕК"}

     chosen = None
     max_coef = 0
     best_score = -1
     for it in self.morph.items:
         variant = Utils.asObjectOrNull(it, NounPhraseItemTextVar)
         if variant is None:
             continue
         if variant.undef_coef > 0 and (variant.undef_coef < max_coef
                                        or best_score >= 0):
             continue

         # 1. A ready-made singular value wins when singular was requested.
         if (num == MorphNumber.SINGULAR
                 and variant.single_number_value is not None):
             needs_inflection = (
                 mc is not None
                 and gender in (MorphGender.NEUTER, MorphGender.FEMINIE)
                 and mc.is_adjective)
             if needs_inflection:
                 target = MorphBaseInfo._new401(
                     MorphClass._new53(mc.value), gender,
                     MorphNumber.SINGULAR, MorphCase.NOMINATIVE,
                     self.morph.language)
                 inflected = MorphologyService.get_wordform(
                     variant.single_number_value, target)
                 if inflected is not None:
                     chosen = inflected
             else:
                 chosen = variant.single_number_value
             if variant.undef_coef == 0:
                 break
             max_coef = variant.undef_coef
             continue

         if Utils.isNullOrEmpty(variant.normal_value):
             continue

         # 2. A numeric value used as an adjective is spelled out.
         if (str.isdigit(variant.normal_value[0]) and mc is not None
                 and mc.is_adjective):
             parsed = RefOutArgWrapper(0)
             if Utils.tryParseInt(variant.normal_value, parsed):
                 number = parsed.value
                 if num == MorphNumber.SINGULAR or number == 1:
                     adj_number = MorphNumber.SINGULAR
                 else:
                     adj_number = MorphNumber.PLURAL
                 spelled = NumberHelper.get_number_adjective(
                     number, gender, adj_number)
                 if spelled is not None:
                     chosen = spelled
                     if variant.undef_coef == 0:
                         break
                     max_coef = variant.undef_coef
                     continue

         # 3. Plain normal value, scored against the best one seen so far.
         candidate = it.normal_value
         if num == MorphNumber.SINGULAR:
             candidate = singular_of.get(candidate, candidate)
         max_coef = variant.undef_coef
         if variant.undef_coef > 0:
             chosen = candidate
             continue

         score = 0
         exact_adjective = (mc is not None and mc.is_adjective
                            and variant.undef_coef == 0)
         if not exact_adjective:
             if (isinstance(self.begin_token, TextToken)
                     and candidate == self.begin_token.term
                     and it.case_.is_nominative
                     and it.number == MorphNumber.SINGULAR):
                 score = 1
         if (num == MorphNumber.PLURAL
                 and (variant.number & MorphNumber.PLURAL)
                 == MorphNumber.PLURAL):
             score += 3
         if chosen is None or score > best_score:
             chosen = candidate
             best_score = score
             if score > 0:
                 break

     if chosen is not None:
         return self.__corr_chars(chosen, keep_chars)

     # No variant produced a text: fall back to the tokens themselves.
     single_token = self.begin_token == self.end_token
     head = self.begin_token.get_normal_case_text(
         mc, num, gender, keep_chars)
     if single_token:
         return Utils.ifNotNull(head, "?")

     text_attr = GetTextAttr.KEEPREGISTER if keep_chars else GetTextAttr.NO
     if head is None:
         fallback = MiscHelper.get_text_value_of_meta_token(self, text_attr)
     else:
         fallback = "{0} {1}".format(
             head,
             MiscHelper.get_text_value(self.begin_token.next0_,
                                       self.end_token, text_attr))
     return Utils.ifNotNull(fallback, "?")

Beispiel #2

Datei anzeigen

 def find(self, word: str, try_create: bool,
          lang_: 'MorphLang') -> typing.List['DerivateGroup']:
     """Look up the derivate groups stored for ``word``.

     The tree rooted at ``self._m_root`` is descended one character code at
     a time; a node with a positive ``lazy_pos`` is deserialized from
     ``self.__m_buf`` on first visit.  If the whole word is consumed, the
     groups on the reached node are taken.  Generated groups are dropped
     when non-generated ones are present as well, and, when ``lang_`` is
     given and not undefined, groups whose ``contains_word(word, lang_)`` is
     false are dropped too.

     If that yields nothing and the word has at least four characters, a
     few ending-based rewrites of the word are retried recursively (always
     with ``try_create=False``).  Finally, when ``try_create`` is true, the
     word is split into prefix + rest; for the first rest that resolves,
     new groups are created via ``create_by_prefix`` and registered with
     ``self.add``.

     Args:
         word: the word to look up.
         try_create: allow creating new groups from a prefix split.
         lang_: language used for filtering and for group creation.

     Returns:
         A non-empty list of groups, or ``None`` when nothing matched.
     """
     if Utils.isNullOrEmpty(word):
         return None

     # Walk the tree; ``matched`` counts the characters fully consumed.
     node = self._m_root
     matched = 0
     for ch in word:
         if node.nodes is None:
             break
         child_ref = RefOutArgWrapper(None)
         if not Utils.tryGetValue(node.nodes, ord(ch), child_ref):
             break
         node = child_ref.value
         if node.lazy_pos > 0:
             # Node content is still serialized: load it now, once.
             pos_ref = RefOutArgWrapper(node.lazy_pos)
             DeserializeHelper.deserialize_tree_node(
                 self.__m_buf, self, node, True, pos_ref)
             node.lazy_pos = 0
         matched += 1
     stored = node.groups if matched >= len(word) else None

     found = None
     if isinstance(stored, list):
         found = list(Utils.asObjectOrNull(stored, list))
         generated_flags = [bool(group.is_generated) for group in found]
         # Mixed content: keep only the groups that were not generated.
         if any(generated_flags) and not all(generated_flags):
             found = [group
                      for group, generated in zip(found, generated_flags)
                      if not generated]
     elif isinstance(stored, DerivateGroup):
         found = [Utils.asObjectOrNull(stored, DerivateGroup)]

     if found is not None and lang_ is not None and not lang_.is_undefined:
         for idx in reversed(range(len(found))):
             if not found[idx].contains_word(word, lang_):
                 del found[idx]
     if found:
         return found
     if len(word) < 4:
         return None

     # Ending-based rewrites, each retried without group creation.
     last, prev, third = word[-1], word[-2], word[-3]
     if last == 'О' or (last == 'И' and prev == 'К'):
         stem = word[:-1]
         for ending in ("ИЙ", "ЫЙ"):
             hit = self.find(stem + ending, False, lang_)
             if hit is not None:
                 return hit
         if last == 'О' and prev == 'Н':
             hit = self.find(stem + "СКИЙ", False, lang_)
             if hit is not None:
                 return hit
     elif last in ('Я', 'Ь') and prev == 'С':
         stem = word[:-2]
         if stem == "ЯТЬ":
             return None
         hit = self.find(stem, False, lang_)
         if hit is not None:
             return hit
     elif last == 'Е' and prev == 'Ь':
         hit = self.find(word[:-2] + "ИЕ", False, lang_)
         if hit is not None:
             return hit
     elif last == 'Й' and third == 'Н' and try_create:
         fourth = word[-4]
         rewritten = None
         if fourth == 'Н':
             # Double "Н" before the ending: drop one of them.
             rewritten = word[:-4] + word[-3:]
         elif LanguageHelper.is_cyrillic_vowel(fourth):
             # Single "Н" after a vowel: double it.
             rewritten = word[:-3] + "Н" + word[-3:]
         if rewritten is not None:
             hit = self.find(rewritten, False, lang_)
             if hit is not None:
                 return hit

     if last == 'Й' and prev == 'О':
         stem = word[:-2]
         for ending in ("ИЙ", "ЫЙ"):
             hit = self.find(stem + ending, False, lang_)
             if hit is not None:
                 return hit

     if not try_create:
         return None

     # Try prefix + known rest, shortest prefix first; the rest keeps at
     # least four characters.
     for split in range(1, len(word) - 3):
         rest = word[split:]
         base_groups = self.find(rest, False, lang_)
         if base_groups is None:
             continue
         prefix = word[:split]
         created = []
         for base in base_groups:
             if base.is_dummy or base.is_generated:
                 continue
             if base.not_generate and len(rest) < 5:
                 continue
             derived = base.create_by_prefix(prefix, lang_)
             if derived is not None:
                 created.append(derived)
                 self.add(derived)
         return created or None
     return None

Beispiel #3

Datei anzeigen

 def __tryParse(t: 'Token',
                is_in_lit: bool,
                max_char: int = 0) -> typing.List['ReferentToken']:
     if (t is None):
         return None
     is_bracket_regime = False
     if (t.previous is not None and t.previous.isChar('(')):
         is_bracket_regime = True
     blt = BookLinkToken.tryParse(t, 0)
     if (blt is None):
         blt = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
     if (blt is None and not is_bracket_regime):
         return None
     t0 = t
     coef = 0
     is_electr_res = False
     decree = None
     regtyp = BookLinkAnalyzer.RegionTyp.UNDEFINED
     num = None
     spec_see = None
     book_prev = None
     if (is_bracket_regime):
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.PERSON):
         if (not is_in_lit):
             return None
         regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
     elif (blt.typ == BookLinkTyp.NUMBER):
         num = blt.value
         t = blt.end_token.next0_
         if (t is None or t.is_newline_before):
             return None
         if (not t.is_whitespace_before):
             if (isinstance(t, NumberToken)):
                 n = (t).value
                 if ((((n == "3" or n == "0")) and not t.is_whitespace_after
                      and (isinstance(t.next0_, TextToken)))
                         and t.next0_.chars.is_all_lower):
                     pass
                 else:
                     return None
             elif (not ((isinstance(t, TextToken)))
                   or t.chars.is_all_lower):
                 r = t.getReferent()
                 if (isinstance(r, PersonReferent)):
                     pass
                 elif (is_in_lit and r is not None
                       and r.type_name == "DECREE"):
                     pass
                 else:
                     return None
         first_pass2757 = True
         while True:
             if first_pass2757: first_pass2757 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (isinstance(t, NumberToken)):
                 break
             if (not ((isinstance(t, TextToken)))):
                 break
             if (BracketHelper.canBeStartOfSequence(t, True, False)):
                 break
             if (not t.chars.is_letter):
                 continue
             bbb = BookLinkToken.tryParse(t, 0)
             if (bbb is not None):
                 if (bbb.typ == BookLinkTyp.TAMZE):
                     spec_see = bbb
                     t = bbb.end_token.next0_
                     break
                 if (bbb.typ == BookLinkTyp.SEE):
                     t = bbb.end_token
                     continue
             break
         if (spec_see is not None and spec_see.typ == BookLinkTyp.TAMZE):
             coef += 1
             max0_ = 1000
             tt = t0
             while tt is not None and max0_ > 0:
                 if (isinstance(tt.getReferent(), BookLinkRefReferent)):
                     book_prev = (tt.getReferent()).book
                     break
                 tt = tt.previous
                 max0_ -= 1
         blt1 = BookLinkToken.tryParseAuthor(t, FioTemplateType.UNDEFINED)
         if (blt1 is not None and blt1.typ == BookLinkTyp.PERSON):
             regtyp = BookLinkAnalyzer.RegionTyp.AUTHORS
         else:
             ok = False
             tt = t
             first_pass2758 = True
             while True:
                 if first_pass2758: first_pass2758 = False
                 else: tt = (None if tt is None else tt.next0_)
                 if (not (tt is not None)): break
                 if (tt.is_newline_before):
                     break
                 if (is_in_lit and tt.getReferent() is not None
                         and tt.getReferent().type_name == "DECREE"):
                     ok = True
                     decree = tt
                     break
                 bbb = BookLinkToken.tryParse(tt, 0)
                 if (bbb is None):
                     continue
                 if (bbb.typ == BookLinkTyp.ELECTRONRES):
                     is_electr_res = True
                     ok = True
                     break
                 if (bbb.typ == BookLinkTyp.DELIMETER):
                     tt = bbb.end_token.next0_
                     if (BookLinkToken.tryParseAuthor(
                             tt, FioTemplateType.UNDEFINED) is not None):
                         ok = True
                         break
                     bbb = BookLinkToken.tryParse(tt, 0)
                     if (bbb is not None):
                         if (bbb.typ == BookLinkTyp.EDITORS
                                 or bbb.typ == BookLinkTyp.TRANSLATE
                                 or bbb.typ == BookLinkTyp.SOSTAVITEL):
                             ok = True
                             break
             if (not ok and not is_in_lit):
                 if (BookLinkToken.checkLinkBefore(t0, num)):
                     pass
                 else:
                     return None
             regtyp = BookLinkAnalyzer.RegionTyp.NAME
     else:
         return None
     res = BookLinkReferent()
     corr_authors = list()
     t00 = t
     blt00 = None
     start_of_name = None
     prev_pers_templ = FioTemplateType.UNDEFINED
     if (regtyp == BookLinkAnalyzer.RegionTyp.AUTHORS):
         first_pass2759 = True
         while True:
             if first_pass2759: first_pass2759 = False
             else: t = t.next0_
             if (not (t is not None)): break
             if (max_char > 0 and t.begin_char >= max_char):
                 break
             if (t.isCharOf(".;") or t.is_comma_and):
                 continue
             if (t.isChar('/')):
                 break
             if ((t.isChar('(') and t.next0_ is not None
                  and t.next0_.isValue("EDS", None))
                     and t.next0_.next0_ is not None
                     and t.next0_.next0_.isChar(')')):
                 t = t.next0_.next0_.next0_
                 break
             blt = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt is None and t.previous is not None
                     and t.previous.is_and):
                 blt = BookLinkToken.tryParseAuthor(
                     t.previous, FioTemplateType.UNDEFINED)
             if (blt is None):
                 if ((isinstance(t.getReferent(), OrganizationReferent))
                         and blt00 is not None):
                     bbb2 = BookLinkToken.tryParse(t.next0_, 0)
                     if (bbb2 is not None):
                         if (bbb2.typ == BookLinkTyp.YEAR):
                             res.addSlot(BookLinkReferent.ATTR_AUTHOR,
                                         t.getReferent(), False, 0)
                             res.year = int(bbb2.value)
                             coef += .5
                             t = bbb2.end_token.next0_
                 break
             if (blt.typ == BookLinkTyp.PERSON):
                 tt2 = blt.end_token.next0_
                 bbb2 = BookLinkToken.tryParse(tt2, 0)
                 if (bbb2 is not None):
                     if (bbb2.typ == BookLinkTyp.YEAR):
                         res.year = int(bbb2.value)
                         coef += .5
                         blt.end_token = bbb2.end_token
                         blt00 = (None)
                 if (blt00 is not None
                         and ((blt00.end_token.next0_ == blt.begin_token
                               or blt.begin_token.previous.isChar('.')))):
                     tt11 = blt.end_token.next0_
                     nex = BookLinkToken.tryParse(tt11, 0)
                     if (nex is not None
                             and nex.typ == BookLinkTyp.ANDOTHERS):
                         pass
                     else:
                         if (tt11 is None):
                             break
                         if (tt11.isChar('/') and tt11.next0_ is not None
                                 and tt11.next0_.isChar('/')):
                             break
                         if (tt11.isChar(':')):
                             break
                         if ((str(blt).find('.') < 0)
                                 and str(blt00).find('.') > 0):
                             break
                         if ((isinstance(tt11, TextToken))
                                 and tt11.chars.is_all_lower):
                             break
                         if (tt11.isCharOf(",.;")
                                 and tt11.next0_ is not None):
                             tt11 = tt11.next0_
                         nex = BookLinkToken.tryParse(tt11, 0)
                         if (nex is not None
                                 and nex.typ != BookLinkTyp.PERSON
                                 and nex.typ != BookLinkTyp.ANDOTHERS):
                             break
                 elif (
                     (blt00 is not None
                      and blt00.person_template != FioTemplateType.UNDEFINED
                      and blt.person_template != blt00.person_template)
                         and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     if (blt.end_token.next0_ is None
                             or not blt.end_token.next0_.is_comma_and):
                         break
                     if (BookLinkToken.tryParseAuthor(
                             blt.end_token.next0_.next0_,
                             FioTemplateType.UNDEFINED) is not None):
                         pass
                     else:
                         break
                 if (blt00 is None and blt.person_template
                         == FioTemplateType.NAMESURNAME):
                     tt = blt.end_token.next0_
                     if (tt is not None and tt.is_hiphen):
                         tt = tt.next0_
                     if (isinstance(tt, NumberToken)):
                         break
                 BookLinkAnalyzer.__addAuthor(res, blt)
                 coef += 1
                 t = blt.end_token
                 if (isinstance(t.getReferent(), PersonReferent)):
                     corr_authors.append(
                         Utils.asObjectOrNull(t, ReferentToken))
                 blt00 = blt
                 prev_pers_templ = blt.person_template
                 start_of_name = blt.start_of_name
                 if ((start_of_name) is not None):
                     t = t.next0_
                     break
                 continue
             if (blt.typ == BookLinkTyp.ANDOTHERS):
                 coef += .5
                 t = blt.end_token.next0_
                 res.authors_and_other = True
                 break
             break
     if (t is None):
         return None
     if ((t.is_newline_before and t != t0 and num is None) and res.findSlot(
             BookLinkReferent.ATTR_AUTHOR, None, True) is None):
         return None
     if (start_of_name is None):
         if (t.chars.is_all_lower):
             coef -= (1)
         if (t.chars.is_latin_letter and not is_electr_res and num is None):
             if (res.getSlotValue(BookLinkReferent.ATTR_AUTHOR) is None):
                 return None
     tn0 = t
     tn1 = None
     uri = None
     next_num = None
     wrapnn393 = RefOutArgWrapper(0)
     inoutres394 = Utils.tryParseInt(Utils.ifNotNull(num, ""), wrapnn393)
     nn = wrapnn393.value
     if (inoutres394):
         next_num = str((nn + 1))
     br = (BracketHelper.tryParse(
         t,
         Utils.valToEnum(
             (BracketParseAttr.CANCONTAINSVERBS) |
             (BracketParseAttr.CANBEMANYLINES), BracketParseAttr), 100)
           if BracketHelper.canBeStartOfSequence(t, True, False) else None)
     if (br is not None):
         t = t.next0_
     pages = None
     first_pass2760 = True
     while True:
         if first_pass2760: first_pass2760 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (br is not None and br.end_token == t):
             tn1 = t
             break
         tit = TitleItemToken.tryAttach(t)
         if (tit is not None):
             if ((tit.typ == TitleItemToken.Types.TYP and tn0 == t
                  and br is None) and BracketHelper.canBeStartOfSequence(
                      tit.end_token.next0_, True, False)):
                 br = BracketHelper.tryParse(tit.end_token.next0_,
                                             BracketParseAttr.NO, 100)
                 if (br is not None):
                     coef += (1)
                     if (num is not None):
                         coef += 1
                     tn0 = br.begin_token
                     tn1 = br.end_token
                     res.typ = tit.value.lower()
                     t = br.end_token.next0_
                     break
         if (t.is_newline_before and t != tn0):
             if (br is not None and (t.end_char < br.end_char)):
                 pass
             elif (not MiscHelper.canBeStartOfSentence(t)):
                 pass
             else:
                 if (t.newlines_before_count > 1):
                     break
                 if ((isinstance(t, NumberToken)) and num is not None
                         and (t).int_value is not None):
                     if (num == str(((t).int_value - 1))):
                         break
                 elif (num is not None):
                     pass
                 else:
                     nnn = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.PARSEPREPOSITION) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSENUMERICASADJECTIVE))
                             | (NounPhraseParseAttr.MULTILINES),
                             NounPhraseParseAttr), 0)
                     if (nnn is not None and nnn.end_char >= t.end_char):
                         pass
                     else:
                         break
         if (t.isCharOf(".;") and t.whitespaces_after_count > 0):
             tit = TitleItemToken.tryAttach(t.next0_)
             if ((tit) is not None):
                 if (tit.typ == TitleItemToken.Types.TYP):
                     break
             stop = True
             words = 0
             notwords = 0
             tt = t.next0_
             first_pass2761 = True
             while True:
                 if first_pass2761: first_pass2761 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 blt0 = BookLinkToken.tryParse(tt, 0)
                 if (blt0 is None):
                     if (tt.is_newline_before):
                         break
                     if ((isinstance(tt, TextToken)) and
                             not tt.getMorphClassInDictionary().is_undefined
                         ):
                         words += 1
                     else:
                         notwords += 1
                     if (words > 6 and words > (notwords * 4)):
                         stop = False
                         break
                     continue
                 if ((blt0.typ == BookLinkTyp.DELIMETER
                      or blt0.typ == BookLinkTyp.TRANSLATE
                      or blt0.typ == BookLinkTyp.TYPE)
                         or blt0.typ == BookLinkTyp.GEO
                         or blt0.typ == BookLinkTyp.PRESS):
                     stop = False
                 break
             if (br is not None
                     and br.end_token.previous.end_char > t.end_char):
                 stop = False
             if (stop):
                 break
         if (t == decree):
             t = t.next0_
             break
         blt = BookLinkToken.tryParse(t, 0)
         if (blt is None):
             tn1 = t
             continue
         if (blt.typ == BookLinkTyp.DELIMETER):
             break
         if (((blt.typ == BookLinkTyp.MISC or blt.typ
               == BookLinkTyp.TRANSLATE or blt.typ == BookLinkTyp.NAMETAIL)
              or blt.typ == BookLinkTyp.TYPE
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.PAGERANGE
                 or blt.typ == BookLinkTyp.PAGES):
             coef += 1
             break
         if (blt.typ == BookLinkTyp.GEO or blt.typ == BookLinkTyp.PRESS):
             if (t.previous.is_hiphen or t.previous.isCharOf(".;")
                     or blt.add_coef > 0):
                 break
         if (blt.typ == BookLinkTyp.YEAR):
             if (t.previous is not None and t.previous.is_comma):
                 break
         if (blt.typ == BookLinkTyp.ELECTRONRES):
             is_electr_res = True
             break
         if (blt.typ == BookLinkTyp.URL):
             if (t == tn0 or t.previous.isCharOf(":.")):
                 is_electr_res = True
                 break
         tn1 = t
     if (tn1 is None and start_of_name is None):
         if (is_electr_res):
             uri_re = BookLinkReferent()
             rt0 = ReferentToken(uri_re, t00, t)
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(uri_re)
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, rt0.end_token)
             ok = False
             while t is not None:
                 if (t.is_newline_before):
                     break
                 blt0 = BookLinkToken.tryParse(t, 0)
                 if (blt0 is not None):
                     if (isinstance(blt0.ref, UriReferent)):
                         uri_re.addSlot(
                             BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt0.ref, UriReferent),
                             False, 0)
                         ok = True
                     t = blt0.end_token
                 rt0.end_token = rt01.end_token = t
                 t = t.next0_
             if (ok):
                 rts0.append(rt01)
                 rts0.append(rt0)
                 return rts0
         if (decree is not None and num is not None):
             rts0 = list()
             bref0 = BookLinkRefReferent._new389(decree.getReferent())
             if (num is not None):
                 bref0.number = num
             rt01 = ReferentToken(bref0, t0, decree)
             t = decree.next0_
             while t is not None:
                 if (t.is_newline_before):
                     break
                 if (isinstance(t, TextToken)):
                     if ((t).is_pure_verb):
                         return None
                 rt01.end_token = t
                 t = t.next0_
             rts0.append(rt01)
             return rts0
         if (book_prev is not None):
             tt = t
             while tt is not None and ((tt.isCharOf(",.") or tt.is_hiphen)):
                 tt = tt.next0_
             blt0 = BookLinkToken.tryParse(tt, 0)
             if (blt0 is not None and blt0.typ == BookLinkTyp.PAGERANGE):
                 rts0 = list()
                 bref0 = BookLinkRefReferent._new389(book_prev)
                 if (num is not None):
                     bref0.number = num
                 bref0.pages = blt0.value
                 rt00 = ReferentToken(bref0, t0, blt0.end_token)
                 rts0.append(rt00)
                 return rts0
         return None
     if (br is not None
             and ((tn1 == br.end_token or tn1 == br.end_token.previous))):
         tn0 = tn0.next0_
         tn1 = tn1.previous
     if (start_of_name is None):
         while tn0 is not None:
             if (tn0.isCharOf(":,~")):
                 tn0 = tn0.next0_
             else:
                 break
     while tn1 is not None and tn1.begin_char > tn0.begin_char:
         if (tn1.isCharOf(".;,:(~") or tn1.is_hiphen
                 or tn1.isValue("РЕД", None)):
             pass
         else:
             break
         tn1 = tn1.previous
     nam = MiscHelper.getTextValue(
         tn0, tn1,
         Utils.valToEnum(
             (GetTextAttr.KEEPQUOTES) | (GetTextAttr.KEEPREGISTER),
             GetTextAttr))
     if (start_of_name is not None):
         if (nam is None or (len(nam) < 3)):
             nam = start_of_name
         else:
             nam = "{0}{1}{2}".format(
                 start_of_name, (" " if tn0.is_whitespace_before else ""),
                 nam)
     if (nam is None):
         return None
     res.name = nam
     if (num is None and not is_in_lit):
         if (len(nam) < 20):
             return None
         coef -= (2)
     if (len(nam) > 500):
         coef -= (math.floor(len(nam) / 500))
     if (is_bracket_regime):
         coef -= 1
     if (len(nam) > 200):
         if (num is None):
             return None
         if (res.findSlot(BookLinkReferent.ATTR_AUTHOR, None, True) is None
                 and not BookLinkToken.checkLinkBefore(t0, num)):
             return None
     en = 0
     ru = 0
     ua = 0
     cha = 0
     nocha = 0
     chalen = 0
     lt0 = tn0
     lt1 = tn1
     if (tn1 is None):
         if (t is None):
             return None
         lt0 = t0
         lt1 = t
         tn1 = t.previous
     tt = lt0
     while tt is not None and tt.end_char <= lt1.end_char:
         if ((isinstance(tt, TextToken)) and tt.chars.is_letter):
             if (tt.chars.is_latin_letter):
                 en += 1
             elif (tt.morph.language.is_ua):
                 ua += 1
             elif (tt.morph.language.is_ru):
                 ru += 1
             if (tt.length_char > 2):
                 cha += 1
                 chalen += tt.length_char
         elif (not ((isinstance(tt, ReferentToken)))):
             nocha += 1
         tt = tt.next0_
     if (ru > (ua + en)):
         res.lang = "RU"
     elif (ua > (ru + en)):
         res.lang = "UA"
     elif (en > (ru + ua)):
         res.lang = "EN"
     if (nocha > 3 and nocha > cha and start_of_name is None):
         if (nocha > (math.floor(chalen / 3))):
             coef -= (2)
     if (res.lang == "EN"):
         tt = tn0.next0_
         first_pass2762 = True
         while True:
             if first_pass2762: first_pass2762 = False
             else: tt = tt.next0_
             if (not (tt is not None and (tt.end_char < tn1.end_char))):
                 break
             if (tt.is_comma and tt.next0_ is not None
                     and ((not tt.next0_.chars.is_all_lower or
                           (isinstance(tt.next0_, ReferentToken))))):
                 if (tt.next0_.next0_ is not None
                         and tt.next0_.next0_.is_comma_and):
                     if (isinstance(tt.next0_, ReferentToken)):
                         pass
                     else:
                         continue
                 nam = MiscHelper.getTextValue(
                     tn0, tt.previous,
                     Utils.valToEnum((GetTextAttr.KEEPQUOTES) |
                                     (GetTextAttr.KEEPREGISTER),
                                     GetTextAttr))
                 if (nam is not None and len(nam) > 15):
                     res.name = nam
                     break
     rt = ReferentToken(res, t00, tn1)
     authors = True
     edits = False
     br = (None)
     first_pass2763 = True
     while True:
         if first_pass2763: first_pass2763 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char > 0 and t.begin_char >= max_char):
             break
         if (BracketHelper.canBeStartOfSequence(t, False, False)):
             br = BracketHelper.tryParse(t, BracketParseAttr.CANBEMANYLINES,
                                         100)
             if (br is not None and br.length_char > 300):
                 br = (None)
         blt = BookLinkToken.tryParse(t, 0)
         if (t.is_newline_before and not t.isChar('/')
                 and not t.previous.isChar('/')):
             if (blt is not None and blt.typ == BookLinkTyp.NUMBER):
                 break
             if (t.previous.isCharOf(":")):
                 pass
             elif (blt is not None and ((
                 ((blt.typ == BookLinkTyp.DELIMETER or blt.typ
                   == BookLinkTyp.PAGERANGE or blt.typ == BookLinkTyp.PAGES)
                  or blt.typ == BookLinkTyp.GEO or blt.typ
                  == BookLinkTyp.PRESS) or blt.typ == BookLinkTyp.N))):
                 pass
             elif (num is not None and BookLinkToken.tryParseAuthor(
                     t, FioTemplateType.UNDEFINED) is not None):
                 pass
             elif (num is not None and blt is not None
                   and blt.typ != BookLinkTyp.NUMBER):
                 pass
             elif (br is not None and (t.end_char < br.end_char)
                   and t.begin_char > br.begin_char):
                 pass
             else:
                 ok = False
                 mmm = 50
                 tt = t.next0_
                 while tt is not None and mmm > 0:
                     if (tt.is_newline_before):
                         blt2 = BookLinkToken.tryParse(tt, 0)
                         if (blt2 is not None
                                 and blt2.typ == BookLinkTyp.NUMBER
                                 and blt2.value == next_num):
                             ok = True
                             break
                         if (blt2 is not None):
                             if (blt2.typ == BookLinkTyp.PAGES
                                     or blt2.typ == BookLinkTyp.GEO
                                     or blt2.typ == BookLinkTyp.PRESS):
                                 ok = True
                                 break
                     tt = tt.next0_
                     mmm -= 1
                 if (not ok):
                     npt = NounPhraseHelper.tryParse(
                         t.previous,
                         Utils.valToEnum(
                             ((NounPhraseParseAttr.MULTILINES) |
                              (NounPhraseParseAttr.PARSEADVERBS) |
                              (NounPhraseParseAttr.PARSEPREPOSITION)) |
                             (NounPhraseParseAttr.PARSEVERBS) |
                             (NounPhraseParseAttr.PARSEPRONOUNS),
                             NounPhraseParseAttr), 0)
                     if (npt is not None and npt.end_char >= t.end_char):
                         ok = True
                 if (not ok):
                     break
         rt.end_token = t
         if (blt is not None):
             rt.end_token = blt.end_token
         if (t.isCharOf(".,") or t.is_hiphen):
             continue
         if (t.isValue("С", None)):
             pass
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.EDITORS):
             edits = True
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and blt is not None
                 and blt.typ == BookLinkTyp.SOSTAVITEL):
             edits = False
             t = blt.end_token
             coef += 1
             continue
         if (regtyp == BookLinkAnalyzer.RegionTyp.FIRST and authors):
             blt2 = BookLinkToken.tryParseAuthor(t, prev_pers_templ)
             if (blt2 is not None and blt2.typ == BookLinkTyp.PERSON):
                 prev_pers_templ = blt2.person_template
                 if (not edits):
                     BookLinkAnalyzer.__addAuthor(res, blt2)
                 coef += 1
                 t = blt2.end_token
                 continue
             if (blt2 is not None and blt2.typ == BookLinkTyp.ANDOTHERS):
                 if (not edits):
                     res.authors_and_other = True
                 coef += 1
                 t = blt2.end_token
                 continue
             authors = False
         if (blt is None):
             continue
         if (blt.typ == BookLinkTyp.ELECTRONRES
                 or blt.typ == BookLinkTyp.URL):
             is_electr_res = True
             if (blt.typ == BookLinkTyp.ELECTRONRES):
                 coef += 1.5
             else:
                 coef += .5
             if (isinstance(blt.ref, UriReferent)):
                 res.addSlot(BookLinkReferent.ATTR_URL,
                             Utils.asObjectOrNull(blt.ref, UriReferent),
                             False, 0)
         elif (blt.typ == BookLinkTyp.YEAR):
             if (res.year == 0):
                 res.year = int(blt.value)
                 coef += .5
         elif (blt.typ == BookLinkTyp.DELIMETER):
             coef += 1
             if (blt.length_char == 2):
                 regtyp = BookLinkAnalyzer.RegionTyp.SECOND
             else:
                 regtyp = BookLinkAnalyzer.RegionTyp.FIRST
         elif (
             (((blt.typ == BookLinkTyp.MISC or blt.typ == BookLinkTyp.TYPE
                or blt.typ == BookLinkTyp.PAGES) or blt.typ
               == BookLinkTyp.NAMETAIL or blt.typ == BookLinkTyp.TRANSLATE)
              or blt.typ == BookLinkTyp.PRESS
              or blt.typ == BookLinkTyp.VOLUME)
                 or blt.typ == BookLinkTyp.N):
             coef += 1
         elif (blt.typ == BookLinkTyp.PAGERANGE):
             pages = blt
             coef += 1
             if (is_bracket_regime and blt.end_token.next0_ is not None
                     and blt.end_token.next0_.isChar(')')):
                 coef += (2)
                 if (res.name is not None
                         and res.findSlot(BookLinkReferent.ATTR_AUTHOR,
                                          None, True) is not None):
                     coef = (10)
         elif (blt.typ == BookLinkTyp.GEO
               and ((regtyp == BookLinkAnalyzer.RegionTyp.SECOND
                     or regtyp == BookLinkAnalyzer.RegionTyp.FIRST))):
             coef += 1
         elif (blt.typ == BookLinkTyp.GEO and t.previous is not None
               and t.previous.isChar('.')):
             coef += 1
         elif (blt.typ == BookLinkTyp.ANDOTHERS):
             coef += 1
             if (authors):
                 res.authors_and_other = True
         coef += blt.add_coef
         t = blt.end_token
     if ((coef < 2.5) and num is not None):
         if (BookLinkToken.checkLinkBefore(t0, num)):
             coef += (2)
         elif (BookLinkToken.checkLinkAfter(rt.end_token, num)):
             coef += (1)
     if (rt.length_char > 500):
         return None
     if (is_in_lit):
         coef += 1
     if (coef < 2.5):
         if (is_electr_res and uri is not None):
             pass
         elif (coef >= 2 and is_in_lit):
             pass
         else:
             return None
     for rr in corr_authors:
         pits0 = PersonItemToken.tryAttachList(
             rr.begin_token, None,
             PersonItemToken.ParseAttr.CANINITIALBEDIGIT, 10)
         if (pits0 is None or (len(pits0) < 2)):
             continue
         if (pits0[0].typ == PersonItemToken.ItemType.VALUE):
             exi = False
             for i in range(len(rr.referent.slots) - 1, -1, -1):
                 s = rr.referent.slots[i]
                 if (s.type_name == PersonReferent.ATTR_LASTNAME):
                     ln = Utils.asObjectOrNull(s.value, str)
                     if (ln is None):
                         continue
                     if (ln == pits0[0].value):
                         exi = True
                         continue
                     if (ln.find('-') > 0):
                         ln = ln[0:0 + ln.find('-')]
                     if (pits0[0].begin_token.isValue(ln, None)):
                         del rr.referent.slots[i]
             if (not exi):
                 rr.referent.addSlot(PersonReferent.ATTR_LASTNAME,
                                     pits0[0].value, False, 0)
     rts = list()
     bref = BookLinkRefReferent._new389(res)
     if (num is not None):
         bref.number = num
     rt1 = ReferentToken(bref, t0, rt.end_token)
     if (pages is not None):
         if (pages.value is not None):
             bref.pages = pages.value
         rt.end_token = pages.begin_token.previous
     rts.append(rt1)
     rts.append(rt)
     return rts

Beispiel #4

Datei anzeigen

Datei: DenominationAnalyzer.py Projekt: pullenti/PullentiPython

 def try_attach(self, t : 'Token', for_ontology : bool=False) -> 'ReferentToken':
     """Try to attach a denomination starting at token ``t``.

     The special-case parser is consulted first. Otherwise the tokens
     following ``t`` are scanned: digit numbers and short text tokens are
     accumulated, and the separators ``/``, ``\\``, ``_`` and hyphens are
     recorded as ``-``.

     Args:
         t: first token of the candidate.
         for_ontology: when true, whitespace between tokens does not stop
             the scan and the final validity checks are skipped.

     Returns:
         A ``ReferentToken`` holding a new ``DenominationReferent``, the
         result of the special-case parser, or ``None``.
     """
     if (t is None):
         return None
     special = self.__try_attach_spec(t)
     if (special is not None):
         return special
     if (t.chars.is_all_lower):
         # An all-lowercase start is accepted only when a number is glued to it
         # and the token itself starts a fresh fragment.
         if (t.is_whitespace_after or not isinstance(t.next0_, NumberToken)):
             return None
         if (not (t.previous is None or t.is_whitespace_before
                  or t.previous.is_char_of(",:"))):
             return None
     buf = io.StringIO()
     last_tok = t
     trailing_sep = False
     valid = True
     digit_count = 0
     symbol_count = 0
     cur = t.next0_
     while cur is not None:
         if (cur.is_whitespace_before and not for_ontology):
             break
         if (cur.is_char_of("/\\_") or cur.is_hiphen):
             # Every separator is recorded as a single '-'.
             trailing_sep = True
             buf.write('-')
         else:
             trailing_sep = False
             number = Utils.asObjectOrNull(cur, NumberToken)
             if (number is not None):
                 if (number.typ != NumberSpellingType.DIGIT):
                     break
                 last_tok = number
                 buf.write(str(number.get_source_text()))
                 digit_count += 1
             else:
                 word = Utils.asObjectOrNull(cur, TextToken)
                 if (word is None):
                     break
                 if (word.length_char > 3):
                     valid = False
                     break
                 if (not str.isalpha(word.term[0])):
                     if (word.is_char_of(",:")
                             or BracketHelper.can_be_end_of_sequence(
                                 word, False, None, False)):
                         break
                     if (not word.is_char_of("+*&^#@!")):
                         valid = False
                         break
                     symbol_count += 1
                 last_tok = word
                 buf.write(str(word.get_source_text()))
         cur = cur.next0_
     if (not for_ontology):
         # The buffer is only ever appended to, so tell() equals its length.
         written = buf.tell()
         if (written < 1 or not valid or trailing_sep):
             return None
         if (written > 12):
             return None
         if (Utils.getCharAtStringIO(buf, written - 1) == '!'):
             return None
         if ((digit_count + symbol_count) == 0):
             return None
         if (not self.__check_attach(t, last_tok)):
             return None
     referent = DenominationReferent()
     referent._add_value(t, last_tok)
     return ReferentToken(referent, t, last_tok)

Beispiel #5

Datei anzeigen

Datei: TitlePageReferent.py Projekt: pullenti/PullentiPython

 def org0_(self) -> 'OrganizationReferent':
     """Organization: the ATTR_ORG slot value, passed through
     ``Utils.asObjectOrNull`` for ``OrganizationReferent``."""
     slot_value = self.get_slot_value(TitlePageReferent.ATTR_ORG)
     return Utils.asObjectOrNull(slot_value, OrganizationReferent)

Beispiel #6

Datei anzeigen

 def url(self) -> 'UriReferent':
     """URL: the ATTR_URL slot value, passed through
     ``Utils.asObjectOrNull`` for ``UriReferent``."""
     slot_value = self.get_slot_value(BookLinkReferent.ATTR_URL)
     return Utils.asObjectOrNull(slot_value, UriReferent)

Beispiel #7

Datei anzeigen

Datei: BookLinkReferent.py Projekt: MihaJjDa/APCLtask

 def url(self) -> 'UriReferent':
     """URL: the ATTR_URL slot value, passed through
     ``Utils.asObjectOrNull`` for ``UriReferent``."""
     slot_value = self.getSlotValue(BookLinkReferent.ATTR_URL)
     return Utils.asObjectOrNull(slot_value, UriReferent)

Beispiel #8

Datei anzeigen

Datei: CityAttachHelper.py Projekt: MihaJjDa/APCLtask

 def __tryNounName(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                   always: bool) -> 'ReferentToken':
     """Try to build a city referent from a type noun followed by a name.

     ``li`` must hold at least two items and ``li[0]`` must be a NOUN or
     MISC item; otherwise ``None`` is returned.

     Args:
         li: candidate city items. The list is mutated: items after the name
             item are deleted and a leading MISC item is removed.
         oi: holder whose ``value`` is reset to ``None`` on entry and may be
             set to the ontology item of the name item.
         always: when true, an unconfirmed candidate is not rejected by the
             final ``checkNearBefore`` context check.

     Returns:
         A ``ReferentToken`` wrapping a ``GeoReferent``, or ``None``.
     """
     oi.value = (None)
     if (li is None or (len(li) < 2)
             or ((li[0].typ != CityItemToken.ItemType.NOUN
                  and li[0].typ != CityItemToken.ItemType.MISC))):
         return None
     ok = not li[0].doubtful
     if (ok and li[0].typ == CityItemToken.ItemType.MISC):
         ok = False
     # A MISC head contributes no type words.
     typ = (None
            if li[0].typ == CityItemToken.ItemType.MISC else li[0].value)
     typ2 = (None if li[0].typ == CityItemToken.ItemType.MISC else
             li[0].alt_value)
     prob_adj = None
     # Index of the item that carries the name.
     i1 = 1
     # Settlement/village type followed by a second type noun: the second noun
     # becomes the secondary type and the name is taken from the next item.
     if ((typ is not None and li[i1].typ == CityItemToken.ItemType.NOUN and
          ((i1 + 1) < len(li))) and li[0].whitespaces_after_count <= 1 and
         (((LanguageHelper.endsWith(typ, "ПОСЕЛОК")
            or LanguageHelper.endsWith(typ, "СЕЛИЩЕ") or typ == "ДЕРЕВНЯ")
           or typ == "СЕЛО"))):
         if (li[i1].begin_token == li[i1].end_token):
             ooo = AddressItemToken.tryAttachOrg(li[i1].begin_token)
             if (ooo is not None and ooo.ref_token is not None):
                 return None
         typ2 = li[i1].value
         # "СТ" was read as "СТАНЦИЯ"; also keep an adjective form that agrees
         # with the name so it can be registered as an alternative name below.
         if (typ2 == "СТАНЦИЯ" and li[i1].begin_token.isValue("СТ", None)
                 and ((i1 + 1) < len(li))):
             m = li[i1 + 1].morph
             if (m.number == MorphNumber.PLURAL):
                 prob_adj = "СТАРЫЕ"
             elif (m.gender == MorphGender.FEMINIE):
                 prob_adj = "СТАРАЯ"
             elif (m.gender == MorphGender.MASCULINE):
                 prob_adj = "СТАРЫЙ"
             else:
                 prob_adj = "СТАРОЕ"
         i1 += 1
     name = Utils.ifNotNull(li[i1].value,
                            ((None if li[i1].onto_item is None else
                              li[i1].onto_item.canonic_text)))
     alt_name = li[i1].alt_value
     if (name is None):
         return None
     mc = li[0].morph
     if (i1 == 1 and li[i1].typ == CityItemToken.ItemType.CITY
             and ((li[0].value == "ГОРОД" or li[0].value == "МІСТО"
                   or li[0].typ == CityItemToken.ItemType.MISC))):
         if (typ is None and ((i1 + 1) < len(li))
                 and li[i1 + 1].typ == CityItemToken.ItemType.NOUN):
             return None
         oi.value = li[i1].onto_item
         if (oi.value is not None):
             name = oi.value.canonic_text
         # oi.value may be None here (see the check just above), so guard the
         # attribute access instead of raising AttributeError on short names.
         if (len(name) > 2 or ((oi.value is not None
                                and oi.value.misc_attr is not None))):
             if (not li[1].doubtful
                     or ((oi.value is not None
                          and oi.value.misc_attr is not None))):
                 ok = True
             elif (not ok and not li[1].is_newline_before):
                 if (li[0].geo_object_before or li[1].geo_object_after):
                     ok = True
                 elif (StreetDefineHelper.checkStreetAfter(
                         li[1].end_token.next0_)):
                     ok = True
                 elif (li[1].end_token.next0_ is not None
                       and (isinstance(li[1].end_token.next0_.getReferent(),
                                       DateReferent))):
                     ok = True
                 elif ((li[1].whitespaces_before_count < 2)
                       and li[1].onto_item is not None):
                     if (li[1].is_newline_after):
                         ok = True
             if (li[1].doubtful and li[1].end_token.next0_ is not None and
                     li[1].end_token.chars == li[1].end_token.next0_.chars):
                 ok = False
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.isValue("В", None)):
                 ok = True
         if (not ok):
             ok = CityAttachHelper.checkYearAfter(li[1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkCityAfter(li[1].end_token.next0_)
     elif ((li[i1].typ == CityItemToken.ItemType.PROPERNAME
            or li[i1].typ == CityItemToken.ItemType.CITY)):
         if (((li[0].value == "АДМИНИСТРАЦИЯ"
               or li[0].value == "АДМІНІСТРАЦІЯ")) and i1 == 1):
             return None
         if (li[i1].is_newline_before):
             if (len(li) != 2):
                 return None
         if (not li[0].doubtful):
             ok = True
             if (len(name) < 2):
                 ok = False
             elif ((len(name) < 3)
                   and li[0].morph.number != MorphNumber.SINGULAR):
                 ok = False
             if (li[i1].doubtful and not li[i1].geo_object_after
                     and not li[0].geo_object_before):
                 if (li[i1].morph.case_.is_genitive):
                     if (((li[0].begin_token.previous is None
                           or MiscLocationHelper.checkGeoObjectBefore(
                               li[0].begin_token))) and
                         ((li[i1].end_token.next0_ is None
                           or MiscLocationHelper.checkGeoObjectAfter(
                               li[i1].end_token.next0_)
                           or AddressItemToken.checkHouseAfter(
                               li[i1].end_token.next0_, False, True)))):
                         pass
                     else:
                         ok = False
                 else:
                     # Reject when a person property precedes the type noun
                     # and the name itself parses as a person.
                     rt0 = li[i1].kit.processReferent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt0 is not None):
                         rt1 = li[i1].kit.processReferent(
                             "PERSON", li[i1].begin_token)
                         if (rt1 is not None):
                             ok = False
             npt = NounPhraseHelper.tryParse(li[i1].begin_token,
                                             NounPhraseParseAttr.NO, 0)
             if (npt is not None):
                 if (npt.end_token.end_char > li[i1].end_char
                         and len(npt.adjectives) > 0 and
                         not npt.adjectives[0].end_token.next0_.is_comma):
                     ok = False
                 elif (TerrItemToken._m_unknown_regions.tryParse(
                         npt.end_token, TerminParseAttr.FULLWORDSONLY)
                       is not None):
                     # The phrase ends with an unknown-region term: require a
                     # neighbouring non-city geo referent on either side.
                     ok1 = False
                     if (li[0].begin_token.previous is not None):
                         ttt = li[0].begin_token.previous
                         if (ttt.is_comma and ttt.previous is not None):
                             ttt = ttt.previous
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (npt.end_token.next0_ is not None):
                         ttt = npt.end_token.next0_
                         if (ttt.is_comma and ttt.next0_ is not None):
                             ttt = ttt.next0_
                         geo_ = Utils.asObjectOrNull(
                             ttt.getReferent(), GeoReferent)
                         if (geo_ is not None and not geo_.is_city):
                             ok1 = True
                     if (not ok1):
                         return None
             if (li[0].value == "ПОРТ"):
                 if (li[i1].chars.is_all_upper
                         or li[i1].chars.is_latin_letter):
                     return None
         elif (li[0].geo_object_before):
             ok = True
         elif (li[i1].geo_object_after and not li[i1].is_newline_after):
             ok = True
         else:
             ok = CityAttachHelper.checkYearAfter(li[i1].end_token.next0_)
         if (not ok):
             ok = CityAttachHelper.checkStreetAfter(li[i1].end_token.next0_)
         if (not ok and li[0].begin_token.previous is not None
                 and li[0].begin_token.previous.isValue("В", None)):
             ok = True
     else:
         return None
     if (not ok and not always):
         if (MiscLocationHelper.checkNearBefore(li[0].begin_token.previous)
                 is None):
             return None
     # Drop every item after the name item.
     if (len(li) > (i1 + 1)):
         del li[i1 + 1:]
     city = GeoReferent()
     if (oi.value is not None and oi.value.referent is not None):
         # Start from a clone of the ontology referent, without occurrences.
         city = (Utils.asObjectOrNull(oi.value.referent.clone(),
                                      GeoReferent))
         city.occurrence.clear()
     if (not li[0].morph.case_.is_undefined
             and li[0].morph.gender != MorphGender.UNDEFINED):
         if (li[i1].end_token.morph.class0_.is_adjective
                 and li[i1].begin_token == li[i1].end_token):
             # Single-token adjective name: re-derive it using the case and
             # gender of the type noun.
             nam = ProperNameHelper.getNameEx(
                 li[i1].begin_token, li[i1].end_token, MorphClass.ADJECTIVE,
                 li[0].morph.case_, li[0].morph.gender, False, False)
             if (nam is not None and nam != name):
                 name = nam
     if (li[0].morph.case_.is_nominative):
         if (alt_name is not None):
             city._addName(alt_name)
         alt_name = (None)
     city._addName(name)
     if (prob_adj is not None):
         city._addName(prob_adj + " " + name)
     if (alt_name is not None):
         city._addName(alt_name)
         if (prob_adj is not None):
             city._addName(prob_adj + " " + alt_name)
     if (typ is not None):
         city._addTyp(typ)
     elif (not city.is_city):
         city._addTypCity(li[0].kit.base_language)
     if (typ2 is not None):
         city._addTyp(typ2.lower())
     if (li[0].higher_geo is not None
             and GeoOwnerHelper.canBeHigher(li[0].higher_geo, city)):
         city.higher = li[0].higher_geo
     if (li[0].typ == CityItemToken.ItemType.MISC):
         del li[0]
     res = ReferentToken._new719(city, li[0].begin_token,
                                 li[len(li) - 1].end_token, mc)
     # "<name>-<number>": append a small digit number to every name slot.
     if (res.end_token.next0_ is not None and res.end_token.next0_.is_hiphen
             and (isinstance(res.end_token.next0_.next0_, NumberToken))):
         num = Utils.asObjectOrNull(res.end_token.next0_.next0_,
                                    NumberToken)
         if ((num.typ == NumberSpellingType.DIGIT
              and not num.morph.class0_.is_adjective
              and num.int_value is not None) and (num.int_value < 50)):
             for s in city.slots:
                 if (s.type_name == GeoReferent.ATTR_NAME):
                     city.uploadSlot(s,
                                     "{0}-{1}".format(s.value, num.value))
             res.end_token = num
     if (li[0].begin_token == li[0].end_token
             and li[0].begin_token.isValue("ГОРОДОК", None)):
         if (AddressItemToken.checkHouseAfter(res.end_token.next0_, True,
                                              False)):
             return None
     return res

Beispiel #9

Datei anzeigen

Datei: CityAttachHelper.py Projekt: MihaJjDa/APCLtask

 def __tryNameExist(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
                    always: bool) -> 'ReferentToken':
     """Check some special cases where a lone CITY item is accepted.

     Args:
         li: candidate city items; only ``li[0]`` (which must be a CITY item
             starting with a text token) and, in one check, ``li[1]`` are
             inspected.
         oi: holder whose ``value`` is reset to ``None`` on entry and then
             set to the ontology item of ``li[0]``.
         always: when true, the candidate is accepted even without
             confirmation, unless it is a doubtful surname that parses as
             a person.

     Returns:
         A ``ReferentToken`` wrapping a ``GeoReferent``, or ``None``.
     """
     oi.value = (None)
     # An empty list is rejected too, instead of raising IndexError on li[0].
     if (li is None or len(li) == 0
             or li[0].typ != CityItemToken.ItemType.CITY):
         return None
     oi.value = li[0].onto_item
     tt = Utils.asObjectOrNull(li[0].begin_token, TextToken)
     if (tt is None):
         return None
     ok = False
     nam = (li[0].value if oi.value is None else oi.value.canonic_text)
     if (nam is None):
         return None
     if (nam == "РИМ"):
         if (tt.term == "РИМ"):
             if ((isinstance(tt.next0_, TextToken)) and
                     tt.next0_.getMorphClassInDictionary().is_proper_secname
                 ):
                 pass
             else:
                 ok = True
         elif (tt.previous is not None and tt.previous.isValue("В", None)
               and tt.term == "РИМЕ"):
             ok = True
     elif (oi.value is not None and oi.value.referent is not None
           and oi.value.owner.is_ext_ontology):
         ok = True
     elif (nam.endswith("ГРАД") or nam.endswith("СК")):
         ok = True
     elif (nam.endswith("TOWN") or nam.startswith("SAN")):
         ok = True
     elif (li[0].chars.is_latin_letter
           and li[0].begin_token.previous is not None
           and ((li[0].begin_token.previous.isValue("IN", None)
                 or li[0].begin_token.previous.isValue("FROM", None)))):
         ok = True
     else:
         # Look forward on the same line, skipping ",(" and function words,
         # for a geo referent written in the same alphabet.
         tt2 = li[0].end_token.next0_
         first_pass2890 = True
         while True:
             if first_pass2890: first_pass2890 = False
             else: tt2 = tt2.next0_
             if (not (tt2 is not None)): break
             if (tt2.is_newline_before):
                 break
             if ((tt2.isCharOf(",(") or tt2.morph.class0_.is_preposition
                  or tt2.morph.class0_.is_conjunction)
                     or tt2.morph.class0_.is_misc):
                 continue
             if ((isinstance(tt2.getReferent(), GeoReferent))
                     and tt2.chars.is_cyrillic_letter
                     == li[0].chars.is_cyrillic_letter):
                 ok = True
             break
         if (not ok):
             # Same search backward; the first significant token decides.
             tt2 = li[0].begin_token.previous
             first_pass2891 = True
             while True:
                 if first_pass2891: first_pass2891 = False
                 else: tt2 = tt2.previous
                 if (not (tt2 is not None)): break
                 if (tt2.is_newline_after):
                     break
                 if ((tt2.isCharOf(",)") or tt2.morph.class0_.is_preposition
                      or tt2.morph.class0_.is_conjunction)
                         or tt2.morph.class0_.is_misc):
                     continue
                 if ((isinstance(tt2.getReferent(), GeoReferent))
                         and tt2.chars.is_cyrillic_letter
                         == li[0].chars.is_cyrillic_letter):
                     ok = True
                 if (ok):
                     # Withdraw if the text parses as a street but no longer
                     # does once its first street item is removed.
                     sits = StreetItemToken.tryParseList(
                         li[0].begin_token, None, 10)
                     if (sits is not None and len(sits) > 1):
                         ss = StreetDefineHelper._tryParseStreet(
                             sits, False, False)
                         if (ss is not None):
                             del sits[0]
                             if (StreetDefineHelper._tryParseStreet(
                                     sits, False, False) is None):
                                 ok = False
                 if (ok):
                     if (len(li) > 1 and li[1].typ
                             == CityItemToken.ItemType.PROPERNAME
                             and (li[1].whitespaces_before_count < 3)):
                         ok = False
                     else:
                         mc = li[0].begin_token.getMorphClassInDictionary()
                         if (mc.is_proper_name or mc.is_proper_surname
                                 or mc.is_adjective):
                             ok = False
                         else:
                             npt = NounPhraseHelper.tryParse(
                                 li[0].begin_token, NounPhraseParseAttr.NO,
                                 0)
                             if (npt is not None
                                     and npt.end_char > li[0].end_char):
                                 ok = False
                 if (AddressItemToken.tryAttachOrg(li[0].begin_token)
                         is not None):
                     ok = False
                     break
                 break
     if (always):
         # A doubtful surname that parses as a person cancels the forced accept.
         if (li[0].whitespaces_before_count > 3 and li[0].doubtful
                 and li[0].begin_token.getMorphClassInDictionary(
                 ).is_proper_surname):
             pp = li[0].kit.processReferent("PERSON", li[0].begin_token)
             if (pp is not None):
                 always = False
     if (li[0].begin_token.chars.is_latin_letter
             and li[0].begin_token == li[0].end_token):
         # Single Latin token followed (optionally after a comma) by a short
         # non-lowercase Latin token: not confirmed.
         tt1 = li[0].end_token.next0_
         if (tt1 is not None and tt1.isChar(',')):
             tt1 = tt1.next0_
         if (((isinstance(tt1, TextToken)) and tt1.chars.is_latin_letter and
              (tt1.length_char < 3)) and not tt1.chars.is_all_lower):
             ok = False
     if (not ok and not always):
         return None
     city = None
     if (oi.value is not None
             and (isinstance(oi.value.referent, GeoReferent))
             and not oi.value.owner.is_ext_ontology):
         # Reuse the ontology referent directly when its owner is not an
         # external ontology.
         city = (Utils.asObjectOrNull(oi.value.referent, GeoReferent))
     else:
         city = GeoReferent()
         city._addName(nam)
         if (oi.value is not None
                 and (isinstance(oi.value.referent, GeoReferent))):
             city._mergeSlots2(
                 Utils.asObjectOrNull(oi.value.referent, GeoReferent),
                 li[0].kit.base_language)
         if (not city.is_city):
             city._addTypCity(li[0].kit.base_language)
     return ReferentToken._new719(city, li[0].begin_token, li[0].end_token,
                                  li[0].morph)

Beispiel #10

Datei anzeigen

 def try_attach_org(t: 'Token',
                    can_be_cyr: bool = False) -> 'ReferentToken':
     """Try to attach an organization of the form "<Name> <type item>".

     Starting at ``t`` the tokens are scanned as name words until
     ``OrgItemEngItem.try_attach`` recognises a type item; the scanned span
     becomes the organization name and the item supplies the type strings.

     Args:
         t: first token to examine.  A leading '(' is skipped, in which case
             a result is only returned if it is followed by ')' (or by the
             end of the token chain).
         can_be_cyr: when True, tokens written in Cyrillic letters are
             accepted as name words; otherwise only Latin-letter tokens are.

     Returns:
         A ``ReferentToken`` wrapping a new ``OrganizationReferent``, or
         ``None`` when nothing could be attached.
     """
     from pullenti.ner.org.internal.OrgItemNameToken import OrgItemNameToken
     if (t is None):
         return None
     # br remembers that the candidate was opened by a bracket.
     br = False
     if (t.is_char('(') and t.next0_ is not None):
         t = t.next0_
         br = True
     # The first token must be a capitalised adjective-like number written in
     # words, or a non-lowercase word in an accepted alphabet.
     if (isinstance(t, NumberToken)):
         if (t.typ == NumberSpellingType.WORDS
                 and t.morph.class0_.is_adjective
                 and t.chars.is_capital_upper):
             pass
         else:
             return None
     else:
         if (t.chars.is_all_lower):
             return None
         if ((t.length_char < 3) and not t.chars.is_letter):
             return None
         if (not t.chars.is_latin_letter):
             if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                 return None
     t0 = t
     t1 = t0
     nam_wo = 0
     tok = None
     geo_ = None
     add_typ = None
     # Emulated "for (...; t != null; t = t.next0_)" loop: the first_pass
     # flag makes "continue" advance to the next token.
     first_pass3312 = True
     while True:
         if first_pass3312: first_pass3312 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t != t0 and t.whitespaces_before_count > 1):
             break
         if (t.is_char(')')):
             break
         if (t.is_char('(') and t.next0_ is not None):
             # "(<geo>)" inside the name: remember the geo object.
             if ((isinstance(t.next0_.get_referent(), GeoReferent))
                     and t.next0_.next0_ is not None
                     and t.next0_.next0_.is_char(')')):
                 geo_ = (Utils.asObjectOrNull(t.next0_.get_referent(),
                                              GeoReferent))
                 t = t.next0_.next0_
                 continue
             # "(<latin type>)" inside the name: remember the extra type.
             typ = OrgItemTypeToken.try_attach(t.next0_, True, None)
             if ((typ is not None and typ.end_token.next0_ is not None
                  and typ.end_token.next0_.is_char(')'))
                     and typ.chars.is_latin_letter):
                 add_typ = typ
                 t = typ.end_token.next0_
                 continue
             # "(<Capitalised word>)" is kept as part of the name span.
             if (((isinstance(t.next0_, TextToken)) and t.next0_.next0_
                  is not None and t.next0_.next0_.is_char(')'))
                     and t.next0_.chars.is_capital_upper):
                 t = t.next0_.next0_
                 t1 = t
                 continue
             break
         # Look for the type item here, or after up to two '.'/',' characters.
         tok = OrgItemEngItem.try_attach(t, can_be_cyr)
         if (tok is None and t.is_char_of(".,") and t.next0_ is not None):
             tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
             if (tok is None and t.next0_.is_char_of(",.")):
                 tok = OrgItemEngItem.try_attach(t.next0_.next0_,
                                                 can_be_cyr)
         if (tok is not None):
             if (tok.length_char == 1 and t0.chars.is_cyrillic_letter):
                 return None
             break
         if (t.is_hiphen and not t.is_whitespace_after
                 and not t.is_whitespace_before):
             continue
         if (t.is_char_of("&+") or t.is_and):
             continue
         if (t.is_char('.')):
             if (t.previous is not None and t.previous.length_char == 1):
                 continue
             elif (MiscHelper.can_be_start_of_sentence(t.next0_)):
                 break
         if (not t.chars.is_latin_letter):
             if (not can_be_cyr or not t.chars.is_cyrillic_letter):
                 break
         if (t.chars.is_all_lower):
             if (t.morph.class0_.is_preposition
                     or t.morph.class0_.is_conjunction):
                 continue
             if (br):
                 continue
             break
         mc = t.get_morph_class_in_dictionary()
         if (mc.is_verb):
             if (t.next0_ is not None
                     and t.next0_.morph.class0_.is_preposition):
                 break
         if (t.next0_ is not None and t.next0_.is_value("OF", None)):
             break
         if (isinstance(t, TextToken)):
             nam_wo += 1
         # t1 tracks the last token accepted into the name.
         t1 = t
     if (tok is None):
         return None
     if (t0 == tok.begin_token):
         # The type item comes first: the name must follow in brackets.
         br2 = BracketHelper.try_parse(tok.end_token.next0_,
                                       BracketParseAttr.NO, 100)
         if (br2 is not None):
             org1 = OrganizationReferent()
             if (tok.short_value is not None):
                 org1.add_type_str(tok.short_value)
             org1.add_type_str(tok.full_value)
             nam1 = MiscHelper.get_text_value(br2.begin_token,
                                              br2.end_token, GetTextAttr.NO)
             if (nam1 is not None):
                 org1.add_name(nam1, True, None)
                 return ReferentToken(org1, t0, br2.end_token)
         return None
     org0_ = OrganizationReferent()
     te = tok.end_token
     if (tok.is_bank):
         t1 = tok.end_token
     # "... company <another type item>": the second item becomes the type
     # and "company" becomes part of the name.
     if (tok.full_value == "company" and (tok.whitespaces_after_count < 3)):
         tok1 = OrgItemEngItem.try_attach(tok.end_token.next0_, can_be_cyr)
         if (tok1 is not None):
             t1 = tok.end_token
             tok = tok1
             te = tok.end_token
     if (tok.full_value == "company"):
         if (nam_wo == 0):
             return None
     nam = MiscHelper.get_text_value(t0, t1, GetTextAttr.IGNOREARTICLES)
     if (nam == "STOCK" and tok.full_value == "company"):
         return None
     alt_nam = None
     if (Utils.isNullOrEmpty(nam)):
         return None
     # A parenthesised fragment inside the name: keep the full text as an
     # alternative name and use the text around the parentheses as the name.
     i1 = nam.find('(')
     if (i1 > 0):
         i2 = nam.find(')')
         if (i1 < i2):
             alt_nam = nam
             tai = None
             if ((i2 + 1) < len(nam)):
                 # Slice from i2 + 1 so the ')' itself is not carried into
                 # the name (slicing from i2 produced e.g. "FOO ) BAR").
                 tai = nam[i2 + 1:].strip()
             nam = nam[0:i1].strip()
             # Skip an empty tail so no trailing space is appended.
             if (tai):
                 nam = "{0} {1}".format(nam, tai)
     if (tok.is_bank):
         org0_.add_type_str(
             ("bank" if tok.kit.base_language.is_en else "банк"))
         org0_.add_profile(OrgProfile.FINANCE)
         # "Bank OF <Latin name>": extend the name past "OF".
         if ((t1.next0_ is not None and t1.next0_.is_value("OF", None)
              and t1.next0_.next0_ is not None)
                 and t1.next0_.next0_.chars.is_latin_letter):
             nam0 = OrgItemNameToken.try_attach(t1.next0_, None, False,
                                                False)
             if (nam0 is not None):
                 te = nam0.end_token
             else:
                 te = t1.next0_.next0_
             nam = MiscHelper.get_text_value(t0, te, GetTextAttr.NO)
             if (isinstance(te.get_referent(), GeoReferent)):
                 org0_._add_geo_object(
                     Utils.asObjectOrNull(te.get_referent(), GeoReferent))
         elif (t0 == t1):
             return None
     else:
         if (tok.short_value is not None):
             org0_.add_type_str(tok.short_value)
         org0_.add_type_str(tok.full_value)
     if (Utils.isNullOrEmpty(nam)):
         return None
     org0_.add_name(nam, True, None)
     if (alt_nam is not None):
         org0_.add_name(alt_nam, True, None)
     res = ReferentToken(org0_, t0, te)
     # Skip trailing '.'/',' and try to pick up one more type item.
     t = te
     while t.next0_ is not None:
         if (t.next0_.is_char_of(",.")):
             t = t.next0_
         else:
             break
     if (t.whitespaces_after_count < 2):
         tok = OrgItemEngItem.try_attach(t.next0_, can_be_cyr)
         if (tok is not None):
             if (tok.short_value is not None):
                 org0_.add_type_str(tok.short_value)
             org0_.add_type_str(tok.full_value)
             res.end_token = tok.end_token
     if (geo_ is not None):
         org0_._add_geo_object(geo_)
     if (add_typ is not None):
         org0_.add_type(add_typ, False)
     if (not br):
         return res
     # The candidate was opened by '(': require the closing ')'.
     t = res.end_token
     if (t.next0_ is None or t.next0_.is_char(')')):
         # NOTE(review): when t.next0_ is None this assigns None to
         # res.end_token - confirm that the setter tolerates it.
         res.end_token = t.next0_
     else:
         return None
     return res

Beispiel #11

Datei anzeigen

 def tryParse(t: 'Token',
              loc_onto: 'IntOntologyCollection') -> 'NamedItemToken':
     """Try to parse a named-entity item that starts at token ``t``.

     The checks are tried in order and the first one that matches decides
     the result: an already recognised referent token, a type termin, a name
     termin, a direction adjective (``tryAttachNordWest``), a capitalised
     noun phrase with adjectives, a bracketed name, and finally any
     non-lowercase word longer than two characters.

     Args:
         t: token to start from; ``None`` yields ``None``.
         loc_onto: local ontology; in this function it is only forwarded to
             the recursive calls.

     Returns:
         A ``NamedItemToken`` or ``None`` if nothing matched.
     """
     if (t is None):
         return None
     if (isinstance(t, ReferentToken)):
         # Only person, person-property, geo and organization referents are
         # wrapped; any other referent token is rejected.
         r = t.getReferent()
         if ((r.type_name == "PERSON" or r.type_name == "PERSONPROPERTY" or
              (isinstance(r, GeoReferent)))
                 or r.type_name == "ORGANIZATION"):
             return NamedItemToken._new1635(t, t, r, t.morph)
         return None
     typ = NamedItemToken.__m_types.tryParse(t, TerminParseAttr.NO)
     nam = NamedItemToken.__m_names.tryParse(t, TerminParseAttr.NO)
     if (typ is not None):
         if (not ((isinstance(t, TextToken)))):
             return None
         res = NamedItemToken._new1636(typ.begin_token, typ.end_token,
                                       typ.morph, typ.chars)
         res.kind = (Utils.valToEnum(typ.termin.tag, NamedEntityKind))
         res.type_value = typ.termin.canonic_text
         # The same span is also a known name of the same kind.
         if ((nam is not None and nam.end_token == typ.end_token
              and not t.chars.is_all_lower) and (Utils.valToEnum(
                  nam.termin.tag, NamedEntityKind)) == res.kind):
             res.name_value = nam.termin.canonic_text
             res.is_wellknown = True
         return res
     if (nam is not None):
         if (nam.begin_token.chars.is_all_lower):
             return None
         res = NamedItemToken._new1636(nam.begin_token, nam.end_token,
                                       nam.morph, nam.chars)
         res.kind = (Utils.valToEnum(nam.termin.tag, NamedEntityKind))
         res.name_value = nam.termin.canonic_text
         # The name counts as well known only when it is not glued to its
         # neighbours (trailing punctuation followed by a space is allowed).
         ok = True
         if (not t.is_whitespace_before and t.previous is not None):
             ok = False
         elif (not t.is_whitespace_after and t.next0_ is not None):
             if (t.next0_.isCharOf(",.;!?")
                     and t.next0_.is_whitespace_after):
                 pass
             else:
                 ok = False
         if (ok):
             res.is_wellknown = True
             res.type_value = (Utils.asObjectOrNull(nam.termin.tag2, str))
         return res
     adj = MiscLocationHelper.tryAttachNordWest(t)
     if (adj is not None):
         if (adj.morph.class0_.is_noun):
             if (adj.end_token.isValue("ВОСТОК", None)):
                 if (adj.begin_token == adj.end_token):
                     return None
                 re = NamedItemToken._new1638(t, adj.end_token, adj.morph)
                 re.kind = NamedEntityKind.LOCATION
                 re.name_value = MiscHelper.getTextValue(
                     t, adj.end_token,
                     GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
                 re.is_wellknown = True
                 return re
             return None
         if (adj.whitespaces_after_count > 2):
             return None
         # Adjective followed by an already recognised geo referent.
         if ((isinstance(adj.end_token.next0_, ReferentToken)) and
             (isinstance(adj.end_token.next0_.getReferent(), GeoReferent))):
             re = NamedItemToken._new1638(t, adj.end_token.next0_,
                                          adj.end_token.next0_.morph)
             re.kind = NamedEntityKind.LOCATION
             re.name_value = MiscHelper.getTextValue(
                 t, adj.end_token.next0_,
                 GetTextAttr.FIRSTNOUNGROUPTONOMINATIVE)
             re.is_wellknown = True
             re.ref = adj.end_token.next0_.getReferent()
             return re
         # Adjective followed by something that itself parses as a location:
         # prepend the adjective to that item's name.
         res = NamedItemToken.tryParse(adj.end_token.next0_, loc_onto)
         if (res is not None and res.kind == NamedEntityKind.LOCATION):
             s = adj.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                       res.morph.gender, False)
             if (s is not None):
                 if (res.name_value is None):
                     res.name_value = s.upper()
                 else:
                     res.name_value = "{0} {1}".format(
                         s.upper(), res.name_value)
                     res.type_value = (None)
                 res.begin_token = t
                 res.chars = t.chars
                 res.is_wellknown = True
                 return res
     if (t.chars.is_capital_upper
             and not MiscHelper.canBeStartOfSentence(t)):
         npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
         if (npt is not None and len(npt.adjectives) > 0):
             test = NamedItemToken.tryParse(npt.noun.begin_token, loc_onto)
             if (test is not None and test.end_token == npt.end_token
                     and test.type_value is not None):
                 test.begin_token = t
                 # The name is the space-separated list of the adjectives.
                 tmp = io.StringIO()
                 for a in npt.adjectives:
                     s = a.getNormalCaseText(MorphClass.ADJECTIVE, True,
                                             test.morph.gender, False)
                     # The same call is guarded against None in the
                     # adjective branch above; print(None) would write the
                     # literal text "None" into the name, so skip it.
                     if (s is None):
                         continue
                     if (tmp.tell() > 0):
                         print(' ', end="", file=tmp)
                     print(s, end="", file=tmp)
                 test.name_value = Utils.toStringStringIO(tmp)
                 test.chars = t.chars
                 if (test.kind == NamedEntityKind.LOCATION):
                     test.is_wellknown = True
                 return test
     if ((BracketHelper.isBracket(t, True) and t.next0_ is not None
          and t.next0_.chars.is_letter)
             and not t.next0_.chars.is_all_lower):
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
         if (br is not None):
             res = NamedItemToken(t, br.end_token)
             res.is_in_bracket = True
             res.name_value = MiscHelper.getTextValue(
                 t, br.end_token, GetTextAttr.NO)
             # The whole bracket content is a known name.
             nam = NamedItemToken.__m_names.tryParse(
                 t.next0_, TerminParseAttr.NO)
             if (nam is not None
                     and nam.end_token == br.end_token.previous):
                 res.kind = (Utils.valToEnum(nam.termin.tag,
                                             NamedEntityKind))
                 res.is_wellknown = True
                 res.name_value = nam.termin.canonic_text
             return res
     if (((isinstance(t, TextToken)) and t.chars.is_letter
          and not t.chars.is_all_lower) and t.length_char > 2):
         res = NamedItemToken._new1638(t, t, t.morph)
         str0_ = (t).term
         # Words with these endings are kept as written instead of being
         # normalised.
         if (str0_.endswith(("О", "И", "Ы"))):
             res.name_value = str0_
         else:
             res.name_value = t.getNormalCaseText(None, False,
                                                  MorphGender.UNDEFINED,
                                                  False)
         res.chars = t.chars
         # "Word-Word" without spaces and in the same alphabet is merged
         # into one hyphenated name.
         if (((not t.is_whitespace_after and t.next0_ is not None
               and t.next0_.is_hiphen) and
              (isinstance(t.next0_.next0_, TextToken))
              and not t.next0_.next0_.is_whitespace_after)
                 and t.chars.is_cyrillic_letter
                 == t.next0_.next0_.chars.is_cyrillic_letter):
             res.end_token = t.next0_.next0_
             t = res.end_token
             res.name_value = "{0}-{1}".format(
                 res.name_value,
                 t.getNormalCaseText(None, False, MorphGender.UNDEFINED,
                                     False))
         return res
     return None

Beispiel #12

Datei anzeigen

Datei: BookLinkToken.py Projekt: pullenti/PullentiPython

 def __try_parse(t: 'Token', lev: int) -> 'BookLinkToken':
     """Try to recognise one bibliographic-reference item starting at ``t``.

     The checks run in a fixed order and fall through to the next one when
     they do not return: bracketed items, referent tokens (person, geo,
     date, organization, URI), termins from ``__m_termins``, the '/'
     delimiter, plain numbers, a set of keyword abbreviations on text
     tokens, and finally a year in parentheses.

     Args:
         t: token to start from.
         lev: recursion depth; recursive calls pass ``lev + 1``.

     Returns:
         A ``BookLinkToken`` or ``None``.  ``None`` is returned immediately
         when ``t`` is ``None`` or ``lev`` is greater than 3.
     """
     if (t is None or lev > 3):
         return None
     if (t.is_char('[')):
         # "[<item>]": parse the content and widen the result to the brackets.
         re = BookLinkToken.__try_parse(t.next0_, lev + 1)
         if (re is not None and re.end_token.next0_ is not None
                 and re.end_token.next0_.is_char(']')):
             re.begin_token = t
             re.end_token = re.end_token.next0_
             return re
         if (re is not None and re.end_token.is_char(']')):
             re.begin_token = t
             return re
         if (re is not None):
             if (re.typ == BookLinkTyp.SOSTAVITEL
                     or re.typ == BookLinkTyp.EDITORS):
                 return re
         # A short bracket sequence ending with a number is a NUMBER item
         # whose value is the text between the brackets.
         br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
         if (br is not None):
             if ((isinstance(br.end_token.previous, NumberToken))
                     and (br.length_char < 30)):
                 return BookLinkToken._new329(
                     t, br.end_token, BookLinkTyp.NUMBER,
                     MiscHelper.get_text_value(br.begin_token.next0_,
                                               br.end_token.previous,
                                               GetTextAttr.NO))
     # NOTE(review): t0 is not read again in this function.
     t0 = t
     if (isinstance(t, ReferentToken)):
         if (isinstance(t.get_referent(), PersonReferent)):
             return BookLinkToken.try_parse_author(
                 t, FioTemplateType.UNDEFINED)
         if (isinstance(t.get_referent(), GeoReferent)):
             return BookLinkToken._new326(t, t, BookLinkTyp.GEO,
                                          t.get_referent())
         if (isinstance(t.get_referent(), DateReferent)):
             dr = Utils.asObjectOrNull(t.get_referent(), DateReferent)
             # A date becomes a YEAR item when it has a single slot with a
             # positive year, or a positive year preceded by a comma.
             if (len(dr.slots) == 1 and dr.year > 0):
                 return BookLinkToken._new329(t, t, BookLinkTyp.YEAR,
                                              str(dr.year))
             if (dr.year > 0 and t.previous is not None
                     and t.previous.is_comma):
                 return BookLinkToken._new329(t, t, BookLinkTyp.YEAR,
                                              str(dr.year))
         if (isinstance(t.get_referent(), OrganizationReferent)):
             org0_ = Utils.asObjectOrNull(t.get_referent(),
                                          OrganizationReferent)
             if (org0_.kind == OrganizationKind.PRESS):
                 return BookLinkToken._new326(t, t, BookLinkTyp.PRESS,
                                              org0_)
         if (isinstance(t.get_referent(), UriReferent)):
             uri = Utils.asObjectOrNull(t.get_referent(), UriReferent)
             # Only http/https/ftp URIs or URIs without a scheme are links.
             if ((uri.scheme == "http" or uri.scheme == "https"
                  or uri.scheme == "ftp") or uri.scheme is None):
                 return BookLinkToken._new326(t, t, BookLinkTyp.URL, uri)
     tok_ = BookLinkToken.__m_termins.try_parse(t, TerminParseAttr.NO)
     if (tok_ is not None):
         typ_ = Utils.valToEnum(tok_.termin.tag, BookLinkTyp)
         ok = True
         # TYPE, NAMETAIL and ELECTRONRES termins are accepted only right
         # after one of '.', ':', '[' or a hyphen.
         if (typ_ == BookLinkTyp.TYPE or typ_ == BookLinkTyp.NAMETAIL
                 or typ_ == BookLinkTyp.ELECTRONRES):
             if (t.previous is not None and
                 ((t.previous.is_char_of(".:[") or t.previous.is_hiphen))):
                 pass
             else:
                 ok = False
         if (ok):
             return BookLinkToken._new329(t, tok_.end_token, typ_,
                                          tok_.termin.canonic_text)
         if (typ_ == BookLinkTyp.ELECTRONRES):
             # Skip non-letter text tokens and accept the item if the first
             # other token is a URI referent.
             tt = tok_.end_token.next0_
             # Emulated for-loop: "continue" advances tt to the next token.
             first_pass3019 = True
             while True:
                 if first_pass3019: first_pass3019 = False
                 else: tt = tt.next0_
                 if (not (tt is not None)): break
                 if ((isinstance(tt, TextToken))
                         and not tt.chars.is_letter):
                     continue
                 if (isinstance(tt.get_referent(), UriReferent)):
                     return BookLinkToken._new326(t, tt,
                                                  BookLinkTyp.ELECTRONRES,
                                                  tt.get_referent())
                 break
     if (t.is_char('/')):
         # "/" or "//" delimiter.
         res = BookLinkToken._new329(t, t, BookLinkTyp.DELIMETER, "/")
         if (t.next0_ is not None and t.next0_.is_char('/')):
             res.end_token = t.next0_
             res.value = "//"
         if (not t.is_whitespace_before and not t.is_whitespace_after):
             # A slash glued to its neighbours is a delimiter only if one of
             # the next three tokens parses as a non-NUMBER item.
             coo = 3
             no = True
             tt = t.next0_
             while tt is not None and coo > 0:
                 vvv = BookLinkToken.try_parse(tt, lev + 1)
                 if (vvv is not None and vvv.typ != BookLinkTyp.NUMBER):
                     no = False
                     break
                 tt = tt.next0_
                 coo -= 1
             if (no):
                 return None
         return res
     if ((isinstance(t, NumberToken)) and t.int_value is not None
             and t.typ == NumberSpellingType.DIGIT):
         res = BookLinkToken._new329(t, t, BookLinkTyp.NUMBER, str(t.value))
         val = t.int_value
         # Values in [1930, 2030) are treated as a year.
         if (val >= 1930 and (val < 2030)):
             res.typ = BookLinkTyp.YEAR
         if (t.next0_ is not None and t.next0_.is_char('.')):
             res.end_token = t.next0_
         elif ((t.next0_ is not None and t.next0_.length_char == 1
                and not t.next0_.chars.is_letter)
               and t.next0_.is_whitespace_after):
             res.end_token = t.next0_
         elif (isinstance(t.next0_, TextToken)):
             # A page abbreviation after the number turns it into PAGES.
             # Presumably the near-identical literals are Latin/Cyrillic
             # look-alike spellings - verify before editing them.
             term = t.next0_.term
             if (((term == "СТР" or term == "C" or term == "С")
                  or term == "P" or term == "S") or term == "PAGES"):
                 res.end_token = t.next0_
                 res.typ = BookLinkTyp.PAGES
                 res.value = str(t.value)
         return res
     if (isinstance(t, TextToken)):
         term = t.term
         # Page / volume abbreviation followed by a number (range).
         if (((((
             ((term == "СТР" or term == "C" or term == "С") or term == "ТОМ"
              or term == "T") or term == "Т" or term == "P") or term == "PP"
                or term == "V") or term == "VOL" or term == "S")
              or term == "СТОР" or t.is_value("PAGE", None))
                 or t.is_value("СТРАНИЦА", "СТОРІНКА")):
             tt = t.next0_
             # Skip '.', ':' and '~' between the abbreviation and the number.
             while tt is not None:
                 if (tt.is_char_of(".:~")):
                     tt = tt.next0_
                 else:
                     break
             if (isinstance(tt, NumberToken)):
                 res = BookLinkToken._new328(t, tt, BookLinkTyp.PAGERANGE)
                 tt0 = tt
                 tt1 = tt
                 tt = tt.next0_
                 # Extend over further numbers joined by ',' or a hyphen.
                 first_pass3020 = True
                 while True:
                     if first_pass3020: first_pass3020 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_char_of(",") or tt.is_hiphen):
                         if (isinstance(tt.next0_, NumberToken)):
                             tt = tt.next0_
                             res.end_token = tt
                             tt1 = tt
                             continue
                     break
                 res.value = MiscHelper.get_text_value(
                     tt0, tt1, GetTextAttr.NO)
                 return res
         # Abbreviation followed by ':'/';' or '.' is returned as a GEO item
         # (presumably a place of publication - confirm against callers).
         if ((term == "M" or term == "М" or term == "СПБ") or term == "K"
                 or term == "К"):
             if (t.next0_ is not None and t.next0_.is_char_of(":;")):
                 re = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                 return re
             if (t.next0_ is not None and t.next0_.is_char_of(".")):
                 res = BookLinkToken._new328(t, t.next0_, BookLinkTyp.GEO)
                 # After the dot only ':'/';', a number, or a comma followed
                 # by a number is accepted.
                 if (t.next0_.next0_ is not None
                         and t.next0_.next0_.is_char_of(":;")):
                     res.end_token = t.next0_.next0_
                 elif (t.next0_.next0_ is not None
                       and (isinstance(t.next0_.next0_, NumberToken))):
                     pass
                 elif (t.next0_.next0_ is not None
                       and t.next0_.next0_.is_comma and
                       (isinstance(t.next0_.next0_.next0_, NumberToken))):
                     pass
                 else:
                     return None
                 return res
         # Translation marker: the abbreviation, an optional '.', then a
         # one-letter word and one more token, returned as TRANSLATE.
         if (term == "ПЕР" or term == "ПЕРЕВ" or term == "ПЕРЕВОД"):
             tt = t
             if (tt.next0_ is not None and tt.next0_.is_char('.')):
                 tt = tt.next0_
             if (tt.next0_ is not None
                     and ((tt.next0_.is_value("C", None)
                           or tt.next0_.is_value("С", None)))):
                 tt = tt.next0_
                 if (tt.next0_ is None or tt.whitespaces_after_count > 2):
                     return None
                 re = BookLinkToken._new328(t, tt.next0_,
                                            BookLinkTyp.TRANSLATE)
                 return re
         # TAMZE marker, optionally spanning a following "ЖЕ" token.
         if (term == "ТАМ" or term == "ТАМЖЕ"):
             res = BookLinkToken._new328(t, t, BookLinkTyp.TAMZE)
             if (t.next0_ is not None and t.next0_.is_value("ЖЕ", None)):
                 res.end_token = t.next0_
             return res
         # SEE markers: extend over '.', ':', "ALSO", "В"/"IN" and one nested
         # SEE item.
         if (((term == "СМ" or term == "CM" or term == "НАПР")
              or term == "НАПРИМЕР" or term == "SEE") or term == "ПОДРОБНЕЕ"
                 or term == "ПОДРОБНО"):
             res = BookLinkToken._new328(t, t, BookLinkTyp.SEE)
             t = t.next0_
             first_pass3021 = True
             while True:
                 if first_pass3021: first_pass3021 = False
                 else: t = t.next0_
                 if (not (t is not None)): break
                 if (t.is_char_of(".:") or t.is_value("ALSO", None)):
                     res.end_token = t
                     continue
                 if (t.is_value("В", None) or t.is_value("IN", None)):
                     res.end_token = t
                     continue
                 vvv = BookLinkToken.__try_parse(t, lev + 1)
                 if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                     res.end_token = vvv.end_token
                     break
                 break
             return res
         # "БОЛЕЕ" directly before a SEE item is merged into that item.
         if (term == "БОЛЕЕ"):
             vvv = BookLinkToken.__try_parse(t.next0_, lev + 1)
             if (vvv is not None and vvv.typ == BookLinkTyp.SEE):
                 vvv.begin_token = t
                 return vvv
         # Number prefix followed by a number token.
         no = MiscHelper.check_number_prefix(t)
         if (isinstance(no, NumberToken)):
             return BookLinkToken._new328(t, no, BookLinkTyp.N)
         # One-letter word, then a number, then a volume/book word -> VOLUME.
         if (((term == "B" or term == "В"))
                 and (isinstance(t.next0_, NumberToken))
                 and (isinstance(t.next0_.next0_, TextToken))):
             term2 = t.next0_.next0_.term
             if (((term2 == "Т" or term2 == "T" or term2.startswith("ТОМ"))
                  or term2 == "TT" or term2 == "ТТ") or term2 == "КН"
                     or term2.startswith("КНИГ")):
                 return BookLinkToken._new328(t, t.next0_.next0_,
                                              BookLinkTyp.VOLUME)
     if (t.is_char('(')):
         # "(NNNN)" with 1900 < NNNN <= 2040 and not later than the current
         # year is a YEAR item.
         if (((isinstance(t.next0_, NumberToken)) and t.next0_.int_value
              is not None and t.next0_.next0_ is not None)
                 and t.next0_.next0_.is_char(')')):
             num = t.next0_.int_value
             if (num > 1900 and num <= 2040):
                 if (num <= datetime.datetime.now().year):
                     return BookLinkToken._new329(t, t.next0_.next0_,
                                                  BookLinkTyp.YEAR,
                                                  str(num))
         # "(<date referent>)" with a positive year is a YEAR item as well.
         if (((isinstance(t.next0_, ReferentToken)) and
              (isinstance(t.next0_.get_referent(), DateReferent))
              and t.next0_.next0_ is not None)
                 and t.next0_.next0_.is_char(')')):
             num = t.next0_.get_referent().year
             if (num > 0):
                 return BookLinkToken._new329(t, t.next0_.next0_,
                                              BookLinkTyp.YEAR, str(num))
     return None

Beispiel #13

Datei anzeigen

Datei: TerrAttachHelper.py Projekt: MihaJjDa/APCLtask

 def tryAttachTerritory(
         li: typing.List['TerrItemToken'],
         ad: 'AnalyzerData',
         attach_always: bool = False,
         cits: typing.List['CityItemToken'] = None,
         exists: typing.List['GeoReferent'] = None) -> 'ReferentToken':
     if (li is None or len(li) == 0):
         return None
     ex_obj = None
     new_name = None
     adj_list = list()
     noun = None
     add_noun = None
     rt = TerrAttachHelper.__tryAttachMoscowAO(li, ad)
     if (rt is not None):
         return rt
     if (li[0].termin_item is not None
             and li[0].termin_item.canonic_text == "ТЕРРИТОРИЯ"):
         res2 = TerrAttachHelper.__tryAttachPureTerr(li, ad)
         return res2
     if (len(li) == 2):
         if (li[0].rzd is not None and li[1].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._addName(li[1].rzd_dir)
             rzd._addTypTer(li[0].kit.base_language)
             rzd.addSlot(GeoReferent.ATTR_REF, li[0].rzd.referent, False, 0)
             rzd.addExtReferent(li[0].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
         if (li[1].rzd is not None and li[0].rzd_dir is not None):
             rzd = GeoReferent()
             rzd._addName(li[0].rzd_dir)
             rzd._addTypTer(li[0].kit.base_language)
             rzd.addSlot(GeoReferent.ATTR_REF, li[1].rzd.referent, False, 0)
             rzd.addExtReferent(li[1].rzd)
             return ReferentToken(rzd, li[0].begin_token, li[1].end_token)
     can_be_city_before = False
     adj_terr_before = False
     if (cits is not None):
         if (cits[0].typ == CityItemToken.ItemType.CITY):
             can_be_city_before = True
         elif (cits[0].typ == CityItemToken.ItemType.NOUN
               and len(cits) > 1):
             can_be_city_before = True
     k = 0
     while k < len(li):
         if (li[k].onto_item is not None):
             if (ex_obj is not None or new_name is not None):
                 break
             if (noun is not None):
                 if (k == 1):
                     if (noun.termin_item.canonic_text == "РАЙОН"
                             or noun.termin_item.canonic_text == "ОБЛАСТЬ"
                             or noun.termin_item.canonic_text == "СОЮЗ"):
                         if (isinstance(li[k].onto_item.referent,
                                        GeoReferent)):
                             if ((li[k].onto_item.referent).is_state):
                                 break
                         ok = False
                         tt = li[k].end_token.next0_
                         if (tt is None):
                             ok = True
                         elif (tt.isCharOf(",.")):
                             ok = True
                         if (not ok):
                             ok = MiscLocationHelper.checkGeoObjectBefore(
                                 li[0].begin_token)
                         if (not ok):
                             adr = AddressItemToken.tryParse(
                                 tt, None, False, False, None)
                             if (adr is not None):
                                 if (adr.typ ==
                                         AddressItemToken.ItemType.STREET):
                                     ok = True
                         if (not ok):
                             break
                     if (li[k].onto_item is not None):
                         if (noun.begin_token.isValue("МО", None)
                                 or noun.begin_token.isValue("ЛО", None)):
                             return None
             ex_obj = li[k]
         elif (li[k].termin_item is not None):
             if (noun is not None):
                 break
             if (li[k].termin_item.is_always_prefix and k > 0):
                 break
             if (k > 0 and li[k].is_doubt):
                 if (li[k].begin_token == li[k].end_token
                         and li[k].begin_token.isValue("ЗАО", None)):
                     break
             if (li[k].termin_item.is_adjective
                     or li[k].is_geo_in_dictionary):
                 adj_list.append(li[k])
             else:
                 if (ex_obj is not None):
                     geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                                 GeoReferent)
                     if (geo_ is None):
                         break
                     if (ex_obj.is_adjective and
                         ((li[k].termin_item.canonic_text == "СОЮЗ" or
                           li[k].termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                         str0_ = str(ex_obj.onto_item)
                         if (not li[k].termin_item.canonic_text in str0_):
                             return None
                     if (li[k].termin_item.canonic_text == "РАЙОН"
                             or li[k].termin_item.canonic_text == "ОКРУГ"
                             or li[k].termin_item.canonic_text == "КРАЙ"):
                         tmp = io.StringIO()
                         for s in geo_.slots:
                             if (s.type_name == GeoReferent.ATTR_TYPE):
                                 print("{0};".format(s.value),
                                       end="",
                                       file=tmp,
                                       flush=True)
                         if (not li[k].termin_item.canonic_text
                                 in Utils.toStringStringIO(tmp).upper()):
                             if (k != 1 or new_name is not None):
                                 break
                             new_name = li[0]
                             new_name.is_adjective = True
                             new_name.onto_item = (None)
                             ex_obj = (None)
                 noun = li[k]
                 if (k == 0):
                     tt = TerrItemToken.tryParse(li[k].begin_token.previous,
                                                 None, True, False)
                     if (tt is not None and tt.morph.class0_.is_adjective):
                         adj_terr_before = True
         else:
             if (ex_obj is not None):
                 break
             if (new_name is not None):
                 break
             new_name = li[k]
         k += 1
     name = None
     alt_name = None
     full_name = None
     morph_ = None
     if (ex_obj is not None):
         if (ex_obj.is_adjective and not ex_obj.morph.language.is_en
                 and noun is None):
             if (attach_always and ex_obj.end_token.next0_ is not None):
                 npt = NounPhraseHelper.tryParse(ex_obj.begin_token,
                                                 NounPhraseParseAttr.NO, 0)
                 if (ex_obj.end_token.next0_.is_comma_and):
                     pass
                 elif (npt is None):
                     pass
                 else:
                     str0_ = StreetItemToken.tryParse(
                         ex_obj.end_token.next0_, None, False, None, False)
                     if (str0_ is not None):
                         if (str0_.typ == StreetItemType.NOUN
                                 and str0_.end_token == npt.end_token):
                             return None
             else:
                 cit = CityItemToken.tryParse(ex_obj.end_token.next0_, None,
                                              False, None)
                 if (cit is not None
                         and ((cit.typ == CityItemToken.ItemType.NOUN
                               or cit.typ == CityItemToken.ItemType.CITY))):
                     npt = NounPhraseHelper.tryParse(
                         ex_obj.begin_token, NounPhraseParseAttr.NO, 0)
                     if (npt is not None
                             and npt.end_token == cit.end_token):
                         pass
                     else:
                         return None
                 elif (ex_obj.begin_token.isValue("ПОДНЕБЕСНЫЙ", None)):
                     pass
                 else:
                     return None
         if (noun is None and ex_obj.can_be_city):
             cit0 = CityItemToken.tryParseBack(ex_obj.begin_token.previous)
             if (cit0 is not None
                     and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                 return None
         if (ex_obj.is_doubt and noun is None):
             ok2 = False
             if (TerrAttachHelper.__canBeGeoAfter(ex_obj.end_token.next0_)):
                 ok2 = True
             elif (not ex_obj.can_be_surname and not ex_obj.can_be_city):
                 if ((ex_obj.end_token.next0_ is not None
                      and ex_obj.end_token.next0_.isChar(')')
                      and ex_obj.begin_token.previous is not None)
                         and ex_obj.begin_token.previous.isChar('(')):
                     ok2 = True
                 elif (ex_obj.chars.is_latin_letter
                       and ex_obj.begin_token.previous is not None):
                     if (ex_obj.begin_token.previous.isValue("IN", None)):
                         ok2 = True
                     elif (ex_obj.begin_token.previous.isValue("THE", None)
                           and ex_obj.begin_token.previous.previous
                           is not None
                           and ex_obj.begin_token.previous.previous.isValue(
                               "IN", None)):
                         ok2 = True
             if (not ok2):
                 cit0 = CityItemToken.tryParseBack(
                     ex_obj.begin_token.previous)
                 if (cit0 is not None
                         and cit0.typ != CityItemToken.ItemType.PROPERNAME):
                     pass
                 elif (MiscLocationHelper.checkGeoObjectBefore(
                         ex_obj.begin_token.previous)):
                     pass
                 else:
                     return None
         name = ex_obj.onto_item.canonic_text
         morph_ = ex_obj.morph
     elif (new_name is not None):
         if (noun is None):
             return None
         j = 1
         while j < k:
             if (li[j].is_newline_before and not li[0].is_newline_before):
                 return None
             j += 1
         morph_ = noun.morph
         if (new_name.is_adjective):
             if (noun.termin_item.acronym == "АО"):
                 if (noun.begin_token != noun.end_token):
                     return None
                 if (new_name.morph.gender != MorphGender.FEMINIE):
                     return None
             geo_before = None
             tt0 = li[0].begin_token.previous
             if (tt0 is not None and tt0.is_comma_and):
                 tt0 = tt0.previous
             if (not li[0].is_newline_before and tt0 is not None):
                 geo_before = (Utils.asObjectOrNull(tt0.getReferent(),
                                                    GeoReferent))
             if (Utils.indexOfList(li, noun, 0) < Utils.indexOfList(
                     li, new_name, 0)):
                 if (noun.termin_item.is_state):
                     return None
                 if (new_name.can_be_surname and geo_before is None):
                     if (((noun.morph.case_)
                          & new_name.morph.case_).is_undefined):
                         return None
                 if (MiscHelper.isExistsInDictionary(
                         new_name.begin_token, new_name.end_token,
                     (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                         | MorphClass.VERB)):
                     if (noun.begin_token != new_name.begin_token):
                         if (geo_before is None):
                             if (len(li) == 2
                                     and TerrAttachHelper.__canBeGeoAfter(
                                         li[1].end_token.next0_)):
                                 pass
                             elif (len(li) == 3
                                   and li[2].termin_item is not None
                                   and TerrAttachHelper.__canBeGeoAfter(
                                       li[2].end_token.next0_)):
                                 pass
                             elif (new_name.is_geo_in_dictionary):
                                 pass
                             elif (new_name.end_token.is_newline_after):
                                 pass
                             else:
                                 return None
                 npt = NounPhraseHelper.tryParse(
                     new_name.end_token, NounPhraseParseAttr.PARSEPRONOUNS,
                     0)
                 if (npt is not None
                         and npt.end_token != new_name.end_token):
                     if (len(li) >= 3 and li[2].termin_item is not None
                             and npt.end_token == li[2].end_token):
                         add_noun = li[2]
                     else:
                         return None
                 rtp = new_name.kit.processReferent("PERSON",
                                                    new_name.begin_token)
                 if (rtp is not None):
                     return None
                 name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                   new_name.end_token,
                                                   MorphClass.ADJECTIVE,
                                                   MorphCase.UNDEFINED,
                                                   noun.termin_item.gender,
                                                   False, False)
             else:
                 ok = False
                 if (((k + 1) < len(li)) and li[k].termin_item is None
                         and li[k + 1].termin_item is not None):
                     ok = True
                 elif ((k < len(li)) and li[k].onto_item is not None):
                     ok = True
                 elif (k == len(li) and not new_name.is_adj_in_dictionary):
                     ok = True
                 elif (MiscLocationHelper.checkGeoObjectBefore(
                         li[0].begin_token) or can_be_city_before):
                     ok = True
                 elif (MiscLocationHelper.checkGeoObjectAfter(
                         li[k - 1].end_token)):
                     ok = True
                 elif (len(li) == 3 and k == 2):
                     cit = CityItemToken.tryParse(li[2].begin_token, None,
                                                  False, None)
                     if (cit is not None):
                         if (cit.typ == CityItemToken.ItemType.CITY
                                 or cit.typ == CityItemToken.ItemType.NOUN):
                             ok = True
                 elif (len(li) == 2):
                     ok = TerrAttachHelper.__canBeGeoAfter(
                         li[len(li) - 1].end_token.next0_)
                 if (not ok and not li[0].is_newline_before
                         and not li[0].chars.is_all_lower):
                     rt00 = li[0].kit.processReferent(
                         "PERSONPROPERTY", li[0].begin_token.previous)
                     if (rt00 is not None):
                         ok = True
                 if (noun.termin_item is not None
                         and noun.termin_item.is_strong
                         and new_name.is_adjective):
                     ok = True
                 if (noun.is_doubt and len(adj_list) == 0
                         and geo_before is None):
                     return None
                 name = ProperNameHelper.getNameEx(new_name.begin_token,
                                                   new_name.end_token,
                                                   MorphClass.ADJECTIVE,
                                                   MorphCase.UNDEFINED,
                                                   noun.termin_item.gender,
                                                   False, False)
                 if (not ok and not attach_always):
                     if (MiscHelper.isExistsInDictionary(
                             new_name.begin_token, new_name.end_token,
                         (MorphClass.ADJECTIVE) | MorphClass.PRONOUN
                             | MorphClass.VERB)):
                         if (exists is not None):
                             for e0_ in exists:
                                 if (e0_.findSlot(GeoReferent.ATTR_NAME,
                                                  name, True) is not None):
                                     ok = True
                                     break
                         if (not ok):
                             return None
                 full_name = "{0} {1}".format(
                     ProperNameHelper.getNameEx(li[0].begin_token,
                                                noun.begin_token.previous,
                                                MorphClass.ADJECTIVE,
                                                MorphCase.UNDEFINED,
                                                noun.termin_item.gender,
                                                False, False),
                     noun.termin_item.canonic_text)
         else:
             if (not attach_always or
                 ((noun.termin_item is not None
                   and noun.termin_item.canonic_text == "ФЕДЕРАЦИЯ"))):
                 is_latin = noun.chars.is_latin_letter and new_name.chars.is_latin_letter
                 if (Utils.indexOfList(li, noun, 0) > Utils.indexOfList(
                         li, new_name, 0)):
                     if (not is_latin):
                         return None
                 if (not new_name.is_district_name
                         and not BracketHelper.canBeStartOfSequence(
                             new_name.begin_token, False, False)):
                     if (len(adj_list) == 0
                             and MiscHelper.isExistsInDictionary(
                                 new_name.begin_token, new_name.end_token,
                                 (MorphClass.NOUN) | MorphClass.PRONOUN)):
                         if (len(li) == 2 and noun.is_city_region
                                 and (noun.whitespaces_after_count < 2)):
                             pass
                         else:
                             return None
                     if (not is_latin):
                         if ((noun.termin_item.is_region
                              and not attach_always and
                              ((not adj_terr_before or new_name.is_doubt)))
                                 and not noun.is_city_region and
                                 not noun.termin_item.is_specific_prefix):
                             if (not MiscLocationHelper.
                                     checkGeoObjectBefore(
                                         noun.begin_token)):
                                 if (not noun.is_doubt and noun.begin_token
                                         != noun.end_token):
                                     pass
                                 else:
                                     return None
                         if (noun.is_doubt and len(adj_list) == 0):
                             if (((noun.termin_item.acronym == "МО"
                                   or noun.termin_item.acronym == "ЛО"))
                                     and k == (len(li) - 1)
                                     and li[k].termin_item is not None):
                                 add_noun = li[k]
                                 k += 1
                             else:
                                 return None
                         pers = new_name.kit.processReferent(
                             "PERSON", new_name.begin_token)
                         if (pers is not None):
                             return None
             name = MiscHelper.getTextValue(new_name.begin_token,
                                            new_name.end_token,
                                            GetTextAttr.NO)
             if (new_name.begin_token != new_name.end_token):
                 ttt = new_name.begin_token.next0_
                 while ttt is not None and ttt.end_char <= new_name.end_char:
                     if (ttt.chars.is_letter):
                         ty = TerrItemToken.tryParse(
                             ttt, None, False, False)
                         if ((ty is not None and ty.termin_item is not None
                              and noun is not None)
                                 and ((noun.termin_item.canonic_text
                                       in ty.termin_item.canonic_text
                                       or ty.termin_item.canonic_text
                                       in noun.termin_item.canonic_text))):
                             name = MiscHelper.getTextValue(
                                 new_name.begin_token, ttt.previous,
                                 GetTextAttr.NO)
                             break
                     ttt = ttt.next0_
             if (len(adj_list) > 0):
                 npt = NounPhraseHelper.tryParse(adj_list[0].begin_token,
                                                 NounPhraseParseAttr.NO, 0)
                 if (npt is not None and npt.end_token == noun.end_token):
                     alt_name = "{0} {1}".format(
                         npt.getNormalCaseText(None, False,
                                               MorphGender.UNDEFINED,
                                               False), name)
     else:
         if ((len(li) == 1 and noun is not None
              and noun.end_token.next0_ is not None) and (isinstance(
                  noun.end_token.next0_.getReferent(), GeoReferent))):
             g = Utils.asObjectOrNull(noun.end_token.next0_.getReferent(),
                                      GeoReferent)
             if (noun.termin_item is not None):
                 tyy = noun.termin_item.canonic_text.lower()
                 ooo = False
                 if (g.findSlot(GeoReferent.ATTR_TYPE, tyy, True)
                         is not None):
                     ooo = True
                 elif (tyy.endswith("район") and g.findSlot(
                         GeoReferent.ATTR_TYPE, "район", True) is not None):
                     ooo = True
                 if (ooo):
                     return ReferentToken._new719(g, noun.begin_token,
                                                  noun.end_token.next0_,
                                                  noun.begin_token.morph)
         if ((len(li) == 1 and noun == li[0]
              and li[0].termin_item is not None) and TerrItemToken.tryParse(
                  li[0].end_token.next0_, None, True, False) is None
                 and TerrItemToken.tryParse(li[0].begin_token.previous,
                                            None, True, False) is None):
             if (li[0].morph.number == MorphNumber.PLURAL):
                 return None
             cou = 0
             str0_ = li[0].termin_item.canonic_text.lower()
             tt = li[0].begin_token.previous
             first_pass2898 = True
             while True:
                 if first_pass2898: first_pass2898 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after):
                     cou += 10
                 else:
                     cou += 1
                 if (cou > 500):
                     break
                 g = Utils.asObjectOrNull(tt.getReferent(), GeoReferent)
                 if (g is None):
                     continue
                 ok = True
                 cou = 0
                 tt = li[0].end_token.next0_
                 first_pass2899 = True
                 while True:
                     if first_pass2899: first_pass2899 = False
                     else: tt = tt.next0_
                     if (not (tt is not None)): break
                     if (tt.is_newline_before):
                         cou += 10
                     else:
                         cou += 1
                     if (cou > 500):
                         break
                     tee = TerrItemToken.tryParse(tt, None, True, False)
                     if (tee is None):
                         continue
                     ok = False
                     break
                 if (ok):
                     ii = 0
                     while g is not None and (ii < 3):
                         if (g.findSlot(GeoReferent.ATTR_TYPE, str0_, True)
                                 is not None):
                             return ReferentToken._new719(
                                 g, li[0].begin_token, li[0].end_token,
                                 noun.begin_token.morph)
                         g = g.higher
                         ii += 1
                 break
         return None
     ter = None
     if (ex_obj is not None and (isinstance(ex_obj.tag, GeoReferent))):
         ter = (Utils.asObjectOrNull(ex_obj.tag, GeoReferent))
     else:
         ter = GeoReferent()
         if (ex_obj is not None):
             geo_ = Utils.asObjectOrNull(ex_obj.onto_item.referent,
                                         GeoReferent)
             if (geo_ is not None and not geo_.is_city):
                 ter._mergeSlots2(geo_, li[0].kit.base_language)
             else:
                 ter._addName(name)
             if (noun is None and ex_obj.can_be_city):
                 ter._addTypCity(li[0].kit.base_language)
             else:
                 pass
         elif (new_name is not None):
             ter._addName(name)
             if (alt_name is not None):
                 ter._addName(alt_name)
         if (noun is not None):
             if (noun.termin_item.canonic_text == "АО"):
                 ter._addTyp(
                     ("АВТОНОМНИЙ ОКРУГ" if li[0].kit.base_language.is_ua
                      else "АВТОНОМНЫЙ ОКРУГ"))
             elif (noun.termin_item.canonic_text == "МУНИЦИПАЛЬНОЕ СОБРАНИЕ"
                   or noun.termin_item.canonic_text
                   == "МУНІЦИПАЛЬНЕ ЗБОРИ"):
                 ter._addTyp(("МУНІЦИПАЛЬНЕ УТВОРЕННЯ"
                              if li[0].kit.base_language.is_ua else
                              "МУНИЦИПАЛЬНОЕ ОБРАЗОВАНИЕ"))
             elif (noun.termin_item.acronym == "МО"
                   and add_noun is not None):
                 ter._addTyp(add_noun.termin_item.canonic_text)
             else:
                 if (noun.termin_item.canonic_text == "СОЮЗ"
                         and ex_obj is not None
                         and ex_obj.end_char > noun.end_char):
                     return ReferentToken._new719(ter, ex_obj.begin_token,
                                                  ex_obj.end_token,
                                                  ex_obj.morph)
                 ter._addTyp(noun.termin_item.canonic_text)
                 if (noun.termin_item.is_region and ter.is_state):
                     ter._addTypReg(li[0].kit.base_language)
         if (ter.is_state and ter.is_region):
             for a in adj_list:
                 if (a.termin_item.is_region):
                     ter._addTypReg(li[0].kit.base_language)
                     break
         if (ter.is_state):
             if (full_name is not None):
                 ter._addName(full_name)
     res = ReferentToken(ter, li[0].begin_token, li[k - 1].end_token)
     if (noun is not None and noun.morph.class0_.is_noun):
         res.morph = noun.morph
     else:
         res.morph = MorphCollection()
         ii = 0
         while ii < k:
             for v in li[ii].morph.items:
                 bi = MorphBaseInfo(v)
                 if (noun is not None):
                     if (bi.class0_.is_adjective):
                         bi.class0_ = MorphClass.NOUN
                 res.morph.addItem(bi)
             ii += 1
     if (li[0].termin_item is not None
             and li[0].termin_item.is_specific_prefix):
         res.begin_token = li[0].end_token.next0_
     if (add_noun is not None and add_noun.end_char > res.end_char):
         res.end_token = add_noun.end_token
     if ((isinstance(res.begin_token.previous, TextToken))
             and (res.whitespaces_before_count < 2)):
         tt = Utils.asObjectOrNull(res.begin_token.previous, TextToken)
         if (tt.term == "АР"):
             for ty in ter.typs:
                 if ("республика" in ty or "республіка" in ty):
                     res.begin_token = tt
                     break
     return res

Beispiel #14

Datei anzeigen

Datei: GeoOwnerHelper.py Projekt: MihaJjDa/APCLtask

 def _canBeHigherToken(rhi: 'Token', rlo: 'Token') -> bool:
     """Check whether the geo referent of ``rhi`` may be "higher" than that of ``rlo``.

     Token-level checks (grammatical case, relative position, line breaks,
     a preceding preposition) are applied first; the final decision is
     delegated to ``GeoOwnerHelper.canBeHigher`` on the two referents.

     Args:
         rhi: token expected to carry the higher ``GeoReferent``.
         rlo: token expected to carry the lower ``GeoReferent``.

     Returns:
         ``False`` when a token is ``None``, when either token's referent is
         not a ``GeoReferent``, or when one of the token-level checks rejects
         the pair. Otherwise ``True`` if ``GeoOwnerHelper.canBeHigher``
         accepts the referents, else the value of the locally computed
         ``citi_in_reg`` flag.
     """
     if rhi is None or rlo is None:
         return False
     hi_case = rhi.morph.case_
     if hi_case.is_instrumental and not hi_case.is_genitive:
         return False
     hi = Utils.asObjectOrNull(rhi.getReferent(), GeoReferent)
     lo = Utils.asObjectOrNull(rlo.getReferent(), GeoReferent)
     if hi is None or lo is None:
         return False

     def typed_as_city(geo):
         # True when the referent has a type slot equal to one of the
         # three "city" words; lookups run in this order and stop at the
         # first hit.
         return any(
             geo.findSlot(GeoReferent.ATTR_TYPE, word, True) is not None
             for word in ("город", "місто", "city"))

     citi_in_reg = False

     # City token immediately followed by a genitive region token whose
     # type string mentions a district / okrug.
     if hi.is_city and lo.is_region and typed_as_city(hi):
         s = GeoOwnerHelper.__getTypesString(lo)
         if (any(marker in s
                 for marker in ("район", "административный округ",
                                "муниципальный округ",
                                "адміністративний округ",
                                "муніципальний округ"))
                 or lo.findSlot(GeoReferent.ATTR_TYPE, "округ", True)
                 is not None):
             if rhi.next0_ == rlo and rlo.morph.case_.is_genitive:
                 citi_in_reg = True

     # Region token paired with a city token.
     if hi.is_region and lo.is_city:
         if not typed_as_city(lo):
             citi_in_reg = True
         elif GeoOwnerHelper.__getTypesString(hi) == "район;":
             # Either the district itself sits inside a region, or it
             # precedes the city and is followed by a comma (the case of
             # ``rlo`` does not influence the outcome here).
             if ((hi.higher is not None and hi.higher.is_region)
                     or (rhi.end_char <= rlo.begin_char
                         and rhi.next0_.is_comma)):
                 citi_in_reg = True

     if rhi.end_char <= rlo.begin_char:
         # ``rhi`` stands before ``rlo`` in the text.
         if (not rhi.morph.class0_.is_adjective and hi.is_state
                 and not rhi.chars.is_latin_letter):
             return False
         if ((rhi.is_newline_after or rlo.is_newline_before)
                 and not citi_in_reg):
             return False

     prep = rlo.previous
     if prep is not None and prep.morph.class0_.is_preposition:
         # Pick the preposition pair by the language of the preposition
         # token, then require a compatible (or undefined) case on ``rlo``.
         if prep.morph.language.is_ua:
             in_word, from_word = "У", "З"
         else:
             in_word, from_word = "В", "ИЗ"
         if (prep.isValue(in_word, None)
                 and not rlo.morph.case_.is_dative
                 and not rlo.morph.case_.is_prepositional
                 and not rlo.morph.case_.is_undefined):
             return False
         if (prep.isValue(from_word, None)
                 and not rlo.morph.case_.is_genitive
                 and not rlo.morph.case_.is_undefined):
             return False

     return True if GeoOwnerHelper.canBeHigher(hi, lo) else citi_in_reg

Beispiel #15

Datei anzeigen

 def toString(self,
              short_variant: bool,
              lang: 'MorphLang',
              lev: int = 0) -> str:
     """Compose a one-line textual description of this block.

     The text is built, in order, from: the outer value of ``kind`` (plus
     the outer value of ``kind2`` in parentheses when ``kind2`` is not
     ``UNDEFINED``), the numbering, a status marker, and a quoted title.

     Args:
         short_variant: forwarded unchanged to ``self.ref.toString``.
         lang: language handed to ``convertInnerValueToOuterValue`` and to
             the nested ``toString`` call.
         lev: current nesting depth; the referenced object is rendered only
             while ``lev < 30``.

     Returns:
         The accumulated text with surrounding whitespace stripped.
     """
     out = io.StringIO()
     ki = self.kind

     def outer_label(kind_value):
         # Outer value of a kind for ``lang``; None unless it is a str.
         return Utils.asObjectOrNull(
             MetaInstrumentBlock.GLOBAL_META.kind_feature.
             convertInnerValueToOuterValue(
                 Utils.enumToString(kind_value), lang), str)

     label = outer_label(ki)
     if label is not None:
         out.write(label)
         if self.kind2 != InstrumentKind.UNDEFINED:
             label = outer_label(self.kind2)
             if label is not None:
                 out.write(" ({0})".format(label))

     if self.number > 0:
         if ki == InstrumentKind.TABLE:
             out.write(" {0} строк, {1} столбцов".format(
                 len(self.children), self.number))
         else:
             out.write(" №{0}".format(self.number))
             # Append ".a.b.c", stopping at the first non-positive part.
             for attr in ("sub_number", "sub_number2", "sub_number3"):
                 part = getattr(self, attr)
                 if part > 0:
                     out.write(".{0}".format(part))
                 else:
                     break
             if self.min_number > 0:
                 # Scan backwards for the last ' ' or '.' and put
                 # "<min_number>-" right after it.
                 pos = out.tell() - 1
                 while pos >= 0:
                     if Utils.getCharAtStringIO(out, pos) in (' ', '.'):
                         Utils.insertStringIO(
                             out, pos + 1, "{0}-".format(self.min_number))
                         break
                     pos -= 1

     # Once a status marker is written, ``ref`` is not rendered below.
     ignore_ref = False
     if self.is_expired:
         out.write(" (утратить силу)")
         ignore_ref = True
     elif (ki != InstrumentKind.EDITIONS and ki != InstrumentKind.APPROVED
           and isinstance(self.ref, DecreeReferent)):
         out.write(" (*)")
         ignore_ref = True

     # Title: the NAME attribute, falling back to VALUE, cut to 100 chars.
     title = self.getStringValue(InstrumentBlockReferent.ATTR_NAME)
     if title is None:
         title = self.getStringValue(InstrumentBlockReferent.ATTR_VALUE)
     if title is not None:
         if len(title) > 100:
             title = title[:100] + "..."
         out.write(" \"{0}\"".format(title))
     elif not ignore_ref and isinstance(self.ref, Referent) and lev < 30:
         out.write(" \"{0}\"".format(
             self.ref.toString(short_variant, lang, lev + 1)))
     return Utils.toStringStringIO(out).strip()

Beispiel #16

Datei anzeigen

Datei: CityAttachHelper.py Projekt: MihaJjDa/APCLtask

 def __try1(li: typing.List['CityItemToken'], oi: 'IntOntologyItem',
            ad: 'AnalyzerDataWithOntology') -> 'ReferentToken':
     """Try to build a city referent token from the leading items of ``li``.

     The first item is taken as the city name; an optional second item of
     type NOUN is taken as the type word. A chain of heuristics decides
     whether the candidate is trustworthy (``ok``); if it is not, None is
     returned.

     Args:
         li: city item tokens. The list is truncated in place to the items
             that were consumed.
         oi: used here as an out-wrapper: only its ``value`` attribute is
             written (reset to None, then set to ``li[0].onto_item``) and
             read back.
         ad: analyzer data; only ``local_ontology`` is read and the value
             may be None.

     Returns:
         A ReferentToken holding a GeoReferent, or None when the items are
         rejected.
     """
     oi.value = (None)
     # Accept only lists starting with CITY, or exactly [PROPERNAME, NOUN].
     if (li is None or (len(li) < 1)):
         return None
     elif (li[0].typ != CityItemToken.ItemType.CITY):
         if (len(li) != 2 or li[0].typ != CityItemToken.ItemType.PROPERNAME
                 or li[1].typ != CityItemToken.ItemType.NOUN):
             return None
     # i is the index of the next item to examine; it ends up as the number
     # of items consumed from li.
     i = 1
     oi.value = li[0].onto_item
     ok = not li[0].doubtful
     # An ontology item owned by neither the local ontology nor an external
     # one is trusted only when the preceding token is "В".
     if ((ok and li[0].onto_item is not None
          and li[0].onto_item.misc_attr is None) and ad is not None):
         if (li[0].onto_item.owner != ad.local_ontology
                 and not li[0].onto_item.owner.is_ext_ontology):
             if (li[0].begin_token.previous is not None
                     and li[0].begin_token.previous.isValue("В", None)):
                 pass
             else:
                 ok = False
     # A lone adjective that parses as a two-item street list ending in a
     # street noun is rejected.
     if (len(li) == 1 and li[0].begin_token.morph.class0_.is_adjective):
         sits = StreetItemToken.tryParseList(li[0].begin_token, None, 3)
         if (sits is not None and len(sits) == 2
                 and sits[1].typ == StreetItemType.NOUN):
             return None
     typ = None
     alttyp = None
     mc = li[0].morph
     # Optional second item: a NOUN that may serve as the type word.
     if (i < len(li)):
         if (li[i].typ == CityItemToken.ItemType.NOUN):
             # `at` stays non-None only when the noun parses as an address
             # item followed by a street noun, and the token after it does
             # not itself parse as a STREET address item.
             at = None
             if (not li[i].chars.is_all_lower
                     and (li[i].whitespaces_after_count < 2)):
                 sit = StreetItemToken.tryParse(li[i].end_token.next0_,
                                                None, False, None, False)
                 if (sit is not None and sit.typ == StreetItemType.NOUN):
                     at = AddressItemToken.tryParse(li[i].begin_token, None,
                                                    False, False, None)
                     if (at is not None):
                         at2 = AddressItemToken.tryParse(
                             li[i].end_token.next0_, None, False, False,
                             None)
                         if (at2 is not None and at2.typ
                                 == AddressItemToken.ItemType.STREET):
                             at = (None)
             if (at is None):
                 typ = li[i].value
                 alttyp = li[i].alt_value
                 # An upper-case "СТ" as the type word is rejected outright.
                 if (li[i].begin_token.isValue("СТ", None)
                         and li[i].begin_token.chars.is_all_upper):
                     return None
                 if ((i + 1) == len(li)):
                     # The noun is the last item: accept, and prefer its
                     # morphology when its case is defined.
                     ok = True
                     if (not li[i].morph.case_.is_undefined):
                         mc = li[i].morph
                     i += 1
                 elif (ok):
                     i += 1
                 else:
                     # Not yet trusted: accept if the text token right
                     # before the name is one of the listed words.
                     tt0 = li[0].begin_token.previous
                     if ((isinstance(tt0, TextToken))
                             and (tt0.whitespaces_after_count < 3)):
                         if (tt0.isValue("МЭР", "МЕР")
                                 or tt0.isValue("ГЛАВА", None)
                                 or tt0.isValue("ГРАДОНАЧАЛЬНИК", None)):
                             ok = True
                             i += 1
     # Untrusted candidates: canonic names shorter than 4 characters and
     # proper names are rejected.
     if (not ok and oi.value is not None
             and (len(oi.value.canonic_text) < 4)):
         return None
     if (not ok and li[0].begin_token.morph.class0_.is_proper_name):
         return None
     if (not ok):
         # Not found in the dictionary as adjective/noun/pronoun: rely on a
         # neighbouring geo object, then filter out person names, street
         # readings and organizations.
         if (not MiscHelper.isExistsInDictionary(
                 li[0].begin_token, li[0].end_token, (MorphClass.ADJECTIVE)
                 | MorphClass.NOUN | MorphClass.PRONOUN)):
             ok = (li[0].geo_object_before or li[i - 1].geo_object_after)
             if (ok and li[0].begin_token == li[0].end_token):
                 mcc = li[0].begin_token.getMorphClassInDictionary()
                 if (mcc.is_proper_name or mcc.is_proper_surname):
                     ok = False
                 elif (li[0].geo_object_before
                       and (li[0].whitespaces_after_count < 2)):
                     ad1 = AddressItemToken.tryParse(
                         li[0].begin_token, None, False, False, None)
                     if (ad1 is not None and ad1.typ
                             == AddressItemToken.ItemType.STREET):
                         ad2 = AddressItemToken.tryParse(
                             li[0].end_token.next0_, None, False, False,
                             None)
                         if (ad2 is None or ad2.typ !=
                                 AddressItemToken.ItemType.STREET):
                             ok = False
                     elif (AddressItemToken.tryAttachOrg(li[0].begin_token)
                           is not None):
                         ok = False
         if (ok):
             # Reject if a PERSON referent can be processed at the name.
             if (li[0].kit.processReferent("PERSON", li[0].begin_token)
                     is not None):
                 ok = False
     # Fallback checks on what follows the name.
     if (not ok):
         ok = CityAttachHelper.checkYearAfter(li[0].end_token.next0_)
     if (not ok and ((not li[0].begin_token.morph.class0_.is_adjective
                      or li[0].begin_token != li[0].end_token))):
         ok = CityAttachHelper.checkCityAfter(li[0].end_token.next0_)
     if (not ok):
         return None
     # Drop the items that were not consumed (equivalent to `del li[i:]`).
     if (i < len(li)):
         del li[i:i + len(li) - i]
     rt = None
     if (oi.value is None):
         # No ontology item.
         if (li[0].value is not None and li[0].higher_geo is not None):
             # The item carries its own value and a higher geo object:
             # build the referent directly from them.
             cap = GeoReferent()
             cap._addName(li[0].value)
             cap._addTypCity(li[0].kit.base_language)
             cap.higher = li[0].higher_geo
             if (typ is not None):
                 cap._addTyp(typ)
             if (alttyp is not None):
                 cap._addTyp(alttyp)
             rt = ReferentToken(cap, li[0].begin_token, li[0].end_token)
         else:
             if (li[0].value is None):
                 return None
             if (typ is None):
                 # Without a type word the name is accepted only as
                 # "<geo referent> - <name>" (hyphen preceded by a
                 # GeoReferent token).
                 if ((len(li) == 1
                      and li[0].begin_token.previous is not None
                      and li[0].begin_token.previous.is_hiphen) and
                     (isinstance(li[0].begin_token.previous.previous,
                                 ReferentToken)) and
                     (isinstance(
                         li[0].begin_token.previous.previous.getReferent(),
                         GeoReferent))):
                     pass
                 else:
                     return None
             else:
                 # The type word must end with one of the listed endings,
                 # or match one of the two exceptions below.
                 if (not LanguageHelper.endsWithEx(typ, "ПУНКТ",
                                                   "ПОСЕЛЕНИЕ", "ПОСЕЛЕННЯ",
                                                   "ПОСЕЛОК")):
                     if (not LanguageHelper.endsWith(typ, "CITY")):
                         if (typ == "СТАНЦИЯ" and
                             ((MiscLocationHelper.checkGeoObjectBefore(
                                 li[0].begin_token)))):
                             pass
                         elif (len(li) > 1
                               and li[1].typ == CityItemToken.ItemType.NOUN
                               and li[0].typ
                               == CityItemToken.ItemType.CITY):
                             pass
                         else:
                             return None
                 # NOTE(review): li may have been truncated to i items
                 # above; when i == 1 the li[1] access below looks like it
                 # could raise IndexError - confirm against callers.
                 if (li[0].begin_token.morph.class0_.is_adjective):
                     li[0].value = ProperNameHelper.getNameEx(
                         li[0].begin_token, li[0].end_token,
                         MorphClass.ADJECTIVE, li[1].morph.case_,
                         li[1].morph.gender, False, False)
     elif (isinstance(oi.value.referent, GeoReferent)):
         # The ontology item already carries a GeoReferent: reuse it.
         rt = ReferentToken._new719(
             Utils.asObjectOrNull(oi.value.referent, GeoReferent),
             li[0].begin_token, li[len(li) - 1].end_token, mc)
     elif (typ is None):
         typ = oi.value.typ
     if (rt is None):
         # Create a fresh GeoReferent named after the item value or the
         # ontology item's canonic text.
         city = GeoReferent()
         city._addName(
             (li[0].value if oi.value is None else oi.value.canonic_text))
         if (typ is not None):
             city._addTyp(typ)
         else:
             city._addTypCity(li[0].kit.base_language)
         if (alttyp is not None):
             city._addTyp(alttyp)
         rt = ReferentToken._new719(city, li[0].begin_token,
                                    li[len(li) - 1].end_token, mc)
     # For a single-item city, extend the token over an adjacent "Г" or
     # "Г." found before the name, or "Г" / "Г." found after it.
     if ((isinstance(rt.referent, GeoReferent)) and len(li) == 1
             and (rt.referent).is_city):
         if (rt.begin_token.previous is not None
                 and rt.begin_token.previous.isValue("Г", None)):
             rt.begin_token = rt.begin_token.previous
         elif ((rt.begin_token.previous is not None
                and rt.begin_token.previous.isChar('.')
                and rt.begin_token.previous.previous is not None)
               and rt.begin_token.previous.previous.isValue("Г", None)):
             rt.begin_token = rt.begin_token.previous.previous
         elif (rt.end_token.next0_ is not None
               and (rt.whitespaces_after_count < 2)
               and rt.end_token.next0_.isValue("Г", None)):
             rt.end_token = rt.end_token.next0_
             if (rt.end_token.next0_ is not None
                     and rt.end_token.next0_.isChar('.')):
                 rt.end_token = rt.end_token.next0_
     return rt

Beispiel #17

Datei anzeigen

Datei: TransportAnalyzer.py Projekt: MihaJjDa/APCLtask

 def process(self, kit: 'AnalysisKit') -> None:
     """Find transport referents in the token chain of ``kit`` and embed them.

     Pass 1 walks the tokens, parses transport item lists, attaches
     referents, registers them and embeds the resulting tokens. It also
     indexes every referent by model (and by "brand model") and by name.

     Pass 2 walks the tokens again and embeds further mentions that match an
     indexed model or name.

     Args:
         kit: analysis kit whose token chain is scanned and modified in
             place via ``kit.embedToken``.
     """
     ad = kit.getAnalyzerData(self)
     models = TerminCollection()
     # model string -> list of referents having that model
     objs_by_model = dict()
     obj_by_names = TerminCollection()
     # Pass 1. The first_pass flag emulates a for-loop whose step
     # (t = t.next0_) must also run after `continue`.
     t = kit.first_token
     first_pass3136 = True
     while True:
         if first_pass3136: first_pass3136 = False
         else: t = t.next0_
         if (not (t is not None)): break
         its = TransItemToken.tryParseList(t, 10)
         if (its is None):
             continue
         rts = self.__tryAttach(its, False)
         if (rts is not None):
             for rt in rts:
                 # Look back over at most 1000 tokens for an existing
                 # TransportReferent that has every slot of the new one;
                 # if found, reuse it instead of the new referent.
                 cou = 0
                 tt = t.previous
                 first_pass3137 = True
                 while True:
                     if first_pass3137: first_pass3137 = False
                     else:
                         tt = tt.previous
                         cou += 1
                     if (not (tt is not None and (cou < 1000))): break
                     tr = Utils.asObjectOrNull(tt.getReferent(),
                                               TransportReferent)
                     if (tr is None):
                         continue
                     ok = True
                     for s in rt.referent.slots:
                         if (tr.findSlot(s.type_name, s.value, True) is
                                 None):
                             ok = False
                             break
                     if (ok):
                         rt.referent = (tr)
                         break
                 rt.referent = ad.registerReferent(rt.referent)
                 kit.embedToken(rt)
                 # Continue the outer scan from the embedded token.
                 t = (rt)
                 for s in rt.referent.slots:
                     if (s.type_name == TransportReferent.ATTR_MODEL):
                         mod = str(s.value)
                         # Two rounds: k == 0 indexes the bare model,
                         # k == 1 indexes "<brand> <model>" when the
                         # referent has a brand.
                         for k in range(2):
                             # Keys starting with a digit are not indexed.
                             # NOTE(review): mod[0] would raise IndexError
                             # on an empty model string - confirm it cannot
                             # be empty.
                             if (not str.isdigit(mod[0])):
                                 # NOTE(review): this initial list is
                                 # overwritten two lines below before use.
                                 li = []
                                 wrapli2546 = RefOutArgWrapper(None)
                                 inoutres2547 = Utils.tryGetValue(
                                     objs_by_model, mod, wrapli2546)
                                 li = wrapli2546.value
                                 if (not inoutres2547):
                                     li = list()
                                     objs_by_model[mod] = li
                                 if (not rt.referent in li):
                                     li.append(rt.referent)
                                 models.addStr(mod, li, None, False)
                             if (k > 0):
                                 break
                             brand = rt.referent.getStringValue(
                                 TransportReferent.ATTR_BRAND)
                             if (brand is None):
                                 break
                             mod = "{0} {1}".format(brand, mod)
                     elif (s.type_name == TransportReferent.ATTR_NAME):
                         obj_by_names.add(
                             Termin._new117(str(s.value), rt.referent))
     # Nothing was indexed, so there is nothing to look for in pass 2.
     if (len(objs_by_model) == 0 and len(obj_by_names.termins) == 0):
         return
     # Pass 2: find further mentions of the indexed models and names.
     t = kit.first_token
     first_pass3138 = True
     while True:
         if first_pass3138: first_pass3138 = False
         else: t = t.next0_
         if (not (t is not None)): break
         # A bracket sequence whose content is exactly a known name.
         br = BracketHelper.tryParse(t, BracketParseAttr.NO, 10)
         if (br is not None):
             toks = obj_by_names.tryParse(t.next0_, TerminParseAttr.NO)
             if (toks is not None
                     and toks.end_token.next0_ == br.end_token):
                 rt0 = ReferentToken(
                     Utils.asObjectOrNull(toks.termin.tag, Referent),
                     br.begin_token, br.end_token)
                 kit.embedToken(rt0)
                 t = (rt0)
                 continue
         if (not ((isinstance(t, TextToken)))):
             continue
         if (not t.chars.is_letter):
             continue
         # Try models first; names are tried only for tokens that are not
         # all lower-case.
         tok = models.tryParse(t, TerminParseAttr.NO)
         if (tok is None):
             if (not t.chars.is_all_lower):
                 tok = obj_by_names.tryParse(t, TerminParseAttr.NO)
             if (tok is None):
                 continue
         # The match must be followed by whitespace, one of ",.)" or a
         # bracket.
         if (not tok.is_whitespace_after):
             if (tok.end_token.next0_ is None
                     or not tok.end_token.next0_.isCharOf(",.)")):
                 if (not BracketHelper.isBracket(tok.end_token.next0_,
                                                 False)):
                     continue
         # Model termins are tagged with a list of referents; the match is
         # used only when that list has exactly one entry. Name termins are
         # tagged with the referent itself.
         tr = None
         li = Utils.asObjectOrNull(tok.termin.tag, list)
         if (li is not None and len(li) == 1):
             tr = li[0]
         else:
             tr = (Utils.asObjectOrNull(tok.termin.tag, Referent))
         if (tr is not None):
             # If a BRAND item is parsed at the preceding token, add it to
             # the referent and extend the match to start at that item.
             tit = TransItemToken.tryParse(tok.begin_token.previous, None,
                                           False, True)
             if (tit is not None and tit.typ == TransItemToken.Typs.BRAND):
                 tr.addSlot(TransportReferent.ATTR_BRAND, tit.value, False,
                            0)
                 tok.begin_token = tit.begin_token
             rt0 = ReferentToken(tr, tok.begin_token, tok.end_token)
             kit.embedToken(rt0)
             t = (rt0)
             continue

Beispiel #18

Datei anzeigen

 def process(self, kit : 'AnalysisKit') -> None:
     """Extract keyword referents from the token chain of ``kit``.

     Pass 1 creates PREDICATE keywords from verbs and OBJECT keywords from
     the words of noun phrases, plus a composite OBJECT keyword for noun
     phrases that yield more than one word. Pass 2 unites adjacent OBJECT
     keywords joined by "OF" or by a genitive. Finally the referents are
     optionally sorted by rank and an annotation is optionally registered.

     Args:
         kit: analysis kit whose token chain is scanned and modified in
             place via ``kit.embed_token``.
     """
     # Main keyword extraction routine. (The original comment said
     # "phones", apparently copied from another analyzer.)
     ad = kit.get_analyzer_data(self)
     # Run a DenominationAnalyzer ad hoc when none is active in the
     # processor.
     has_denoms = False
     for a in kit.processor.analyzers: 
         if ((isinstance(a, DenominationAnalyzer)) and not a.ignore_this_analyzer): 
             has_denoms = True
     if (not has_denoms): 
         a = DenominationAnalyzer()
         a.process(kit)
     li = list()
     tmp = io.StringIO()
     tmp2 = list()
     # max0_ is the total token count; together with the running index
     # `cur` it is passed to __set_rank.
     max0_ = 0
     t = kit.first_token
     while t is not None: 
         max0_ += 1
         t = t.next0_
     # Pass 1. The first_pass flag emulates a for-loop whose step must also
     # run after `continue`.
     cur = 0
     t = kit.first_token
     first_pass3292 = True
     while True:
         if first_pass3292: first_pass3292 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         # Tokens that already carry a referent are delegated to
         # __add_referents, which returns the token to continue from.
         r = t.get_referent()
         if (r is not None): 
             t = self.__add_referents(ad, t, cur, max0_)
             continue
         if (not (isinstance(t, TextToken))): 
             continue
         if (not t.chars.is_letter or (t.length_char < 3)): 
             continue
         term = t.term
         # "ЕСТЬ" is kept only when the previous text token is a verb.
         if (term == "ЕСТЬ"): 
             if ((isinstance(t.previous, TextToken)) and t.previous.morph.class0_.is_verb): 
                 pass
             else: 
                 continue
         npt = None
         npt = NounPhraseHelper.try_parse(t, Utils.valToEnum((NounPhraseParseAttr.ADJECTIVECANBELAST) | (NounPhraseParseAttr.PARSEPREPOSITION), NounPhraseParseAttr), 0, None)
         if (npt is None): 
             # No noun phrase here: a verb (other than "be", "МОЧЬ",
             # "WOULD") becomes a PREDICATE keyword.
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_verb and not mc.is_preposition): 
                 if (t.is_verb_be): 
                     continue
                 if (t.is_value("МОЧЬ", None) or t.is_value("WOULD", None)): 
                     continue
                 kref = KeywordReferent._new1595(KeywordType.PREDICATE)
                 norm = t.get_normal_case_text(MorphClass.VERB, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
                 if (norm is None): 
                     norm = t.lemma
                 # Drop the trailing two characters ("СЯ") of forms ending
                 # in "ЬСЯ".
                 if (norm.endswith("ЬСЯ")): 
                     norm = norm[0:0+len(norm) - 2]
                 kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
                 drv = DerivateService.find_derivates(norm, True, t.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
                 kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
                 KeywordAnalyzer.__set_rank(kref, cur, max0_)
                 # NOTE(review): kref was already registered two lines
                 # above; the other branches pass kref directly - confirm
                 # the second register_referent call is intended.
                 rt1 = ReferentToken._new734(ad.register_referent(kref), t, t, t.morph)
                 kit.embed_token(rt1)
                 t = (rt1)
                 continue
             continue
         if (npt.internal_noun is not None): 
             continue
         # Skip prepositional phrases ending in "ЦЕЛОМ" / "ЧАСТНОСТИ" and
         # phrases "С ... СТОРОНЫ" (presumably fixed expressions).
         if (npt.end_token.is_value("ЦЕЛОМ", None) or npt.end_token.is_value("ЧАСТНОСТИ", None)): 
             if (npt.preposition is not None): 
                 t = npt.end_token
                 continue
         if (npt.end_token.is_value("СТОРОНЫ", None) and npt.preposition is not None and npt.preposition.normal == "С"): 
             t = npt.end_token
             continue
         # A single-token phrase is skipped when it is a preposition or the
         # adverb "ПОТОМ".
         if (npt.begin_token == npt.end_token): 
             mc = t.get_morph_class_in_dictionary()
             if (mc.is_preposition): 
                 continue
             elif (mc.is_adverb): 
                 if (t.is_value("ПОТОМ", None)): 
                     continue
         else: 
             pass
         # Create one OBJECT keyword per suitable word of the noun phrase;
         # li collects them and t0 tracks the first embedded token.
         li.clear()
         t0 = t
         tt = t
         first_pass3293 = True
         while True:
             if first_pass3293: first_pass3293 = False
             else: tt = tt.next0_
             if (not (tt is not None and tt.end_char <= npt.end_char)): break
             if (not (isinstance(tt, TextToken))): 
                 continue
             # No-op branch (has no effect on the result).
             if (tt.is_value("NATURAL", None)): 
                 pass
             if ((tt.length_char < 3) or not tt.chars.is_letter): 
                 continue
             mc = tt.get_morph_class_in_dictionary()
             # Function words are skipped, except "ОТНОШЕНИЕ".
             if ((mc.is_preposition or mc.is_pronoun or mc.is_personal_pronoun) or mc.is_conjunction): 
                 if (tt.is_value("ОТНОШЕНИЕ", None)): 
                     pass
                 else: 
                     continue
             if (mc.is_misc): 
                 if (MiscHelper.is_eng_article(tt)): 
                     continue
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             norm = tt.lemma
             kref.add_slot(KeywordReferent.ATTR_VALUE, norm, False, 0)
             if (norm != "ЕСТЬ"): 
                 drv = DerivateService.find_derivates(norm, True, tt.morph.language)
                 KeywordAnalyzer.__add_normals(kref, drv, norm)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, tt, tt, tt.morph)
             kit.embed_token(rt1)
             if (tt == t and len(li) == 0): 
                 t0 = (rt1)
             t = (rt1)
             li.append(kref)
         if (len(li) > 1): 
             # Composite keyword for the whole phrase, referencing each
             # word keyword through ATTR_REF.
             kref = KeywordReferent._new1595(KeywordType.OBJECT)
             Utils.setLengthStringIO(tmp, 0)
             tmp2.clear()
             # NOTE(review): has_norm is assigned but never read in this
             # method, and the text written to tmp in this loop is
             # discarded by the setLengthStringIO(tmp, 0) call after it.
             has_norm = False
             for kw in li: 
                 s = kw.get_string_value(KeywordReferent.ATTR_VALUE)
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
                 n = kw.get_string_value(KeywordReferent.ATTR_NORMAL)
                 if (n is not None): 
                     has_norm = True
                     tmp2.append(n)
                 else: 
                     tmp2.append(s)
                 kref.add_slot(KeywordReferent.ATTR_REF, kw, False, 0)
             val = npt.get_normal_case_text(None, MorphNumber.SINGULAR, MorphGender.UNDEFINED, False)
             kref.add_slot(KeywordReferent.ATTR_VALUE, val, False, 0)
             # The normal form is the sorted, space-joined list of the
             # parts' normal forms (or values); it is stored only when it
             # differs from the value.
             Utils.setLengthStringIO(tmp, 0)
             tmp2.sort()
             for s in tmp2: 
                 if (tmp.tell() > 0): 
                     print(' ', end="", file=tmp)
                 print(s, end="", file=tmp)
             norm = Utils.toStringStringIO(tmp)
             if (norm != val): 
                 kref.add_slot(KeywordReferent.ATTR_NORMAL, norm, False, 0)
             kref = (Utils.asObjectOrNull(ad.register_referent(kref), KeywordReferent))
             KeywordAnalyzer.__set_rank(kref, cur, max0_)
             rt1 = ReferentToken._new734(kref, t0, t, npt.morph)
             kit.embed_token(rt1)
             t = (rt1)
     # Pass 2: unite an OBJECT keyword with the following OBJECT keyword
     # when they are joined by "OF" (optionally followed by an English
     # article) or when the second one is genitive and at most one
     # whitespace separates them.
     cur = 0
     t = kit.first_token
     first_pass3294 = True
     while True:
         if first_pass3294: first_pass3294 = False
         else: t = t.next0_; cur += 1
         if (not (t is not None)): break
         kw = Utils.asObjectOrNull(t.get_referent(), KeywordReferent)
         if (kw is None or kw.typ != KeywordType.OBJECT): 
             continue
         if (t.next0_ is None or kw.child_words > 2): 
             continue
         t1 = t.next0_
         if (t1.is_value("OF", None) and (t1.whitespaces_after_count < 3) and t1.next0_ is not None): 
             t1 = t1.next0_
             if ((isinstance(t1, TextToken)) and MiscHelper.is_eng_article(t1) and t1.next0_ is not None): 
                 t1 = t1.next0_
         elif (not t1.morph.case_.is_genitive or t.whitespaces_after_count > 1): 
             continue
         kw2 = Utils.asObjectOrNull(t1.get_referent(), KeywordReferent)
         if (kw2 is None): 
             continue
         if (kw == kw2): 
             continue
         # The united keyword may span at most 3 child words.
         if (kw2.typ != KeywordType.OBJECT or (kw.child_words + kw2.child_words) > 3): 
             continue
         kw_un = KeywordReferent()
         kw_un._union(kw, kw2, MiscHelper.get_text_value(t1, t1, GetTextAttr.NO))
         kw_un = (Utils.asObjectOrNull(ad.register_referent(kw_un), KeywordReferent))
         KeywordAnalyzer.__set_rank(kw_un, cur, max0_)
         rt1 = ReferentToken._new734(kw_un, t, t1, t.morph)
         kit.embed_token(rt1)
         t = (rt1)
     # Optional post-processing controlled by class-level settings.
     if (KeywordAnalyzer.SORT_KEYWORDS_BY_RANK): 
         all0_ = list(ad.referents)
         all0_.sort(key=operator.attrgetter('rank'), reverse=True)
         ad.referents = all0_
     if (KeywordAnalyzer.ANNOTATION_MAX_SENTENCES > 0): 
         ano = AutoannoSentToken.create_annotation(kit, KeywordAnalyzer.ANNOTATION_MAX_SENTENCES)
         if (ano is not None): 
             ad.register_referent(ano)

Beispiel #19

Datei anzeigen

Datei: BracketHelper.py Projekt: MihaJjDa/APCLtask

 def tryParse(t: 'Token',
              typ: 'BracketParseAttr' = BracketParseAttr.NO,
              max_tokens: int = 100) -> 'BracketSequenceToken':
     """ Попробовать восстановить последовательность, обрамляемой кавычками
     
     Args:
         t(Token): 
         typ(BracketParseAttr): параметры выделения
         max_tokens(int): максимально токенов (вдруг забыли закрывающую ккавычку)
     
     """
     t0 = t
     cou = 0
     if (not BracketHelper.canBeStartOfSequence(t0, False, False)):
         return None
     br_list = list()
     br_list.append(BracketHelper.Bracket(t0))
     cou = 0
     crlf = 0
     last = None
     lev = 1
     is_assim = br_list[
         0].char0_ != '«' and BracketHelper.M_ASSYMOPEN_CHARS.find(
             br_list[0].char0_) >= 0
     t = t0.next0_
     first_pass2802 = True
     while True:
         if first_pass2802: first_pass2802 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char):
             break
         last = t
         if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                 or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
             if (t.is_newline_before
                     and (((typ) & (BracketParseAttr.CANBEMANYLINES)))
                     == (BracketParseAttr.NO)):
                 if (t.whitespaces_before_count > 10
                         or BracketHelper.canBeStartOfSequence(
                             t, False, False)):
                     if (t.isChar('(') and not t0.isChar('(')):
                         pass
                     else:
                         last = t.previous
                         break
             bb = BracketHelper.Bracket(t)
             br_list.append(bb)
             if (len(br_list) > 20):
                 break
             if ((len(br_list) == 3 and br_list[1].can_be_open
                  and bb.can_be_close) and BracketHelper.__mustBeCloseChar(
                      bb.char0_, br_list[1].char0_)
                     and BracketHelper.__mustBeCloseChar(
                         bb.char0_, br_list[0].char0_)):
                 ok = False
                 tt = t.next0_
                 while tt is not None:
                     if (tt.is_newline_before):
                         break
                     if (tt.isChar(',')):
                         break
                     if (tt.isChar('.')):
                         tt = tt.next0_
                         while tt is not None:
                             if (tt.is_newline_before):
                                 break
                             elif (tt.isCharOf(BracketHelper.M_OPEN_CHARS)
                                   or tt.isCharOf(
                                       BracketHelper.M_CLOSE_CHARS)):
                                 bb2 = BracketHelper.Bracket(tt)
                                 if (BracketHelper.canBeEndOfSequence(
                                         tt, False, None, False)
                                         and BracketHelper.__canBeCloseChar(
                                             bb2.char0_,
                                             br_list[0].char0_)):
                                     ok = True
                                 break
                             tt = tt.next0_
                         break
                     if (t.isCharOf(BracketHelper.M_OPEN_CHARS)
                             or t.isCharOf(BracketHelper.M_CLOSE_CHARS)):
                         ok = True
                         break
                     tt = tt.next0_
                 if (not ok):
                     break
             if (is_assim):
                 if (bb.can_be_open and not bb.can_be_close
                         and bb.char0_ == br_list[0].char0_):
                     lev += 1
                 elif (bb.can_be_close and not bb.can_be_open and
                       BracketHelper.M_OPEN_CHARS.find(br_list[0].char0_)
                       == BracketHelper.M_CLOSE_CHARS.find(bb.char0_)):
                     lev -= 1
                     if (lev == 0):
                         break
         else:
             cou += 1
             if ((cou) > max_tokens):
                 break
             if ((((typ) & (BracketParseAttr.CANCONTAINSVERBS))) == (
                     BracketParseAttr.NO)):
                 if (t.morph.language.is_cyrillic):
                     if (t.getMorphClassInDictionary() == MorphClass.VERB):
                         if (not t.morph.class0_.is_adjective
                                 and not t.morph.containsAttr(
                                     "страд.з.", None)):
                             if (t.chars.is_all_lower):
                                 norm = t.getNormalCaseText(
                                     None, False, MorphGender.UNDEFINED,
                                     False)
                                 if (not LanguageHelper.endsWith(
                                         norm, "СЯ")):
                                     if (len(br_list) > 1):
                                         break
                                     if (br_list[0].char0_ != '('):
                                         break
                 elif (t.morph.language.is_en):
                     if (t.morph.class0_ == MorphClass.VERB
                             and t.chars.is_all_lower):
                         break
                 r = t.getReferent()
                 if (r is not None and r.type_name == "ADDRESS"):
                     if (not t0.isChar('(')):
                         break
         if ((((typ) & (BracketParseAttr.CANBEMANYLINES))) !=
             (BracketParseAttr.NO)):
             if (t.is_newline_before):
                 if (t.newlines_before_count > 1):
                     break
                 crlf += 1
             continue
         if (t.is_newline_before):
             if (t.whitespaces_before_count > 15):
                 break
             crlf += 1
             if (not t.chars.is_all_lower):
                 if (t.previous is not None and t.previous.isChar('.')):
                     break
             if ((isinstance(t.previous, MetaToken))
                     and BracketHelper.canBeEndOfSequence(
                         (t.previous).end_token, False, None, False)):
                 break
         if (crlf > 1):
             if (len(br_list) > 1):
                 break
             if (crlf > 10):
                 break
         if (t.isChar(';') and t.is_newline_after):
             break
     if ((len(br_list) == 1 and br_list[0].can_be_open and
          (isinstance(last, MetaToken))) and last.is_newline_after):
         if (BracketHelper.canBeEndOfSequence((last).end_token, False, None,
                                              False)):
             return BracketSequenceToken(t0, last)
     if (len(br_list) < 1):
         return None
     i = 1
     while i < (len(br_list) - 1):
         if (br_list[i].char0_ == '<' and br_list[i + 1].char0_ == '>'):
             br_list[i].can_be_open = True
             br_list[i + 1].can_be_close = True
         i += 1
     internals = None
     while len(br_list) > 3:
         i = len(br_list) - 1
         if ((br_list[i].can_be_close and br_list[i - 1].can_be_open
              and not BracketHelper.__canBeCloseChar(
                  br_list[i].char0_, br_list[0].char0_))
                 and BracketHelper.__canBeCloseChar(br_list[i].char0_,
                                                    br_list[i - 1].char0_)):
             del br_list[len(br_list) - 2:len(br_list) - 2 + 2]
             continue
         break
     while len(br_list) >= 4:
         changed = False
         i = 1
         while i < (len(br_list) - 2):
             if ((br_list[i].can_be_open and not br_list[i].can_be_close
                  and br_list[i + 1].can_be_close)
                     and not br_list[i + 1].can_be_open):
                 ok = False
                 if (BracketHelper.__mustBeCloseChar(
                         br_list[i + 1].char0_, br_list[i].char0_)
                         or br_list[i].char0_ != br_list[0].char0_):
                     ok = True
                     if ((i == 1 and ((i + 2) < len(br_list))
                          and br_list[i + 2].char0_ == ')')
                             and br_list[i + 1].char0_ != ')'
                             and BracketHelper.__canBeCloseChar(
                                 br_list[i + 1].char0_,
                                 br_list[i - 1].char0_)):
                         br_list[i + 2] = br_list[i + 1]
                 elif (i > 1 and ((i + 2) < len(br_list))
                       and BracketHelper.__mustBeCloseChar(
                           br_list[i + 2].char0_, br_list[i - 1].char0_)):
                     ok = True
                 if (ok):
                     if (internals is None):
                         internals = list()
                     internals.append(
                         BracketSequenceToken(br_list[i].source,
                                              br_list[i + 1].source))
                     del br_list[i:i + 2]
                     changed = True
                     break
             i += 1
         if (not changed):
             break
     res = None
     if ((len(br_list) >= 4 and br_list[1].can_be_open
          and br_list[2].can_be_close) and br_list[3].can_be_close
             and not br_list[3].can_be_open):
         if (BracketHelper.__canBeCloseChar(br_list[3].char0_,
                                            br_list[0].char0_)):
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[3].source)
             if (br_list[0].source.next0_ != br_list[1].source
                     or br_list[2].source.next0_ != br_list[3].source):
                 res.internal.append(
                     BracketSequenceToken(br_list[1].source,
                                          br_list[2].source))
             if (internals is not None):
                 res.internal.extend(internals)
     if ((res is None and len(br_list) >= 3 and br_list[2].can_be_close)
             and not br_list[2].can_be_open):
         if ((((typ) & (BracketParseAttr.NEARCLOSEBRACKET))) !=
             (BracketParseAttr.NO)):
             if (BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                br_list[0].char0_)):
                 return BracketSequenceToken(br_list[0].source,
                                             br_list[1].source)
         ok = True
         if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                            br_list[0].char0_)
                 and BracketHelper.__canBeCloseChar(br_list[1].char0_,
                                                    br_list[0].char0_)
                 and br_list[1].can_be_close):
             t = br_list[1].source
             while t != br_list[2].source and t is not None:
                 if (t.is_newline_before):
                     ok = False
                     break
                 if (t.chars.is_letter and t.chars.is_all_lower):
                     ok = False
                     break
                 npt = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO,
                                                 0)
                 if (npt is not None):
                     t = npt.end_token
                 t = t.next0_
             if (ok):
                 t = br_list[0].source.next0_
                 while t != br_list[1].source and t is not None:
                     if (t.is_newline_before):
                         return BracketSequenceToken(
                             br_list[0].source, t.previous)
                     t = t.next0_
             lev1 = 0
             tt = br_list[0].source.previous
             first_pass2803 = True
             while True:
                 if first_pass2803: first_pass2803 = False
                 else: tt = tt.previous
                 if (not (tt is not None)): break
                 if (tt.is_newline_after or tt.is_table_control_char):
                     break
                 if (not ((isinstance(tt, TextToken)))):
                     continue
                 if (tt.chars.is_letter or tt.length_char > 1):
                     continue
                 ch = (tt).term[0]
                 if (BracketHelper.__canBeCloseChar(ch, br_list[0].char0_)):
                     lev1 += 1
                 elif (BracketHelper.__canBeCloseChar(
                         br_list[1].char0_, ch)):
                     lev1 -= 1
                     if (lev1 < 0):
                         return BracketSequenceToken(
                             br_list[0].source, br_list[1].source)
         if (ok and BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                   br_list[0].char0_)):
             intern = BracketSequenceToken(br_list[1].source,
                                           br_list[2].source)
             res = BracketSequenceToken(br_list[0].source,
                                        br_list[2].source)
             res.internal.append(intern)
         elif (ok and BracketHelper.__canBeCloseChar(
                 br_list[2].char0_, br_list[1].char0_)
               and br_list[0].can_be_open):
             if (BracketHelper.__canBeCloseChar(br_list[2].char0_,
                                                br_list[0].char0_)):
                 intern = BracketSequenceToken(br_list[1].source,
                                               br_list[2].source)
                 res = BracketSequenceToken(br_list[0].source,
                                            br_list[2].source)
                 res.internal.append(intern)
             elif (len(br_list) == 3):
                 return None
     if (res is None and len(br_list) > 1 and br_list[1].can_be_close):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None
             and len(br_list) > 1 and BracketHelper.__canBeCloseChar(
                 br_list[1].char0_, br_list[0].char0_)):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is None and len(br_list) == 2
             and br_list[0].char0_ == br_list[1].char0_):
         res = BracketSequenceToken(br_list[0].source, br_list[1].source)
     if (res is not None and internals is not None):
         for i in internals:
             if (i.begin_char < res.end_char):
                 res.internal.append(i)
     if (res is None):
         cou = 0
         tt = t0.next0_
         first_pass2804 = True
         while True:
             if first_pass2804: first_pass2804 = False
             else:
                 tt = tt.next0_
                 cou += 1
             if (not (tt is not None)): break
             if (tt.is_table_control_char):
                 break
             if (MiscHelper.canBeStartOfSentence(tt)):
                 break
             if (max_tokens > 0 and cou > max_tokens):
                 break
             mt = Utils.asObjectOrNull(tt, MetaToken)
             if (mt is None):
                 continue
             if (isinstance(mt.end_token, TextToken)):
                 if ((mt.end_token).isCharOf(BracketHelper.M_CLOSE_CHARS)):
                     bb = BracketHelper.Bracket(
                         Utils.asObjectOrNull(mt.end_token, TextToken))
                     if (bb.can_be_close and BracketHelper.__canBeCloseChar(
                             bb.char0_, br_list[0].char0_)):
                         return BracketSequenceToken(t0, tt)
     return res

Beispiel #20

Datei anzeigen

Datei: BusinessFactReferent.py Projekt: MihaJjDa/APCLtask

 def when(self) -> 'Referent':
     """ When the fact happened (a DateReferent or a DateRangeReferent). """
     slot_value = self.getSlotValue(BusinessFactReferent.ATTR_WHEN)
     return Utils.asObjectOrNull(slot_value, Referent)

Beispiel #21

Datei anzeigen

 def __tryAttach(self, t : 'Token', key_word : bool) -> 'ReferentToken':
     """ Try to assemble a BankDataReferent from the tokens starting at t.

     The token chain is walked forward, collecting UriReferent items whose
     scheme is accepted by BankAnalyzer.__isBankReq, plus at most one
     ORGANIZATION referent used as the bank and one used as the
     correspondent bank.

     Args:
         t: first token to examine; None yields None.
         key_word: when False, the scan gives up (returns None) as soon as
             an organization or another non-requisite token is met before
             any URI has been collected.

     Returns:
         A ReferentToken wrapping a new BankDataReferent and spanning from
         the start token to the last accepted token, or None when no URI was
         collected, when neither an "Р/С" nor an "Л/С" scheme is present, or
         when fewer than two URIs were found and no organization was found.
     """
     if (t is None): 
         return None
     t0 = t  # start of the candidate span
     t1 = t  # last token accepted into the span so far
     uris_keys = None  # schemes of the collected URIs (created together with uris)
     uris = None  # collected UriReferent items
     org0_ = None  # organization chosen as the bank
     cor_org = None  # organization chosen as the correspondent bank
     org_is_bank = False  # whether org0_ was recognised as a bank
     empty = 0  # count of tokens that contributed nothing; reset on every hit
     last_uri = None
     # Emulated "for (; t is not None; t = t.next0_)" loop: the flag makes
     # every `continue` below advance t before the next iteration.
     first_pass2749 = True
     while True:
         if first_pass2749: first_pass2749 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (t.is_table_control_char and t != t0): 
             break
         # Commas, prepositions and slashes are skipped without counting as empty.
         if (t.is_comma or t.morph.class0_.is_preposition or t.isCharOf("/\\")): 
             continue
         bank_keyword = False
         # Skip a "full name" / "full title" label pair.
         if (t.isValue("ПОЛНЫЙ", None) and t.next0_ is not None and ((t.next0_.isValue("НАИМЕНОВАНИЕ", None) or t.next0_.isValue("НАЗВАНИЕ", None)))): 
             t = t.next0_.next0_
             if (t is None): 
                 break
         if (t.isValue("БАНК", None)): 
             # The "bank" word may itself already be an ORGANIZATION referent token.
             if ((isinstance(t, ReferentToken)) and t.getReferent().type_name == "ORGANIZATION"): 
                 bank_keyword = True
             tt = t.next0_
             # Step over a noun phrase and an optional colon that follow the keyword.
             npt = NounPhraseHelper.tryParse(tt, NounPhraseParseAttr.NO, 0)
             if (npt is not None): 
                 tt = npt.end_token.next0_
             if (tt is not None and tt.isChar(':')): 
                 tt = tt.next0_
             if (tt is not None): 
                 if (not bank_keyword): 
                     t = tt
                     bank_keyword = True
                 elif (tt.getReferent() is not None and tt.getReferent().type_name == "ORGANIZATION"): 
                     t = tt
         r = t.getReferent()
         if (r is not None and r.type_name == "ORGANIZATION"): 
             is_bank = False
             kk = 0
             rr = r
             # Look at the referent and up to three of its parent referents
             # for KIND equal to "Bank" (third compareStrings argument is True —
             # presumably case-insensitive; confirm against Utils).
             while rr is not None and (kk < 4): 
                 is_bank = Utils.compareStrings(Utils.ifNotNull(rr.getStringValue("KIND"), ""), "Bank", True) == 0
                 if (is_bank): 
                     break
                 rr = rr.parent_referent; kk += 1
             # A preceding "bank" keyword marks the organization as a bank as well.
             if (not is_bank and bank_keyword): 
                 is_bank = True
             # A non-bank organization after an "ИНН" item has been collected rejects the match.
             if (not is_bank and uris is not None and "ИНН" in uris_keys): 
                 return None
             # An organization right after a "К/С" item and the word "В" is
             # stored as the correspondent bank.
             if ((last_uri is not None and last_uri.scheme == "К/С" and t.previous is not None) and t.previous.isValue("В", None)): 
                 cor_org = r
                 t1 = t
             elif (org0_ is None or ((not org_is_bank and is_bank))): 
                 # First organization seen, or a bank replacing a non-bank one.
                 org0_ = r
                 t1 = t
                 org_is_bank = is_bank
                 if (is_bank): 
                     continue
             if (uris is None and not key_word): 
                 return None
             continue
         if (isinstance(r, UriReferent)): 
             u = Utils.asObjectOrNull(r, UriReferent)
             if (uris is None): 
                 # The first URI must be an accepted requisite, and must not be
                 # an "ИНН" that ends its line.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     return None
                 if (u.scheme == "ИНН" and t.is_newline_after): 
                     return None
                 uris = list()
                 uris_keys = list()
             else: 
                 # Later URIs end the scan when they are not accepted, repeat a
                 # scheme, or are an "ИНН" preceded by empty tokens.
                 if (not BankAnalyzer.__isBankReq(u.scheme)): 
                     break
                 if (u.scheme in uris_keys): 
                     break
                 if (u.scheme == "ИНН"): 
                     if (empty > 0): 
                         break
             uris_keys.append(u.scheme)
             uris.append(u)
             last_uri = u
             t1 = t
             empty = 0
             continue
         elif (uris is None and not key_word and not org_is_bank): 
             return None
         # Geographic and address referents are tolerated but count as empty.
         if (r is not None and ((r.type_name == "GEO" or r.type_name == "ADDRESS"))): 
             empty += 1
             continue
         if (isinstance(t, TextToken)): 
             if (t.isValue("ПОЛНЫЙ", None) or t.isValue("НАИМЕНОВАНИЕ", None) or t.isValue("НАЗВАНИЕ", None)): 
                 pass
             elif (t.chars.is_letter): 
                 # A term found in the analyzer ontology resets the empty counter;
                 # any other word increments it.
                 tok = BankAnalyzer.__m_ontology.tryParse(t, TerminParseAttr.NO)
                 if (tok is not None): 
                     t = tok.end_token
                     empty = 0
                 else: 
                     empty += 1
                     if (t.is_newline_before): 
                         # A line starting with "<noun phrase>:" ends the scan.
                         nnn = NounPhraseHelper.tryParse(t, NounPhraseParseAttr.NO, 0)
                         if (nnn is not None and nnn.end_token.next0_ is not None and nnn.end_token.next0_.isChar(':')): 
                             break
                 if (uris is None): 
                     break
         if (empty > 2): 
             break
         if (empty > 0 and t.isChar(':') and t.is_newline_after): 
             break
         # A number at a line start that is not followed by a letter token ends the scan.
         if (((isinstance(t, NumberToken)) and t.is_newline_before and t.next0_ is not None) and not t.next0_.chars.is_letter): 
             break
     if (uris is None): 
         return None
     # At least one of the "Р/С" / "Л/С" schemes is mandatory.
     if (not "Р/С" in uris_keys and not "Л/С" in uris_keys): 
         return None
     ok = False  # NOTE(review): assigned but never read in this function
     if ((len(uris) < 2) and org0_ is None): 
         return None
     bdr = BankDataReferent()
     for u in uris: 
         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     if (org0_ is not None): 
         bdr.addSlot(BankDataReferent.ATTR_BANK, org0_, False, 0)
     if (cor_org is not None): 
         bdr.addSlot(BankDataReferent.ATTR_CORBANK, cor_org, False, 0)
     # If the token just before the span is an organization, copy its accepted
     # URI slots whose schemes were not collected during the scan.
     org0 = (None if t0.previous is None else t0.previous.getReferent())
     if (org0 is not None and org0.type_name == "ORGANIZATION"): 
         for s in org0.slots: 
             if (isinstance(s.value, UriReferent)): 
                 u = Utils.asObjectOrNull(s.value, UriReferent)
                 if (BankAnalyzer.__isBankReq(u.scheme)): 
                     if (not u.scheme in uris_keys): 
                         bdr.addSlot(BankDataReferent.ATTR_ITEM, u, False, 0)
     return ReferentToken(bdr, t0, t1)

Beispiel #22

Datei anzeigen

Datei: BusinessFactReferent.py Projekt: MihaJjDa/APCLtask

 def who(self) -> 'Referent':
     """ Who (active voice). """
     slot_value = self.getSlotValue(BusinessFactReferent.ATTR_WHO)
     return Utils.asObjectOrNull(slot_value, Referent)

Beispiel #23

Datei anzeigen

Datei: TitlePageReferent.py Projekt: pullenti/PullentiPython

 def date(self) -> 'DateReferent':
     """ Date. """
     slot_value = self.get_slot_value(TitlePageReferent.ATTR_DATE)
     return Utils.asObjectOrNull(slot_value, DateReferent)

Beispiel #24

Datei anzeigen

Datei: BusinessFactReferent.py Projekt: MihaJjDa/APCLtask

 def whom(self) -> 'Referent':
     """ Whom (passive voice). """
     slot_value = self.getSlotValue(BusinessFactReferent.ATTR_WHOM)
     return Utils.asObjectOrNull(slot_value, Referent)

Beispiel #25

Datei anzeigen

Datei: TitlePageReferent.py Projekt: pullenti/PullentiPython

 def city(self) -> 'GeoReferent':
     """ City. """
     slot_value = self.get_slot_value(TitlePageReferent.ATTR_CITY)
     return Utils.asObjectOrNull(slot_value, GeoReferent)

Beispiel #26

Datei anzeigen

Datei: GeoReferent.py Projekt: MihaJjDa/APCLtask

 def canBeEquals(self, obj : 'Referent', typ : 'EqualType') -> bool:
     """ Decide whether this geo referent and obj may denote the same object.

     Args:
         obj: candidate referent; anything Utils.asObjectOrNull does not
             accept as a GeoReferent yields False.
         typ: comparison mode, only forwarded to the recursive comparison
             of the `higher` referents.

     Returns:
         True when no check below rejects the pair, otherwise False.
     """
     other = Utils.asObjectOrNull(obj, GeoReferent)
     if other is None:
         return False
     # Identical alpha2 codes are accepted without any further checks.
     if other.alpha2 is not None and other.alpha2 == self.alpha2:
         return True
     if self.is_city != other.is_city or self.is_union != other.is_union:
         return False
     if self.is_union:
         # Unions match only when their ATTR_REF slots are the same in both directions.
         if not all(obj.findSlot(GeoReferent.ATTR_REF, slot.value, True) is not None
                    for slot in self.slots
                    if slot.type_name == GeoReferent.ATTR_REF):
             return False
         if not all(self.findSlot(GeoReferent.ATTR_REF, slot.value, True) is not None
                    for slot in obj.slots
                    if slot.type_name == GeoReferent.ATTR_REF):
             return False
         return True
     own_ref = Utils.asObjectOrNull(
         self.getSlotValue(GeoReferent.ATTR_REF), Referent)
     other_ref = Utils.asObjectOrNull(
         other.getSlotValue(GeoReferent.ATTR_REF), Referent)
     if own_ref is not None and other_ref is not None and own_ref != other_ref:
         return False
     own_is_area = self.is_region or self.is_state
     other_is_area = other.is_region or other.is_state
     if own_is_area != other_is_area:
         # Both outcomes of the territory comparison reject the pair; the
         # comparison is kept so the same attributes are still read.
         if self.is_territory != other.is_territory:
             return False
         return False
     # At least one ATTR_NAME slot value has to be present in the other referent.
     shares_name = any(
         other.findSlot(slot.type_name, slot.value, True) is not None
         for slot in self.slots
         if slot.type_name == GeoReferent.ATTR_NAME)
     if not shares_name:
         return False
     if self.is_region and other.is_region:
         own_types = self.typs
         other_types = other.typs
         compatible = False
         # Every pair is examined (no early exit): a type is compatible when it
         # is listed in the other referent or one name ends with the other.
         for own_type in own_types:
             if own_type in other_types:
                 compatible = True
                 continue
             for other_type in other_types:
                 if (LanguageHelper.endsWith(other_type, own_type)
                         or LanguageHelper.endsWith(own_type, other_type)):
                     compatible = True
         if not compatible:
             return False
     if self.higher is not None and other.higher is not None:
         if GeoReferent.__checkRoundDep(self) or GeoReferent.__checkRoundDep(other):
             return False
         # The higher referents must match directly or with one level skipped
         # on either side.
         higher_matches = (
             self.higher.canBeEquals(other.higher, typ)
             or (other.higher.higher is not None
                 and self.higher.canBeEquals(other.higher.higher, typ))
             or (self.higher.higher is not None
                 and self.higher.higher.canBeEquals(other.higher, typ)))
         if not higher_matches:
             return False
     return True

Beispiel #27

Datei anzeigen

 def process(self, kit: 'AnalysisKit') -> None:
     """ Find book-link references in the token chain of kit and embed them.

     The chain is walked twice. The first walk parses references inside
     parenthesised spans longer than 70 and shorter than 400 characters
     and references that start a line, registers their referents and
     remembers the numbered ones. The second walk parses short inline
     references on text tokens and fills them in from the first remembered
     reference with the same number that occurs later in the text.

     Args:
         kit: analysis kit providing the first token, the analyzer data and
             the token embedding operation.
     """
     data = kit.getAnalyzerData(self)

     def link_book(parts):
         # When a second token is present its referent becomes the `book`
         # of the first one, and the first token is re-anchored on it
         # wherever their character bounds coincide.
         if len(parts) < 2:
             return
         target, book_tok = parts[0], parts[1]
         book_tok.referent = data.registerReferent(book_tok.referent)
         kit.embedToken(book_tok)
         target.referent.book = Utils.asObjectOrNull(
             book_tok.referent, BookLinkReferent)
         if target.begin_char == book_tok.begin_char:
             target.begin_token = book_tok
         if target.end_char == book_tok.end_char:
             target.end_token = book_tok

     lit_block_level = 0  # kept within 0..5; positive values are passed on as a flag
     refs_by_number = {}
     tok = kit.first_token
     started = False
     # Emulated for-loop: every `continue` advances tok before the next pass.
     while True:
         if started:
             tok = tok.next0_
         else:
             started = True
         if tok is None:
             break
         if tok.isChar('('):
             bracket = BracketHelper.tryParse(tok, BracketParseAttr.NO, 100)
             if (bracket is not None and bracket.length_char > 70
                     and bracket.length_char < 400):
                 ends_statement = bracket.is_newline_after or (
                     bracket.end_token.next0_ is not None
                     and bracket.end_token.next0_.isCharOf(".;"))
                 if ends_statement:
                     parsed = BookLinkAnalyzer.__tryParse(
                         tok.next0_, False, bracket.end_char)
                     if parsed is not None and len(parsed) >= 1:
                         link_book(parsed)
                         inline = parsed[0]
                         # The inline reference covers the whole bracket span.
                         inline.begin_token = tok
                         inline.end_token = bracket.end_token
                         inline.referent.typ = BookLinkRefType.INLINE
                         inline.referent = data.registerReferent(
                             inline.referent)
                         kit.embedToken(inline)
                         tok = inline
                         continue
         if not tok.is_newline_before:
             continue
         if lit_block_level <= 0:
             after_start = BookLinkToken.parseStartOfLitBlock(tok)
             if after_start is not None:
                 lit_block_level = 5
                 tok = after_start
                 continue
         parsed = BookLinkAnalyzer.__tryParse(tok, lit_block_level > 0, 0)
         if parsed is None or len(parsed) < 1:
             lit_block_level = max(lit_block_level - 1, 0)
             continue
         lit_block_level = min(lit_block_level + 1, 5)
         link_book(parsed)
         link_ref = Utils.asObjectOrNull(parsed[0].referent,
                                         BookLinkRefReferent)
         link_ref = Utils.asObjectOrNull(data.registerReferent(link_ref),
                                         BookLinkRefReferent)
         parsed[0].referent = link_ref
         kit.embedToken(parsed[0])
         tok = parsed[0]
         if link_ref.number is not None:
             holder = RefOutArgWrapper(None)
             found = Utils.tryGetValue(refs_by_number, link_ref.number,
                                       holder)
             same_number = holder.value
             if not found:
                 same_number = list()
                 refs_by_number[link_ref.number] = same_number
             same_number.append(link_ref)

     tok = kit.first_token
     started = False
     while True:
         if started:
             tok = tok.next0_
         else:
             started = True
         if tok is None:
             break
         if not isinstance(tok, TextToken):
             continue
         short = BookLinkAnalyzer.__tryParseShortInline(tok)
         if short is None:
             continue
         link_ref = Utils.asObjectOrNull(short.referent, BookLinkRefReferent)
         holder = RefOutArgWrapper(None)
         found = Utils.tryGetValue(refs_by_number,
                                   Utils.ifNotNull(link_ref.number, ""),
                                   holder)
         candidates = holder.value
         if not found:
             continue
         # First remembered reference whose first occurrence starts after tok.
         later = next(
             (cand for cand in candidates
              if tok.begin_char < cand.occurrence[0].begin_char),
             None)
         if later is None:
             continue
         link_ref.book = later.book
         if link_ref.pages is None:
             link_ref.pages = later.pages
         link_ref.typ = BookLinkRefType.INLINE
         link_ref = Utils.asObjectOrNull(data.registerReferent(link_ref),
                                         BookLinkRefReferent)
         short.referent = link_ref
         kit.embedToken(short)
         tok = short

Beispiel #28

Datei anzeigen

 def ref(self) -> 'Referent':
     """ Value of the ATTR_REF slot, passed through Utils.asObjectOrNull with Referent as the target type. """
     slot_value = self.getSlotValue(InstrumentBlockReferent.ATTR_REF)
     return Utils.asObjectOrNull(slot_value, Referent)

Beispiel #29

Datei anzeigen

 def _process(begin : 'Token', max_char_pos : int, kit : 'AnalysisKit', end_token : 'Token') -> 'TitlePageReferent':
     end_token.value = begin
     res = TitlePageReferent()
     term = None
     lines = Line.parse(begin, 30, 1500, max_char_pos)
     if (len(lines) < 1): 
         return None
     cou = len(lines)
     min_newlines_count = 10
     lines_count_stat = dict()
     i = 0
     while i < len(lines): 
         if (TitleNameToken.can_be_start_of_text_or_content(lines[i].begin_token, lines[i].end_token)): 
             cou = i
             break
         j = lines[i].newlines_before_count
         if (i > 0 and j > 0): 
             if (not j in lines_count_stat): 
                 lines_count_stat[j] = 1
             else: 
                 lines_count_stat[j] += 1
         i += 1
     max0_ = 0
     for kp in lines_count_stat.items(): 
         if (kp[1] > max0_): 
             max0_ = kp[1]
             min_newlines_count = kp[0]
     end_char = (lines[cou - 1].end_char if cou > 0 else 0)
     if (max_char_pos > 0 and end_char > max_char_pos): 
         end_char = max_char_pos
     names = list()
     i = 0
     while i < cou: 
         if (i == 6): 
             pass
         j = i
         while (j < cou) and (j < (i + 5)): 
             if (i == 6 and j == 8): 
                 pass
             if (j > i): 
                 if (lines[j - 1].is_pure_en and lines[j].is_pure_ru): 
                     break
                 if (lines[j - 1].is_pure_ru and lines[j].is_pure_en): 
                     break
                 if (lines[j].newlines_before_count >= (min_newlines_count * 2)): 
                     break
             ttt = TitleNameToken.try_parse(lines[i].begin_token, lines[j].end_token, min_newlines_count)
             if (ttt is not None): 
                 if (lines[i].is_pure_en): 
                     ttt.morph.language = MorphLang.EN
                 elif (lines[i].is_pure_ru): 
                     ttt.morph.language = MorphLang.RU
                 names.append(ttt)
             j += 1
         i += 1
     TitleNameToken.sort(names)
     name_rt = None
     if (len(names) > 0): 
         i0 = 0
         if (names[i0].morph.language.is_en): 
             ii = 1
             while ii < len(names): 
                 if (names[ii].morph.language.is_ru and names[ii].rank > 0): 
                     i0 = ii
                     break
                 ii += 1
         term = res._add_name(names[i0].begin_name_token, names[i0].end_name_token)
         if (names[i0].type_value is not None): 
             res._add_type(names[i0].type_value)
         if (names[i0].speciality is not None): 
             res.speciality = names[i0].speciality
         rt = ReferentToken(res, names[i0].begin_token, names[i0].end_token)
         if (kit is not None): 
             kit.embed_token(rt)
         else: 
             res.add_occurence(TextAnnotation(rt.begin_token, rt.end_token))
         end_token.value = rt.end_token
         name_rt = rt
         if (begin.begin_char == rt.begin_char): 
             begin = (rt)
     if (term is not None and kit is not None): 
         t = kit.first_token
         first_pass3397 = True
         while True:
             if first_pass3397: first_pass3397 = False
             else: t = t.next0_
             if (not (t is not None)): break
             tok = term.try_parse(t, TerminParseAttr.NO)
             if (tok is None): 
                 continue
             t0 = t
             t1 = tok.end_token
             if (t1.next0_ is not None and t1.next0_.is_char('.')): 
                 t1 = t1.next0_
             if (BracketHelper.can_be_start_of_sequence(t0.previous, False, False) and BracketHelper.can_be_end_of_sequence(t1.next0_, False, None, False)): 
                 t0 = t0.previous
                 t1 = t1.next0_
             rt = ReferentToken(res, t0, t1)
             kit.embed_token(rt)
             t = (rt)
     pr = PersonRelations()
     pers_typ = TitleItemToken.Types.UNDEFINED
     pers_types = pr.rel_types
     t = begin
     first_pass3398 = True
     while True:
         if first_pass3398: first_pass3398 = False
         else: t = t.next0_
         if (not (t is not None)): break
         if (max_char_pos > 0 and t.begin_char > max_char_pos): 
             break
         if (t == name_rt): 
             continue
         tpt = TitleItemToken.try_attach(t)
         if (tpt is not None): 
             pers_typ = TitleItemToken.Types.UNDEFINED
             if (tpt.typ == TitleItemToken.Types.TYP): 
                 if (len(res.types) == 0): 
                     res._add_type(tpt.value)
                 elif (len(res.types) == 1): 
                     ty = res.types[0].upper()
                     if (ty == "РЕФЕРАТ"): 
                         res._add_type(tpt.value)
                     elif (ty == "АВТОРЕФЕРАТ"): 
                         if (tpt.value == "КАНДИДАТСКАЯ ДИССЕРТАЦИЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатской диссертации", True, 0)
                         elif (tpt.value == "ДОКТОРСКАЯ ДИССЕРТАЦИЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат докторской диссертации", True, 0)
                         elif (tpt.value == "МАГИСТЕРСКАЯ ДИССЕРТАЦИЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат магистерской диссертации", True, 0)
                         elif (tpt.value == "КАНДИДАТСЬКА ДИСЕРТАЦІЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат кандидатської дисертації", True, 0)
                         elif (tpt.value == "ДОКТОРСЬКА ДИСЕРТАЦІЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат докторської дисертації", True, 0)
                         elif (tpt.value == "МАГІСТЕРСЬКА ДИСЕРТАЦІЯ"): 
                             res.add_slot(TitlePageReferent.ATTR_TYPE, "автореферат магістерської дисертації", True, 0)
                         else: 
                             res._add_type(tpt.value)
                     elif (tpt.value == "РЕФЕРАТ" or tpt.value == "АВТОРЕФЕРАТ"): 
                         if (not tpt.value in ty): 
                             res._add_type(tpt.value)
             elif (tpt.typ == TitleItemToken.Types.SPECIALITY): 
                 if (res.speciality is None): 
                     res.speciality = tpt.value
             elif (tpt.typ in pers_types): 
                 pers_typ = tpt.typ
             t = tpt.end_token
             if (t.end_char > end_token.value.end_char): 
                 end_token.value = t
             if (t.next0_ is not None and t.next0_.is_char_of(":-")): 
                 t = t.next0_
             continue
         if (t.end_char > end_char): 
             break
         rli = t.get_referents()
         if (rli is None): 
             continue
         if (not t.is_newline_before and (isinstance(t.previous, TextToken))): 
             s = t.previous.term
             if (s == "ИМЕНИ" or s == "ИМ"): 
                 continue
             if (s == "." and t.previous.previous is not None and t.previous.previous.is_value("ИМ", None)): 
                 continue
         for r in rli: 
             if (isinstance(r, PersonReferent)): 
                 if (r != rli[0]): 
                     continue
                 p = Utils.asObjectOrNull(r, PersonReferent)
                 if (pers_typ != TitleItemToken.Types.UNDEFINED): 
                     if (t.previous is not None and t.previous.is_char('.')): 
                         pers_typ = TitleItemToken.Types.UNDEFINED
                 typ = pr.calc_typ_from_attrs(p)
                 if (typ != TitleItemToken.Types.UNDEFINED): 
                     pr.add(p, typ, 1)
                     pers_typ = typ
                 elif (pers_typ != TitleItemToken.Types.UNDEFINED): 
                     pr.add(p, pers_typ, 1)
                 elif (t.previous is not None and t.previous.is_char('©')): 
                     pers_typ = TitleItemToken.Types.WORKER
                     pr.add(p, pers_typ, 1)
                 else: 
                     tt = t.next0_
                     first_pass3399 = True
                     while True:
                         if first_pass3399: first_pass3399 = False
                         else: tt = tt.next0_
                         if (not (tt is not None)): break
                         rr = tt.get_referent()
                         if (rr == res): 
                             pers_typ = TitleItemToken.Types.WORKER
                             break
                         if (isinstance(rr, PersonReferent)): 
                             if (pr.calc_typ_from_attrs(Utils.asObjectOrNull(r, PersonReferent)) != TitleItemToken.Types.UNDEFINED): 
                                 break
                             else: 
                                 continue
                         if (rr is not None): 
                             break
                         tpt = TitleItemToken.try_attach(tt)
                         if (tpt is not None): 
                             if (tpt.typ != TitleItemToken.Types.TYP and tpt.typ != TitleItemToken.Types.TYPANDTHEME): 
                                 break
                             tt = tpt.end_token
                             if (tt.end_char > end_token.value.end_char): 
                                 end_token.value = tt
                             continue
                     if (pers_typ == TitleItemToken.Types.UNDEFINED): 
                         tt = t.previous
                         while tt is not None: 
                             rr = tt.get_referent()
                             if (rr == res): 
                                 pers_typ = TitleItemToken.Types.WORKER
                                 break
                             if (rr is not None): 
                                 break
                             if ((tt.is_value("СТУДЕНТ", None) or tt.is_value("СТУДЕНТКА", None) or tt.is_value("СЛУШАТЕЛЬ", None)) or tt.is_value("ДИПЛОМНИК", None) or tt.is_value("ИСПОЛНИТЕЛЬ", None)): 
                                 pers_typ = TitleItemToken.Types.WORKER
                                 break
                             tpt = TitleItemToken.try_attach(tt)
                             if (tpt is not None and tpt.typ != TitleItemToken.Types.TYP): 
                                 break
                             tt = tt.previous
                     if (pers_typ != TitleItemToken.Types.UNDEFINED): 
                         pr.add(p, pers_typ, 1)
                     else: 
                         pr.add(p, pers_typ, 0.5)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
                 continue
             if (r == rli[0]): 
                 pers_typ = TitleItemToken.Types.UNDEFINED
             if (isinstance(r, DateReferent)): 
                 if (res.date is None): 
                     res.date = Utils.asObjectOrNull(r, DateReferent)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
             elif (isinstance(r, GeoReferent)): 
                 if (res.city is None and r.is_city): 
                     res.city = Utils.asObjectOrNull(r, GeoReferent)
                     if (t.end_char > end_token.value.end_char): 
                         end_token.value = t
             if (isinstance(r, OrganizationReferent)): 
                 org0_ = Utils.asObjectOrNull(r, OrganizationReferent)
                 if ("курс" in org0_.types and org0_.number is not None): 
                     i = 0
                     wrapi2673 = RefOutArgWrapper(0)
                     inoutres2674 = Utils.tryParseInt(org0_.number, wrapi2673)
                     i = wrapi2673.value
                     if (inoutres2674): 
                         if (i > 0 and (i < 8)): 
                             res.student_year = i
                 while org0_.higher is not None: 
                     if (org0_.kind != OrganizationKind.DEPARTMENT): 
                         break
                     org0_ = org0_.higher
                 if (org0_.kind != OrganizationKind.DEPARTMENT): 
                     if (res.org0_ is None): 
                         res.org0_ = org0_
                     elif (OrganizationReferent.can_be_higher(res.org0_, org0_)): 
                         res.org0_ = org0_
                 if (t.end_char > end_token.value.end_char): 
                     end_token.value = t
             if ((isinstance(r, UriReferent)) or (isinstance(r, GeoReferent))): 
                 if (t.end_char > end_token.value.end_char): 
                     end_token.value = t
     for ty in pers_types: 
         for p in pr.get_persons(ty): 
             if (pr.get_attr_name_for_type(ty) is not None): 
                 res.add_slot(pr.get_attr_name_for_type(ty), p, False, 0)
     if (res.get_slot_value(TitlePageReferent.ATTR_AUTHOR) is None): 
         for p in pr.get_persons(TitleItemToken.Types.UNDEFINED): 
             res.add_slot(TitlePageReferent.ATTR_AUTHOR, p, False, 0)
             break
     if (res.city is None and res.org0_ is not None): 
         s = res.org0_.find_slot(OrganizationReferent.ATTR_GEO, None, True)
         if (s is not None and (isinstance(s.value, GeoReferent))): 
             if (s.value.is_city): 
                 res.city = Utils.asObjectOrNull(s.value, GeoReferent)
     if (res.date is None): 
         t = begin
         first_pass3400 = True
         while True:
             if first_pass3400: first_pass3400 = False
             else: t = t.next0_
             if (not (t is not None and t.end_char <= end_char)): break
             city = Utils.asObjectOrNull(t.get_referent(), GeoReferent)
             if (city is None): 
                 continue
             if (isinstance(t.next0_, TextToken)): 
                 if (t.next0_.is_char_of(":,") or t.next0_.is_hiphen): 
                     t = t.next0_
             rt = t.kit.process_referent(DateAnalyzer.ANALYZER_NAME, t.next0_)
             if (rt is not None): 
                 rt.save_to_local_ontology()
                 res.date = Utils.asObjectOrNull(rt.referent, DateReferent)
                 if (kit is not None): 
                     kit.embed_token(rt)
                 break
     if (len(res.slots) == 0): 
         return None
     else: 
         return res

Beispiel #30

Datei anzeigen

 def try_parse(t: 'Token', items: typing.List['NounPhraseItem'],
               attrs: 'NounPhraseParseAttr') -> 'NounPhraseItem':
     """Try to build a noun-phrase item that starts at token ``t``.

     The token's morphological variants are inspected and sorted into
     adjective candidates (``adj_morph``) and noun candidates
     (``noun_morph``) of a new ``NounPhraseItem``; each candidate must
     pass ``NounPhraseItem.try_accord_variant`` against ``items``.

     Args:
         t: first token of the candidate item; ``None`` yields ``None``.
         items: items already collected for the phrase being parsed.
             May be ``None`` or empty.
         attrs: ``NounPhraseParseAttr`` bit flags; the flags tested here
             are IGNOREPARTICIPLES, REFERENTCANBENOUN,
             PARSENUMERICASADJECTIVE, PARSEPRONOUNS and MULTINOUNS.

     Returns:
         The created ``NounPhraseItem``, or ``None`` when the token is
         rejected by one of the checks below.
     """
     if (t is None):
         return None
     t0 = t
     _can_be_surname = False
     _is_doubt_adj = False
     rt = Utils.asObjectOrNull(t, ReferentToken)
     # A referent token that wraps exactly one text token: parse the inner
     # token and stretch the result over the referent token itself.
     if (rt is not None and rt.begin_token == rt.end_token
             and (isinstance(rt.begin_token, TextToken))):
         res = NounPhraseItem.try_parse(rt.begin_token, items, attrs)
         if (res is not None):
             res.begin_token = res.end_token = t
             res.can_be_noun = True
             return res
     # Any other referent token becomes a noun item whose variants all take
     # the referent's string form as their normal value.
     if (rt is not None):
         res = NounPhraseItem(t, t)
         for m in t.morph.items:
             v = NounPhraseItemTextVar(m, None)
             v.normal_value = str(t.get_referent())
             res.noun_morph.append(v)
         res.can_be_noun = True
         return res
     has_legal_verb = False
     if (isinstance(t, TextToken)):
         # Early rejections that apply to plain text tokens only.
         if (not t.chars.is_letter):
             return None
         str0_ = t.term
         if (str0_[len(str0_) - 1] == 'А' or str0_[len(str0_) - 1] == 'О'):
             for wf in t.morph.items:
                 if ((isinstance(wf, MorphWordForm))
                         and wf.is_in_dictionary):
                     if (wf.class0_.is_verb):
                         mc = t.get_morph_class_in_dictionary()
                         if (not mc.is_noun and
                             (((attrs) &
                               (NounPhraseParseAttr.IGNOREPARTICIPLES)))
                                 == (NounPhraseParseAttr.NO)):
                             if (not LanguageHelper.ends_with_ex(
                                     str0_, "ОГО", "ЕГО", None, None)):
                                 return None
                         has_legal_verb = True
                     if (wf.class0_.is_adverb):
                         if (t.next0_ is None or not t.next0_.is_hiphen):
                             # These adverb-like words are still accepted.
                             if ((str0_ == "ВСЕГО" or str0_ == "ДОМА"
                                  or str0_ == "НЕСКОЛЬКО")
                                     or str0_ == "МНОГО"
                                     or str0_ == "ПОРЯДКА"):
                                 pass
                             else:
                                 return None
                     if (wf.class0_.is_adjective):
                         if (wf.contains_attr("к.ф.", None)):
                             if (t.get_morph_class_in_dictionary() ==
                                     MorphClass.ADJECTIVE):
                                 pass
                             else:
                                 _is_doubt_adj = True
         mc0 = t.morph.class0_
         if (mc0.is_proper_surname and not t.chars.is_all_lower):
             for wf in t.morph.items:
                 if (wf.class0_.is_proper_surname
                         and wf.number != MorphNumber.PLURAL):
                     wff = Utils.asObjectOrNull(wf, MorphWordForm)
                     if (wff is None):
                         continue
                     s = Utils.ifNotNull((Utils.ifNotNull(
                         wff.normal_full, wff.normal_case)), "")
                     if (LanguageHelper.ends_with_ex(
                             s, "ИН", "ЕН", "ЫН", None)):
                         if (not wff.is_in_dictionary):
                             _can_be_surname = True
                         else:
                             return None
                     if (wff.is_in_dictionary
                             and LanguageHelper.ends_with(s, "ОВ")):
                         _can_be_surname = True
         if (mc0.is_proper_name and not t.chars.is_all_lower):
             for wff in t.morph.items:
                 wf = Utils.asObjectOrNull(wff, MorphWordForm)
                 if (wf is None):
                     continue
                 if (wf.normal_case == "ГОР"):
                     continue
                 if (wf.class0_.is_proper_name and wf.is_in_dictionary):
                     if (wf.normal_case is None
                             or not wf.normal_case.startswith("ЛЮБ")):
                         if (mc0.is_adjective
                                 and t.morph.contains_attr("неизм.", None)):
                             pass
                         elif (
                             (((attrs) &
                               (NounPhraseParseAttr.REFERENTCANBENOUN))
                              ) == (NounPhraseParseAttr.REFERENTCANBENOUN)):
                             pass
                         else:
                             # A dictionary proper name is accepted only
                             # after a leading standard adjective.
                             if (items is None or (len(items) < 1)):
                                 return None
                             if (not items[0].is_std_adjective):
                                 return None
         if (mc0.is_adjective and t.morph.items_count == 1):
             if (t.morph.get_indexer_item(0).contains_attr(
                     "в.ср.ст.", None)):
                 return None
         mc1 = t.get_morph_class_in_dictionary()
         if (mc1 == MorphClass.VERB and t.morph.case_.is_undefined):
             return None
         if (((((attrs) & (NounPhraseParseAttr.IGNOREPARTICIPLES)))
              == (NounPhraseParseAttr.IGNOREPARTICIPLES)
              and t.morph.class0_.is_verb and not t.morph.class0_.is_noun)
                 and not t.morph.class0_.is_proper):
             for wf in t.morph.items:
                 if (wf.class0_.is_verb):
                     if (wf.contains_attr("дейст.з.", None)):
                         if (LanguageHelper.ends_with(t.term, "СЯ")):
                             pass
                         else:
                             return None
     t1 = None
     # At most two attempts. The first (k == 0) works on t0, optionally
     # jumping over a hyphen to the following token; the second (k == 1)
     # restarts from t1 when the first attempt set it, otherwise from t0.
     for k in range(2):
         t = (Utils.ifNotNull(t1, t0))
         if (k == 0):
             if (((isinstance(t0, TextToken)) and t0.next0_ is not None
                  and t0.next0_.is_hiphen)
                     and t0.next0_.next0_ is not None):
                 if (not t0.is_whitespace_after
                         and not t0.morph.class0_.is_pronoun and
                         not (isinstance(t0.next0_.next0_, NumberToken))):
                     if (not t0.next0_.is_whitespace_after):
                         t = t0.next0_.next0_
                     elif (t0.next0_.next0_.chars.is_all_lower
                           and LanguageHelper.ends_with(t0.term, "О")):
                         t = t0.next0_.next0_
         it = NounPhraseItem._new404(t0, t, _can_be_surname)
         if (t0 == t and (isinstance(t0, ReferentToken))):
             it.can_be_noun = True
             it.morph = MorphCollection(t0.morph)
         can_be_prepos = False
         for v in t.morph.items:
             # wf is None when the variant is not a MorphWordForm.
             wf = Utils.asObjectOrNull(v, MorphWordForm)
             if (v.class0_.is_verb and not v.case_.is_undefined):
                 it.can_be_adj = True
                 it.adj_morph.append(NounPhraseItemTextVar(v, t))
                 continue
             if (v.class0_.is_preposition):
                 can_be_prepos = True
             # --- adjective candidate ---
             if (v.class0_.is_adjective
                     or ((v.class0_.is_pronoun
                          and not v.class0_.is_personal_pronoun
                          and not v.contains_attr("неизм.", None))) or
                 ((v.class0_.is_noun and (isinstance(t, NumberToken))))):
                 if (NounPhraseItem.try_accord_variant(
                         items, (0 if items is None else len(items)), v,
                         False)):
                     if (v.contains_attr("к.ф.", None)):
                         continue
                     if (v.contains_attr("собир.", None)
                             and not (isinstance(t, NumberToken))):
                         if (wf is not None and wf.is_in_dictionary):
                             return None
                         continue
                     if (v.contains_attr("сравн.", None)):
                         continue
                     ok = True
                     if (isinstance(t, TextToken)):
                         s = t.term
                         if (s == "ПРАВО" or s == "ПРАВА"):
                             ok = False
                         elif (LanguageHelper.ends_with(s, "ОВ") and
                               t.get_morph_class_in_dictionary().is_noun):
                             ok = False
                     elif (isinstance(t, NumberToken)):
                         if (v.class0_.is_noun
                                 and t.morph.class0_.is_adjective):
                             ok = False
                         elif (t.morph.class0_.is_noun and ((
                             (attrs) &
                             (NounPhraseParseAttr.PARSENUMERICASADJECTIVE)))
                               == (NounPhraseParseAttr.NO)):
                             ok = False
                     if (ok):
                         it.adj_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_adj = True
                         if (_is_doubt_adj and t0 == t):
                             it.is_doubt_adjective = True
                         if (has_legal_verb and wf is not None
                                 and wf.is_in_dictionary):
                             it.can_be_noun = True
                         if (wf is not None and wf.class0_.is_pronoun):
                             it.can_be_noun = True
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
             # --- noun candidate ---
             can_be_noun_ = False
             if (isinstance(t, NumberToken)):
                 pass
             elif (v.class0_.is_noun
                   or ((wf is not None and wf.normal_case == "САМ"))):
                 can_be_noun_ = True
             elif (v.class0_.is_personal_pronoun):
                 if (items is None or len(items) == 0):
                     can_be_noun_ = True
                 else:
                     for it1 in items:
                         if (it1.is_verb):
                             if (len(items) == 1
                                     and not v.case_.is_nominative):
                                 can_be_noun_ = True
                             else:
                                 return None
                     if (len(items) == 1):
                         if (items[0].can_be_adj_for_personal_pronoun):
                             can_be_noun_ = True
             elif (
                 (v.class0_.is_pronoun and
                  ((items is None or len(items) == 0 or
                    ((len(items) == 1
                      and items[0].can_be_adj_for_personal_pronoun))))
                  and wf is not None) and
                 (((((wf.normal_case == "ТОТ" or wf.normal_full == "ТО"
                      or wf.normal_case == "ТО") or wf.normal_case == "ЭТО"
                     or wf.normal_case == "ВСЕ") or wf.normal_case == "ЧТО"
                    or wf.normal_case == "КТО") or wf.normal_full
                   == "КОТОРЫЙ" or wf.normal_case == "КОТОРЫЙ"))):
                 if (wf.normal_case == "ВСЕ"):
                     if (t.next0_ is not None
                             and t.next0_.is_value("РАВНО", None)):
                         return None
                 can_be_noun_ = True
             elif (wf is not None and ((Utils.ifNotNull(
                     wf.normal_full, wf.normal_case))) == "КОТОРЫЙ"
                   and (((attrs) & (NounPhraseParseAttr.PARSEPRONOUNS)))
                   == (NounPhraseParseAttr.NO)):
                 return None
             elif (v.class0_.is_proper and (isinstance(t, TextToken))):
                 if (t.length_char > 4 or v.class0_.is_proper_name):
                     can_be_noun_ = True
             if (can_be_noun_):
                 added = False
                 # With MULTINOUNS set and every item after the first
                 # flagged conj_before, try the multi-noun accord first.
                 if (items is not None and len(items) > 1 and
                     (((attrs) & (NounPhraseParseAttr.MULTINOUNS))) !=
                     (NounPhraseParseAttr.NO)):
                     ok1 = True
                     ii = 1
                     while ii < len(items):
                         if (not items[ii].conj_before):
                             ok1 = False
                             break
                         ii += 1
                     if (ok1):
                         if (NounPhraseItem.try_accord_variant(
                                 items,
                             (0 if items is None else len(items)), v,
                                 True)):
                             it.noun_morph.append(
                                 NounPhraseItemTextVar(v, t))
                             it.can_be_noun = True
                             it.multi_nouns = True
                             added = True
                 if (not added):
                     if (NounPhraseItem.try_accord_variant(
                             items, (0 if items is None else len(items)), v,
                             False)):
                         it.noun_morph.append(NounPhraseItemTextVar(v, t))
                         it.can_be_noun = True
                         if (v.class0_.is_personal_pronoun
                                 and t.morph.contains_attr("неизм.", None)
                                 and not it.can_be_adj):
                             itt = NounPhraseItemTextVar(v, t)
                             itt.case_ = MorphCase.ALL_CASES
                             itt.number = MorphNumber.UNDEFINED
                             # NOTE(review): this check has no effect; kept
                             # as-is in case normal_value is a property.
                             if (itt.normal_value is None):
                                 pass
                             it.adj_morph.append(itt)
                             it.can_be_adj = True
                     # items may be None (it is guarded everywhere else in
                     # this function), so test it before calling len().
                     elif ((items is not None and len(items) > 0
                            and len(items[0].adj_morph) > 0
                            and items[0].adj_morph[0].number
                            == MorphNumber.PLURAL)
                           and not ((items[0].adj_morph[0].case_)
                                    & v.case_).is_undefined
                           and not items[0].adj_morph[0].class0_.is_verb):
                         if (t.next0_ is not None and t.next0_.is_comma_and
                                 and
                             (isinstance(t.next0_.next0_, TextToken))):
                             npt2 = NounPhraseHelper.try_parse(
                                 t.next0_.next0_, attrs, 0, None)
                             if (npt2 is not None
                                     and npt2.preposition is None
                                     and not ((npt2.morph.case_) & v.case_
                                              & items[0].adj_morph[0].case_
                                              ).is_undefined):
                                 it.noun_morph.append(
                                     NounPhraseItemTextVar(v, t))
                                 it.can_be_noun = True
         if (t0 != t):
             # The item spans "t0 - t": let each variant account for the
             # prefix token t0.
             for v in it.adj_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken),
                                  False)
             for v in it.noun_morph:
                 v.correct_prefix(Utils.asObjectOrNull(t0, TextToken), True)
         if (k == 1 and it.can_be_noun and not it.can_be_adj):
             if (t1 is not None):
                 it.end_token = t1
             else:
                 it.end_token = t0.next0_.next0_
             # Append the end token's text to normal values that do not
             # contain a hyphen yet.
             for v in it.noun_morph:
                 if (v.normal_value is not None
                         and (v.normal_value.find('-') < 0)):
                     v.normal_value = "{0}-{1}".format(
                         v.normal_value,
                         it.end_token.get_normal_case_text(
                             None, MorphNumber.UNDEFINED,
                             MorphGender.UNDEFINED, False))
         if (it.can_be_adj):
             if (NounPhraseItem.__m_std_adjectives.try_parse(
                     it.begin_token, TerminParseAttr.NO) is not None):
                 it.is_std_adjective = True
         if (can_be_prepos and it.can_be_noun):
             # The token may also be a preposition: reject the noun reading
             # when a phrase parsed from here (or from the next token)
             # supports the prepositional reading.
             if (items is not None and len(items) > 0):
                 npt1 = NounPhraseHelper.try_parse(
                     t,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPREPOSITION)
                                     | (NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None and npt1.end_char > t.end_char):
                     return None
             else:
                 npt1 = NounPhraseHelper.try_parse(
                     t.next0_,
                     Utils.valToEnum((NounPhraseParseAttr.PARSEPRONOUNS) |
                                     (NounPhraseParseAttr.PARSEVERBS),
                                     NounPhraseParseAttr), 0, None)
                 if (npt1 is not None):
                     mc = LanguageHelper.get_case_after_preposition(t.lemma)
                     if (not ((mc) & npt1.morph.case_).is_undefined):
                         return None
         if (it.can_be_noun or it.can_be_adj or k == 1):
             if (it.begin_token.morph.class0_.is_pronoun):
                 # Attach a trailing particle (optionally hyphenated) to a
                 # pronoun item.
                 tt2 = it.end_token.next0_
                 if ((tt2 is not None and tt2.is_hiphen
                      and not tt2.is_whitespace_after)
                         and not tt2.is_whitespace_before):
                     tt2 = tt2.next0_
                 if (isinstance(tt2, TextToken)):
                     ss = tt2.term
                     if ((ss == "ЖЕ" or ss == "БЫ" or ss == "ЛИ")
                             or ss == "Ж"):
                         it.end_token = tt2
                     elif (ss == "НИБУДЬ" or ss == "ЛИБО"
                           or (((ss == "ТО" and tt2.previous.is_hiphen))
                               and it.can_be_adj)):
                         it.end_token = tt2
                         for m in it.adj_morph:
                             m.normal_value = "{0}-{1}".format(
                                 m.normal_value, ss)
                             if (m.single_number_value is not None):
                                 m.single_number_value = "{0}-{1}".format(
                                     m.single_number_value, ss)
             return it
         if (t0 == t):
             # Special case: "БИЗНЕС" followed by a token with the same
             # character class gets a second attempt from the next token.
             if (t0.is_value("БИЗНЕС", None) and t0.next0_ is not None
                     and t0.next0_.chars == t0.chars):
                 t1 = t0.next0_
                 continue
             return it
     return None