def __correctModel(self) -> None:
    """Extend the recognised model name with a trailing number/letter suffix.

    Starting from the token after ``self.end_token`` (ignored when it is
    missing or preceded by more than two whitespaces), an optional hyphen
    token is skipped.  If a ``NumberToken`` follows, its value plus any
    single-letter tokens attached to it (no whitespace before, or preceded
    by a hyphen) are appended to ``self.value`` as ``"<value>-<suffix>"``;
    letters are converted to the script of the first character of
    ``self.value`` when ``LanguageHelper`` offers a counterpart.
    ``self.alt_value`` is then recomputed from the new value.

    Finally, a directly attached ``-``, ``\\`` or ``/`` followed (without
    whitespace) by a ``NumberToken`` appends that number to both values.
    ``self.end_token`` is moved forward over everything consumed.
    """
    follower = self.end_token.next0_
    if follower is None or follower.whitespaces_before_count > 2:
        return
    # NOTE(review): isValue is given the literal ":\\/." as a term; this
    # looks like it was meant as a set of separator characters - confirm.
    if follower.isValue(":\\/.", None) or follower.is_hiphen:
        follower = follower.next0_

    if isinstance(follower, NumberToken):
        suffix = io.StringIO()
        suffix.write(str(follower.value))
        base_is_latin = LanguageHelper.isLatinChar(self.value[0])
        self.end_token = follower

        cur = follower.next0_
        while cur is not None:
            is_single_letter = (
                isinstance(cur, TextToken)
                and cur.length_char == 1
                and cur.chars.is_letter
            )
            if not is_single_letter:
                break
            # A letter separated by whitespace only counts when it is
            # glued to a preceding hyphen.
            if cur.is_whitespace_before and not (
                cur.previous is not None and cur.previous.is_hiphen
            ):
                break

            letter = cur.term[0]
            self.end_token = cur
            # chr(0) is the "no counterpart" marker checked by the original.
            if LanguageHelper.isLatinChar(letter) and not base_is_latin:
                mapped = LanguageHelper.getCyrForLat(letter)
                if mapped != chr(0):
                    letter = mapped
            elif LanguageHelper.isCyrillicChar(letter) and base_is_latin:
                mapped = LanguageHelper.getLatForCyr(letter)
                if mapped != chr(0):
                    letter = mapped
            suffix.write(str(letter))
            cur = cur.next0_

        self.value = "{0}-{1}".format(self.value, Utils.toStringStringIO(suffix))
        self.alt_value = MiscHelper.createCyrLatAlternative(self.value)

    # Optional "<sep><number>" tail glued to the current end token.
    last = self.end_token
    if last.is_whitespace_after or last.next0_ is None:
        return
    separator = last.next0_
    if not (separator.is_hiphen or separator.isCharOf("\\/")):
        return
    if separator.is_whitespace_after or not isinstance(separator.next0_, NumberToken):
        return
    self.end_token = separator.next0_
    self.value = "{0}-{1}".format(self.value, self.end_token.value)
    if self.alt_value is not None:
        self.alt_value = "{0}-{1}".format(self.alt_value, self.end_token.value)
def find(self, key: str) -> 'Termin':
    """Return the first termin found for ``key``, or ``None``.

    Keys whose first character is Latin are looked up with
    ``MorphLang.EN``; all other keys are looked up with ``MorphLang.RU``
    and, when that lookup returns ``None``, with ``MorphLang.UA``.

    Args:
        key: the lookup string; a null or empty key yields ``None``.

    Returns:
        The first element of the list returned by the tree lookup, or
        ``None`` when nothing (or an empty list) was found.
    """
    if Utils.isNullOrEmpty(key):
        return None

    if LanguageHelper.isLatinChar(key[0]):
        found = self.__FindInTree(key, MorphLang.EN)
    else:
        found = self.__FindInTree(key, MorphLang.RU)
        if found is None:
            found = self.__FindInTree(key, MorphLang.UA)

    if found is None or len(found) == 0:
        return None
    return found[0]
def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None:
    """Merge the slots of ``obj`` into this referent, filtered by language.

    Steps, in order:

    1. Every slot of ``obj`` is added via ``addSlot``; NAME/TYPE slots are
       skipped when the script of their first character does not match
       ``lang.is_en`` (Latin <-> English) or when the value ends with
       " ССР".
    2. If this referent still has no NAME (respectively TYPE) slot while
       ``obj`` has one, all of ``obj``'s NAME (TYPE) slots are added
       regardless of language.
    3. Redundant TYPE slots are pruned depending on ``is_territory``,
       ``is_state`` and ``is_city``.
    4. Only the first HIGHER slot is kept.
    5. ``_mergeExtReferents(obj)`` is called.

    Args:
        obj: the referent whose slots are merged in.
        lang: language used to filter NAME/TYPE values by script.
    """
    keep_counts = True  # constant in the original; counts are always carried over

    for slot in obj.slots:
        if slot.type_name in (GeoReferent.ATTR_NAME, GeoReferent.ATTR_TYPE):
            text = slot.value
            # Latin-script values belong to English only, and vice versa.
            if bool(LanguageHelper.isLatinChar(text[0])) != bool(lang.is_en):
                continue
            if LanguageHelper.endsWith(text, " ССР"):
                continue
        self.addSlot(slot.type_name, slot.value, False, (slot.count if keep_counts else 0))

    # The language filter may have left us without any name/type at all.
    for attr in (GeoReferent.ATTR_NAME, GeoReferent.ATTR_TYPE):
        if self.findSlot(attr, None, True) is not None:
            continue
        if obj.findSlot(attr, None, True) is None:
            continue
        for slot in obj.slots:
            if slot.type_name == attr:
                self.addSlot(slot.type_name, slot.value, False, (slot.count if keep_counts else 0))

    if self.is_territory:
        state_like_types = ("государство", "держава", "империя", "імперія", "state")
        if self.alpha2 is not None or any(
            self.findSlot(GeoReferent.ATTR_TYPE, type_value, True) is not None
            for type_value in state_like_types
        ):
            territory_slot = self.findSlot(GeoReferent.ATTR_TYPE, "территория", True)
            if territory_slot is not None:
                self.slots.remove(territory_slot)

    if self.is_state:
        for slot in self.slots:
            if slot.type_name == GeoReferent.ATTR_TYPE and str(slot.value) in ("регион", "регіон", "region"):
                # Safe to mutate: iteration stops right after the removal.
                self.slots.remove(slot)
                break

    if self.is_city:
        ru_city = self.findSlot(GeoReferent.ATTR_TYPE, "город", True)
        ua_city = self.findSlot(GeoReferent.ATTR_TYPE, "місто", True)
        en_city = self.findSlot(GeoReferent.ATTR_TYPE, "city", True)
        generic_city = Utils.ifNotNull(ru_city, Utils.ifNotNull(ua_city, en_city))
        if generic_city is not None:
            for other in self.slots:
                if (other.type_name == GeoReferent.ATTR_TYPE
                        and other != generic_city
                        and GeoReferent.__isCity(other.value)):
                    # Another city-type slot exists, so the generic one goes.
                    self.slots.remove(generic_city)
                    break

    higher_seen = False
    pos = 0
    while pos < len(self.slots):
        if self.slots[pos].type_name != GeoReferent.ATTR_HIGHER:
            pos += 1
        elif not higher_seen:
            higher_seen = True
            pos += 1
        else:
            # Do not advance: the next slot has shifted into this position.
            del self.slots[pos]

    self._mergeExtReferents(obj)
def __toFullString(self, last_name_first : bool, lang : 'MorphLang') -> str:
    """Build the full display string for this person.

    Resolution order:

    1. The longest ``ATTR_IDENTITY`` slot value, if any such slot exists.
    2. If a last name is present: last name combined with the first and
       middle names returned by ``__findForSurname`` (initials get a
       trailing ``.``, other parts a trailing space).  For a Cyrillic last
       name, a Latin-script last name found among the slots is appended in
       parentheses, together with its first name when one is found.
    3. Otherwise, if a first name is present: first name, optional middle
       name, prefixed by the result of ``__findShortestKingTitul(False)``
       and suffixed by the nickname when those exist.
    4. ``"?"`` when none of the above applies.

    Args:
        last_name_first: put the last name before the given names; forced
            to ``True`` when the "NAMETYPE" string value is "china".
        lang: not used by this method.

    Returns:
        The formatted name (case-normalised through
        ``MiscHelper.convertFirstCharUpperAndOtherLower``), or ``"?"``.
    """
    id0_ = None
    for a in self.slots:
        if a.type_name == PersonReferent.ATTR_IDENTITY:
            s = str(a.value)
            if id0_ is None or len(s) > len(id0_):
                id0_ = s
    if id0_ is not None:
        return MiscHelper.convertFirstCharUpperAndOtherLower(id0_)

    if self.getStringValue("NAMETYPE") == "china":
        last_name_first = True

    n = self.getStringValue(PersonReferent.ATTR_LASTNAME)
    if n is not None:
        res = io.StringIO()
        if last_name_first:
            print("{0} ".format(n), end="", file=res)
        s = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, n, False)
        if s is not None:
            print("{0}".format(s), end="", file=res)
            print('.' if PersonReferent.__isInitial(s) else ' ', end="", file=res)
            s = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, n, False)
            if s is not None:
                print("{0}".format(s), end="", file=res)
                print('.' if PersonReferent.__isInitial(s) else ' ', end="", file=res)
        if not last_name_first:
            print(n, end="", file=res)
        elif Utils.getCharAtStringIO(res, res.tell() - 1) == ' ':
            # Last name came first: drop the dangling separator at the end.
            Utils.setLengthStringIO(res, res.tell() - 1)

        # len(n) guard: an empty last name must not crash on n[0].
        if len(n) > 0 and LanguageHelper.isCyrillicChar(n[0]):
            nl = None
            for sl in self.slots:
                if sl.type_name == PersonReferent.ATTR_LASTNAME:
                    ss = Utils.asObjectOrNull(sl.value, str)
                    # asObjectOrNull yields None for non-string values; the
                    # original called len(None) here and raised TypeError.
                    if ss is not None and len(ss) > 0 and LanguageHelper.isLatinChar(ss[0]):
                        nl = ss
                        break
            if nl is not None:
                nal = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, nl, False)
                if nal is None:
                    print(" ({0})".format(nl), end="", file=res)
                elif PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION:
                    print(" ({0} {1})".format(nl, nal), end="", file=res)
                else:
                    print(" ({0} {1})".format(nal, nl), end="", file=res)
        return MiscHelper.convertFirstCharUpperAndOtherLower(Utils.toStringStringIO(res))

    n = self.getStringValue(PersonReferent.ATTR_FIRSTNAME)
    if n is None:
        return "?"
    s = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, n, False)
    if s is not None:
        n = "{0} {1}".format(n, s)
    n = MiscHelper.convertFirstCharUpperAndOtherLower(n)
    nik = self.getStringValue(PersonReferent.ATTR_NICKNAME)
    tit = self.__findShortestKingTitul(False)
    # Title and nickname are added after case normalisation, so they keep
    # their own casing.
    if tit is not None:
        n = "{0} {1}".format(tit, n)
    if nik is not None:
        n = "{0} {1}".format(n, nik)
    return n
def parse(t: 'Token', max_char: int, prev: 'LineToken') -> 'LineToken':
    """Parse one logical line starting at ``t`` into a ``LineToken``.

    Tokens are consumed while their ``end_char`` does not exceed
    ``max_char``.  The line ends at a ``:`` or at a ``;`` (the latter marks
    the line as a list item, unless the ``;`` sits next to decree
    referents, in which case scanning goes on), or before a token that
    starts a new text line and does not look like a continuation.
    Bracketed sequences recognised by ``BracketHelper`` are swallowed whole.

    Afterwards the first token decides the list-item markers: a hyphen, a
    "·" bullet (which is then excluded from the span), or a one-character /
    numeric label followed by ``)`` or preceded by a list head/item line.

    Args:
        t: first token of the line.
        max_char: last character position that may be consumed.
        prev: the previously parsed line, or ``None``.

    Returns:
        The parsed line, or ``None`` when ``t`` is ``None`` or already ends
        beyond ``max_char``.
    """
    from pullenti.ner.TextToken import TextToken
    from pullenti.ner.NumberToken import NumberToken
    from pullenti.morph.LanguageHelper import LanguageHelper
    from pullenti.ner.core.BracketParseAttr import BracketParseAttr
    from pullenti.ner.core.BracketHelper import BracketHelper
    from pullenti.ner.decree.DecreeReferent import DecreeReferent

    if t is None or t.end_char > max_char:
        return None

    res = ListHelper.LineToken(t, t)
    while t is not None and t.end_char <= max_char:
        if t.isChar(':'):
            if res.is_newline_before and res.begin_token.isValue("ПРИЛОЖЕНИЕ", "ДОДАТОК"):
                res.is_list_head = True
            res.end_token = t
            break

        if t.isChar(';'):
            before = t.previous
            if before is not None and isinstance(before.getReferent(), DecreeReferent):
                # A ';' attached to decree references does not end the
                # line; it is skipped without extending the span.
                if not t.is_whitespace_after:
                    t = t.next0_
                    continue
                after = t.next0_
                if after is not None and isinstance(after.getReferent(), DecreeReferent):
                    t = after
                    continue
            res.is_list_item = True
            res.end_token = t
            break

        if t.isChar('('):
            br = BracketHelper.tryParse(t, BracketParseAttr.NO, 100)
            if br is not None:
                # Jump over the whole bracketed sequence.
                closing = br.end_token
                res.end_token = closing
                t = closing.next0_
                continue

        if t.is_newline_before and t != res.begin_token:
            if t.previous.is_comma or t.previous.is_and or t.isCharOf("("):
                line_continues = True
            elif t.chars.is_letter or isinstance(t, NumberToken):
                line_continues = t.chars.is_all_lower or t.previous.chars.is_letter
            else:
                line_continues = False
            if not line_continues:
                break

        res.end_token = t
        t = t.next0_

    first = res.begin_token
    if first.is_hiphen:
        res.is_list_item = (first.next0_ is not None and not first.next0_.is_hiphen)
    elif first.isCharOf("·"):
        res.is_list_item = True
        res.begin_token = first.next0_
    elif first.next0_ is not None and (
        first.next0_.isChar(')')
        or (prev is not None and (prev.is_list_item or prev.is_list_head))
    ):
        if first.length_char == 1 or isinstance(first, NumberToken):
            res.is_list_item = True
            if isinstance(first, NumberToken) and first.int_value is not None:
                res.number = first.int_value
            elif isinstance(first, TextToken) and first.length_char == 1:
                label = first.term
                # Letter labels are numbered by their offset from the first
                # capital letter of the alphabet ('А'/'A' -> 0).
                if LanguageHelper.isCyrillicChar(label[0]):
                    res.number = ord(label[0]) - ord('А')
                elif LanguageHelper.isLatinChar(label[0]):
                    res.number = ord(label[0]) - ord('A')
    return res