def find(self, key : str) -> 'Termin':
    """Return the first term stored under ``key``, or None.

    The language tree to search is chosen from the first character of
    ``key``: a Latin first character searches only ``MorphLang.EN``; any
    other first character searches ``MorphLang.RU`` and, only if that
    lookup returns None, ``MorphLang.UA``.

    Args:
        key: lookup string; a null or empty key yields None.

    Returns:
        The first element of the list returned by the tree lookup, or
        None when the lookup returned None or an empty list.
    """
    if (Utils.isNullOrEmpty(key)):
        return None
    if (LanguageHelper.is_latin_char(key[0])):
        found = self.__find_in_tree(key, MorphLang.EN)
    else:
        found = self.__find_in_tree(key, MorphLang.RU)
        # The Ukrainian tree is consulted only when the Russian lookup
        # produced None (an empty list does not trigger the fallback).
        if (found is None):
            found = self.__find_in_tree(key, MorphLang.UA)
    if (found is None or len(found) == 0):
        return None
    return found[0]
def __correct_model(self) -> None:
    """Extend this token over a numeric suffix that follows the model name.

    Two extensions are attempted, in order:

    1. If the token after ``end_token`` (optionally preceded by one
       separator token) is a ``NumberToken``, its value plus any directly
       attached single-letter tokens are appended to ``self.value`` as
       ``"<value>-<number><letters>"``, and ``self.alt_value`` is rebuilt
       with ``MiscHelper.create_cyr_lat_alternative``.
    2. If ``end_token`` is then immediately followed (no whitespace) by a
       hyphen, ``\\`` or ``/`` and a ``NumberToken``, that number is
       appended to ``value`` (and to ``alt_value`` when it is set).

    Nothing happens when there is no following token or it is separated
    by more than two whitespaces.
    """
    nxt = self.end_token.next0_
    if (nxt is None or nxt.whitespaces_before_count > 2):
        return
    # Step over a single separator token, if present.
    if (nxt.is_value(":\\/.", None) or nxt.is_hiphen):
        nxt = nxt.next0_
    if (isinstance(nxt, NumberToken)):
        suffix = io.StringIO()
        print(nxt.value, end="", file=suffix)
        # Alphabet of the model name decides how trailing letters are written.
        latin_model = LanguageHelper.is_latin_char(self.value[0])
        self.end_token = nxt
        cur = nxt.next0_
        while (cur is not None):
            if (not ((isinstance(cur, TextToken)) and cur.length_char == 1 and cur.chars.is_letter)):
                break
            # A letter is taken only if glued to the previous token or
            # preceded by a hyphen.
            if (cur.is_whitespace_before and not (cur.previous is not None and cur.previous.is_hiphen)):
                break
            letter = cur.term[0]
            self.end_token = cur
            # Convert the letter to the model name's alphabet when a
            # counterpart exists; chr(0) means "no counterpart".
            if (LanguageHelper.is_latin_char(letter) and not latin_model):
                mapped = LanguageHelper.get_cyr_for_lat(letter)
            elif (LanguageHelper.is_cyrillic_char(letter) and latin_model):
                mapped = LanguageHelper.get_lat_for_cyr(letter)
            else:
                mapped = chr(0)
            if (mapped != (chr(0))):
                letter = mapped
            print(letter, end="", file=suffix)
            cur = cur.next0_
        self.value = "{0}-{1}".format(self.value, Utils.toStringStringIO(suffix))
        self.alt_value = MiscHelper.create_cyr_lat_alternative(self.value)
    # Second stage: "<model>-<number>" / "<model>/<number>" written without spaces.
    last = self.end_token
    separator = last.next0_
    if (last.is_whitespace_after or separator is None):
        return
    if (not (separator.is_hiphen or separator.is_char_of("\\/"))):
        return
    if (separator.is_whitespace_after or not (isinstance(separator.next0_, NumberToken))):
        return
    number = separator.next0_
    self.end_token = number
    self.value = "{0}-{1}".format(self.value, number.value)
    if (self.alt_value is not None):
        self.alt_value = "{0}-{1}".format(self.alt_value, number.value)
def parse(t : 'Token', max_char : int, prev : 'LineToken') -> 'LineToken':
    """Build a ``LineToken`` that starts at ``t`` and classify it as a list line.

    Tokens are consumed from ``t`` while their ``end_char`` does not exceed
    ``max_char``. The line ends at a ``:`` or a ``;`` (with exceptions
    around decree references), or before a token that starts a new text
    line and does not look like a continuation of the previous one.
    Bracketed sequences recognised by ``BracketHelper`` are swallowed whole.

    Args:
        t: first token of the line.
        max_char: last character position the line may reach.
        prev: previously parsed line, or None; its list flags influence
            whether a leading short token counts as a list marker.

    Returns:
        The new ``LineToken`` with ``is_list_head`` / ``is_list_item`` /
        ``number`` filled in where applicable, or None when ``t`` is None
        or already ends beyond ``max_char``.
    """
    from pullenti.morph.LanguageHelper import LanguageHelper
    from pullenti.ner.NumberToken import NumberToken
    from pullenti.ner.TextToken import TextToken
    from pullenti.ner.core.BracketHelper import BracketHelper
    from pullenti.ner.core.BracketParseAttr import BracketParseAttr
    from pullenti.ner.decree.DecreeReferent import DecreeReferent
    if (t is None or t.end_char > max_char):
        return None
    res = ListHelper.LineToken(t, t)
    cur = t
    while (cur is not None and cur.end_char <= max_char):
        if (cur.is_char(':')):
            # A colon always closes the line; the list-head flag is set only
            # for a line that starts on a new text line with this keyword.
            if (res.is_newline_before and res.begin_token.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                res.is_list_head = True
            res.end_token = cur
            break
        if (cur.is_char(';')):
            after_decree = cur.previous is not None and (isinstance(cur.previous.get_referent(), DecreeReferent))
            if (after_decree):
                # A ';' glued to a decree reference, or sitting between two
                # decree references, does not terminate the line; it is
                # skipped without moving end_token.
                if ((not cur.is_whitespace_after) or (cur.next0_ is not None and (isinstance(cur.next0_.get_referent(), DecreeReferent)))):
                    cur = cur.next0_
                    continue
            res.is_list_item = True
            res.end_token = cur
            break
        if (cur.is_char('(')):
            br = BracketHelper.try_parse(cur, BracketParseAttr.NO, 100)
            if (br is not None):
                # Jump over the whole bracketed sequence.
                closing = br.end_token
                res.end_token = closing
                cur = closing.next0_
                continue
        if (cur.is_newline_before and cur != res.begin_token):
            # A token on a new text line continues the current line when the
            # previous token is a comma / "and", the token is "(", or it is a
            # letter/number token that is lower-case or follows a letter token.
            continues_line = (cur.previous.is_comma or cur.previous.is_and or cur.is_char_of("(")
                or ((cur.chars.is_letter or (isinstance(cur, NumberToken)))
                    and (cur.chars.is_all_lower or cur.previous.chars.is_letter)))
            if (not continues_line):
                break
        res.end_token = cur
        cur = cur.next0_
    first = res.begin_token
    if (first.is_hiphen):
        # A single leading hyphen marks a list item; a doubled hyphen does not.
        res.is_list_item = (first.next0_ is not None and not first.next0_.is_hiphen)
    elif (first.is_char_of("·")):
        # Bullet marker: flag the line and drop the bullet from the span.
        res.is_list_item = True
        res.begin_token = first.next0_
    elif (first.next0_ is not None and ((first.next0_.is_char(')') or ((prev is not None and ((prev.is_list_item or prev.is_list_head))))))):
        if (first.length_char == 1 or (isinstance(first, NumberToken))):
            res.is_list_item = True
            if ((isinstance(first, NumberToken)) and first.int_value is not None):
                res.number = first.int_value
            elif ((isinstance(first, TextToken)) and first.length_char == 1):
                # Letter markers are stored as the offset from the first
                # capital letter of the matching alphabet.
                marker = first.term[0]
                if (LanguageHelper.is_cyrillic_char(marker)):
                    res.number = ((ord(marker)) - (ord('А')))
                elif (LanguageHelper.is_latin_char(marker)):
                    res.number = ((ord(marker)) - (ord('A')))
    return res
def __to_full_string(self, last_name_first: bool, lang: 'MorphLang') -> str:
    """Compose a display string for the person from its slots.

    Resolution order:

    1. If any identity slot exists, the longest identity string is returned
       (the first one wins on equal length).
    2. If a last name is present, the result is built from last name, first
       name and middle name (initials get a trailing ``.``, full names a
       trailing space). For a last name starting with a Cyrillic character,
       a Latin-script last name from the slots, if any, is appended in
       parentheses.
    3. Otherwise the first name (plus middle name, title and nickname when
       available) is returned.
    4. ``"?"`` when none of the above applies.

    Args:
        last_name_first: put the last name before the given names; forced
            to True when the ``NAMETYPE`` value is ``"china"``.
        lang: not used by this method.

    Returns:
        The composed name; every path except the first-name-only one and
        ``"?"`` returns the output of
        ``MiscHelper.convert_first_char_upper_and_other_lower``.
    """
    identities = [str(slot.value) for slot in self.slots if slot.type_name == PersonReferent.ATTR_IDENTITY]
    if (len(identities) > 0):
        # max() returns the first of several equally long candidates.
        return MiscHelper.convert_first_char_upper_and_other_lower(max(identities, key=len))
    if (self.get_string_value("NAMETYPE") == "china"):
        last_name_first = True
    surname = self.get_string_value(PersonReferent.ATTR_LASTNAME)
    if (surname is None):
        # No last name: fall back to first name with optional extras.
        given = self.get_string_value(PersonReferent.ATTR_FIRSTNAME)
        if (given is None):
            return "?"
        middle = self.__find_for_surname(PersonReferent.ATTR_MIDDLENAME, given, False)
        if (middle is not None):
            given = "{0} {1}".format(given, middle)
        given = MiscHelper.convert_first_char_upper_and_other_lower(given)
        nickname = self.get_string_value(PersonReferent.ATTR_NICKNAME)
        title = self.__find_shortest_king_titul(False)
        if (title is not None):
            given = "{0} {1}".format(title, given)
        if (nickname is not None):
            given = "{0} {1}".format(given, nickname)
        return given
    out = io.StringIO()
    if (last_name_first):
        out.write("{0} ".format(surname))
    first = self.__find_for_surname(PersonReferent.ATTR_FIRSTNAME, surname, False)
    if (first is not None):
        out.write("{0}".format(first))
        out.write('.' if PersonReferent.__is_initial(first) else ' ')
        # The middle name is looked up only when a first name was found.
        middle = self.__find_for_surname(PersonReferent.ATTR_MIDDLENAME, surname, False)
        if (middle is not None):
            out.write("{0}".format(middle))
            out.write('.' if PersonReferent.__is_initial(middle) else ' ')
    if (not last_name_first):
        out.write(str(surname))
    elif (Utils.getCharAtStringIO(out, out.tell() - 1) == ' '):
        # Last name came first: drop the dangling separator space.
        Utils.setLengthStringIO(out, out.tell() - 1)
    if (LanguageHelper.is_cyrillic_char(surname[0])):
        # Look for a Latin-script spelling of the last name among the slots.
        latin_surname = None
        for slot in self.slots:
            if (slot.type_name != PersonReferent.ATTR_LASTNAME):
                continue
            candidate = Utils.asObjectOrNull(slot.value, str)
            if (len(candidate) > 0 and LanguageHelper.is_latin_char(candidate[0])):
                latin_surname = candidate
                break
        if (latin_surname is not None):
            latin_given = self.__find_for_surname(PersonReferent.ATTR_FIRSTNAME, latin_surname, False)
            if (latin_given is None):
                out.write(" ({0})".format(latin_surname))
            elif (PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION):
                out.write(" ({0} {1})".format(latin_surname, latin_given))
            else:
                out.write(" ({0} {1})".format(latin_given, latin_surname))
    return MiscHelper.convert_first_char_upper_and_other_lower(Utils.toStringStringIO(out))