Example #1
0
 def find(self, key : str) -> 'Termin':
     """Return the first entry found for ``key``, or None.

     The language passed to the tree lookup is chosen from the first
     character of ``key``: a Latin letter selects ``MorphLang.EN``;
     anything else tries ``MorphLang.RU`` and, only when that lookup
     returns None, ``MorphLang.UA``.

     Args:
         key: text to look up; a null/empty key yields None.

     Returns:
         The first element of the lookup result, or None when the
         result is None or empty.
     """
     if (Utils.isNullOrEmpty(key)):
         return None
     if (LanguageHelper.is_latin_char(key[0])):
         found = self.__find_in_tree(key, MorphLang.EN)
     else:
         found = self.__find_in_tree(key, MorphLang.RU)
         # The fallback is taken only for None; an empty RU result is kept as is.
         if (found is None):
             found = self.__find_in_tree(key, MorphLang.UA)
     if (found is None or len(found) == 0):
         return None
     return found[0]
 def __correct_model(self) -> None:
     """Extend this token over a trailing number/letter suffix.

     Step 1: if the token after ``end_token`` (optionally skipping one
     separator token) is a ``NumberToken``, its value plus any directly
     following single-letter tokens are appended to ``self.value`` as
     ``"<value>-<suffix>"``. Letters are converted to the alphabet of the
     first character of ``self.value`` when a counterpart exists, and
     ``self.alt_value`` is recomputed.

     Step 2: if ``end_token`` is then immediately followed (no
     whitespace) by a hyphen or slash and a ``NumberToken``, that number
     is appended to ``value`` (and to ``alt_value`` when it is set).

     ``end_token`` is moved forward over every token consumed.
     """
     nxt = self.end_token.next0_
     if (nxt is None or nxt.whitespaces_before_count > 2):
         return
     # NOTE(review): is_value receives the whole string ":\/." here, while the
     # check further down uses is_char_of("\\/") - confirm which was intended.
     if (nxt.is_value(":\\/.", None) or nxt.is_hiphen):
         nxt = nxt.next0_
     if (isinstance(nxt, NumberToken)):
         suffix = io.StringIO()
         suffix.write(str(nxt.value))
         value_is_latin = LanguageHelper.is_latin_char(self.value[0])
         self.end_token = nxt
         nxt = nxt.next0_
         while (nxt is not None):
             single_letter = (isinstance(nxt, TextToken)) and nxt.length_char == 1 and nxt.chars.is_letter
             if (not single_letter):
                 break
             # A letter separated by whitespace only counts when a hyphen precedes it.
             if (nxt.is_whitespace_before and not (nxt.previous is not None and nxt.previous.is_hiphen)):
                 break
             letter = nxt.term[0]
             self.end_token = nxt
             if (LanguageHelper.is_latin_char(letter) and not value_is_latin):
                 twin = LanguageHelper.get_cyr_for_lat(letter)
                 # chr(0) is the "no counterpart" marker; keep the letter then.
                 if (twin != (chr(0))):
                     letter = twin
             elif (LanguageHelper.is_cyrillic_char(letter) and value_is_latin):
                 twin = LanguageHelper.get_lat_for_cyr(letter)
                 if (twin != (chr(0))):
                     letter = twin
             suffix.write(str(letter))
             nxt = nxt.next0_
         self.value = "{0}-{1}".format(self.value, Utils.toStringStringIO(suffix))
         self.alt_value = MiscHelper.create_cyr_lat_alternative(self.value)
     last = self.end_token
     if (last.is_whitespace_after):
         return
     sep = last.next0_
     if (sep is None or not (sep.is_hiphen or sep.is_char_of("\\/"))):
         return
     if (sep.is_whitespace_after or not (isinstance(sep.next0_, NumberToken))):
         return
     self.end_token = sep.next0_
     self.value = "{0}-{1}".format(self.value, self.end_token.value)
     if (self.alt_value is not None):
         self.alt_value = "{0}-{1}".format(self.alt_value, self.end_token.value)
Example #3
0
 def parse(t : 'Token', max_char : int, prev : 'LineToken') -> 'LineToken':
     """Collect one line starting at ``t`` into a ``ListHelper.LineToken``.

     Tokens are consumed while their ``end_char`` does not exceed
     ``max_char``. The line ends at:

     * a ``:`` (marks a list head when the line starts on a new line with
       "ПРИЛОЖЕНИЕ"/"ДОДАТОК");
     * a ``;`` (marks a list item), except a ``;`` that follows a
       ``DecreeReferent`` and is either not followed by whitespace or is
       followed by another ``DecreeReferent`` - that one is skipped;
     * a token that begins a new line, unless the previous token is a
       comma/"and", the token is ``(``, or the token is a letter/number
       that is all-lower or follows a letter.

     A bracketed span recognised by ``BracketHelper`` is swallowed whole.
     Afterwards the first token decides ``is_list_item`` and ``number``.

     Args:
         t: first token of the line.
         max_char: last character position the line may reach.
         prev: the preceding line, or None.

     Returns:
         The filled ``LineToken``, or None when ``t`` is None or already
         ends beyond ``max_char``.
     """
     from pullenti.morph.LanguageHelper import LanguageHelper
     from pullenti.ner.NumberToken import NumberToken
     from pullenti.ner.TextToken import TextToken
     from pullenti.ner.core.BracketHelper import BracketHelper
     from pullenti.ner.core.BracketParseAttr import BracketParseAttr
     from pullenti.ner.decree.DecreeReferent import DecreeReferent
     if (t is None or t.end_char > max_char):
         return None
     res = ListHelper.LineToken(t, t)
     while (t is not None and t.end_char <= max_char):
         if (t.is_char(':')):
             if (res.is_newline_before and res.begin_token.is_value("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                 res.is_list_head = True
             res.end_token = t
             break
         if (t.is_char(';')):
             glued_to_next = not t.is_whitespace_after
             skip_semicolon = False
             before = t.previous
             if (before is not None and (isinstance(before.get_referent(), DecreeReferent))):
                 if (glued_to_next):
                     skip_semicolon = True
                 else:
                     after = t.next0_
                     skip_semicolon = (after is not None and (isinstance(after.get_referent(), DecreeReferent)))
             if (not skip_semicolon):
                 res.is_list_item = True
                 res.end_token = t
                 break
             # Skipped semicolon: move on without extending end_token.
             t = t.next0_
             continue
         if (t.is_char('(')):
             br = BracketHelper.try_parse(t, BracketParseAttr.NO, 100)
             if (br is not None):
                 # Jump over the whole bracketed span.
                 t = br.end_token
                 res.end_token = t
                 t = t.next0_
                 continue
         if (t.is_newline_before and t != res.begin_token):
             before = t.previous
             if (before.is_comma or before.is_and or t.is_char_of("(")):
                 starts_new_line = False
             elif (t.chars.is_letter or (isinstance(t, NumberToken))):
                 starts_new_line = not (t.chars.is_all_lower or before.chars.is_letter)
             else:
                 starts_new_line = True
             if (starts_new_line):
                 break
         res.end_token = t
         t = t.next0_
     head = res.begin_token
     if (head.is_hiphen):
         # A single leading hyphen is a bullet; a doubled one is not.
         res.is_list_item = (head.next0_ is not None and not head.next0_.is_hiphen)
     elif (head.is_char_of("·")):
         res.is_list_item = True
         res.begin_token = head.next0_
     elif (head.next0_ is not None and (head.next0_.is_char(')') or (prev is not None and (prev.is_list_item or prev.is_list_head)))):
         if (head.length_char == 1 or (isinstance(head, NumberToken))):
             res.is_list_item = True
             if ((isinstance(head, NumberToken)) and head.int_value is not None):
                 res.number = head.int_value
             elif ((isinstance(head, TextToken)) and head.length_char == 1):
                 # Letter markers are numbered by their offset from 'А' / 'A'.
                 letter = head.term[0]
                 if (LanguageHelper.is_cyrillic_char(letter)):
                     res.number = ((ord(letter)) - (ord('А')))
                 elif (LanguageHelper.is_latin_char(letter)):
                     res.number = ((ord(letter)) - (ord('A')))
     return res
 def __to_full_string(self, last_name_first: bool,
                      lang: 'MorphLang') -> str:
     """Assemble one display string from this referent's name slots.

     Resolution order:

     1. The longest ``ATTR_IDENTITY`` slot value (first one on ties),
        if any.
     2. Last name combined with the first/middle names found for it. An
        initial is followed by ``.``, a full name by a space. For a
        Cyrillic last name, a Latin-script last name slot is appended in
        parentheses when one exists.
     3. First name (plus middle name), prefixed with the title from
        ``__find_shortest_king_titul`` and suffixed with the nickname.
     4. ``"?"`` when none of the above is available.

     Args:
         last_name_first: put the last name before the other names;
             forced to True when the "NAMETYPE" value is "china".
         lang: not used by this method.

     Returns:
         The formatted name.
     """
     identity = None
     for slot in self.slots:
         if (slot.type_name == PersonReferent.ATTR_IDENTITY):
             text = str(slot.value)
             if (identity is None or len(text) > len(identity)):
                 identity = text
     if (identity is not None):
         return MiscHelper.convert_first_char_upper_and_other_lower(identity)
     if (self.get_string_value("NAMETYPE") == "china"):
         last_name_first = True
     last = self.get_string_value(PersonReferent.ATTR_LASTNAME)
     if (last is None):
         # No last name: fall back to first name + middle name.
         first = self.get_string_value(PersonReferent.ATTR_FIRSTNAME)
         if (first is None):
             return "?"
         middle = self.__find_for_surname(PersonReferent.ATTR_MIDDLENAME,
                                          first, False)
         if (middle is not None):
             first = "{0} {1}".format(first, middle)
         first = MiscHelper.convert_first_char_upper_and_other_lower(first)
         nik = self.get_string_value(PersonReferent.ATTR_NICKNAME)
         tit = self.__find_shortest_king_titul(False)
         if (tit is not None):
             first = "{0} {1}".format(tit, first)
         if (nik is not None):
             first = "{0} {1}".format(first, nik)
         return first
     # In-memory buffer, so plain write() is enough (no flush needed).
     res = io.StringIO()
     if (last_name_first):
         res.write("{0} ".format(last))
     first = self.__find_for_surname(PersonReferent.ATTR_FIRSTNAME, last,
                                     False)
     if (first is not None):
         res.write("{0}".format(first))
         res.write('.' if PersonReferent.__is_initial(first) else ' ')
         middle = self.__find_for_surname(PersonReferent.ATTR_MIDDLENAME,
                                          last, False)
         if (middle is not None):
             res.write("{0}".format(middle))
             res.write('.' if PersonReferent.__is_initial(middle) else ' ')
     if (not last_name_first):
         res.write(str(last))
     elif (Utils.getCharAtStringIO(res, res.tell() - 1) == ' '):
         # Last name came first: drop the separator left at the end.
         Utils.setLengthStringIO(res, res.tell() - 1)
     if (LanguageHelper.is_cyrillic_char(last[0])):
         latin_last = None
         for slot in self.slots:
             if (slot.type_name != PersonReferent.ATTR_LASTNAME):
                 continue
             candidate = Utils.asObjectOrNull(slot.value, str)
             # The cast helper presumably yields None for a non-string value
             # (verify); skip it rather than fail on len(None).
             if (candidate is None or len(candidate) == 0):
                 continue
             if (LanguageHelper.is_latin_char(candidate[0])):
                 latin_last = candidate
                 break
         if (latin_last is not None):
             latin_first = self.__find_for_surname(
                 PersonReferent.ATTR_FIRSTNAME, latin_last, False)
             if (latin_first is None):
                 res.write(" ({0})".format(latin_last))
             elif (PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION):
                 res.write(" ({0} {1})".format(latin_last, latin_first))
             else:
                 res.write(" ({0} {1})".format(latin_first, latin_last))
     return MiscHelper.convert_first_char_upper_and_other_lower(
         Utils.toStringStringIO(res))