Esempio n. 1
0
 def __correctModel(self) -> None:
     """Extend ``self.value`` with a numeric/letter suffix that follows it.

     Looks at the tokens right after ``self.end_token``:

     1. If the next token is at most two whitespaces away, an optional
        separator token is skipped; when a ``NumberToken`` follows, its
        value plus any directly attached single-letter tokens are appended
        to ``self.value`` as ``"<value>-<suffix>"``.  Letters are converted
        to the script (Latin/Cyrillic) of the first character of
        ``self.value`` when a counterpart exists.  ``self.alt_value`` is
        recomputed from the new value.
     2. Independently, a hyphen/slash immediately followed by a number
        (no whitespace on either side) is appended the same way, to both
        ``self.value`` and, when set, ``self.alt_value``.

     ``self.end_token`` is moved forward over every token consumed.
     """
     following = self.end_token.next0_
     if following is None or following.whitespaces_before_count > 2:
         return

     # NOTE(review): isValue() is given a multi-character string here while
     # the second half of this method uses isCharOf("\\/") for a similar
     # purpose - confirm whether isCharOf was intended.
     if following.isValue(":\\/.", None) or following.is_hiphen:
         following = following.next0_

     if isinstance(following, NumberToken):
         suffix = io.StringIO()
         print(following.value, end="", file=suffix)
         base_is_latin = LanguageHelper.isLatinChar(self.value[0])
         self.end_token = following

         letter_tok = following.next0_
         while letter_tok is not None:
             is_single_letter = (isinstance(letter_tok, TextToken)
                                 and letter_tok.length_char == 1
                                 and letter_tok.chars.is_letter)
             if not is_single_letter:
                 break
             # A letter separated by whitespace only counts when a hyphen
             # precedes it.
             if (letter_tok.is_whitespace_before
                     and not (letter_tok.previous is not None
                              and letter_tok.previous.is_hiphen)):
                 break

             letter = letter_tok.term[0]
             self.end_token = letter_tok
             # chr(0) is what the conversion helpers are compared against
             # to detect "no counterpart".
             if LanguageHelper.isLatinChar(letter) and not base_is_latin:
                 converted = LanguageHelper.getCyrForLat(letter)
                 if converted != chr(0):
                     letter = converted
             elif LanguageHelper.isCyrillicChar(letter) and base_is_latin:
                 converted = LanguageHelper.getLatForCyr(letter)
                 if converted != chr(0):
                     letter = converted
             print(letter, end="", file=suffix)
             letter_tok = letter_tok.next0_

         self.value = "{0}-{1}".format(self.value,
                                       Utils.toStringStringIO(suffix))
         self.alt_value = MiscHelper.createCyrLatAlternative(self.value)

     # Second stage: "<model>-<number>" / "<model>/<number>" glued together.
     if self.end_token.is_whitespace_after:
         return
     separator = self.end_token.next0_
     if separator is None:
         return
     if not (separator.is_hiphen or separator.isCharOf("\\/")):
         return
     if separator.is_whitespace_after:
         return
     if not isinstance(separator.next0_, NumberToken):
         return

     self.end_token = separator.next0_
     self.value = "{0}-{1}".format(self.value, self.end_token.value)
     if self.alt_value is not None:
         self.alt_value = "{0}-{1}".format(self.alt_value,
                                           self.end_token.value)
Esempio n. 2
0
 def find(self, key: str) -> 'Termin':
     """Return the first entry found for ``key``, or ``None``.

     Keys whose first character is Latin are looked up with
     ``MorphLang.EN``.  All other keys are looked up with ``MorphLang.RU``
     and, only when that lookup returns ``None``, with ``MorphLang.UA``.

     Args:
         key: text to look up; a null/empty key yields ``None``.

     Returns:
         The first element of the lookup result, or ``None`` when the
         result is ``None`` or empty.
     """
     if Utils.isNullOrEmpty(key):
         return None

     if LanguageHelper.isLatinChar(key[0]):
         matches = self.__FindInTree(key, MorphLang.EN)
     else:
         matches = self.__FindInTree(key, MorphLang.RU)
         if matches is None:
             matches = self.__FindInTree(key, MorphLang.UA)

     if matches is None or len(matches) == 0:
         return None
     return matches[0]
Esempio n. 3
0
 def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None:
     """Merge the slots of ``obj`` into this referent and tidy the result.

     Steps, in order:

     1. Copy every slot of ``obj``.  NAME/TYPE slots are skipped when the
        script of their first character does not match ``lang`` (Latin
        values only for English, non-Latin values only for non-English)
        or when the value ends with " ССР".
     2. If this referent still has no NAME (resp. TYPE) slot while ``obj``
        has one, copy all of ``obj``'s NAME (resp. TYPE) slots unfiltered.
     3. Drop type values made redundant by a more specific one
        (territory vs. state-like types, state vs. region, generic city
        word vs. another city type).
     4. Keep only the first HIGHER slot.
     5. Delegate to ``_mergeExtReferents(obj)``.

     Args:
         obj: referent whose slots are merged in.
         lang: language used to filter NAME/TYPE values; only ``is_en``
             is read here.
     """
     with_counts = True

     # 1. Language-filtered copy of all slots.
     for src in obj.slots:
         kind = src.type_name
         if kind == GeoReferent.ATTR_NAME or kind == GeoReferent.ATTR_TYPE:
             text = src.value
             if LanguageHelper.isLatinChar(text[0]):
                 wrong_script = not lang.is_en
             else:
                 wrong_script = lang.is_en
             if wrong_script or LanguageHelper.endsWith(text, " ССР"):
                 continue
         self.addSlot(src.type_name, src.value, False,
                      (src.count if with_counts else 0))

     # 2. Fallback: never end up without a NAME / TYPE the source had.
     for attr in (GeoReferent.ATTR_NAME, GeoReferent.ATTR_TYPE):
         if (self.findSlot(attr, None, True) is None
                 and obj.findSlot(attr, None, True) is not None):
             for src in obj.slots:
                 if src.type_name == attr:
                     self.addSlot(src.type_name, src.value, False,
                                  (src.count if with_counts else 0))

     # 3a. A state-like entity does not also need the "territory" type.
     if self.is_territory:
         state_like = self.alpha2 is not None or any(
             self.findSlot(GeoReferent.ATTR_TYPE, word, True) is not None
             for word in ("государство", "держава", "империя",
                          "імперія", "state"))
         if state_like:
             territory = self.findSlot(GeoReferent.ATTR_TYPE,
                                       "территория", True)
             if territory is not None:
                 self.slots.remove(territory)

     # 3b. A state drops its first "region" type.
     if self.is_state:
         region = next(
             (slot for slot in self.slots
              if slot.type_name == GeoReferent.ATTR_TYPE
              and str(slot.value) in ("регион", "регіон", "region")),
             None)
         if region is not None:
             self.slots.remove(region)

     # 3c. A city drops the generic city word when another city type exists.
     if self.is_city:
         generic_ru = self.findSlot(GeoReferent.ATTR_TYPE, "город", True)
         generic_ua = self.findSlot(GeoReferent.ATTR_TYPE, "місто", True)
         generic_en = self.findSlot(GeoReferent.ATTR_TYPE, "city", True)
         generic = Utils.ifNotNull(generic_ru,
                                   Utils.ifNotNull(generic_ua, generic_en))
         if generic is not None:
             has_other_city_type = any(
                 other.type_name == GeoReferent.ATTR_TYPE
                 and other != generic
                 and GeoReferent.__isCity(other.value)
                 for other in self.slots)
             if has_other_city_type:
                 self.slots.remove(generic)

     # 4. Only the first HIGHER slot survives; delete the rest back to
     #    front so the remaining indices stay valid.
     higher_positions = [
         pos for pos, slot in enumerate(self.slots)
         if slot.type_name == GeoReferent.ATTR_HIGHER
     ]
     for pos in reversed(higher_positions[1:]):
         del self.slots[pos]

     # 5.
     self._mergeExtReferents(obj)
Esempio n. 4
0
 def __toFullString(self, last_name_first : bool, lang : 'MorphLang') -> str:
     """Build a human-readable full name from this referent's slots.

     Resolution order:

     1. If any IDENTITY slot exists, the longest one is returned
        (first-upper/other-lower cased).
     2. Otherwise, if a LASTNAME is present, the result is composed from
        last name, first name and middle name.  Initials are followed by
        ".", full names by a space.  When the last name starts with a
        Cyrillic letter and a Latin-script LASTNAME slot also exists, the
        Latin variant is appended in parentheses.
     3. Otherwise, if a FIRSTNAME is present, it is combined with the
        middle name, the shortest "king" title (prefix) and the nickname
        (suffix).
     4. Otherwise ``"?"`` is returned.

     Args:
         last_name_first: put the last name before the given names.
             Forced to ``True`` when the "NAMETYPE" value is "china".
         lang: not used by this method.

     Returns:
         The formatted name, or ``"?"`` when nothing usable is stored.
     """
     # 1. Explicit identity wins; pick the longest one.
     id0_ = None
     for a in self.slots:
         if a.type_name == PersonReferent.ATTR_IDENTITY:
             s = str(a.value)
             if id0_ is None or len(s) > len(id0_):
                 id0_ = s
     if id0_ is not None:
         return MiscHelper.convertFirstCharUpperAndOtherLower(id0_)

     if self.getStringValue("NAMETYPE") == "china":
         last_name_first = True

     n = self.getStringValue(PersonReferent.ATTR_LASTNAME)
     if n is None:
         # 3./4. No last name: fall back to the first name, if any.
         n = self.getStringValue(PersonReferent.ATTR_FIRSTNAME)
         if n is None:
             return "?"
         s = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, n, False)
         if s is not None:
             n = "{0} {1}".format(n, s)
         n = MiscHelper.convertFirstCharUpperAndOtherLower(n)
         nik = self.getStringValue(PersonReferent.ATTR_NICKNAME)
         tit = self.__findShortestKingTitul(False)
         if tit is not None:
             n = "{0} {1}".format(tit, n)
         if nik is not None:
             n = "{0} {1}".format(n, nik)
         return n

     # 2. Last name known: assemble "<last> <first> <middle>" or
     #    "<first> <middle> <last>".
     res = io.StringIO()
     if last_name_first:
         print("{0} ".format(n), end="", file=res)
     s = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, n, False)
     if s is not None:
         print("{0}".format(s), end="", file=res)
         print('.' if PersonReferent.__isInitial(s) else ' ',
               end="", file=res)
         s = self.__findForSurname(PersonReferent.ATTR_MIDDLENAME, n, False)
         if s is not None:
             print("{0}".format(s), end="", file=res)
             print('.' if PersonReferent.__isInitial(s) else ' ',
                   end="", file=res)
     if not last_name_first:
         print(n, end="", file=res)
     elif Utils.getCharAtStringIO(res, res.tell() - 1) == ' ':
         # Last name came first, so a trailing separator space is left
         # over at the end of the buffer - cut it off.
         Utils.setLengthStringIO(res, res.tell() - 1)

     if LanguageHelper.isCyrillicChar(n[0]):
         # Look for a Latin-script spelling of the last name.
         nl = None
         for sl in self.slots:
             if sl.type_name == PersonReferent.ATTR_LASTNAME:
                 ss = Utils.asObjectOrNull(sl.value, str)
                 # asObjectOrNull may yield None for a non-string value;
                 # skip such slots instead of failing in len().
                 if (ss is not None and len(ss) > 0
                         and LanguageHelper.isLatinChar(ss[0])):
                     nl = ss
                     break
         if nl is not None:
             nal = self.__findForSurname(PersonReferent.ATTR_FIRSTNAME, nl, False)
             if nal is None:
                 print(" ({0})".format(nl), end="", file=res)
             elif PersonReferent.SHOW_LASTNAME_ON_FIRST_POSITION:
                 print(" ({0} {1})".format(nl, nal), end="", file=res)
             else:
                 print(" ({0} {1})".format(nal, nl), end="", file=res)
     return MiscHelper.convertFirstCharUpperAndOtherLower(Utils.toStringStringIO(res))
Esempio n. 5
0
 def parse(t: 'Token', max_char: int, prev: 'LineToken') -> 'LineToken':
     """Read one line of a list starting at token ``t``.

     Tokens are consumed until one of these happens:

     * a ``:`` is reached (the line may be marked as a list head);
     * a ``;`` is reached that is not glued to / surrounded by decree
       references (the line is marked as a list item);
     * a token starting a new text line looks like the start of a new
       item rather than a continuation;
     * the token chain ends or ``max_char`` is exceeded.

     Bracketed spans recognised by ``BracketHelper.tryParse`` are swallowed
     whole.  Afterwards the first token of the line is inspected to detect
     list markers (hyphen, "·", or a number/single letter) and to fill in
     ``is_list_item`` / ``number``.

     Args:
         t: first token of the line.
         max_char: last character position that may be consumed.
         prev: previously parsed line or ``None``; only its
             ``is_list_item`` / ``is_list_head`` flags are read.

     Returns:
         The parsed line token, or ``None`` when ``t`` is ``None`` or
         already ends beyond ``max_char``.
     """
     from pullenti.ner.TextToken import TextToken
     from pullenti.ner.NumberToken import NumberToken
     from pullenti.morph.LanguageHelper import LanguageHelper
     from pullenti.ner.core.BracketParseAttr import BracketParseAttr
     from pullenti.ner.core.BracketHelper import BracketHelper
     from pullenti.ner.decree.DecreeReferent import DecreeReferent
     if t is None or t.end_char > max_char:
         return None

     line = ListHelper.LineToken(t, t)
     cur = t
     while cur is not None and cur.end_char <= max_char:
         if cur.isChar(':'):
             if (line.is_newline_before
                     and line.begin_token.isValue("ПРИЛОЖЕНИЕ", "ДОДАТОК")):
                 line.is_list_head = True
             line.end_token = cur
             break

         if cur.isChar(';'):
             # A semicolon next to decree references is treated as part of
             # the line, not as its terminator.
             keep_going = False
             if (cur.previous is not None
                     and isinstance(cur.previous.getReferent(),
                                    DecreeReferent)):
                 if not cur.is_whitespace_after:
                     keep_going = True
                 elif (cur.next0_ is not None
                       and isinstance(cur.next0_.getReferent(),
                                      DecreeReferent)):
                     keep_going = True
             if keep_going:
                 cur = cur.next0_
                 continue
             line.is_list_item = True
             line.end_token = cur
             break

         if cur.isChar('('):
             bracket = BracketHelper.tryParse(cur, BracketParseAttr.NO, 100)
             if bracket is not None:
                 # Jump over the whole bracketed span.
                 cur = bracket.end_token
                 line.end_token = cur
                 cur = cur.next0_
                 continue

         if cur.is_newline_before and cur != line.begin_token:
             before = cur.previous
             if before.is_comma or before.is_and or cur.isCharOf("("):
                 starts_new_line = False
             elif cur.chars.is_letter or isinstance(cur, NumberToken):
                 # Lower-case start or a letter right before means the
                 # sentence simply wrapped onto the next text line.
                 starts_new_line = not (cur.chars.is_all_lower
                                        or cur.previous.chars.is_letter)
             else:
                 starts_new_line = True
             if starts_new_line:
                 break

         line.end_token = cur
         cur = cur.next0_

     # Classify the line by its first token.
     head = line.begin_token
     if head.is_hiphen:
         line.is_list_item = (head.next0_ is not None
                              and not head.next0_.is_hiphen)
     elif head.isCharOf("·"):
         line.is_list_item = True
         line.begin_token = head.next0_
     elif (head.next0_ is not None
           and (head.next0_.isChar(')')
                or (prev is not None
                    and (prev.is_list_item or prev.is_list_head)))):
         if head.length_char == 1 or isinstance(head, NumberToken):
             line.is_list_item = True
             if isinstance(head, NumberToken) and head.int_value is not None:
                 line.number = head.int_value
             elif isinstance(head, TextToken) and head.length_char == 1:
                 marker = head.term[0]
                 # Letter markers are numbered by their offset from the
                 # first letter of the matching alphabet.
                 if LanguageHelper.isCyrillicChar(marker):
                     line.number = ord(marker) - ord('А')
                 elif LanguageHelper.isLatinChar(marker):
                     line.number = ord(marker) - ord('A')
     return line