Beispiel #1
0
 def addAllAbridges(self,
                    tail_len: int = 0,
                    max_first_len: int = 0,
                    min_first_len: int = 0) -> None:
     """Add all abbreviations (from the first letter up to any consonant).

     The canonical text of the first term is scanned from its penultimate
     character back to the first one; every position whose character is
     rejected by ``LanguageHelper.isCyrillicVowel`` yields a prefix ending
     at that position.

     Args:
         tail_len: when 0, each prefix becomes the first part of a new
             ``Termin.Abridge`` followed by the canonical text of the
             remaining terms. Otherwise the last ``tail_len`` characters
             are kept as a tail and every abbreviation is registered
             through ``addAbridge`` as ``"<prefix>-<tail>"``.
         max_first_len: tail mode only; positions with an index of
             ``max_first_len`` or more are skipped (0 disables the limit).
         min_first_len: non-tail mode only; scanning stops at the first
             accepted position whose index is below ``min_first_len - 1``
             (0 disables the limit).
     """
     if (len(self.terms) == 0):
         return
     first = self.terms[0].canonical_text
     if (tail_len != 0):
         # Tail mode: "<prefix>-<tail>" strings.
         suffix = first[len(first) - tail_len:]
         stem = first[:len(first) - tail_len - 1]
         for pos in reversed(range(len(stem) - 1)):
             if (max_first_len > 0 and pos >= max_first_len):
                 continue
             if (not LanguageHelper.isCyrillicVowel(stem[pos])):
                 self.addAbridge("{0}-{1}".format(stem[:pos + 1], suffix))
         return
     # Plain mode: multi-part Abridge objects.
     for pos in reversed(range(len(first) - 1)):
         if (LanguageHelper.isCyrillicVowel(first[pos])):
             continue
         if (min_first_len > 0 and pos < min_first_len - 1):
             break
         abridge = Termin.Abridge()
         abridge.addPart(first[:pos + 1], False)
         for term in self.terms[1:]:
             abridge.addPart(term.canonical_text, False)
         if (self.abridges is None):
             self.abridges = list()
         self.abridges.append(abridge)
Beispiel #2
0
 def can_has_ref(self, r: 'Referent') -> bool:
     """Check whether referent ``r`` may act as an ATTR_REF of this one.

     The decision is driven by ``self.name``:

     * no name or no referent -> ``False``;
     * ``GeoReferent``: names ending with "президент"/"губернатор" need a
       state or region, "мэр"/"градоначальник" need a city, "глава" is
       always accepted, anything else is rejected;
     * referent with ``type_name == "ORGANIZATION"``: rejected for the
       governor/mayor/president names, for a name containing "министр"
       when the organization has no "министерство" slot, and for a name
       ending with "директор" when it has a "суд" slot; otherwise accepted;
     * any other referent -> ``False``.
     """
     title = self.name
     if (title is None or r is None):
         return False
     if (isinstance(r, GeoReferent)):
         geo = Utils.asObjectOrNull(r, GeoReferent)
         if (LanguageHelper.ends_with_ex(title, "президент", "губернатор",
                                         None, None)):
             return geo.is_state or geo.is_region
         if (title in ("мэр", "градоначальник")):
             return geo.is_city
         return title == "глава"
     if (r.type_name != "ORGANIZATION"):
         return False
     if (LanguageHelper.ends_with(title, "губернатор")
             or title in ("мэр", "градоначальник", "президент")):
         return False
     if ("министр" in title
             and r.find_slot(None, "министерство", True) is None):
         return False
     if (title.endswith("директор")
             and r.find_slot(None, "суд", True) is not None):
         return False
     return True
Beispiel #3
0
 def __init__(self, v : 'MorphRuleVariant'=None, word : str=None) -> None:
     """Create a word form, optionally initialised from a rule variant.

     Args:
         v: rule variant to copy from; when ``None`` only the default
             field values are set.
         word: surface word used to build ``normal_case`` and
             ``normal_full``. If the word ends with ``v.tail`` that tail
             is cut off and the variant's normal / full-normal tail is
             appended to what remains.
     """
     super().__init__(None)
     self.normal_full = None
     self.normal_case = None
     self.misc = None
     self.undef_coef = 0
     self.tag = None
     if v is None:
         return
     v.copy_to(self)
     self.misc = v.misc_info
     self.tag = v

     def swap_tail(new_tail):
         # Replace the variant's tail (when the word carries it) by new_tail.
         stem = word
         if LanguageHelper.ends_with(word, v.tail):
             stem = word[:len(word) - len(v.tail)]
         return stem + new_tail if len(new_tail) > 0 else stem

     if v.normal_tail is not None and word is not None:
         self.normal_case = swap_tail(v.normal_tail)
     if v.full_normal_tail is not None and word is not None:
         self.normal_full = swap_tail(v.full_normal_tail)
Beispiel #4
0
 def _DelSurnameEnd(s : str) -> str:
     """Strip an inflectional ending from an upper-case surname.

     Strings shorter than three characters are returned unchanged.
     Otherwise a final "А"/"У"/"Е" is dropped, a final "ОМ"/"ЫМ" is
     dropped, and a final "Я"/"Ю" preceded by "Н" or "Л" is replaced by
     "Ь". Anything else is returned as is.
     """
     size = len(s)
     if size < 3:
         return s
     if LanguageHelper.endsWithEx(s, "А", "У", "Е", None):
         return s[:size - 1]
     if LanguageHelper.endsWith(s, "ОМ") or LanguageHelper.endsWith(s, "ЫМ"):
         return s[:size - 2]
     if LanguageHelper.endsWithEx(s, "Я", "Ю", None, None):
         if s[size - 2] in ('Н', 'Л'):
             return s[:size - 1] + "Ь"
     return s
Beispiel #5
0
 def __findForSurname(self, attr_name : str, surname : str, find_shortest : bool=False) -> str:
     """Pick a slot value of type ``attr_name`` written in the surname's script.

     A value is a candidate when ``LanguageHelper.isCyrillicChar`` gives
     the same answer for its first character as for the first character
     of ``surname``.

     Args:
         attr_name: slot type to look at.
         surname: surname whose first character defines the script.
         find_shortest: when true the shortest candidate wins, otherwise
             the first one.

     Returns:
         The chosen value as a string, or ``None`` when nothing matches.
     """
     wants_cyrillic = LanguageHelper.isCyrillicChar(surname[0])
     chosen = None
     for slot in self.slots:
         if slot.type_name != attr_name:
             continue
         text = str(slot.value)
         if LanguageHelper.isCyrillicChar(text[0]) != wants_cyrillic:
             continue
         if chosen is None or (find_shortest and len(text) < len(chosen)):
             chosen = text
     return chosen
Beispiel #6
0
 def __correctModel(self) -> None:
     """Extend ``self.value`` with a number (and letters) that follow the token.

     Two independent extensions are attempted:

     1. If the token after ``end_token`` (optionally after one separator
        or hyphen token) is a ``NumberToken``, its value plus any directly
        attached single-letter tokens are appended as ``"<value>-<suffix>"``
        and ``alt_value`` is rebuilt with
        ``MiscHelper.createCyrLatAlternative``.
     2. If ``end_token`` is then immediately followed by a hyphen or a
        ``\\`` / ``/`` character and, without whitespace, by a
        ``NumberToken``, that number is appended to ``value`` (and to
        ``alt_value`` when it is set) after another ``-``.

     ``end_token`` is advanced over every token that gets absorbed.
     """
     tt = self.end_token.next0_
     # Nothing follows, or it is separated by more than two whitespaces.
     if (tt is None or tt.whitespaces_before_count > 2):
         return
     # Step over a single separator / hyphen token.
     if (tt.isValue(":\\/.", None) or tt.is_hiphen):
         tt = tt.next0_
     if (isinstance(tt, NumberToken)):
         # tmp accumulates the suffix: the number followed by letters.
         tmp = io.StringIO()
         print((tt).value, end="", file=tmp)
         # Decided by the first character of the current value; used below
         # to convert appended letters via getCyrForLat / getLatForCyr.
         is_lat = LanguageHelper.isLatinChar(self.value[0])
         self.end_token = tt
         tt = tt.next0_
         # Loop over following tokens; the flag suppresses the advance on
         # the first iteration (tt already points past the number).
         first_pass3157 = True
         while True:
             if first_pass3157: first_pass3157 = False
             else: tt = tt.next0_
             if (not (tt is not None)): break
             # Only single-letter text tokens are absorbed ...
             if ((isinstance(tt, TextToken)) and tt.length_char == 1
                     and tt.chars.is_letter):
                 # ... and only when glued to the previous token or
                 # preceded by a hyphen.
                 if (not tt.is_whitespace_before or
                     ((tt.previous is not None and tt.previous.is_hiphen))):
                     ch = (tt).term[0]
                     self.end_token = tt
                     # chr(0) is the "no counterpart" marker checked below.
                     ch2 = chr(0)
                     if (LanguageHelper.isLatinChar(ch) and not is_lat):
                         ch2 = LanguageHelper.getCyrForLat(ch)
                         if (ch2 != (chr(0))):
                             ch = ch2
                     elif (LanguageHelper.isCyrillicChar(ch) and is_lat):
                         ch2 = LanguageHelper.getLatForCyr(ch)
                         if (ch2 != (chr(0))):
                             ch = ch2
                     print(ch, end="", file=tmp)
                     continue
             # Any other token ends the suffix.
             break
         self.value = "{0}-{1}".format(self.value,
                                       Utils.toStringStringIO(tmp))
         self.alt_value = MiscHelper.createCyrLatAlternative(self.value)
     # Second extension: "<hyphen or slash><number>" glued to the end token.
     if (not self.end_token.is_whitespace_after
             and self.end_token.next0_ is not None
             and ((self.end_token.next0_.is_hiphen
                   or self.end_token.next0_.isCharOf("\\/")))):
         if (not self.end_token.next0_.is_whitespace_after and
             (isinstance(self.end_token.next0_.next0_, NumberToken))):
             self.end_token = self.end_token.next0_.next0_
             self.value = "{0}-{1}".format(self.value,
                                           (self.end_token).value)
             if (self.alt_value is not None):
                 self.alt_value = "{0}-{1}".format(self.alt_value,
                                                   (self.end_token).value)
Beispiel #7
0
 def _mergeSlots2(self, obj : 'Referent', lang : 'MorphLang') -> None:
     """Merge the slots of ``obj`` into this referent, filtered by language.

     Args:
         obj: referent whose slots are copied.
         lang: language used to filter NAME/TYPE values; only its
             ``is_en`` flag is consulted here.

     After copying, redundant TYPE slots are removed for territories,
     states and cities, only the first ATTR_HIGHER slot is kept, and
     ``_mergeExtReferents(obj)`` is called.
     """
     # Slot counts are always carried over (the flag is constant here).
     merge_statistic = True
     for s in obj.slots: 
         if (s.type_name == GeoReferent.ATTR_NAME or s.type_name == GeoReferent.ATTR_TYPE): 
             nam = s.value
             # Values starting with a Latin char are kept only for English,
             # all other values only for non-English.
             if (LanguageHelper.isLatinChar(nam[0])): 
                 if (not lang.is_en): 
                     continue
             elif (lang.is_en): 
                 continue
             # Values ending with " ССР" are never copied in this pass.
             if (LanguageHelper.endsWith(nam, " ССР")): 
                 continue
         self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
     # If the filtering left this referent without any NAME, take all NAME
     # slots of obj unfiltered.
     if (self.findSlot(GeoReferent.ATTR_NAME, None, True) is None and obj.findSlot(GeoReferent.ATTR_NAME, None, True) is not None): 
         for s in obj.slots: 
             if (s.type_name == GeoReferent.ATTR_NAME): 
                 self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
     # Same fallback for TYPE slots.
     if (self.findSlot(GeoReferent.ATTR_TYPE, None, True) is None and obj.findSlot(GeoReferent.ATTR_TYPE, None, True) is not None): 
         for s in obj.slots: 
             if (s.type_name == GeoReferent.ATTR_TYPE): 
                 self.addSlot(s.type_name, s.value, False, (s.count if merge_statistic else 0))
     # A territory that also has alpha2 or a state/empire type loses its
     # "территория" type slot.
     if (self.is_territory): 
         if (((self.alpha2 is not None or self.findSlot(GeoReferent.ATTR_TYPE, "государство", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "держава", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "империя", True) is not None or self.findSlot(GeoReferent.ATTR_TYPE, "імперія", True) is not None) or self.findSlot(GeoReferent.ATTR_TYPE, "state", True) is not None): 
             s = self.findSlot(GeoReferent.ATTR_TYPE, "территория", True)
             if (s is not None): 
                 self.slots.remove(s)
     # A state loses its first "region" type slot; the break right after
     # remove() keeps the in-loop removal safe.
     if (self.is_state): 
         for s in self.slots: 
             if (s.type_name == GeoReferent.ATTR_TYPE and ((str(s.value) == "регион" or str(s.value) == "регіон" or str(s.value) == "region"))): 
                 self.slots.remove(s)
                 break
     # A city drops the generic "город"/"місто"/"city" type slot when some
     # other type slot also satisfies GeoReferent.__isCity.
     if (self.is_city): 
         s = Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "город", True), Utils.ifNotNull(self.findSlot(GeoReferent.ATTR_TYPE, "місто", True), self.findSlot(GeoReferent.ATTR_TYPE, "city", True)))
         if (s is not None): 
             for ss in self.slots: 
                 if (ss.type_name == GeoReferent.ATTR_TYPE and ss != s and GeoReferent.__isCity(ss.value)): 
                     self.slots.remove(s)
                     break
     # Keep only the first ATTR_HIGHER slot; after a deletion the index is
     # stepped back so the element shifted into place is examined too.
     has = False
     i = 0
     while i < len(self.slots): 
         if (self.slots[i].type_name == GeoReferent.ATTR_HIGHER): 
             if (not has): 
                 has = True
             else: 
                 del self.slots[i]
                 i -= 1
         i += 1
     self._mergeExtReferents(obj)
Beispiel #8
0
 def __getName(self, cyr : bool) -> str:
     """Choose a display name among the ATTR_NAME slots.

     Two passes are made over the slots. The first pass only accepts
     values whose first character agrees with ``cyr`` according to
     ``LanguageHelper.isCyrillicChar``; the second pass (run only when the
     first found nothing) accepts any non-empty value.

     Within a pass a shorter value replaces the current choice unless the
     shorter one has fewer than 4 characters while the current one has
     fewer than 10, or the current one ends with 'В'. A current choice
     shorter than 4 characters is replaced by a value of 4 to 9 characters.

     "МОЛДОВА" and "БЕЛАРУСЬ" are mapped to "МОЛДАВИЯ" and "БЕЛОРУССИЯ";
     "?" is returned when no name was found.
     """
     chosen = None
     for attempt in range(2):
         for slot in self.slots:
             if (slot.type_name != GeoReferent.ATTR_NAME):
                 continue
             text = str(slot.value)
             if (Utils.isNullOrEmpty(text)):
                 continue
             if (attempt == 0):
                 # Script of the value must agree with the requested one.
                 is_cyr = bool(LanguageHelper.isCyrillicChar(text[0]))
                 if (is_cyr != bool(cyr)):
                     continue
             if (chosen is None):
                 chosen = text
             elif (len(text) < len(chosen)):
                 too_short = len(text) < 4 and len(chosen) < 10
                 if (not too_short and chosen[-1] != 'В'):
                     chosen = text
             elif (len(chosen) < 4 and 4 <= len(text) < 10):
                 chosen = text
         if (chosen is not None):
             break
     renamed = {"МОЛДОВА": "МОЛДАВИЯ", "БЕЛАРУСЬ": "БЕЛОРУССИЯ"}
     chosen = renamed.get(chosen, chosen)
     return Utils.ifNotNull(chosen, "?")
 def try_parse(t: 'Token') -> 'PrepositionToken':
     """Try to extract a preposition starting at the given token.

     The ontology of ``PrepositionHelper`` is consulted first; on a miss
     the token's dictionary morphology decides. A hyphen directly attached
     to the token and followed by another preposition text token extends
     the result to that second token.

     Args:
         t(Token): start token

     Returns:
         PrepositionToken: the result, or None
     """
     if (not isinstance(t, TextToken)):
         return None
     found = PrepositionHelper.__m_ontology.try_parse(t, TerminParseAttr.NO)
     if (found is not None):
         return PrepositionToken._new529(t, found.end_token,
                                         found.termin.canonic_text,
                                         found.termin.tag)
     if (not t.get_morph_class_in_dictionary().is_preposition):
         return None
     res = PrepositionToken(t, t)
     res.normal = t.get_normal_case_text(MorphClass.PREPOSITION,
                                         MorphNumber.UNDEFINED,
                                         MorphGender.UNDEFINED, False)
     res.next_case = LanguageHelper.get_case_after_preposition(res.normal)
     follower = t.next0_
     if (follower is None or not follower.is_hiphen or t.is_whitespace_after):
         return res
     second = follower.next0_
     if (isinstance(second, TextToken)
             and second.get_morph_class_in_dictionary().is_preposition):
         res.end_token = second
     return res
Beispiel #10
0
 def tryParse(t : 'Token', typ : 'NounPhraseParseAttr'=NounPhraseParseAttr.NO, max_char_pos : int=0) -> 'NounPhraseToken':
     """Try to build a noun phrase starting at the given token.

     When the direct attempt fails and ``typ`` contains the
     PARSEPREPOSITION flag, a leading preposition token (followed by
     fewer than three whitespaces) is skipped and the phrase is parsed
     from the next token; the preposition then becomes the phrase's
     ``preposition`` and ``begin_token``.

     Args:
         t(Token): start token
         typ(NounPhraseParseAttr): parameters (may be a bit mask)
         max_char_pos(int): maximum text position up to which to extract;
             0 means no limit

     Returns:
         NounPhraseToken: the noun phrase, or None
     """
     from pullenti.ner.core._NounPraseHelperInt import _NounPraseHelperInt
     phrase = _NounPraseHelperInt.tryParse(t, typ, max_char_pos)
     if (phrase is not None):
         return phrase
     prep_allowed = ((typ) & (NounPhraseParseAttr.PARSEPREPOSITION)) != (NounPhraseParseAttr.NO)
     if (not prep_allowed):
         return None
     if (not (isinstance(t, TextToken) and t.morph.class0_.is_preposition
              and t.whitespaces_after_count < 3)):
         return None
     phrase = _NounPraseHelperInt.tryParse(t.next0_, typ, max_char_pos)
     if (phrase is None):
         return None
     mc = LanguageHelper.getCaseAfterPreposition(t.lemma)
     phrase.preposition = t
     phrase.begin_token = t
     if (not ((mc) & phrase.morph.case_).is_undefined):
         # Narrow the phrase morphology to the cases the preposition allows.
         phrase.morph.removeItems(mc, False)
     elif (t.morph.class0_.is_adverb):
         return None
     return phrase
Beispiel #11
0
 def toString(self,
              short_variant: bool,
              lang: 'MorphLang' = None,
              lev: int = 0) -> str:
     """Render the unit as text.

     Args:
         short_variant: use ATTR_NAME slots when true, ATTR_FULLNAME slots
             otherwise.
         lang: when given, the first pass over the slots skips values for
             which ``lang.is_ru`` differs from
             ``LanguageHelper.isCyrillic(value)``; a second pass takes the
             first slot of the wanted type regardless.
         lev: when greater than 0 the power is not appended.

     Returns:
         The name (``"?"`` if absent) when there is no power or
         ``lev > 0``; otherwise name plus power, with a power that starts
         with '-' wrapped in ``<...>``. The full variant of an unknown
         unit is prefixed with ``"(?)"``.
     """
     wanted = (UnitReferent.ATTR_NAME
               if short_variant else UnitReferent.ATTR_FULLNAME)
     nam = None
     for attempt in range(2):
         filter_by_lang = lang is not None and attempt == 0
         for slot in self.slots:
             if (slot.type_name != wanted):
                 continue
             text = Utils.asObjectOrNull(slot.value, str)
             if (filter_by_lang
                     and lang.is_ru != LanguageHelper.isCyrillic(text)):
                 continue
             nam = text
             break
         if (nam is not None):
             break
     if (nam is None):
         nam = self.getStringValue(UnitReferent.ATTR_NAME)
     pow0_ = self.getStringValue(UnitReferent.ATTR_POW)
     if (Utils.isNullOrEmpty(pow0_) or lev > 0):
         return Utils.ifNotNull(nam, "?")
     template = "{0}<{1}>" if pow0_[0] == '-' else "{0}{1}"
     res = template.format(nam, pow0_)
     if (not short_variant and self.is_unknown):
         res = "(?)" + res
     return res
Beispiel #12
0
 def __find_in_tree(self, key : str, lang : 'MorphLang') -> typing.List['Termin']:
     """Look up the termins stored in the character tree under ``key``.

     The tree is walked one character code at a time starting from the
     root chosen by ``__get_root``. If the walk gets stuck on a space and
     the current node holds termins, the key is split on spaces and those
     termins with the same number of terms whose terms (from the second
     one on) list the corresponding key word among their variants are
     returned.

     Returns:
         The termins of the node reached, the filtered list described
         above (``None`` if nothing matched), or ``None`` when ``key`` is
         ``None`` or the path does not exist.
     """
     if key is None:
         return None
     nod = self.__get_root(lang, ((lang is None or lang.is_undefined)) and LanguageHelper.is_latin(key))
     for symbol in key:
         code = ord(symbol)
         child = None
         if nod.children is not None:
             box = RefOutArgWrapper(None)
             Utils.tryGetValue(nod.children, code, box)
             child = box.value
         if child is not None:
             nod = child
             continue
         # Dead end: only a space (code 32) at a node with termins can
         # still produce a result.
         if code != 32 or nod.termins is None:
             return None
         words = Utils.splitString(key, ' ', False)
         matched = None
         for termin in nod.termins:
             if len(termin.terms) != len(words):
                 continue
             if all(words[k] in termin.terms[k].variants for k in range(1, len(words))):
                 if matched is None:
                     matched = list()
                 matched.append(termin)
         return matched
     return nod.termins
Beispiel #13
0
 def get_doc_types(name : str, name2 : str) -> typing.List[str]:
     """Return the contract types in which the given party roles can occur.

     Args:
         name: role name of the first participant; ``None`` gives an empty
             list.
         name2: optional role name of the second participant. When given,
             only the types that are also valid for ``name2`` are kept.

     Returns:
         List of document type names (possibly empty).
     """
     res = list()
     if name is None:
         return res
     # Exact role names tested before the "...ПОДРЯДЧИК" suffix check.
     before_suffix_check = {
         "АРЕНДОДАТЕЛЬ": ("ДОГОВОР АРЕНДЫ", "ДОГОВОР СУБАРЕНДЫ"),
         "АРЕНДАТОР": ("ДОГОВОР АРЕНДЫ",),
         "СУБАРЕНДАТОР": ("ДОГОВОР СУБАРЕНДЫ",),
         "НАЙМОДАТЕЛЬ": ("ДОГОВОР НАЙМА",),
         "НАНИМАТЕЛЬ": ("ДОГОВОР НАЙМА",),
         "АГЕНТ": ("АГЕНТСКИЙ ДОГОВОР",),
         "ПРИНЦИПАЛ": ("АГЕНТСКИЙ ДОГОВОР",),
         "ПРОДАВЕЦ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
         "ПОКУПАТЕЛЬ": ("ДОГОВОР КУПЛИ-ПРОДАЖИ",),
         "ЗАКАЗЧИК": ("ДОГОВОР УСЛУГ",),
         "ИСПОЛНИТЕЛЬ": ("ДОГОВОР УСЛУГ",),
     }
     # Exact role names tested after it.
     after_suffix_check = {
         "ПОСТАВЩИК": ("ДОГОВОР ПОСТАВКИ",),
         "ЛИЦЕНЗИАР": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
         "ЛИЦЕНЗИАТ": ("ЛИЦЕНЗИОННЫЙ ДОГОВОР",),
         "СТРАХОВЩИК": ("ДОГОВОР СТРАХОВАНИЯ",),
         "СТРАХОВАТЕЛЬ": ("ДОГОВОР СТРАХОВАНИЯ",),
     }
     if name in before_suffix_check:
         res.extend(before_suffix_check[name])
     elif LanguageHelper.ends_with(name, "ПОДРЯДЧИК"):
         res.append("ДОГОВОР УСЛУГ")
     elif name in after_suffix_check:
         res.extend(after_suffix_check[name])
     if name2 is None:
         return res
     allowed = ParticipantToken.get_doc_types(name2, None)
     return [doc_type for doc_type in res if doc_type in allowed]
Beispiel #14
0
 def __try_attach_moscowao(li: typing.List['TerrItemToken'],
                           ad: 'AnalyzerData') -> 'ReferentToken':
     """Try to build an "АДМИНИСТРАТИВНЫЙ ОКРУГ" referent from the first item.

     The first item must carry a termin flagged ``is_moscow_region``. A
     doubtful item is accepted only when ``CityAttachHelper.check_city_after``
     succeeds on the following token or an address list parsed there
     starts with a STREET item.

     Args:
         li: territory items; only ``li[0]`` is used.
         ad: analyzer data (not used by this function).

     Returns:
         A ``ReferentToken`` spanning the first item, or ``None``.
     """
     head = li[0]
     termin = head.termin_item
     if (termin is None or not termin.is_moscow_region):
         return None
     if (head.is_doubt):
         after = head.end_token.next0_
         confirmed = False
         if (CityAttachHelper.check_city_after(after)):
             confirmed = True
         else:
             ali = AddressItemToken.try_parse_list(after, None, 2)
             confirmed = (ali is not None and len(ali) > 0
                          and ali[0].typ == AddressItemToken.ItemType.STREET)
         if (not confirmed):
             return None
     typ = "АДМИНИСТРАТИВНЫЙ ОКРУГ"
     reg = GeoReferent()
     reg._add_typ(typ)
     name = termin.canonic_text
     if (LanguageHelper.ends_with(name, typ)):
         # Cut the type words and the character before them off the name.
         name = name[:len(name) - len(typ) - 1].strip()
     reg._add_name(name)
     return ReferentToken(reg, head.begin_token, head.end_token)
 def to_string(self,
               short_variant: bool,
               lang: 'MorphLang' = None,
               lev: int = 0) -> str:
     """Render the weapon as ``type Brand model "Name" (Alt name), номер N``.

     The shortest ATTR_TYPE value (lower-cased) comes first, followed by
     brand, model, name and number when present. If another ATTR_NAME
     slot differs from the main name and its first character gets a
     different ``LanguageHelper.is_cyrillic_char`` answer, the first such
     value is added in parentheses. The arguments are not used.
     """
     out = io.StringIO()
     shortest_type = None
     for slot in self.slots:
         if (slot.type_name != WeaponReferent.ATTR_TYPE):
             continue
         candidate = slot.value
         if (shortest_type is None or len(candidate) < len(shortest_type)):
             shortest_type = candidate
     if (shortest_type is not None):
         out.write(shortest_type.lower())
     brand = self.get_string_value(WeaponReferent.ATTR_BRAND)
     if (brand is not None):
         out.write(" {0}".format(
             MiscHelper.convert_first_char_upper_and_other_lower(brand)))
     model = self.get_string_value(WeaponReferent.ATTR_MODEL)
     if (model is not None):
         out.write(" {0}".format(model))
     name = self.get_string_value(WeaponReferent.ATTR_NAME)
     if (name is not None):
         out.write(" \"{0}\"".format(
             MiscHelper.convert_first_char_upper_and_other_lower(name)))
         for slot in self.slots:
             if (slot.type_name != WeaponReferent.ATTR_NAME
                     or name == slot.value):
                 continue
             if (LanguageHelper.is_cyrillic_char(name[0]) !=
                     LanguageHelper.is_cyrillic_char(slot.value[0])):
                 out.write(" ({0})".format(
                     MiscHelper.convert_first_char_upper_and_other_lower(
                         slot.value)))
                 break
     number = self.get_string_value(WeaponReferent.ATTR_NUMBER)
     if (number is not None):
         out.write(", номер {0}".format(number))
     return Utils.toStringStringIO(out)
Beispiel #16
0
 def mergeSlots(self,
                obj: 'Referent',
                merge_statistic: bool = True) -> None:
     """Take over missing phone data from another ``PhoneReferent``.

     The country code is copied when this referent has none. The number
     is replaced by the other one when
     ``LanguageHelper.endsWith(other.number, self.number)`` holds.
     Objects that are not ``PhoneReferent`` are ignored;
     ``merge_statistic`` is not used.
     """
     other = Utils.asObjectOrNull(obj, PhoneReferent)
     if (other is None):
         return
     if (other.country_code is not None and self.country_code is None):
         self.country_code = other.country_code
     if (other.number is None):
         return
     if (LanguageHelper.endsWith(other.number, self.number)):
         self.number = other.number
Beispiel #17
0
 def find(self, key : str) -> 'Termin':
     """Return the first termin stored under ``key``, or ``None``.

     Keys whose first character is Latin are searched with
     ``MorphLang.EN``; all others with ``MorphLang.RU`` and, if that
     lookup returns ``None``, with ``MorphLang.UA``.
     """
     if (Utils.isNullOrEmpty(key)):
         return None
     if (LanguageHelper.is_latin_char(key[0])):
         found = self.__find_in_tree(key, MorphLang.EN)
     else:
         found = self.__find_in_tree(key, MorphLang.RU)
         if (found is None):
             found = self.__find_in_tree(key, MorphLang.UA)
     if (found is None or len(found) == 0):
         return None
     return found[0]
Beispiel #18
0
 def get_lemma(self) -> str:
     """Lemma (a variant of morphological normalization).

     A stored ``__m_lemma`` is returned as is. Otherwise the lemma is
     taken from the word forms: the single form if there is only one; for
     tokens that are not all lower-case, a proper-surname form ending in
     "ОВ"/"ЕВ" (a dictionary proper-name form is returned immediately);
     otherwise the best form according to ``__compare_forms``. The result
     is then adjusted for the "АНЫЙ"/"ЕНЫЙ", "ЙСЯ" and "АНИЙ" endings.
     Without word forms the term itself (or "?") is returned.
     """
     if (self.__m_lemma is not None):
         return self.__m_lemma
     forms = self.word_forms
     lemma = None
     if (forms is not None and len(forms) > 0):
         if (len(forms) == 1):
             lemma = Utils.ifNotNull(forms[0].normal_full,
                                     forms[0].normal_case)
         if (lemma is None and not self.char_info.is_all_lower):
             for form in forms:
                 if (form.class0_.is_proper_surname):
                     text = Utils.ifNotNull(
                         form.normal_full,
                         Utils.ifNotNull(form.normal_case, ""))
                     if (LanguageHelper.ends_with_ex(
                             text, "ОВ", "ЕВ", None, None)):
                         lemma = text
                         break
                 elif (form.class0_.is_proper_name
                       and form.is_in_dictionary):
                     return form.normal_case
         if (lemma is None):
             best = None
             for form in forms:
                 if (best is None or self.__compare_forms(best, form) > 0):
                     best = form
             lemma = Utils.ifNotNull(best.normal_full, best.normal_case)
     if (lemma is None):
         return Utils.ifNotNull(self.term, "?")
     if (LanguageHelper.ends_with_ex(lemma, "АНЫЙ", "ЕНЫЙ", None, None)):
         return lemma[:len(lemma) - 3] + "ННЫЙ"
     if (LanguageHelper.ends_with(lemma, "ЙСЯ")):
         return lemma[:len(lemma) - 2]
     if (LanguageHelper.ends_with(lemma, "АНИЙ") and lemma == self.term):
         # Keep the form only if some word form is known to the dictionary.
         if (any(wf.is_in_dictionary for wf in self.word_forms)):
             return lemma
         return lemma[:len(lemma) - 1] + "Е"
     return lemma
 def remove_items_by_preposition(self, prep: 'Token') -> None:
     """Remove items whose case does not match the preposition.

     Nothing happens when ``prep`` is not a ``TextToken`` or when the
     case required after it has no intersection with ``self.case_``.

     Args:
         prep(Token): the preposition token
     """
     from pullenti.ner.TextToken import TextToken
     if (isinstance(prep, TextToken)):
         required = LanguageHelper.get_case_after_preposition(prep.lemma)
         if (not ((required) & self.case_).is_undefined):
             self.remove_items(required, False)
Beispiel #20
0
 def correctWordByMorph(self, word: str, lang: 'MorphLang') -> str:
     """Correct ``word`` with the morphology engine matching ``lang``.

     Words whose first character is not Cyrillic go to the EN engine. For
     Cyrillic words the first engine (RU, UA, BY, KZ) whose own language
     flag and the matching ``lang`` flag are both set is used; the RU
     engine is the fallback, also when ``lang`` is ``None``.
     """
     if (not LanguageHelper.isCyrillicChar(word[0])):
         return InnerMorphology.M_ENGINE_EN.correctWordByMorph(word)
     engine = InnerMorphology.M_ENGINE_RU
     if (lang is not None and not (
             InnerMorphology.M_ENGINE_RU.language.is_ru and lang.is_ru)):
         if (InnerMorphology.M_ENGINE_UA.language.is_ua and lang.is_ua):
             engine = InnerMorphology.M_ENGINE_UA
         elif (InnerMorphology.M_ENGINE_BY.language.is_by and lang.is_by):
             engine = InnerMorphology.M_ENGINE_BY
         elif (InnerMorphology.M_ENGINE_KZ.language.is_kz and lang.is_kz):
             engine = InnerMorphology.M_ENGINE_KZ
     return engine.correctWordByMorph(word)
Beispiel #21
0
 def add(self, tail: str, var: 'MorphRuleVariant') -> None:
     """Register ``var`` under the corrected ``tail`` in ``self.variants``.

     The tail is normalised with ``LanguageHelper.correctWord``; the
     variant is appended to the list stored for that tail (created on
     first use) and gets its ``tail`` and ``rule`` back-references set.
     """
     tail = LanguageHelper.correctWord(tail)
     if (var.class0_.is_undefined):
         # Deliberately nothing: the original check has no effect.
         pass
     holder = RefOutArgWrapper(None)
     known = Utils.tryGetValue(self.variants, tail, holder)
     bucket = holder.value
     if (not known):
         bucket = list()
         self.variants[tail] = bucket
     var.tail = tail
     bucket.append(var)
     var.rule = self
Beispiel #22
0
 def correct_word_by_morph(self, word: str, lang: 'MorphLang') -> str:
     """Correct ``word`` with the morphology engine matching ``lang``.

     Words whose first character is not Cyrillic go to the EN engine. For
     Cyrillic words the first engine (RU, UA, BY, KZ) whose own language
     flag and the matching ``lang`` flag are both set is used; the RU
     engine is the fallback, also when ``lang`` is ``None``.
     """
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return self.__m_engine_en.correct_word_by_morph(word)
     engine = self.__m_engine_ru
     if (lang is not None and not (
             self.__m_engine_ru.language.is_ru and lang.is_ru)):
         if (self.__m_engine_ua.language.is_ua and lang.is_ua):
             engine = self.__m_engine_ua
         elif (self.__m_engine_by.language.is_by and lang.is_by):
             engine = self.__m_engine_by
         elif (self.__m_engine_kz.language.is_kz and lang.is_kz):
             engine = self.__m_engine_kz
     return engine.correct_word_by_morph(word)
Beispiel #23
0
 def get_all_wordforms(self, word: str,
                       lang: 'MorphLang') -> typing.List['MorphWordForm']:
     """Return all word forms of ``word`` from the engine matching ``lang``.

     Words whose first character is not Cyrillic go to the EN engine. For
     Cyrillic words the first engine (RU, UA, BY, KZ) whose own language
     flag and the matching ``lang`` flag are both set is used; the RU
     engine is the fallback, also when ``lang`` is ``None``.
     """
     if (not LanguageHelper.is_cyrillic_char(word[0])):
         return self.__m_engine_en.get_all_wordforms(word)
     engine = self.__m_engine_ru
     if (lang is not None and not (
             self.__m_engine_ru.language.is_ru and lang.is_ru)):
         if (self.__m_engine_ua.language.is_ua and lang.is_ua):
             engine = self.__m_engine_ua
         elif (self.__m_engine_by.language.is_by and lang.is_by):
             engine = self.__m_engine_by
         elif (self.__m_engine_kz.language.is_kz and lang.is_kz):
             engine = self.__m_engine_kz
     return engine.get_all_wordforms(word)
Beispiel #24
0
 def getAllWordforms(self, word: str,
                     lang: 'MorphLang') -> typing.List['MorphWordForm']:
     """Return all word forms of ``word`` from the engine matching ``lang``.

     Words whose first character is not Cyrillic go to the EN engine. For
     Cyrillic words the first engine (RU, UA, BY, KZ) whose own language
     flag and the matching ``lang`` flag are both set is used; the RU
     engine is the fallback, also when ``lang`` is ``None``.
     """
     if (not LanguageHelper.isCyrillicChar(word[0])):
         return InnerMorphology.M_ENGINE_EN.getAllWordforms(word)
     engine = InnerMorphology.M_ENGINE_RU
     if (lang is not None and not (
             InnerMorphology.M_ENGINE_RU.language.is_ru and lang.is_ru)):
         if (InnerMorphology.M_ENGINE_UA.language.is_ua and lang.is_ua):
             engine = InnerMorphology.M_ENGINE_UA
         elif (InnerMorphology.M_ENGINE_BY.language.is_by and lang.is_by):
             engine = InnerMorphology.M_ENGINE_BY
         elif (InnerMorphology.M_ENGINE_KZ.language.is_kz and lang.is_kz):
             engine = InnerMorphology.M_ENGINE_KZ
     return engine.getAllWordforms(word)
Beispiel #25
0
 def canBeGeneralFor(self, obj: 'Referent') -> bool:
     """Tell whether this phone can be a generalisation of ``obj``.

     ``obj`` must first pass ``__canBeEqual`` (WITHINONETEXT, with the
     ignore-add-number flag set). This phone must not have a country code
     the other one lacks. If only the other phone has an additional
     number the answer is ``True``; if only this one has it, ``False``.
     Otherwise the other number must end with this number according to
     ``LanguageHelper.endsWith``.
     """
     if (not self.__canBeEqual(obj, Referent.EqualType.WITHINONETEXT,
                               True)):
         return False
     other = Utils.asObjectOrNull(obj, PhoneReferent)
     if (self.country_code is not None and other.country_code is None):
         return False
     if (self.add_number is None):
         if (other.add_number is not None):
             return True
     elif (other.add_number is None):
         return False
     return bool(LanguageHelper.endsWith(other.number, self.number))
Beispiel #26
0
 def __canBeEqual(self, obj: 'Referent', typ: 'EqualType',
                  ignore_add_number: bool) -> bool:
     """Compare this phone with another referent.

     Args:
         obj: referent to compare with; anything but a ``PhoneReferent``
             gives ``False``.
         typ: unless it equals ``Referent.EqualType.DIFFERENTTEXTS``, one
             number being the ending of the other also counts as a match.
         ignore_add_number: when true, additional numbers must agree only
             if both phones have one; when false, they must agree as soon
             as either phone has one.

     Returns:
         ``True`` when country codes (if both present), additional numbers
         and numbers are compatible as described.
     """
     other = Utils.asObjectOrNull(obj, PhoneReferent)
     if (other is None):
         return False
     if (other.country_code is not None and self.country_code is not None
             and other.country_code != self.country_code):
         return False
     if (ignore_add_number):
         if (self.add_number is not None and other.add_number is not None
                 and other.add_number != self.add_number):
             return False
     elif ((self.add_number is not None or other.add_number is not None)
           and self.add_number != other.add_number):
         return False
     if (self.number is None or other.number is None):
         return False
     if (self.number == other.number):
         return True
     if (typ != Referent.EqualType.DIFFERENTTEXTS
             and (LanguageHelper.endsWith(self.number, other.number)
                  or LanguageHelper.endsWith(other.number, self.number))):
         return True
     return False
 def set_shortest_canonical_text(
         self, ignore_termins_with_notnull_tags: bool = False) -> None:
     """Set the canonical text to the shortest Cyrillic termin text.

     The stored text is reset to ``None`` first. Termins without terms,
     termins whose canonical text does not start with a Cyrillic
     character and, if requested, termins with a non-``None`` tag are
     skipped; of the rest the first shortest text is kept.

     Args:
         ignore_termins_with_notnull_tags: skip termins whose ``tag`` is
             set.
     """
     self.__m_canonic_text = (None)
     for termin in self.termins:
         if (ignore_termins_with_notnull_tags and termin.tag is not None):
             continue
         if (len(termin.terms) == 0):
             continue
         text = termin.canonic_text
         if (not LanguageHelper.is_cyrillic_char(text[0])):
             continue
         current = self.__m_canonic_text
         if (current is None or len(text) < len(current)):
             self.__m_canonic_text = text
Beispiel #28
0
 def __FindInTree(self, key: str,
                  lang: 'MorphLang') -> typing.List['Termin']:
     """Look up the termins stored in the character tree under ``key``.

     The walk starts at the root chosen by ``__getRoot`` and follows one
     child per character code of the key.

     Returns:
         The ``termins`` of the node reached, or ``None`` when ``key`` is
         ``None`` or some character has no child node.
     """
     if (key is None):
         return None
     nod = self.__getRoot(lang, ((lang is None or lang.is_undefined))
                          and LanguageHelper.isLatin(key))
     for symbol in key:
         if (nod.children is None):
             return None
         holder = RefOutArgWrapper(None)
         if (not Utils.tryGetValue(nod.children, ord(symbol), holder)):
             return None
         nod = holder.value
     return nod.termins
Beispiel #29
0
 def __remove_from_tree(self, key : str, t : 'Termin') -> None:
     """Remove termin ``t`` from the tree node addressed by ``key``.

     The walk starts at the root chosen by ``__get_root`` for the
     termin's language and follows one child per character code of the
     key. Nothing happens when ``key`` is ``None``, the path does not
     exist, or the node does not hold ``t``.
     """
     if key is None:
         return
     nod = self.__get_root(t.lang, t.lang.is_undefined and LanguageHelper.is_latin(key))
     for symbol in key:
         if nod.children is None:
             return
         holder = RefOutArgWrapper(None)
         if not Utils.tryGetValue(nod.children, ord(symbol), holder):
             return
         nod = holder.value
     if nod.termins is not None and t in nod.termins:
         nod.termins.remove(t)
Beispiel #30
0
 def create_question(li : 'NGItem') -> str:
     """Build the question word (with preposition) for an item's case.

     The result starts with the lower-cased preposition of the item, if
     any, followed by a space. The item's case is narrowed to the case
     required after the preposition when the two intersect. The first
     matching case then appends its question word: genitive "чего",
     instrumental "чем", dative "чему", accusative "что",
     prepositional "чём"; no word is appended for other cases.
     """
     prep = li.source.prep
     res = (Utils.ifNotNull(prep, "")).lower()
     if len(res) > 0:
         res += " "
     cas = li.source.source.morph.case_
     if not Utils.isNullOrEmpty(li.source.prep):
         after_prep = LanguageHelper.get_case_after_preposition(li.source.prep)
         if not after_prep.is_undefined and not ((after_prep) & cas).is_undefined:
             cas = ((cas) & after_prep)
     question_words = (
         ("is_genitive", "чего"),
         ("is_instrumental", "чем"),
         ("is_dative", "чему"),
         ("is_accusative", "что"),
         ("is_prepositional", "чём"),
     )
     for flag, question in question_words:
         if getattr(cas, flag):
             return res + question
     return res