Esempio n. 1
0
 def createNumber(owner : 'FragToken', itok : 'InstrToken1') -> None:
     """Create the resulting node that represents the number.

     A NUMBER fragment spanning ``itok.num_begin_token`` .. ``itok.num_end_token``
     is appended to ``owner.children`` and the numeric parts of ``itok`` are
     copied onto ``owner``. Nothing happens when either boundary token is None.

     Args:
         owner(FragToken): fragment that receives the child and the number fields
         itok(InstrToken1): line token that supplies the numbering data
     """
     from pullenti.ner.instrument.internal.FragToken import FragToken
     if itok.num_begin_token is None or itok.num_end_token is None:
         return
     number_frag = FragToken._new1456(itok.num_begin_token, itok.num_end_token, InstrumentKind.NUMBER, True, itok)
     owner.children.append(number_frag)
     number_type = itok.num_typ
     if number_type == NumberTypes.TWODIGITS:
         # "N.M": main number plus one sub-number
         owner.number = itok.first_number
         owner.sub_number = itok.last_number
     elif number_type == NumberTypes.THREEDIGITS:
         # "N.M.K": main number plus two sub-numbers
         owner.number = itok.first_number
         owner.sub_number = itok.middle_number
         owner.sub_number2 = itok.last_number
     elif number_type == NumberTypes.FOURDIGITS and len(itok.numbers) == 4:
         # Four parts: the two inner parts are converted from itok.numbers
         owner.number = itok.first_number
         owner.sub_number = PartToken.getNumber(itok.numbers[1])
         owner.sub_number2 = PartToken.getNumber(itok.numbers[2])
         owner.sub_number3 = itok.last_number
     else:
         # Any other numbering type: only the last number is kept
         owner.number = itok.last_number
     owner.min_number = itok.last_min_number
     owner._itok = itok
Esempio n. 2
0
 def process(self, kit: 'AnalysisKit') -> None:
     """Create a document fragment from the kit's first token and its referent.

     Returns immediately when the kit has no first token or when
     ``FragToken.createDocument`` yields None. Otherwise the analyzer data
     obtained from ``kit`` for this analyzer is passed to the fragment's
     ``createReferent``.

     Args:
         kit(AnalysisKit): analysis container providing the token chain
     """
     from pullenti.ner.instrument.internal.FragToken import FragToken
     first = kit.first_token
     if first is None:
         return
     document = FragToken.createDocument(first, 0, InstrumentKind.UNDEFINED)
     if document is None:
         return
     analyzer_data = kit.getAnalyzerData(self)
     document.createReferent(analyzer_data)
Esempio n. 3
0
 def create_number(owner : 'FragToken', itok : 'InstrToken1') -> None:
     """Append a NUMBER child built from ``itok`` to ``owner`` and copy its numbering.

     When ``itok`` lacks either numbering boundary token the call is a no-op.

     Args:
         owner(FragToken): fragment that receives the child and the number fields
         itok(InstrToken1): line token that supplies the numbering data
     """
     if itok.num_begin_token is None or itok.num_end_token is None:
         return
     number_frag = FragToken._new1562(itok.num_begin_token, itok.num_end_token, InstrumentKind.NUMBER, True, itok)
     owner.children.append(number_frag)
     number_type = itok.num_typ
     if number_type == NumberTypes.TWODIGITS:
         # Two-part number: main number plus one sub-number
         owner.number = itok.first_number
         owner.sub_number = itok.last_number
     elif number_type == NumberTypes.THREEDIGITS:
         # Three-part number: main number plus two sub-numbers
         owner.number = itok.first_number
         owner.sub_number = itok.middle_number
         owner.sub_number2 = itok.last_number
     elif number_type == NumberTypes.FOURDIGITS and len(itok.numbers) == 4:
         # Four-part number: the two inner parts are converted from itok.numbers
         owner.number = itok.first_number
         owner.sub_number = PartToken.get_number(itok.numbers[1])
         owner.sub_number2 = PartToken.get_number(itok.numbers[2])
         owner.sub_number3 = itok.last_number
     else:
         # Any other numbering type: only the last number is kept
         owner.number = itok.last_number
     owner.min_number = itok.last_min_number
     owner._itok = itok
Esempio n. 4
0
 def __analize_list_items(chi : typing.List['FragToken'], ind : int) -> int:
     """Try to split the fragment ``chi[ind]`` into list-head / list-item children.

     Args:
         chi: sibling fragments; only ``chi[ind]`` (and, for CONTENT fragments,
             the siblings that follow it) are inspected.
         ind: index of the fragment to analyze.

     Returns:
         -1 when nothing was produced (index out of range, unsupported kind,
         fewer than two lines or fewer than two list items). Otherwise the
         number of consecutive entries of ``chi``, starting at ``ind``, whose
         text was consumed: 1 unless following CONTENT siblings were merged in.
         New children are appended to ``chi[ind].children``; ``chi`` itself is
         not modified here.
     """
     if (ind >= len(chi)): 
         return -1
     res = chi[ind]
     ki = res.kind
     # Only these fragment kinds are searched for lists.
     if (((ki == InstrumentKind.CHAPTER or ki == InstrumentKind.CLAUSE or ki == InstrumentKind.CONTENT) or ki == InstrumentKind.ITEM or ki == InstrumentKind.SUBITEM) or ki == InstrumentKind.CLAUSEPART or ki == InstrumentKind.INDENTION): 
         pass
     else: 
         return -1
     # Fragment that carries a multi-line change value: wrap that value in a
     # CITATION child and parse the cited text separately.
     if (res.has_changes and res.multiline_changes_value is not None): 
         ci = res.multiline_changes_value
         cit = FragToken._new1340(ci.begin_token, ci.end_token, InstrumentKind.CITATION)
         res.children.append(cit)
         # Widen the citation over an adjacent opening/closing bracket and over
         # a ';' or '.' that follows the closing bracket.
         if (BracketHelper.is_bracket(cit.begin_token.previous, True)): 
             cit.begin_token = cit.begin_token.previous
         if (BracketHelper.is_bracket(cit.end_token.next0_, True)): 
             cit.end_token = cit.end_token.next0_
             if (cit.end_token.next0_ is not None and cit.end_token.next0_.is_char_of(";.")): 
                 cit.end_token = cit.end_token.next0_
         res.fill_by_content_children()
         # NOTE(review): this branch has no effect; looks like a debugging leftover.
         if (res.children[0].has_changes): 
             pass
         # Work out which kind of fragment the cited text represents.
         cit_kind = InstrumentKind.UNDEFINED
         if (isinstance(ci.tag, DecreeChangeReferent)): 
             dcr = Utils.asObjectOrNull(ci.tag, DecreeChangeReferent)
             if (dcr.value is not None and len(dcr.value.new_items) > 0): 
                 # Use the first word of the first new item as the type name.
                 mnem = dcr.value.new_items[0]
                 i = 0
                 i = mnem.find(' ')
                 if (((i)) > 0): 
                     mnem = mnem[0:0+i]
                 cit_kind = PartToken._get_instr_kind_by_typ(PartToken._get_type_by_attr_name(mnem))
             elif (len(dcr.owners) > 0 and (isinstance(dcr.owners[0], DecreePartReferent)) and dcr.kind == DecreeChangeKind.NEW): 
                 # New part: take the kind of the slot with the largest non-zero
                 # rank (min0_ tracks the largest rank seen so far).
                 pat = Utils.asObjectOrNull(dcr.owners[0], DecreePartReferent)
                 min0_ = 0
                 for s in pat.slots: 
                     ty = PartToken._get_type_by_attr_name(s.type_name)
                     if (ty == PartToken.ItemType.UNDEFINED): 
                         continue
                     l_ = PartToken._get_rank(ty)
                     if (l_ == 0): 
                         continue
                     if (l_ > min0_ or min0_ == 0): 
                         min0_ = l_
                         cit_kind = PartToken._get_instr_kind_by_typ(ty)
         sub = None
         if (cit_kind != InstrumentKind.UNDEFINED and cit_kind != InstrumentKind.APPENDIX): 
             # Known part kind: analyze the cited span as content of that kind.
             sub = FragToken(ci.begin_token, ci.end_token)
             wr = ContentAnalyzeWhapper()
             wr.analyze(sub, None, True, cit_kind)
             sub.kind = InstrumentKind.CONTENT
         else: 
             # Unknown kind or appendix: parse the cited span as a document.
             sub = FragToken.create_document(ci.begin_token, ci.end_char, cit_kind)
         if (sub is None or len(sub.children) == 0): 
             pass
         elif ((sub.kind == InstrumentKind.CONTENT and len(sub.children) > 0 and sub.children[0].begin_token == sub.begin_token) and sub.children[len(sub.children) - 1].end_token == sub.end_token): 
             # The CONTENT wrapper is fully covered by its children: attach them directly.
             cit.children.extend(sub.children)
         else: 
             cit.children.append(sub)
         return 1
     end_char = res.end_char
     if (res._itok is None): 
         res._itok = InstrToken1.parse(res.begin_token, True, None, 0, None, False, res.end_char, False, False)
     # Split the fragment text into lines.
     lines = ListHelper.LineToken.parse_list(res.begin_token, end_char, None)
     if (lines is None or (len(lines) < 1)): 
         return -1
     ret = 1
     if (res.kind == InstrumentKind.CONTENT): 
         # Absorb following CONTENT siblings whose lines continue the list.
         # EDITIONS and COMMENT siblings are stepped over; any other kind stops the scan.
         j = ind + 1
         while j < len(chi): 
             if (chi[j].kind == InstrumentKind.CONTENT): 
                 lines2 = ListHelper.LineToken.parse_list(chi[j].begin_token, chi[j].end_char, lines[len(lines) - 1])
                 if (lines2 is None or (len(lines2) < 1)): 
                     break
                 if (not lines2[0].is_list_item): 
                     # A non-capitalized first line ending with ':' that is followed
                     # by a list item is accepted as a list item as well.
                     if ((len(lines2) > 1 and lines2[1].is_list_item and lines2[0].end_token.is_char_of(":")) and not lines2[0].begin_token.chars.is_capital_upper): 
                         lines2[0].is_list_item = True
                     else: 
                         break
                 lines.extend(lines2)
                 ret = ((j - ind) + 1)
             elif (chi[j].kind != InstrumentKind.EDITIONS and chi[j].kind != InstrumentKind.COMMENT): 
                 break
             j += 1
     if (len(lines) < 2): 
         return -1
     # Exactly two leading items whose first number is not 1: unmark both.
     if ((len(lines) > 1 and lines[0].is_list_item and lines[1].is_list_item) and lines[0].number != 1): 
         if (len(lines) == 2 or not lines[2].is_list_item): 
             lines[1].is_list_item = False
             lines[0].is_list_item = lines[1].is_list_item
     # Validate each run of consecutive list items (the first_pass flag emulates
     # a for-loop whose "continue" still performs the increment).
     i = 0
     first_pass3276 = True
     while True:
         if first_pass3276: first_pass3276 = False
         else: i += 1
         if (not (i < len(lines))): break
         if (lines[i].is_list_item): 
             # Only the first item of a run is examined.
             if (i > 0 and lines[i - 1].is_list_item): 
                 continue
             if (((i + 1) < len(lines)) and lines[i + 1].is_list_item): 
                 pass
             else: 
                 # An isolated item is not a list.
                 lines[i].is_list_item = False
                 continue
             j = 0
             new_line = False
             j = (i + 1)
             while j < len(lines): 
                 if (not lines[j].is_list_item): 
                     break
                 elif (lines[j].is_newline_before): 
                     new_line = True
                 j += 1
             # Keep the run if some later item starts on a new line ...
             if (new_line): 
                 continue
             # ... or if the line before the run ends with ':'.
             if (i > 0 and lines[i - 1].end_token.is_char(':')): 
                 continue
             # Otherwise unmark the whole run.
             j = i
             while j < len(lines): 
                 if (not lines[j].is_list_item): 
                     break
                 else: 
                     lines[j].is_list_item = False
                 j += 1
     # A final line ending with '.' that follows an item ending with ';' is taken
     # as the last item when it is short enough or starts in lower case.
     if (len(lines) > 2): 
         last = lines[len(lines) - 1]
         last2 = lines[len(lines) - 2]
         if ((not last.is_list_item and last.end_token.is_char('.') and last2.is_list_item) and last2.end_token.is_char(';')): 
             if ((last.length_char < (last2.length_char * 2)) or last.begin_token.chars.is_all_lower): 
                 last.is_list_item = True
     # Merge adjacent non-item lines, except when the second one ends with ':'
     # and is directly followed by a list item.
     i = 0
     while i < (len(lines) - 1): 
         if (not lines[i].is_list_item and not lines[i + 1].is_list_item): 
             if (((i + 2) < len(lines)) and lines[i + 2].is_list_item and lines[i + 1].end_token.is_char(':')): 
                 pass
             else: 
                 lines[i].end_token = lines[i + 1].end_token
                 del lines[i + 1]
                 # Stay on the same index so the merged line is compared again.
                 i -= 1
         i += 1
     # Numbered list starting at 1 that also contains unnumbered items: fold each
     # unnumbered item into the preceding numbered one (stored in its tag list).
     i = 0
     while i < (len(lines) - 1): 
         if (lines[i].is_list_item): 
             if (lines[i].number == 1): 
                 ok = True
                 num = 1
                 nonum = 0
                 j = i + 1
                 while j < len(lines): 
                     if (not lines[j].is_list_item): 
                         ok = False
                         break
                     elif (lines[j].number > 0): 
                         num += 1
                         if (lines[j].number != num): 
                             ok = False
                             break
                     else: 
                         nonum += 1
                     j += 1
                 # Requires: all remaining lines are items, numbering is sequential,
                 # at least one unnumbered item and at least two numbered ones.
                 if (not ok or nonum == 0 or (num < 2)): 
                     break
                 lt = lines[i]
                 j = i + 1
                 while j < len(lines): 
                     if (lines[j].number > 0): 
                         lt = lines[j]
                     else: 
                         chli = Utils.asObjectOrNull(lt.tag, list)
                         if (chli is None): 
                             chli = list()
                             lt.tag = (chli)
                         lt.end_token = lines[j].end_token
                         chli.append(lines[j])
                         del lines[j]
                         j -= 1
                     j += 1
         i += 1
     # At least two list items must remain.
     cou = 0
     for li in lines: 
         if (li.is_list_item): 
             cou += 1
     if (cou < 2): 
         return -1
     # Per run of items: check the numbering and split off a leading head line.
     i = 0
     first_pass3277 = True
     while True:
         if first_pass3277: first_pass3277 = False
         else: i += 1
         if (not (i < len(lines))): break
         if (lines[i].is_list_item): 
             i0 = i
             ok = True
             cou = 1
             # Advance i to the end of the run; numbers must be 1, 2, 3, ...
             while i < len(lines): 
                 if (not lines[i].is_list_item): 
                     break
                 elif (lines[i].number != cou): 
                     ok = False
                 i += 1; cou += 1
             if (not ok): 
                 # Not sequential: clear the numbers of the whole run.
                 i = i0
                 while i < len(lines): 
                     if (not lines[i].is_list_item): 
                         break
                     else: 
                         lines[i].number = 0
                     i += 1
             # At least three items where the first starts with a different token
             # text than the second and third, which share one.
             if (cou > 3 and lines[i0].begin_token.get_source_text() != lines[i0 + 1].begin_token.get_source_text() and lines[i0 + 1].begin_token.get_source_text() == lines[i0 + 2].begin_token.get_source_text()): 
                 pref = lines[i0 + 1].begin_token.get_source_text()
                 ok = True
                 j = i0 + 2
                 while j < i: 
                     if (pref != lines[j].begin_token.get_source_text()): 
                         ok = False
                         break
                     j += 1
                 if (not ok): 
                     continue
                 # Search the first line backwards for that shared prefix token.
                 tt = None
                 ok = False
                 tt = lines[i0].end_token.previous
                 while tt is not None and tt != lines[i0].begin_token: 
                     if (tt.get_source_text() == pref): 
                         ok = True
                         break
                     tt = tt.previous
                 if (ok): 
                     # Split: text before the prefix becomes its own line, the item
                     # now starts at the prefix token.
                     li0 = ListHelper.LineToken(lines[i0].begin_token, tt.previous)
                     lines[i0].begin_token = tt
                     lines.insert(i0, li0)
                     i += 1
     # Materialize the lines as children of res.
     for li in lines: 
         li.correct_begin_token()
         ch = FragToken._new1357(li.begin_token, li.end_token, (InstrumentKind.LISTITEM if li.is_list_item else InstrumentKind.CONTENT), li.number)
         # A non-item line ending with ':' is a list head.
         if (ch.kind == InstrumentKind.CONTENT and ch.end_token.is_char(':')): 
             ch.kind = InstrumentKind.LISTHEAD
         res.children.append(ch)
         chli = Utils.asObjectOrNull(li.tag, list)
         if (chli is not None): 
             # Folded unnumbered lines become nested LISTITEM children.
             for lt in chli: 
                 ch.children.append(FragToken._new1340(lt.begin_token, lt.end_token, InstrumentKind.LISTITEM))
             # Text preceding the first nested item becomes a CONTENT child.
             if (ch.begin_char < ch.children[0].begin_char): 
                 ch.children.insert(0, FragToken._new1340(ch.begin_token, ch.children[0].begin_token.previous, InstrumentKind.CONTENT))
     return ret
Esempio n. 5
0
 def analyze(res : 'FragToken') -> None:
     """Detect list structures inside ``res`` and, recursively, inside its children.

     Steps:
       1. A fragment without children is handed to ``__analize_list_items``
          (only for the kinds that function supports) and the call returns.
       2. Runs of INDENTION children introduced by a child ending with ':' or
          ';' are marked through ``kind2`` as LISTHEAD / LISTITEM.
       3. Every child is processed: children that already have children are
          analyzed recursively, the others are split by ``__analize_list_items``.
       4. CONTENT children that were split in step 3 are replaced in
          ``res.children`` by their own children.

     Args:
         res(FragToken): fragment to analyze; modified in place.
     """
     if len(res.children) == 0:
         if res.kind in (InstrumentKind.CHAPTER, InstrumentKind.CLAUSE, InstrumentKind.CONTENT,
                         InstrumentKind.ITEM, InstrumentKind.SUBITEM, InstrumentKind.CLAUSEPART,
                         InstrumentKind.INDENTION):
             ListHelper.__analize_list_items([res], 0)
         return
     # Step 2: mark runs of indentions as list head / list items.
     i = 0
     while i < len(res.children):
         head = res.children[i]
         starts_list = head.kind == InstrumentKind.INDENTION and (
             head.end_token.is_char_of(":;")
             or ((i + 1) < len(res.children)
                 and res.children[i + 1].kind == InstrumentKind.EDITIONS
                 and res.children[i + 1].end_token.is_char_of(":;")))
         if starts_list:
             cou = 1
             list_bullet = chr(0)
             # Scan the following siblings; j ends up one past the last member of the run.
             j = i + 1
             while j < len(res.children):
                 ch = res.children[j]
                 if ch.kind == InstrumentKind.COMMENT or ch.kind == InstrumentKind.EDITIONS:
                     j += 1
                     continue
                 if ch.kind != InstrumentKind.INDENTION:
                     break
                 if (ch.end_token.is_char_of(";")
                         or ((j + 1) < len(res.children)
                             and res.children[j + 1].kind == InstrumentKind.EDITIONS
                             and res.children[j + 1].end_token.is_char(';'))):
                     # Intermediate item; remember a non-letter first character as the bullet.
                     cou += 1
                     if isinstance(ch.begin_token, TextToken) and not ch.chars.is_letter:
                         list_bullet = ch.kit.get_text_character(ch.begin_char)
                     j += 1
                     continue
                 if ch.end_token.is_char_of("."):
                     # Item closed by a period ends the run.
                     cou += 1
                     j += 1
                     break
                 if ch.end_token.is_char_of(":"):
                     # A bulleted indention ending with ':' may hold the last item followed by
                     # a new sentence; split it at the first sentence start after a '.'.
                     if ord(list_bullet) != 0 and ch.begin_token.is_char(list_bullet):
                         tt = ch.begin_token.next0_
                         while tt is not None and (tt.end_char < ch.end_char):
                             if tt.previous.is_char('.') and MiscHelper.can_be_start_of_sentence(tt):
                                 ch2 = FragToken._new1357(tt, ch.end_token, InstrumentKind.INDENTION, ch.number)
                                 ch.end_token = tt.previous
                                 res.children.insert(j + 1, ch2)
                                 # Renumber the inserted indention and all later ones.
                                 k = j + 1
                                 while k < len(res.children):
                                     if res.children[k].kind == InstrumentKind.INDENTION:
                                         res.children[k].number += 1
                                     k += 1
                                 cou += 1
                                 j += 1
                                 break
                             tt = tt.next0_
                     break
                 # Any other ending: count it as the last item of the run.
                 cou += 1
                 j += 1
                 break
             if cou >= 3:
                 # A preceding sibling ending with ':' becomes the head when the first
                 # member of the run does not itself end with ':'.
                 if ((i > 0 and not res.children[i].end_token.is_char(':')
                      and res.children[i - 1].kind2 == InstrumentKind.UNDEFINED)
                         and res.children[i - 1].end_token.is_char(':')):
                     res.children[i - 1].kind2 = InstrumentKind.LISTHEAD
                 while i < j:
                     ch = res.children[i]
                     if ch.kind == InstrumentKind.INDENTION:
                         if (ch.end_token.is_char(':')
                                 or ((i + 1) < j
                                     and res.children[i + 1].kind == InstrumentKind.EDITIONS
                                     and res.children[i + 1].end_token.is_char(':'))):
                             ch.kind2 = InstrumentKind.LISTHEAD
                         else:
                             ch.kind2 = InstrumentKind.LISTITEM
                     i += 1
             else:
                 # Fewer than three members: not treated as a list, resume after the scan.
                 i = j
         i += 1
     # Step 3: recurse into structured children, split the leaf ones.
     changed = list()
     i = 0
     while i < len(res.children):
         child = res.children[i]
         if len(child.children) > 0:
             ListHelper.analyze(child)
         else:
             co = ListHelper.__analize_list_items(res.children, i)
             if co > 0:
                 changed.append(child)
                 if co > 1:
                     # The co - 1 siblings that follow were merged into this child, so
                     # they are removed. After the removal the next unprocessed sibling
                     # sits at i + 1; the index therefore advances by one only (an extra
                     # skip of co - 1 would leave that many siblings unanalyzed).
                     del res.children[i + 1:i + co]
         i += 1
     # Step 4: replace split CONTENT children by their own children (processed from
     # the end so earlier positions are not shifted before they are looked up).
     for frag in reversed(changed):
         if frag.kind == InstrumentKind.CONTENT:
             j = Utils.indexOfList(res.children, frag, 0)
             if j < 0:
                 continue
             res.children[j:j + 1] = frag.children