Python XmlPreprocessor 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: apps.task.utils.text_extraction.xml_wordx.xml_wordx_utils

클래스/타입: XmlPreprocessor

hotexamples.com에서의 예제들: 16

Python XmlPreprocessor - 16개의 예제를 찾았습니다. 다음은 오픈소스 프로젝트에서 추출한 apps.task.utils.text_extraction.xml_wordx.xml_wordx_utils.XmlPreprocessor(Python)의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

get_clear_tag(13)

get_clear_attribute_val(5)

get_clear_attributes(3)

find_children_by_tag(1)

remove_namespace(1)

예제 #1

파일 보기

    def read_section(self, node: Any) -> None:
        """
        Register the numbering section described by ``node``.

        The section is stored in ``self.collections`` under the integer value
        of the node's ``abstractNumId`` attribute; a node without that
        attribute is ignored. Every direct ``lvl`` child is handed to
        ``read_numbering_sets``. When there is no such child, this method is
        applied to each child of ``node`` instead.
        """
        attributes = XmlPreprocessor.get_clear_attributes(node)
        raw_index = attributes.get('abstractNumId')
        if not raw_index:
            return
        section_key = int(raw_index)

        section = NumberingSetsSection()
        # anything other than the literal '0' (a missing attribute included)
        # leaves restart_after_break switched on
        section.restart_after_break = \
            (attributes.get('restartNumberingAfterBreak') or '') != '0'
        self.collections[section_key] = section

        # levels look like <w:lvl w:ilvl="0">
        levels_found = False
        for element in node:
            if XmlPreprocessor.get_clear_tag(element) != 'lvl':
                continue
            self.read_numbering_sets(element, section)
            levels_found = True

        try:
            if not levels_found:
                for element in node:
                    self.read_section(element)
        finally:
            # runs whether or not the recursion above raised
            section.initialize()

예제 #2

파일 보기

 def explore_paragraph_numpr(self, pr: DocParagraph, node: Any):
     """
     Copy list numbering info from the children of ``node`` onto ``pr``.

     An ``ilvl`` child sets ``pr.list_level`` and a ``numId`` child sets
     ``pr.list_number``, each from the child's ``val`` attribute converted
     to int. A child whose ``val`` is empty or missing changes nothing.
     """
     # element tag -> attribute of ``pr`` that receives the value
     targets = {'ilvl': 'list_level', 'numId': 'list_number'}
     for child in node:
         field = targets.get(XmlPreprocessor.get_clear_tag(child))
         if field is None:
             continue
         raw = XmlPreprocessor.get_clear_attributes(child).get('val')
         if raw:
             setattr(pr, field, int(raw))

예제 #3

파일 보기

 def explore_paragraph_properties(self, pr: DocParagraph, node: Any):
     """
     Walk the subtree of ``node`` looking for paragraph style and numbering.

     A ``pStyle`` element with a non-empty ``val`` is passed to
     ``apply_paragraph_style``; a ``numPr`` element is passed to
     ``explore_paragraph_numpr``. Any other element is searched recursively.
     """
     for child in node:
         kind = XmlPreprocessor.get_clear_tag(child)
         if kind == 'numPr':
             self.explore_paragraph_numpr(pr, child)
             continue
         if kind != 'pStyle':
             self.explore_paragraph_properties(pr, child)
             continue
         style_name = XmlPreprocessor.get_clear_attributes(child).get('val')
         if style_name:
             self.apply_paragraph_style(pr, style_name)

예제 #4

파일 보기

파일: xml_wordx_extractor.py 프로젝트: tx-anin/lexpredict-contraxsuite

 def add_hyperlink(self, node: Any, container: List[DocItem]) -> None:
     """
     Build a hyperlink item from ``node`` and append it to ``container``.

     The link text is the text of the first element returned by
     ``XmlPreprocessor.find_children_by_tag`` for tag ``t``. Nothing is
     appended when no such element is returned or its text is empty.
     When the node has an ``id`` attribute (e.g. r:id="rId4"), the target is
     looked up in ``self.relationship.rel_by_id``; an unknown id yields an
     empty string.
     """
     found = XmlPreprocessor.find_children_by_tag(node, 't', first_only=True)
     if not found:
         return
     caption = found[0].text
     if not caption:
         return

     hyperlink = self.make_hyperlink_instance()
     hyperlink.text = caption
     relation_id = XmlPreprocessor.get_clear_attribute_val(node, 'id')
     if relation_id:
         hyperlink.link = self.relationship.rel_by_id.get(relation_id) or ''
     container.append(hyperlink)

예제 #5

파일 보기

 def explore_section(self, node: Any, style_set: StyleSets):
     """
     Fill ``style_set`` from the subtree of ``node``.

     ``aliases`` stores the comma-separated ``val`` as a set in
     ``style_set.aliases``. ``ilvl`` and ``numId`` store ``int(val)`` in the
     attribute of the same name. Any other element is searched recursively.
     """
     for element in node:
         kind = XmlPreprocessor.get_clear_tag(element)
         if kind == 'aliases':
             raw = XmlPreprocessor.get_clear_attribute_val(element, 'val')
             style_set.aliases = set((raw or '').split(','))
         elif kind in ('ilvl', 'numId'):
             # the attribute on style_set has the same name as the tag
             raw = XmlPreprocessor.get_clear_attribute_val(element, 'val')
             setattr(style_set, kind, int(raw))
         else:
             self.explore_section(element, style_set)

예제 #6

파일 보기

파일: xml_wordx_extractor.py 프로젝트: tx-anin/lexpredict-contraxsuite

 def traverse_doc_tree(self, node: Any, items_container: List[DocItem]):
     """
     Collect paragraphs and tables found under ``node``.

     ``p`` elements go to ``add_paragraph`` and ``tbl`` elements to
     ``add_table``, both with ``items_container``; every other element is
     traversed recursively.
     """
     handlers = {'p': self.add_paragraph, 'tbl': self.add_table}
     for child in node:
         # anything that is not a paragraph or table is searched further
         handle = handlers.get(XmlPreprocessor.get_clear_tag(child),
                               self.traverse_doc_tree)
         handle(child, items_container)

예제 #7

파일 보기

 def read_num_map(self, node: Any) -> None:
     """
     Record which section id a list number refers to.

     Expected input:
     <w:num w:numId="1">
         <w:abstractNumId w:val="1" />
     </w:num>

     The node's ``numId`` is the key and the ``val`` of its first
     ``abstractNumId`` child is the value stored in
     ``self.list_num_to_section_id``. Nothing is stored when either
     attribute is empty or missing.
     """
     raw_num_id = XmlPreprocessor.get_clear_attribute_val(node, 'numId')
     if not raw_num_id:
         return
     list_number = int(raw_num_id)

     for element in node:
         if XmlPreprocessor.get_clear_tag(element) != 'abstractNumId':
             continue
         raw_section = XmlPreprocessor.get_clear_attribute_val(element, 'val')
         if raw_section:
             self.list_num_to_section_id[list_number] = int(raw_section)
         # only the first abstractNumId child is ever considered
         return

예제 #8

파일 보기

파일: xml_wordx_relationships.py 프로젝트: marsdev123/lexpredict-contraxsuite

    def traverse_tree(self, node) -> None:
        """
        Parse every ``Relationship`` element at the shallowest level that has one.

        Direct ``Relationship`` children of ``node`` are passed to
        ``parse_relationship``. Only when there are none is each child
        traversed recursively.
        """
        found_relationship = False
        for element in node:
            if XmlPreprocessor.get_clear_tag(element) == 'Relationship':
                self.parse_relationship(element)
                found_relationship = True

        if found_relationship:
            return
        for element in node:
            self.traverse_tree(element)

예제 #9

파일 보기

    def read_numbering_sets(self, node: Any, sect: NumberingSetsSection):
        """
        Build a ``NumberingSets`` from ``node`` and append it to ``sect.sets``.

        The level comes from the node's ``ilvl`` attribute. Direct children
        supply the remaining settings through their ``val`` attribute, with a
        fallback when it is empty or missing: ``start`` (0), ``numFmt``
        ('bullet'), ``lvlText`` ('*') and ``lvlJc`` ('left').
        """
        # element tag -> (attribute on the numbering set, fallback value)
        text_settings = {
            'numFmt': ('num_fmt', 'bullet'),
            'lvlText': ('lvl_text', '*'),
            'lvlJc': ('lvl_jc', 'left'),
        }

        numbering = NumberingSets()
        numbering.level = int(
            XmlPreprocessor.get_clear_attribute_val(node, 'ilvl'))

        for element in node:
            kind = XmlPreprocessor.get_clear_tag(element)
            if kind == 'start':
                raw_start = XmlPreprocessor.get_clear_attribute_val(
                    element, 'val')
                numbering.start = int(raw_start or '0')
            elif kind in text_settings:
                field, fallback = text_settings[kind]
                value = XmlPreprocessor.get_clear_attribute_val(element, 'val')
                setattr(numbering, field, value or fallback)
        sect.sets.append(numbering)

예제 #10

파일 보기

파일: xml_wordx_extractor.py 프로젝트: tx-anin/lexpredict-contraxsuite

    def fill_table_rows(self, tbl: DocTable, node: Any):
        """
        Find the table rows under ``node`` and add them to ``tbl``.

        Direct ``tr`` children are passed to ``fill_table_row``. Only when
        there are none is the search repeated inside each child.
        """
        rows_found = False
        for child in node:
            if XmlPreprocessor.get_clear_tag(child) == 'tr':
                rows_found = True
                self.fill_table_row(tbl, child)

        if not rows_found:
            for child in node:
                self.fill_table_rows(tbl, child)

예제 #11

파일 보기

    def traverse_tree(self, node: Any) -> None:
        """
        Read every ``style`` element at the shallowest level that has one.

        Direct ``style`` children, e.g.
        <w:style w:type="paragraph" w:styleId="Heading2">, are passed to
        ``read_section``. Only when there are none is each child traversed
        recursively.
        """
        must_descend = True
        for element in node:
            if XmlPreprocessor.get_clear_tag(element) != 'style':
                continue
            self.read_section(element)
            must_descend = False

        if must_descend:
            for element in node:
                self.traverse_tree(element)

예제 #12

파일 보기

파일: xml_wordx_extractor.py 프로젝트: tx-anin/lexpredict-contraxsuite

    def fill_table_row(self, tbl: DocTable, node: Any):
        """
        Append one row to ``tbl`` and fill it with the cells found under ``node``.

        Direct ``tc`` children of ``node`` are passed to ``fill_table_cell``.
        When ``node`` has no direct ``tc`` child, the search continues inside
        each child, and the cells found there are added to the same row.

        Exactly one ``DocTableRow`` is appended per call. Earlier, the
        recursive search re-entered this method and therefore appended an
        extra empty row for every element it visited.
        """
        row = DocTableRow()
        tbl.rows.append(row)

        def collect_cells(parent: Any) -> None:
            # Fill ``row`` from the shallowest level under ``parent``
            # that contains cells.
            found_cell = False
            for elt in parent:
                if XmlPreprocessor.get_clear_tag(elt) == 'tc':
                    found_cell = True
                    self.fill_table_cell(row, elt)
            if found_cell:
                return
            # no direct cells here: they may be wrapped in another element
            for elt in parent:
                collect_cells(elt)

        collect_cells(node)

예제 #13

파일 보기

파일: xml_wordx_extractor.py 프로젝트: tx-anin/lexpredict-contraxsuite

 def explore_paragraph(self, pr: DocParagraph, node: Any):
     """
     Fill ``pr`` from the subtree of ``node``.

     ``pPr`` is passed to ``explore_paragraph_properties`` and ``hyperlink``
     to ``add_hyperlink``. ``tab`` appends a tab character and ``t`` appends
     its non-empty text to ``pr.text_items``. Every other element is
     searched recursively.
     """
     for element in node:
         kind = XmlPreprocessor.get_clear_tag(element)
         if kind == 'pPr':
             self.explore_paragraph_properties(pr, element)
             continue
         if kind == 'hyperlink':
             self.add_hyperlink(element, pr.text_items)
             continue
         if kind == 'tab':
             pr.text_items.append(DocText('\t'))
             continue
         if kind == 't':
             if element.text:
                 pr.text_items.append(DocText(element.text))
             continue
         # runs ('r') and all remaining elements are searched the same way
         self.explore_paragraph(pr, element)

예제 #14

파일 보기

    def traverse_tree(self, node: Any) -> None:
        """
        Read numbering sections and list-number mappings under ``node``.

        Direct ``abstractNum`` children, e.g.
        <w:abstractNum w:abstractNumId="1" restartNumberingAfterBreak="0">,
        are passed to ``read_section`` and direct ``num`` children to
        ``read_num_map``. Unless both kinds were seen among the direct
        children, each child is then traversed recursively.
        """
        sections_seen = False
        num_maps_seen = False
        for element in node:
            kind = XmlPreprocessor.get_clear_tag(element)
            if kind == 'abstractNum':
                self.read_section(element)
                sections_seen = True
            elif kind == 'num':
                num_maps_seen = True
                self.read_num_map(element)

        if not (sections_seen and num_maps_seen):
            for element in node:
                self.traverse_tree(element)

예제 #15

파일 보기

 def read_section(self, node: Any) -> None:
     """
     Register the style described by ``node`` in ``self.sets``.

     Expected input:
     <w:style w:type="paragraph" w:styleId="Heading2">
         <w:aliases w:val="1.1,2nd,B Sub/Bold,B Sub/Bold1,B Sub/Bold11,B Sub/Bold12,B Sub/Bold13" />
         <w:pPr>
             <w:keepNext />
             <w:numPr>
                 <w:ilvl w:val="1" />
                 <w:numId w:val="19" />
             </w:numPr>
         </w:pPr>
     </w:style>

     A node without a ``styleId`` attribute is ignored. If
     ``explore_section`` raises an ``Exception``, the entry just added is
     removed again and the error is not propagated.
     """
     style_id = XmlPreprocessor.get_clear_attribute_val(node, 'styleId')
     if not style_id:
         return
     style_set = StyleSets()
     style_set.styleId = style_id
     self.sets[style_id] = style_set
     try:
         self.explore_section(node, style_set)
     except Exception:
         # Best effort: drop a style that could not be read completely.
         # KeyboardInterrupt and SystemExit are not caught here, so they
         # are no longer silently swallowed.
         del self.sets[style_id]

예제 #16

파일 보기

 def test_remove_namespace(self):
     # both the namespace-qualified name and the bare name must come back
     # as 'val'
     qualified = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main"}val'
     for raw_name in (qualified, 'val'):
         self.assertEqual('val', XmlPreprocessor.remove_namespace(raw_name))