예제 #1
0
 def cell_with_paragraphs(self):
     """Return a `_Cell` wrapping a `w:tc` built with two paragraph children.

     The element is assembled with the `a_tc()` builder (namespace
     declarations included) and two `a_p()` child builders.
     """
     tc_builder = a_tc().with_nsdecls()
     for _ in range(2):
         tc_builder = tc_builder.with_child(a_p())
     return _Cell(tc_builder.element)
예제 #2
0
 def add_table_fixture(self, request):
     """Return `(cell, expected_xml)` for an add-table test case.

     `request.param` is unpacked into two values: the first is passed to
     `element()` to build the cell's element, the second (with a fixed
     table suffix appended) is passed to `xml()`.
     """
     start_cxml, result_cxml = request.param
     # A new table carries some overhead elements, and because it lives in
     # a cell it is followed by a blank paragraph.
     table_overhead_cxml = (
         '/(w:tblPr/w:tblW{w:type=auto,w:w=0},w:tblGrid),w:p)'
     )
     result_cxml = result_cxml + table_overhead_cxml
     cell = _Cell(element(start_cxml), None)
     return cell, xml(result_cxml)
예제 #3
0
 def add_table_fixture(self, request):
     """Build the cell under test and the XML expected after adding a table.

     The two members of `request.param` feed `element()` (for the cell)
     and `xml()` (for the expectation, after a table suffix is appended).
     """
     before_cxml, after_cxml = request.param
     # The added table brings overhead elements with it, plus a trailing
     # blank paragraph because the table sits inside a cell.
     after_cxml = ''.join((
         after_cxml,
         '/(w:tblPr/w:tblW{w:type=auto,w:w=0},w:tblGrid),w:p)',
     ))
     tc = element(before_cxml)
     cell = _Cell(tc, None)
     expected_xml = xml(after_cxml)
     return cell, expected_xml
예제 #4
0
파일: converter.py 프로젝트: ivbeg/docx2csv
def __extract_table(table):
    """Extract the cell texts of *table* as a list of rows.

    Walks `table._tbl.tr_lst` and, for every `tc` in a row, emits one entry
    per grid column the cell spans (`tc.grid_span`):

    * a vertically merged continuation cell (`vMerge == ST_Merge.CONTINUE`)
      repeats the value found in the same column of the previous row;
    * the extra columns of a horizontally spanning cell repeat the value
      just emitted for that cell;
    * otherwise the cell text is taken, newlines are replaced by spaces and
      the result is UTF-8 encoded.

    Returns:
        A list with one list per table row; each inner list holds the
        (encoded) values for that row's grid columns.
    """
    results = []
    for tr in table._tbl.tr_lst:
        row = []
        for tc in tr.tc_lst:
            for grid_span_idx in range(tc.grid_span):
                if tc.vMerge == ST_Merge.CONTINUE:
                    # `len(row)` is the index of the column being filled, so
                    # this picks the cell directly above.  The previous
                    # `len(r) - 1` picked the column to its left (or the last
                    # column of the previous row when the row was empty).
                    row.append(results[-1][len(row)])
                elif grid_span_idx > 0:
                    row.append(row[-1])
                else:
                    cell = _Cell(tc, table)
                    # NOTE(review): `.encode('utf8')` yields bytes on
                    # Python 3; kept because callers may rely on it.
                    row.append(cell.text.replace('\n', ' ').encode('utf8'))
        results.append(row)
    return results
예제 #5
0
def __extract_table(table):
    """Extract the cell texts of *table* as a list of rows.

    Walks `table._tbl.tr_lst` and, for every `tc` in a row, emits one entry
    per grid column the cell spans (`tc.grid_span`):

    * a vertically merged continuation cell (`vMerge == ST_Merge.CONTINUE`)
      repeats the value found in the same column of the previous row;
    * the extra columns of a horizontally spanning cell repeat the value
      just emitted for that cell;
    * otherwise the cell text is taken, newlines are replaced by spaces and
      the result is UTF-8 encoded.

    Returns:
        A list with one list per table row; each inner list holds the
        (encoded) values for that row's grid columns.
    """
    results = []
    for tr in table._tbl.tr_lst:
        row = []
        for tc in tr.tc_lst:
            for grid_span_idx in range(tc.grid_span):
                if tc.vMerge == ST_Merge.CONTINUE:
                    # `len(row)` is the index of the column being filled, so
                    # this picks the cell directly above.  The previous
                    # `len(r) - 1` picked the column to its left (or the last
                    # column of the previous row when the row was empty).
                    row.append(results[-1][len(row)])
                elif grid_span_idx > 0:
                    row.append(row[-1])
                else:
                    cell = _Cell(tc, table)
                    # NOTE(review): `.encode('utf8')` yields bytes on
                    # Python 3; kept because callers may rely on it.
                    row.append(cell.text.replace('\n', ' ').encode('utf8'))
        results.append(row)
    return results
예제 #6
0
 def cell_text_fixture(self):
     """Return `(cell, text, expected_xml)` for a cell-text test.

     The cell wraps a `w:tc` built with a `tcPr`, a paragraph, a table and
     another paragraph as children.  The expected XML is a `w:tc` holding
     the `tcPr` and a single paragraph whose one run carries `text`.
     """
     # cell under test
     tc_builder = a_tc().with_nsdecls()
     for make_child in (a_tcPr, a_p, a_tbl, a_p):
         tc_builder = tc_builder.with_child(make_child())
     cell = _Cell(tc_builder.element)

     # text to assign
     text = 'foobar'

     # expected XML, assembled from the outside in
     expected_tc = a_tc().with_nsdecls().with_child(a_tcPr())
     paragraph = a_p()
     run = an_r()
     run = run.with_child(a_t().with_text(text))
     paragraph = paragraph.with_child(run)
     expected_xml = expected_tc.with_child(paragraph).xml()

     return cell, text, expected_xml
예제 #7
0
def table_print(b):
    """Translate every non-empty paragraph of table *b* in place.

    The primary path walks `b.rows` -> `row.cells` -> `cell.paragraphs` and
    replaces each non-empty paragraph text with the result of
    `Translator().translate(text, dest='hi')`.

    If that walk raises, a fallback path iterates the raw `tc` elements via
    `b._tbl.iter_tcs()`, wraps each in a `_Cell`, and translates every
    non-empty `Paragraph` yielded by `iter_block_items(cell)`.  In the
    fallback, a failure on one paragraph is skipped so the remaining
    paragraphs are still processed.

    Args:
        b: Table-like object exposing `rows` and `_tbl`.

    Returns:
        None; paragraphs are modified in place.
    """
    try:
        for row in b.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    if paragraph.text:
                        translator = Translator()
                        tran = translator.translate(paragraph.text, dest='hi')
                        paragraph.text = tran.text
    except Exception:
        # The row/cell walk failed; retry cell by cell on the raw elements.
        for tc in b._tbl.iter_tcs():
            cell = _Cell(tc, b)
            for b_tc in iter_block_items(cell):
                if not isinstance(b_tc, Paragraph) or not b_tc.text:
                    continue
                try:
                    translator = Translator()
                    tran = translator.translate(b_tc.text, dest='hi')
                    b_tc.text = tran.text
                    print("_______Trasnlating______\n")
                except Exception:
                    # Deliberate best-effort: skip paragraphs that cannot be
                    # translated.  `Exception` (not a bare `except`) lets
                    # KeyboardInterrupt / SystemExit propagate.
                    pass
예제 #8
0
 def tables_fixture(self, request):
     """Return `(cell, expected_count)` built from `request.param`.

     The first member of the param is turned into an element via
     `element()` and wrapped in a parentless `_Cell`; the second is
     returned unchanged.
     """
     cxml, count = request.param
     tc = element(cxml)
     return _Cell(tc, None), count
예제 #9
0
 def width_get_fixture(self, request):
     """Return `(cell, expected_width)` built from `request.param`.

     The cell wraps `element(<first param member>)` with `None` as its
     parent; the second member is passed through as the expected width.
     """
     cxml, width = request.param
     tc = element(cxml)
     return _Cell(tc, None), width
예제 #10
0
 def merge_fixture(self, tc_, tc_2_, parent_, merged_tc_):
     """Return `(cell, other_cell, merged_tc_)` for a merge test.

     Both cells share `parent_`; `tc_.merge` is configured to return
     `merged_tc_`.
     """
     first = _Cell(tc_, parent_)
     second = _Cell(tc_2_, parent_)
     tc_.merge.return_value = merged_tc_
     return first, second, merged_tc_
예제 #11
0
 def paragraphs_fixture(self):
     """Return a parentless `_Cell` over `element('w:tc/(w:p, w:p)')`."""
     tc = element('w:tc/(w:p, w:p)')
     cell = _Cell(tc, None)
     return cell
예제 #12
0
 def add_paragraph_fixture(self, request):
     """Return `(cell, expected_xml)` built from `request.param`.

     The first param member becomes the cell's element via `element()`;
     the second is rendered with `xml()` as the expected result.
     """
     start_cxml, result_cxml = request.param
     tc = element(start_cxml)
     cell = _Cell(tc, None)
     return cell, xml(result_cxml)
예제 #13
0
 def alignment_set_fixture(self, request):
     """Return `(cell, new_value, expected_xml)` built from `request.param`.

     The param is a 3-tuple: the source for the cell's element, the value
     to assign (returned unchanged), and the source for the expected XML.
     """
     start_cxml, value, result_cxml = request.param
     tc = element(start_cxml)
     cell = _Cell(tc, None)
     return cell, value, xml(result_cxml)
예제 #14
0
 def width_set_fixture(self, request):
     """Return `(cell, new_value, expected_xml)` built from `request.param`.

     The param is a 3-tuple: the source for the cell's element, the value
     to assign (returned unchanged), and the source for the expected XML.
     """
     start_cxml, value, result_cxml = request.param
     tc = element(start_cxml)
     cell = _Cell(tc, None)
     return cell, value, xml(result_cxml)
예제 #15
0
 def add_paragraph_fixture(self, request):
     """Build the cell under test and the XML expected after the call.

     `request.param` supplies two values: one fed to `element()` for the
     cell, one fed to `xml()` for the expectation.
     """
     before_cxml, after_cxml = request.param
     cell = _Cell(element(before_cxml), None)
     expected = xml(after_cxml)
     return cell, expected
예제 #16
0
 def add_table_fixture(self, request):
     """Return `(cell, expected_xml)` for an add-table test.

     The cell wraps `element('w:tc/w:p')`; the expected XML is the item at
     index 1 of `snippet_seq('new-tbl')`.
     """
     tc = element('w:tc/w:p')
     cell = _Cell(tc, None)
     snippets = snippet_seq('new-tbl')
     return cell, snippets[1]
예제 #17
0
 def alignment_get_fixture(self, request):
     """Return `(cell, expected_value)` built from `request.param`.

     The cell wraps `element(<first param member>)` with `None` as its
     parent; the second member is passed through unchanged.
     """
     cxml, expected = request.param
     tc = element(cxml)
     return _Cell(tc, None), expected
예제 #18
0
 def paragraphs_fixture(self):
     """Provide a `_Cell` (parent `None`) built from `'w:tc/(w:p, w:p)'`."""
     two_paragraph_tc = element('w:tc/(w:p, w:p)')
     return _Cell(two_paragraph_tc, None)
예제 #19
0
 def merge_fixture(self, tc_, tc_2_, parent_, merged_tc_):
     """Provide two sibling cells and the element their merge returns.

     `tc_.merge` is set up to return `merged_tc_`; both `_Cell` objects
     are created with `parent_` as their parent.
     """
     cells = tuple(_Cell(tc, parent_) for tc in (tc_, tc_2_))
     tc_.merge.return_value = merged_tc_
     return cells + (merged_tc_,)
예제 #20
0
 def alignment_get_fixture(self, request):
     """Provide the cell under test and the value expected from it.

     Both come from `request.param`: the first member is passed through
     `element()` and wrapped in a parentless `_Cell`.
     """
     source_cxml, value = request.param
     cell = _Cell(element(source_cxml), None)
     return cell, value
예제 #21
0
 def width_set_fixture(self, request):
     """Provide the cell, the value to assign and the XML expected after.

     `request.param` holds, in order, the source for the cell's element,
     the new value, and the source rendered by `xml()`.
     """
     before_cxml, width, after_cxml = request.param
     cell = _Cell(element(before_cxml), None)
     expected = xml(after_cxml)
     return cell, width, expected
예제 #22
0
 def add_table_fixture(self, request):
     """Provide a one-paragraph cell and the XML expected after add-table.

     The expectation is the second entry (index 1) of
     `snippet_seq('new-tbl')`.
     """
     single_paragraph_tc = element('w:tc/w:p')
     cell = _Cell(single_paragraph_tc, None)
     return cell, snippet_seq('new-tbl')[1]
예제 #23
0
 def tables_fixture(self, request):
     """Provide the cell under test and the expected count.

     Both come from `request.param`; the first member is passed through
     `element()` and wrapped in a parentless `_Cell`.
     """
     source_cxml, count = request.param
     cell = _Cell(element(source_cxml), None)
     return cell, count
예제 #24
0
 def alignment_set_fixture(self, request):
     """Provide the cell, the value to assign and the XML expected after.

     `request.param` holds, in order, the source for the cell's element,
     the new value, and the source rendered by `xml()`.
     """
     before_cxml, alignment, after_cxml = request.param
     cell = _Cell(element(before_cxml), None)
     expected = xml(after_cxml)
     return cell, alignment, expected
예제 #25
0
 def width_get_fixture(self, request):
     """Provide the cell under test and the width expected from it.

     Both come from `request.param`; the first member is passed through
     `element()` and wrapped in a parentless `_Cell`.
     """
     source_cxml, width = request.param
     cell = _Cell(element(source_cxml), None)
     return cell, width
def _pre_story_run_text(paragraph_element):
    """Return the concatenated text of all runs of a `CT_P` element."""
    return ''.join(run.text for run in paragraph_element.r_lst)


def _pre_story_highlight(run, text, un_match_tokens):
    """Apply the shared highlight rules to *run*, consuming a matched token."""
    # NOTE(review): 4 and 7 are raw highlight indexes; presumably
    # WD_COLOR_INDEX.BRIGHT_GREEN and WD_COLOR_INDEX.YELLOW -- confirm.
    if not text.isspace():
        run.font.highlight_color = 4
    if un_match_tokens is not None and text in un_match_tokens:
        run.font.highlight_color = 7
        # Each token marks at most one run, so drop it once used.
        un_match_tokens.remove(text)


def _pre_story_rewrite_nested_table(nested_tbl, un_match_tokens):
    """Blank the visible paragraphs of a nested table and re-add their text."""
    for nested_row in nested_tbl.tr_lst:
        for nested_tc in nested_row.tc_lst:
            visible_texts = []
            # Materialise the block list before touching any run text.
            for block in list(nested_tc.iter_block_items()):
                if not isinstance(block, CT_P):
                    continue
                text = _pre_story_run_text(block)
                if not text or text.isspace():
                    continue
                visible_texts.append(text)
                # The paragraph stays in place; only its runs are emptied.
                for run_element in block.r_lst:
                    run_element.text = ''

            if not visible_texts:
                continue
            cell_proxy = _Cell(nested_tc, nested_tbl)
            for text in visible_texts:
                # Every collected text becomes a new run of the last paragraph.
                run = cell_proxy.paragraphs[-1].add_run(text)
                run.font.name = '宋体'
                run.font.size = 140000
                _pre_story_highlight(run, text, un_match_tokens)


def pre_story(input_file, un_match_tokens=None):
    """Rewrite a .docx so its text is re-emitted as highlighted runs.

    For every cell of every table returned by `Document.tables`:

    * each direct paragraph child with visible text is removed from the
      cell and its text is re-added (first text in a new paragraph, later
      texts as further runs of the cell's last paragraph);
    * each nested table has the runs of its visible paragraphs blanked and
      the texts re-added as runs of the nested cell's last paragraph.

    Every paragraph in `Document.paragraphs` is then emptied and its text
    re-added as a single run.  Each re-added run gets highlight index 4
    unless its text is whitespace-only, and highlight index 7 if its text
    equals one of the un-matched tokens (that token is then consumed).

    Args:
        input_file: Path of the .docx file to read.
        un_match_tokens: Optional string of tokens separated by ``'$zl$'``.
            ``None`` (the default) disables token matching.

    Returns:
        The path of the saved copy (``_new`` inserted before the extension),
        or `input_file` unchanged if processing failed for any reason.
    """
    # Bug fix: the default `None` used to crash here with AttributeError,
    # although the rest of the function already guards on `is not None`.
    if un_match_tokens is not None:
        un_match_tokens = un_match_tokens.split('$zl$')
    try:
        dot_pos = input_file.rfind('.')
        result_file = input_file[:dot_pos] + '_new' + input_file[dot_pos:]
        word_obj = docx.Document(input_file)

        for cur_table in word_obj.tables:
            for row_index in range(len(cur_table.rows)):
                for col_index in range(len(cur_table.columns)):
                    cur_cell = cur_table.cell(row_index, col_index)
                    cell_element = cur_cell._tc
                    cell_texts = []

                    # Walk children back to front so deleting one does not
                    # shift the indexes still to be visited.
                    for child_index in range(len(cell_element) - 1, -1, -1):
                        child = cell_element[child_index]
                        if isinstance(child, CT_P):
                            text = _pre_story_run_text(child)
                            if text and not text.isspace():
                                # insert(0, ...) restores document order.
                                cell_texts.insert(0, text)
                                del cell_element[child_index]
                        elif isinstance(child, CT_Tbl):
                            _pre_story_rewrite_nested_table(
                                child, un_match_tokens)
                        # Any other child element is left untouched.

                    for text_index, text in enumerate(cell_texts):
                        if text_index == 0:
                            run = cur_cell.add_paragraph().add_run(text)
                        else:
                            run = cur_cell.paragraphs[-1].add_run(text)
                        run.font.name = '宋体'
                        # NOTE(review): nested-table runs use 140000 while
                        # these use 140001 -- looks unintentional; confirm.
                        run.font.size = 140001
                        _pre_story_highlight(run, text, un_match_tokens)

        for cur_p in word_obj.paragraphs:
            cur_p_text = cur_p.text
            cur_p.text = ''
            run = cur_p.add_run(cur_p_text)
            _pre_story_highlight(run, cur_p_text, un_match_tokens)

        word_obj.save(result_file)
    except Exception:
        # Deliberate best-effort: on any failure hand back the original path
        # so the caller keeps working with the unmodified document.
        result_file = input_file
    return result_file