Example #1
0
def iter_block_items(parent):
    """
    Yield every paragraph and table that is a direct child of *parent*,
    preserving document order.

    *parent* must be a _Document (its body element is walked) or a _Cell
    (its ``_tc`` element is walked); any other type raises ValueError once
    iteration starts. Each yielded item is a Paragraph or a Table created
    with *parent* as its parent. Children that are neither CT_P nor CT_Tbl
    are skipped.
    """
    if isinstance(parent, _Document):
        container = parent.element.body
    elif isinstance(parent, _Cell):
        container = parent._tc
    else:
        raise ValueError("something's not right")

    for element in container.iterchildren():
        if isinstance(element, CT_P):
            yield Paragraph(element, parent)
            continue
        if isinstance(element, CT_Tbl):
            yield Table(element, parent)
Example #2
0
    def fill_paragraph(self, par):
        """
        Replace the paragraph *par* with a table filled from this object.

        ``self.headers`` (optional) is written into the first table row and
        every entry of ``self.data`` (optional) into one further row. Each
        value is written as ``str(value)``. The column count is taken from the
        first data row, or from the headers when there is no data.
        """
        # Treat a missing data set / header list like an empty one so that the
        # sizing logic and the fill loops agree; iterating ``None`` in the fill
        # loop used to raise TypeError although the row count tolerated it.
        data = self.data or []
        headers = self.headers or []

        first_data_row = 1 if headers else 0
        nr_rows = len(data) + first_data_row
        nr_cols = len(data[0]) if data else len(headers)

        # replace the par element with a table xml element and build a docx.Table from it
        # so we can use that to fill it up
        table_el = CT_Tbl.new_tbl(nr_rows, nr_cols, par.part.document._block_width)
        par._element.getparent().replace(par._element, table_el)
        table = Table(table_el, self)

        for col_idx, header in enumerate(headers):
            table.rows[0].cells[col_idx].text = str(header)

        for row_idx, row_values in enumerate(data, start=first_data_row):
            for col_idx, cell_value in enumerate(row_values):
                table.rows[row_idx].cells[col_idx].text = str(cell_value)
Example #3
0
def text_converter(filename):
    """
    Return the text content of the file at *filename* as one string.

    The file type is decided from the file extension, compared
    case-insensitively:

    * an extension containing ``doc`` is opened with ``Document``; every
      body paragraph contributes its text plus a newline, and every table
      contributes one line per row in which each cell text is followed by
      ``:``, with a blank line after the table;
    * an extension containing ``pdf`` is handed to
      ``func.extract_text_from_pdf`` and its result is returned.

    Any other extension yields an empty string.
    """
    extension = os.path.splitext(filename)[1].lower()
    text = ''

    if 'doc' in extension:
        doc = Document(filename)

        # Collect the pieces and join once instead of repeatedly
        # concatenating onto a growing string.
        pieces = []
        for child in doc.element.body.iterchildren():
            if isinstance(child, CT_P):
                pieces.append(Paragraph(child, doc).text + '\n')
            elif isinstance(child, CT_Tbl):
                for row in Table(child, doc).rows:
                    row_text = ''.join(cell.text + ':' for cell in row.cells)
                    pieces.append(row_text + '\n')
                # blank line separating the table from what follows
                pieces.append('\n')
        text = ''.join(pieces)

    if 'pdf' in extension:
        text = func.extract_text_from_pdf(filename)

    return text
def iter_block_items(parent):
    """
    Yield one 4-tuple per paragraph or table child of *parent*, in document
    order.

    *parent* may be a Document (its body is walked), a _Cell (its ``_tc``
    element is walked) or a CT_Tc element itself; anything else raises
    ValueError once iteration starts.

    Each yielded tuple is ``(block, element, ilvl_val, cell_color_filled_flag)``:

    * ``block`` -- a Paragraph or Table created for the child with *parent*
      as its parent,
    * ``element`` -- the underlying CT_P or CT_Tbl child,
    * ``ilvl_val`` -- ``find_ilvl_val(element)`` for a paragraph, ``0`` for
      a table,
    * ``cell_color_filled_flag`` -- ``1`` when a CT_TcPr child with a
      sub-element whose ``str()`` contains ``"shd"`` was seen since the
      previously yielded block, otherwise ``0``.
    """
    if isinstance(parent, Document):  # The type of root is determined.
        parent_elm = parent.element.body
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    elif isinstance(parent, CT_Tc):
        parent_elm = parent
    else:
        raise ValueError("something's not right")

    cell_color_filled_flag = 0
    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            ilvl_val = find_ilvl_val(child)
            yield Paragraph(child, parent), child, ilvl_val, cell_color_filled_flag
            # the flag only applies to the first block after the cell properties
            cell_color_filled_flag = 0
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent), child, 0, cell_color_filled_flag
            cell_color_filled_flag = 0
        elif isinstance(child, CT_TcPr):
            # cell properties: remember whether any of them looks like shading
            if any("shd" in str(tcpr) for tcpr in child.iterchildren()):
                cell_color_filled_flag = 1
Example #5
0
def format_space_group(table: Table,
                       cif: CifContainer,
                       column: int,
                       row: int = 6) -> None:
    """
    Write the formatted space group symbol into a cell of *table*.

    :param table: table that receives the symbol.
    :param cif: source of ``space_group`` and, when available,
                ``spgr_number``.
    :param column: column index of the target cell.
    :param row: row index of the target cell (default 6).

    The symbol is converted to MathML and appended as a Word math element
    to the first paragraph of the cell. If anything in that path raises,
    the plain ``cif.space_group`` value is added as text instead. The
    space group number is appended in parentheses only when it is known.
    """
    space_group = cif.space_group
    it_number = ''
    with suppress(AttributeError):
        it_number = str(cif.spgr_number)
    paragraph = table.cell(row, column).paragraphs[0]
    try:
        # The HM space group symbol
        s = SpaceGroups()
        spgrxml = s.to_mathml(space_group)
        paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.LEFT
        paragraph._element.append(math_to_word(spgrxml))
        # Without a number this used to leave an empty " ()" behind the
        # symbol; the fallback branch below already guarded against that.
        if it_number:
            paragraph.add_run(' ({})'.format(it_number))
    except Exception:
        # Best-effort fallback: plain text instead of the math element.
        if it_number:
            paragraph.add_run('{} ({})'.format(space_group, it_number))
        else:
            paragraph.add_run(space_group)
Example #6
0
from docx import Document
from docx.shared import Inches
from docx.table import Table

# %%
# Interactive exploration of a .docx file; bare expressions are cell outputs.
docu = Document("C:/Program Files/pdfbox/2020Q1.docx")
docu

# %%
len(docu.tables)
# %%
type(docu.tables[10])

# %%
# Keep the table taken from the document. Rebinding the name to ``Table()``
# threw that table away and raised TypeError, because the Table constructor
# needs the table element and its parent. The type comment keeps editor
# completion for the variable.
table = docu.tables[7]  # type: Table

# %%
table.cell(0, 0).text
# %%
len(docu.paragraphs)

# %%

# %%
for parag in docu.paragraphs:
    print(parag.text)
# %%
print(docu.paragraphs[149].text)
# %%
Example #7
0
 def add_row_fixture(self):
     """
     Return a Table wrapping a 1x2 builder element together with the XML
     produced by a 2x2 builder.
     """
     source_tbl = _tbl_bldr(rows=1, cols=2).element
     fixture_table = Table(source_tbl)
     return fixture_table, _tbl_bldr(rows=2, cols=2).xml()
 def tables(self):
     """
     Return a list with one Table per element of ``self._tb.tbl_lst``,
     each created with this object as its parent.
     """
     proxies = []
     for tbl_elm in self._tb.tbl_lst:
         proxies.append(Table(parent=self, tbl=tbl_elm))
     return proxies
def __create_variable_rows(table: Table, formula: Formula, total_rows: int):
    """
    Lay out and fill the variable section of *table* for *formula*.

    The section starts at row 4: a numbered cell in column 0, a header row,
    one row per entry of ``formula.variables`` starting at row 5 (``None``
    entries leave their row untouched), the formula's own coordinate and
    name in the last row, and a shaded sub-header row at ``total_rows - 2``.

    :param table: table to modify in place.
    :param formula: provides ``variables``, ``coordinate`` and ``name``.
    :param total_rows: number of rows in *table*; used to address the last
                       two rows.
    """
    # Merge as appropriate
    table.cell(4, 0).merge(table.cell(total_rows - 1, 0))
    table.cell(4, 4).merge(table.cell(4, 6))
    for col in (1, 2, 3):
        table.cell(total_rows - 2, col).merge(table.cell(total_rows - 1, col))

    # Numbered cell
    cell = table.cell(4, 0)
    cell.text = '3'
    cell.paragraphs[0].style = 'Cell Text'
    cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.TOP

    # Header row (columns 1-4)
    headers = ['Formula Variable', 'Cell', 'Range Name', 'Value']
    for col, title in enumerate(headers, start=1):
        cell = table.cell(4, col)
        cell.text = title
        cell.paragraphs[0].style = 'Cell Header'
        cell.shade_cell(__FILL_COLOR)

    # Individual Variable rows
    for i, var in enumerate(formula.variables):
        if var is None:
            continue
        row = 5 + i
        table.cell(row, 4).merge(table.cell(row, 6))

        cell = table.cell(row, 2)
        cell.text = var.coordinate or 'N/A'
        cell.paragraphs[0].style = 'Cell Text'

        cell = table.cell(row, 3)
        cell.text = var.name
        cell.paragraphs[0].style = 'Cell Text'

        # TODO: work on figuring out what the value is supposed to be -- input/output?
        # TODO: May have to be manually entered?  Possibly in the spreadsheet
        # TODO: Two step process of creating the file containing the formulas, then entering the values they should be

        cell = table.cell(row, 4)
        # NOTE: Possibilities for output values
        #  - Name/Formula/Variable (i: val/form, o: val)
        #  - Name/Formula/Variable (i: val/form, o: None) -
        #  - Name/Formula/Variable (i: None, o: None) - Blank cells?
        #  - Global Names?
        # Only a missing output becomes an empty cell. ``var.output or ''``
        # also blanked a legitimate 0, and a numeric output was never
        # converted to the text the cell expects.
        output = var.output
        cell.text = '' if output is None else str(output)
        cell.paragraphs[0].style = 'Cell Text'

    # TODO: Add formula variable itself (Last Row)
    cell = table.cell(total_rows - 1, 2)
    cell.text = formula.coordinate
    cell.paragraphs[0].style = 'Cell Text'

    cell = table.cell(total_rows - 1, 3)
    cell.text = formula.name
    cell.paragraphs[0].style = 'Cell Text'

    # TODO: Make separate function for this?
    # Sub header rows (columns 4-6)
    sub_headers = ['Manual', 'Excel', 'Pass']
    for col, title in enumerate(sub_headers, start=4):
        cell = table.cell(total_rows - 2, col)
        cell.text = title
        cell.paragraphs[0].style = 'Cell Header Center'
        cell.shade_cell(__FILL_COLOR)
Example #10
0
 def table_style_set_fixture(self, request):
     """
     Unpack ``request.param`` into a Table built from the source cxml, the
     style value passed through unchanged, and the XML for the expected cxml.
     """
     source_cxml, new_style, target_cxml = request.param
     return Table(element(source_cxml), None), new_style, xml(target_cxml)
Example #11
0
 def table_fixture(self):
     """Return a Table constructed with ``None`` for both arguments."""
     return Table(None, None)
Example #12
0
 def add_row_fixture(self):
     """
     Return a Table parsed from the first 'add-row-col' snippet together
     with the second snippet as the expected XML.
     """
     add_row_snippets = snippet_seq('add-row-col')
     fixture_table = Table(parse_xml(add_row_snippets[0]), None)
     return fixture_table, add_row_snippets[1]
Example #13
0
def add_table_rows(table: Table, rows: int) -> None:
    """Add rows to the table: call ``table.add_row()`` *rows* times."""
    for _unused_index in range(rows):
        table.add_row()
Example #14
0
def populate_main_table_values(main_table: Table, cif: CifContainer, column=1):
    """
    Fills the main table with residuals. Column, by column.

    :param main_table: table whose cells in *column* are written.
    :param cif: source of all values, read with ``cif[keyword]`` plus the
                attributes ``fileobj``, ``is_centrosymm`` and whatever
                ``format_space_group`` reads.
    :param column: index of the table column to fill (default 1).

    Rows 0 and 1 receive the file name (bold) and the CCDC number. The
    keywords in ``cif_keywords_list`` are then written to their rows, and
    finally the cells that need special formatting or derived values are
    written.
    """
    radiation_type = cif['_diffrn_radiation_type']
    radiation_wavelength = cif['_diffrn_radiation_wavelength']
    crystal_size_min = cif['_exptl_crystal_size_min']
    crystal_size_mid = cif['_exptl_crystal_size_mid']
    crystal_size_max = cif['_exptl_crystal_size_max']
    limit_h_min = cif['_diffrn_reflns_limit_h_min']
    limit_h_max = cif['_diffrn_reflns_limit_h_max']
    limit_k_min = cif['_diffrn_reflns_limit_k_min']
    limit_k_max = cif['_diffrn_reflns_limit_k_max']
    theta_min = cif['_diffrn_reflns_theta_min']
    theta_max = cif['_diffrn_reflns_theta_max']
    limit_l_min = cif['_diffrn_reflns_limit_l_min']
    limit_l_max = cif['_diffrn_reflns_limit_l_max']
    reflns_number_total = cif['_reflns_number_total']
    reflns_av_R_equivalents = cif['_diffrn_reflns_av_R_equivalents']
    reflns_av_unetI = cif['_diffrn_reflns_av_unetI/netI']
    ls_number_reflns = cif['_refine_ls_number_reflns']
    ls_number_restraints = cif['_refine_ls_number_restraints']
    ls_number_parameters = cif['_refine_ls_number_parameters']
    ls_R_factor_gt = cif['_refine_ls_R_factor_gt']
    ls_wR_factor_gt = cif['_refine_ls_wR_factor_gt']
    ls_R_factor_all = cif['_refine_ls_R_factor_all']
    ls_wR_factor_ref = cif['_refine_ls_wR_factor_ref']
    goof = cif['_refine_ls_goodness_of_fit_ref']

    main_table.cell(0,
                    column).paragraphs[0].add_run(cif.fileobj.name).bold = True
    main_table.cell(1, column).paragraphs[0].add_run(
        cif['_database_code_depnum_ccdc_archive'])

    # Set text for all usual cif keywords by a lookup table:
    for key in cif_keywords_list:
        # key[0] is the cif keyword, key[1] contains the row number:
        value = cif[key[0]]
        main_table.cell(key[1] + 1, column).text = value if value else '?'
    # Now the special handling:
    # The sum formula:
    if cif['_chemical_formula_sum']:
        sum_formula_group = make_sumform_to_group_of_str_and_numbers(cif)
        formula_paragraph = main_table.cell(2, column).paragraphs[0]
        for word in sum_formula_group:
            formula_run = formula_paragraph.add_run(word)
            # numeric parts of the formula are set as subscripts
            if isfloat(word):
                formula_run.font.subscript = True
    else:
        main_table.cell(2, column).paragraphs[0].add_run('no sum formula')
    format_space_group(main_table, cif, column, row=6)
    try:
        completeness = "{0:.1f} %".format(
            round(float(cif['_diffrn_measured_fraction_theta_full']) * 100, 1))
    except ValueError:
        completeness = '?'
    try:
        diff_density_min = "{0:.2f}".format(
            round(float(cif['_refine_diff_density_min']), 2))
    except ValueError:
        diff_density_min = '?'
    try:
        diff_density_max = "{0:.2f}".format(
            round(float(cif['_refine_diff_density_max']), 2))
    except ValueError:
        diff_density_max = '?'

    # now prepare & write all the concatenated & derived cell contents:
    main_table.cell(18, column).text = this_or_quest(crystal_size_max) + timessym + \
                                       this_or_quest(crystal_size_mid) + timessym + \
                                       this_or_quest(crystal_size_min)
    wavelength = str(' ({} ='.format(lambdasym) +
                     this_or_quest(radiation_wavelength) +
                     '{}{})'.format(protected_space, angstrom)).replace(
                         ' ', '')
    # radtype: ('Mo', 'K', '\\a')
    radtype = format_radiation(radiation_type)
    radrun = main_table.cell(21, column).paragraphs[0]
    # radiation type e.g. Mo:
    radrun.add_run(radtype[0])
    # K line:
    radrunita = radrun.add_run(radtype[1])
    radrunita.font.italic = True
    alpha = radrun.add_run(radtype[2])
    alpha.font.italic = True
    alpha.font.subscript = True
    # wavelength lambda:
    radrun.add_run(' ' + wavelength)
    try:
        d_max = ' ({:.2f}{}{})'.format(
            float(radiation_wavelength) / (2 * sin(radians(float(theta_max)))),
            protected_space, angstrom)
        # 2theta range:
        main_table.cell(22, column).text = "{:.2f} to {:.2f}{}" \
            .format(2 * float(theta_min), 2 * float(theta_max), d_max)
    except ValueError:
        main_table.cell(22, column).text = '? to ?'
    main_table.cell(23, column).text = limit_h_min + ' {} h {} '.format(less_or_equal, less_or_equal) + limit_h_max \
                                       + '\n' \
                                       + limit_k_min + ' {} k {} '.format(less_or_equal, less_or_equal) + limit_k_max \
                                       + '\n' \
                                       + limit_l_min + ' {} l {} '.format(less_or_equal, less_or_equal) + limit_l_max
    rint_p = main_table.cell(25, column).paragraphs[0]
    rint_p.add_run(this_or_quest(reflns_number_total) + '\n')
    rint_p.add_run('R').font.italic = True
    rint_p.add_run('int').font.subscript = True
    rint_p.add_run(' = ' + this_or_quest(reflns_av_R_equivalents) + '\n')
    rint_p.add_run('R').font.italic = True
    rint_p.add_run('sigma').font.subscript = True
    rint_p.add_run(' = ' + this_or_quest(reflns_av_unetI))
    main_table.cell(26, column).paragraphs[0].add_run(completeness)
    main_table.cell(27, column).text = this_or_quest(ls_number_reflns) + '/' \
                                       + this_or_quest(ls_number_restraints) + '/' \
                                       + this_or_quest(ls_number_parameters)
    main_table.cell(28, column).paragraphs[0].add_run(goof)
    r2sig_p = main_table.cell(29, column).paragraphs[0]
    r2sig_p.add_run('R').font.italic = True
    r2sig_p.add_run('1').font.subscript = True
    r2sig_p.add_run(' = ' + this_or_quest(ls_R_factor_gt))
    r2sig_p.add_run('\nw')
    r2sig_p.add_run('R').font.italic = True
    r2sig_p.add_run('2').font.subscript = True
    r2sig_p.add_run(' = ' + this_or_quest(ls_wR_factor_gt))
    rfull_p = main_table.cell(30, column).paragraphs[0]
    rfull_p.add_run('R').font.italic = True
    rfull_p.add_run('1').font.subscript = True
    rfull_p.add_run(' = ' + this_or_quest(ls_R_factor_all))
    rfull_p.add_run('\nw')
    rfull_p.add_run('R').font.italic = True
    rfull_p.add_run('2').font.subscript = True
    # passed through this_or_quest like the three sibling R values above,
    # so a missing value is treated the same way in all four places
    rfull_p.add_run(' = ' + this_or_quest(ls_wR_factor_ref))
    main_table.cell(31,
                    column).text = diff_density_max + '/' + diff_density_min
    if not cif.is_centrosymm:
        main_table.cell(
            32, column).text = cif['_refine_ls_abs_structure_Flack'] or '?'
    # NOTE: the extinction coefficient is written unconditionally, whatever
    # its value is.
    exti = cif['_refine_ls_extinction_coef']
    main_table.columns[column].cells[33].text = exti
Example #15
0
def populate_description_columns(main_table: Table) -> None:
    """
    Add the property descriptions to the first column of the main table.

    Rows 0-33 each receive their legend as one or more runs appended to the
    first paragraph of the cell in column 0. Symbols are set in italics and
    exponents/indices as superscript/subscript runs.
    """
    def legend(row: int):
        """Return the first paragraph of the description cell in *row*."""
        return main_table.cell(row, 0).paragraphs[0]

    legend(0).add_run('')
    legend(1).add_run('CCDC number')
    legend(2).add_run('Empirical formula')
    legend(3).add_run('Formula weight')
    legend(4).add_run('Temperature [K]')
    legend(5).add_run('Crystal system')
    legend(6).add_run('Space group (number)')

    # cell axes a, b, c
    par = legend(7)
    par.add_run('a').font.italic = True
    par.add_run(' [{}]'.format(angstrom))
    par = legend(8)
    par.add_run('b').font.italic = True
    par.add_run(' [{}]'.format(angstrom))
    par = legend(9)
    par.add_run('c').font.italic = True
    par.add_run(' [{}]'.format(angstrom))

    # cell angles alpha, beta, gamma
    legend(10).add_run('\u03B1 [{}]'.format(degree_sign))
    legend(11).add_run('\u03B2 [{}]'.format(degree_sign))
    legend(12).add_run('\u03B3 [{}]'.format(degree_sign))

    par = legend(13)
    par.add_run('Volume [{}'.format(angstrom))
    par.add_run('3').font.superscript = True
    par.add_run(']')

    legend(14).add_run('Z').font.italic = True

    par = legend(15)
    par.add_run('\u03C1').font.italic = True
    par.add_run('calc').font.subscript = True
    par.add_run(' [gcm')
    par.add_run(minus_sign + '3').font.superscript = True
    par.add_run(']')

    par = legend(16)
    par.add_run('\u03BC').font.italic = True
    par.add_run(' [mm')
    par.add_run(minus_sign + '1').font.superscript = True
    par.add_run(']')

    par = legend(17)
    par.add_run('F').font.italic = True
    par.add_run('(000)')

    par = legend(18)
    par.add_run('Crystal size [mm')
    par.add_run('3').font.superscript = True
    par.add_run(']')

    legend(19).add_run('Crystal colour')
    legend(20).add_run('Crystal shape')
    legend(21).add_run('Radiation')
    legend(22).add_run('2\u03F4 range [\u00b0]')
    legend(23).add_run('Index ranges')
    legend(24).add_run('Reflections collected')
    legend(25).add_run('Independent reflections')
    legend(26).add_run('Completeness')
    legend(27).add_run('Data / Restraints / Parameters')

    par = legend(28)
    par.add_run('Goodness-of-fit on ')
    par.add_run('F').font.italic = True
    par.add_run('2').font.superscript = True

    par = legend(29)
    par.add_run('Final ')
    par.add_run('R').font.italic = True
    par.add_run(' indexes \n[')
    par.add_run('I').font.italic = True
    par.add_run('{}2{}('.format(bequal, sigma_sm))
    par.add_run('I').font.italic = True
    par.add_run(')]')

    par = legend(30)
    par.add_run('Final ')
    par.add_run('R').font.italic = True
    par.add_run(' indexes \n[all data]')

    par = legend(31)
    par.add_run('Largest peak/hole [e{}'.format(angstrom))
    par.add_run(minus_sign + '3').font.superscript = True
    par.add_run(']')

    legend(32).add_run('Flack X parameter')
    legend(33).add_run('Extinction coefficient')
Example #16
0
 def cells_fixture(self, request):
     """
     Return a Table parsed from the selected 'tbl-cells' snippet, followed
     by the remaining values of ``request.param`` unchanged.
     """
     snippet_idx, cell_count, unique_count, matches = request.param
     tbl_elm = parse_xml(snippet_seq('tbl-cells')[snippet_idx])
     return Table(tbl_elm, None), cell_count, unique_count, matches
Example #17
0
 def column_count_fixture(self):
     """Return a Table whose grid declares three columns, and the value 3."""
     three_col_tbl = element('w:tbl/w:tblGrid/(w:gridCol,w:gridCol,w:gridCol)')
     return Table(three_col_tbl, None), 3
Example #18
0
 def style_get_fixture(self, part_prop_):
     """
     Return a Table whose tblStyle value is 'Barbaz', that style id, and
     the value ``part_prop_`` is set up to hand back from ``get_style``.
     """
     style_id = 'Barbaz'
     styled_tbl = element('w:tbl/w:tblPr/w:tblStyle{w:val=%s}' % style_id)
     fixture_table = Table(styled_tbl, None)
     return fixture_table, style_id, part_prop_.return_value.get_style.return_value
Example #19
0
 def table_style_get_fixture(self, request):
     """
     Return a Table built from the cxml in ``request.param`` and the
     expected style passed through unchanged.
     """
     source_cxml, expected_style = request.param
     return Table(element(source_cxml), None), expected_style
Example #20
0
 def style_set_fixture(self, request, part_prop_):
     """
     Return a Table built from the source cxml, the value to assign, and
     the XML for the expected cxml; ``part_prop_`` is primed so that
     ``get_style_id`` returns the style id from ``request.param``.
     """
     source_cxml, value, style_id, target_cxml = request.param
     fixture_table = Table(element(source_cxml), None)
     part_prop_.return_value.get_style_id.return_value = style_id
     return fixture_table, value, xml(target_cxml)
             paragraph_temp.text = paragraph_temp.text.replace(
                 '【', '').replace('】', '')
             # print(paragraph_temp.text)
             # print('------------------->')
             temp_list.append(paragraph_temp)
             # if re.findall('\([一|二|三|四|五|六|七|八|九|十]*\)', paragraph_temp.text) or re.findall('[一|二|三|四|五|六|七|八|九|十]*、', paragraph_temp.text):
             #     break
             # if re.findall('[一|二|三|四|五|六|七|八|九|十]*、', paragraph_temp.text):
             if re.findall("\([一|二|三|四|五|六|七|八|九|十]\)",
                           paragraph_temp.text) or re.findall(
                               '[一|二|三|四|五|六|七|八|九|十]\、',
                               paragraph_temp.text):
                 del (temp_list[-1])
                 break
         if isinstance(sets[j], CT_Tbl):
             table_temp = Table(sets[j], source_document)
             temp_list.append(table_temp)
             # del (temp_list[-1])
     # print(len(temp_list), 'elements')
     # del(temp_list[-1])
     content_list.append(temp_list)
     break
 # if '项目基本情况表' in query_list[pointer]:
 # if query_list[pointer].split('、')[1] not in para.text:
 #     temp_list = []
 #     content_list.append(temp_list)
 #     break
 if '项目基本情况表' in query_list[pointer] and '项目基本情况表' in para.text:
     # print(query_list[pointer].split('、')[1])
     # print('Found tb:')
     temp_list = []
Example #22
0
def convert_table_child(child, doc):
    """
    Wrap *child* in a Table when it is a ``tbl`` element of the ``w``
    namespace.

    Returns a Table created with ``doc._body`` as parent when the tag of
    *child* matches, otherwise ``False``.
    """
    if child.tag != '{%s}tbl' % NAMESPACES['w']:
        return False
    return Table(child, doc._body)
Example #23
0
 def iter_block_items(self, jubo_raw):
     """
     Yield a Paragraph or a Table for each CT_P / CT_Tbl child of the body
     of *jubo_raw*, in the order the children appear. Other children are
     skipped.
     """
     for node in jubo_raw.element.body.iterchildren():
         if isinstance(node, CT_P):
             wrapper = Paragraph
         elif isinstance(node, CT_Tbl):
             wrapper = Table
         else:
             continue
         yield wrapper(node, jubo_raw)
Example #24
0
 def alignment_get_fixture(self, request):
     """
     Return a Table built from the cxml in ``request.param`` and the
     expected value passed through unchanged.
     """
     source_cxml, expected_value = request.param
     return Table(element(source_cxml), None), expected_value
Example #25
0
def extract_docx_info(dfile):
    """
    Collect text and a few summary facts from the .docx file *dfile*.

    Returns a dict with the keys:

    * ``"text"`` -- every body paragraph and every paragraph inside body
      tables, each preceded by a newline, in document order;
    * ``"fonts"`` -- distinct font names found on the document's styles,
      in first-seen order;
    * ``"n_tables"`` -- ``len(document.tables)``;
    * ``"linkedin"`` -- target of the first hyperlink relationship that
      starts with ``http(s)://www.linkedin.com``, or ``''``;
    * ``"email"`` -- address of the first ``mailto`` hyperlink, or ``''``;
    * ``"n_images"`` -- number of archive members whose name starts with
      ``word/media/image``.

    Raises ValueError if the opened object is neither a Document nor a
    _Cell.
    """
    document = docx.Document(dfile)

    # extract text
    if isinstance(document, Document):
        parent_elm = document.element.body
    elif isinstance(document, _Cell):
        parent_elm = document._tc
    else:
        raise ValueError("something's not right")

    # Collect the pieces and join once instead of growing a string.
    chunks = []
    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            chunks.append(Paragraph(child, document).text)
        elif isinstance(child, CT_Tbl):
            table = Table(child, document)
            for row in table.rows:
                for cell in row.cells:
                    chunks.extend(paragraph.text for paragraph in cell.paragraphs)
    text = ''.join('\n' + chunk for chunk in chunks)

    # extract fonts
    fonts = []
    for style in document.styles:
        try:
            font = style.font
            font_name = font.name if font is not None else None
        except Exception:
            # Best effort: a style that does not expose font information is
            # skipped. Unlike a bare ``except:`` this no longer swallows
            # KeyboardInterrupt / SystemExit.
            continue
        if font_name is not None and font_name not in fonts:
            fonts.append(font_name)

    # extract n_tables
    n_tables = len(document.tables)

    # extract linkedin link and email (if present); the first match wins
    linkedin = ''
    email = ''
    linkedin_prefixes = ('http://www.linkedin.com', 'https://www.linkedin.com')
    rels = document.part.rels
    for rel_id in rels:
        relationship = rels[rel_id]
        if relationship.reltype != RT.HYPERLINK:
            continue
        target = relationship._target
        if linkedin == '' and target.startswith(linkedin_prefixes):
            linkedin = target
        if email == '' and target.startswith('mailto'):
            # drop the scheme and the character following it ("mailto:")
            email = target[len('mailto') + 1:]

    # extract n_images; the archive is closed again once it has been counted
    with ZipFile(dfile) as archive:
        n_images = sum(
            1 for name in archive.namelist()
            if name.startswith('word/media/image'))

    return {
        "linkedin": linkedin,
        "n_tables": n_tables,
        "fonts": fonts,
        "n_images": n_images,
        "text": text,
        "email": email
    }
Example #26
0
 def autofit_get_fixture(self, request):
     """
     Return a Table built from the cxml in ``request.param`` and the
     expected autofit value passed through unchanged.
     """
     source_cxml, expected_autofit = request.param
     return Table(element(source_cxml), None), expected_autofit
Example #27
0
 def add_column_fixture(self):
     """
     Return a Table wrapping a ``_tbl_bldr(2, 1)`` element together with
     the XML produced by ``_tbl_bldr(2, 2)``.
     """
     source_tbl = _tbl_bldr(2, 1).element
     fixture_table = Table(source_tbl)
     return fixture_table, _tbl_bldr(2, 2).xml()
Example #28
0
 def autofit_set_fixture(self, request):
     """
     Return a Table built from the source cxml, the new value passed
     through unchanged, and the XML for the expected cxml.
     """
     source_cxml, new_value, target_cxml = request.param
     return Table(element(source_cxml), None), new_value, xml(target_cxml)
Example #29
0
 def table(self):
     """Return a Table wrapping the element of a 2x2 table builder."""
     return Table(_tbl_bldr(rows=2, cols=2).element)
Example #30
0
def populate_main_table_values(main_table: Table, cif: CifContainer):
    """
    Write the values read from *cif* into column 1 of *main_table*.

    Ordinary keyword/value pairs are delegated to
    ``add_regular_key_value_pairs``. The rest of this function fills the
    cells that need special formatting (sum formula, space group,
    radiation, R values) or that are derived from several cif entries.
    """
    ccdc_cell = main_table.cell(0, 1)
    ccdc_cell.paragraphs[0].add_run(cif['_database_code_depnum_ccdc_archive'])

    # All usual cif keywords are written from a lookup table:
    add_regular_key_value_pairs(cif, main_table)

    # Cells with special handling follow.
    add_sum_formula(main_table.cell(1, 1).paragraphs[0],
                    cif['_chemical_formula_sum'].replace(" ", ""))

    spgr_paragraph = main_table.cell(5, 1).paragraphs[0]
    space_group = cif.space_group
    it_number = ''
    try:
        it_number = str(cif.spgr_number)
    except AttributeError:
        # no space group number available: keep the empty string
        pass
    format_space_group(spgr_paragraph, space_group, it_number)

    radiation_type = cif['_diffrn_radiation_type']
    radiation_wavelength = cif['_diffrn_radiation_wavelength']
    crystal_size_min = cif['_exptl_crystal_size_min']
    crystal_size_mid = cif['_exptl_crystal_size_mid']
    crystal_size_max = cif['_exptl_crystal_size_max']
    theta_min = cif['_diffrn_reflns_theta_min']
    theta_max = cif['_diffrn_reflns_theta_max']
    limit_h_min = cif['_diffrn_reflns_limit_h_min']
    limit_h_max = cif['_diffrn_reflns_limit_h_max']
    limit_k_min = cif['_diffrn_reflns_limit_k_min']
    limit_k_max = cif['_diffrn_reflns_limit_k_max']
    limit_l_min = cif['_diffrn_reflns_limit_l_min']
    limit_l_max = cif['_diffrn_reflns_limit_l_max']
    ls_number_reflns = cif['_refine_ls_number_reflns']
    ls_number_restraints = cif['_refine_ls_number_restraints']
    ls_number_parameters = cif['_refine_ls_number_parameters']
    goof = cif['_refine_ls_goodness_of_fit_ref']

    # Values that must parse as numbers; '?' is shown when they do not.
    try:
        measured_fraction = float(cif['_diffrn_measured_fraction_theta_full'])
    except ValueError:
        completeness = '?'
    else:
        completeness = "{0:.1f} %".format(round(measured_fraction * 100, 1))
    try:
        density_min_value = float(cif['_refine_diff_density_min'])
    except ValueError:
        diff_density_min = '?'
    else:
        diff_density_min = "{0:.2f}".format(round(density_min_value, 2))
    try:
        density_max_value = float(cif['_refine_diff_density_max'])
    except ValueError:
        diff_density_max = '?'
    else:
        diff_density_max = "{0:.2f}".format(round(density_max_value, 2))

    # Concatenated and derived cell contents:
    size_text = this_or_quest(crystal_size_max) + timessym
    size_text = size_text + this_or_quest(crystal_size_mid) + timessym
    size_text = size_text + this_or_quest(crystal_size_min)
    main_table.cell(17, 1).text = size_text

    wavelength_head = ' ({} ='.format(lambdasym)
    wavelength_tail = '{}{})'.format(protected_space, angstrom)
    wavelength = str(wavelength_head + this_or_quest(radiation_wavelength)
                     + wavelength_tail).replace(' ', '')

    # format_radiation gives e.g. ('Mo', 'K', '\\a')
    radtype = format_radiation(radiation_type)
    radiation_par = main_table.cell(20, 1).paragraphs[0]
    # element, e.g. Mo:
    radiation_par.add_run(radtype[0])
    # K line in italics:
    line_run = radiation_par.add_run(radtype[1])
    line_run.font.italic = True
    # alpha as italic subscript:
    alpha_run = radiation_par.add_run(radtype[2])
    alpha_run.font.italic = True
    alpha_run.font.subscript = True
    # wavelength lambda:
    radiation_par.add_run(' ' + wavelength)

    try:
        resolution = float(radiation_wavelength) / (
            2 * sin(radians(float(theta_max))))
        d_max = ' ({:.2f}{}{})'.format(resolution, protected_space, angstrom)
        # 2theta range:
        two_theta_range = "{:.2f} to {:.2f}{}".format(
            2 * float(theta_min), 2 * float(theta_max), d_max)
        main_table.cell(21, 1).text = two_theta_range
    except ValueError:
        main_table.cell(21, 1).text = '? to ?'

    # index ranges, one line per index
    h_range = limit_h_min + ' {} h {} '.format(less_or_equal, less_or_equal) + limit_h_max
    k_range = limit_k_min + ' {} k {} '.format(less_or_equal, less_or_equal) + limit_k_max
    l_range = limit_l_min + ' {} l {} '.format(less_or_equal, less_or_equal) + limit_l_max
    main_table.cell(22, 1).text = h_range + '\n' + k_range + '\n' + l_range

    add_r_int_value(cif, main_table.cell(24, 1).paragraphs[0])
    main_table.cell(25, 1).paragraphs[0].add_run(completeness)

    data_restraints_params = this_or_quest(ls_number_reflns) + '/'
    data_restraints_params = data_restraints_params + this_or_quest(ls_number_restraints) + '/'
    data_restraints_params = data_restraints_params + this_or_quest(ls_number_parameters)
    main_table.cell(26, 1).text = data_restraints_params

    main_table.cell(27, 1).paragraphs[0].add_run(goof)
    r1sig_p = main_table.cell(28, 1).paragraphs[0]
    rfull_p = main_table.cell(29, 1).paragraphs[0]
    add_r1sig_and_wr2full(cif, r1sig_p, rfull_p)
    main_table.cell(30, 1).text = diff_density_max + '/' + diff_density_min

    if not cif.is_centrosymm:
        flack_cell = main_table.cell(31, 1)
        flack_cell.text = cif['_refine_ls_abs_structure_Flack'] or '?'

    # The extinction coefficient goes into the last row, but only when it
    # is not one of the placeholder values.
    exti = cif['_refine_ls_extinction_coef']
    if exti in ['.', "'.'", '?', '']:
        return
    last_row = len(main_table.columns[0].cells) - 1
    main_table.columns[1].cells[last_row].text = exti