def _get_style_borders(w_styles, style_id): if w_styles is None or style_id is None: return {} w_style = value_of(w_styles, 'w:style[@w:styleId = "{0}"]'.format(style_id)) if w_style is None: return {} # - get parent styles (if it exist) based_on_id = value_of(w_style, 'w:basedOn/@w:val') parent_styles = _get_style_borders(w_styles, based_on_id) # - get child styles w_tbl_borders = value_of(w_style, 'w:tblPr/w:tblBorders') table_borders = _get_table_borders(w_tbl_borders) cell_borders = _get_cell_borders(w_tbl_borders) child_styles = table_borders.copy() child_styles.update( {'x-cell-' + key: value for key, value in cell_borders.items()}) # - *child_styles* override *parent_styles* real_styles = parent_styles.copy() real_styles.update({ key: value for key, value in child_styles.items() if value is not None }) return real_styles
def _get_border_properties(w_tbl_borders, style_xpath_mapping): # - Get the cell properties for each direction: 'top', 'right'... # Values are converted to HTML values, size are in 'pt' properties = [] for style, xpath in style_xpath_mapping: prop = {} color = value_of(w_tbl_borders, xpath.format(attr='color')) if color and color != "auto": prop['color'] = "#" + color shadow = value_of(w_tbl_borders, xpath.format(attr='shadow')) if shadow: prop['shadow'] = {"true": True, "false": False}[shadow] space = value_of(w_tbl_borders, xpath.format(attr='space')) if space: # unit is 'pt' prop['space'] = float(space) sz = value_of(w_tbl_borders, xpath.format(attr='sz')) if sz: # convert eighths of a point to 'pt' prop['sz'] = float(sz) / 8 val = value_of(w_tbl_borders, xpath.format(attr='val')) if val: val = "none" if val == "nil" else val # "nil" is "none" -- no border prop['val'] = _BORDER_STYLE_MAPPING.get(val, 'w-' + val) properties.append((style, prop)) return properties
def __init__(self, w_shd):
    # type: (etree._Element) -> None
    """
    Read the shading attributes of *w_shd*.

    Each attribute is read with ``value_of`` and stored as is.

    :param w_shd: shading element -- callers in this file pass the result of
        a ``w:shd`` lookup, which presumably can be ``None`` (to be confirmed).
    """

    def read(xpath):
        # every attribute is read relative to the same shading node
        return value_of(w_shd, xpath)

    # -- Pattern

    #: Shading Pattern (required attribute)
    self.w_val = read("@w:val")
    #: Color of the Shading Pattern
    self.w_color = read("@w:color")
    #: Background Color of the Shading
    self.w_fill = read("@w:fill")

    # -- Theme colors of the pattern

    #: Theme Color of the Shading Pattern
    self.w_themeColor = read("@w:themeColor")
    #: Tint of the Shading Pattern Theme Color
    self.w_themeTint = read("@w:themeTint")
    #: Shade of the Shading Pattern Theme Color
    self.w_themeShade = read("@w:themeShade")

    # -- Theme colors of the background

    #: Theme Color of the Shading Background
    self.w_themeFill = read("@w:themeFill")
    #: Tint of the Shading Background Theme Color
    self.w_themeFillTint = read("@w:themeFillTint")
    #: Shade of the Shading Background Theme Color
    self.w_themeFillShade = read("@w:themeFillShade")
def __init__(self, w_pg_sz):
    """
    Read the page size attributes of *w_pg_sz*.

    Each attribute is read with ``value_of`` and stored as is (no unit conversion here).

    :param w_pg_sz: page size element -- callers in this file pass the result of
        a ``w:pgSz`` lookup, which presumably can be ``None`` (to be confirmed).
    """

    def read(xpath):
        # every attribute is read relative to the same page size node
        return value_of(w_pg_sz, xpath)

    #: Width of the Page
    self.w_w = read("@w:w")
    #: Height of the Page
    self.w_h = read("@w:h")
    #: Orientation of the Page: either "landscape" or "portrait".
    self.w_orient = read("@w:orient")
    #: Paper Code of the Printer
    self.w_code = read("@w:code")
def transform_tables(self, tree):
    """
    Replace, in place, each ``<w:tbl>`` of *tree* by the element generated by the builder.

    The styles are parsed from ``self.styles_path`` when it is set;
    otherwise they are searched in *tree* (``.//w:styles``).

    :param tree: document tree in which the tables are searched and replaced.
    """
    # - Styles: the external file first, then the document itself
    if self.styles_path:
        self._w_styles = etree.parse(self.styles_path)
    else:
        self._w_styles = None
    if not self._w_styles:
        self._w_styles = value_of(tree, ".//w:styles")
    else:
        self._w_styles = self._w_styles

    # - Tables: each one is converted then swapped with its replacement
    for w_tbl in tree.xpath("//w:tbl", namespaces=NS):
        converted = self.builder.generate_table_tree(self.parse_table(w_tbl))
        container = w_tbl.getparent()
        position = container.index(w_tbl)
        container.insert(position, converted)
        # keep the text which follows the original table
        converted.tail = w_tbl.tail
        container.remove(w_tbl)
def parse_tr(self, w_tr):
    """
    Parse a ``<w:tr>`` element.

    See: `Table Row Properties <http://officeopenxml.com/WPtableRowProperties.php>`_.

    The current row of the state (``self._state.row``) is selected from
    the current table, then its nature and its styles are updated:

    - the row nature is "header" or "body" (``w:tblHeader``),
    - the ``min-height`` or ``height`` style is set from ``w:trHeight``,
    - the ``x-ins*`` styles are set from the ``w:ins`` revision mark.

    :type  w_tr: etree._Element
    :param w_tr: Table row element.

    :raises KeyError:
        if the ``w:tblHeader/@w:val`` or ``w:trHeight/@w:hRule`` attribute
        has an unsupported value.
    """
    # - w:tblHeader => the current row should be repeated at the top
    #   of each new page on which the table is displayed.
    #   This is a simple boolean property, so you can specify a val attribute of true or false.
    #
    # <w:trPr>
    #   <w:tblHeader/>
    # </w:trPr>
    #
    w_tbl_header = value_of(w_tr, "w:trPr/w:tblHeader")
    if w_tbl_header is not None:
        w_tbl_header = value_of(w_tr, "w:trPr/w:tblHeader/@w:val", default=u"true")
    # On/off attributes also accept the "1"/"0" and "on"/"off" spellings.
    # fmt: off
    nature = {
        "true": u"header",
        "1": u"header",
        "on": u"header",
        "false": u"body",
        "0": u"body",
        "off": u"body",
        None: u"body",
    }[w_tbl_header]
    # fmt: on

    state = self._state
    state.row = state.table.rows[state.row_pos]
    state.row.nature = nature

    # - w:trHeight => height of the row
    #
    # <w:trPr>
    #   <w:trHeight w:val="567"/>
    # </w:trPr>
    #
    w_tr_height = value_of(w_tr, "w:trPr/w:trHeight")
    if w_tr_height is not None:
        # The rule and the value are attributes of ``w:trHeight`` (not of ``w:tblHeader``).
        h_rule = value_of(w_tr, "w:trPr/w:trHeight/@w:hRule", default="auto")
        # Possible values are:
        # - atLeast (height should be at least the value specified),
        # - exact (height should be exactly the value specified), or
        # - auto (height is determined based on the height of the contents, so the value is ignored).
        style = {
            'atLeast': u'min-height',
            'exact': u'height',
            'auto': None
        }[h_rule]
        if style:
            val = value_of(w_tr, "w:trPr/w:trHeight/@w:val", default="0")
            # Specifies the row's height, in twentieths of a point.
            height = float(val) / 20  # pt
            state.row.styles[style] = "{0:0.2f}pt".format(height)

    # - w:ins => revision marks: A row can be marked as "inserted".
    #
    # <w:trPr>
    #   <w:ins w:id="0" w:author="Laurent Laporte" w:date="2018-11-21T18:08:00Z"/>
    # </w:trPr>
    #
    w_ins = value_of(w_tr, "w:trPr/w:ins")
    if w_ins is not None:
        state.row.styles['x-ins'] = True
        style_xpath_mapping = [
            ('x-ins-id', "w:trPr/w:ins/@w:id"),
            ('x-ins-author', "w:trPr/w:ins/@w:author"),
            ('x-ins-date', "w:trPr/w:ins/@w:date"),
        ]
        for style, xpath in style_xpath_mapping:
            value = value_of(w_tr, xpath)
            if value:
                state.row.styles[style] = value
def parse_tbl(self, w_tbl):
    """
    Parse a ``<w:tbl>`` element.

    See: `Table Properties <http://officeopenxml.com/WPtableProperties.php>`_.

    A new table is created and stored in the current state (``self._state.table``).
    Its styles are the borders, the shading, the styles of the section page size
    and the number of columns of the section (``x-sect-cols``).
    Its nature is the identifier of the table style.

    :type  w_tbl: etree._Element
    :param w_tbl: Table element.

    .. versionchanged:: 0.4.0
       The section width and height are now stored in the 'x-sect-size' table style (units in 'pt').
    """
    style_id = value_of(w_tbl, "w:tblPr/w:tblStyle/@w:val")

    # -- Borders
    #    The borders of the table style come first, then they are overridden
    #    by the borders found in the ``w:tblPr/w:tblBorders`` properties.
    #    Table borders are the frame; cell borders (colsep/rowsep) use the "x-cell-" prefix.
    inherited_borders = _get_style_borders(self._w_styles, style_id)
    w_tbl_borders = value_of(w_tbl, 'w:tblPr/w:tblBorders')
    own_borders = _get_table_borders(w_tbl_borders)
    attrs = inherited_borders.copy()
    for name, border in own_borders.items():
        if border is not None:
            attrs[name] = border

    # -- Table shading
    w_shd = value_of(w_tbl, 'w:tblPr/w:shd')
    attrs.update(Shd(w_shd).styles)

    # -- Sections: http://officeopenxml.com/WPsection.php
    #    The properties of a section are stored in a sectPr element:
    #    - for every section but the last one, the sectPr element is a child
    #      of the last paragraph of the section;
    #    - for the last section, the sectPr element is a child of the body element.
    w_sect_pr = value_of(
        w_tbl, 'following::w:p/w:pPr/w:sectPr | following::w:sectPr')
    w_pg_sz = value_of(w_sect_pr, 'w:pgSz')
    attrs.update(PgSz(w_pg_sz).styles)

    # -- w:cols: set of columns of the section.
    #    ``x-sect-cols`` is the number of columns of the section.
    #    Default value is "1" -- useful for @pgwide
    sect_cols = value_of(w_sect_pr, 'w:cols/@w:num')
    if sect_cols is None and w_sect_pr is not None:
        # no explicit number: count the column definitions instead
        col_count = w_sect_pr.xpath('count(w:cols/w:col)', namespaces=NS)  # type: float
        if col_count:
            sect_cols = str(int(col_count))
    attrs['x-sect-cols'] = "1" if sect_cols is None else sect_cols

    # -- The HTML ``class`` attribute is not a regular style:
    #    the table ``nature`` is used instead.
    self._state.table = Table(styles=attrs, nature=style_id)
def parse_tc(self, w_tc):
    """
    Parse a ``<w:tc>`` element.

    See: `Table Cell Properties <http://officeopenxml.com/WPtableCellProperties.php>`_.

    A cell which continues a vertical merge expands the cell of the previous row.
    Otherwise, a new cell is inserted in the current row, with its styles
    (shading, vertical alignment, horizontal alignment and borders)
    and its content (the ``<w:p>`` and ``<w:tbl>`` children).

    :type  w_tc: etree._Element
    :param w_tc: Table cell element.

    :raises NotImplementedError:
        if the ``w:vMerge/@w:val`` attribute is neither "continue" nor "restart".
    :raises KeyError:
        if the ``w:vAlign/@w:val`` attribute has an unsupported value.

    .. versionchanged:: 0.5.1
       XML indentation between cell paragraphs is ignored.
    """
    state = self._state

    # w:gridSpan => number of logical columns across which the cell spans
    width = int(value_of(w_tc, "w:tcPr/w:gridSpan/@w:val", default=u"1"))

    # take the colspan into account:
    state.col_pos += width - 1

    # w:vMerge => specifies that the cell is part of a vertically merged set of cells.
    w_v_merge = value_of(w_tc, "w:tcPr/w:vMerge")
    if w_v_merge is not None:
        w_v_merge = value_of(w_tc, "w:tcPr/w:vMerge/@w:val", default=u"continue")

    if w_v_merge is None:
        # no merge
        height = 1
    elif w_v_merge == u"continue":
        # the current cell continues a previously existing merge group
        state.table.expand((state.col_pos, state.row.row_pos - 1), height=1)
        height = None
    elif w_v_merge == u"restart":
        # the current cell starts a new merge group
        height = 1
    else:
        raise NotImplementedError(w_v_merge)

    if height:
        styles = {}

        # -- Cell shading
        shd = Shd(value_of(w_tc, 'w:tcPr/w:shd'))
        styles.update(shd.styles)

        # -- Vertical alignment
        #
        # w:vAlign => Specifies the vertical alignment for text between the top and bottom margins of the cell.
        #
        # Possible values are:
        # - bottom - Specifies that the text should be vertically aligned to the bottom margin.
        # - center - Specifies that the text should be vertically aligned to the center of the cell.
        # - top - Specifies that the text should be vertically aligned to the top margin.
        w_v_align = value_of(w_tc, "w:tcPr/w:vAlign")
        if w_v_align is not None:
            w_v_align = value_of(w_tc, "w:tcPr/w:vAlign/@w:val", default=u"top")
            # CSS/Properties/vertical-align
            # valid values: http://www.datypic.com/sc/ooxml/t-w_ST_VerticalJc.html
            # fmt: off
            v_align = {
                "top": u"top",
                "center": u"middle",
                "bottom": u"bottom",
                "both": u"w-both",
            }[w_v_align]
            # fmt: on
            styles["vertical-align"] = v_align

        # -- Horizontal alignment
        #
        # Horizontal alignment is done at paragraph level, inside the cell.
        # We can calculate the cell alignment base on the paragraph properties,
        # for instance ``<w:p><w:pPr><w:jc w:val="right"/>``,
        # see: http://officeopenxml.com/WPalignment.php
        #
        # We use the most common alignment for cell alignment.
        w_p_list = w_tc.xpath("w:p", namespaces=NS)
        w_jc_counter = collections.Counter(
            value_of(w_p, "w:pPr/w:jc/@w:val") for w_p in w_p_list)
        # A cell without any direct paragraph has no alignment:
        # ``most_common()`` returns an empty list in that case.
        most_common = w_jc_counter.most_common(1)
        w_jc = most_common[0][0] if most_common else None  # type: str or None

        # CSS/Properties/text-align
        # valid values: http://www.datypic.com/sc/ooxml/t-w_ST_Jc.html
        #
        # The other values ("mediumKashida", "numTab", "lowKashida", "thaiDistribute"...)
        # have no HTML equivalent: no alignment is set, instead of raising a ``KeyError``.
        # fmt: off
        align = {
            "start": u"left",
            "end": u"right",
            "left": u"left",
            "right": u"right",
            "center": u"center",
            "both": u"justify",
            "distribute": u"justify",
        }.get(w_jc)
        # fmt: on
        if align is not None:
            styles["align"] = align

        # -- Borders
        w_tc_borders = value_of(w_tc, 'w:tcPr/w:tcBorders')
        cell_borders = _get_table_borders(w_tc_borders)
        styles.update(cell_borders)

        # todo: calculate the ``@rotate`` attribute.

        content = w_tc.xpath('w:p | w:tbl', namespaces=NS)
        # ignore the *tail* (if the XML is indented)
        for node in content:
            node.tail = None
        state.row.insert_cell(content, width=width, height=height, styles=styles)