Esempio n. 1
0
def _get_style_borders(w_styles, style_id):
    if w_styles is None or style_id is None:
        return {}
    w_style = value_of(w_styles,
                       'w:style[@w:styleId = "{0}"]'.format(style_id))
    if w_style is None:
        return {}

    # - get parent styles (if it exist)
    based_on_id = value_of(w_style, 'w:basedOn/@w:val')
    parent_styles = _get_style_borders(w_styles, based_on_id)

    # - get child styles
    w_tbl_borders = value_of(w_style, 'w:tblPr/w:tblBorders')
    table_borders = _get_table_borders(w_tbl_borders)
    cell_borders = _get_cell_borders(w_tbl_borders)
    child_styles = table_borders.copy()
    child_styles.update(
        {'x-cell-' + key: value
         for key, value in cell_borders.items()})

    # - *child_styles* override *parent_styles*
    real_styles = parent_styles.copy()
    real_styles.update({
        key: value
        for key, value in child_styles.items() if value is not None
    })
    return real_styles
Esempio n. 2
0
def _get_border_properties(w_tbl_borders, style_xpath_mapping):
    # - Get the cell properties for each direction: 'top', 'right'...
    #   Values are converted to HTML values, size are in 'pt'
    properties = []
    for style, xpath in style_xpath_mapping:
        prop = {}
        color = value_of(w_tbl_borders, xpath.format(attr='color'))
        if color and color != "auto":
            prop['color'] = "#" + color
        shadow = value_of(w_tbl_borders, xpath.format(attr='shadow'))
        if shadow:
            prop['shadow'] = {"true": True, "false": False}[shadow]
        space = value_of(w_tbl_borders, xpath.format(attr='space'))
        if space:
            # unit is 'pt'
            prop['space'] = float(space)
        sz = value_of(w_tbl_borders, xpath.format(attr='sz'))
        if sz:
            # convert eighths of a point to 'pt'
            prop['sz'] = float(sz) / 8
        val = value_of(w_tbl_borders, xpath.format(attr='val'))
        if val:
            val = "none" if val == "nil" else val  # "nil" is "none" -- no border
            prop['val'] = _BORDER_STYLE_MAPPING.get(val, 'w-' + val)
        properties.append((style, prop))
    return properties
Esempio n. 3
0
    def __init__(self, w_shd):
        # type: (etree._Element) -> None
        """
        Read the shading attributes from *w_shd*.

        Every attribute is read with ``value_of`` and stored as-is
        on the instance.

        :param w_shd: Shading element to read the attributes from.
        """

        def read(attr_xpath):
            # every attribute is looked up on the same element
            return value_of(w_shd, attr_xpath)

        #: Shading Pattern
        self.w_val = read("@w:val")  # required

        #: Shading Pattern Color
        self.w_color = read("@w:color")

        #: Shading Background Color
        self.w_fill = read("@w:fill")

        #: Shading Pattern Theme Color
        self.w_themeColor = read("@w:themeColor")

        #: Shading Pattern Theme Color Tint
        self.w_themeTint = read("@w:themeTint")

        #: Shading Pattern Theme Color Shade
        self.w_themeShade = read("@w:themeShade")

        #: Shading Background Theme Color
        self.w_themeFill = read("@w:themeFill")

        #: Shading Background Theme Color Tint
        self.w_themeFillTint = read("@w:themeFillTint")

        #: Shading Background Theme Color Shade
        self.w_themeFillShade = read("@w:themeFillShade")
Esempio n. 4
0
    def __init__(self, w_pg_sz):
        """
        Read the page size attributes from *w_pg_sz*.

        Every attribute is read with ``value_of`` and stored as-is
        on the instance.

        :param w_pg_sz: Page size element to read the attributes from.
        """

        def read(attr_xpath):
            # every attribute is looked up on the same element
            return value_of(w_pg_sz, attr_xpath)

        #: Page Width
        self.w_w = read("@w:w")

        #: Page Height
        self.w_h = read("@w:h")

        #: Page Orientation (Possible values are "landscape" and "portrait").
        self.w_orient = read("@w:orient")

        #: Printer Paper Code
        self.w_code = read("@w:code")
Esempio n. 5
0
    def transform_tables(self, tree):
        """
        Replace every ``<w:tbl>`` element of *tree* by a generated table element.

        The styles are loaded from ``self.styles_path`` when it is set,
        otherwise they are searched in *tree* (``.//w:styles``).
        Each ``<w:tbl>`` is parsed with ``parse_table``, and the element
        generated by the builder takes its place (the tail is preserved).

        :param tree: Tree in which the tables are searched and replaced in place.
        """
        external_styles = None
        if self.styles_path:
            external_styles = etree.parse(self.styles_path)
        self._w_styles = external_styles or value_of(tree, ".//w:styles")

        for w_tbl in tree.xpath("//w:tbl", namespaces=NS):
            new_elem = self.builder.generate_table_tree(self.parse_table(w_tbl))
            container = w_tbl.getparent()
            # put the new element at the position of the original table...
            container.insert(container.index(w_tbl), new_elem)
            # ...keep the text which follows the table, then drop the table
            new_elem.tail = w_tbl.tail
            container.remove(w_tbl)
Esempio n. 6
0
    def parse_tr(self, w_tr):
        """
        Parse a ``<w:tr>`` element.

        The current row of the state is selected (``state.row``), then
        its nature ("header" or "body"), its height (if any) and its
        revision marks (if any) are set from the row properties.

        See: `Table Row Properties <http://officeopenxml.com/WPtableRowProperties.php>`_.

        :type  w_tr: etree._Element
        :param w_tr: Table row element.

        :raises KeyError: if ``w:tblHeader/@w:val`` or ``w:trHeight/@w:hRule``
            has an unexpected value.
        """

        # - w:tblHeader => the current row should be repeated at the top
        #   of each new page on which the table is displayed.
        #   This is a simple boolean property, so you can specify a val attribute of true or false.
        #
        #   <w:trPr>
        #     <w:tblHeader/>
        #   </w:trPr>
        #
        w_tbl_header = value_of(w_tr, "w:trPr/w:tblHeader")
        if w_tbl_header is not None:
            # the element is present: the property is on, unless stated otherwise
            w_tbl_header = value_of(w_tr,
                                    "w:trPr/w:tblHeader/@w:val",
                                    default=u"true")
        # on/off values can also be spelled "1"/"0" or "on"/"off"
        nature = {
            "true": u"header",
            "1": u"header",
            "on": u"header",
            "false": u"body",
            "0": u"body",
            "off": u"body",
            None: u"body"
        }[w_tbl_header]
        state = self._state
        state.row = state.table.rows[state.row_pos]
        state.row.nature = nature

        # - w:trHeight => height of the row
        #
        #   <w:trPr>
        #     <w:trHeight w:val="567"/>
        #   </w:trPr>
        #
        w_tr_height = value_of(w_tr, "w:trPr/w:trHeight")
        if w_tr_height is not None:
            # the height rule and the value are attributes of ``w:trHeight``
            h_rule = value_of(w_tr,
                              "w:trPr/w:trHeight/@w:hRule",
                              default="auto")
            # Possible values are:
            # - atLeast (height should be at least the value specified),
            # - exact (height should be exactly the value specified), or
            # - auto (height is determined based on the height of the contents, so the value is ignored).
            style = {
                'atLeast': u'min-height',
                'exact': u'height',
                'auto': None
            }[h_rule]
            if style:
                val = value_of(w_tr, "w:trPr/w:trHeight/@w:val", default="0")
                # Specifies the row's height, in twentieths of a point.
                height = float(val) / 20  # pt
                state.row.styles[style] = "{0:0.2f}pt".format(height)

        # - w:ins => revision marks: A row can be marked as "inserted".
        #
        #   <w:trPr>
        #     <w:ins w:id="0" w:author="Laurent Laporte" w:date="2018-11-21T18:08:00Z"/>
        #   </w:trPr>
        #
        w_ins = value_of(w_tr, "w:trPr/w:ins")
        if w_ins is not None:
            state.row.styles['x-ins'] = True
            style_xpath_mapping = [
                ('x-ins-id', "w:trPr/w:ins/@w:id"),
                ('x-ins-author', "w:trPr/w:ins/@w:author"),
                ('x-ins-date', "w:trPr/w:ins/@w:date"),
            ]
            # only the attributes which are defined (and not empty) are stored
            for style, xpath in style_xpath_mapping:
                value = value_of(w_tr, xpath)
                if value:
                    state.row.styles[style] = value
Esempio n. 7
0
    def parse_tbl(self, w_tbl):
        """
        Parse a ``<w:tbl>`` element and store a new table in the current state.

        The table styles are built from the borders of the table style,
        the borders of the table itself, the table shading, the page size
        and the number of columns of the section which follows the table.

        See: `Table Properties <http://officeopenxml.com/WPtableProperties.php>`_.

        :type  w_tbl: etree._Element
        :param w_tbl: Table element.

        .. versionchanged:: 0.4.0
           The section width and height are now stored in the 'x-sect-size' table style (units in 'pt').
        """
        style_id = value_of(w_tbl, "w:tblPr/w:tblStyle/@w:val")

        # - Borders: start from the ones of the table style (if possible),
        #   then the ones defined in ``w:tblPr/w:tblBorders`` take precedence
        #   (undefined values are ignored).
        attrs = _get_style_borders(self._w_styles, style_id).copy()
        w_tbl_borders = value_of(w_tbl, 'w:tblPr/w:tblBorders')
        for name, border in _get_table_borders(w_tbl_borders).items():
            if border is not None:
                attrs[name] = border

        # -- Table shading
        attrs.update(Shd(value_of(w_tbl, 'w:tblPr/w:shd')).styles)

        # -- Sections: http://officeopenxml.com/WPsection.php
        #
        # The properties of a section are stored in a sectPr element:
        # in the last paragraph of the section for all sections but the last one,
        # in the body element for the last section.
        w_sect_pr = value_of(
            w_tbl, 'following::w:p/w:pPr/w:sectPr | following::w:sectPr')

        # -- Page size of the section
        attrs.update(PgSz(value_of(w_sect_pr, 'w:pgSz')).styles)

        # - w:cols -- Specifies the set of columns for the section.
        # - ``x-sect-cols``: Section column number
        #   Default value is "1" -- useful for @pgwide
        sect_cols = value_of(w_sect_pr, 'w:cols/@w:num')
        if sect_cols is None and w_sect_pr is not None:
            # no explicit number: count the column definitions instead
            col_count = w_sect_pr.xpath('count(w:cols/w:col)',
                                        namespaces=NS)  # type: float
            if col_count:
                sect_cols = str(int(col_count))
        if sect_cols is None:
            sect_cols = "1"
        attrs['x-sect-cols'] = sect_cols

        # - The HTML ``class`` attribute is not a regular style.
        #   We use the table ``nature`` instead.
        self._state.table = Table(styles=attrs, nature=style_id)
Esempio n. 8
0
    def parse_tc(self, w_tc):
        """
        Parse a ``<w:tc>`` element.

        The column position of the state is updated according to the
        column span. A cell which continues a vertical merge expands the
        cell above; any other cell is inserted in the current row with
        its styles (shading, alignments, borders) and its content.

        See: `Table Cell Properties <http://officeopenxml.com/WPtableCellProperties.php>`_.

        :type  w_tc: etree._Element
        :param w_tc: Table cell element.

        :raises NotImplementedError: if ``w:vMerge/@w:val`` is neither "continue" nor "restart".
        :raises KeyError: if the vertical or horizontal alignment has an unexpected value.

        .. versionchanged:: 0.5.1
           XML indentation between cell paragraphs is ignored.
        """
        state = self._state

        # w:gridSpan => number of logical columns across which the cell spans
        width = int(value_of(w_tc, "w:tcPr/w:gridSpan/@w:val", default=u"1"))

        # take the colspan into account:
        state.col_pos += width - 1

        # w:vMerge => specifies that the cell is part of a vertically merged set of cells.
        w_v_merge = value_of(w_tc, "w:tcPr/w:vMerge")
        if w_v_merge is not None:
            w_v_merge = value_of(w_tc,
                                 "w:tcPr/w:vMerge/@w:val",
                                 default=u"continue")
        if w_v_merge is None:
            # no merge
            height = 1
        elif w_v_merge == u"continue":
            # the current cell continues a previously existing merge group
            state.table.expand((state.col_pos, state.row.row_pos - 1),
                               height=1)
            height = None
        elif w_v_merge == u"restart":
            # the current cell starts a new merge group
            height = 1
        else:
            raise NotImplementedError(w_v_merge)

        if height:
            styles = {}

            # -- Cell shading
            shd = Shd(value_of(w_tc, 'w:tcPr/w:shd'))
            styles.update(shd.styles)

            # -- Vertical alignment
            #
            # w:vAlign => Specifies the vertical alignment for text between the top and bottom margins of the cell.
            #
            # Possible values are:
            # - bottom - Specifies that the text should be vertically aligned to the bottom margin.
            # - center - Specifies that the text should be vertically aligned to the center of the cell.
            # - top - Specifies that the text should be vertically aligned to the top margin.
            w_v_align = value_of(w_tc, "w:tcPr/w:vAlign")
            if w_v_align is not None:
                w_v_align = value_of(w_tc,
                                     "w:tcPr/w:vAlign/@w:val",
                                     default=u"top")
                # CSS/Properties/vertical-align
                # valid values: http://www.datypic.com/sc/ooxml/t-w_ST_VerticalJc.html
                # fmt: off
                v_align = {
                    "top": u"top",
                    "center": u"middle",
                    "bottom": u"bottom",
                    "both": u"w-both",
                }[w_v_align]
                # fmt: on
                styles["vertical-align"] = v_align

            # -- Horizontal alignment
            #
            # Horizontal alignment is done at paragraph level, inside the cell.
            # We can calculate the cell alignment base on the paragraph properties,
            # for instance ``<w:p><w:pPr><w:jc w:val="right"/>``,
            # see: http://officeopenxml.com/WPalignment.php
            #
            # We use the most common alignment for cell alignment.
            w_p_list = w_tc.xpath("w:p", namespaces=NS)
            w_jc_counter = collections.Counter(
                value_of(w_p, "w:pPr/w:jc/@w:val") for w_p in w_p_list)
            # the counter is empty if the cell has no paragraph: no alignment
            most_common = w_jc_counter.most_common(1)
            w_jc = most_common[0][0] if most_common else None  # type: str or None
            if w_jc is not None:
                # CSS/Properties/text-align
                # valid values: http://www.datypic.com/sc/ooxml/t-w_ST_Jc.html
                align = {
                    "start": u"left",
                    "end": u"right",
                    "left": u"left",
                    "right": u"right",
                    "center": u"center",
                    "both": u"justify",
                    "distribute": u"justify",
                    # valid values without equivalent: no alignment is set
                    "mediumKashida": None,
                    "numTab": None,
                    "lowKashida": None,
                    "thaiDistribute": None,
                }[w_jc]
                if align is not None:
                    styles["align"] = align

            # -- Borders
            w_tc_borders = value_of(w_tc, 'w:tcPr/w:tcBorders')
            cell_borders = _get_table_borders(w_tc_borders)
            styles.update(cell_borders)

            # todo: calculate the ``@rotate`` attribute.

            content = w_tc.xpath('w:p | w:tbl', namespaces=NS)
            # ignore the *tail* (if the XML is indented)
            for node in content:
                node.tail = None
            state.row.insert_cell(content,
                                  width=width,
                                  height=height,
                                  styles=styles)