Esempio n. 1
0
def test_build_tbl__no_seq():
    """Two tables built by the same builder get ``NO.SEQ`` "0001" then "0002"."""
    builder = FormexBuilder()

    # -- first table built by this builder
    first_table = Table()
    first_table.rows[1].insert_cell(u"text1")
    first_elem = builder.build_tbl(first_table)

    # -- second table, built by the very same builder
    second_table = Table()
    second_table.rows[1].insert_cell(u"text2")
    second_elem = builder.build_tbl(second_table)

    # -- the sequence number is incremented from one table to the next
    assert first_elem.attrib['NO.SEQ'] == u"0001"
    assert second_elem.attrib['NO.SEQ'] == u"0002"
Esempio n. 2
0
def test_build_cell__align(cell_styles, expected):
    """
    Build a single CALS cell and compare its ``<entry>`` attributes to *expected*.

    The ``colsep`` and ``rowsep`` attributes are left out of the comparison.

    :param cell_styles: styles given to the cell under test.
    :param expected: attributes expected on the ``<entry>`` (separators aside).
    """
    # -- a one-cell table is all the builder needs
    paragraph = etree.XML("<p>text</p>")
    cell = Cell([paragraph], x=1, y=1, styles=cell_styles)
    builder = CalsBuilder()
    builder.setup_table(Table([cell]))

    # -- build the cell inside an empty <row>
    row_elem = etree.XML("<row/>")
    builder.build_cell(row_elem, cell)

    # -- the row must now hold an <entry>
    entry_elem = row_elem[0]  # type: ElementType
    assert entry_elem.tag == u"entry"

    # -- compare every attribute except the separators
    ignored = ("colsep", "rowsep")
    actual = {
        name: value
        for name, value in entry_elem.attrib.items() if name not in ignored
    }
    assert actual == expected
    assert entry_elem[0] == paragraph
Esempio n. 3
0
def test_build_cell__with_sep(cell_styles, expected):
    """
    Build the top-left cell of a 2x2 CALS table and check all its attributes.

    .. note::

       CALS default value for @colsep and @rowsep is "1".

    :param cell_styles: styles shared by the four cells of the table.
    :param expected: full attribute mapping expected on the ``<entry>``.
    """
    # -- a 2x2 grid of cells, listed row by row, all sharing the same content
    paragraph = etree.XML("<p>text</p>")
    cells = [
        Cell([paragraph], x=x, y=y, styles=cell_styles)
        for y in (1, 2)
        for x in (1, 2)
    ]
    top_left = cells[0]

    builder = CalsBuilder()
    builder.setup_table(Table(cells))

    # -- build only the top-left cell, inside an empty <row>
    row_elem = etree.XML("<row/>")
    builder.build_cell(row_elem, top_left)

    # -- check the '<entry>' element: tag, attributes and content
    entry_elem = row_elem[0]  # type: ElementType
    assert entry_elem.tag == u"entry"
    assert entry_elem.attrib == expected
    assert entry_elem[0] == paragraph
Esempio n. 4
0
def test_build_title__subtitle():
    """
    Build a Formex ``<TITLE>`` from a row whose cell holds three paragraphs.

    The first paragraph is expected in ``<TI>``, the next two in ``<STI>``.
    """
    # -- one centered cell holding a title and two subtitles
    paragraphs = [P(u"TITLE"), P(u"Subtitle 1"), P(u"Subtitle 2")]
    table = Table()
    table.rows[1].insert_cell(paragraphs, styles={"align": "center"})

    # -- build the title inside an empty <TBL>
    tbl_elem = TBL()
    FormexBuilder().build_title(tbl_elem, table.rows[0])
    title_elem = tbl_elem[0]  # type: etree._Element

    # -- expected tree (blank text is dropped by the parser)
    # fmt: off
    expected_xml = u"""\
    <TITLE>
      <TI>
        <P>TITLE</P>
      </TI>
      <STI>
        <P>Subtitle 1</P>
        <P>Subtitle 2</P>
      </STI>
    </TITLE>"""
    # fmt: on
    expected = etree.XML(expected_xml,
                         parser=etree.XMLParser(remove_blank_text=True))

    differences = xmldiff.main.diff_trees(title_elem, expected)
    if not differences:
        return

    # -- dump the actual tree to ease debugging, then fail
    actual_xml = etree.tounicode(title_elem, pretty_print=True, with_tail=False)
    print(actual_xml, file=sys.stderr)
    assert differences == []
Esempio n. 5
0
def test_fill_missing_cells():
    """Fill the holes of a table's bounding box with "???" and check its text form."""
    # fmt: off
    table = Table()

    # -- first row: three plain cells, then one cell spanning two rows
    for content in ("one", "two", "three"):
        table.rows[1].insert_cell(content)
    table.rows[1].insert_cell("four", height=2)

    # -- second row: a single cell spanning two columns (one hole remains)
    table.rows[2].insert_cell("un-deux", width=2)

    table.fill_missing(table.bounding_box, "???")

    rendered = str(table)
    expected = textwrap.dedent("""\
    +-----------+-----------+-----------+-----------+
    |    one    |    two    |   three   |   four    |
    +-----------------------+-----------|           |
    |  un-deux              |    ???    |           |
    +-----------------------+-----------+-----------+""")
    assert rendered == expected
Esempio n. 6
0
def test_build_tbl__empty_cell():
    """A cell inserted with an empty string is built as a ``<CELL>`` holding one ``<IE>``."""
    # -- a table whose only cell has an empty content
    table = Table()
    table.rows[1].insert_cell(u"")

    tbl_elem = FormexBuilder().build_tbl(table)

    # -- the first <CELL> must have exactly one child, named 'IE'
    first_cell = tbl_elem.xpath('//CELL')[0]
    child_tags = [child.tag for child in first_cell]
    assert child_tags == ['IE']
Esempio n. 7
0
def test_colspan():
    """Insert cells column by column, one of them two columns wide, and check the text form."""
    # fmt: off
    table = Table()

    # -- (column index, content, extra ``insert_cell`` options), in insertion order
    insertions = (
        (1, "one", {}),
        (1, "spanned", {"width": 2}),
        (2, "two", {}),
    )
    for x, content, options in insertions:
        table.cols[x].insert_cell(content, **options)

    rendered = str(table)
    expected = textwrap.dedent("""\
    +-----------+-----------+
    |    one    |    two    |
    +-----------------------+
    |  spanned              |
    +-----------------------+""")
    assert rendered == expected
Esempio n. 8
0
def test_rowspan():
    """Insert cells row by row, one of them two rows high, and check the text form."""
    # fmt: off
    table = Table()

    # -- (row index, content, extra ``insert_cell`` options), in insertion order
    insertions = (
        (1, "one", {}),
        (1, "spanned", {"height": 2}),
        (2, "two", {}),
    )
    for y, content, options in insertions:
        table.rows[y].insert_cell(content, **options)

    rendered = str(table)
    expected = textwrap.dedent("""\
    +-----------+-----------+
    |    one    |  spanned  |
    +-----------|           |
    |    two    |           |
    +-----------+-----------+""")
    assert rendered == expected
Esempio n. 9
0
def test_build_cell__body(kwargs, attrib):
    """
    Build a single Formex cell and compare its ``<CELL>`` attributes to *attrib*.

    :param kwargs: extra keyword arguments given to the :class:`Cell` constructor.
    :param attrib: attributes expected on the resulting ``<CELL>`` element.
    """
    # -- one-cell table, attached directly to the builder
    paragraph = P(u"text")
    cell = Cell([paragraph], x=1, y=1, **kwargs)
    table = Table([cell])

    builder = FormexBuilder()
    builder._table = table

    # -- build the cell in the context of the first row of the table
    row_elem = ROW()
    first_row = next(iter(table.rows))
    builder.build_cell(row_elem, cell, first_row)

    # -- check the '<CELL>' element: tag, attributes and content
    cell_elem = row_elem[0]  # type: etree._Element
    assert cell_elem.tag == u"CELL"
    assert cell_elem.attrib == attrib
    assert cell_elem[0] == paragraph
Esempio n. 10
0
def test_parse_tc(w_tc_content, expected):
    """
    Parse a ``<w:tc>`` fragment and compare the styles of the resulting cell.

    :param w_tc_content: XML source of the ``<w:tc>`` element to parse.
    :param expected: styles expected on the cell at position (1, 1).
    """
    parser = OoxmlParser(BaseBuilder())

    # -- prepare the parser state: an empty table, its first row, first column
    state = parser._state
    state.table = Table()
    state.row = state.table.rows[1]
    state.next_col()

    # -- parse the <w:tc/> element
    parser.parse_tc(etree.XML(w_tc_content))

    # -- the parsed cell is the top-left one: check its styles
    parsed_cell = state.table[(1, 1)]
    assert expected == parsed_cell.styles
Esempio n. 11
0
def test_build_tgroup__tgroup_sorting(tgroup_sorting, expected_tags):
    """
    Build a ``<tgroup>`` from header/body/footer rows and check its children order.

    :param tgroup_sorting: value given to the ``tgroup_sorting`` builder option.
    :param expected_tags: expected tag names of the ``<tgroup>`` children, in order.
    """
    # -- three rows, each with its own nature and a single cell
    table = Table()
    rows = [table.rows[y] for y in (1, 2, 3)]
    for row, nature in zip(rows, ("header", "body", "footer")):
        row.nature = nature
    for row, content in zip(rows, ("a", "b", "c")):
        row.insert_cell(content)

    # -- build the group inside a bare <table> element
    table_elem = etree.Element("table")
    builder = CalsBuilder(tgroup_sorting=tgroup_sorting)
    builder.setup_table(table)
    builder.build_tgroup(table_elem, table)

    # -- collect the names of the tgroup children, in document order
    actual_tags = []
    for child in table_elem.xpath("tgroup/*"):
        actual_tags.append(child.tag)
    assert actual_tags == expected_tags
Esempio n. 12
0
def test_parse_tc(w_tc_content, expected):
    """
    Parse a ``<w:tc>`` fragment and compare the regular styles of the resulting cell.

    Styles whose name starts with "x-cell-" are left out of the comparison.

    :param w_tc_content: XML source of the ``<w:tc>`` element to parse.
    :param expected: styles expected on the cell at position (1, 1).
    """
    parser = OoxmlParser(BaseBuilder())

    # -- prepare the parser state: an empty table, its first row, first column
    state = parser._state
    state.table = Table()
    state.row = state.table.rows[1]
    state.next_col()

    # -- parse the <w:tc/> element
    parser.parse_tc(etree.XML(w_tc_content))

    # -- keep the styles of the top-left cell, cell extensions aside
    #    (like 'x-cell-empty')
    parsed_cell = state.table[(1, 1)]
    actual = {}
    for name, value in parsed_cell.styles.items():
        if name.startswith("x-cell-"):
            continue
        actual[name] = value

    assert expected == actual
Esempio n. 13
0
    def parse_tbl(self, w_tbl):
        """
        Start a new table from a ``<w:tbl>`` element.

        The table styles are gathered from the borders (table style first,
        then ``w:tblPr/w:tblBorders``), the shading (``w:tblPr/w:shd``),
        the page size (``w:pgSz``) of the section properties located after
        the table, and the number of columns of that section
        (``x-sect-cols``). The new table is stored in the parser state.

        See: `Table Properties <http://officeopenxml.com/WPtableProperties.php>`_.

        :type  w_tbl: etree._Element
        :param w_tbl: Table element.

        .. versionchanged:: 0.4.0
           The section width and height are now stored in the 'x-sect-size' table style (units in 'pt').
        """
        style_id = value_of(w_tbl, "w:tblPr/w:tblStyle/@w:val")

        # -- Borders: start from the ones of the table style (if any), then
        #    let every border actually set in ``w:tblPr/w:tblBorders`` win.
        style_borders = _get_style_borders(self._w_styles, style_id)
        w_tbl_borders = value_of(w_tbl, 'w:tblPr/w:tblBorders')
        table_borders = _get_table_borders(w_tbl_borders)

        styles = style_borders.copy()
        for name, border in table_borders.items():
            if border is not None:
                styles[name] = border

        # -- Table shading
        styles.update(Shd(value_of(w_tbl, 'w:tblPr/w:shd')).styles)

        # -- Sections: http://officeopenxml.com/WPsection.php
        #
        #    The properties of a section are held by a ``sectPr`` element:
        #    a child of the last paragraph of the section for every section
        #    but the last one, and a child of the body for the last section.
        w_sect_pr = value_of(
            w_tbl, 'following::w:p/w:pPr/w:sectPr | following::w:sectPr')

        # -- Page size of the section
        styles.update(PgSz(value_of(w_sect_pr, 'w:pgSz')).styles)

        # -- ``x-sect-cols``: number of columns of the section (``w:cols``).
        #    When ``@w:num`` is missing, the ``w:col`` children are counted;
        #    the value falls back to "1" -- useful for @pgwide.
        sect_cols = value_of(w_sect_pr, 'w:cols/@w:num')
        if sect_cols is None:
            col_count = 0
            if w_sect_pr is not None:
                col_count = w_sect_pr.xpath('count(w:cols/w:col)',
                                            namespaces=NS)
            sect_cols = str(int(col_count)) if col_count else "1"
        styles['x-sect-cols'] = sect_cols

        # -- The HTML ``class`` attribute is not a regular style:
        #    the style identifier is kept as the table ``nature`` instead.
        self._state.table = Table(styles=styles, nature=style_id)
Esempio n. 14
0
def test_build_tbl__use_cals():
    """
    Build a Formex ``<TBL>`` with the CALS attributes enabled (no namespace).

    see: formex-4/samples/jo-compl-2002C_061/C_2002061EN.01000403.xml
    """
    table_styles = {
        "border-top": "solid",
        "border-bottom": "solid",
        "x-sect-orient": "landscape",
        "x-sect-cols": "1",
        "background-color": "blue",
        "width": "180",
    }
    table = Table(styles=table_styles)

    # -- header row: two centered cells
    table.rows[1].nature = "header"
    for heading in (u"Expert group", u"First name and surname of the expert"):
        table.rows[1].insert_cell([P(heading)], styles={"align": "center"})

    # -- second row: two plain cells
    for text in (u"Control of infectious diseases", u"Michael Angelo BORG"):
        table.rows[2].insert_cell([P(text)])

    table_elem = FormexBuilder(use_cals=True, cals_ns=None).build_tbl(table)

    # fmt: off
    expected_xml = u"""\
    <TBL NO.SEQ="0001" COLS="2" PAGE.SIZE="SINGLE.LANDSCAPE">
      <CORPUS frame="topbot" colsep="0" rowsep="0" orient="land" pgwide="1" bgcolor="blue" width="180.00mm">
        <colspec colname="c1" colnum="1"/>
        <colspec colname="c2" colnum="2"/>
        <ROW TYPE="HEADER">
          <CELL COL="1" align="center">
            <P>Expert group</P>
          </CELL>
          <CELL COL="2" align="center">
            <P>First name and surname of the expert</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>Control of infectious diseases</P>
          </CELL>
          <CELL COL="2">
            <P>Michael Angelo BORG</P>
          </CELL>
        </ROW>
      </CORPUS>
    </TBL>"""
    # fmt: on
    expected = etree.XML(expected_xml,
                         parser=etree.XMLParser(remove_blank_text=True))

    # -- replace every empty text by ``None`` in both trees before comparing
    for tree in (table_elem, expected):
        for node in tree.xpath("//*"):
            node.text = node.text or None

    differences = xmldiff.main.diff_trees(table_elem, expected)
    if not differences:
        return

    # -- dump the actual tree to ease debugging, then fail
    actual_xml = etree.tounicode(table_elem, pretty_print=True, with_tail=False)
    print(actual_xml, file=sys.stderr)
    assert differences == []
Esempio n. 15
0
 def setup_table(self, styles=None, nature=None):
     """
     Create a new table, store it in the current state and return that state.

     :param styles: styles forwarded to the :class:`Table` constructor.
     :param nature: nature forwarded to the :class:`Table` constructor.
     :return: the state object (``self._state``) which now holds the table.
     """
     self._state.table = Table(styles=styles, nature=nature)
     return self._state
Esempio n. 16
0
def test_build_tbl__orient(orient, size, expected):
    """
    Build a ``<TBL>`` for a given section orientation and size; check its attributes.

    :param orient: value of the 'x-sect-orient' table style.
    :param size: value of the 'x-sect-size' table style.
    :param expected: attributes expected on the resulting table element.
    """
    # -- a one-cell table carrying the section styles under test
    section_styles = {'x-sect-orient': orient, 'x-sect-size': size}
    table = Table(styles=section_styles)
    table.rows[1].insert_cell(u"text")

    tbl_elem = FormexBuilder().build_tbl(table)
    assert tbl_elem.attrib == expected
Esempio n. 17
0
def test_build_tbl__with_title():
    """
    Build a Formex ``<TBL>`` with title detection enabled.

    The first row (a single centered cell, three columns wide) is expected
    in ``<TITLE>``, the other rows in ``<CORPUS>``.

    see: formex-4/samples/jo-compl-2002C_280/C_2002280EN.01000101.xml
    """
    table = Table()

    # -- first row: one centered cell spanning the three columns
    table.rows[1].insert_cell([P(u"1 euro =")],
                              width=3,
                              styles={"align": "center"})

    # -- header row, starting with a cell flagged as empty
    table.rows[2].nature = "header"
    table.rows[2].insert_cell([P()], styles={"x-cell-empty": "true"})

    # -- remaining cells: (row index, text), in insertion order
    plain_cells = (
        (2, u"Currency"),
        (2, u"Exchange rate"),
        (3, u"USD"),
        (3, u"US dollar"),
        (3, u"1,0029"),
        (4, u"JPY"),
        (4, u"Japanese yen"),
        (4, u"121,05"),
    )
    for y, text in plain_cells:
        table.rows[y].insert_cell([P(text)])

    table_elem = FormexBuilder(detect_titles=True).build_tbl(table)

    # fmt: off
    expected_xml = u"""\
    <TBL COLS="3" NO.SEQ="0001">
      <TITLE>
        <TI>
          <P>1 euro =</P>
        </TI>
        <STI/>
      </TITLE>
      <CORPUS>
        <ROW TYPE="HEADER">
          <CELL COL="1">
            <IE/>
          </CELL>
          <CELL COL="2">
            <P>Currency</P>
          </CELL>
          <CELL COL="3">
            <P>Exchange rate</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>USD</P>
          </CELL>
          <CELL COL="2">
            <P>US dollar</P>
          </CELL>
          <CELL COL="3">
            <P>1,0029</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>JPY</P>
          </CELL>
          <CELL COL="2">
            <P>Japanese yen</P>
          </CELL>
          <CELL COL="3">
            <P>121,05</P>
          </CELL>
        </ROW>
      </CORPUS>
    </TBL>"""
    # fmt: on
    expected = etree.XML(expected_xml,
                         parser=etree.XMLParser(remove_blank_text=True))

    # -- replace every empty text by ``None`` in both trees before comparing
    for tree in (table_elem, expected):
        for node in tree.xpath("//*"):
            node.text = node.text or None

    differences = xmldiff.main.diff_trees(table_elem, expected)
    if not differences:
        return

    # -- dump the actual tree to ease debugging, then fail
    actual_xml = etree.tounicode(table_elem, pretty_print=True, with_tail=False)
    print(actual_xml, file=sys.stderr)
    assert differences == []
Esempio n. 18
0
def test_build_tbl():
    """
    Build a Formex ``<TBL>`` from a two-column table with a three-row span.

    see: formex-4/samples/jo-compl-2002C_061/C_2002061EN.01000403.xml
    """
    table = Table()
    table.rows[1].nature = "header"

    # -- (row index, text, extra ``insert_cell`` options), in insertion order
    cells = (
        (1, u"Expert group", {}),
        (1, u"First name and surname of the expert", {}),
        (2, u"Control of infectious diseases", {}),
        (2, u"Michael Angelo BORG", {}),
        (3, u"Information society", {"height": 3}),
        (3, u"Tony HEY", {}),
        (4, u"José L. ENCARNAÇÃO", {}),
        (5, u"Berit SVENDSEN", {}),
        (6, u"Controlled thermonuclear fusion", {}),
        (6, u"Pekka PIRILÄ", {}),
    )
    for y, text, options in cells:
        table.rows[y].insert_cell([P(text)], **options)

    table_elem = FormexBuilder().build_tbl(table)

    # fmt: off
    expected_xml = u"""\
    <TBL COLS="2" NO.SEQ="0001">
      <CORPUS>
        <ROW TYPE="HEADER">
          <CELL COL="1">
            <P>Expert group</P>
          </CELL>
          <CELL COL="2">
            <P>First name and surname of the expert</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>Control of infectious diseases</P>
          </CELL>
          <CELL COL="2">
            <P>Michael Angelo BORG</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1" ROWSPAN="3">
            <P>Information society</P>
          </CELL>
          <CELL COL="2">
            <P>Tony HEY</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="2">
            <P>José L. ENCARNAÇÃO</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="2">
            <P>Berit SVENDSEN</P>
          </CELL>
        </ROW>
        <ROW>
          <CELL COL="1">
            <P>Controlled thermonuclear fusion</P>
          </CELL>
          <CELL COL="2">
            <P>Pekka PIRILÄ</P>
          </CELL>
        </ROW>
      </CORPUS>
    </TBL>"""
    # fmt: on
    expected = etree.XML(expected_xml,
                         parser=etree.XMLParser(remove_blank_text=True))

    # -- replace every empty text by ``None`` in both trees before comparing
    for tree in (table_elem, expected):
        for node in tree.xpath("//*"):
            node.text = node.text or None

    differences = xmldiff.main.diff_trees(table_elem, expected)
    if not differences:
        return

    # -- dump the actual tree to ease debugging, then fail
    actual_xml = etree.tounicode(table_elem, pretty_print=True, with_tail=False)
    print(actual_xml, file=sys.stderr)
    assert differences == []
Esempio n. 19
0
def test_build_table():
    """
    Build a CALS ``<table>`` from a styled two-column table with a three-row span.

    see: formex-4/samples/jo-compl-2002C_061/C_2002061EN.01000403.xml
    """
    table_styles = {
        "border-top": "solid",
        "border-bottom": "solid",
        "x-sect-orient": "landscape",
        "x-sect-cols": "1",
        "background-color": "yellow",
        "width": "247",
    }
    table = Table(styles=table_styles)
    table.rows[1].nature = "header"

    # -- (row index, text, extra ``insert_cell`` options), in insertion order
    cells = (
        (1, u"Expert group", {}),
        (1, u"First name and surname of the expert", {}),
        (2, u"Control of infectious diseases", {}),
        (2, u"Michael Angelo BORG", {}),
        (3, u"Information society", {"height": 3}),
        (3, u"Tony HEY", {}),
        (4, u"José L. ENCARNAÇÃO", {}),
        (5, u"Berit SVENDSEN", {}),
        (6, u"Controlled thermonuclear fusion", {}),
        (6, u"Pekka PIRILÄ", {}),
    )
    for y, text, options in cells:
        table.rows[y].insert_cell(text, **options)

    table_elem = CalsBuilder().build_table(table)

    # fmt: off
    expected_xml = u"""\
    <table frame="topbot" colsep="0" rowsep="0" orient="land" pgwide="1" bgcolor="yellow" width="247.00mm">
      <tgroup cols="2">
        <colspec colnum="1" colname="c1"/>
        <colspec colnum="2" colname="c2"/>
        <thead>
          <row>
            <entry>Expert group</entry>
            <entry>First name and surname of the expert</entry>
          </row>
        </thead>
        <tbody>
          <row>
            <entry>Control of infectious diseases</entry>
            <entry>Michael Angelo BORG</entry>
          </row>
          <row>
            <entry morerows="2">Information society</entry>
            <entry>Tony HEY</entry>
          </row>
          <row>
            <entry>José L. ENCARNAÇÃO</entry>
          </row>
          <row>
            <entry>Berit SVENDSEN</entry>
          </row>
          <row>
            <entry>Controlled thermonuclear fusion</entry>
            <entry>Pekka PIRILÄ</entry>
          </row>
        </tbody>
      </tgroup>
    </table>"""
    # fmt: on
    expected = etree.XML(expected_xml,
                         parser=etree.XMLParser(remove_blank_text=True))

    # -- replace every empty text by ``None`` in both trees before comparing
    for tree in (table_elem, expected):
        for node in tree.xpath("//*"):
            node.text = node.text or None

    differences = xmldiff.main.diff_trees(table_elem, expected)
    if not differences:
        return

    # -- dump the actual tree to ease debugging, then fail
    actual_xml = etree.tounicode(table_elem, pretty_print=True, with_tail=False)
    print(actual_xml, file=sys.stderr)
    assert differences == []
Esempio n. 20
0
def test_setup_table():
    """The value returned by ``CalsBuilder.setup_table`` compares equal to the given table."""
    table = Table()
    returned = CalsBuilder().setup_table(table)
    assert returned == table