def test_merge_multi_page_table_data_different_tables(self):
        """Two single-cell tables come back from the merge as two separate 1x1 grids."""
        parts = []
        for label in ('FIRST CELL', 'SECOND CELL'):
            # Both parts share the same column and row bounds; only the text differs.
            part = Table([(0, 100)], [(1000, 1500)])
            cell = Cell(0, 1000, 100, 1500)
            cell.text = label
            part.cells[0][0] = cell
            parts.append(part)

        merged = merge_multi_page_table_data(parts)

        # One 1x1 grid of cell text per input table, in input order.
        self.assertListEqual(merged, [[['FIRST CELL']], [['SECOND CELL']]])
    def test_table_starts_at_top_false(self):
        """A table whose only row spans 1000 to 1500 is not the first page element."""
        col_bounds = [(0, 100)]
        row_bounds = [(1000, 1500)]
        table = Table(col_bounds, row_bounds)
        only_cell = Cell(0, 1000, 100, 1500)
        table.cells[0][0] = only_cell

        starts_at_top = table_is_first_page_element(table)
        self.assertFalse(starts_at_top)
Esempio n. 3
0
 def _make_table(page, order):
     """Return a Table built from empty column and row lists, tagged with ``page`` and ``order``."""
     table = Table([], [])
     table.page, table.order = page, order
     return table
    def test_table_starts_at_top_true(self):
        """A table whose only row spans 749 to 1000 is reported as the first page element."""
        col_bounds = [(0, 100)]     # (x1, x2)
        row_bounds = [(749, 1000)]  # (y1, y2)
        table = Table(col_bounds, row_bounds)
        only_cell = Cell(0, 749, 100, 1000)
        table.cells[0][0] = only_cell

        starts_at_top = table_is_first_page_element(table)
        self.assertTrue(starts_at_top)
Esempio n. 5
0
    def _generate_table(self, table_idx, cols, rows, **kwargs):
        """Build a table from column/row bounds and fill it with the horizontal text.

        Parameters
        ----------
        table_idx
            Index of the table; ``table_idx + 1`` is stored as ``table.order``.
        cols, rows
            Passed unchanged to ``Table``.
        **kwargs
            Accepted but not used by this method.

        Returns
        -------
        The object returned by ``Table(cols, rows).set_all_edges()``, with cell
        text filled in and ``df``, ``shape``, ``flavor``, ``accuracy``,
        ``whitespace``, ``order`` and the plotting attributes set.
        """
        table = Table(cols, rows).set_all_edges()

        # TODO: have a single list in place of two directional ones?
        # NOTE(review): the original comment says the text is "sorted on
        # x-coordinate based on reading order i.e. LTR or RTL", but no sorting
        # happens in this method -- presumably t_bbox is already ordered; confirm.
        # Only the horizontal text boxes are placed into cells here.
        direction = "horizontal"
        position_errors = []
        for textline in self.t_bbox[direction]:
            indices, error = get_table_index(
                table,
                textline,
                direction,
                split_text=self.split_text,
                flag_size=self.flag_size,
                strip_text=self.strip_text,
            )
            # NOTE(review): this guard only skips when `indices` is a tuple that
            # starts with (-1, -1). If get_table_index returns a list, a list
            # slice never equals a tuple and nothing is ever skipped -- confirm
            # against get_table_index.
            if indices[:2] == (-1, -1):
                continue
            position_errors.append(error)
            for row, col, text in indices:
                table.cells[row][col].text = text
        accuracy = compute_accuracy([[100, position_errors]])

        data = table.data
        table.df = pd.DataFrame(data)
        table.shape = table.df.shape

        whitespace = compute_whitespace(data)
        table.flavor = "stream"
        table.accuracy = accuracy
        table.whitespace = whitespace
        table.order = table_idx + 1
        # table.page = int(os.path.basename(self.rootname).split("page-")[-1])

        # for plotting: one (left, bottom, right, top) tuple per horizontal text item
        table._text = [
            (item.left, item.bottom, item.right, item.top)
            for item in self.horizontal_text
        ]
        table._image = None
        table._segments = None
        table._textedges = self.textedges

        return table