def test_merge_multi_page_table_data_different_tables(self):
    # Build two single-cell tables with identical geometry; only the text
    # stored in the one cell differs between them.
    parts = []
    for label in ('FIRST CELL', 'SECOND CELL'):
        part = Table([(0, 100)], [(1000, 1500)])
        cell = Cell(0, 1000, 100, 1500)
        cell.text = label
        part.cells[0][0] = cell
        parts.append(part)

    actual = merge_multi_page_table_data(parts)

    # two 1:1 tables
    expected = [[['FIRST CELL']], [['SECOND CELL']]]
    self.assertListEqual(actual, expected)
def test_table_starts_at_top_false(self):
    # Single-cell table built from one column bound (0, 100) and one row
    # bound (1000, 1500); the check is expected to be falsy for it.
    only_cell = Cell(0, 1000, 100, 1500)
    table = Table([(0, 100)], [(1000, 1500)])
    table.cells[0][0] = only_cell

    is_first_element = table_is_first_page_element(table)
    self.assertFalse(is_first_element)
def _make_table(page, order):
    """Return a Table built from empty column/row lists, tagged with *page* and *order*."""
    table = Table([], [])
    table.page = page
    table.order = order
    return table
def test_table_starts_at_top_true(self):
    # Column bounds are (x1, x2) and row bounds are (y1, y2).
    cols = [(0, 100)]
    rows = [(749, 1000)]
    table = Table(cols, rows)
    table.cells[0][0] = Cell(0, 749, 100, 1000)

    # The check is expected to be truthy for this geometry.
    self.assertTrue(table_is_first_page_element(table))
def _generate_table(self, table_idx, cols, rows, **kwargs):
    """Build a Table from column/row bounds and fill it with horizontal text.

    Parameters
    ----------
    table_idx : int
        Zero-based index of the table; stored on the result as
        ``order = table_idx + 1``.
    cols, rows
        Column and row bounds forwarded unchanged to ``Table``.
    **kwargs
        Accepted for interface compatibility; not read by this method.

    Returns
    -------
    Table
        The populated table, with ``df``, ``shape``, ``accuracy``,
        ``whitespace``, ``flavor``, ``order`` and the plotting attributes
        (``_text``, ``_image``, ``_segments``, ``_textedges``) set.
    """
    table = Table(cols, rows).set_all_edges()

    pos_errors = []
    # TODO: have a single list in place of two directional ones?
    # sorted on x-coordinate based on reading order i.e. LTR or RTL
    # Only the horizontal text boxes are assigned to cells here.
    direction = "horizontal"
    for textline in self.t_bbox[direction]:
        indices, error = get_table_index(
            table,
            textline,
            direction,
            split_text=self.split_text,
            flag_size=self.flag_size,
            strip_text=self.strip_text,
        )
        # NOTE(review): the guard compares a slice with a tuple; confirm
        # against get_table_index's return type that it can ever be False.
        if indices[:2] != (-1, -1):
            pos_errors.append(error)
            for r_idx, c_idx, text in indices:
                table.cells[r_idx][c_idx].text = text
    table.accuracy = compute_accuracy([[100, pos_errors]])

    data = table.data
    table.df = pd.DataFrame(data)
    table.shape = table.df.shape
    table.whitespace = compute_whitespace(data)

    table.flavor = "stream"
    table.order = table_idx + 1
    # table.page is not assigned here; the original assignment is disabled:
    # table.page = int(os.path.basename(self.rootname).split("page-")[-1])

    # for plotting
    table._text = [
        (textline.left, textline.bottom, textline.right, textline.top)
        for textline in self.horizontal_text
    ]
    table._image = None
    table._segments = None
    table._textedges = self.textedges

    return table