예제 #1
0
파일: stream.py 프로젝트: vijuc895/camelot
    def _generate_table(self, table_idx, cols, rows, **kwargs):
        """Build a "stream" flavor table and fill its cells with horizontal text.

        A ``Table`` is created from ``cols`` and ``rows``, all of its edges
        are set, and every text object in ``self.t_bbox["horizontal"]`` is
        assigned to a cell via ``get_table_index``. Accuracy and whitespace
        metrics are then computed and stored on the table together with the
        attributes used for plotting.

        Parameters
        ----------
        table_idx
            Index of the table; ``table.order`` is set to ``table_idx + 1``.
        cols
            Column definitions, forwarded unchanged to ``Table``.
        rows
            Row definitions, forwarded unchanged to ``Table``.
        **kwargs
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        Table
            The populated table with ``df``, ``shape``, ``flavor``,
            ``accuracy``, ``whitespace``, ``order`` and the private
            plotting attributes set.
        """
        table = Table(cols, rows)
        table = table.set_all_edges()

        pos_errors = []
        # TODO: have a single list in place of two directional ones?
        # sorted on x-coordinate based on reading order i.e. LTR or RTL
        # Only the horizontal text boxes are placed into cells here.
        direction = "horizontal"
        for t in self.t_bbox[direction]:
            indices, error = get_table_index(
                table,
                t,
                direction,
                split_text=self.split_text,
                flag_size=self.flag_size,
                strip_text=self.strip_text,
            )
            # ``indices`` is a sequence of (row, col, text) triples, so the
            # (-1, -1) "no cell found" sentinel has to be checked per entry.
            # Comparing a slice of the sequence against (-1, -1) is always
            # unequal and would let unplaced text overwrite cells[-1][-1].
            placed = [
                (r_idx, c_idx, text)
                for r_idx, c_idx, text in indices
                if (r_idx, c_idx) != (-1, -1)
            ]
            if not placed:
                # Nothing landed in the table; do not count its error either.
                continue
            pos_errors.append(error)
            for r_idx, c_idx, text in placed:
                table.cells[r_idx][c_idx].text = text
        accuracy = compute_accuracy([[100, pos_errors]])

        data = table.data
        table.df = pd.DataFrame(data)
        table.shape = table.df.shape

        whitespace = compute_whitespace(data)
        table.flavor = "stream"
        table.accuracy = accuracy
        table.whitespace = whitespace
        table.order = table_idx + 1
        # NOTE: table.page is not assigned in this method; the original
        # assignment below is disabled.
        # table.page = int(os.path.basename(self.rootname).split("page-")[-1])

        # for plotting
        table._text = [
            (t.left, t.bottom, t.right, t.top) for t in self.horizontal_text
        ]
        table._image = None
        table._segments = None
        table._textedges = self.textedges

        return table
예제 #2
0
 def _make_table(page, order):
     """Return an empty ``Table`` tagged with the given page and order.

     The table is constructed with empty column and row lists; ``page`` and
     ``order`` are stored on it as plain attributes.
     """
     table = Table([], [])
     # Tuple assignment binds left to right: page first, then order.
     table.page, table.order = page, order
     return table