def _outputFullTable(self, pages):
    """Gather every table of the document into one CSV payload and write it.

    Walks ``self.document.pages`` (the ``pages`` argument is accepted but
    never read). For each table the payload receives:

    * one empty row (no label text),
    * one row per table row, holding the ``text`` of each cell,
    * two empty rows acting as a separator.

    The rows are handed to ``S3Helper.writeCSVRaw`` together with
    ``self.bucketName`` and a path built from ``self.outputPath`` and the
    fixed tag ``'Full'``. Nothing is returned.
    """
    rows_out = []
    for doc_page in self.document.pages:
        for tbl in doc_page.tables:
            # Unlabelled header row for this table.
            rows_out.append([])
            rows_out.extend(
                [cell.text for cell in table_row.cells]
                for table_row in tbl.rows
            )
            # Two distinct blank rows separate consecutive tables.
            rows_out.extend(([], []))

    target = "{}page-{}-tables.csv".format(self.outputPath, 'Full')
    S3Helper.writeCSVRaw(rows_out, self.bucketName, target)
def _outputTable(self, page, p):
    """Write the tables of one page as CSV and record the output path.

    Args:
        page: object exposing ``tables``; each table exposes ``rows``, each
            row exposes ``cells``, and each cell exposes ``text``.
        p: page identifier interpolated into the output path and item name.

    For every table the payload gets a ``["Table"]`` marker row, one row of
    cell texts per table row, and two empty separator rows. The payload is
    passed to ``S3Helper.writeCSVRaw`` with ``self.bucketName``; afterwards
    ``self.saveItem`` is called with ``self.documentId``, the item name
    ``"page-<p>-Tables"`` and the output path. Nothing is returned.
    """
    rows_out = []
    for tbl in page.tables:
        rows_out.append(["Table"])
        rows_out.extend(
            [cell.text for cell in table_row.cells]
            for table_row in tbl.rows
        )
        # Two distinct blank rows separate consecutive tables.
        rows_out.extend(([], []))

    target = "{}page-{}-tables.csv".format(self.outputPath, p)
    S3Helper.writeCSVRaw(rows_out, self.bucketName, target)
    item_name = "page-{}-Tables".format(p)
    self.saveItem(self.documentId, item_name, target)
def _outputTable(self, page, p, no_write=False): csvData = [] for table in page.tables: csvRow = [] csvRow.append("Table") csvData.append(csvRow) for row in table.rows: csvRow = [] for cell in row.cells: csvRow.append(cell.text) csvData.append(csvRow) csvData.append([]) csvData.append([]) if no_write: return csvData else: opath = "{}/page-{}/tables.csv".format(self.outputPath, p) S3Helper.writeCSVRaw(csvData, self.bucketName, opath)
def _outputTable(self, page, p):
    """Write the tables of one page as CSV and return the output path.

    Args:
        page: object exposing ``tables``; each table exposes ``rows``, each
            row exposes ``cells``, and each cell exposes ``text``.
        p: accepted but never read; the page number used in the path comes
            from ``self.metadata['page_number']``.

    Returns:
        The path string that was passed to ``S3Helper.writeCSVRaw``.

    Each table contributes a ``["Table"]`` marker row, one row of cell texts
    per table row, and two empty separator rows.
    """
    # Looked up first, so a missing key fails before any rows are built.
    page_no = self.metadata['page_number']

    rows_out = []
    for tbl in page.tables:
        rows_out.append(["Table"])
        rows_out.extend(
            [cell.text for cell in table_row.cells]
            for table_row in tbl.rows
        )
        # Two distinct blank rows separate consecutive tables.
        rows_out.extend(([], []))

    target = "{}page-{}-tables.csv".format(self.outputPath, page_no)
    S3Helper.writeCSVRaw(rows_out, self.bucketName, target)
    return target