def processDocument(self, ips, i, document):
    """Run Textract on one document and write its output files.

    Args:
        ips: Mapping of run options. This method reads the keys
            "bucketName", "awsRegion", "text", "forms", "tables",
            "output", "insights", "medical-insights" and "translate".
        i: Ordinal of the document, used only in the progress banner.
        document: Document name/path handed to DocumentProcessor and
            FileHelper.getFileNameAndExtension.
    """
    banner = "\nTextracting Document # {}: {}".format(i, document)
    print(banner)
    print('=' * (len(document) + 30))

    # Get document textracted
    processor = DocumentProcessor(
        ips["bucketName"],
        document,
        ips["awsRegion"],
        ips["text"],
        ips["forms"],
        ips["tables"],
    )
    response = processor.run()
    print("Recieved Textract response...")
    # Debug aid (disabled):
    # FileHelper.writeToFile("temp-response.json", json.dumps(response))

    # Generate output files
    print("Generating output...")
    name, ext = FileHelper.getFileNameAndExtension(document)
    output_prefix = os.path.join(ips["output"], "{}-{}".format(name, ext))
    generator = OutputGenerator(
        response, output_prefix, ips["forms"], ips["tables"])
    generator.run()

    # Insights are generated only when at least one of the three
    # insight-related options is switched on.
    if ips["insights"] or ips["medical-insights"] or ips["translate"]:
        generator.generateInsights(
            ips["insights"],
            ips["medical-insights"],
            ips["translate"],
            ips["awsRegion"],
        )

    print("{} textracted successfully.".format(document))
def processDocument(self, ips, i, document):
    """Run Textract on one document and return its tables as CSV text.

    Args:
        ips: Mapping of run options. This method reads the keys
            "bucketName", "awsRegion", "text", "forms" and "tables".
        i: Ordinal of the document, used only in the progress banner.
        document: Document name/path handed to DocumentProcessor.

    Returns:
        The concatenation of ``self.table_csv(...)`` for every block
        whose ``BlockType`` is ``"TABLE"``, each followed by a blank
        line, or the literal ``"<b> NO Table FOUND </b>"`` when the
        response contains no such block.
    """
    print("\nTextracting Document # {}: {}".format(i, document))
    print('=' * (len(document) + 30))

    # Get document textracted
    dp = DocumentProcessor(
        ips["bucketName"],
        document,
        ips["awsRegion"],
        ips["text"],
        ips["forms"],
        ips["tables"],
    )
    response = dp.run()

    # The response is iterated as a sequence of dicts that each carry a
    # 'Blocks' list (presumably one entry per Textract result page --
    # confirm against DocumentProcessor.run). Flatten them into one list.
    blocks = [block for page in response for block in page['Blocks']]

    # Id -> block lookup, passed to table_csv alongside each table block.
    blocks_map = {block['Id']: block for block in blocks}
    table_blocks = [
        block for block in blocks if block['BlockType'] == "TABLE"
    ]

    if not table_blocks:
        return "<b> NO Table FOUND </b>"

    # Tables are numbered from 1. Joining the pieces avoids repeated
    # string concatenation and no longer shadows the stdlib ``csv`` name.
    return ''.join(
        self.table_csv(table, blocks_map, table_number) + '\n\n'
        for table_number, table in enumerate(table_blocks, start=1)
    )