def processDocument(self, ips, i, document):
    """Run one document through DocumentProcessor and write its output.

    Args:
        ips: Options mapping. The keys read here are "bucketName",
            "awsRegion", "text", "forms", "tables", "output", "insights",
            "medical-insights" and "translate".
        i: Document number; only used in the progress banner.
        document: Document identifier. Its length sizes the banner
            underline, and it is handed to DocumentProcessor and to
            FileHelper.getFileNameAndExtension.

    Returns:
        None. Progress is reported with print().
    """
    banner = "\nTextracting Document # {}: {}".format(i, document)
    print(banner)
    underline = '=' * (len(document) + 30)
    print(underline)

    # Get document textracted
    processor = DocumentProcessor(
        ips["bucketName"],
        document,
        ips["awsRegion"],
        ips["text"],
        ips["forms"],
        ips["tables"],
    )
    textractResponse = processor.run()
    print("Recieved Textract response...")

    # Debug aid, intentionally left disabled:
    # FileHelper.writeToFile("temp-response.json", json.dumps(response))

    # Generate output files
    print("Generating output...")
    baseName, extension = FileHelper.getFileNameAndExtension(document)
    # Output target is "<name>-<ext>" placed under ips["output"].
    outputTarget = os.path.join(
        ips["output"], "{}-{}".format(baseName, extension)
    )
    generator = OutputGenerator(
        textractResponse, outputTarget, ips["forms"], ips["tables"]
    )
    generator.run()

    # Insights are generated only when at least one related option is truthy.
    insightsRequested = (
        ips["insights"] or ips["medical-insights"] or ips["translate"]
    )
    if insightsRequested:
        generator.generateInsights(
            ips["insights"],
            ips["medical-insights"],
            ips["translate"],
            ips["awsRegion"],
        )

    print("{} textracted successfully.".format(document))
def generateOutput(filePath, response):
    """Write output and insights for an already-obtained response.

    Args:
        filePath: Path given to FileHelper.getFileNameAndExtension; the
            resulting name and extension form the "<name>-v2-<ext>" target
            passed to OutputGenerator.
        response: Response object handed straight to OutputGenerator.

    Returns:
        None. Progress is reported with print().
    """
    print("Generating output...")

    baseName, extension = FileHelper.getFileNameAndExtension(filePath)
    outputTarget = "{}-v2-{}".format(baseName, extension)

    # NOTE(review): the two True flags sit in the argument positions where
    # processDocument passes ips["forms"] and ips["tables"] -- presumably
    # they enable forms and tables output; confirm against OutputGenerator.
    generator = OutputGenerator(response, outputTarget, True, True)
    generator.run()

    # Fixed configuration: both leading flags on, 'es' and 'us-east-1' as
    # the remaining arguments (presumably translation target and AWS
    # region -- verify against generateInsights).
    generator.generateInsights(True, True, 'es', 'us-east-1')