Python DocumentProcessor Examples

Programming Language: Python

Namespace/Package Name: tdp

Examples at hotexamples.com: 2

Python DocumentProcessor - 2 examples found. These are the top rated real world Python examples of tdp.DocumentProcessor extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DocumentProcessor(2)

run(2)

Frequently Used Methods

DocumentProcessor (2)

run (2)

Example #1

Show file

File: textractor.py Project: tbouchik/amazon-textract-textractor

    def processDocument(self, ips, i, document):
        """Textract a single document and generate its output files.

        Args:
            ips: Mapping of run options. This method reads the keys
                "bucketName", "awsRegion", "text", "forms", "tables",
                "output", "insights", "medical-insights" and "translate".
            i: Sequence number of the document; only used in the banner.
            document: Document identifier handed to DocumentProcessor and
                FileHelper (presumably an object key / file name -- confirm
                against the caller).
        """
        banner = "\nTextracting Document # {}: {}".format(i, document)
        print(banner)
        # Underline sized to the document name plus a fixed 30-char margin.
        print('=' * (len(document) + 30))

        # Run the document through Textract.
        processor = DocumentProcessor(
            ips["bucketName"],
            document,
            ips["awsRegion"],
            ips["text"],
            ips["forms"],
            ips["tables"],
        )
        textract_response = processor.run()
        print("Recieved Textract response...")

        # Write the output files under ips["output"], prefixed "<name>-<ext>".
        print("Generating output...")
        base_name, extension = FileHelper.getFileNameAndExtension(document)
        output_prefix = os.path.join(
            ips["output"], "{}-{}".format(base_name, extension))
        generator = OutputGenerator(
            textract_response, output_prefix, ips["forms"], ips["tables"])
        generator.run()

        # Insight generation is optional and runs if any of the flags is set.
        wants_insights = (
            ips["insights"] or ips["medical-insights"] or ips["translate"]
        )
        if wants_insights:
            generator.generateInsights(
                ips["insights"],
                ips["medical-insights"],
                ips["translate"],
                ips["awsRegion"],
            )

        print("{} textracted successfully.".format(document))

Example #2

Show file

File: textractor.py Project: marker-aman/Text_Extaction

    def processDocument(self, ips, i, document):
        """Textract a document and return all of its tables as CSV text.

        Args:
            ips: Mapping of run options. This method reads the keys
                "bucketName", "awsRegion", "text", "forms" and "tables".
            i: Sequence number of the document; only used in the banner.
            document: Document identifier handed to DocumentProcessor.

        Returns:
            The concatenation of ``self.table_csv(...)`` for every block
            whose "BlockType" is "TABLE", each followed by two newlines, or
            the literal ``"<b> NO Table FOUND </b>"`` when there is none.
        """
        print("\nTextracting Document # {}: {}".format(i, document))
        print('=' * (len(document)+30))

        # Get document textracted
        dp = DocumentProcessor(ips["bucketName"], document, ips["awsRegion"],
                               ips["text"], ips["forms"], ips["tables"])
        response = dp.run()

        # Single pass over every block of every response item: index each
        # block by its Id and collect the TABLE blocks in encounter order.
        blocks_map = {}
        table_blocks = []
        for page in response:
            for block in page['Blocks']:
                blocks_map[block['Id']] = block
                if block['BlockType'] == "TABLE":
                    table_blocks.append(block)

        if not table_blocks:
            return "<b> NO Table FOUND </b>"

        # Tables are numbered from 1; join once instead of repeated `+=`.
        return ''.join(
            self.table_csv(table, blocks_map, table_number) + '\n\n'
            for table_number, table in enumerate(table_blocks, start=1)
        )