Ejemplo n.º 1
0
def callTextract(bucketName, objectName, detectText, detectForms, detectTables):
    """Run Textract on a document stored in S3 and return the raw response.

    When neither ``detectForms`` nor ``detectTables`` is truthy, the document
    is sent to ``detect_document_text``. Otherwise it is sent to
    ``analyze_document`` with the matching feature types ("TABLES" and/or
    "FORMS", in that order).

    Args:
        bucketName: Passed as the 'Bucket' of the S3Object reference.
        objectName: Passed as the 'Name' of the S3Object reference.
        detectText: Accepted for interface compatibility; not read here.
        detectForms: Truthy to request the "FORMS" feature.
        detectTables: Truthy to request the "TABLES" feature.

    Returns:
        Whatever the selected Textract client call returns.
    """
    client = AwsHelper().getClient('textract')

    # Both API calls take the same S3 reference, so build it once.
    s3_document = {
        'S3Object': {
            'Bucket': bucketName,
            'Name': objectName
        }
    }

    if detectForms or detectTables:
        # Keep the original ordering: TABLES is listed before FORMS.
        wanted = ((detectTables, "TABLES"), (detectForms, "FORMS"))
        feature_types = [label for enabled, label in wanted if enabled]
        return client.analyze_document(
            Document=s3_document,
            FeatureTypes=feature_types
        )

    # Plain text detection when no structured features were requested.
    return client.detect_document_text(Document=s3_document)
Ejemplo n.º 2
0
    def _callTextract(self):
        """Send the configured document to Textract and return the response.

        Reads its settings from ``self.inputParameters``:

        * ``awsRegion`` is passed to ``AwsHelper().getClient``.
        * ``detectForms`` / ``detectTables`` choose between
          ``detect_document_text`` (neither set) and ``analyze_document``
          (with "TABLES" and/or "FORMS" as feature types).
        * ``isLocalDocument`` chooses between sending the bytes of the file
          at ``documentPath`` and sending an S3Object reference built from
          ``bucketName`` and ``documentPath``.

        Returns:
            Whatever the selected Textract client call returns.
        """
        params = self.inputParameters
        client = AwsHelper().getClient('textract', params.awsRegion)

        # Same short-circuit order as checking "not forms and not tables".
        needs_analysis = params.detectForms or params.detectTables

        feature_types = []
        if needs_analysis:
            # TABLES is listed before FORMS, matching the request order.
            if params.detectTables:
                feature_types.append("TABLES")
            if params.detectForms:
                feature_types.append("FORMS")

        # Build the Document argument once; both API calls accept either form.
        if params.isLocalDocument:
            with open(params.documentPath, 'rb') as source:
                payload = bytearray(source.read())
            document = {'Bytes': payload}
        else:
            document = {
                'S3Object': {
                    'Bucket': params.bucketName,
                    'Name': params.documentPath
                }
            }

        if needs_analysis:
            return client.analyze_document(
                Document=document, FeatureTypes=feature_types)

        return client.detect_document_text(Document=document)
Ejemplo n.º 3
0
def callTextract(bucketName, objectName, detectText, detectForms, detectTables):
    """Call Textract for an S3-hosted document and return its response.

    The request always carries an S3Object reference made from
    ``bucketName`` and ``objectName``. If ``detectForms`` or ``detectTables``
    is truthy, ``analyze_document`` is called with the corresponding
    feature types; otherwise ``detect_document_text`` is called.

    Note: ``detectText`` is part of the signature but is not used in the body.
    """
    textract = AwsHelper().getClient("textract")

    # Keyword arguments shared by both operations.
    request = {
        "Document": {"S3Object": {"Bucket": bucketName, "Name": objectName}},
    }

    if detectForms or detectTables:
        # Feature order is TABLES first, then FORMS.
        table_features = ["TABLES"] if detectTables else []
        form_features = ["FORMS"] if detectForms else []
        request["FeatureTypes"] = table_features + form_features
        operation = textract.analyze_document
    else:
        operation = textract.detect_document_text

    return operation(**request)