def callTextract(bucketName, objectName, detectText, detectForms, detectTables):
    """Send an S3-hosted document to Textract and return the raw response.

    When neither ``detectForms`` nor ``detectTables`` is truthy the document
    goes to ``detect_document_text``. Otherwise it goes to ``analyze_document``
    with "TABLES" and/or "FORMS" as the requested feature types.

    Args:
        bucketName: S3 bucket that holds the document.
        objectName: Key of the document inside the bucket.
        detectText: Accepted for interface compatibility; never read here.
        detectForms: Truthy to request the "FORMS" feature.
        detectTables: Truthy to request the "TABLES" feature.

    Returns:
        Whatever the client call returns, unmodified.
    """
    # Presumably a boto3 Textract client; AwsHelper is defined elsewhere.
    client = AwsHelper().getClient('textract')
    s3Document = {'S3Object': {'Bucket': bucketName, 'Name': objectName}}

    # "TABLES" is listed before "FORMS" when both are requested.
    requested = [
        feature
        for feature, wanted in (("TABLES", detectTables), ("FORMS", detectForms))
        if wanted
    ]

    if not requested:
        return client.detect_document_text(Document=s3Document)
    return client.analyze_document(Document=s3Document, FeatureTypes=requested)
def callTextract(bucketName, objectName):
    """Run plain text detection on an S3-hosted document.

    Args:
        bucketName: S3 bucket that holds the document.
        objectName: Key of the document inside the bucket.

    Returns:
        The ``detect_document_text`` response, unmodified.
    """
    # Presumably a boto3 Textract client; AwsHelper is defined elsewhere.
    client = AwsHelper().getClient('textract')
    s3Location = {'Bucket': bucketName, 'Name': objectName}
    return client.detect_document_text(Document={'S3Object': s3Location})
def _callTextract(self):
    """Send the configured document to Textract and return the raw response.

    Everything is read from ``self.inputParameters``:

    * ``awsRegion`` is passed to ``AwsHelper().getClient``.
    * ``isLocalDocument`` selects between reading ``documentPath`` from disk
      (sent as ``Bytes``) and referencing it as an ``S3Object`` in
      ``bucketName``.
    * ``detectTables`` / ``detectForms`` select the "TABLES" / "FORMS"
      features. If neither is truthy, ``detect_document_text`` is called;
      otherwise ``analyze_document`` is called with the selected features.

    Returns:
        Whatever the chosen client call returns, unmodified.
    """
    params = self.inputParameters
    # Presumably a boto3 Textract client; AwsHelper is defined elsewhere.
    client = AwsHelper().getClient('textract', params.awsRegion)

    # "TABLES" is listed before "FORMS" when both are requested.
    wantedFeatures = []
    if params.detectTables:
        wantedFeatures.append("TABLES")
    if params.detectForms:
        wantedFeatures.append("FORMS")

    # Describe the document once, whichever API ends up being called.
    if params.isLocalDocument:
        with open(params.documentPath, 'rb') as localFile:
            payload = bytearray(localFile.read())
        source = {'Bytes': payload}
    else:
        source = {
            'S3Object': {
                'Bucket': params.bucketName,
                'Name': params.documentPath,
            }
        }

    if wantedFeatures:
        return client.analyze_document(
            Document=source, FeatureTypes=wantedFeatures)
    return client.detect_document_text(Document=source)
def callTextract(bucketName, objectName, detectText, detectForms, detectTables):
    """Call Textract for a document in S3 and hand back the raw response.

    ``analyze_document`` is used when forms and/or tables are requested;
    otherwise ``detect_document_text`` is used.

    Args:
        bucketName: S3 bucket that holds the document.
        objectName: Key of the document inside the bucket.
        detectText: Unused by this function; kept in the signature.
        detectForms: Truthy to add "FORMS" to the feature types.
        detectTables: Truthy to add "TABLES" to the feature types.

    Returns:
        The response from the client call, unmodified.
    """
    # Presumably a boto3 Textract client; AwsHelper is defined elsewhere.
    service = AwsHelper().getClient("textract")
    request = {
        "Document": {"S3Object": {"Bucket": bucketName, "Name": objectName}},
    }

    if detectForms or detectTables:
        # Tables come first in the list, then forms.
        featureTypes = []
        if detectTables:
            featureTypes.append("TABLES")
        if detectForms:
            featureTypes.append("FORMS")
        request["FeatureTypes"] = featureTypes
        return service.analyze_document(**request)

    return service.detect_document_text(**request)