def processImage(documentId, bucketName, objectName, callerId):
    """Process one stored object with Textract and persist the results.

    Calls ``callTextract`` for the given bucket/object, hands the response to
    an ``OutputGenerator`` (with forms and tables disabled) that is pointed at
    ``textractBucketName``, writes its outputs tagged with the document id,
    and finally records a lineage entry from the source bucket to
    ``textractBucketName``.

    Args:
        documentId: Identifier used for logging, output tagging and lineage.
        bucketName: Bucket holding the source object.
        objectName: Name of the source object; reused as the target file name.
        callerId: Recorded as-is in the lineage entry.
    """
    textractResponse = callTextract(bucketName, objectName)

    print("Generating output for documentId: {}".format(documentId))

    generator = OutputGenerator(
        documentId=documentId,
        response=textractResponse,
        bucketName=textractBucketName,
        objectName=objectName,
        forms=False,
        tables=False,
    )
    generator.writeTextractOutputs(
        taggingStr="documentId={}".format(documentId))

    # Source and target share the object name; only the bucket differs.
    lineageRecord = {
        "documentId": documentId,
        "callerId": callerId,
        "sourceBucketName": bucketName,
        "targetBucketName": textractBucketName,
        "sourceFileName": objectName,
        "targetFileName": objectName,
    }
    lineage_client.recordLineage(lineageRecord)
def processRequest(request):
    """Handle a finished Textract job: fetch results, store them, log lineage.

    The pipeline stage is reported through ``pipeline_client``: it is marked
    failed whenever any step below raises, and succeeded once outputs are
    written and lineage is recorded.

    Args:
        request: Mapping with the keys ``jobStatus``, ``jobId``, ``jobTag``
            (used as the document id), ``jobAPI``, ``bucketName``,
            ``objectName`` and ``callerId``.

    Returns:
        dict: ``{'statusCode': 200, 'body': <summary message>}``.

    Raises:
        Exception: If ``jobStatus`` is ``'FAILED'``. Exceptions raised while
            fetching job results, building the ``OutputGenerator``, writing
            outputs or recording lineage are re-raised after the stage has
            been marked failed.
    """
    status = request['jobStatus']
    jobId = request['jobId']
    jobTag = request['jobTag']
    jobAPI = request['jobAPI']
    bucketName = request['bucketName']
    objectName = request['objectName']

    pipeline_client.body = {
        "documentId": jobTag,
        "bucketName": bucketName,
        "objectName": objectName,
        "stage": PIPELINE_STAGE
    }

    if status == 'FAILED':
        pipeline_client.stageFailed(
            "Textract Analysis didn't complete successfully")
        raise Exception(
            "Textract job for document ID {}; bucketName {} fileName {}; failed during Textract analysis. Please double check the document quality"
            .format(jobTag, bucketName, objectName))

    pipeline_client.stageInProgress()

    try:
        pages = getJobResults(jobAPI, jobId)
    except Exception:
        pipeline_client.stageFailed()
        raise

    print("Result pages received: {}".format(len(pages)))

    # Forms and tables are only requested for StartDocumentAnalysis jobs.
    isAnalysisJob = jobAPI == "StartDocumentAnalysis"

    try:
        opg = OutputGenerator(documentId=jobTag,
                              response=pages,
                              bucketName=textractBucketName,
                              objectName=objectName,
                              forms=isAnalysisJob,
                              tables=isAnalysisJob)
    except Exception:
        pipeline_client.stageFailed(
            "Could not convert results from Textract into processable object. Try uploading again."
        )
        raise

    # Without this guard a failure here would leave the stage reported as
    # "in progress" indefinitely, unlike the earlier failure points.
    try:
        opg.writeTextractOutputs(taggingStr="documentId={}".format(jobTag))
        lineage_client.recordLineage({
            "documentId": jobTag,
            "callerId": request["callerId"],
            "sourceBucketName": bucketName,
            "targetBucketName": textractBucketName,
            "sourceFileName": objectName,
            "targetFileName": objectName
        })
    except Exception:
        pipeline_client.stageFailed(
            "Could not store Textract outputs or record lineage.")
        raise

    output = "Processed -> Document: {}, Object: {}/{} processed.".format(
        jobTag, bucketName, objectName)
    pipeline_client.stageSucceeded()
    print(output)
    return {'statusCode': 200, 'body': output}