def lambda_handler(event, context):
    """Lambda entry point: run Comprehend for a newly written ``fullresponse.json``.

    Reads the bucket name and object key from the first record of the S3
    event, checks the (lower-cased) object name against the expected
    ``fullresponse.json`` name/extension pair, and then calls
    ``runComprehend`` with the bucket, the key and this function's ARN.

    Args:
        event: Event payload; only ``event['Records'][0]['s3']`` is read.
        context: Lambda context object; its ``invoked_function_arn`` is
            forwarded as the caller id.

    Raises:
        AssertionError: If ``FileHelper().getFileNameAndExtension`` does not
            return ``('fullresponse', 'json')`` for the lower-cased key.
    """
    print("Comprehend Event: {}".format(event))

    s3Record = event['Records'][0]['s3']
    bucketName = s3Record['bucket']['name']
    # unquote_plus turns percent-escapes and '+' in the key back into the
    # literal characters before the key is used.
    objectName = urllib.parse.unquote_plus(s3Record['object']['key'])
    callerId = context.invoked_function_arn

    nameAndExtension = FileHelper().getFileNameAndExtension(objectName.lower())
    if not (nameAndExtension == ('fullresponse', 'json')):
        # Raised explicitly rather than through an ``assert`` statement:
        # asserts are removed when Python runs with -O / PYTHONOPTIMIZE, which
        # would silently disable this check. The exception type and message
        # are kept as they were.
        raise AssertionError(
            "File detected does not match expected format: 'fullresponse.json'"
        )

    runComprehend(bucketName, objectName, callerId)
def _translateCaptionFile(captions, bucketName, obj, languageCode):
    """Rebuild one translated caption file and write it under ``output/``.

    Args:
        captions: The ``Captions`` helper used to parse and serialise captions.
        bucketName: Bucket that holds both the delimited file and the source
            caption file, and that receives the output.
        obj: Key of the delimited translation file to process.
        languageCode: Language code; ``"<languageCode>."`` is removed from
            ``obj`` to derive the source caption file name.

    Returns:
        The ``"Output Object: <bucket>/<key>"`` description of the written
        object.
    """
    # Read the delimited file contents
    content = S3Helper().readFromS3(bucketName, obj)
    fileName = FileHelper().getFileName(obj)
    sourceFileName = FileHelper().getFileName(
        obj.replace("{}.".format(languageCode), ""))
    logger.debug("SourceFileKey:%s.processed", sourceFileName)

    sourceObject = {
        "Bucket": bucketName,
        "Key": "input/{}.processed".format(sourceFileName),
    }

    isVtt = fileName.endswith("vtt")
    isSrt = not isVtt and fileName.endswith("srt")

    # Based on the file format, load the source captions as a Python object.
    # Any other format leaves the caption list empty.
    captionsList = []
    if isVtt:
        captionsList = captions.vttToCaptions(sourceObject)
    elif isSrt:
        captionsList = captions.srtToCaptions(sourceObject)

    # Replace the text captions with the translated content
    translatedCaptionsList = captions.DelimitedToWebCaptions(
        captionsList, content, "<span>", 15)

    # Recreate the caption file in VTT or SRT format
    translatedText = ""
    if isVtt:
        translatedText = captions.captionsToVTT(translatedCaptionsList)
    elif isSrt:
        translatedText = captions.captionsToSRT(translatedCaptionsList)
    logger.debug(translatedText)
    logger.debug(content)

    # Write the VTT or SRT file into the output S3 folder
    newObjectKey = "output/{}".format(fileName)
    S3Helper().writeToS3(str(translatedText), bucketName, newObjectKey)
    return "Output Object: {}/{}".format(bucketName, newObjectKey)


def _deleteTempCaptionFiles(bucketName):
    """Delete the temporary delimited caption files under ``captions-in/``.

    The keys come from ``S3Helper().getFilteredFileNames`` for the
    ``"captions-in/"`` prefix and the ``"delimited"`` filter. A failure on one
    object is logged and does not stop the remaining deletions.
    """
    objs = S3Helper().getFilteredFileNames(bucketName, "captions-in/",
                                           ["delimited"])
    for obj in objs:
        try:
            logger.debug("Deleting temp delimited caption files %s", obj)
            S3Helper().deleteObject(bucketName, obj)
        except ClientError as e:
            logger.error("An error occured with S3 bucket operations: %s", e)
        except Exception as e:
            # logger.exception keeps the message and traceback, not just the
            # exception class.
            logger.exception(
                "Error occured in deleting the delimited captions file: %s", e)


def processRequest(request):
    """Turn translated delimited files back into VTT/SRT caption files.

    Lists the delimited files under
    ``<s3uri path><accountId>-TranslateText-<jobId>/`` and, for each one,
    rebuilds the caption file from the matching ``input/<name>.processed``
    object and writes it to ``output/``. Errors on a single file are logged
    and processing continues with the next file. Afterwards, if requested,
    the temporary delimited files under ``captions-in/`` are deleted.

    Args:
        request: Mapping with the keys ``"s3uri"``, ``"accountId"``,
            ``"jobId"`` and ``"langCode"``. The optional key
            ``"delete_captionsin"`` triggers the cleanup step when it equals
            the string ``"true"``.

    Returns:
        None.
    """
    logger.info("request: %s", request)
    up = urlparse(request["s3uri"], allow_fragments=False)
    accountid = request["accountId"]
    jobid = request["jobId"]
    bucketName = up.netloc
    objectkey = up.path.lstrip('/')
    basePrefixPath = objectkey + accountid + "-TranslateText-" + jobid + "/"
    languageCode = request["langCode"]
    logger.debug("Base Prefix Path:%s", basePrefixPath)
    captions = Captions()

    # Filter only the delimited files with the .delimited suffix
    objs = S3Helper().getFilteredFileNames(bucketName, basePrefixPath,
                                           ["delimited"])
    for obj in objs:
        try:
            output = _translateCaptionFile(captions, bucketName, obj,
                                           languageCode)
            logger.debug(output)
        except ClientError as e:
            logger.error("An error occured with S3 bucket operations: %s", e)
        except Exception as e:
            # Best-effort per file: log with traceback and move on.
            # KeyboardInterrupt/SystemExit are deliberately not swallowed.
            logger.exception(
                "Error occured processing the captions file: %s", e)

    # The flag is optional; a missing key means "do not delete". The listing
    # of captions-in/ is only performed when deletion was actually requested.
    if request.get("delete_captionsin") == "true":
        _deleteTempCaptionFiles(bucketName)