コード例 #1
0
def lambda_handler(event, context):
    """Handle an S3 event and start Comprehend processing for the object.

    Reads the bucket name and the URL-decoded object key from the first
    record of the event, verifies that the lower-cased key resolves to the
    name/extension pair ``('fullresponse', 'json')`` according to
    ``FileHelper().getFileNameAndExtension``, and then hands the object to
    ``runComprehend``.

    Args:
        event: S3 notification event; only ``event['Records'][0]`` is read.
        context: Lambda context object; its ``invoked_function_arn`` is
            forwarded to ``runComprehend`` as the caller id.

    Raises:
        AssertionError: if the object is not named ``fullresponse.json``.
    """
    print("Comprehend Event: {}".format(event))

    s3Record = event['Records'][0]['s3']
    bucketName = s3Record['bucket']['name']
    # S3 event keys are URL-encoded ('+' for spaces), so decode before use.
    objectName = urllib.parse.unquote_plus(s3Record['object']['key'])
    callerId = context.invoked_function_arn

    # Raise explicitly rather than using an ``assert`` statement: asserts are
    # removed when Python runs with -O / PYTHONOPTIMIZE, which would let
    # unexpected objects through. AssertionError is kept so the failure type
    # and message seen by the runtime are unchanged.
    nameAndExtension = FileHelper().getFileNameAndExtension(
        objectName.lower())
    if nameAndExtension != ('fullresponse', 'json'):
        raise AssertionError(
            "File detected does not match expected format: 'fullresponse.json'"
        )

    runComprehend(bucketName, objectName, callerId)
コード例 #2
0
def processRequest(request):
    """Rebuild translated VTT/SRT caption files from delimited S3 objects.

    For every ``delimited`` object found under
    ``<s3uri path><accountId>-TranslateText-<jobId>/`` the function reads the
    translated text, loads the matching source captions from
    ``input/<source name>.processed``, merges the translated text into the
    captions, and writes the result to ``output/<file name>`` in the same
    bucket. Failures on one object are logged and do not stop the others.

    When ``request["delete_captionsin"]`` equals the string ``"true"``, the
    delimited objects under ``captions-in/`` are deleted afterwards.

    Args:
        request: Mapping with the keys ``s3uri``, ``accountId``, ``jobId``
            and ``langCode`` (all required) and the optional key
            ``delete_captionsin``.

    Raises:
        KeyError: if one of the required request keys is missing.
    """
    # Holds the "Output Object" message of the most recently written file.
    output = ""
    logger.info("request: {}".format(request))
    up = urlparse(request["s3uri"], allow_fragments=False)
    accountid = request["accountId"]
    jobid = request["jobId"]
    bucketName = up.netloc
    objectkey = up.path.lstrip('/')
    basePrefixPath = objectkey + accountid + "-TranslateText-" + jobid + "/"
    languageCode = request["langCode"]
    logger.debug("Base Prefix Path:{}".format(basePrefixPath))
    captions = Captions()
    # Only the files carrying the "delimited" suffix are of interest.
    objs = S3Helper().getFilteredFileNames(bucketName, basePrefixPath,
                                           ["delimited"])
    for obj in objs:
        try:
            # Translated, delimited text for this caption file.
            content = S3Helper().readFromS3(bucketName, obj)
            fileName = FileHelper().getFileName(obj)
            # NOTE(review): this removes every "<langCode>." occurrence in
            # the key, not only the one in the file name - confirm that keys
            # never contain it elsewhere.
            sourceFileName = FileHelper().getFileName(
                obj.replace("{}.".format(languageCode), ""))
            logger.debug("SourceFileKey:{}.processed".format(sourceFileName))
            sourceFileKey = "input/{}.processed".format(sourceFileName)
            sourceObject = {"Bucket": bucketName, "Key": sourceFileKey}
            isVtt = fileName.endswith("vtt")
            isSrt = fileName.endswith("srt")

            # Load the source captions with the parser matching the format.
            captions_list = []
            if isVtt:
                captions_list = captions.vttToCaptions(sourceObject)
            elif isSrt:
                captions_list = captions.srtToCaptions(sourceObject)

            # Replace the caption text with the translated content.
            translatedCaptionsList = captions.DelimitedToWebCaptions(
                captions_list, content, "<span>", 15)

            # Serialize back into the original caption format.
            translatedText = ""
            if isVtt:
                translatedText = captions.captionsToVTT(translatedCaptionsList)
            elif isSrt:
                translatedText = captions.captionsToSRT(translatedCaptionsList)
            logger.debug(translatedText)
            logger.debug(content)

            # Write the VTT or SRT file into the output S3 folder.
            newObjectKey = "output/{}".format(fileName)
            S3Helper().writeToS3(str(translatedText), bucketName, newObjectKey)
            output = "Output Object: {}/{}".format(bucketName, newObjectKey)
            logger.debug(output)
        except ClientError as e:
            logger.error("An error occured with S3 bucket operations: %s" % e)
        except Exception as e:
            # ``Exception`` (not a bare except) so SystemExit and
            # KeyboardInterrupt still propagate; ``logger.exception`` records
            # the message and traceback instead of only the exception class.
            logger.exception(
                "Error occured processing the captions file: %s", e)

    # A missing or empty flag means "do not delete"; ``get`` avoids a
    # KeyError when the caller omits it. The listing is only done when the
    # temporary files are actually going to be removed.
    if request.get("delete_captionsin") == "true":
        objs = S3Helper().getFilteredFileNames(bucketName, "captions-in/",
                                               ["delimited"])
        for obj in objs:
            try:
                logger.debug(
                    "Deleting temp delimited caption files {}".format(obj))
                S3Helper().deleteObject(bucketName, obj)
            except ClientError as e:
                logger.error("An error occured with S3 bucket operations: %s" %
                             e)
            except Exception as e:
                logger.exception(
                    "Error occured in deleting the delimited captions file: %s",
                    e)