Beispiel #1
0
def get_webcaptions_json(operator_object, lang):
    """Fetch the stored WebCaptions JSON document for one language.

    :param operator_object: MIE operator helper carrying asset_id and
        workflow_execution_id.
    :param lang: language code suffix of the WebCaptions file.
    :returns: the parsed WebCaptions structure (list/dict from JSON).
    :raises MasExecutionError: if required keys are missing or the S3
        object cannot be retrieved.
    """
    try:
        print("get_webcaptions_json({})".format(lang))
        asset_id = operator_object.asset_id
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        # BUG FIX: the original `except KeyError:` did not bind `e`, so the
        # format() below raised NameError instead of reporting the error.
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        # dataplane and s3 are module-level clients defined elsewhere in this file.
        webcaptions_storage_path = dataplane.generate_media_storage_path(asset_id, workflow_id)
        bucket = webcaptions_storage_path['S3Bucket']
        key = webcaptions_storage_path['S3Key'] + "WebCaptions" + "_" + lang + ".json"

        print("get object {} {}".format(bucket, key))
        data = s3.get_object(Bucket=bucket, Key=key)
        webcaptions = json.loads(data['Body'].read().decode('utf-8'))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to get webcaptions from dataplane {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    return webcaptions
    def PutWebCaptionsCollection(self, operator, collection):
        """Persist a captions collection to the dataplane.

        On success marks the workflow Complete and returns the operator
        output object; on any failure marks it Error and raises
        MasExecutionError.
        """
        payload = {"CaptionsCollection": collection}
        response = dataplane.store_asset_metadata(self.asset_id,
                                                  self.operator_object.name,
                                                  self.workflow_id,
                                                  payload)

        # Happy path first: an explicit Success status.
        if response.get("Status") == "Success":
            self.operator_object.update_workflow_status("Complete")
            return self.operator_object.return_output_object()

        # Distinguish a malformed response from an explicit failure, keeping
        # the original error messages for each case.
        if "Status" not in response:
            error_text = "Unable to store captions collection metadata {e}".format(e=response)
        else:
            error_text = "Unable to store captions collection {e}".format(e=response)

        self.operator_object.update_workflow_status("Error")
        self.operator_object.add_workflow_metadata(WebCaptionsError=error_text)
        raise MasExecutionError(self.operator_object.return_output_object())
def lambda_handler(event, context):
    """Start asynchronous Rekognition technical-cue detection on a video."""
    print("We got the following event:\n", event)
    try:
        proxy = event["Input"]["Media"]["ProxyEncode"]
        s3bucket = proxy["S3Bucket"]
        s3key = proxy["S3Key"]
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
    except Exception:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TechnicalCueDetectionError="No valid inputs")
        raise MasExecutionError(output_object.return_output_object())

    print("Processing s3://" + s3bucket + "/" + s3key)
    valid_video_types = [".avi", ".mp4", ".mov"]
    file_type = os.path.splitext(s3key)[1].lower()

    # Guard clause: reject anything that is not a supported video container.
    if file_type not in valid_video_types:
        print("ERROR: invalid file type")
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TechnicalCueDetectionError="Not a valid file type")
        raise MasExecutionError(output_object.return_output_object())

    # Video processing is asynchronous.
    job_id = start_technical_cue_detection(s3bucket, urllib.parse.unquote_plus(s3key))
    output_object.update_workflow_status("Executing")
    output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
    return output_object.return_output_object()
    def PutWebCaptions(self, webcaptions, language_code=None, source=""):
        """Store a WebCaptions document in the dataplane.

        :param webcaptions: list of caption dicts to store.
        :param language_code: optional language suffix for the operator name.
        :param source: optional source tag for the operator name.
        :returns: the operator output object on success.
        :raises MasExecutionError: when the dataplane rejects the write.
        """
        webcaptions_operator_name = self.WebCaptionsOperatorName(language_code, source)

        WebCaptions = {"WebCaptions": webcaptions}
        response = dataplane.store_asset_metadata(asset_id=self.asset_id, operator_name=webcaptions_operator_name,
                     workflow_id=self.workflow_id, results=WebCaptions, paginate=False)

        # Both failure branches in the original used the identical message,
        # so they are merged here.
        if "Status" not in response or response["Status"] != "Success":
            self.operator_object.update_workflow_status("Error")
            self.operator_object.add_workflow_metadata(
                WebCaptionsError="Unable to store captions {} {e}".format(webcaptions_operator_name, e=response))
            raise MasExecutionError(self.operator_object.return_output_object())

        # BUG FIX (dead code): the original built a `metadata` dict after the
        # if/else, but every branch returned or raised first, so it was
        # unreachable. It may have been intended as the return value —
        # TODO(review): confirm against callers. Reachable behavior preserved.
        return self.operator_object.return_output_object()
Beispiel #5
0
def lambda_handler(event, context):
    """Start Rekognition person tracking for video inputs.

    Image inputs are acknowledged but not processed (TODO below); any other
    input is rejected with a workflow error.
    """
    print("We got the following event:\n", event)
    try:
        if "Video" in event["Input"]["Media"]:
            # NOTE(review): this branch keys off "Video" but reads the
            # "ProxyEncode" media object, like the other handlers that work
            # on the proxy encode — confirm this mismatch is intentional.
            s3bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"]
            s3key = event["Input"]["Media"]["ProxyEncode"]["S3Key"]
        elif "Image" in event["Input"]["Media"]:
            s3bucket = event["Input"]["Media"]["Image"]["S3Bucket"]
            s3key = event["Input"]["Media"]["Image"]["S3Key"]
        else:
            # BUG FIX: with neither media type present, s3bucket/s3key were
            # left unbound and the later print() raised NameError outside
            # this try. Raise here so the intended error path is taken.
            raise KeyError("No valid media input in event")
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
    except Exception:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(PersonTrackingError="No valid inputs")
        raise MasExecutionError(output_object.return_output_object())
    print("Processing s3://"+s3bucket+"/"+s3key)
    valid_video_types = [".avi", ".mp4", ".mov"]
    valid_image_types = [".png", ".jpg", ".jpeg"]
    file_type = os.path.splitext(s3key)[1].lower()
    if file_type in valid_image_types:
        # TODO: implement image handling
        output_object.update_workflow_status("Complete")
        output_object.add_workflow_metadata(WorkflowExecutionId=workflow_id)
        return output_object.return_output_object()
    elif file_type in valid_video_types:
        # Video processing is asynchronous; hand back the Rekognition job id.
        job_id = start_person_tracking(s3bucket, urllib.parse.unquote_plus(s3key))
        output_object.update_workflow_status("Executing")
        output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
        return output_object.return_output_object()
    else:
        print("ERROR: invalid file type")
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(PersonTrackingError="Not a valid file type")
        raise MasExecutionError(output_object.return_output_object())
Beispiel #6
0
def lambda_handler(event, context):
    """Run Rekognition label detection: synchronously for images,
    asynchronously (job id) for videos.
    """
    print("We got the following event:\n", event)
    try:
        if "ProxyEncode" in event["Input"]["Media"]:
            s3bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"]
            s3key = event["Input"]["Media"]["ProxyEncode"]["S3Key"]
        elif "Video" in event["Input"]["Media"]:
            s3bucket = event["Input"]["Media"]["Video"]["S3Bucket"]
            s3key = event["Input"]["Media"]["Video"]["S3Key"]
        elif "Image" in event["Input"]["Media"]:
            s3bucket = event["Input"]["Media"]["Image"]["S3Bucket"]
            s3key = event["Input"]["Media"]["Image"]["S3Key"]
        else:
            # BUG FIX: with no matching media type, s3bucket/s3key were left
            # unbound and the later print() raised NameError outside this
            # try. Raise here so the intended error path is taken.
            raise KeyError("No valid media input in event")
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
    except Exception:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(LabelDetectionError="No valid inputs")
        raise MasExecutionError(output_object.return_output_object())
    print("Processing s3://"+s3bucket+"/"+s3key)
    valid_video_types = [".avi", ".mp4", ".mov"]
    valid_image_types = [".png", ".jpg", ".jpeg"]
    file_type = os.path.splitext(s3key)[1].lower()
    if file_type in valid_image_types:
        # Image processing is synchronous.
        response = detect_labels(s3bucket, urllib.parse.unquote_plus(s3key))
        output_object.add_workflow_metadata(AssetId=asset_id, WorkflowExecutionId=workflow_id)
        dataplane = DataPlane()
        # NOTE(review): `operator_name` is not defined in this function —
        # presumably a module-level global; verify it is set for this lambda.
        metadata_upload = dataplane.store_asset_metadata(asset_id, operator_name, workflow_id, response)
        # Only an explicit Success counts; missing or any other Status is an
        # error (the original's three failure branches shared one message).
        if metadata_upload.get("Status") == "Success":
            print("Uploaded metadata for asset: {asset}".format(asset=asset_id))
            output_object.update_workflow_status("Complete")
            return output_object.return_output_object()
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(
            LabelDetectionError="Unable to upload metadata for asset: {asset}".format(asset=asset_id))
        raise MasExecutionError(output_object.return_output_object())
    elif file_type in valid_video_types:
        # Video processing is asynchronous.
        job_id = start_label_detection(s3bucket, urllib.parse.unquote_plus(s3key))
        output_object.update_workflow_status("Executing")
        output_object.add_workflow_metadata(JobId=job_id, AssetId=asset_id, WorkflowExecutionId=workflow_id)
        return output_object.return_output_object()
    else:
        print("ERROR: invalid file type")
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(LabelDetectionError="Not a valid file type")
        raise MasExecutionError(output_object.return_output_object())
Beispiel #7
0
def check_wait_operation_lambda(event, context):
    '''
    Check if a workflow is still in a Waiting state.

    event is
    - Operation input
    - Operation configuration

    returns:
    Operation output
    '''
    logger.info(json.dumps(event))

    operator_object = MediaInsightsOperationHelper(event)
    execution_table = DYNAMO_RESOURCE.Table(WORKFLOW_EXECUTION_TABLE_NAME)

    response = execution_table.get_item(
            Key={
                'Id': operator_object.workflow_execution_id
            },
            ConsistentRead=True)

    if "Item" not in response:
        # BUG FIX: the original formatted an undefined name `e` here, which
        # raised NameError instead of the intended MasExecutionError.
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(WaitError="Unable to find Waiting workflow execution {}".format(
            operator_object.workflow_execution_id))
        raise MasExecutionError(operator_object.return_output_object())

    workflow_execution = response["Item"]
    logger.info("workflow_execution: {}".format(
        json.dumps(workflow_execution)))

    status = workflow_execution["Status"]
    if status == awsmie.WORKFLOW_STATUS_WAITING:
        # Still waiting: keep the operator executing so it is polled again.
        operator_object.update_workflow_status("Executing")
        return operator_object.return_output_object()
    if status == awsmie.WORKFLOW_STATUS_STARTED:
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()

    operator_object.update_workflow_status("Error")
    operator_object.add_workflow_metadata(WaitError="Unexpected workflow execution status {}".format(status))
    raise MasExecutionError(operator_object.return_output_object())
def start_wait_operation_lambda(event, context):
    '''
    Pause a workflow to wait for external processing

    event is
    - Operation input
    - Operation configuration

    returns:
    Operation output
    '''
    logger.info(json.dumps(event))

    helper = MediaInsightsOperationHelper(event)

    try:
        # Flip the whole workflow into the Waiting state; an external actor
        # resumes it later.
        update_workflow_execution_status(
            helper.workflow_execution_id, awsmie.WORKFLOW_STATUS_WAITING, "")
    except Exception as e:
        helper.update_workflow_status("Error")
        helper.add_workflow_metadata(
            WaitError="Unable to set workflow status to Waiting {e}".format(e=str(e)))
        raise MasExecutionError(helper.return_output_object())

    return helper.return_output_object()
    def __init__(self, operator_object):
        """
        :param event: The event passed in to the operator

        """
        print("WebCaptions operator_object = {}".format(operator_object))
        self.operator_object = operator_object

        try:
            self.transcribe_operator_name = "Transcribe"
            self.workflow_id = operator_object.workflow_execution_id
            self.asset_id = operator_object.asset_id
            self.marker = "<123>"
            config = self.operator_object.configuration
            # Language codes are optional; attributes are only set when the
            # configuration supplies them.
            if "SourceLanguageCode" in config:
                self.source_language_code = config["SourceLanguageCode"]
                self.operator_name_with_lang = "{}_{}".format(
                    self.operator_object.name, self.source_language_code)
            if "TargetLanguageCode" in config:
                self.target_language_code = config["TargetLanguageCode"]
        except KeyError as e:
            self.operator_object.update_workflow_status("Error")
            self.operator_object.add_workflow_metadata(
                WebCaptionsError="No valid inputs {e}".format(e=e))
            raise MasExecutionError(operator_object.return_output_object())
def create_vtt(event, context):
    """Generate one VTT file per target language from stored WebCaptions
    and register the resulting collection in the dataplane.
    """
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)

    try:
        targetLanguageCodes = webcaptions_object.operator_object.configuration[
            "TargetLanguageCodes"]
    except KeyError as e:
        webcaptions_object.operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            WebCaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    # Convert each language's captions to VTT and collect the per-language
    # storage metadata.
    captions_collection = []
    for lang in targetLanguageCodes:
        captions = webcaptions_object.GetWebCaptions(lang)
        vtt = webcaptions_object.WebCaptionsToVTT(captions)
        captions_collection.append(webcaptions_object.PutVTT(lang, vtt))

    webcaptions_object.PutMediaCollection(
        operator_object.name, {"CaptionsCollection": captions_collection})

    operator_object.update_workflow_status("Complete")
    return operator_object.return_output_object()
def start_translate_webcaptions(event, context):
    """Kick off translation of WebCaptions from the source language into
    each configured target language.
    """
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)

    try:
        source_lang = operator_object.configuration["SourceLanguageCode"]
        target_langs = operator_object.configuration["TargetLanguageCodes"]
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Language codes are not defined")
        raise MasExecutionError(operator_object.return_output_object())

    # Custom terminologies are optional.
    try:
        terminology_names = operator_object.configuration["TerminologyNames"]
    except KeyError:
        terminology_names = []

    webcaptions = webcaptions_object.GetWebCaptions(source_lang)

    # Amazon Translate accepts a list of target languages but currently
    # only allows a single entry in it, so the helper fans out per language.
    webcaptions_object.TranslateWebCaptions(webcaptions, source_lang,
                                            target_langs, terminology_names)

    return operator_object.return_output_object()
def vttToWebCaptions(operator_object, vttObject):
    """Download a VTT file from S3 and convert it to a list of WebCaptions
    dicts with "start"/"end" (seconds) and "caption" keys.
    """
    s3 = boto3.client('s3')
    try:
        print("Getting data from s3://" + vttObject["Bucket"] + "/" +
              vttObject["Key"])
        obj = s3.get_object(Bucket=vttObject["Bucket"], Key=vttObject["Key"])
        vtt = obj['Body'].read().decode('utf-8')
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            WebCaptionsError="Unable read VTT file. " + str(e))
        raise MasExecutionError(operator_object.return_output_object())

    # Parse the in-memory VTT text and reshape each cue into a dict.
    return [
        {
            "start": formatTimeVTTtoSeconds(cue.start),
            "end": formatTimeVTTtoSeconds(cue.end),
            "caption": cue.text,
        }
        for cue in webvtt.read_buffer(StringIO(vtt))
    ]
    def __init__(self, operator_object):
        """
        :param event: The event passed in to the operator

        """
        print("WebCaptions operator_object = {}".format(operator_object))
        self.operator_object = operator_object

        try:
            self.transcribe_operator_name = "TranscribeVideo"
            self.workflow_id = operator_object.workflow_execution_id
            self.asset_id = operator_object.asset_id
            self.marker = "<span>"
            self.contentType = "text/html"
            self.existing_subtitles = False
            config = self.operator_object.configuration
            # All three configuration keys below are optional.
            if "SourceLanguageCode" in config:
                self.source_language_code = config["SourceLanguageCode"]
                self.operator_name_with_lang = "{}_{}".format(
                    self.operator_object.name, self.source_language_code)
            if "TargetLanguageCode" in config:
                self.target_language_code = config["TargetLanguageCode"]
            if "ExistingSubtitlesObject" in config:
                self.existing_subtitles_object = config["ExistingSubtitlesObject"]
                self.existing_subtitles = True
        except KeyError as e:
            self.operator_object.update_workflow_status("Error")
            self.operator_object.add_workflow_metadata(
                WebCaptionsError="No valid inputs {e}".format(e=e))
            raise MasExecutionError(operator_object.return_output_object())
Beispiel #14
0
def start_face_search(bucket, key, collection_id):
    """Start an asynchronous Rekognition face-search job against a face
    collection and return its JobId.
    """
    rek = boto3.client('rekognition')
    try:
        # Fail fast if the face collection is missing or inaccessible.
        rek.describe_collection(CollectionId=collection_id)
        video = {'S3Object': {'Bucket': bucket, 'Name': key}}
        channel = {
            'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
            'RoleArn': os.environ['REKOGNITION_ROLE_ARN'],
        }
        response = rek.start_face_search(
            Video=video,
            CollectionId=collection_id,
            NotificationChannel=channel,
        )
        print('Job Id (face search): ' + response['JobId'])
        return response['JobId']
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(FaceSearchError=str(e))
        raise MasExecutionError(output_object.return_output_object())
Beispiel #15
0
def lambda_handler(event, context):
    """Generate a VMAP ad-break file from slotDetection metadata.

    Reads paginated slot metadata from the dataplane, keeps the
    highest-scoring slots, writes the VMAP to S3, and registers it as a
    media object on the operator.
    """
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # Get media metadata from input event
    try:
        asset_id = operator_object.asset_id
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            VmapGenerationError="Missing a required metadata key {e}".format(
                e=exception))
        raise MasExecutionError(operator_object.return_output_object())
    # Get slots metadata from dataplane, following pagination cursors.
    try:
        slots = {}
        params = {"asset_id": asset_id, "operator_name": "slotDetection"}
        while True:
            resp = dataplane.retrieve_asset_metadata(**params)
            if "operator" in resp and resp["operator"] == "slotDetection":
                __update_and_merge_lists(slots, resp["results"])
            if "cursor" not in resp:
                break
            params["cursor"] = resp["cursor"]
        print("slots: {}".format(slots))
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            VmapGenerationError="Unable to retrieve metadata for asset {}: {}".
            format(asset_id, exception))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        # Select slots with highest scores.
        # NOTE(review): `top_slots_qty` is not defined here — presumably a
        # module-level constant; verify.
        slots["slots"].sort(key=lambda slot: slot["Score"])
        top_slots = slots["slots"][-top_slots_qty:]
        # Generate VMAP and add object
        key = 'private/assets/{}/vmap/ad_breaks.vmap'.format(asset_id)
        __write_vmap(top_slots, bucket, key)
        operator_object.add_media_object("VMAP", bucket, key)
        # Set workflow status complete
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    except Exception as exception:
        print("Exception:\n", exception)
        operator_object.update_workflow_status("Error")
        # BUG FIX: pass the message string, not the exception object, for
        # consistency with every other handler's workflow metadata.
        operator_object.add_workflow_metadata(VmapGenerationError=str(exception))
        raise MasExecutionError(operator_object.return_output_object())
Beispiel #16
0
def search_faces_by_image(bucket, key, collection_id):
    """Run synchronous Rekognition SearchFacesByImage on one S3 image."""
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        result = rek.search_faces_by_image(CollectionId=collection_id, Image=image)
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(FaceSearchError=str(e))
        raise MasExecutionError(output_object.return_output_object())
    return result
Beispiel #17
0
def detect_text(bucket, key):
    """Run synchronous Rekognition DetectText on one S3 image."""
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        result = rek.detect_text(Image=image)
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TextDetectionError=str(e))
        raise MasExecutionError(output_object.return_output_object())
    return result
def recognize_celebrities(bucket, key):
    """Run synchronous Rekognition RecognizeCelebrities on one S3 image."""
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        result = rek.recognize_celebrities(Image=image)
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(CelebrityRecognitionError=str(e))
        raise MasExecutionError(output_object.return_output_object())
    return result
Beispiel #19
0
def detect_moderation_labels(bucket, key):
    """Run synchronous Rekognition DetectModerationLabels on one S3 image."""
    rek = boto3.client('rekognition')
    image = {'S3Object': {'Bucket': bucket, 'Name': key}}
    try:
        result = rek.detect_moderation_labels(Image=image)
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(ContentModerationError=str(e))
        raise MasExecutionError(output_object.return_output_object())
    return result
def test_lambda_handler(event, context, operator_name, mediaType, status,
                        type):
    """Test operator: emits synthetic workflow metadata and a fake media
    object of the requested (or configured) media type.
    """
    try:
        print(json.dumps(event))
        # set output status, media, and metadata for workflow - these get passed
        # to other stages of the workflow through the control plane
        dataplane = DataPlane()

        operator_object = MediaInsightsOperationHelper(event)
        operator_object.update_workflow_status("Complete")
        metadata = {}
        metadata[operator_object.name] = {
            "Meta": "Workflow metadata for " + operator_object.name
        }

        if "TestCustomConfig" in operator_object.configuration:
            metadata[operator_object.
                     name]["TestCustomConfig"] = operator_object.configuration[
                         "TestCustomConfig"]

        operator_object.add_workflow_metadata_json(metadata)

        # The configured output media type overrides the argument.
        if "OutputMediaType" in operator_object.configuration:
            mediaType = operator_object.configuration["OutputMediaType"]

        if mediaType == "Video":
            operator_object.add_media_object(
                "Video", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/video".format(operator_object.name))
        elif mediaType == "Audio":
            operator_object.add_media_object(
                "Audio", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/audio".format(operator_object.name))
        elif mediaType == "Image":
            # BUG FIX: the Image branch registered the media object with type
            # "Text" (copy-paste from the Text branch) while pointing at the
            # image key — register it as "Image".
            operator_object.add_media_object(
                "Image", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/image".format(operator_object.name))
        elif mediaType == "Text":
            operator_object.add_media_object(
                "Text", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/text".format(operator_object.name))

    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            Message="Oh no! Something went wrong: {}".format(str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        # A "Fail" status request produces an Error result without raising.
        if status == "Fail":
            operator_object.update_workflow_status("Error")
        else:
            operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
Beispiel #21
0
def put_webcaptions_json(operator_object, webcaptions, lang):
    """Write a WebCaptions JSON document for *lang* to S3 and register its
    location in the dataplane.

    :raises MasExecutionError: on missing metadata keys or dataplane failure.
    """
    try:
        print("put_webcaptions_json({})".format(lang))
        asset_id = operator_object.asset_id
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        # BUG FIX: the original `except KeyError:` did not bind `e`, so the
        # format() below raised NameError instead of reporting the error.
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    operator_name = "WebCaptions" + "_" + lang
    webcaptions_storage_path = dataplane.generate_media_storage_path(asset_id, workflow_id)
    bucket = webcaptions_storage_path['S3Bucket']
    key = webcaptions_storage_path['S3Key'] + operator_name + ".json"

    print("put object {} {}".format(bucket, key))
    s3.put_object(Bucket=bucket, Key=key, Body=json.dumps(webcaptions))

    operator_metadata = {"S3Bucket": bucket, "S3Key": key, "Operator": operator_name}
    metadata_upload = dataplane.store_asset_metadata(asset_id, operator_name, workflow_id,
                                operator_metadata)

    # BUG FIX (dead code): the original ended with `return operator_metadata`
    # after an if/else in which every branch returned or raised; that line
    # was unreachable and has been removed.
    if "Status" not in metadata_upload:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to store webcaptions file {e}".format(e=metadata_upload))
        raise MasExecutionError(operator_object.return_output_object())
    if metadata_upload["Status"] == "Success":
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    operator_object.update_workflow_status("Error")
    operator_object.add_workflow_metadata(
        CaptionsError="Unable to store webcaptions file {e}".format(e=metadata_upload))
    raise MasExecutionError(operator_object.return_output_object())
    def CaptionsOperatorName(self, language_code=None):
        """Return the dataplane operator name for captions, suffixed with a
        language code (explicit argument or the instance's source language).

        :raises MasExecutionError: when no language code is available.
        """
        # Shouldn't assume WebCaptions operator is WebCaptions, maybe pass it
        # in the configuration?
        operator_name = "Captions"

        if language_code is not None:
            return operator_name + "_" + language_code

        try:
            name = operator_name + "_" + self.source_language_code
        except (KeyError, AttributeError) as e:
            # BUG FIX: `self.source_language_code` is only set conditionally
            # in __init__, so a missing value raises AttributeError — which
            # the original `except KeyError` never caught — and `e` was never
            # bound, so the handler itself raised NameError.
            self.operator_object.update_workflow_status("Error")
            self.operator_object.add_workflow_metadata(WebCaptionsError="Missing language code for WebCaptions {e}".format(e=e))
            raise MasExecutionError(self.operator_object.return_output_object())

        print("CaptionsOperatorName() Name {}".format(name))
        return name
def start_label_detection(bucket, key):
    """Start an asynchronous Rekognition label-detection job and return its
    JobId; completion is signalled via the configured SNS topic.
    """
    video = {'S3Object': {'Bucket': bucket, 'Name': key}}
    channel = {
        'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
        'RoleArn': os.environ['REKOGNITION_ROLE_ARN'],
    }
    try:
        response = rek.start_label_detection(Video=video, NotificationChannel=channel)
        print('Job Id (label_detection): ' + response['JobId'])
        return response['JobId']
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(LabelDetectionError=str(e))
        raise MasExecutionError(output_object.return_output_object())
def start_technical_cue_detection(bucket, key):
    """Start an asynchronous Rekognition segment-detection job limited to
    TECHNICAL_CUE segments and return its JobId.
    """
    try:
        response = rek.start_segment_detection(
            Video={'S3Object': {
                'Bucket': bucket,
                'Name': key
            }},
            NotificationChannel={
                'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
                'RoleArn': os.environ['REKOGNITION_ROLE_ARN']
            },
            SegmentTypes=['TECHNICAL_CUE'])
        # FIX: corrected 'techncal' typo in the log message.
        print('Job Id (technical_cue_detection): ' + response['JobId'])
        return response['JobId']
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(TechnicalCueDetectionError=str(e))
        raise MasExecutionError(output_object.return_output_object())
Beispiel #25
0
def start_face_detection(bucket, key):
    """Start an asynchronous Rekognition face-detection job (all face
    attributes) and return its JobId.
    """
    rek = boto3.client('rekognition')
    video = {'S3Object': {'Bucket': bucket, 'Name': key}}
    channel = {
        'SNSTopicArn': os.environ['REKOGNITION_SNS_TOPIC_ARN'],
        'RoleArn': os.environ['REKOGNITION_ROLE_ARN'],
    }
    try:
        response = rek.start_face_detection(
            Video=video,
            NotificationChannel=channel,
            FaceAttributes='ALL')
        print('Job Id (face detection): ' + response['JobId'])
        return response['JobId']
    except Exception as e:
        output_object.update_workflow_status("Error")
        output_object.add_workflow_metadata(FaceDetectionError=str(e))
        raise MasExecutionError(output_object.return_output_object())
Beispiel #26
0
def lambda_handler(event, context):
    """Submit a MediaConvert job that produces an audio-only MP4 and thumbnails.

    Reads the input video location from the operator input, then creates one
    MediaConvert job with two output groups: an AAC-in-MP4 audio extract
    written under the workflow's asset path, and frame-capture thumbnails
    written to the asset root.  On success the workflow status is set to
    "Executing" with the MediaConvert job id recorded in the workflow
    metadata; any failure sets "Error" and raises MasExecutionError.
    """
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)

    try:
        workflow_id = str(operator_object.workflow_execution_id)
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
        key = operator_object.input["Media"]["Video"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            MediaconvertError="Missing a required metadata key {e}".format(
                e=e))
        raise MasExecutionError(operator_object.return_output_object())

    # Adding in exception block for now since we aren't guaranteed an asset id will be present, should remove later
    try:
        asset_id = operator_object.asset_id
    except KeyError as e:
        print("No asset id passed in with this workflow", e)
        asset_id = ''

    # Input file plus per-workflow and per-asset S3 output prefixes.
    file_input = "s3://" + bucket + "/" + key
    destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/workflows/" + workflow_id + "/"
    thumbnail_destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/"

    # MediaConvert uses a per-account endpoint: discover it first, then build
    # a client bound to that endpoint.
    try:
        response = mediaconvert.describe_endpoints()
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        mediaconvert_endpoint = response["Endpoints"][0]["Url"]
        customer_mediaconvert = boto3.client(
            "mediaconvert",
            region_name=region,
            endpoint_url=mediaconvert_endpoint)

    try:
        response = customer_mediaconvert.create_job(
            Role=mediaconvert_role,
            Settings={
                "OutputGroups": [{
                    # Output group 1: audio-only MP4 (AAC stereo, 96 kbps)
                    # written to the workflow destination with "_audio" suffix.
                    "Name":
                    "File Group",
                    "Outputs": [{
                        "ContainerSettings": {
                            "Container": "MP4",
                            "Mp4Settings": {
                                "CslgAtom": "INCLUDE",
                                "FreeSpaceBox": "EXCLUDE",
                                "MoovPlacement": "PROGRESSIVE_DOWNLOAD"
                            }
                        },
                        "AudioDescriptions": [{
                            "AudioTypeControl":
                            "FOLLOW_INPUT",
                            "AudioSourceName":
                            "Audio Selector 1",
                            "CodecSettings": {
                                "Codec": "AAC",
                                "AacSettings": {
                                    "AudioDescriptionBroadcasterMix": "NORMAL",
                                    "Bitrate": 96000,
                                    "RateControlMode": "CBR",
                                    "CodecProfile": "LC",
                                    "CodingMode": "CODING_MODE_2_0",
                                    "RawFormat": "NONE",
                                    "SampleRate": 48000,
                                    "Specification": "MPEG4"
                                }
                            },
                            "LanguageCodeControl":
                            "FOLLOW_INPUT"
                        }],
                        "Extension":
                        "mp4",
                        "NameModifier":
                        "_audio"
                    }],
                    "OutputGroupSettings": {
                        "Type": "FILE_GROUP_SETTINGS",
                        "FileGroupSettings": {
                            "Destination": destination
                        }
                    }
                }, {
                    # Output group 2: frame-capture thumbnails (RAW container)
                    # written to the asset root with "_thumbnail" suffix.
                    "CustomName":
                    "thumbnail",
                    "Name":
                    "File Group",
                    "Outputs": [{
                        "ContainerSettings": {
                            "Container": "RAW"
                        },
                        "VideoDescription": {
                            "ScalingBehavior": "DEFAULT",
                            "TimecodeInsertion": "DISABLED",
                            "AntiAlias": "ENABLED",
                            "Sharpness": 50,
                            "CodecSettings": {
                                "Codec": "FRAME_CAPTURE",
                                "FrameCaptureSettings": {
                                    "FramerateNumerator": 1,
                                    "FramerateDenominator": 5,
                                    "MaxCaptures": 2,
                                    "Quality": 80
                                }
                            },
                            "DropFrameTimecode": "ENABLED",
                            "ColorMetadata": "INSERT"
                        },
                        "NameModifier": "_thumbnail"
                    }],
                    "OutputGroupSettings": {
                        "Type": "FILE_GROUP_SETTINGS",
                        "FileGroupSettings": {
                            "Destination": thumbnail_destination
                        }
                    }
                }],
                "AdAvailOffset":
                0,
                "Inputs": [{
                    "AudioSelectors": {
                        "Audio Selector 1": {
                            "Offset": 0,
                            "DefaultSelection": "DEFAULT",
                            "ProgramSelection": 1
                        }
                    },
                    "VideoSelector": {
                        "ColorSpace": "FOLLOW"
                    },
                    "FilterEnable": "AUTO",
                    "PsiControl": "USE_PSI",
                    "FilterStrength": 0,
                    "DeblockFilter": "DISABLED",
                    "DenoiseFilter": "DISABLED",
                    "TimecodeSource": "EMBEDDED",
                    "FileInput": file_input
                }]
            })
    # TODO: Add support for boto client error handling
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        # Job accepted: the workflow stays "Executing" until a later stage
        # observes the MediaConvert job's completion.
        job_id = response['Job']['Id']
        operator_object.update_workflow_status("Executing")
        operator_object.add_workflow_metadata(MediaconvertJobId=job_id,
                                              MediaconvertInputFile=key,
                                              AssetId=asset_id,
                                              WorkflowExecutionId=workflow_id)
        return operator_object.return_output_object()
def lambda_handler(event, context):
    """Poll an AWS Transcribe job and publish its transcript when finished.

    While the job is IN_PROGRESS the workflow stays "Executing".  On
    COMPLETED the transcript JSON is downloaded, a plain-text version is
    written to S3, and the full result is stored in the dataplane.  Any
    failure marks the workflow "Error" and raises MasExecutionError.
    """
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # If Transcribe wasn't run due to silent audio, then we're done
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"] and event[
            "Input"]["MetaData"]["Mediainfo_num_audio_tracks"] == "0":
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    try:
        job_id = operator_object.metadata["TranscribeJobId"]
        workflow_id = operator_object.workflow_execution_id
        asset_id = operator_object.asset_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        response = transcribe.get_transcription_job(
            TranscriptionJobName=job_id)
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(TranscribeError=str(e),
                                              TranscribeJobId=job_id)
        raise MasExecutionError(operator_object.return_output_object())

    status = response["TranscriptionJob"]["TranscriptionJobStatus"]
    if status == "IN_PROGRESS":
        # Still running -- report back so the workflow keeps polling.
        operator_object.update_workflow_status("Executing")
        operator_object.add_workflow_metadata(
            TranscribeJobId=job_id,
            AssetId=asset_id,
            WorkflowExecutionId=workflow_id)
        return operator_object.return_output_object()
    if status == "FAILED":
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeJobId=job_id,
            TranscribeError=str(
                response["TranscriptionJob"]["FailureReason"]))
        raise MasExecutionError(operator_object.return_output_object())
    if status != "COMPLETED":
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Unable to determine status")
        raise MasExecutionError(operator_object.return_output_object())

    # Job finished -- download the transcript JSON from the signed URI.
    transcribe_uri = response["TranscriptionJob"]["Transcript"][
        "TranscriptFileUri"]
    http = urllib3.PoolManager()
    transcription = http.request('GET', transcribe_uri)
    transcription_data = transcription.data.decode("utf-8")
    transcription_json = json.loads(transcription_data)

    # BUG FIX: the original used `text_only_transcript.join(transcript)`,
    # which treats the accumulated text as a *separator* interleaved between
    # the characters of each transcript instead of concatenating the
    # transcripts.  Concatenate them instead.
    text_only_transcript = ''.join(
        transcripts["transcript"]
        for transcripts in transcription_json["results"]["transcripts"])

    print(text_only_transcript)

    dataplane = DataPlane()
    s3 = boto3.client('s3')

    # Write the plain-text transcript next to the workflow's other media.
    transcript_storage_path = dataplane.generate_media_storage_path(
        asset_id, workflow_id)

    key = transcript_storage_path['S3Key'] + "transcript.txt"
    bucket = transcript_storage_path['S3Bucket']

    s3.put_object(Bucket=bucket, Key=key, Body=text_only_transcript)

    # Record where the plain-text transcript lives alongside the raw results.
    transcription_json["TextTranscriptUri"] = {
        "S3Bucket": bucket,
        "S3Key": key
    }

    metadata_upload = dataplane.store_asset_metadata(
        asset_id, operator_object.name, workflow_id,
        transcription_json)
    if "Status" not in metadata_upload or metadata_upload['Status'] != 'Success':
        operator_object.add_workflow_metadata(
            TranscribeError=
            "Unable to upload metadata for asset: {asset}".format(
                asset=asset_id),
            TranscribeJobId=job_id)
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())

    operator_object.add_media_object('Text',
                                     metadata_upload['Bucket'],
                                     metadata_upload['Key'])
    operator_object.add_workflow_metadata(TranscribeJobId=job_id)
    operator_object.update_workflow_status("Complete")
    return operator_object.return_output_object()
Beispiel #28
0
def lambda_handler(event, context):
    """Start an AWS Transcribe job for the workflow's media input.

    Picks the best available media rendition (ProxyEncode, then Video, then
    Audio), validates the file type, assembles the transcription job config
    from the operator's optional settings, and starts the job.  Returns the
    operator output object with the workflow in "Executing" (or "Complete"
    when mediainfo reports no audio tracks); raises MasExecutionError on any
    failure.
    """
    print("We got this event:\n", event)
    valid_types = ["mp3", "mp4", "wav", "flac"]
    transcribe_job_config = {}
    optional_settings = {}
    model_settings = {}
    job_execution_settings = {}
    content_redaction_settings = {}
    identify_language = False
    # BUG FIX: language_code was previously unbound when TranscribeLanguage
    # was absent, causing a NameError when the job config was assembled.
    language_code = None
    language_options = []
    operator_object = MediaInsightsOperationHelper(event)
    workflow_id = str(event["WorkflowExecutionId"])
    asset_id = event['AssetId']
    job_id = "transcribe" + "-" + workflow_id

    # Prefer the proxy encode if present, then video, then audio.
    try:
        if "ProxyEncode" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"]
            key = event["Input"]["Media"]["ProxyEncode"]["S3Key"]
        elif "Video" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["Video"]["S3Bucket"]
            key = event["Input"]["Media"]["Video"]["S3Key"]
        elif "Audio" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["Audio"]["S3Bucket"]
            key = event["Input"]["Media"]["Audio"]["S3Key"]
        file_type = key.split('.')[-1]
    except Exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())

    if file_type not in valid_types:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Not a valid file type")
        raise MasExecutionError(operator_object.return_output_object())

    # A TranscribeLanguage of 'auto' -- or its absence -- means we ask
    # Transcribe to identify the spoken language itself.
    if "TranscribeLanguage" in operator_object.configuration:
        language_code = operator_object.configuration["TranscribeLanguage"]
        if language_code == 'auto':
            identify_language = True
    else:
        identify_language = True

    media_file = 'https://s3.' + region + '.amazonaws.com/' + bucket + '/' + key

    # Read optional transcription job settings, routed to the sub-config
    # each belongs to in the StartTranscriptionJob request.
    for option in ("VocabularyName", "ShowSpeakerLabels", "MaxSpeakerLabels",
                   "ChannelIdentification", "MaxAlternatives",
                   "VocabularyFilterName", "VocabularyFilterMethod"):
        if option in operator_object.configuration:
            optional_settings[option] = operator_object.configuration[option]
    if "LanguageModelName" in operator_object.configuration:
        model_settings["LanguageModelName"] = operator_object.configuration[
            "LanguageModelName"]
    for option in ("AllowDeferredExecution", "DataAccessRoleArn"):
        if option in operator_object.configuration:
            job_execution_settings[option] = operator_object.configuration[
                option]
    for option in ("RedactionType", "RedactionOutput"):
        if option in operator_object.configuration:
            content_redaction_settings[option] = operator_object.configuration[
                option]
    if "IdentifyLanguage" in operator_object.configuration:
        identify_language = operator_object.configuration["IdentifyLanguage"]
    if "LanguageOptions" in operator_object.configuration:
        language_options = operator_object.configuration["LanguageOptions"]

    # Combine all the defined transcription job settings into a single dict:
    transcribe_job_config["TranscriptionJobName"] = job_id
    transcribe_job_config["Media"] = {"MediaFileUri": media_file}
    transcribe_job_config["MediaFormat"] = file_type
    # LanguageCode and IdentifyLanguage are mutually exclusive in the
    # Transcribe API, so set exactly one of them.
    if identify_language:
        transcribe_job_config["IdentifyLanguage"] = True
    else:
        transcribe_job_config["LanguageCode"] = language_code
    if optional_settings:
        transcribe_job_config["Settings"] = optional_settings
    if model_settings:
        transcribe_job_config["ModelSettings"] = model_settings
    if job_execution_settings:
        transcribe_job_config["JobExecutionSettings"] = job_execution_settings
    if content_redaction_settings:
        transcribe_job_config["ContentRedaction"] = content_redaction_settings
    if language_options:
        transcribe_job_config["LanguageOptions"] = language_options

    # If mediainfo data is available then use it to avoid transcribing silent videos.
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"]:
        num_audio_tracks = event["Input"]["MetaData"][
            "Mediainfo_num_audio_tracks"]
        # Check to see if audio tracks were detected by mediainfo
        if num_audio_tracks == "0":
            # If there is no input audio then we're done.
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()

    try:
        # The ** operator converts the job config dict to keyword arguments.
        response = transcribe.start_transcription_job(**transcribe_job_config)
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        # Metadata key fixed to TranscribeError (was lowercase
        # transcribe_error) for consistency with every other code path.
        operator_object.add_workflow_metadata(TranscribeError=str(e))
        raise MasExecutionError(operator_object.return_output_object())

    status = response["TranscriptionJob"]["TranscriptionJobStatus"]
    # BUG FIX: the Transcribe API reports "COMPLETED", not "COMPLETE"; the
    # original comparison could never match, so an already-finished job fell
    # through to the unhandled-error branch.  IN_PROGRESS and COMPLETED both
    # leave the workflow "Executing" for the downstream status checker.
    if status in ("IN_PROGRESS", "COMPLETED"):
        operator_object.update_workflow_status("Executing")
        operator_object.add_workflow_metadata(
            TranscribeJobId=job_id,
            AssetId=asset_id,
            WorkflowExecutionId=workflow_id)
        return operator_object.return_output_object()
    if status == "FAILED":
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeJobId=job_id,
            TranscribeError=str(
                response["TranscriptionJob"]["FailureReason"]))
        raise MasExecutionError(operator_object.return_output_object())
    operator_object.update_workflow_status("Error")
    operator_object.add_workflow_metadata(
        TranscribeJobId=job_id,
        TranscribeError="Unhandled error for this job: {job_id}".
        format(job_id=job_id))
    raise MasExecutionError(operator_object.return_output_object())
Beispiel #29
0
def web_to_vtt(event, context):
    """Convert stored WebCaptions for each target language into WebVTT files.

    For every language in TargetLanguageCodes the operator's WebCaptions
    JSON is read from the dataplane, rendered as a WEBVTT document, written
    to S3, and recorded in a CaptionsCollection metadata entry.

    :raises MasExecutionError: on missing configuration or storage failures
    """
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''

    try:
        targetLanguageCodes = operator_object.configuration["TargetLanguageCodes"]
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    captions_collection = []
    for lang in targetLanguageCodes:
        # Removed a large block of commented-out dataplane-pagination code
        # and a dead `captions = []` initializer; the helper below is the
        # single source of caption data.
        captions = get_webcaptions_json(operator_object, lang)

        # Render the caption entries as a WebVTT document.
        vtt = 'WEBVTT\n\n'
        for caption in captions:
            vtt += formatTimeVTT(float(caption["start"])) + ' --> ' + formatTimeVTT(float(caption["end"])) + '\n'
            vtt += caption["caption"] + '\n\n'

        response = dataplane.generate_media_storage_path(asset_id, workflow_id)

        print(json.dumps(response))

        bucket = response["S3Bucket"]
        key = response["S3Key"] + 'Captions_' + lang + '.vtt'
        s3_object = s3_resource.Object(bucket, key)

        s3_object.put(Body=vtt)

        # One collection entry per language, pointing at the stored file.
        metadata = {
            "OperatorName": "VTTCaptions_" + lang,
            "Results": {"S3Bucket": bucket, "S3Key": key},
            "WorkflowId": workflow_id,
            "LanguageCode": lang
        }

        captions_collection.append(metadata)

    data = {}
    data["CaptionsCollection"] = captions_collection
    metadata_upload = dataplane.store_asset_metadata(asset_id, operator_object.name, workflow_id,
                                data)

    # Error messages fixed: these are VTT (not SRT) captions.
    if "Status" not in metadata_upload:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to store vtt captions file {e}".format(e=metadata_upload))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if metadata_upload["Status"] == "Success":
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                CaptionsError="Unable to store vtt captions file {e}".format(e=metadata_upload))
            raise MasExecutionError(operator_object.return_output_object())
Beispiel #30
0
def web_captions(event, context):
    """Generate timed "WebCaptions" blocks from a Transcribe transcript.

    Reads the Transcribe results JSON from S3 (the Media.Text input), walks
    the word-level items and groups them into caption blocks -- closing a
    block on long inter-word silence, word-count, or line-length limits --
    then stores the blocks in the dataplane as paginated metadata under
    "WebCaptions_<source language>".

    :raises MasExecutionError: on missing inputs/configuration or storage failures
    """

    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="No valid inputs {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        lang = operator_object.configuration["SourceLanguageCode"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="No language codes {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''

    try:
        s3_response = s3.get_object(Bucket=bucket, Key=key)
        transcribe_metadata = json.loads(s3_response["Body"].read().decode("utf-8"))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to read transcription from S3: {e}".format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())

    # Caption segmentation tuning:
    #   maxLength  - max characters in a caption before it is closed
    #   maxWords   - max words in a caption before it is closed
    #   maxSilence - seconds of silence that forces a new caption
    endTime = 0.0
    maxLength = 50
    wordCount = 0
    maxWords = 12
    maxSilence = 1.5

    captions = []
    caption = None

    # Transcribe items are either "pronunciation" (words, with start/end
    # times) or "punctuation" (no timestamps).
    for item in transcribe_metadata["results"]["items"]:

        isPunctuation = item["type"] == "punctuation"

        if caption is None:

            # Start of a line with punctuation, just skip it
            if isPunctuation:
                continue

            # Create a new caption line
            caption = {
                "start": float(item["start_time"]),
                "caption": "",
                "wordConfidence": []
            }

        if not isPunctuation:

            startTime = float(item["start_time"])

            # Check to see if there has been a long silence
            # between the last recorded word and start a new
            # caption if this is the case, ending the last time
            # as this one starts.

            if (len(caption["caption"]) > 0) and ((endTime + maxSilence) < startTime):

                caption["end"] = startTime
                captions.append(caption)

                caption = {
                    "start": float(startTime),
                    "caption": "",
                    "wordConfidence": []
                }

                wordCount = 0

            endTime = float(item["end_time"])

        # Words get a leading space except at the start of a caption;
        # punctuation attaches directly to the preceding word.
        requiresSpace = (not isPunctuation) and (len(caption["caption"]) > 0)

        if requiresSpace:
            caption["caption"] += " "

        # Process tweaks

        text = item["alternatives"][0]["content"]
        confidence = item["alternatives"][0]["confidence"]
        textLower = text.lower()

        caption["caption"] += text

        # Track raw word confidence
        if not isPunctuation:
            caption["wordConfidence"].append(
                {
                    "w": textLower,
                    "c": float(confidence)
                }
            )
            # Count words
            wordCount += 1

        # If we have reached a good amount of text finalize the caption

        if (wordCount >= maxWords) or (len(caption["caption"]) >= maxLength):
            caption["end"] = endTime
            captions.append(caption)
            wordCount = 0
            caption = None

    # Close the last caption if required

    if caption is not None:
        caption["end"] = endTime
        captions.append(caption)

    # Store each caption block as one page of paginated dataplane metadata;
    # only the final page is flagged with end=True and completes the workflow.
    # NOTE(review): if `captions` is empty the loop never runs and this
    # function returns None without marking the workflow Complete -- confirm
    # upstream guarantees at least one transcript item.
    webcaptions_name = "WebCaptions"+"_"+lang
    i=0
    for asset in captions:  # `asset` is a single caption block, despite the name
        i=i+1

        if i != len(captions):
            metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=webcaptions_name, 
                                    workflow_id=workflow_id, results=asset, paginate=True, end=False)

            if "Status" not in metadata_upload:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload["Status"] == "Success":
                    pass
                else:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                    raise MasExecutionError(operator_object.return_output_object())
        else:
            metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=webcaptions_name, 
                                    workflow_id=workflow_id, results=asset, paginate=True, end=True)
            if "Status" not in metadata_upload:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload["Status"] == "Success":
                    response_json = metadata_upload
                    operator_object.add_workflow_metadata(WebCaptionsS3Bucket=response_json['Bucket'],
                                                          WebCaptionsS3Key=response_json['Key'])
                    operator_object.update_workflow_status("Complete")
                    return operator_object.return_output_object()
                else:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                    raise MasExecutionError(operator_object.return_output_object())