def create_vtt(event, context):
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)

    try:
        targetLanguageCodes = webcaptions_object.operator_object.configuration[
            "TargetLanguageCodes"]
    except KeyError as e:
        webcaptions_object.operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            WebCaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    captions_collection = []
    for lang in targetLanguageCodes:
        webcaptions = webcaptions_object.GetWebCaptions(lang)

        vtt = webcaptions_object.WebCaptionsToVTT(webcaptions)
        metadata = webcaptions_object.PutVTT(lang, vtt)

        captions_collection.append(metadata)

    data = {}
    data["CaptionsCollection"] = captions_collection

    webcaptions_object.PutMediaCollection(operator_object.name, data)

    operator_object.update_workflow_status("Complete")
    return operator_object.return_output_object()
def start_translate_webcaptions(event, context):
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)

    try:
        source_lang = operator_object.configuration["SourceLanguageCode"]
        target_langs = operator_object.configuration["TargetLanguageCodes"]
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Language codes are not defined")
        raise MasExecutionError(operator_object.return_output_object())
    try:
        terminology_names = operator_object.configuration["TerminologyNames"]
    except KeyError:
        terminology_names = []

    #webcaptions = get_webcaptions(operator_object, source_lang)
    webcaptions = webcaptions_object.GetWebCaptions(source_lang)

    # Translate takes a list of target languages, but it only allows one item in the
    # list, so each target language requires its own request (a per-language sketch
    # follows this function).
    webcaptions_object.TranslateWebCaptions(webcaptions, source_lang,
                                            target_langs, terminology_names)

    return operator_object.return_output_object()
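# The single-target limitation noted above means TranslateWebCaptions has to fan
# out one request per target language. A minimal sketch of that pattern (the
# helper name, caption structure, and wiring here are assumptions, not the
# actual WebCaptions implementation):
import boto3

translate_client = boto3.client("translate")

def translate_captions_per_language(webcaptions, source_lang, target_langs, terminology_names):
    """Call Translate once per target language, since each request accepts a single target."""
    results = {}
    for target_lang in target_langs:
        translated = []
        for caption in webcaptions:
            response = translate_client.translate_text(
                Text=caption["caption"],
                SourceLanguageCode=source_lang,
                TargetLanguageCode=target_lang,
                TerminologyNames=terminology_names)
            # Keep the original timing, swap in the translated text.
            translated.append({**caption, "caption": response["TranslatedText"]})
        results[target_lang] = translated
    return results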
def start_wait_operation_lambda(event, context):
    '''
    Pause a workflow to wait for external processing

    event is 
    - Operation input
    - Operation configuration

    returns:
    Operation output

    '''
    logger.info(json.dumps(event))

    operator_object = MediaInsightsOperationHelper(event)

    try:
        update_workflow_execution_status(operator_object.workflow_execution_id,
                                         awsmie.WORKFLOW_STATUS_WAITING, "")
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            WaitError="Unable to set workflow status to Waiting {e}".format(
                e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())

    return operator_object.return_output_object()
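# update_workflow_execution_status is imported from elsewhere. A sketch of what
# it plausibly does, assuming the same DynamoDB workflow execution table that
# check_wait_operation_lambda reads below (hypothetical implementation, not the
# library source):
import os
import boto3

DYNAMO_RESOURCE = boto3.resource("dynamodb")
WORKFLOW_EXECUTION_TABLE_NAME = os.environ.get("WORKFLOW_EXECUTION_TABLE_NAME", "")

def update_workflow_execution_status(workflow_execution_id, status, message):
    """Persist a new status (e.g. Waiting) on the workflow execution record."""
    execution_table = DYNAMO_RESOURCE.Table(WORKFLOW_EXECUTION_TABLE_NAME)
    execution_table.update_item(
        Key={"Id": workflow_execution_id},
        UpdateExpression="SET #s = :status, Message = :message",
        ExpressionAttributeNames={"#s": "Status"},
        ExpressionAttributeValues={":status": status, ":message": message})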
def web_captions(event, context):

    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)

    webcaptions_object = WebCaptions(operator_object)

    transcript = webcaptions_object.GetTranscript()
    webcaptions = webcaptions_object.TranscribeToWebCaptions(transcript)
    webcaptions_object.PutWebCaptions(webcaptions)

    operator_object.update_workflow_status("Complete")
    return operator_object.return_output_object()
Example #5
def test_lambda_handler(event, context, operator_name, mediaType, status,
                        type):

    try:
        print(json.dumps(event))
        # set output status, media, and metadata for workflow - these get passed to other
        # stages of the workflow through the control plane
        dataplane = DataPlane()

        operator_object = MediaInsightsOperationHelper(event)
        operator_object.update_workflow_status("Complete")
        metadata = {}
        metadata[operator_object.name] = {
            "Meta": "Workflow metadata for " + operator_object.name
        }

        if "TestCustomConfig" in operator_object.configuration:
            metadata[operator_object.
                     name]["TestCustomConfig"] = operator_object.configuration[
                         "TestCustomConfig"]

        operator_object.add_workflow_metadata_json(metadata)

        if "OutputMediaType" in operator_object.configuration:
            mediaType = operator_object.configuration["OutputMediaType"]

        if mediaType == "Video":
            operator_object.add_media_object(
                "Video", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/video".format(operator_object.name))
        elif mediaType == "Audio":
            operator_object.add_media_object(
                "Audio", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/audio".format(operator_object.name))
        elif mediaType == "Image":
            operator_object.add_media_object(
                "Text", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/image".format(operator_object.name))
        elif mediaType == "Text":
            operator_object.add_media_object(
                "Text", "S3BucketFrom{}".format(operator_object.name),
                "S3/Key/From/{}/text".format(operator_object.name))

    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            Message="Oh no! Something went wrong: {}".format(str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if status == "Fail":
            operator_object.update_workflow_status("Error")
        else:
            operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
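# test_lambda_handler takes four extra arguments beyond the usual (event, context)
# pair, so Lambda cannot invoke it directly. One plausible way a test harness
# binds those parameters (the concrete values here are illustrative assumptions):
import functools

video_pass_handler = functools.partial(
    test_lambda_handler,
    operator_name="TestVideoOperator",
    mediaType="Video",
    status="Pass",
    type="Sync")

# video_pass_handler(event, context) now behaves like a standard Lambda handler.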
def filter_operation_lambda(event, context):
    '''
    event is 
    - Operation input
    - Operation configuration

    returns:
    Operation output
    - Operation status "Skipped" if operation should be skipped
    '''
    logger.info(json.dumps(event))

    operation_object = MediaInsightsOperationHelper(event)

    media_type = operation_object.configuration["MediaType"]

    if media_type != "MetadataOnly" and media_type not in operation_object.input["Media"]:
        operation_object.update_workflow_status(
            awsmie.OPERATION_STATUS_SKIPPED)

    elif not operation_object.configuration["Enabled"]:
        operation_object.update_workflow_status(
            awsmie.OPERATION_STATUS_SKIPPED)

    else:
        operation_object.update_workflow_status(
            awsmie.OPERATION_STATUS_STARTED)

    return operation_object.return_output_object()
def web_captions(event, context):

    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)

    webcaptions_object = WebCaptions(operator_object)

    transcript = webcaptions_object.GetTranscript()
    webcaptions = webcaptions_object.TranscribeToWebCaptions(transcript)

    # Save the original Transcribe-generated captions so we can compare them against
    # any ground-truth edits made later and calculate quality metrics for the machine translation
    webcaptions_object.PutWebCaptions(webcaptions, source="TranscribeVideo")

    # if a vtt file was input, use that as the most recent version of the webcaptions file
    if webcaptions_object.existing_subtitles:
        webcaptions = vttToWebCaptions(operator_object, webcaptions_object.existing_subtitles_object)

    webcaptions_object.PutWebCaptions(webcaptions)

    operator_object.update_workflow_status("Complete")
    return operator_object.return_output_object()
Example #8
def check_wait_operation_lambda(event, context):
    '''
    Check if a workflow is still in a Waiting state.

    event is 
    - Operation input
    - Operation configuration

    returns:
    Operation output

    '''
    logger.info(json.dumps(event))

    operator_object = MediaInsightsOperationHelper(event)
    execution_table = DYNAMO_RESOURCE.Table(WORKFLOW_EXECUTION_TABLE_NAME)

    response = execution_table.get_item(
            Key={
                'Id': operator_object.workflow_execution_id
            },
            ConsistentRead=True)

    if "Item" in response:
        workflow_execution = response["Item"]
    else:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            WaitError="Unable to find Waiting workflow execution {}".format(
                operator_object.workflow_execution_id))
        raise MasExecutionError(operator_object.return_output_object())

    logger.info("workflow_execution: {}".format(
        json.dumps(workflow_execution)))

    if workflow_execution["Status"] == awsmie.WORKFLOW_STATUS_WAITING:
        operator_object.update_workflow_status("Executing")
        return operator_object.return_output_object()
    elif workflow_execution["Status"] == awsmie.WORKFLOW_STATUS_STARTED:
        operator_object.update_workflow_status("Complete")
    else:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(WaitError="Unexpected workflow execution status {}".format(
            workflow_execution["Status"]))
        raise MasExecutionError(operator_object.return_output_object())

    return operator_object.return_output_object()
Example #9
def lambda_handler(event, context):
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # Get media metadata from input event
    try:
        asset_id = operator_object.asset_id
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            VmapGenerationError="Missing a required metadata key {e}".format(
                e=exception))
        raise MasExecutionError(operator_object.return_output_object())
    # Get slots metadata from dataplane
    try:
        slots = {}
        params = {"asset_id": asset_id, "operator_name": "slotDetection"}
        while True:
            resp = dataplane.retrieve_asset_metadata(**params)
            if "operator" in resp and resp["operator"] == "slotDetection":
                __update_and_merge_lists(slots, resp["results"])
            if "cursor" not in resp:
                break
            params["cursor"] = resp["cursor"]
        print("slots: {}".format(slots))
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            VmapGenerationError="Unable to retrieve metadata for asset {}: {}".
            format(asset_id, exception))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        # Select slots with highest scores
        slots["slots"].sort(key=lambda slot: slot["Score"])
        top_slots = slots["slots"][-top_slots_qty:]
        # Generate VMAP and add object
        key = 'private/assets/{}/vmap/ad_breaks.vmap'.format(asset_id)
        __write_vmap(top_slots, bucket, key)
        operator_object.add_media_object("VMAP", bucket, key)
        # Set workflow status complete
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    except Exception as exception:
        print("Exception:\n", exception)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(VmapGenerationError=str(exception))
        raise MasExecutionError(operator_object.return_output_object())
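# The slot-retrieval loop above pages through dataplane results by following a
# cursor. The same pattern factored into a reusable generator (a sketch, not
# part of the original module):
def iterate_asset_metadata(dataplane, asset_id, operator_name):
    """Yield each page of dataplane metadata until the cursor is exhausted."""
    params = {"asset_id": asset_id, "operator_name": operator_name}
    while True:
        resp = dataplane.retrieve_asset_metadata(**params)
        yield resp
        if "cursor" not in resp:
            break
        params["cursor"] = resp["cursor"]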
Example #10
def lambda_handler(event, context):
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(comprehend_error="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
        # If operator_object.input["Media"]["Text"]["S3Key"] is a json file,
        # then we're working with metadata about the text file and need to
        # get the actual transcript text from the TextTranscriptUri field.
        # Otherwise we assume operator_object.input["Media"]["Text"]["S3Key"]
        # contains only the transcript text.
        file_ext = str(key.split('.')[-1])
        if file_ext == "json":
            obj = s3.get_object(
                Bucket=bucket,
                Key=key
            )
            results = obj['Body'].read().decode('utf-8')
            results_json = json.loads(results)
            try:
                uri_data = results_json["TextTranscriptUri"]
            except KeyError:
                raise MasExecutionError("JSON can only be passed in from AWS transcribe")
            else:
                bucket = uri_data['S3Bucket']
                key = uri_data['S3Key']
        uri = "s3://" + bucket + '/' + key
        # If input text is empty then we're done.
        response = s3.head_object(Bucket=bucket, Key=key)
        if response['ContentLength'] < 1:
            operator_object.update_workflow_status("Complete")
            operator_object.add_workflow_metadata(comprehend_phrases_job_id="Empty input --> empty output.")
            return operator_object.return_output_object()
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(comprehend_error="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())
    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''
    dataplane = DataPlane()
    output_uri_request = dataplane.generate_media_storage_path(asset_id, workflow_id)
    output_uri = "s3://{bucket}/{key}".format(bucket=output_uri_request["S3Bucket"], key=output_uri_request["S3Key"] + "/comprehend_phrases")
    try:
        comprehend.start_key_phrases_detection_job(
            InputDataConfig={
                'S3Uri': uri,
                'InputFormat': 'ONE_DOC_PER_FILE'
            },
            OutputDataConfig={
                'S3Uri': output_uri
            },
            DataAccessRoleArn=comprehend_role,
            JobName=workflow_id,
            LanguageCode='en'
        )
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(comprehend_error="Unable to get response from comprehend: {e}".format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        comprehend_job_id = workflow_id
        operator_object.add_workflow_metadata(comprehend_phrases_job_id=comprehend_job_id, output_uri=output_uri)
        operator_object.update_workflow_status('Executing')
        return operator_object.return_output_object()
def lambda_handler(event, context):
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        job_id = operator_object.metadata["comprehend_phrases_job_id"]
        asset_id = operator_object.asset_id
        workflow_id = operator_object.workflow_execution_id
        # If Comprehend wasn't run due to empty text input, then we're done
        if job_id == "Empty input --> empty output.":
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            comprehend_error="No valid job id")
        raise MasExecutionError(operator_object.return_output_object())
    try:
        response = comprehend.list_key_phrases_detection_jobs(
            Filter={'JobName': job_id})
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            comprehend_error="Unable to get response from comprehend: {e}".
            format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        print(response)
        comprehend_status = response["KeyPhrasesDetectionJobPropertiesList"][
            0]["JobStatus"]
        if comprehend_status == "SUBMITTED" or comprehend_status == "IN_PROGRESS":
            operator_object.add_workflow_metadata(
                comprehend_phrases_job_id=job_id)
            operator_object.update_workflow_status("Executing")
            return operator_object.return_output_object()
        elif comprehend_status == "COMPLETED":
            output_uri = response["KeyPhrasesDetectionJobPropertiesList"][0][
                "OutputDataConfig"]["S3Uri"]
            delimiter = '/'
            bucket = delimiter.join(output_uri.split(delimiter)[2:3])
            file_name = output_uri.split(delimiter)[-1]
            key = delimiter.join(
                output_uri.split(delimiter)[3:-1]) + '/' + file_name
            comprehend_tarball = read_from_s3(bucket, key)
            comprehend_data = {
                "LanguageCode":
                response['KeyPhrasesDetectionJobPropertiesList'][0]
                ['LanguageCode'],
                "Results": []
            }
            if comprehend_tarball["Status"] == "Success":
                input_bytes = comprehend_tarball["Object"]
                with tarfile.open(fileobj=BytesIO(input_bytes)) as tf:
                    for member in tf:
                        if member.isfile():
                            comprehend_data["Results"].append(
                                tf.extractfile(member).read().decode('utf-8'))
                dataplane = DataPlane()
                metadata_upload = dataplane.store_asset_metadata(
                    asset_id, "key_phrases", workflow_id, comprehend_data)
                if "Status" not in metadata_upload:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        comprehend_error="Unable to store key phrases data {e}"
                        .format(e=metadata_upload))
                    raise MasExecutionError(
                        operator_object.return_output_object())
                else:
                    if metadata_upload["Status"] == "Success":
                        operator_object.add_workflow_metadata(
                            comprehend_entity_job_id=job_id,
                            output_uri=output_uri)
                        operator_object.update_workflow_status("Complete")
                        return operator_object.return_output_object()
                    else:
                        operator_object.update_workflow_status("Error")
                        operator_object.add_workflow_metadata(
                            comprehend_error=
                            "Unable to store key phrases data {e}".format(
                                e=metadata_upload))
                        raise MasExecutionError(
                            operator_object.return_output_object())
            else:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    comprehend_entity_job_id=job_id,
                    comprehend_error="could not retrieve output from s3: {e}".
                    format(e=comprehend_tarball["Message"]))
                raise MasExecutionError(operator_object.return_output_object())
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                comprehend_phrases_job_id=job_id,
                comprehend_error="comprehend returned as failed: {e}".format(
                    e=response["KeyPhrasesDetectionJobPropertiesList"][0]
                    ["Message"]))
            raise MasExecutionError(operator_object.return_output_object())
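# read_from_s3 (used above to fetch the Comprehend output tarball) is defined
# elsewhere. Judging by how its return value is consumed, it likely looks
# something like this sketch (the Status/Object/Message contract is inferred
# from the callers, not confirmed):
import boto3

s3_client = boto3.client("s3")

def read_from_s3(bucket, key):
    """Fetch an S3 object, returning the status dict the callers above expect."""
    try:
        obj = s3_client.get_object(Bucket=bucket, Key=key)
    except Exception as e:
        return {"Status": "Error", "Message": str(e)}
    return {"Status": "Success", "Object": obj["Body"].read()}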
def lambda_handler(event, context):
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            comprehend_error="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
        # If operator_object.input["Media"]["Text"]["S3Key"] is a json file,
        # then we're working with metadata about the text file and need to
        # get the actual transcript text from the TextTranscriptUri field.
        # Otherwise we assume operator_object.input["Media"]["Text"]["S3Key"]
        # contains only the transcript text.
        file_ext = str(key.split('.')[-1])
        if file_ext == "json":
            obj = s3.get_object(Bucket=bucket, Key=key)
            results = obj['Body'].read().decode('utf-8')
            results_json = json.loads(results)
            try:
                uri_data = results_json["TextTranscriptUri"]
            except KeyError:
                raise MasExecutionError(
                    "JSON can only be passed in from AWS Transcribe")
            else:
                bucket = uri_data['S3Bucket']
                key = uri_data['S3Key']
        uri = "s3://" + bucket + '/' + key
        # If input text is empty then we're done.
        response = s3.head_object(Bucket=bucket, Key=key)
        # If a KmsKey is specified as an input to this operator, then use that
        # to enable encryption in the Comprehend job.
        kms_key_id = ""
        if "KmsKeyId" in operator_object.configuration:
            kms_key_id = operator_object.configuration["KmsKeyId"]
            print(
                "Found a KMS Key Id. Encryption will be enabled in the Comprehend job."
            )
        else:
            print(
                "No KMS Key was specified. Encryption will not be enabled in the Comprehend job."
            )
        if response['ContentLength'] < 1:
            operator_object.update_workflow_status("Complete")
            operator_object.add_workflow_metadata(
                comprehend_entity_job_id="Empty input --> empty output.")
            return operator_object.return_output_object()
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            comprehend_error="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())
    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''
    dataplane = DataPlane()
    output_uri_request = dataplane.generate_media_storage_path(
        asset_id, workflow_id)
    output_uri = "s3://{bucket}/{key}".format(
        bucket=output_uri_request["S3Bucket"],
        key=output_uri_request["S3Key"] + '/comprehend_entities')
    try:
        if kms_key_id != '':
            # If the user specified a KMS key then enable comprehend job encryption.
            comprehend.start_entities_detection_job(
                InputDataConfig={
                    "S3Uri": uri,
                    "InputFormat": "ONE_DOC_PER_FILE"
                },
                OutputDataConfig={
                    "S3Uri": output_uri,
                    "KmsKeyId": kms_key_id
                },
                DataAccessRoleArn=comprehend_role,
                VolumeKmsKeyId=kms_key_id,
                JobName=workflow_id,
                LanguageCode="en")
        else:
            comprehend.start_entities_detection_job(
                InputDataConfig={
                    "S3Uri": uri,
                    "InputFormat": "ONE_DOC_PER_FILE"
                },
                OutputDataConfig={"S3Uri": output_uri},
                DataAccessRoleArn=comprehend_role,
                JobName=workflow_id,
                LanguageCode="en")
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            comprehend_error="Unable to get response from comprehend: {e}".
            format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        comprehend_job_id = workflow_id
        operator_object.add_workflow_metadata(
            comprehend_entity_job_id=comprehend_job_id,
            entity_output_uri=output_uri)
        operator_object.update_workflow_status('Executing')
        return operator_object.return_output_object()
Example #13
def web_to_vtt(event, context):
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''

    try:
        targetLanguageCodes = operator_object.configuration["TargetLanguageCodes"]
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    captions_collection = []
    for lang in targetLanguageCodes:
        # FIXME: dataplane.retrieve_asset_metadata should return only the WebCaptions
        # data for this operator, but it currently returns everything, so the captions
        # are read with get_webcaptions_json instead.
        captions = get_webcaptions_json(operator_object, lang)

        vtt = 'WEBVTT\n\n'

        for caption in captions:
            vtt += formatTimeVTT(float(caption["start"])) + ' --> ' + formatTimeVTT(float(caption["end"])) + '\n'
            vtt += caption["caption"] + '\n\n'

        response = dataplane.generate_media_storage_path(asset_id, workflow_id)
        
        print(json.dumps(response))
        
        bucket = response["S3Bucket"]
        key = response["S3Key"]+'Captions_'+lang+'.vtt'
        s3_object = s3_resource.Object(bucket, key)

        s3_object.put(Body=vtt)

        metadata = {
            "OperatorName": "VTTCaptions_"+lang,
            "Results": {"S3Bucket": bucket, "S3Key": key},
            "WorkflowId": workflow_id,
            "LanguageCode": lang
        }

        captions_collection.append(metadata)

    data = {}
    data["CaptionsCollection"] = captions_collection
    metadata_upload = dataplane.store_asset_metadata(
        asset_id, operator_object.name, workflow_id, data)

    if "Status" not in metadata_upload:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to store VTT captions file {e}".format(e=metadata_upload))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if metadata_upload["Status"] == "Success":
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                CaptionsError="Unable to store VTT captions file {e}".format(e=metadata_upload))
            raise MasExecutionError(operator_object.return_output_object())
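# formatTimeVTT is referenced above but not shown. A minimal sketch that turns a
# float number of seconds into a WebVTT "HH:MM:SS.mmm" cue timestamp (an
# assumption about the helper, not its actual source):
def formatTimeVTT(seconds):
    """Format seconds as a WebVTT timestamp, e.g. 75.5 -> '00:01:15.500'."""
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    millis = int(round((seconds - int(seconds)) * 1000))
    return "{:02d}:{:02d}:{:02d}.{:03d}".format(hours, minutes, secs, millis)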
Example #14
def lambda_handler(event, context):
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="No valid inputs {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''

    try:
        source_lang = operator_object.configuration["SourceLanguageCode"]
        target_lang = operator_object.configuration["TargetLanguageCode"]
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Language codes are not defined")
        raise MasExecutionError(operator_object.return_output_object())

    try:
        s3_response = s3.get_object(Bucket=bucket, Key=key)
        transcribe_metadata = json.loads(
            s3_response["Body"].read().decode("utf-8"))
        transcript = transcribe_metadata["results"]["transcripts"][0][
            "transcript"]
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Unable to read transcription from S3: {e}".format(
                e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())

    # If input text is empty then we're done.
    if len(transcript) < 1:
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()

    # Tell the NLTK data loader to look for files in /tmp/
    nltk.data.path.append("/tmp/")
    # Download NLTK tokenizers to /tmp/
    # We use /tmp because that's where AWS Lambda provides write access to the local file system.
    nltk.download('punkt', download_dir='/tmp/')
    # Create language tokenizer according to user-specified source language.
    # Default to English.
    if source_lang == 'fr':
        print("Using French dictionary to find sentence boundaries.")
        tokenizer = nltk.data.load('tokenizers/punkt/french.pickle')
    elif source_lang == 'de':
        print("Using German dictionary to find sentence boundaries.")
        tokenizer = nltk.data.load('tokenizers/punkt/german.pickle')
    elif source_lang == 'ru':
        print("Using Russian dictionary to find sentence boundaries.")
        tokenizer = nltk.data.load('tokenizers/punkt/russian.pickle')
    elif source_lang == 'it':
        print("Using Italian dictionary to find sentence boundaries.")
        tokenizer = nltk.data.load('tokenizers/punkt/italian.pickle')
    elif source_lang == 'pt':
        print("Using Portuguese dictionary to find sentence boundaries.")
        tokenizer = nltk.data.load('tokenizers/punkt/portuguese.pickle')
    elif source_lang == 'es':
        print("Using Spanish dictionary to find sentence boundaries.")
        tokenizer = nltk.data.load('tokenizers/punkt/spanish.pickle')
    else:
        print("Using English dictionary to find sentence boundaries.")
        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

    # Split input text into a list of sentences
    sentences = tokenizer.tokenize(transcript)
    print("Input text length: " + str(len(transcript)))
    print("Number of sentences: " + str(len(sentences)))
    translated_text = ''
    transcript_chunk = ''
    for sentence in sentences:
        # Translate can handle 5000 unicode characters but we'll process no
        # more than 1000 just to be on the safe side.
        # Even by limiting input text to 3000 characters, we've still seen
        # translate throttling with a RateExceeded exception.
        # Reducing input text to 1000 characters seemed to fix this.
        if (len(sentence) + len(transcript_chunk) < 1000):
            transcript_chunk = transcript_chunk + ' ' + sentence
        else:
            try:
                print("Translation input text length: " +
                      str(len(transcript_chunk)))
                translation_chunk = translate_client.translate_text(
                    Text=transcript_chunk,
                    SourceLanguageCode=source_lang,
                    TargetLanguageCode=target_lang)
                print("Translation output text length: " +
                      str(len(translation_chunk)))
            except Exception as e:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    TranslateError="Unable to get response from translate: {e}"
                    .format(e=str(e)))
                raise MasExecutionError(operator_object.return_output_object())
            translated_text = translated_text + ' ' + translation_chunk[
                "TranslatedText"]
            transcript_chunk = sentence
    print("Translating the final chunk of input text...")
    try:
        print("Translation input text length: " + str(len(transcript_chunk)))
        translation_chunk = translate_client.translate_text(
            Text=transcript_chunk,
            SourceLanguageCode=source_lang,
            TargetLanguageCode=target_lang)
        print("Translation output text length: " + str(len(translation_chunk)))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Unable to get response from translate: {e}".format(
                e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    translated_text = translated_text + ' ' + translation_chunk[
        "TranslatedText"]
    # Put final result into a JSON object because the MIE dataplane requires it to be so.
    translation_result = {}
    translation_result["TranslatedText"] = translated_text
    translation_result["SourceLanguageCode"] = source_lang
    translation_result["TargetLanguageCode"] = target_lang
    print("Final translation text length: " + str(len(translated_text)))
    dataplane = DataPlane()
    metadata_upload = dataplane.store_asset_metadata(asset_id,
                                                     operator_object.name,
                                                     workflow_id,
                                                     translation_result)
    if "Status" not in metadata_upload:
        operator_object.add_workflow_metadata(
            TranslateError="Unable to upload metadata for asset: {asset}".
            format(asset=asset_id))
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if metadata_upload['Status'] == 'Success':
            operator_object.add_media_object('Text', metadata_upload['Bucket'],
                                             metadata_upload['Key'])
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
        else:
            operator_object.add_workflow_metadata(
                TranslateError="Unable to upload metadata for asset: {asset}".
                format(asset=asset_id))
            operator_object.update_workflow_status("Error")
            raise MasExecutionError(operator_object.return_output_object())
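# The handler above avoids Translate throttling by shrinking chunks to 1000
# characters. A complementary approach is to retry throttled calls with
# exponential backoff; a sketch, assuming throttling surfaces as a ClientError
# with code "ThrottlingException":
import time
from botocore.exceptions import ClientError

def translate_with_backoff(text, source_lang, target_lang, max_retries=5):
    """Retry translate_text with exponential backoff when throttled."""
    for attempt in range(max_retries):
        try:
            return translate_client.translate_text(
                Text=text,
                SourceLanguageCode=source_lang,
                TargetLanguageCode=target_lang)
        except ClientError as e:
            if e.response["Error"]["Code"] != "ThrottlingException":
                raise
            time.sleep(2 ** attempt)
    raise RuntimeError("Translate still throttled after {} retries".format(max_retries))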
Example #15
def lambda_handler(event, context):
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # Get MediaConvert job id
    try:
        job_id = operator_object.metadata["MediaconvertJobId"]
        workflow_id = operator_object.workflow_execution_id
        input_file = operator_object.metadata["MediaconvertInputFile"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    # Get asset id
    try:
        asset_id = operator_object.asset_id
    except KeyError as e:
        print("No asset_id in this workflow")
        asset_id = ''

    # Get mediaconvert endpoint from cache if available
    if ("MEDIACONVERT_ENDPOINT" in os.environ):
        mediaconvert_endpoint = os.environ["MEDIACONVERT_ENDPOINT"]
        customer_mediaconvert = boto3.client("mediaconvert", region_name=region, endpoint_url=mediaconvert_endpoint)
    else:
        try:
            response = mediaconvert.describe_endpoints()
        except Exception as e:
            print("Exception:\n", e)
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(MediaconvertError=str(e))
            raise MasExecutionError(operator_object.return_output_object())
        else:
            mediaconvert_endpoint = response["Endpoints"][0]["Url"]
            # Cache the mediaconvert endpoint in order to avoid getting throttled on
            # the DescribeEndpoints API.
            os.environ["MEDIACONVERT_ENDPOINT"] = mediaconvert_endpoint
            customer_mediaconvert = boto3.client("mediaconvert", region_name=region, endpoint_url=mediaconvert_endpoint)

    # Get MediaConvert job results
    try:
        response = customer_mediaconvert.get_job(Id=job_id)
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=e, MediaconvertJobId=job_id)
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if response["Job"]["Status"] == 'IN_PROGRESS' or response["Job"]["Status"] == 'PROGRESSING':
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(MediaconvertJobId=job_id, MediaconvertInputFile=input_file, AssetId=asset_id, WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif response["Job"]["Status"] == 'COMPLETE':
            input_filename = os.path.splitext(operator_object.metadata["MediaconvertInputFile"].split("/")[-1])[0]
            # Get Thumbnail object
            thumbnail_output_uri = response["Job"]["Settings"]["OutputGroups"][0]["OutputGroupSettings"]["FileGroupSettings"]["Destination"]
            thumbnail_extension = response["Job"]["Settings"]["OutputGroups"][0]["Outputs"][0]["Extension"]
            thumbnail_modifier = response["Job"]["Settings"]["OutputGroups"][0]["Outputs"][0]["NameModifier"]
            thumbnail_bucket = thumbnail_output_uri.split("/")[2]
            thumbnail_folder = "/".join(thumbnail_output_uri.split("/")[3:-1])
            thumbnail_key = thumbnail_folder + "/" + input_filename + thumbnail_modifier + "." + thumbnail_extension
            operator_object.add_media_object("Thumbnail", thumbnail_bucket, thumbnail_key)
            # Get audio object
            audio_output_uri = response["Job"]["Settings"]["OutputGroups"][1]["OutputGroupSettings"]["FileGroupSettings"]["Destination"]
            audio_extension = response["Job"]["Settings"]["OutputGroups"][1]["Outputs"][0]["Extension"]
            audio_modifier = response["Job"]["Settings"]["OutputGroups"][1]["Outputs"][0]["NameModifier"]
            audio_bucket = audio_output_uri.split("/")[2]
            audio_folder = "/".join(audio_output_uri.split("/")[3:-1])
            audio_key = audio_folder + "/" + input_filename + audio_modifier + "." + audio_extension
            operator_object.add_media_object("Audio", audio_bucket, audio_key)
            operator_object.add_workflow_metadata(MediaconvertJobId=job_id)
            # Get mp4 proxy encode object
            proxy_encode_output_uri = response["Job"]["Settings"]["OutputGroups"][2]["OutputGroupSettings"]["FileGroupSettings"]["Destination"]
            proxy_encode_extension = response["Job"]["Settings"]["OutputGroups"][2]["Outputs"][0]["Extension"]
            proxy_encode_modifier = response["Job"]["Settings"]["OutputGroups"][2]["Outputs"][0]["NameModifier"]
            proxy_encode_bucket = proxy_encode_output_uri.split("/")[2]
            proxy_encode_folder = "/".join(proxy_encode_output_uri.split("/")[3:-1])
            proxy_encode_key = proxy_encode_folder + "/" + input_filename + proxy_encode_modifier + "." + proxy_encode_extension
            operator_object.add_media_object("ProxyEncode", proxy_encode_bucket, proxy_encode_key)
            # Set workflow status complete
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                MediaconvertError="Unhandled exception, unable to get status from mediaconvert: {response}".format(response=response), MediaconvertJobId=job_id)
            raise MasExecutionError(operator_object.return_output_object())
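# The endpoint lookup above (check os.environ, fall back to DescribeEndpoints,
# then cache) recurs in several handlers. Factored into a helper it might look
# like this sketch:
import os
import boto3

def get_mediaconvert_client(region):
    """Return a MediaConvert client, caching the account endpoint across invocations."""
    if "MEDIACONVERT_ENDPOINT" not in os.environ:
        # DescribeEndpoints is easily throttled, so call it at most once per
        # warm container and cache the result in the environment.
        response = boto3.client("mediaconvert", region_name=region).describe_endpoints()
        os.environ["MEDIACONVERT_ENDPOINT"] = response["Endpoints"][0]["Url"]
    return boto3.client(
        "mediaconvert",
        region_name=region,
        endpoint_url=os.environ["MEDIACONVERT_ENDPOINT"])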
def lambda_handler(event, context):
    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        job_id = operator_object.metadata["MediaconvertJobId"]
        workflow_id = operator_object.workflow_execution_id
        input_file = operator_object.metadata["MediaconvertInputFile"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            MediaconvertError="Missing a required metadata key {e}".format(
                e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        asset_id = operator_object.asset_id
    except KeyError as e:
        print("No asset_id in this workflow")
        asset_id = ''

    try:
        response = mediaconvert.describe_endpoints()
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        mediaconvert_endpoint = response["Endpoints"][0]["Url"]
        customer_mediaconvert = boto3.client(
            "mediaconvert",
            region_name=region,
            endpoint_url=mediaconvert_endpoint)

    try:
        response = customer_mediaconvert.get_job(Id=job_id)
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=e,
                                              MediaconvertJobId=job_id)
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if response["Job"]["Status"] == 'IN_PROGRESS' or response["Job"][
                "Status"] == 'PROGRESSING':
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                MediaconvertJobId=job_id,
                MediaconvertInputFile=input_file,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif response["Job"]["Status"] == 'COMPLETE':
            # TODO: Store job details as metadata in dataplane
            # TODO: Get output uri from dataplane
            output_uri = response["Job"]["Settings"]["OutputGroups"][0][
                "OutputGroupSettings"]["FileGroupSettings"]["Destination"]

            extension = response["Job"]["Settings"]["OutputGroups"][0][
                "Outputs"][0]["Extension"]
            modifier = response["Job"]["Settings"]["OutputGroups"][0][
                "Outputs"][0]["NameModifier"]

            bucket = output_uri.split("/")[2]
            folder = "/".join(output_uri.split("/")[3:-1])

            file_name = os.path.splitext(
                operator_object.metadata["MediaconvertInputFile"])[0].split(
                    "/")[-1]

            key = folder + "/" + file_name + modifier + "." + extension

            operator_object.add_media_object("Audio", bucket, key)
            operator_object.add_workflow_metadata(MediaconvertJobId=job_id)
            operator_object.update_workflow_status("Complete")

            return operator_object.return_output_object()
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                MediaconvertError=
                "Unhandled exception, unable to get status from mediaconvert: {response}"
                .format(response=response),
                MediaconvertJobId=job_id)
            raise MasExecutionError(operator_object.return_output_object())
def start_polly_webcaptions(event, context):
    print("We got this event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)

    captions_collection = webcaptions_object.GetWebCaptionsCollection()
    print("INPUT CAPTIONS COLLECTION")
    print(json.dumps(captions_collection))

    for caption in captions_collection:

        # Always start from WebCaptions data since these are the most recently edited version
        # Convert WebCaptions to a text only transcript
        transcript = webcaptions_object.GetTextOnlyTranscript(
            caption["TargetLanguageCode"])

        # If input text is empty then we're done.
        if len(transcript) < 1:
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()

        # Get language code of the transcript, we should just pass this along in the event later
        language_code = translate_to_polly_language_code(
            caption["TargetLanguageCode"])

        if language_code == "not supported":
            caption["PollyStatus"] = "not supported"
        else:

            try:
                # set voice_id based on language
                response = polly.describe_voices(
                    #Engine='standard'|'neural',
                    LanguageCode=language_code
                    #IncludeAdditionalLanguageCodes=True|False,
                    #NextToken='string'
                )

            except Exception as e:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    PollyCollectionError=
                    "Unable to get response from polly describe_voices: {e}".
                    format(e=str(e)))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                # Just take the first voice in the list. Later this could be extended to
                # choose a voice based on other criteria, such as gender.
                voice_id = response["Voices"][0]["Id"]
                caption["VoiceId"] = voice_id

            caption["PollyAudio"] = {}
            caption["PollyAudio"][
                "S3Key"] = 'private/assets/' + operator_object.asset_id + "/workflows/" + operator_object.workflow_execution_id + "/" + "audio_only" + "_" + caption[
                    "TargetLanguageCode"]
            caption["PollyAudio"]["S3Bucket"] = caption["TranslationText"][
                "S3Bucket"]

            try:
                polly_response = polly.start_speech_synthesis_task(
                    OutputFormat='mp3',
                    OutputS3BucketName=caption["PollyAudio"]["S3Bucket"],
                    OutputS3KeyPrefix=caption["PollyAudio"]["S3Key"],
                    Text=transcript,
                    TextType='text',
                    VoiceId=voice_id)

            except Exception as e:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    PollyCollectionError=
                    "Unable to get response from polly: {e}".format(e=str(e)))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                polly_job_id = polly_response['SynthesisTask']['TaskId']
                caption["PollyTaskId"] = polly_job_id
                caption["PollyStatus"] = "started"

                # Polly appends the task id to the S3 key of the output
                caption["PollyAudio"]["S3Key"] = "private/assets/{}/workflows/{}/audio_only_{}.{}.mp3".format(
                    operator_object.asset_id, operator_object.workflow_execution_id,
                    caption["TargetLanguageCode"], polly_job_id)

    operator_object.add_workflow_metadata(
        PollyCollection=captions_collection,
        WorkflowExecutionId=operator_object.workflow_execution_id,
        AssetId=operator_object.asset_id)
    operator_object.update_workflow_status('Executing')
    return operator_object.return_output_object()
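# translate_to_polly_language_code is used above but defined elsewhere. Polly
# expects region-qualified codes (e.g. "es-ES") where Translate uses bare ISO
# codes (e.g. "es"), so the helper presumably maps between them. A partial
# sketch (the mapping shown is illustrative, not the full table):
def translate_to_polly_language_code(translate_code):
    """Map an Amazon Translate language code to a Polly language code."""
    mapping = {
        "en": "en-US",
        "es": "es-ES",
        "fr": "fr-FR",
        "de": "de-DE",
        "it": "it-IT",
        "pt": "pt-BR",
        "ru": "ru-RU",
    }
    return mapping.get(translate_code, "not supported")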
Example #18
def lambda_handler(event, context):

    print("We got this event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        task_id = operator_object.metadata["PollyJobId"]
        workflow_id = operator_object.workflow_execution_id
        asset_id = operator_object.asset_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            PollyError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        polly_response = polly.get_speech_synthesis_task(TaskId=task_id)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            PollyError="Unable to get response from polly: {e}".format(
                e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        polly_status = polly_response["SynthesisTask"]["TaskStatus"]
        print("The status from polly is:\n", polly_status)
        if polly_status == "inProgress":
            polly_job_id = polly_response["SynthesisTask"]["TaskId"]
            operator_object.add_workflow_metadata(
                PollyJobId=polly_job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            operator_object.update_workflow_status("Executing")
            return operator_object.return_output_object()
        elif polly_status == "completed":
            # TODO: Store job details as metadata in dataplane

            uri = polly_response["SynthesisTask"]["OutputUri"]
            file = uri.split("/")[5]
            folder = uri.split("/")[4]
            bucket = uri.split("/")[3]
            key = folder + "/" + file

            operator_object.add_workflow_metadata(PollyJobId=task_id)
            operator_object.add_media_object("Audio", bucket, key)
            operator_object.update_workflow_status("Complete")

            return operator_object.return_output_object()

        elif polly_status == "scheduled":
            operator_object.add_workflow_metadata(
                PollyJobId=task_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            operator_object.update_workflow_status("Executing")
            return operator_object.return_output_object()

        elif polly_status == "failed":
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                PollyError="Polly returned as failed: {e}".format(e=str(
                    polly_response["SynthesisTask"]["TaskStatusReason"])))
            raise MasExecutionError(operator_object.return_output_object())
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                PollyError="Polly returned an unexpected task status: {status}".format(
                    status=polly_status))
            raise MasExecutionError(operator_object.return_output_object())
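# The completed branch above pulls bucket and key out of OutputUri by positional
# split("/") indexes, which assumes a path-style S3 URL
# (https://s3.<region>.amazonaws.com/<bucket>/<key>). A slightly more defensive
# sketch of the same parsing (an alternative, not the original code):
from urllib.parse import urlparse

def split_polly_output_uri(uri):
    """Split a path-style S3 URL into (bucket, key)."""
    path = urlparse(uri).path.lstrip("/")
    bucket, _, key = path.partition("/")
    return bucket, key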
Example #19
def lambda_handler(event, context):
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)

    try:
        workflow_id = str(operator_object.workflow_execution_id)
        asset_id = operator_object.asset_id
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
        key = operator_object.input["Media"]["Video"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(VideoTranscodingError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    file_input = "s3://" + bucket + "/" + key
    hls_destination = "s3://" + bucket + "/private/assets/" + asset_id + "/hls/playlist"
    proxy_destination = "s3://" + bucket + "/private/assets/" + asset_id + "/proxy/" + asset_id
    audio_destination = "s3://" + bucket + "/private/assets/" + asset_id + "/audio/" + asset_id

    # Get mediaconvert endpoint from cache if available
    if ("MEDIACONVERT_ENDPOINT" in os.environ):
        mediaconvert_endpoint = os.environ["MEDIACONVERT_ENDPOINT"]
        customer_mediaconvert = boto3.client("mediaconvert", region_name=region, endpoint_url=mediaconvert_endpoint)
    else:
        try:
            response = mediaconvert.describe_endpoints()
        except Exception as e:
            print("Exception:\n", e)
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(VideoTranscodingError=str(e))
            raise MasExecutionError(operator_object.return_output_object())
        else:
            mediaconvert_endpoint = response["Endpoints"][0]["Url"]
            # Cache the mediaconvert endpoint in order to avoid getting throttled on
            # the DescribeEndpoints API.
            os.environ["MEDIACONVERT_ENDPOINT"] = mediaconvert_endpoint
            customer_mediaconvert = boto3.client("mediaconvert", region_name=region, endpoint_url=mediaconvert_endpoint)

    try:
        response = customer_mediaconvert.create_job(
            Role=mediaconvert_role,
            Settings={
                "OutputGroups": [
                    {
                        "Name": "Apple HLS",
                        "Outputs": [
                            {
                                "Preset": "System-Avc_16x9_1080p_29_97fps_8500kbps",
                                "NameModifier": "_hls"
                            }
                        ],
                        "OutputGroupSettings": {
                            "Type": "HLS_GROUP_SETTINGS",
                            "HlsGroupSettings": {
                                "ManifestDurationFormat": "INTEGER",
                                "SegmentLength": 1,
                                "TimedMetadataId3Period": 10,
                                "CaptionLanguageSetting": "OMIT",
                                "TimedMetadataId3Frame": "PRIV",
                                "CodecSpecification": "RFC_4281",
                                "OutputSelection": "MANIFESTS_AND_SEGMENTS",
                                "ProgramDateTimePeriod": 600,
                                "MinSegmentLength": 0,
                                "MinFinalSegmentLength": 0,
                                "DirectoryStructure": "SINGLE_DIRECTORY",
                                "ProgramDateTime": "EXCLUDE",
                                "SegmentControl": "SEGMENTED_FILES",
                                "ManifestCompression": "NONE",
                                "ClientCache": "ENABLED",
                                "StreamInfResolution": "INCLUDE",
                                "Destination": hls_destination
                            }
                        }
                    },
                    {
                        "CustomName": "Proxy",
                        "Name": "File Group",
                        "Outputs": [
                            {
                                "VideoDescription": {
                                    "ScalingBehavior": "DEFAULT",
                                    "TimecodeInsertion": "DISABLED",
                                    "AntiAlias": "ENABLED",
                                    "Sharpness": 50,
                                    "CodecSettings": {
                                        "Codec": "H_264",
                                        "H264Settings": {
                                            "InterlaceMode": "PROGRESSIVE",
                                            "NumberReferenceFrames": 3,
                                            "Syntax": "DEFAULT",
                                            "Softness": 0,
                                            "GopClosedCadence": 1,
                                            "GopSize": 90,
                                            "Slices": 1,
                                            "GopBReference": "DISABLED",
                                            "SlowPal": "DISABLED",
                                            "SpatialAdaptiveQuantization": "ENABLED",
                                            "TemporalAdaptiveQuantization": "ENABLED",
                                            "FlickerAdaptiveQuantization": "DISABLED",
                                            "EntropyEncoding": "CABAC",
                                            "Bitrate": 5000000,
                                            "FramerateControl": "SPECIFIED",
                                            "RateControlMode": "CBR",
                                            "CodecProfile": "MAIN",
                                            "Telecine": "NONE",
                                            "MinIInterval": 0,
                                            "AdaptiveQuantization": "HIGH",
                                            "CodecLevel": "AUTO",
                                            "FieldEncoding": "PAFF",
                                            "SceneChangeDetect": "ENABLED",
                                            "QualityTuningLevel": "SINGLE_PASS",
                                            "FramerateConversionAlgorithm": "DUPLICATE_DROP",
                                            "UnregisteredSeiTimecode": "DISABLED",
                                            "GopSizeUnits": "FRAMES",
                                            "ParControl": "SPECIFIED",
                                            "NumberBFramesBetweenReferenceFrames": 2,
                                            "RepeatPps": "DISABLED",
                                            "FramerateNumerator": 30,
                                            "FramerateDenominator": 1,
                                            "ParNumerator": 1,
                                            "ParDenominator": 1
                                        }
                                    },
                                    "AfdSignaling": "NONE",
                                    "DropFrameTimecode": "ENABLED",
                                    "RespondToAfd": "NONE",
                                    "ColorMetadata": "INSERT"
                                },
                                "AudioDescriptions": [
                                    {
                                        "AudioTypeControl": "FOLLOW_INPUT",
                                        "CodecSettings": {
                                            "Codec": "AAC",
                                            "AacSettings": {
                                                "AudioDescriptionBroadcasterMix": "NORMAL",
                                                "RateControlMode": "CBR",
                                                "CodecProfile": "LC",
                                                "CodingMode": "CODING_MODE_2_0",
                                                "RawFormat": "NONE",
                                                "SampleRate": 48000,
                                                "Specification": "MPEG4",
                                                "Bitrate": 64000
                                            }
                                        },
                                        "LanguageCodeControl": "FOLLOW_INPUT",
                                        "AudioSourceName": "Audio Selector 1"
                                    }
                                ],
                                "ContainerSettings": {
                                    "Container": "MP4",
                                    "Mp4Settings": {
                                        "CslgAtom": "INCLUDE",
                                        "FreeSpaceBox": "EXCLUDE",
                                        "MoovPlacement": "PROGRESSIVE_DOWNLOAD"
                                    }
                                },
                                "Extension": "mp4",
                                "NameModifier": "_proxy"
                            }
                        ],
                        "OutputGroupSettings": {
                            "Type": "FILE_GROUP_SETTINGS",
                            "FileGroupSettings": {
                                "Destination": proxy_destination
                            }
                        }
                    },
                    {
                        "CustomName": "Audio",
                        "Name": "File Group",
                        "Outputs": [
                            {
                                "ContainerSettings": {
                                    "Container": "MP4",
                                    "Mp4Settings": {
                                        "CslgAtom": "INCLUDE",
                                        "CttsVersion": 0,
                                        "FreeSpaceBox": "EXCLUDE",
                                        "MoovPlacement": "PROGRESSIVE_DOWNLOAD"
                                    }
                                },
                                "AudioDescriptions": [
                                    {
                                        "AudioTypeControl": "FOLLOW_INPUT",
                                        "AudioSourceName": "Audio Selector 1",
                                        "AudioNormalizationSettings": {
                                            "Algorithm": "ITU_BS_1770_2",
                                            "AlgorithmControl": "MEASURE_ONLY",
                                            "LoudnessLogging": "LOG",
                                            "PeakCalculation": "NONE"
                                        },
                                        "CodecSettings": {
                                            "Codec": "AAC",
                                            "AacSettings": {
                                                "AudioDescriptionBroadcasterMix": "NORMAL",
                                                "Bitrate": 96000,
                                                "RateControlMode": "CBR",
                                                "CodecProfile": "LC",
                                                "CodingMode": "CODING_MODE_2_0",
                                                "RawFormat": "NONE",
                                                "SampleRate": 48000,
                                                "Specification": "MPEG4"
                                            }
                                        },
                                        "LanguageCodeControl": "FOLLOW_INPUT"
                                    }
                                ],
                                "Extension": "mp4",
                                "NameModifier": "_audio"
                            }
                        ],
                        "OutputGroupSettings": {
                            "Type": "FILE_GROUP_SETTINGS",
                            "FileGroupSettings": {
                                "Destination": audio_destination
                            }
                        }
                    }
                ],
                "Inputs": [{
                    "AudioSelectors": {
                        "Audio Selector 1": {
                            "Offset": 0,
                            "DefaultSelection": "DEFAULT",
                            "ProgramSelection": 1
                        }
                    },
                    "VideoSelector": {
                        "ColorSpace": "FOLLOW",
                        "Rotate": "DEGREE_0",
                        "AlphaBehavior": "DISCARD"
                    },
                    "FilterEnable": "AUTO",
                    "PsiControl": "USE_PSI",
                    "FilterStrength": 0,
                    "DeblockFilter": "DISABLED",
                    "DenoiseFilter": "DISABLED",
                    "TimecodeSource": "EMBEDDED",
                    "FileInput": file_input
                }]
            }
        )
    # TODO: Add support for boto client error handling
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(VideoTranscodingError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        job_id = response['Job']['Id']
        operator_object.update_workflow_status("Executing")
        operator_object.add_workflow_metadata(VideoTranscodingJobId=job_id, VideoTranscodingInputFile=file_input, AssetId=asset_id, WorkflowExecutionId=workflow_id)
        return operator_object.return_output_object()
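The endpoint lookup and caching above recurs in every MediaConvert operator; a sketch of how it could be factored into one helper (the function name is hypothetical; the environment variable and API calls follow the code above):

import os
import boto3

def get_mediaconvert_client(region):
    # Reuse the account-specific endpoint cached in the environment so warm
    # invocations avoid getting throttled on the DescribeEndpoints API.
    endpoint = os.environ.get("MEDIACONVERT_ENDPOINT")
    if endpoint is None:
        mc = boto3.client("mediaconvert", region_name=region)
        endpoint = mc.describe_endpoints()["Endpoints"][0]["Url"]
        os.environ["MEDIACONVERT_ENDPOINT"] = endpoint
    return boto3.client("mediaconvert", region_name=region, endpoint_url=endpoint)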
Example #20
def lambda_handler(event, context):
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # Get media metadata from input event
    try:
        workflow_id = operator_object.workflow_execution_id
        asset_id = operator_object.asset_id
        loudness_bucket = operator_object.input["Media"]["Loudness"]["S3Bucket"]
        loudness_key = operator_object.input["Media"]["Loudness"]["S3Key"]
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            SlotDetectionError="Missing a required metadata key {e}".format(e=exception))
        raise MasExecutionError(operator_object.return_output_object())
    # Get asset metadata from dataplane
    try:
        asset_metadata = __get_asset_metadata(asset_id)
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            SlotDetectionError="Unable to retrieve metadata for asset {}: {}".format(asset_id, exception))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        # Get detected reasons' timestamps from media and asset metadata
        silences = detect_silences(loudness_bucket, loudness_key)
        black_frames, end_credits = detect_technical_cues(asset_metadata)
        shots = detect_shots(asset_metadata)
        reasons_timestamps = {
            "Silence": silences,
            "BlackFrame": black_frames,
            "ShotChange": shots,
            "EndCredits": end_credits
        }
        media_info = asset_metadata["shotDetection"]["VideoMetadata"][0]
        # Create slots from reasons' timestamps
        print("reasons_timestamps: {}".format(reasons_timestamps))
        slots = []
        for reason in reasons_timestamps:
            for timestamp in reasons_timestamps[reason]:
                slots.append({
                    "Timestamp": float(timestamp),
                    "Score": 1.0,
                    "Reasons": [reason]
                })
        print("slots: {}".format(slots))
        # Consolidate slots and calculate scores
        slots = calculate_scores(slots, media_info, asset_metadata)
        print("scored_slots: {}".format(slots))
    except Exception as exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(SlotDetectionError=str(exception))
        raise MasExecutionError(operator_object.return_output_object())

    operator_object.add_workflow_metadata(
        AssetId=asset_id,
        WorkflowExecutionId=workflow_id)
    operator_object.update_workflow_status("Complete")

    metadata_upload = dataplane.store_asset_metadata(
        asset_id=asset_id,
        operator_name=operator_object.name,
        workflow_id=workflow_id,
        results={"slots": slots}
    )
    print("metadata_upload: {}".format(metadata_upload))
    if metadata_upload["Status"] == "Success":
        print("Uploaded metadata for asset: {asset}".format(asset=asset_id))
    else:
        # Covers "Failed" and any unexpected status value
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            SlotDetectionError="Unable to upload metadata for asset {}: {}".format(asset_id, metadata_upload))
        raise MasExecutionError(operator_object.return_output_object())

    return operator_object.return_output_object()
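calculate_scores is defined elsewhere in this module; as a rough illustration of the consolidation it performs, slots whose timestamps nearly coincide can be merged and their reasons unioned. A sketch under that assumption (the tolerance value is made up):

def merge_nearby_slots(slots, tolerance_ms=250.0):
    # Illustrative only: merge slots within tolerance_ms of each other,
    # keeping the earliest timestamp and the union of their Reasons.
    merged = []
    for slot in sorted(slots, key=lambda s: s["Timestamp"]):
        if merged and slot["Timestamp"] - merged[-1]["Timestamp"] <= tolerance_ms:
            merged[-1]["Reasons"] = sorted(set(merged[-1]["Reasons"]) | set(slot["Reasons"]))
        else:
            merged.append(dict(slot))
    return merged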
Example #21
def check_translate_webcaptions(event, context):

    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)

    try:
        translate_jobs = operator_object.metadata[
            "TextTranslateJobPropertiesList"]
        workflow_id = operator_object.workflow_execution_id
        asset_id = operator_object.asset_id
        transcript_storage_path = dataplane.generate_media_storage_path(
            asset_id, workflow_id)
        bucket = transcript_storage_path['S3Bucket']
        translation_output_path = transcript_storage_path[
            'S3Key'] + "webcaptions_translate_output/"

    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranslateError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    # Check the status of each job
    # - IF ANY job has an error, we fail the workflow and return from the loop
    # - IF ANY job is still running, the workflow is still Executing
    # - If ALL jobs are complete, we reach the end of the loop and the workflow is complete
    job_status_list = []
    for job in translate_jobs:

        try:
            job_id = job["JobId"]

        except KeyError as e:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranslateError="Missing a required metadata key {e}".format(
                    e=e))
            raise MasExecutionError(operator_object.return_output_object())
        try:
            response = translate_client.describe_text_translation_job(
                JobId=job_id)
            print(response)
            job_status = {
                "JobId": job_id,
                "Status": response["TextTranslationJobProperties"]["JobStatus"]
            }
            # Accumulate per-job status so the workflow metadata reflects
            # every translate job rather than an empty list.
            job_status_list.append(job_status)

        except Exception as e:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(TranslateError=str(e),
                                                  TranslateJobId=job_id)
            raise MasExecutionError(operator_object.return_output_object())
        else:
            if response["TextTranslationJobProperties"]["JobStatus"] in [
                    "IN_PROGRESS", "SUBMITTED"
            ]:
                operator_object.update_workflow_status("Executing")
                operator_object.add_workflow_metadata(
                    TextTranslateJobStatusList=job_status_list,
                    AssetId=asset_id,
                    WorkflowExecutionId=workflow_id)
                return operator_object.return_output_object()
            elif response["TextTranslationJobProperties"]["JobStatus"] in [
                    "FAILED", "COMPLETED_WITH_ERROR", "STOP_REQUESTED",
                    "STOPPED"
            ]:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    TextTranslateJobStatusList=job_status_list,
                    AssetId=asset_id,
                    WorkflowExecutionId=workflow_id)
                raise MasExecutionError(operator_object.return_output_object())
            elif response["TextTranslationJobProperties"][
                    "JobStatus"] == "COMPLETED":
                print("{} is complete".format(job_id))
                operator_object.add_workflow_metadata(
                    TextTranslateJobStatusList=job_status_list,
                    AssetId=asset_id,
                    WorkflowExecutionId=workflow_id)

    # If we made it here, then all the translate jobs are complete.
    # Convert the translations back to WebCaptions and write them out
    # to the dataplane
    translation_storage_path = dataplane.generate_media_storage_path(
        asset_id, workflow_id)
    bucket = translation_storage_path['S3Bucket']
    translation_path = translation_storage_path['S3Key']

    webcaptions_collection = []
    for job in translate_jobs:
        try:
            print("Save translation for job {}".format(job["JobId"]))

            translateJobDescription = translate_client.describe_text_translation_job(
                JobId=job["JobId"])
            translateJobS3Uri = translateJobDescription[
                "TextTranslationJobProperties"]["OutputDataConfig"]["S3Uri"]
            translateJobUrl = urlparse(translateJobS3Uri,
                                       allow_fragments=False)
            translateJobLanguageCode = translateJobDescription[
                "TextTranslationJobProperties"]["TargetLanguageCodes"][0]

            translateJobS3Location = {
                "Uri": translateJobS3Uri,
                "Bucket": translateJobUrl.netloc,
                "Key": translateJobUrl.path.strip("/")
            }

            # use input web captions to convert translation output to web captions format
            for outputS3ObjectKey in map(
                    lambda s: s.key,
                    s3_resource.Bucket(
                        translateJobS3Location["Bucket"]).objects.filter(
                            Prefix=translateJobS3Location["Key"] + "/",
                            Delimiter="/")):
                print("Save translation for each output of job {} output {}".
                      format(job["JobId"], outputS3ObjectKey))

                outputFilename = ntpath.basename(outputS3ObjectKey)

                translateOutput = s3_resource.Object(
                    translateJobS3Location["Bucket"],
                    outputS3ObjectKey).get()["Body"].read().decode("utf-8")
                #inputWebCaptions = get_webcaptions(operator_object, translateJobDescription["TextTranslationJobProperties"]["SourceLanguageCode"])
                inputWebCaptions = webcaptions_object.GetWebCaptions(
                    translateJobDescription["TextTranslationJobProperties"]
                    ["SourceLanguageCode"])
                outputWebCaptions = webcaptions_object.DelimitedToWebCaptions(
                    inputWebCaptions, translateOutput, "<123>", 15)
                print(outputS3ObjectKey)
                (targetLanguageCode, basename, ext) = outputFilename.split(".")
                #put_webcaptions(operator_object, outputWebCaptions, targetLanguageCode)
                operator_metadata = webcaptions_object.PutWebCaptions(
                    outputWebCaptions, targetLanguageCode)

                # Save a copy of the translation text without delimiters
                translation_text = translateOutput.replace("<123>", "")
                translation_text_key = translation_path + "translation" + "_" + targetLanguageCode + ".txt"
                s3_object = s3_resource.Object(bucket, translation_text_key)
                s3_object.put(Body=translation_text)

            metadata = {
                "OperatorName":
                "TranslateWebCaptions_" + translateJobLanguageCode,
                "TranslationText": {
                    "S3Bucket": bucket,
                    "S3Key": translation_text_key
                },
                "WebCaptions": operator_metadata,
                "WorkflowId": workflow_id,
                "TargetLanguageCode": translateJobLanguageCode
            }
            print(json.dumps(metadata))

            webcaptions_collection.append(metadata)

        except Exception as e:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                CaptionsError=
                "Unable to construct path to translate output in S3: {e}".
                format(e=str(e)))
            raise MasExecutionError(operator_object.return_output_object())

    data = {}
    data["CaptionsCollection"] = webcaptions_collection
    webcaptions_object.PutMediaCollection(operator_object.name, data)

    return operator_object.return_output_object()
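DelimitedToWebCaptions is a helper on the WebCaptions class; conceptually it splits the translated text on the delimiter and carries timing over from the source captions. A minimal sketch of that idea, not the helper's actual implementation:

def delimited_to_webcaptions(source_captions, delimited_text, delimiter="<123>"):
    # Pair each translated segment with the start/end of the source caption
    # at the same index; segment and caption counts are assumed to match.
    segments = delimited_text.split(delimiter)
    return [
        {"start": src["start"], "end": src["end"], "caption": seg.strip()}
        for src, seg in zip(source_captions, segments)
    ]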
def lambda_handler(event, context):
    print("We got this event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)
    # If Transcribe wasn't run due to silent audio, then we're done
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"] and event[
            "Input"]["MetaData"]["Mediainfo_num_audio_tracks"] == "0":
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()
    try:
        job_id = operator_object.metadata["TranscribeJobId"]
        workflow_id = operator_object.workflow_execution_id
        asset_id = operator_object.asset_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        response = transcribe.get_transcription_job(
            TranscriptionJobName=job_id)
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(TranscribeError=str(e),
                                              TranscribeJobId=job_id)
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "IN_PROGRESS":
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "FAILED":
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError=str(
                    response["TranscriptionJob"]["FailureReason"]))
            raise MasExecutionError(operator_object.return_output_object())
        elif response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "COMPLETED":
            transcribe_uri = response["TranscriptionJob"]["Transcript"][
                "TranscriptFileUri"]
            http = urllib3.PoolManager()
            transcription = http.request('GET', transcribe_uri)
            transcription_data = transcription.data.decode("utf-8")
            transcription_json = json.loads(transcription_data)

            text_only_transcript = ''

            for transcripts in transcription_json["results"]["transcripts"]:
                transcript = transcripts["transcript"]
                # Concatenate transcripts; str.join here would interleave the
                # accumulator between every character of the transcript.
                text_only_transcript = text_only_transcript + transcript

            print(text_only_transcript)

            dataplane = DataPlane()
            s3 = boto3.client('s3')

            transcript_storage_path = dataplane.generate_media_storage_path(
                asset_id, workflow_id)

            key = transcript_storage_path['S3Key'] + "transcript.txt"
            bucket = transcript_storage_path['S3Bucket']

            s3.put_object(Bucket=bucket, Key=key, Body=text_only_transcript)

            transcription_json["TextTranscriptUri"] = {
                "S3Bucket": bucket,
                "S3Key": key
            }

            metadata_upload = dataplane.store_asset_metadata(
                asset_id, operator_object.name, workflow_id,
                transcription_json)
            if "Status" not in metadata_upload:
                operator_object.add_workflow_metadata(
                    TranscribeError=
                    "Unable to upload metadata for asset: {asset}".format(
                        asset=asset_id),
                    TranscribeJobId=job_id)
                operator_object.update_workflow_status("Error")
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload['Status'] == 'Success':
                    operator_object.add_media_object('Text',
                                                     metadata_upload['Bucket'],
                                                     metadata_upload['Key'])
                    operator_object.add_workflow_metadata(
                        TranscribeJobId=job_id)
                    operator_object.update_workflow_status("Complete")
                    return operator_object.return_output_object()
                else:
                    operator_object.add_workflow_metadata(
                        TranscribeError=
                        "Unable to upload metadata for asset: {asset}".format(
                            asset=asset_id),
                        TranscribeJobId=job_id)
                    operator_object.update_workflow_status("Error")
                    raise MasExecutionError(
                        operator_object.return_output_object())
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeError="Unable to determine status")
            raise MasExecutionError(operator_object.return_output_object())
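The JSON fetched from TranscriptFileUri follows Amazon Transcribe's output format; an abridged example of the fields this handler reads (values are made up):

sample_transcription_json = {
    "jobName": "transcribe-example-workflow",
    "status": "COMPLETED",
    "results": {
        "transcripts": [
            {"transcript": "Hello and welcome to the show."}
        ],
        # "items" holds per-word entries with timing and confidence; the
        # web_captions operator below consumes them to build captions.
        "items": []
    }
}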
Example #23
def lambda_handler(event, context):
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)

    try:
        workflow_id = str(operator_object.workflow_execution_id)
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
        key = operator_object.input["Media"]["Video"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            ThumbnailError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    # Adding in exception block for now since we aren't guaranteed an asset id will be present, should remove later
    try:
        asset_id = operator_object.asset_id
    except KeyError as e:
        print("No asset id passed in with this workflow", e)
        asset_id = ''
    file_input = "s3://" + bucket + "/" + key
    audio_destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/workflows/" + workflow_id + "/"
    thumbnail_destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/"
    proxy_destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/"

    # Get user-defined location for generic data file
    if "ThumbnailPosition" in operator_object.configuration:
        thumbnail_position = int(
            operator_object.configuration["ThumbnailPosition"])
    else:
        thumbnail_position = 7

    # Get mediaconvert endpoint from cache if available
    if ("MEDIACONVERT_ENDPOINT" in os.environ):
        mediaconvert_endpoint = os.environ["MEDIACONVERT_ENDPOINT"]
        customer_mediaconvert = boto3.client(
            "mediaconvert",
            region_name=region,
            endpoint_url=mediaconvert_endpoint)
    else:
        try:
            response = mediaconvert.describe_endpoints()
        except Exception as e:
            print("Exception:\n", e)
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(ThumbnailError=str(e))
            raise MasExecutionError(operator_object.return_output_object())
        else:
            mediaconvert_endpoint = response["Endpoints"][0]["Url"]
            # Cache the mediaconvert endpoint in order to avoid getting throttled on
            # the DescribeEndpoints API.
            os.environ["MEDIACONVERT_ENDPOINT"] = mediaconvert_endpoint
            customer_mediaconvert = boto3.client(
                "mediaconvert",
                region_name=region,
                endpoint_url=mediaconvert_endpoint)

    try:
        response = customer_mediaconvert.create_job(
            Role=mediaconvert_role,
            Settings={
                "OutputGroups": [{
                    "CustomName":
                    "thumbnail",
                    "Name":
                    "File Group",
                    "Outputs": [{
                        "ContainerSettings": {
                            "Container": "RAW"
                        },
                        "VideoDescription": {
                            "ScalingBehavior": "DEFAULT",
                            "TimecodeInsertion": "DISABLED",
                            "AntiAlias": "ENABLED",
                            "Sharpness": 50,
                            "CodecSettings": {
                                "Codec": "FRAME_CAPTURE",
                                "FrameCaptureSettings": {
                                    "FramerateNumerator": 1,
                                    "FramerateDenominator": thumbnail_position,
                                    "MaxCaptures": 2,
                                    "Quality": 80
                                }
                            },
                            "DropFrameTimecode": "ENABLED",
                            "ColorMetadata": "INSERT"
                        },
                        "Extension": "jpg",
                        "NameModifier": "_thumbnail"
                    }],
                    "OutputGroupSettings": {
                        "Type": "FILE_GROUP_SETTINGS",
                        "FileGroupSettings": {
                            "Destination": thumbnail_destination
                        }
                    }
                }, {
                    "Name":
                    "File Group",
                    "Outputs": [{
                        "ContainerSettings": {
                            "Container": "MP4",
                            "Mp4Settings": {
                                "CslgAtom": "INCLUDE",
                                "FreeSpaceBox": "EXCLUDE",
                                "MoovPlacement": "PROGRESSIVE_DOWNLOAD"
                            }
                        },
                        "AudioDescriptions": [{
                            "AudioTypeControl":
                            "FOLLOW_INPUT",
                            "AudioSourceName":
                            "Audio Selector 1",
                            "CodecSettings": {
                                "Codec": "AAC",
                                "AacSettings": {
                                    "AudioDescriptionBroadcasterMix": "NORMAL",
                                    "Bitrate": 96000,
                                    "RateControlMode": "CBR",
                                    "CodecProfile": "LC",
                                    "CodingMode": "CODING_MODE_2_0",
                                    "RawFormat": "NONE",
                                    "SampleRate": 48000,
                                    "Specification": "MPEG4"
                                }
                            },
                            "LanguageCodeControl":
                            "FOLLOW_INPUT"
                        }],
                        "Extension":
                        "mp4",
                        "NameModifier":
                        "_audio"
                    }],
                    "OutputGroupSettings": {
                        "Type": "FILE_GROUP_SETTINGS",
                        "FileGroupSettings": {
                            "Destination": audio_destination
                        }
                    }
                }, {
                    "CustomName":
                    "proxy",
                    "Name":
                    "File Group",
                    "Outputs": [{
                        "VideoDescription": {
                            "ScalingBehavior": "DEFAULT",
                            "TimecodeInsertion": "DISABLED",
                            "AntiAlias": "ENABLED",
                            "Sharpness": 50,
                            "CodecSettings": {
                                "Codec": "H_264",
                                "H264Settings": {
                                    "InterlaceMode": "PROGRESSIVE",
                                    "NumberReferenceFrames": 3,
                                    "Syntax": "DEFAULT",
                                    "Softness": 0,
                                    "GopClosedCadence": 1,
                                    "GopSize": 90,
                                    "Slices": 1,
                                    "GopBReference": "DISABLED",
                                    "SlowPal": "DISABLED",
                                    "SpatialAdaptiveQuantization": "ENABLED",
                                    "TemporalAdaptiveQuantization": "ENABLED",
                                    "FlickerAdaptiveQuantization": "DISABLED",
                                    "EntropyEncoding": "CABAC",
                                    "Bitrate": 5000000,
                                    "FramerateControl": "SPECIFIED",
                                    "RateControlMode": "CBR",
                                    "CodecProfile": "MAIN",
                                    "Telecine": "NONE",
                                    "MinIInterval": 0,
                                    "AdaptiveQuantization": "HIGH",
                                    "CodecLevel": "AUTO",
                                    "FieldEncoding": "PAFF",
                                    "SceneChangeDetect": "ENABLED",
                                    "QualityTuningLevel": "SINGLE_PASS",
                                    "FramerateConversionAlgorithm":
                                    "DUPLICATE_DROP",
                                    "UnregisteredSeiTimecode": "DISABLED",
                                    "GopSizeUnits": "FRAMES",
                                    "ParControl": "SPECIFIED",
                                    "NumberBFramesBetweenReferenceFrames": 2,
                                    "RepeatPps": "DISABLED",
                                    "FramerateNumerator": 30,
                                    "FramerateDenominator": 1,
                                    "ParNumerator": 1,
                                    "ParDenominator": 1
                                }
                            },
                            "AfdSignaling": "NONE",
                            "DropFrameTimecode": "ENABLED",
                            "RespondToAfd": "NONE",
                            "ColorMetadata": "INSERT"
                        },
                        "AudioDescriptions": [{
                            "AudioTypeControl":
                            "FOLLOW_INPUT",
                            "CodecSettings": {
                                "Codec": "AAC",
                                "AacSettings": {
                                    "AudioDescriptionBroadcasterMix": "NORMAL",
                                    "RateControlMode": "CBR",
                                    "CodecProfile": "LC",
                                    "CodingMode": "CODING_MODE_2_0",
                                    "RawFormat": "NONE",
                                    "SampleRate": 48000,
                                    "Specification": "MPEG4",
                                    "Bitrate": 64000
                                }
                            },
                            "LanguageCodeControl":
                            "FOLLOW_INPUT",
                            "AudioSourceName":
                            "Audio Selector 1"
                        }],
                        "ContainerSettings": {
                            "Container": "MP4",
                            "Mp4Settings": {
                                "CslgAtom": "INCLUDE",
                                "FreeSpaceBox": "EXCLUDE",
                                "MoovPlacement": "PROGRESSIVE_DOWNLOAD"
                            }
                        },
                        "Extension":
                        "mp4",
                        "NameModifier":
                        "_proxy"
                    }],
                    "OutputGroupSettings": {
                        "Type": "FILE_GROUP_SETTINGS",
                        "FileGroupSettings": {
                            "Destination": proxy_destination
                        }
                    }
                }],
                "Inputs": [{
                    "AudioSelectors": {
                        "Audio Selector 1": {
                            "Offset": 0,
                            "DefaultSelection": "DEFAULT",
                            "ProgramSelection": 1
                        }
                    },
                    "VideoSelector": {
                        "ColorSpace": "FOLLOW"
                    },
                    "FileInput": file_input
                }]
            })

    # TODO: Add support for boto client error handling
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(ThumbnailError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        job_id = response['Job']['Id']
        operator_object.update_workflow_status("Executing")
        operator_object.add_workflow_metadata(MediaconvertJobId=job_id,
                                              MediaconvertInputFile=file_input,
                                              AssetId=asset_id,
                                              WorkflowExecutionId=workflow_id)
        return operator_object.return_output_object()
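The frame-capture settings above encode the thumbnail position as a frame rate: FramerateNumerator/FramerateDenominator = 1/thumbnail_position frames per second, and MaxCaptures of 2 means the second capture lands at the requested offset. A quick check of the expected capture times:

def frame_capture_times(thumbnail_position, max_captures=2):
    # Capture n occurs at n * thumbnail_position seconds (n starting at 0),
    # so the default position of 7 yields captures at 0s and 7s.
    return [n * thumbnail_position for n in range(max_captures)]

print(frame_capture_times(7))  # [0, 7] -> the second capture is the thumbnail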
Example #24
def lambda_handler(event, context):
    print("We got this event:\n", event)
    valid_types = ["mp3", "mp4", "wav", "flac"]
    optional_settings = {}
    operator_object = MediaInsightsOperationHelper(event)
    workflow_id = str(operator_object.workflow_execution_id)
    job_id = "transcribe" + "-" + workflow_id

    # Adding in exception block for now since we aren't guaranteed an asset id will be present, should remove later
    try:
        asset_id = operator_object.asset_id
    except KeyError as e:
        print("No asset id passed in with this workflow", e)
        asset_id = ''

    try:
        bucket = operator_object.input["Media"]["Audio"]["S3Bucket"]
        key = operator_object.input["Media"]["Audio"]["S3Key"]
    # TODO: Do we want to add support for video?
    except KeyError:
        # A KeyError raised inside an except block is not caught by a later
        # except clause on the same try, so wrap the video fallback itself.
        try:
            bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
            key = operator_object.input["Media"]["Video"]["S3Key"]
        except KeyError:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeError="No valid inputs")
            raise MasExecutionError(operator_object.return_output_object())
    file_type = key.split('.')[-1]
    if file_type not in valid_types:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Not a valid file type")
        raise MasExecutionError(operator_object.return_output_object())
    try:
        custom_vocab = operator_object.configuration["VocabularyName"]
        optional_settings["VocabularyName"] = custom_vocab
    except KeyError:
        # No custom vocab
        pass
    try:
        language_code = operator_object.configuration["TranscribeLanguage"]
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="No language code defined")
        raise MasExecutionError(operator_object.return_output_object())

    media_file = 'https://s3.' + region + '.amazonaws.com/' + bucket + '/' + key

    # If mediainfo data is available then use it to avoid transcribing silent videos.
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"]:
        num_audio_tracks = event["Input"]["MetaData"][
            "Mediainfo_num_audio_tracks"]
        # Check to see if audio tracks were detected by mediainfo
        if num_audio_tracks == "0":
            # If there is no input audio then we're done.
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
    try:
        response = transcribe.start_transcription_job(
            TranscriptionJobName=job_id,
            LanguageCode=language_code,
            Media={"MediaFileUri": media_file},
            MediaFormat=file_type,
            Settings=optional_settings)
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(TranscribeError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "IN_PROGRESS":
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "FAILED":
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError=str(
                    response["TranscriptionJob"]["FailureReason"]))
            raise MasExecutionError(operator_object.return_output_object())
        elif response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "COMPLETED":
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError="Unhandled error for this job: {job_id}".
                format(job_id=job_id))
            raise MasExecutionError(operator_object.return_output_object())
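For comparison, the smallest start_transcription_job call matching what this handler assembles; the bucket, key, job name, and vocabulary are placeholders:

import boto3

transcribe = boto3.client("transcribe")
response = transcribe.start_transcription_job(
    TranscriptionJobName="transcribe-example-workflow-id",
    LanguageCode="en-US",
    MediaFormat="mp4",
    Media={"MediaFileUri": "https://s3.us-east-1.amazonaws.com/example-bucket/example.mp4"},
    Settings={"VocabularyName": "example-vocabulary"})  # Settings may be empty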
Example #25
def web_captions(event, context):

    print("We got the following event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="No valid inputs {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        lang = operator_object.configuration["SourceLanguageCode"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="No language codes {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())

    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id for this workflow')
        asset_id = ''

    try:
        s3_response = s3.get_object(Bucket=bucket, Key=key)
        transcribe_metadata = json.loads(s3_response["Body"].read().decode("utf-8"))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(CaptionsError="Unable to read transcription from S3: {e}".format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())

    endTime = 0.0
    maxLength = 50
    wordCount = 0
    maxWords = 12
    maxSilence = 1.5

    captions = []
    caption = None
    

    for item in transcribe_metadata["results"]["items"]:
        

        isPunctuation = item["type"] == "punctuation"

        if caption is None:

            # Start of a line with punctuation, just skip it
            if isPunctuation:
                continue

            # Create a new caption line
            caption = {
                "start": float(item["start_time"]),
                "caption": "",
                "wordConfidence": []
            }

        if not isPunctuation:

            startTime = float(item["start_time"])

            # Check to see if there has been a long silence
            # between the last recorded word and start a new
            # caption if this is the case, ending the last time
            # as this one starts.

            if (len(caption["caption"]) > 0) and ((endTime + maxSilence) < startTime):

                caption["end"] = startTime
                captions.append(caption)

                caption = {
                    "start": float(startTime),
                    "caption": "",
                    "wordConfidence": []
                }

                wordCount = 0

            endTime = float(item["end_time"])

        requiresSpace = (not isPunctuation) and (len(caption["caption"]) > 0)

        if requiresSpace:
            caption["caption"] += " "

        # Process tweaks

        text = item["alternatives"][0]["content"]
        confidence = item["alternatives"][0]["confidence"]
        textLower = text.lower()

        caption["caption"] += text

        # Track raw word confidence
        if not isPunctuation:
            caption["wordConfidence"].append(
                {
                    "w": textLower,
                    "c": float(confidence)
                }
            )
            # Count words
            wordCount += 1

        # If we have reached a good amount of text finalize the caption

        if (wordCount >= maxWords) or (len(caption["caption"]) >= maxLength):
            caption["end"] = endTime
            captions.append(caption)
            wordCount = 0
            caption = None
            

    # Close the last caption if required

    if caption is not None:
        caption["end"] = endTime
        captions.append(caption)
        
    webcaptions_name = "WebCaptions" + "_" + lang
    i = 0
    for caption in captions:
        i = i + 1

        # All pages except the last are stored with end=False
        if i != len(captions):
            metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=webcaptions_name,
                                    workflow_id=workflow_id, results=caption, paginate=True, end=False)
            
            if "Status" not in metadata_upload:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload["Status"] == "Success":
                    pass
                else:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                    raise MasExecutionError(operator_object.return_output_object())
        else:
            # The final page is stored with end=True to close out pagination
            metadata_upload = dataplane.store_asset_metadata(asset_id=asset_id, operator_name=webcaptions_name,
                                    workflow_id=workflow_id, results=caption, paginate=True, end=True)
            if "Status" not in metadata_upload:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                if metadata_upload["Status"] == "Success":
                    response_json = metadata_upload
                    operator_object.add_workflow_metadata(WebCaptionsS3Bucket=response_json['Bucket'],
                                                          WebCaptionsS3Key=response_json['Key'])
                    operator_object.update_workflow_status("Complete")
                    return operator_object.return_output_object()
                else:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        CaptionsError="Unable to store web captions {e}".format(e=metadata_upload))
                    raise MasExecutionError(operator_object.return_output_object())
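To make the splitting rules above concrete: a caption closes when it reaches maxWords (12) or maxLength (50) characters, or when the silence before the next word exceeds maxSilence (1.5 s). Two synthetic Transcribe items showing the silence split:

items = [
    {"type": "pronunciation", "start_time": "0.0", "end_time": "0.4",
     "alternatives": [{"content": "Hello", "confidence": "0.99"}]},
    # The 2.0 s gap here exceeds maxSilence (1.5 s), so "world" starts a
    # new caption and the first caption's end is set to 2.4.
    {"type": "pronunciation", "start_time": "2.4", "end_time": "2.8",
     "alternatives": [{"content": "world", "confidence": "0.98"}]},
]
# Fed through the loop above, these yield two captions:
#   {"start": 0.0, "end": 2.4, "caption": "Hello", ...}
#   {"start": 2.4, "end": 2.8, "caption": "world", ...}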
Example #26
def lambda_handler(event, context):
    print("We got this event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)

    try:
        workflow_id = operator_object.workflow_execution_id
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(PollyError="Missing a required metadata key {e}".format(e=e))
        raise MasExecutionError(operator_object.return_output_object())
    try:
        asset_id = operator_object.asset_id
    except KeyError:
        print('No asset id passed along with this workflow')
        asset_id = ''

    try:
        bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
        key = operator_object.input["Media"]["Text"]["S3Key"]
    except KeyError:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(PollyError="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())
    try:
        s3_response = s3.get_object(Bucket=bucket, Key=key)
        translate_metadata = json.loads(s3_response["Body"].read().decode("utf-8"))
        translation = translate_metadata["TranslatedText"]
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(PollyError="Unable to read translation from S3: {e}".format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())

    # If input text is empty then we're done.
    if len(translation) < 1:
        operator_object.update_workflow_status("Complete")
        return operator_object.return_output_object()


    voices = {'en': 'Kendra', 'ru': 'Maxim', 'es': 'Lucia', 'fr': 'Mathieu'}

    # Get language code of the translation, we should just pass this along in the event later
    try:
        comprehend = boto3.client('comprehend')

        language = comprehend.detect_dominant_language(
            Text=translation
        )
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(PollyError="Unable to determine the language with comprehend: {e}".format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())

    else:
        language_code = language['Languages'][0]['LanguageCode']
        if language_code not in voices:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(PollyError="The only supported languages are: {e}".format(e=voices.keys()))
            raise MasExecutionError(operator_object.return_output_object())
        else:
            voice_id = voices[language_code]

    print("Translation received from S3:\n", translation)

    # S3 key prefixes should not start with "/", otherwise an empty top-level
    # folder is created in the bucket.
    output_key = 'private/assets/' + asset_id + "/workflows/" + workflow_id + "/" + "translation"

    try:
        polly_response = polly.start_speech_synthesis_task(
            OutputFormat='mp3',
            OutputS3BucketName=bucket,
            OutputS3KeyPrefix=output_key,
            Text=translation,
            TextType='text',
            VoiceId=voice_id
        )

    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(PollyError="Unable to get response from polly: {e}".format(e=str(e)))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        polly_job_id = polly_response['SynthesisTask']['TaskId']
        operator_object.add_workflow_metadata(PollyJobId=polly_job_id, WorkflowExecutionId=workflow_id, AssetId=asset_id)
        operator_object.update_workflow_status('Executing')
        return operator_object.return_output_object()
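A hedged sketch of the language-to-voice selection performed above, assuming the same voices table; the helper name is hypothetical, and the response shape is that of Comprehend's detect_dominant_language:

import boto3

def pick_polly_voice(text, voices):
    # Detect the dominant language and look up a matching Polly voice;
    # returns None when the table has no entry for the detected language.
    comprehend = boto3.client('comprehend')
    result = comprehend.detect_dominant_language(Text=text)
    language_code = result['Languages'][0]['LanguageCode']
    return voices.get(language_code)

# e.g. pick_polly_voice("Bonjour tout le monde", {'fr': 'Mathieu'}) -> 'Mathieu'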
Example #27
def lambda_handler(event, context):
    print("We got this event:\n", event)
    valid_types = ["mp3", "mp4", "wav", "flac"]
    identify_language = False
    transcribe_job_config = {}
    optional_settings = {}
    model_settings = {}
    job_execution_settings = {}
    content_redaction_settings = {}
    language_options = []
    operator_object = MediaInsightsOperationHelper(event)
    workflow_id = str(event["WorkflowExecutionId"])
    asset_id = event['AssetId']
    job_id = "transcribe" + "-" + workflow_id

    try:
        if "ProxyEncode" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["ProxyEncode"]["S3Bucket"]
            key = event["Input"]["Media"]["ProxyEncode"]["S3Key"]
        elif "Video" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["Video"]["S3Bucket"]
            key = event["Input"]["Media"]["Video"]["S3Key"]
        elif "Audio" in event["Input"]["Media"]:
            bucket = event["Input"]["Media"]["Audio"]["S3Bucket"]
            key = event["Input"]["Media"]["Audio"]["S3Key"]
        file_type = key.split('.')[-1]
    except Exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())

    if file_type not in valid_types:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            TranscribeError="Not a valid file type")
        raise MasExecutionError(operator_object.return_output_object())
    # Determine the transcription language. If no language code is configured,
    # or it is set to 'auto', let Transcribe identify the language. Custom
    # vocabulary and the other optional job settings are read below.
    if "TranscribeLanguage" in operator_object.configuration:
        language_code = operator_object.configuration["TranscribeLanguage"]
        if language_code == 'auto':
            identify_language = True
    else:
        identify_language = True

    media_file = 'https://s3.' + region + '.amazonaws.com/' + bucket + '/' + key

    # Read optional transcription job settings:
    if "VocabularyName" in operator_object.configuration:
        option_value = operator_object.configuration["VocabularyName"]
        optional_settings["VocabularyName"] = option_value
    if "ShowSpeakerLabels" in operator_object.configuration:
        option_value = operator_object.configuration["ShowSpeakerLabels"]
        optional_settings["ShowSpeakerLabels"] = option_value
    if "MaxSpeakerLabels" in operator_object.configuration:
        option_value = operator_object.configuration["MaxSpeakerLabels"]
        optional_settings["MaxSpeakerLabels"] = option_value
    if "ChannelIdentification" in operator_object.configuration:
        option_value = operator_object.configuration["ChannelIdentification"]
        optional_settings["ChannelIdentification"] = option_value
    if "MaxAlternatives" in operator_object.configuration:
        option_value = operator_object.configuration["MaxAlternatives"]
        optional_settings["MaxAlternatives"] = option_value
    if "VocabularyFilterName" in operator_object.configuration:
        option_value = operator_object.configuration["VocabularyFilterName"]
        optional_settings["VocabularyFilterName"] = option_value
    if "VocabularyFilterMethod" in operator_object.configuration:
        option_value = operator_object.configuration["VocabularyFilterMethod"]
        optional_settings["VocabularyFilterMethod"] = option_value
    if "LanguageModelName" in operator_object.configuration:
        option_value = operator_object.configuration["LanguageModelName"]
        model_settings["LanguageModelName"] = option_value
    if "AllowDeferredExecution" in operator_object.configuration:
        option_value = operator_object.configuration["AllowDeferredExecution"]
        job_execution_settings["AllowDeferredExecution"] = option_value
    if "DataAccessRoleArn" in operator_object.configuration:
        option_value = operator_object.configuration["DataAccessRoleArn"]
        job_execution_settings["DataAccessRoleArn"] = option_value
    if "RedactionType" in operator_object.configuration:
        option_value = operator_object.configuration["RedactionType"]
        content_redaction_settings["RedactionType"] = option_value
    if "RedactionOutput" in operator_object.configuration:
        option_value = operator_object.configuration["RedactionOutput"]
        content_redaction_settings["RedactionOutput"] = option_value
    if "IdentifyLanguage" in operator_object.configuration:
        option_value = operator_object.configuration["IdentifyLanguage"]
        identify_language = option_value
    if "LanguageOptions" in operator_object.configuration:
        option_value = operator_object.configuration["LanguageOptions"]
        language_options = option_value

    # Combine all the defined transcription job settings into a single dict.
    # LanguageCode and IdentifyLanguage are mutually exclusive, so set only
    # whichever applies; this also avoids referencing language_code when it
    # was never configured.
    transcribe_job_config["TranscriptionJobName"] = job_id
    transcribe_job_config["Media"] = {"MediaFileUri": media_file}
    transcribe_job_config["MediaFormat"] = file_type
    if identify_language:
        transcribe_job_config["IdentifyLanguage"] = True
    else:
        transcribe_job_config["LanguageCode"] = language_code
    if len(optional_settings) > 0:
        transcribe_job_config["Settings"] = optional_settings
    if len(model_settings) > 0:
        transcribe_job_config["ModelSettings"] = model_settings
    if len(job_execution_settings) > 0:
        transcribe_job_config["JobExecutionSettings"] = job_execution_settings
    if len(content_redaction_settings) > 0:
        transcribe_job_config["ContentRedaction"] = content_redaction_settings
    if len(language_options) > 0:
        transcribe_job_config["LanguageOptions"] = language_options

    # If mediainfo data is available then use it to avoid transcribing silent videos.
    if "Mediainfo_num_audio_tracks" in event["Input"]["MetaData"]:
        num_audio_tracks = event["Input"]["MetaData"][
            "Mediainfo_num_audio_tracks"]
        # Check to see if audio tracks were detected by mediainfo
        if num_audio_tracks == "0":
            # If there is no input audio then we're done.
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()

    try:
        # Run the transcribe job.
        # The ** operator converts the job config dict to keyword arguments.
        response = transcribe.start_transcription_job(**transcribe_job_config)
        print(response)
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(TranscribeError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        if response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "IN_PROGRESS":
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        elif response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "FAILED":
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError=str(
                    response["TranscriptionJob"]["FailureReason"]))
            raise MasExecutionError(operator_object.return_output_object())
        elif response["TranscriptionJob"][
                "TranscriptionJobStatus"] == "COMPLETE":
            operator_object.update_workflow_status("Executing")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                AssetId=asset_id,
                WorkflowExecutionId=workflow_id)
            return operator_object.return_output_object()
        else:
            operator_object.update_workflow_status("Error")
            operator_object.add_workflow_metadata(
                TranscribeJobId=job_id,
                TranscribeError="Unhandled error for this job: {job_id}".
                format(job_id=job_id))
            raise MasExecutionError(operator_object.return_output_object())
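# A companion status check would poll the job started above. This is a
# minimal sketch (not part of the original operator), assuming the same
# module-level `transcribe` client:
def get_transcribe_job_status(job_id):
    # get_transcription_job reports TranscriptionJobStatus as one of:
    # QUEUED, IN_PROGRESS, FAILED, or COMPLETED.
    response = transcribe.get_transcription_job(TranscriptionJobName=job_id)
    return response["TranscriptionJob"]["TranscriptionJobStatus"]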
def check_polly_webcaptions(event, context):
    print("We got this event:\n", event)

    operator_object = MediaInsightsOperationHelper(event)
    webcaptions_object = WebCaptions(operator_object)

    try:
        polly_collection = operator_object.metadata["PollyCollection"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            PollyCollectionError="Missing a required metadata key {e}".format(
                e=e))
        raise MasExecutionError(operator_object.return_output_object())

    finished_tasks = 0
    for caption in polly_collection:
        if caption["PollyStatus"] in ["completed", "failed", "not supported"]:
            finished_tasks = finished_tasks + 1
        else:
            try:
                polly_response = polly.get_speech_synthesis_task(
                    TaskId=caption["PollyTaskId"])
            except Exception as e:
                operator_object.update_workflow_status("Error")
                operator_object.add_workflow_metadata(
                    PollyCollectionError=
                    "Unable to get response from polly: {e}".format(e=str(e)))
                raise MasExecutionError(operator_object.return_output_object())
            else:
                polly_status = polly_response["SynthesisTask"]["TaskStatus"]
                print("The status from polly is:\n", polly_status)
                if polly_status in ["inProgress", "scheduled"]:
                    operator_object.update_workflow_status("Executing")
                elif polly_status == "completed":
                    # TODO: Store job details as metadata in dataplane
                    finished_tasks = finished_tasks + 1

                    caption["PollyAudio"]["Uri"] = polly_response[
                        "SynthesisTask"]["OutputUri"]

                    operator_object.update_workflow_status("Executing")

                elif polly_status == "failed":
                    finished_tasks = finished_tasks + 1
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        PollyCollectionError="Polly returned as failed: {e}".
                        format(e=str(polly_response["SynthesisTask"]
                                     ["TaskStatusReason"])))
                    raise MasExecutionError(
                        operator_object.return_output_object())
                else:
                    operator_object.update_workflow_status("Error")
                    operator_object.add_workflow_metadata(
                        PollyCollectionError=
                        "Polly returned an unrecognized status: {e}".format(
                            e=str(polly_status)))
                    raise MasExecutionError(
                        operator_object.return_output_object())

    # If all the Polly jobs are done then the operator is complete
    if finished_tasks == len(polly_collection):
        operator_object.update_workflow_status("Complete")
        webcaptions_object.PutWebCaptionsCollection("CaptionsCollection",
                                                    polly_collection)

    operator_object.add_workflow_metadata(PollyCollection=polly_collection)

    return operator_object.return_output_object()
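# For reference, each PollyCollection entry consumed above appears to carry
# at least the following fields (shape inferred from this code; values
# illustrative):
#
#   {
#       "PollyTaskId": "57f3a077-...",
#       "PollyStatus": "started",
#       "PollyAudio": {"Uri": "..."}
#   }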
def lambda_handler(event, context):
    print("We got the following event:\n", event)
    operator_object = MediaInsightsOperationHelper(event)

    try:
        workflow_id = str(operator_object.workflow_execution_id)
        bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
        key = operator_object.input["Media"]["Video"]["S3Key"]
    except KeyError as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            MediaconvertError="Missing a required metadata key {e}".format(
                e=e))
        raise MasExecutionError(operator_object.return_output_object())

    # An asset id is not guaranteed to be present, so fall back to an empty
    # string for now; this block should be removed once it always is.
    try:
        asset_id = operator_object.asset_id
    except KeyError as e:
        print("No asset id passed in with this workflow", e)
        asset_id = ''

    file_input = "s3://" + bucket + "/" + key
    destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/workflows/" + workflow_id + "/"
    thumbnail_destination = "s3://" + bucket + "/" + 'private/assets/' + asset_id + "/"

    try:
        response = mediaconvert.describe_endpoints()
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        mediaconvert_endpoint = response["Endpoints"][0]["Url"]
        customer_mediaconvert = boto3.client(
            "mediaconvert",
            region_name=region,
            endpoint_url=mediaconvert_endpoint)
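
    # The MediaConvert job below defines two file output groups: an audio-only
    # MP4 proxy (NameModifier "_audio") written to the workflow destination,
    # and a frame-capture thumbnail (NameModifier "_thumbnail") written to the
    # asset root.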

    try:
        response = customer_mediaconvert.create_job(
            Role=mediaconvert_role,
            Settings={
                "OutputGroups": [{
                    "Name":
                    "File Group",
                    "Outputs": [{
                        "ContainerSettings": {
                            "Container": "MP4",
                            "Mp4Settings": {
                                "CslgAtom": "INCLUDE",
                                "FreeSpaceBox": "EXCLUDE",
                                "MoovPlacement": "PROGRESSIVE_DOWNLOAD"
                            }
                        },
                        "AudioDescriptions": [{
                            "AudioTypeControl":
                            "FOLLOW_INPUT",
                            "AudioSourceName":
                            "Audio Selector 1",
                            "CodecSettings": {
                                "Codec": "AAC",
                                "AacSettings": {
                                    "AudioDescriptionBroadcasterMix": "NORMAL",
                                    "Bitrate": 96000,
                                    "RateControlMode": "CBR",
                                    "CodecProfile": "LC",
                                    "CodingMode": "CODING_MODE_2_0",
                                    "RawFormat": "NONE",
                                    "SampleRate": 48000,
                                    "Specification": "MPEG4"
                                }
                            },
                            "LanguageCodeControl":
                            "FOLLOW_INPUT"
                        }],
                        "Extension":
                        "mp4",
                        "NameModifier":
                        "_audio"
                    }],
                    "OutputGroupSettings": {
                        "Type": "FILE_GROUP_SETTINGS",
                        "FileGroupSettings": {
                            "Destination": destination
                        }
                    }
                }, {
                    "CustomName":
                    "thumbnail",
                    "Name":
                    "File Group",
                    "Outputs": [{
                        "ContainerSettings": {
                            "Container": "RAW"
                        },
                        "VideoDescription": {
                            "ScalingBehavior": "DEFAULT",
                            "TimecodeInsertion": "DISABLED",
                            "AntiAlias": "ENABLED",
                            "Sharpness": 50,
                            "CodecSettings": {
                                "Codec": "FRAME_CAPTURE",
                                "FrameCaptureSettings": {
                                    "FramerateNumerator": 1,
                                    "FramerateDenominator": 5,
                                    "MaxCaptures": 2,
                                    "Quality": 80
                                }
                            },
                            "DropFrameTimecode": "ENABLED",
                            "ColorMetadata": "INSERT"
                        },
                        "NameModifier": "_thumbnail"
                    }],
                    "OutputGroupSettings": {
                        "Type": "FILE_GROUP_SETTINGS",
                        "FileGroupSettings": {
                            "Destination": thumbnail_destination
                        }
                    }
                }],
                "AdAvailOffset":
                0,
                "Inputs": [{
                    "AudioSelectors": {
                        "Audio Selector 1": {
                            "Offset": 0,
                            "DefaultSelection": "DEFAULT",
                            "ProgramSelection": 1
                        }
                    },
                    "VideoSelector": {
                        "ColorSpace": "FOLLOW"
                    },
                    "FilterEnable": "AUTO",
                    "PsiControl": "USE_PSI",
                    "FilterStrength": 0,
                    "DeblockFilter": "DISABLED",
                    "DenoiseFilter": "DISABLED",
                    "TimecodeSource": "EMBEDDED",
                    "FileInput": file_input
                }]
            })
    # TODO: Add support for boto client error handling
    except Exception as e:
        print("Exception:\n", e)
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(MediaconvertError=str(e))
        raise MasExecutionError(operator_object.return_output_object())
    else:
        job_id = response['Job']['Id']
        operator_object.update_workflow_status("Executing")
        operator_object.add_workflow_metadata(MediaconvertJobId=job_id,
                                              MediaconvertInputFile=key,
                                              AssetId=asset_id,
                                              WorkflowExecutionId=workflow_id)
        return operator_object.return_output_object()
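# A companion status check for the MediaConvert job started above would use
# get_job. A minimal sketch (not part of the original operator), given a
# client constructed against the discovered account endpoint as above:
def get_mediaconvert_job_status(customer_mediaconvert, job_id):
    # get_job reports Status as one of: SUBMITTED, PROGRESSING, COMPLETE,
    # CANCELED, or ERROR.
    response = customer_mediaconvert.get_job(Id=job_id)
    return response["Job"]["Status"]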
def lambda_handler(event, context):
    operator_object = MediaInsightsOperationHelper(event)
    # Get operator parameters
    try:
        workflow_id = str(event["WorkflowExecutionId"])
        asset_id = event['AssetId']
        if "Video" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Video"]["S3Bucket"]
            key = operator_object.input["Media"]["Video"]["S3Key"]
            file_type = key.split('.')[-1]
        elif "Audio" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Audio"]["S3Bucket"]
            key = operator_object.input["Media"]["Audio"]["S3Key"]
            file_type = key.split('.')[-1]
        elif "Image" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Image"]["S3Bucket"]
            key = operator_object.input["Media"]["Image"]["S3Key"]
            file_type = key.split('.')[-1]
        elif "Text" in operator_object.input["Media"]:
            bucket = operator_object.input["Media"]["Text"]["S3Bucket"]
            key = operator_object.input["Media"]["Text"]["S3Key"]
            file_type = key.split('.')[-1]
    except Exception:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            GenericDataLookupError="No valid inputs")
        raise MasExecutionError(operator_object.return_output_object())

    # Get the metadata filename
    print("Looking up metadata for s3://" + bucket + "/" + key)
    # Get user-defined location for generic data file
    if "Key" in operator_object.configuration:
        metadata_filename = operator_object.configuration["Key"]
    else:
        operator_object.add_workflow_metadata(
            GenericDataLookupError="Missing S3 key for data file.")
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())
    if "Bucket" in operator_object.configuration:
        metadata_bucket = operator_object.configuration["Bucket"]
    else:
        operator_object.add_workflow_metadata(
            GenericDataLookupError="Missing S3 bucket for data file.")
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())

    # Get metadata
    s3 = boto3.client('s3')
    try:
        print("Getting data from s3://" + metadata_bucket + "/" +
              metadata_filename)
        data = s3.get_object(Bucket=metadata_bucket, Key=metadata_filename)
        metadata_json = json.loads(data['Body'].read().decode('utf-8'))
    except Exception as e:
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            GenericDataLookupError="Unable to read data file. " + str(e))
        raise MasExecutionError(operator_object.return_output_object())

    # Verify that the metadata is a dict, as required by the dataplane
    if not isinstance(metadata_json, dict):
        operator_object.update_workflow_status("Error")
        operator_object.add_workflow_metadata(
            GenericDataLookupError="Metadata must be of type dict. Found " +
            str(type(metadata_json)) + " instead.")
        raise MasExecutionError(operator_object.return_output_object())

    # Save metadata to dataplane
    operator_object.add_workflow_metadata(AssetId=asset_id,
                                          WorkflowExecutionId=workflow_id)
    dataplane = DataPlane()
    metadata_upload = dataplane.store_asset_metadata(asset_id,
                                                     operator_object.name,
                                                     workflow_id,
                                                     metadata_json)

    # Validate that the metadata was saved to the dataplane
    if "Status" not in metadata_upload:
        operator_object.add_workflow_metadata(
            GenericDataLookupError=
            "Unable to upload metadata for asset: {asset}".format(
                asset=asset_id))
        operator_object.update_workflow_status("Error")
        raise MasExecutionError(operator_object.return_output_object())
    else:
        # Update the workflow status
        if metadata_upload["Status"] == "Success":
            print(
                "Uploaded metadata for asset: {asset}".format(asset=asset_id))
            operator_object.update_workflow_status("Complete")
            return operator_object.return_output_object()
        else:
            operator_object.add_workflow_metadata(
                GenericDataLookupError=
                "Unable to upload metadata for asset: {asset}".format(
                    asset=asset_id))
            operator_object.update_workflow_status("Error")
            raise MasExecutionError(operator_object.return_output_object())
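# A minimal usage sketch for the operator above (all names and values
# illustrative): the operator configuration names an S3 object whose body
# must be a JSON dict, which is then stored as asset metadata.
#
#   Configuration: {"Bucket": "my-data-bucket", "Key": "metadata/asset1.json"}
#
#   s3://my-data-bucket/metadata/asset1.json:
#   {"Genre": "Documentary", "Rating": "TV-G"}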