Beispiel #1
0
    def start_3play_transcription_process(self, encoded_file):
        """
        Start the 3PlayMedia transcription flow for an encoded video file.

        The video's transcript status is switched to "in progress" and a
        transcription request is issued through `ThreePlayMediaClient`. When
        no transcript credentials exist for the org/provider pair, a warning
        is logged and nothing else happens.

        Arguments:
            encoded_file (str): name of encoded file to construct video url
        """
        try:
            # Several course runs may be listed, but they all belong to the
            # same org, so the first entry is enough to work it out.
            course_org = extract_course_org(self.video_proto.platform_course_url[0])
            video = self.video_query
            credentials = TranscriptCredentials.objects.get(org=course_org, provider=video.provider)

            # Flag the video's transcript as being in progress.
            val_client = VALAPICall(video_proto=None, val_status=None)
            utils.update_video_status(val_api_client=val_client, video=video, status=TranscriptStatus.IN_PROGRESS)

            # Location of the encoded media to be transcribed.
            auth = self.auth_dict
            media_url = build_url(auth['s3_base_url'], auth['edx_s3_endpoint_bucket'], encoded_file)

            # Callback endpoint; the keyword arguments are additional
            # attributes that will come back with the callback.
            veda_base_url = auth['veda_base_url']
            callback_path = reverse(
                '3play_media_callback',
                args=[auth['transcript_provider_request_token']]
            )
            transcription_callback = build_url(
                veda_base_url,
                callback_path,
                org=course_org,
                edx_video_id=video.studio_id,
                lang_code=video.source_language,
            )

            # Build the 3PlayMedia client and start the transcription process.
            ThreePlayMediaClient(
                org=course_org,
                video=video,
                media_url=media_url,
                api_key=credentials.api_key,
                api_secret=credentials.api_secret,
                callback_url=transcription_callback,
                turnaround_level=video.three_play_turnaround,
                three_play_api_base_url=auth['three_play_api_base_url'],
            ).generate_transcripts()

        except TranscriptCredentials.DoesNotExist:
            LOGGER.warning(
                '[DELIVERY] : Transcript preference is not found for provider=%s, video=%s',
                self.video_query.provider,
                self.video_query.studio_id,
            )
 def test_video_status_update(self, status, update_val_status):
     """
     Verify that utils.update_video_status stores the given status on the
     video and calls the edx-val client only when `update_val_status` says so.
     """
     mock_val_client = MagicMock()
     mock_video = Mock(studio_id='1234', transcript_status='earlier status')

     # Exercise the helper under test.
     utils.update_video_status(
         val_api_client=mock_val_client,
         video=mock_video,
         status=status,
     )

     # Check whether the edx-val api method was hit, then the stored status.
     val_was_called = mock_val_client.update_video_status.called
     self.assertEqual(val_was_called, update_val_status)
     self.assertEqual(mock_video.transcript_status, status)
Beispiel #3
0
    def cielo24_transcription_flow(self, encoded_file):
        """
        Cielo24 transcription flow.

        Looks up the transcript api key for the course org, marks the video's
        transcript status as in progress and starts the transcription flow
        through `Cielo24Transcript`.

        Arguments:
            encoded_file (str): name of encoded file to construct video url

        Returns:
            None. When no transcript credentials exist for the org and the
            video's provider, a warning is logged and nothing else is done.
        """
        # Only the first course url is used to work out the organization.
        org = extract_course_org(self.video_proto.platform_course_url[0])

        try:
            api_key = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider).api_key
        except TranscriptCredentials.DoesNotExist:
            # `warning` is used instead of the deprecated `warn` alias, which
            # also matches the logging call in the 3Play flow.
            LOGGER.warning('[DELIVERY] Unable to find cielo24 api_key for org=%s', org)
            return None

        # Url of the encoded video that is to be transcribed.
        s3_video_url = build_url(
            self.auth_dict['s3_base_url'],
            self.auth_dict['edx_s3_endpoint_bucket'],
            encoded_file
        )

        # Base url of the endpoint to be called back for this transcript.
        callback_base_url = build_url(
            self.auth_dict['veda_base_url'],
            reverse(
                'cielo24_transcript_completed',
                args=[self.auth_dict['transcript_provider_request_token']]
            )
        )

        # update transcript status for video.
        val_api_client = VALAPICall(video_proto=None, val_status=None)
        utils.update_video_status(
            val_api_client=val_api_client,
            video=self.video_query,
            status=TranscriptStatus.IN_PROGRESS
        )

        cielo24 = Cielo24Transcript(
            self.video_query,
            org,
            api_key,
            self.video_query.cielo24_turnaround,
            self.video_query.cielo24_fidelity,
            self.video_query.preferred_languages,
            s3_video_url,
            callback_base_url,
            self.auth_dict['cielo24_api_base_url'],
        )
        cielo24.start_transcription_flow()
def handle_video_translations(video, translations, file_id, api_key,
                              log_prefix):
    """
    Process all the completed 3Play Media translations of a single video.

    This is a sub-module of `retrieve_three_play_translations`.

    Arguments:
        video: Video data object whose translations need to be handled here.
        translations: A list containing translations metadata information received from 3play Media.
        file_id: It is file identifier that is assigned to a Video by 3Play Media.
        api_key: An api key to communicate to the 3Play Media.
        log_prefix: A logging prefix used by the main process.

    For every translation whose state is complete and that has a matching
    in-progress tracking process:
        - Fetch translated transcript content from 3Play Media.
        - Validate the content and mark the tracking process ready or failed.
        - Convert translated SRT transcript to SJson format and upload it to S3.
        - Update edx-val for the completed transcript.
        - Once every transcript process of the video is ready, update the
          transcript status of the video as well.
    """
    in_progress_processes = get_in_progress_translation_processes(video)

    for metadata in translations:
        translation_id = metadata['id']
        state = metadata['state']
        language = metadata['target_language_iso_639_1_code']
        edx_video_id = video.studio_id

        LOGGER.info(
            '[3PlayMedia Task] Translation metadata retrieved -- video=%s, translation_id=%s, language=%s, status=%s.',
            edx_video_id,
            translation_id,
            language,
            state,
        )

        # Translations that are not complete yet are left alone.
        if state != COMPLETE:
            continue

        # Find the tracking process that corresponds to this translation;
        # without one there is nothing to update.
        tracking_process = get_in_progress_translation_process(
            in_progress_processes,
            file_id=file_id,
            translation_id=translation_id,
            target_language=language,
        )
        if tracking_process is None:
            continue

        # 1 - Fetch translated transcript content from 3Play Media.
        srt_content = get_transcript_content_from_3play_media(
            api_key=api_key,
            edx_video_id=edx_video_id,
            file_id=file_id,
            translation_id=translation_id,
            target_language=language,
        )
        if srt_content is None:
            continue

        # 2 - Validate the received content; an invalid transcript fails the
        # tracking process and is not processed any further.
        if not validate_transcript_response(
                edx_video_id=edx_video_id,
                file_id=file_id,
                transcript=srt_content,
                lang_code=language,
                log_prefix=log_prefix):
            tracking_process.update(status=TranscriptStatus.FAILED)
            continue
        tracking_process.update(status=TranscriptStatus.READY)

        # 3 - Convert SRT translation to SJson format and upload it to S3.
        sjson_name = convert_to_sjson_and_upload_to_s3(
            srt_transcript=srt_content,
            target_language=language,
            edx_video_id=edx_video_id,
            file_id=file_id,
        )

        # 4 - Update edx-val with completed transcript information.
        val_client = VALAPICall(video_proto=None, val_status=None)
        val_client.update_val_transcript(
            video_id=edx_video_id,
            lang_code=language,
            name=sjson_name,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        LOGGER.info(
            '[3PlayMedia Task] Translation retrieval was successful -- video=%s, translation_id=%s, language=%s.',
            edx_video_id,
            translation_id,
            language,
        )

        # 5 - When every transcript process of this video is ready, update
        # the transcript status of the video too.
        all_jobs = TranscriptProcessMetadata.objects.filter(video=video)
        if all(job.status == TranscriptStatus.READY for job in all_jobs):
            utils.update_video_status(
                val_api_client=val_client,
                video=video,
                status=TranscriptStatus.READY,
            )
def three_play_transcription_callback(sender, **kwargs):
    """
    This is a receiver for 3Play Media callback signal.

    Arguments:
        sender: sender of the signal
        kwargs(dict): video transcription metadata; `org`, `edx_video_id`,
            `lang_code`, `file_id` and `status` are required, while
            `error_description` is only read for an error status.

    Process (for a completed transcription):
        * download transcript(SRT) from 3PlayMedia
        * validate it; an invalid transcript fails the process and stops here
        * convert SRT to SJSON
        * upload SJSON to AWS S3
        * update the transcript in VAL
        * order translations for all the preferred languages except the
          video's speech language
        * update transcript status when there is nothing left to translate

    Raises:
        Exception: errors raised while converting/uploading the transcript
            and unexpected errors raised while ordering translations are
            logged and re-raised.
    """
    log_prefix = u'3PlayMedia Callback'
    # Extract all the must have attributes
    org = kwargs['org']
    edx_video_id = kwargs['edx_video_id']
    lang_code = kwargs['lang_code']
    file_id = kwargs['file_id']
    state = kwargs['status']

    # Find the most recent tracking process for this file and language;
    # without it there is nothing to update.
    try:
        process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            lang_code=lang_code,
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        LOGGER.exception(
            u'[3PlayMedia Callback] Unable to get transcript process for org=%s, edx_video_id=%s, file_id=%s.',
            org,
            edx_video_id,
            file_id,
        )
        return

    # On completion of a transcript
    # Indicates that the default video speech transcription has been done successfully.
    if state == COMPLETE:
        log_args = (edx_video_id, lang_code, file_id)

        # 1 - Retrieve transcript credentials
        transcript_secrets = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=org,
            edx_video_id=edx_video_id,
            file_id=file_id,
            log_prefix=log_prefix,
        )
        if not transcript_secrets:
            process.update(status=TranscriptStatus.FAILED)
            return

        # 2 - Fetch the transcript from 3Play Media.
        try:
            srt_transcript = fetch_srt_data(
                THREE_PLAY_TRANSCRIPT_URL.format(file_id=file_id),
                apikey=transcript_secrets.api_key,
            )
        except TranscriptFetchError:
            LOGGER.exception(
                u'[3PlayMedia Callback] Fetch request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args)
            process.update(status=TranscriptStatus.FAILED)
            return

        # 3 - Validate transcript content received from 3Play Media and mark the transcription process.
        is_valid_transcript = validate_transcript_response(
            edx_video_id=edx_video_id,
            file_id=file_id,
            transcript=srt_transcript,
            lang_code=lang_code,
            log_prefix=log_prefix,
        )
        if not is_valid_transcript:
            # Stop here: an invalid transcript must not be uploaded, pushed to
            # edx-val, translated or reported as a success.
            process.update(status=TranscriptStatus.FAILED)
            return
        process.update(status=TranscriptStatus.READY)

        # 4 - Convert SRT transcript to SJson format and upload it to S3.
        try:
            sjson_transcript = convert_srt_to_sjson(srt_transcript)
            sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
        except Exception:
            # in case of any exception, log and raise.
            LOGGER.exception(
                u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args)
            raise

        # 5 - Update edx-val with completed transcript information.
        val_api = VALAPICall(video_proto=None, val_status=None)
        val_api.update_val_transcript(
            video_id=process.video.studio_id,
            lang_code=lang_code,
            name=sjson_file,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        # 6 - Translation Phase
        # That's the phase for kicking off translation processes for all the
        # preferred languages except the video's speech language. Filtering
        # (rather than `list.remove`) does not raise when the speech language
        # is not among the preferred languages.
        target_languages = [
            language
            for language in process.video.preferred_languages
            if language != lang_code
        ]

        # Create the translation tracking processes for all the target languages.
        for target_language in target_languages:
            TranscriptProcessMetadata.objects.create(
                video=process.video,
                provider=TranscriptProvider.THREE_PLAY,
                process_id=file_id,
                lang_code=target_language,
                status=TranscriptStatus.PENDING,
            )

        # Order translations for target languages
        try:
            order_translations(file_id,
                               transcript_secrets.api_key,
                               transcript_secrets.api_secret,
                               source_language=lang_code,
                               target_languages=target_languages)
        except TranscriptTranslationError:
            # A translation error is logged only; the callback carries on.
            LOGGER.exception(
                u'[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
                *log_args)
        except Exception:
            LOGGER.exception(
                u'[3PlayMedia Callback] Error while translating the transcripts - video=%s, lang_code=%s, file_id=%s',
                *log_args)
            raise

        # 7 - Update transcript status.
        # It will be for edx-val as well as edx-video-pipeline and this will be the case when
        # there is only one transcript language for a video(that is, already been processed).
        if not target_languages:
            utils.update_video_status(val_api_client=val_api,
                                      video=process.video,
                                      status=TranscriptStatus.READY)

        # On success, a happy farewell log.
        LOGGER.info((
            u'[3PlayMedia Callback] Video speech transcription was successful for'
            u' video=%s -- lang_code=%s -- process_id=%s'), *log_args)

    elif state == ERROR:
        # Fail the process
        process.status = TranscriptStatus.FAILED
        process.save()
        # Log the error information; a missing description must not turn the
        # error report itself into a KeyError.
        LOGGER.error(
            u'[3PlayMedia Callback] Error while transcription - error=%s, org=%s, edx_video_id=%s, file_id=%s.',
            kwargs.get('error_description'),
            org,
            edx_video_id,
            file_id,
        )
    else:
        # Status must be either 'complete' or 'error'
        # more details on http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        LOGGER.error(
            u'[3PlayMedia Callback] Got invalid status - status=%s, org=%s, edx_video_id=%s, file_id=%s.',
            state,
            org,
            edx_video_id,
            file_id,
        )
def cielo24_transcript_callback(sender, **kwargs):
    """
    Handle a "transcript complete" request for a Cielo24 job.

    Arguments:
        sender: first positional argument; not used here
        kwargs(dict): must contain `org`, `job_id`, `video_id`, `iwp_name`
            and `lang_code`

    Process:
        * download transcript(SRT) from Cielo24
        * convert SRT to SJSON
        * upload SJSON to AWS S3
        * update transcript status in VAL
    """
    org, job_id, video_id, iwp_name, lang_code = (
        kwargs[key]
        for key in ('org', 'job_id', 'video_id', 'iwp_name', 'lang_code')
    )

    LOGGER.info(
        '[CIELO24 TRANSCRIPTS] Transcript complete request received for '
        'video=%s -- org=%s -- lang=%s -- job_id=%s -- iwp_name=%s',
        video_id, org, lang_code, job_id, iwp_name)

    # Transcript credentials of the organization, if there are any.
    try:
        credentials = TranscriptCredentials.objects.get(
            org=org,
            provider=TranscriptProvider.CIELO24,
        )
    except TranscriptCredentials.DoesNotExist:
        credentials = None
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Unable to get transcript credentials for job_id=%s',
            job_id)

    # Most recent tracking process for this job and language, if there is one.
    try:
        job_process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.CIELO24,
            process_id=job_id,
            lang_code=lang_code,
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        job_process = None
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Unable to get transcript process metadata for job_id=%s',
            job_id)

    # No tracking process means there is nothing to update.
    if not job_process:
        return

    # if transcript credentials are missing then we can do nothing
    # but fail the process.
    if not credentials:
        job_process.status = TranscriptStatus.FAILED
        job_process.save()
        return

    try:
        srt_content = fetch_srt_data(
            CIELO24_GET_CAPTION_URL,
            v=CIELO24_API_VERSION,
            job_id=job_id,
            api_token=credentials.api_key,
            caption_format='SRT',
        )
    except TranscriptFetchError:
        job_process.status = TranscriptStatus.FAILED
        job_process.save()
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Fetch request failed for video=%s -- lang=%s -- job_id=%s',
            video_id, lang_code, job_id)
        return

    # mark the transcript for this particular language as ready
    job_process.status = TranscriptStatus.READY
    job_process.save()

    try:
        sjson_name = upload_sjson_to_s3(CONFIG, convert_srt_to_sjson(srt_content))
    except Exception:
        # Log what failed and let the error propagate.
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s.',
            video_id, lang_code, job_id)
        raise

    # update edx-val with completed transcript information
    video = job_process.video
    val_client = VALAPICall(video, val_status=None)
    val_client.update_val_transcript(
        video_id=video.studio_id,
        lang_code=lang_code,
        name=sjson_name,
        transcript_format=TRANSCRIPT_SJSON,
        provider=TranscriptProvider.CIELO24,
    )

    # update transcript status for video in edx-val only if all language
    # transcripts are ready
    all_jobs = TranscriptProcessMetadata.objects.filter(video__studio_id=video_id)
    if all(job.status == TranscriptStatus.READY for job in all_jobs):
        utils.update_video_status(
            val_api_client=val_client,
            video=job_process.video,
            status=TranscriptStatus.READY,
        )