def start_3play_transcription_process(self, encoded_file):
    """
    Kick off the 3PlayMedia transcription flow for this video.

    Looks up the transcript credentials for the video's org and provider,
    marks the video's transcript status as in progress and asks 3PlayMedia
    to transcribe the encoded file, passing a callback URL along. When no
    credentials are found, a warning is logged and nothing else is done.

    Arguments:
        encoded_file (str): name of encoded file to construct video url
    """
    try:
        # The list may hold several course runs (all having the same org),
        # so the first entry is used to work out the org.
        first_course_url = self.video_proto.platform_course_url[0]
        org = extract_course_org(first_course_url)
        credentials = TranscriptCredentials.objects.get(
            org=org,
            provider=self.video_query.provider,
        )

        # Mark the video's transcript status as in progress.
        utils.update_video_status(
            val_api_client=VALAPICall(video_proto=None, val_status=None),
            video=self.video_query,
            status=TranscriptStatus.IN_PROGRESS,
        )

        # Build the media URL handed to 3PlayMedia.
        auth = self.auth_dict
        media_url = build_url(
            auth['s3_base_url'],
            auth['edx_s3_endpoint_bucket'],
            encoded_file,
        )

        # Build the callback URL; the keyword arguments are additional
        # attributes that will come back with the callback.
        veda_base_url = auth['veda_base_url']
        callback_path = reverse(
            '3play_media_callback',
            args=[auth['transcript_provider_request_token']],
        )
        callback_url = build_url(
            veda_base_url,
            callback_path,
            org=org,
            edx_video_id=self.video_query.studio_id,
            lang_code=self.video_query.source_language,
        )

        # Initialize the 3PlayMedia client and start the transcription.
        ThreePlayMediaClient(
            org=org,
            video=self.video_query,
            media_url=media_url,
            api_key=credentials.api_key,
            api_secret=credentials.api_secret,
            callback_url=callback_url,
            turnaround_level=self.video_query.three_play_turnaround,
            three_play_api_base_url=auth['three_play_api_base_url'],
        ).generate_transcripts()

    except TranscriptCredentials.DoesNotExist:
        LOGGER.warning(
            '[DELIVERY] : Transcript preference is not found for provider=%s, video=%s',
            self.video_query.provider,
            self.video_query.studio_id,
        )
def test_video_status_update(self, status, update_val_status):
    """
    Verify that utils.update_video_status sets the video's transcript status
    and calls the edx-val api client only when expected.
    """
    mocked_val_client = MagicMock()
    mocked_video = Mock(studio_id='1234', transcript_status='earlier status')

    # Exercise the helper under test.
    utils.update_video_status(
        val_api_client=mocked_val_client,
        video=mocked_video,
        status=status,
    )

    # The edx-val client must have been called exactly when expected ...
    val_was_called = mocked_val_client.update_video_status.called
    self.assertEqual(val_was_called, update_val_status)
    # ... and the video must carry the new status.
    self.assertEqual(mocked_video.transcript_status, status)
def cielo24_transcription_flow(self, encoded_file):
    """
    Cielo24 transcription flow.

    Looks up the Cielo24 api key for the video's org, marks the video's
    transcript status as in progress and starts the Cielo24 transcription
    for the encoded file. When no credentials exist for the org and
    provider, a warning is logged and the flow is abandoned.

    Arguments:
        encoded_file (str): name of encoded file to construct video url

    Returns:
        None
    """
    # The first course url is used to work out the org.
    org = extract_course_org(self.video_proto.platform_course_url[0])

    try:
        api_key = TranscriptCredentials.objects.get(org=org, provider=self.video_query.provider).api_key
    except TranscriptCredentials.DoesNotExist:
        # `Logger.warn` is a deprecated alias; use `Logger.warning`.
        LOGGER.warning('[DELIVERY] Unable to find cielo24 api_key for org=%s', org)
        return None

    # URL of the encoded video file handed to Cielo24.
    s3_video_url = build_url(
        self.auth_dict['s3_base_url'],
        self.auth_dict['edx_s3_endpoint_bucket'],
        encoded_file
    )

    # Base URL passed to the Cielo24 client for its completion callback.
    callback_base_url = build_url(
        self.auth_dict['veda_base_url'],
        reverse(
            'cielo24_transcript_completed',
            args=[self.auth_dict['transcript_provider_request_token']]
        )
    )

    # update transcript status for video.
    val_api_client = VALAPICall(video_proto=None, val_status=None)
    utils.update_video_status(
        val_api_client=val_api_client,
        video=self.video_query,
        status=TranscriptStatus.IN_PROGRESS
    )

    cielo24 = Cielo24Transcript(
        self.video_query,
        org,
        api_key,
        self.video_query.cielo24_turnaround,
        self.video_query.cielo24_fidelity,
        self.video_query.preferred_languages,
        s3_video_url,
        callback_base_url,
        self.auth_dict['cielo24_api_base_url'],
    )
    cielo24.start_transcription_flow()
def handle_video_translations(video, translations, file_id, api_key, log_prefix):
    """
    Process every completed translation of a single video; this is a
    sub-module of `retrieve_three_play_translations`.

    Arguments:
        video: Video data object whose translations need to be handled here.
        translations: A list containing translations metadata information received from 3play Media.
        file_id: It is file identifier that is assigned to a Video by 3Play Media.
        api_key: An api key to communicate to the 3Play Media.
        log_prefix: A logging prefix used by the main process.

    For each completed translation:
        - Fetch translated transcript content from 3Play Media.
        - Validate the content of received translated transcript.
        - Convert translated SRT transcript to SJson format and upload it to S3.
        - Update edx-val for a completed transcript.
        - Update transcript status for video in edx-val as well as
          edx-video-pipeline once all of the video's processes are ready.
    """
    in_progress_processes = get_in_progress_translation_processes(video)

    for metadata in translations:
        translation_id = metadata['id']
        state = metadata['state']
        language = metadata['target_language_iso_639_1_code']

        LOGGER.info(
            '[3PlayMedia Task] Translation metadata retrieved -- video=%s, translation_id=%s, language=%s, status=%s.',
            video.studio_id,
            translation_id,
            language,
            state,
        )

        # Only completed translations are handled.
        if state != COMPLETE:
            continue

        # Find the process that tracks this translation; skip when there is none.
        tracking_process = get_in_progress_translation_process(
            in_progress_processes,
            file_id=file_id,
            translation_id=translation_id,
            target_language=language,
        )
        if tracking_process is None:
            continue

        # 1 - Fetch translated transcript content from 3Play Media.
        srt_content = get_transcript_content_from_3play_media(
            api_key=api_key,
            edx_video_id=video.studio_id,
            file_id=file_id,
            translation_id=translation_id,
            target_language=language,
        )
        if srt_content is None:
            continue

        # 2 - Validate the content and record the outcome on the tracking process.
        content_is_valid = validate_transcript_response(
            edx_video_id=video.studio_id,
            file_id=file_id,
            transcript=srt_content,
            lang_code=language,
            log_prefix=log_prefix,
        )
        if not content_is_valid:
            tracking_process.update(status=TranscriptStatus.FAILED)
            continue
        tracking_process.update(status=TranscriptStatus.READY)

        # 3 - Convert SRT translation to SJson format and upload it to S3.
        sjson_file = convert_to_sjson_and_upload_to_s3(
            srt_transcript=srt_content,
            target_language=language,
            edx_video_id=video.studio_id,
            file_id=file_id,
        )

        # 4 - Update edx-val with completed transcript information.
        val_api = VALAPICall(video_proto=None, val_status=None)
        val_api.update_val_transcript(
            video_id=video.studio_id,
            lang_code=language,
            name=sjson_file,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        LOGGER.info(
            '[3PlayMedia Task] Translation retrieval was successful -- video=%s, translation_id=%s, language=%s.',
            video.studio_id,
            translation_id,
            language,
        )

        # 5 - Once every process of this video is ready, update the transcript
        # status for the video in edx-val as well as edx-video-pipeline.
        all_jobs = TranscriptProcessMetadata.objects.filter(video=video)
        if all(job.status == TranscriptStatus.READY for job in all_jobs):
            utils.update_video_status(
                val_api_client=val_api,
                video=video,
                status=TranscriptStatus.READY,
            )
def three_play_transcription_callback(sender, **kwargs):
    """
    This is a receiver for 3Play Media callback signal.

    Arguments:
        sender: sender of the signal
        kwargs(dict): video transcription metadata. Must contain `org`,
            `edx_video_id`, `lang_code`, `file_id` and `status`; may contain
            `error_description` when the status reports an error.

    Process (when the status is complete):
        * retrieve the transcript credentials for the org
        * download transcript(SRT) from 3PlayMedia
        * validate the transcript; an invalid transcript fails the process
          and nothing further is done with it
        * convert SRT to SJSON
        * upload SJSON to AWS S3
        * update edx-val with the completed transcript
        * order translations for all the preferred languages other than
          the video's speech language
        * update the video's transcript status when no translation is needed

    Raises:
        KeyError: if one of the must-have attributes is missing from kwargs.
        Exception: errors from the SJSON conversion/upload, and unexpected
            errors while ordering translations, are logged and re-raised.
    """
    log_prefix = u'3PlayMedia Callback'

    # Extract all the must have attributes
    org = kwargs['org']
    edx_video_id = kwargs['edx_video_id']
    lang_code = kwargs['lang_code']
    file_id = kwargs['file_id']
    state = kwargs['status']

    try:
        process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.THREE_PLAY,
            process_id=file_id,
            lang_code=lang_code,
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        LOGGER.exception(
            u'[3PlayMedia Callback] Unable to get transcript process for org=%s, edx_video_id=%s, file_id=%s.',
            org,
            edx_video_id,
            file_id,
        )
        return

    # On completion of a transcript
    # Indicates that the default video speech transcription has been done successfully.
    if state == COMPLETE:
        log_args = (edx_video_id, lang_code, file_id)

        # 1 - Retrieve transcript credentials
        transcript_secrets = get_transcript_credentials(
            provider=TranscriptProvider.THREE_PLAY,
            org=org,
            edx_video_id=edx_video_id,
            file_id=file_id,
            log_prefix=log_prefix,
        )
        if not transcript_secrets:
            process.update(status=TranscriptStatus.FAILED)
            return

        # 2 - Fetch the transcript from 3Play Media.
        try:
            srt_transcript = fetch_srt_data(
                THREE_PLAY_TRANSCRIPT_URL.format(file_id=file_id),
                apikey=transcript_secrets.api_key,
            )
        except TranscriptFetchError:
            LOGGER.exception(
                u'[3PlayMedia Callback] Fetch request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args
            )
            process.update(status=TranscriptStatus.FAILED)
            return

        # 3 - Validate transcript content received from 3Play Media and mark the transcription process.
        is_valid_transcript = validate_transcript_response(
            edx_video_id=edx_video_id,
            file_id=file_id,
            transcript=srt_transcript,
            lang_code=lang_code,
            log_prefix=log_prefix,
        )
        if not is_valid_transcript:
            # Stop here: an invalid transcript must not be uploaded,
            # registered in edx-val or sent for translation.
            process.update(status=TranscriptStatus.FAILED)
            return
        process.update(status=TranscriptStatus.READY)

        # 4 - Convert SRT transcript to SJson format and upload it to S3.
        try:
            sjson_transcript = convert_srt_to_sjson(srt_transcript)
            sjson_file = upload_sjson_to_s3(CONFIG, sjson_transcript)
        except Exception:
            # in case of any exception, log and raise.
            LOGGER.exception(
                u'[3PlayMedia Callback] Request failed for video=%s -- lang_code=%s -- process_id=%s',
                *log_args
            )
            raise

        # 5 - Update edx-val with completed transcript information.
        val_api = VALAPICall(video_proto=None, val_status=None)
        val_api.update_val_transcript(
            video_id=process.video.studio_id,
            lang_code=lang_code,
            name=sjson_file,
            transcript_format=TRANSCRIPT_SJSON,
            provider=TranscriptProvider.THREE_PLAY,
        )

        # 6 - Translation Phase
        # That's the phase for kicking off translation processes for all the
        # preferred languages except the video's speech language.
        # Filtering (rather than `list.remove`) avoids a ValueError when the
        # speech language is not among the preferred languages.
        target_languages = [
            language
            for language in process.video.preferred_languages
            if language != lang_code
        ]

        # Create the translation tracking processes for all the target languages.
        for target_language in target_languages:
            TranscriptProcessMetadata.objects.create(
                video=process.video,
                provider=TranscriptProvider.THREE_PLAY,
                process_id=file_id,
                lang_code=target_language,
                status=TranscriptStatus.PENDING,
            )

        # Order translations for target languages
        try:
            order_translations(
                file_id,
                transcript_secrets.api_key,
                transcript_secrets.api_secret,
                source_language=lang_code,
                target_languages=target_languages
            )
        except TranscriptTranslationError:
            # Logged only; the callback carries on to the remaining steps.
            LOGGER.exception(
                u'[3PlayMedia Callback] Translation could not be performed - video=%s, lang_code=%s, file_id=%s.',
                *log_args
            )
        except Exception:
            LOGGER.exception(
                u'[3PlayMedia Callback] Error while translating the transcripts - video=%s, lang_code=%s, file_id=%s',
                *log_args
            )
            raise

        # 7 - Update transcript status.
        # It will be for edx-val as well as edx-video-pipeline and this will be the case when
        # there is only one transcript language for a video(that is, already been processed).
        if not target_languages:
            utils.update_video_status(
                val_api_client=val_api,
                video=process.video,
                status=TranscriptStatus.READY
            )

        # On success, a happy farewell log.
        LOGGER.info(
            (u'[3PlayMedia Callback] Video speech transcription was successful for'
             u' video=%s -- lang_code=%s -- process_id=%s'),
            *log_args
        )

    elif state == ERROR:
        # Fail the process
        process.status = TranscriptStatus.FAILED
        process.save()
        # Log the error information; `.get` keeps a missing description
        # from raising a KeyError while the error is being reported.
        LOGGER.error(
            u'[3PlayMedia Callback] Error while transcription - error=%s, org=%s, edx_video_id=%s, file_id=%s.',
            kwargs.get('error_description'),
            org,
            edx_video_id,
            file_id,
        )
    else:
        # Status must be either 'complete' or 'error'
        # more details on http://support.3playmedia.com/hc/en-us/articles/227729828-Files-API-Methods
        LOGGER.error(
            u'[3PlayMedia Callback] Got invalid status - status=%s, org=%s, edx_video_id=%s, file_id=%s.',
            state,
            org,
            edx_video_id,
            file_id,
        )
def cielo24_transcript_callback(sender, **kwargs):
    """
    Handle a Cielo24 transcript-complete callback.

    * download transcript(SRT) from Cielo24
    * convert SRT to SJSON
    * upload SJSON to AWS S3
    * update transcript status in VAL
    """
    org = kwargs['org']
    job_id = kwargs['job_id']
    video_id = kwargs['video_id']
    iwp_name = kwargs['iwp_name']
    lang_code = kwargs['lang_code']

    LOGGER.info(
        '[CIELO24 TRANSCRIPTS] Transcript complete request received for '
        'video=%s -- org=%s -- lang=%s -- job_id=%s -- iwp_name=%s',
        video_id,
        org,
        lang_code,
        job_id,
        iwp_name,
    )

    # Look up the organization's Cielo24 credentials.
    try:
        credentials = TranscriptCredentials.objects.get(
            org=org,
            provider=TranscriptProvider.CIELO24,
        )
    except TranscriptCredentials.DoesNotExist:
        credentials = None
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Unable to get transcript credentials for job_id=%s',
            job_id,
        )

    # Look up the process that tracks this job and language.
    try:
        tracking_process = TranscriptProcessMetadata.objects.filter(
            provider=TranscriptProvider.CIELO24,
            process_id=job_id,
            lang_code=lang_code,
        ).latest()
    except TranscriptProcessMetadata.DoesNotExist:
        tracking_process = None
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Unable to get transcript process metadata for job_id=%s',
            job_id,
        )

    # Without a tracking process there is nothing to update.
    if not tracking_process:
        return

    # Without credentials nothing can be fetched, so fail the process.
    if not credentials:
        tracking_process.status = TranscriptStatus.FAILED
        tracking_process.save()
        return

    try:
        srt_data = fetch_srt_data(
            CIELO24_GET_CAPTION_URL,
            v=CIELO24_API_VERSION,
            job_id=job_id,
            api_token=credentials.api_key,
            caption_format='SRT',
        )
    except TranscriptFetchError:
        tracking_process.status = TranscriptStatus.FAILED
        tracking_process.save()
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Fetch request failed for video=%s -- lang=%s -- job_id=%s',
            video_id,
            lang_code,
            job_id,
        )
        return

    # Mark the transcript for this language as ready.
    tracking_process.status = TranscriptStatus.READY
    tracking_process.save()

    try:
        sjson = convert_srt_to_sjson(srt_data)
        sjson_file_name = upload_sjson_to_s3(CONFIG, sjson)
    except Exception:
        LOGGER.exception(
            '[CIELO24 TRANSCRIPTS] Request failed for video=%s -- lang=%s -- job_id=%s.',
            video_id,
            lang_code,
            job_id,
        )
        raise

    # Update edx-val with completed transcript information.
    val_api = VALAPICall(tracking_process.video, val_status=None)
    val_api.update_val_transcript(
        video_id=tracking_process.video.studio_id,
        lang_code=lang_code,
        name=sjson_file_name,
        transcript_format=TRANSCRIPT_SJSON,
        provider=TranscriptProvider.CIELO24,
    )

    # Update the video's transcript status only once the transcripts for
    # all of its languages are ready.
    jobs_for_video = TranscriptProcessMetadata.objects.filter(video__studio_id=video_id)
    if all(job.status == TranscriptStatus.READY for job in jobs_for_video):
        utils.update_video_status(
            val_api_client=val_api,
            video=tracking_process.video,
            status=TranscriptStatus.READY,
        )