def validate_transcript_upload_data(request):
    """
    Check a transcript upload request and gather the data needed to process it.

    Arguments:
        request: A WSGI request; its POST and FILES parts are inspected.

    Returns:
        An `(error, validated_data)` tuple. When a check fails, `error` holds
        a translated message and `validated_data` is an empty dict.
    """
    post_data = request.POST
    uploaded_files = request.FILES
    video_locator = post_data.get('locator')
    edx_video_id = post_data.get('edx_video_id')

    # Report the first failing check; the order of the checks matters.
    if not video_locator:
        return _(u'Video locator is required.'), {}
    if 'transcript-file' not in uploaded_files:
        return _(u'A transcript file is required.'), {}

    transcript_file = uploaded_files['transcript-file']
    extension = os.path.splitext(transcript_file.name)[1][1:]
    if extension != Transcript.SRT:
        return _(u'This transcript file type is not supported.'), {}
    if 'edx_video_id' not in post_data:
        return _(u'Video ID is required.'), {}

    error, video = validate_video_module(request, video_locator)
    if error:
        return error, {}

    # Prefer the id sent with the request; fall back to the one stored on the video.
    cleaned_video_id = clean_video_id(edx_video_id) or clean_video_id(video.edx_video_id)
    return error, {
        'video': video,
        'edx_video_id': cleaned_video_id,
        'transcript_file': uploaded_files['transcript-file'],
    }
def validate_transcript_upload_data(request):
    """
    Check a transcript upload request and gather the data needed to process it.

    Arguments:
        request: A WSGI request; its POST and FILES parts are inspected.

    Returns:
        An `(error, validated_data)` tuple. When a check fails, `error` holds
        a translated message and `validated_data` is an empty dict.
    """
    post_data = request.POST
    uploaded_files = request.FILES
    video_locator = post_data.get('locator')
    edx_video_id = post_data.get('edx_video_id')

    # Report the first failing check; the order of the checks matters.
    if not video_locator:
        return _(u'Video locator is required.'), {}
    if 'transcript-file' not in uploaded_files:
        return _(u'A transcript file is required.'), {}

    transcript_file = uploaded_files['transcript-file']
    extension = os.path.splitext(transcript_file.name)[1][1:]
    if extension != Transcript.SRT:
        return _(u'This transcript file type is not supported.'), {}
    if 'edx_video_id' not in post_data:
        return _(u'Video ID is required.'), {}

    error, video = validate_video_module(request, video_locator)
    if error:
        return error, {}

    # Prefer the id sent with the request; fall back to the one stored on the video.
    cleaned_video_id = clean_video_id(edx_video_id) or clean_video_id(video.edx_video_id)
    return error, {
        'video': video,
        'edx_video_id': cleaned_video_id,
        'transcript_file': uploaded_files['transcript-file'],
    }
def save_transcript_to_storage(edx_video_id, language_code, transcript_content, file_format=Transcript.SJSON,
                               force_update=False):
    """
    Pushes a given transcript's data to django storage.

    Arguments:
        edx_video_id: Video ID; it is cleaned before use.
        language_code: Language code of the transcript.
        transcript_content: Transcript content, wrapped in a `ContentFile`.
        file_format: Transcript file format.
        force_update: When true, `create_or_update_video_transcript` is used;
            otherwise `create_video_transcript` is used.

    Returns:
        Whatever the create / create-or-update call returned.

    Raises:
        ValCannotCreateError: logged and re-raised.
    """
    try:
        cleaned_video_id = clean_video_id(edx_video_id)
        if not force_update:
            result = create_video_transcript(
                cleaned_video_id, language_code, file_format, ContentFile(transcript_content)
            )
            LOGGER.info(
                "[Transcript migration] save_transcript_to_storage %s for %s with create method",
                result,
                cleaned_video_id,
            )
        else:
            result = create_or_update_video_transcript(
                cleaned_video_id,
                language_code,
                {'file_format': file_format},
                ContentFile(transcript_content),
            )
            LOGGER.info(
                "[Transcript migration] save_transcript_to_storage %s for %s with create_or_update method",
                bool(result),
                cleaned_video_id,
            )
        return result
    except ValCannotCreateError as err:
        LOGGER.exception("[Transcript migration] save_transcript_to_storage_failed: %s", err)
        raise
def test_get_transcript_val_exceptions(self, exception_to_raise, mock_Transcript):
    """
    `get_transcript_from_val` must raise `NotFoundError` when
    `Transcript.convert` fails with the given exception.
    """
    mock_Transcript.convert.side_effect = exception_to_raise

    video = self.video
    default_lang = video.get_default_transcript_language(video.get_transcripts_info())
    cleaned_video_id = transcripts_utils.clean_video_id(video.edx_video_id)

    with self.assertRaises(NotFoundError):
        transcripts_utils.get_transcript_from_val(
            cleaned_video_id,
            lang=default_lang,
            output_format=transcripts_utils.Transcript.SRT,
        )
def async_migrate_transcript(self, course_key, **kwargs):   #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    One subtask is queued per (video, language) pair. Pairs whose transcript
    is already available are only queued when `kwargs['force_update']` is true.

    Returns:
        The result of the chord of subtasks, or a 'Failed: ...' message when
        the course cannot be found.
    """
    try:
        course = modulestore().get_course(CourseKey.from_string(course_key))
        if not course:
            raise KeyError(u'Invalid course key: ' + unicode(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key,
            exception=text_type(exc),
        )

    force_update = kwargs['force_update']
    course_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key,
        len(course_videos),
    )

    queued_subtasks = []
    for video in course_videos:
        transcripts_by_lang = video.transcripts
        if video.sub:
            # The English transcript is kept in `sub`, so it is added explicitly.
            transcripts_by_lang.update({'en': video.sub})

        for lang in transcripts_by_lang:
            already_present = is_transcript_available(clean_video_id(video.edx_video_id), lang)
            if not already_present:
                overwrite = False
            elif force_update:
                overwrite = True
            else:
                # Already present and no forced update requested: skip it.
                continue
            queued_subtasks.append(
                async_migrate_transcript_subtask.s(video, lang, overwrite, **kwargs)
            )

    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(queued_subtasks))
    callback = task_status_callback.s()
    status = chord(queued_subtasks)(callback)
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key,
        len(status.get()),
    )
    return status.get()
def test_get_transcript_val_exceptions(self, exception_to_raise, mock_Transcript):
    """
    `get_transcript_from_val` must raise `NotFoundError` when
    `Transcript.convert` fails with the given exception.
    """
    mock_Transcript.convert.side_effect = exception_to_raise

    video = self.video
    default_lang = video.get_default_transcript_language(video.get_transcripts_info())
    cleaned_video_id = transcripts_utils.clean_video_id(video.edx_video_id)

    with self.assertRaises(NotFoundError):
        transcripts_utils.get_transcript_from_val(
            cleaned_video_id,
            lang=default_lang,
            output_format=transcripts_utils.Transcript.SRT,
        )
def link_video_to_component(video_component, user):
    """
    Links a VAL video to the video component.

    When the component already carries a (cleaned) video ID it is returned
    untouched; otherwise an external video is created, stored on the
    component, and the component is saved on behalf of `user`.

    Arguments:
        video_component: video descriptor item.
        user: A requesting user.

    Returns:
        A cleaned Video ID.
    """
    existing_id = clean_video_id(video_component.edx_video_id)
    if existing_id:
        return existing_id

    created_id = create_external_video(display_name=u'external video')
    video_component.edx_video_id = created_id
    video_component.save_with_metadata(user)
    return created_id
def link_video_to_component(video_component, user):
    """
    Links a VAL video to the video component.

    When the component already carries a (cleaned) video ID it is returned
    untouched; otherwise an external video is created, stored on the
    component, and the component is saved on behalf of `user`.

    Arguments:
        video_component: video descriptor item.
        user: A requesting user.

    Returns:
        A cleaned Video ID.
    """
    existing_id = clean_video_id(video_component.edx_video_id)
    if existing_id:
        return existing_id

    created_id = create_external_video(display_name=u'external video')
    video_component.edx_video_id = created_id
    video_component.save_with_metadata(user)
    return created_id
def async_migrate_transcript(self, course_key, **kwargs):   #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    One subtask is queued per (video, language) pair. Pairs whose transcript
    is already available are only queued when `kwargs['force_update']` is true.

    Returns:
        The result of the chord of subtasks, or a 'Failed: ...' message when
        the course cannot be found.
    """
    try:
        course = modulestore().get_course(CourseKey.from_string(course_key))
        if not course:
            raise KeyError(u'Invalid course key: ' + unicode(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key,
            exception=text_type(exc),
        )

    force_update = kwargs['force_update']
    course_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key,
        len(course_videos),
    )

    queued_subtasks = []
    for video in course_videos:
        transcripts_by_lang = video.transcripts
        if video.sub:
            # The English transcript is kept in `sub`, so it is added explicitly.
            transcripts_by_lang.update({'en': video.sub})

        for lang in transcripts_by_lang:
            already_present = is_transcript_available(clean_video_id(video.edx_video_id), lang)
            if not already_present:
                overwrite = False
            elif force_update:
                overwrite = True
            else:
                # Already present and no forced update requested: skip it.
                continue
            queued_subtasks.append(
                async_migrate_transcript_subtask.s(video, lang, overwrite, **kwargs)
            )

    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(queued_subtasks))
    callback = task_status_callback.s()
    status = chord(queued_subtasks)(callback)
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key,
        len(status.get()),
    )
    return status.get()
def async_migrate_transcript_subtask(self, *args, **kwargs):  #pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        args: `(video, language_code, force_update)`.
        kwargs: must contain `commit`; unless it is exactly `True` nothing is
            migrated and only a "will be migrated" message is returned.

    Returns:
        A human-readable status message: 'Success: ...' when the transcript
        was saved, 'Failed: ...' otherwise.
    """
    video, language_code, force_update = args
    commit = kwargs['commit']
    result = None

    if commit is not True:
        return 'Language {0} transcript of video {1} will be migrated'.format(
            language_code,
            video.edx_video_id
        )

    LOGGER.info("[Transcript migration] process for %s transcript started", language_code)

    try:
        transcript_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video, language_code, Transcript.SJSON, transcript_info)

        edx_video_id = clean_video_id(video.edx_video_id)

        if not edx_video_id:
            # Keep the freshly created id in the local as well. Otherwise the
            # check below still sees an empty id, the transcript is never
            # saved and the task reports a failure.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id
            video.save_with_metadata(user=User.objects.get(username='******'))

        if edx_video_id:
            result = save_transcript_to_storage(
                edx_video_id,
                language_code,
                transcript_content,
                Transcript.SJSON,
                force_update
            )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError) as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: language {language} of video {video} with exception {exception}'.format(
            language=language_code,
            video=video.edx_video_id,
            exception=text_type(exc)
        )

    LOGGER.info("[Transcript migration] process for %s transcript ended", language_code)
    if result is not None:
        return 'Success: language {0} of video {1}'.format(language_code, video.edx_video_id)
    else:
        return 'Failed: language {0} of video {1}'.format(language_code, video.edx_video_id)
def async_migrate_transcript_subtask(self, *args, **kwargs):  #pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        args: `(video, language_code, force_update)`.
        kwargs: must contain `commit`; unless it is exactly `True` nothing is
            migrated and only a "will be migrated" message is returned.

    Returns:
        A human-readable status message: 'Success: ...' when the transcript
        was saved, 'Failed: ...' otherwise.
    """
    video, language_code, force_update = args
    commit = kwargs['commit']
    result = None

    if commit is not True:
        return 'Language {0} transcript of video {1} will be migrated'.format(
            language_code, video.edx_video_id)

    LOGGER.info("[Transcript migration] process for %s transcript started", language_code)

    try:
        transcript_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video, language_code, Transcript.SJSON, transcript_info)

        edx_video_id = clean_video_id(video.edx_video_id)

        if not edx_video_id:
            # Keep the freshly created id in the local as well. Otherwise the
            # check below still sees an empty id, the transcript is never
            # saved and the task reports a failure.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id
            video.save_with_metadata(user=User.objects.get(username='******'))

        if edx_video_id:
            result = save_transcript_to_storage(edx_video_id, language_code,
                                                transcript_content,
                                                Transcript.SJSON, force_update)
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError) as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: language {language} of video {video} with exception {exception}'.format(
            language=language_code,
            video=video.edx_video_id,
            exception=text_type(exc))

    LOGGER.info("[Transcript migration] process for %s transcript ended", language_code)
    if result is not None:
        return 'Success: language {0} of video {1}'.format(
            language_code, video.edx_video_id)
    else:
        return 'Failed: language {0} of video {1}'.format(
            language_code, video.edx_video_id)
def _validate_transcripts_data(request):
    """
    Validates that the request contains all data needed for transcripts processing.

    Returns tuple of 3 elements::

        data: dict, loaded json from request,
        videos: parsed `data` to useful format,
        item:  video item from storage

    Raises `TranscriptsRequestValidationException` if the data is empty, the
    item cannot be found by locator, or the item is not a "video" module.
    """
    payload = json.loads(request.GET.get('data', '{}'))
    if not payload:
        raise TranscriptsRequestValidationException(_('Incoming video data is empty.'))

    try:
        item = _get_item(request, payload)
    except (InvalidKeyError, ItemNotFoundError):
        raise TranscriptsRequestValidationException(_("Can't find item by locator."))

    if item.category != 'video':
        raise TranscriptsRequestValidationException(_('Transcripts are supported only for "video" modules.'))

    # parse data from request.GET['data']['videos'] to useful format
    videos = {'youtube': '', 'html5': {}}
    for entry in payload.get('videos'):
        video_type = entry['type']
        video_id = entry['video']
        if video_type == 'youtube':
            videos['youtube'] = video_id
        elif video_type == 'edx_video_id':
            # Only keep an edx_video_id that is non-empty after cleaning.
            if clean_video_id(video_id):
                videos['edx_video_id'] = video_id
        # do not add same html5 videos
        # NOTE(review): the lookup uses the literal key 'video', not the video id - confirm intent.
        elif videos['html5'].get('video') != video_id:
            videos['html5'][video_id] = entry['mode']

    return payload, videos, item
def _validate_transcripts_data(request):
    """
    Validates that the request contains all data needed for transcripts processing.

    Returns tuple of 3 elements::

        data: dict, loaded json from request,
        videos: parsed `data` to useful format,
        item:  video item from storage

    Raises `TranscriptsRequestValidationException` if the data is empty, the
    item cannot be found by locator, or the item is not a "video" module.
    """
    payload = json.loads(request.GET.get('data', '{}'))
    if not payload:
        raise TranscriptsRequestValidationException(_('Incoming video data is empty.'))

    try:
        item = _get_item(request, payload)
    except (InvalidKeyError, ItemNotFoundError):
        raise TranscriptsRequestValidationException(_("Can't find item by locator."))

    if item.category != 'video':
        raise TranscriptsRequestValidationException(_('Transcripts are supported only for "video" modules.'))

    # parse data from request.GET['data']['videos'] to useful format
    videos = {'youtube': '', 'html5': {}}
    for entry in payload.get('videos'):
        video_type = entry['type']
        video_id = entry['video']
        if video_type == 'youtube':
            videos['youtube'] = video_id
        elif video_type == 'edx_video_id':
            # Only keep an edx_video_id that is non-empty after cleaning.
            if clean_video_id(video_id):
                videos['edx_video_id'] = video_id
        # do not add same html5 videos
        # NOTE(review): the lookup uses the literal key 'video', not the video id - confirm intent.
        elif videos['html5'].get('video') != video_id:
            videos['html5'][video_id] = entry['mode']

    return payload, videos, item
def save_transcript_to_storage(
        edx_video_id,
        language_code,
        transcript_content,
        file_format=Transcript.SJSON,
        force_update=False
):
    """
    Pushes a given transcript's data to django storage.

    Arguments:
        edx_video_id: Video ID; it is cleaned before use.
        language_code: Language code of the transcript.
        transcript_content: Transcript content, wrapped in a `ContentFile`.
        file_format: Transcript file format.
        force_update: When true, `create_or_update_video_transcript` is used;
            otherwise `create_video_transcript` is used.

    Returns:
        Whatever the create / create-or-update call returned.

    Raises:
        ValCannotCreateError: logged and re-raised.
    """
    try:
        cleaned_video_id = clean_video_id(edx_video_id)
        if not force_update:
            result = create_video_transcript(
                cleaned_video_id, language_code, file_format, ContentFile(transcript_content)
            )
            LOGGER.info(
                "[Transcript migration] save_transcript_to_storage %s for %s with create method",
                result,
                cleaned_video_id,
            )
        else:
            result = create_or_update_video_transcript(
                cleaned_video_id,
                language_code,
                {'file_format': file_format},
                ContentFile(transcript_content),
            )
            LOGGER.info(
                "[Transcript migration] save_transcript_to_storage %s for %s with create_or_update method",
                bool(result),
                cleaned_video_id,
            )
        return result
    except ValCannotCreateError as err:
        LOGGER.exception("[Transcript migration] save_transcript_to_storage_failed: %s", err)
        raise
def check_transcripts(request):
    """
    Check state of transcripts availability.

    request.GET['data'] has key `videos`, which can contain any of the following::

        [
            {u'type': u'youtube', u'video': u'OEoXaMPEzfM', u'mode': u'youtube'},
            {u'type': u'html5', u'video': u'video1', u'mode': u'mp4'}
            {u'type': u'html5', u'video': u'video2', u'mode': u'webm'}
        ]
        `type` is youtube or html5
        `video` is html5 or youtube video_id
        `mode` is youtube, mp4 or webm

    Returns transcripts_presence dict::

        html5_local: list of html5 ids, if subtitles exist locally for them;
        html5_equal: bool, if exactly two html5 transcripts were found locally and they are equal;
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority,
            reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones;
        current_item_subs: string, value of item.sub field;
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module
            (NOTE(review): not set by the code visible in this function - confirm);
        command: string, action to front-end what to do and what to show to user.
    """
    # Defaults describe the "nothing found" state; 'status' flips to 'Success' once validation passes.
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Error',
    }

    try:
        __, videos, item = _validate_transcripts_data(request)
    except TranscriptsRequestValidationException as e:
        return error_response(transcripts_presence, text_type(e))

    transcripts_presence['status'] = 'Success'

    try:
        # If this lookup does not raise, the command is 'found' and the checks below are skipped.
        edx_video_id = clean_video_id(videos.get('edx_video_id'))
        get_transcript_from_val(edx_video_id=edx_video_id, lang=u'en')
        command = 'found'
    except NotFoundError:
        # Fall back to the contentstore: first the transcript named after item.sub.
        filename = 'subs_{0}.srt.sjson'.format(item.sub)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            local_transcripts = contentstore().find(content_location).data
            transcripts_presence['current_item_subs'] = item.sub
        except NotFoundError:
            pass

        # Check for youtube transcripts presence
        youtube_id = videos.get('youtube', None)
        if youtube_id:
            transcripts_presence['is_youtube_mode'] = True

            # youtube local
            filename = 'subs_{0}.srt.sjson'.format(youtube_id)
            content_location = StaticContent.compute_location(item.location.course_key, filename)
            try:
                local_transcripts = contentstore().find(content_location).data
                transcripts_presence['youtube_local'] = True
            except NotFoundError:
                log.debug(u"Can't find transcripts in storage for youtube id: %s", youtube_id)

            # youtube server
            # Work on a deep copy so the per-request params do not modify settings.YOUTUBE.
            youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
            youtube_text_api['params']['v'] = youtube_id
            youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
            if youtube_transcript_name:
                youtube_text_api['params']['name'] = youtube_transcript_name
            youtube_response = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])

            if youtube_response.status_code == 200 and youtube_response.text:
                transcripts_presence['youtube_server'] = True
            #check youtube local and server transcripts for equality
            if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
                try:
                    youtube_server_subs = get_transcripts_from_youtube(
                        youtube_id,
                        settings,
                        item.runtime.service(item, "i18n")
                    )
                    # local_transcripts here is the youtube-local one: youtube_local is only
                    # True when the lookup just above succeeded.
                    if json.loads(local_transcripts) == youtube_server_subs:  # check transcripts for equality
                        transcripts_presence['youtube_diff'] = False
                except GetTranscriptsFromYouTubeException:
                    # Comparison is best-effort; on failure 'youtube_diff' keeps its default (True).
                    pass

        # Check for html5 local transcripts presence
        html5_subs = []
        for html5_id in videos['html5']:
            filename = 'subs_{0}.srt.sjson'.format(html5_id)
            content_location = StaticContent.compute_location(item.location.course_key, filename)
            try:
                html5_subs.append(contentstore().find(content_location).data)
                transcripts_presence['html5_local'].append(html5_id)
            except NotFoundError:
                log.debug(u"Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
            if len(html5_subs) == 2:  # check html5 transcripts for equality
                transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])

        command, __ = _transcripts_logic(transcripts_presence, videos)

    transcripts_presence.update({'command': command})
    return JsonResponse(transcripts_presence)
def check_transcripts(request):
    """
    Check state of transcripts availability.

    request.GET['data'] has key `videos`, which can contain any of the following::

        [
            {u'type': u'youtube', u'video': u'OEoXaMPEzfM', u'mode': u'youtube'},
            {u'type': u'html5', u'video': u'video1', u'mode': u'mp4'}
            {u'type': u'html5', u'video': u'video2', u'mode': u'webm'}
        ]
        `type` is youtube or html5
        `video` is html5 or youtube video_id
        `mode` is youtube, mp4 or webm

    Returns transcripts_presence dict::

        html5_local: list of html5 ids, if subtitles exist locally for them;
        html5_equal: bool, if exactly two html5 transcripts were found locally and they are equal;
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority,
            reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones;
        current_item_subs: string, value of item.sub field;
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module
            (NOTE(review): not set by the code visible in this function - confirm);
        command: string, action to front-end what to do and what to show to user.
    """
    # Defaults describe the "nothing found" state; 'status' flips to 'Success' once validation passes.
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Error',
    }

    try:
        __, videos, item = _validate_transcripts_data(request)
    except TranscriptsRequestValidationException as e:
        return error_response(transcripts_presence, text_type(e))

    transcripts_presence['status'] = 'Success'

    try:
        # If this lookup does not raise, the command is 'found' and the checks below are skipped.
        edx_video_id = clean_video_id(videos.get('edx_video_id'))
        get_transcript_from_val(edx_video_id=edx_video_id, lang=u'en')
        command = 'found'
    except NotFoundError:
        # Fall back to the contentstore: first the transcript named after item.sub.
        filename = 'subs_{0}.srt.sjson'.format(item.sub)
        content_location = StaticContent.compute_location(
            item.location.course_key, filename)
        try:
            # Stored data is decoded as UTF-8 before it is handed to json.loads below.
            local_transcripts = contentstore().find(
                content_location).data.decode('utf-8')
            transcripts_presence['current_item_subs'] = item.sub
        except NotFoundError:
            pass

        # Check for youtube transcripts presence
        youtube_id = videos.get('youtube', None)
        if youtube_id:
            transcripts_presence['is_youtube_mode'] = True

            # youtube local
            filename = 'subs_{0}.srt.sjson'.format(youtube_id)
            content_location = StaticContent.compute_location(
                item.location.course_key, filename)
            try:
                local_transcripts = contentstore().find(
                    content_location).data.decode('utf-8')
                transcripts_presence['youtube_local'] = True
            except NotFoundError:
                log.debug(
                    u"Can't find transcripts in storage for youtube id: %s",
                    youtube_id)

            # youtube server
            # Work on a deep copy so the per-request params do not modify settings.YOUTUBE.
            youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
            youtube_text_api['params']['v'] = youtube_id
            youtube_transcript_name = youtube_video_transcript_name(
                youtube_text_api)
            if youtube_transcript_name:
                youtube_text_api['params']['name'] = youtube_transcript_name
            youtube_response = requests.get('http://' +
                                            youtube_text_api['url'],
                                            params=youtube_text_api['params'])

            if youtube_response.status_code == 200 and youtube_response.text:
                transcripts_presence['youtube_server'] = True
            #check youtube local and server transcripts for equality
            if transcripts_presence['youtube_server'] and transcripts_presence[
                    'youtube_local']:
                try:
                    youtube_server_subs = get_transcripts_from_youtube(
                        youtube_id, settings,
                        item.runtime.service(item, "i18n"))
                    # local_transcripts here is the youtube-local one: youtube_local is only
                    # True when the lookup just above succeeded.
                    if json.loads(
                            local_transcripts
                    ) == youtube_server_subs:  # check transcripts for equality
                        transcripts_presence['youtube_diff'] = False
                except GetTranscriptsFromYouTubeException:
                    # Comparison is best-effort; on failure 'youtube_diff' keeps its default (True).
                    pass

        # Check for html5 local transcripts presence
        html5_subs = []
        for html5_id in videos['html5']:
            filename = 'subs_{0}.srt.sjson'.format(html5_id)
            content_location = StaticContent.compute_location(
                item.location.course_key, filename)
            try:
                html5_subs.append(contentstore().find(content_location).data)
                transcripts_presence['html5_local'].append(html5_id)
            except NotFoundError:
                log.debug(
                    u"Can't find transcripts in storage for non-youtube video_id: %s",
                    html5_id)
            if len(html5_subs) == 2:  # check html5 transcripts for equality
                transcripts_presence['html5_equal'] = (json.loads(
                    html5_subs[0].decode('utf-8')) == json.loads(
                        html5_subs[1].decode('utf-8')))

        command, __ = _transcripts_logic(transcripts_presence, videos)

    transcripts_presence.update({'command': command})
    return JsonResponse(transcripts_presence)
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        args: `(video_location, revision, language_code, force_update)`.
        kwargs: must contain `command_run` and `commit`; when `commit` is
            falsy the migration is only logged, not performed.

    Returns:
        'Success' or 'Failure'. Exceptions other than the known transcript
        errors are logged and re-raised.
    """
    video_location, revision, language_code, force_update = args
    command_run = kwargs['command_run']
    store = modulestore()
    usage_key = BlockUsageLocator.from_string(video_location)
    video = store.get_item(usage_key=usage_key, revision=revision)
    edx_video_id = clean_video_id(video.edx_video_id)

    def log_args():
        """Arguments shared by every log line; reads the current edx_video_id."""
        return (MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)

    if not kwargs['commit']:
        LOGGER.info(
            ('[%s] [run=%s] [video-transcript-will-be-migrated] '
             '[revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]'),
            *log_args()
        )
        return 'Success'

    LOGGER.info(
        ('[%s] [run=%s] [transcripts-migration-process-started-for-video-transcript] [revision=%s] '
         '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        *log_args()
    )

    try:
        transcripts_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=transcripts_info,
        )

        if not (edx_video_id and is_video_available(edx_video_id)):
            # No usable video behind the current id: create an external one and persist it.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id

            # determine branch published/draft
            if revision == ModuleStoreEnum.RevisionOption.published_only:
                branch_setting = ModuleStoreEnum.Branch.published_only
            else:
                branch_setting = ModuleStoreEnum.Branch.draft_preferred

            with store.branch_setting(branch_setting):
                store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

            LOGGER.info(
                '[%s] [run=%s] [generated-edx-video-id] [revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]',
                *log_args()
            )

        save_transcript_to_storage(
            command_run=command_run,
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError):
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-known-exc] [revision=%s] [video=%s] '
             '[edx_video_id=%s] [language_code=%s]'),
            *log_args()
        )
        return 'Failure'
    except Exception:
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-unknown-exc] [revision=%s] '
             '[video=%s] [edx_video_id=%s] [language_code=%s]'),
            *log_args()
        )
        raise

    LOGGER.info(
        ('[%s] [run=%s] [video-transcript-migration-succeeded-for-a-video] [revision=%s] '
         '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        *log_args()
    )
    return 'Success'
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        args: `(video_location, language_code, force_update)`.
        kwargs: must contain `commit`; when it is falsy nothing is migrated
            and only a "will be migrated" message is returned.

    Returns:
        A 'Success: ...' or 'Failed: ...' message. Exceptions other than the
        known transcript errors are logged and re-raised.
    """
    video_location, language_code, force_update = args
    store = modulestore()
    video = store.get_item(usage_key=BlockUsageLocator.from_string(video_location))

    if not kwargs['commit']:
        return 'Language {language_code} transcript of video {edx_video_id} will be migrated'.format(
            language_code=language_code,
            edx_video_id=video.edx_video_id
        )

    # Start transcript's migration
    edx_video_id = clean_video_id(video.edx_video_id)
    LOGGER.info(
        "[Transcript migration] migration process is started for video [%s] language [%s].",
        edx_video_id, language_code
    )

    try:
        transcript_content, _, _ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=video.get_transcripts_info(),
        )

        if not edx_video_id:
            # The video has no id yet: create an external video and persist the new id.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id
            store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

        save_transcript_to_storage(
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError) as exc:
        LOGGER.exception(
            '[Transcript migration] transcript migration failed for video [%s] and language [%s].',
            edx_video_id, language_code
        )
        return 'Failed: language {language} of video {video} with exception {exception}'.format(
            language=language_code,
            video=video.edx_video_id,
            exception=text_type(exc)
        )
    except Exception:
        LOGGER.exception(
            '[Transcript migration] transcript migration failed for video [%s] and language [%s].',
            edx_video_id, language_code
        )
        raise

    # Only the success path reaches this point.
    success_template = (
        'Success: transcript (language: {language_code}, edx_video_id: {edx_video_id}) has been migrated '
        'for video [{location}].'
    )
    return success_template.format(
        edx_video_id=edx_video_id,
        language_code=language_code,
        location=unicode(video.location)
    )
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        args: `(video_location, revision, language_code, force_update)`.
        kwargs: must contain `command_run` and `commit`; when `commit` is
            falsy the migration is only logged, not performed.

    Returns:
        'Success' or 'Failure'. Exceptions other than the known transcript
        errors are logged and re-raised.
    """
    video_location, revision, language_code, force_update = args
    command_run = kwargs['command_run']
    store = modulestore()
    usage_key = BlockUsageLocator.from_string(video_location)
    video = store.get_item(usage_key=usage_key, revision=revision)
    edx_video_id = clean_video_id(video.edx_video_id)

    def log_args():
        """Arguments shared by every log line; reads the current edx_video_id."""
        return (MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)

    if not kwargs['commit']:
        LOGGER.info(
            ('[%s] [run=%s] [video-transcript-will-be-migrated] '
             '[revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]'),
            *log_args()
        )
        return 'Success'

    LOGGER.info(
        ('[%s] [run=%s] [transcripts-migration-process-started-for-video-transcript] [revision=%s] '
         '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        *log_args()
    )

    try:
        transcripts_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=transcripts_info,
        )

        if not (edx_video_id and is_video_available(edx_video_id)):
            # No usable video behind the current id: create an external one and persist it.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id

            # determine branch published/draft
            if revision == ModuleStoreEnum.RevisionOption.published_only:
                branch_setting = ModuleStoreEnum.Branch.published_only
            else:
                branch_setting = ModuleStoreEnum.Branch.draft_preferred

            with store.branch_setting(branch_setting):
                store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

            LOGGER.info(
                '[%s] [run=%s] [generated-edx-video-id] [revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]',
                *log_args()
            )

        save_transcript_to_storage(
            command_run=command_run,
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError):
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-known-exc] [revision=%s] [video=%s] '
             '[edx_video_id=%s] [language_code=%s]'),
            *log_args()
        )
        return 'Failure'
    except Exception:
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-unknown-exc] [revision=%s] '
             '[video=%s] [edx_video_id=%s] [language_code=%s]'),
            *log_args()
        )
        raise

    LOGGER.info(
        ('[%s] [run=%s] [video-transcript-migration-succeeded-for-a-video] [revision=%s] '
         '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        *log_args()
    )
    return 'Success'