def test_migrated_transcripts_count_with_commit(self):
    """
    Verify that running the migration with --commit makes both transcripts available.
    """
    def transcript_languages():
        # The video id is re-read on every call, just like each inline check did.
        return api.get_available_transcript_languages(self.video_descriptor.edx_video_id)

    def has_transcript(language_code):
        return api.is_transcript_available(self.video_descriptor.edx_video_id, language_code)

    # nothing has been migrated yet
    self.assertEqual(len(transcript_languages()), 0)
    for language_code in ('hr', 'ge'):
        self.assertFalse(has_transcript(language_code))

    # run the migrate_transcripts command for real and re-check the availability
    call_command('migrate_transcripts', '--course-id', unicode(self.course.id), '--commit')

    self.assertEqual(len(transcript_languages()), 2)
    for language_code in ('hr', 'ge'):
        self.assertTrue(has_transcript(language_code))
def test_migrated_transcripts_without_commit(self):
    """
    Verify that a dry-run (no --commit) of the migration leaves no transcripts behind.
    """
    def assert_nothing_migrated():
        # Same three checks the test performs before and after the command.
        available = api.get_available_transcript_languages(self.video_descriptor.edx_video_id)
        self.assertEqual(len(available), 0)
        for language_code in ('hr', 'ge'):
            self.assertFalse(
                api.is_transcript_available(self.video_descriptor.edx_video_id, language_code)
            )

    # transcripts must not exist up front
    assert_nothing_migrated()

    # invoke migrate_transcripts without the --commit flag
    call_command('migrate_transcripts', '--course-id', unicode(self.course.id))

    # and they must still not exist afterwards
    assert_nothing_migrated()
def test_migrate_transcripts_idempotency(self):
    """
    Verify that repeating the committed migration, also with --force-update,
    keeps both transcripts available.
    """
    def check_each_language(assertion):
        # Apply the given assertion to the availability of 'hr' and then 'ge'.
        for language_code in ('hr', 'ge'):
            assertion(api.is_transcript_available(self.video_descriptor.edx_video_id, language_code))

    translations = self.video_descriptor.available_translations(
        self.video_descriptor.get_transcripts_info()
    )
    self.assertItemsEqual(translations, ['hr', 'ge'])
    check_each_language(self.assertFalse)

    # first run, an identical second run, and finally a run with --force-update;
    # the transcripts have to be available after every one of them
    for extra_options in ([], [], ['--force-update']):
        command_args = ['--course-id', six.text_type(self.course.id)] + extra_options + ['--commit']
        call_command('migrate_transcripts', *command_args)
        check_each_language(self.assertTrue)
def test_migrate_transcripts_idempotency(self):
    """
    Run the committed migration several times and check the transcripts stay available.
    """
    def run_migration(*extra_flags):
        # Extra flags are placed between the course id and the trailing --commit.
        arguments = ('--course-id', unicode(self.course.id)) + extra_flags + ('--commit',)
        call_command('migrate_transcripts', *arguments)

    def assert_all_available():
        for language_code in ('hr', 'ge'):
            self.assertTrue(
                api.is_transcript_available(self.video_descriptor.edx_video_id, language_code)
            )

    transcripts_info = self.video_descriptor.get_transcripts_info()
    translations = self.video_descriptor.available_translations(transcripts_info)
    self.assertItemsEqual(translations, ['hr', 'ge'])
    for language_code in ('hr', 'ge'):
        self.assertFalse(
            api.is_transcript_available(self.video_descriptor.edx_video_id, language_code)
        )

    # initial migration
    run_migration()
    assert_all_available()

    # migrating a second time must not change the outcome
    run_migration()
    assert_all_available()

    # neither must a forced update
    run_migration('--force-update')
    assert_all_available()
def save_transcript_to_storage(command_run, edx_video_id, language_code, transcript_content, file_format,
                               force_update):
    """
    Save a transcript for a video, overwriting an existing one only when forced.

    Arguments:
        command_run: A positive integer indicating the current run
        edx_video_id: video ID
        language_code: language code
        transcript_content: content of the transcript
        file_format: format of the transcript file
        force_update: whether an already available transcript for the video
            and language should be updated with the given content.
    """
    already_available = is_transcript_available(video_id=edx_video_id, language_code=language_code)

    if not already_available:
        # No transcript yet for this video/language: create a fresh one.
        create_video_transcript(edx_video_id, language_code, file_format, ContentFile(transcript_content))
        return

    if force_update:
        # A transcript exists and the caller explicitly asked to replace it.
        create_or_update_video_transcript(
            edx_video_id,
            language_code,
            {'file_format': file_format},
            ContentFile(transcript_content),
        )
        return

    # A transcript exists and must be left untouched; only record that fact.
    LOGGER.info(
        "[%s] [run=%s] [do-not-override-existing-transcript] [edx_video_id=%s] [language_code=%s]",
        MIGRATION_LOGS_PREFIX, command_run, edx_video_id, language_code
    )
def save_transcript_to_storage(command_run, edx_video_id, language_code, transcript_content, file_format,
                               force_update):
    """
    Store the given transcript unless one already exists and no forced update was requested.

    Arguments:
        command_run: A positive integer indicating the current run
        edx_video_id: video ID
        language_code: language code
        transcript_content: content of the transcript
        file_format: format of the transcript file
        force_update: when true, an existing transcript for the video and
            language is updated instead of being kept as is.
    """
    exists = is_transcript_available(video_id=edx_video_id, language_code=language_code)

    if exists and not force_update:
        # Keep the existing transcript and just log that it was skipped.
        LOGGER.info(
            "[%s] [run=%s] [do-not-override-existing-transcript] [edx_video_id=%s] [language_code=%s]",
            MIGRATION_LOGS_PREFIX, command_run, edx_video_id, language_code
        )
    elif exists:
        # Existing transcript plus force_update: overwrite it.
        transcript_metadata = {'file_format': file_format}
        create_or_update_video_transcript(
            edx_video_id, language_code, transcript_metadata, ContentFile(transcript_content)
        )
    else:
        # Nothing stored yet: create the transcript.
        create_video_transcript(
            edx_video_id, language_code, file_format, ContentFile(transcript_content)
        )
def save_transcript_to_storage(edx_video_id, language_code, transcript_content, file_format, force_update):
    """
    Store a transcript for a video; an existing one is only replaced on a forced update.

    Arguments:
        edx_video_id: video ID
        language_code: language code
        transcript_content: content of the transcript
        file_format: format of the transcript file
        force_update: whether an already available transcript for the video
            and language should be updated with the given content.
    """
    is_present = is_transcript_available(video_id=edx_video_id, language_code=language_code)

    # An existing transcript without a forced update is silently left alone.
    if is_present and not force_update:
        return

    if is_present:
        create_or_update_video_transcript(
            edx_video_id,
            language_code,
            {'file_format': file_format},
            ContentFile(transcript_content),
        )
    else:
        create_video_transcript(
            edx_video_id,
            language_code,
            file_format,
            ContentFile(transcript_content),
        )
def test_migrated_transcripts_count_with_commit(self):
    """
    Check transcript count and availability before and after a committed migration.
    """
    language_codes = ('hr', 'ge')

    # before the command runs there must be no transcripts at all
    available_before = api.get_available_transcript_languages(self.video_descriptor.edx_video_id)
    self.assertEqual(len(available_before), 0)
    for code in language_codes:
        self.assertFalse(api.is_transcript_available(self.video_descriptor.edx_video_id, code))

    # migrate for real
    course_id = unicode(self.course.id)
    call_command('migrate_transcripts', '--course-id', course_id, '--commit')

    # afterwards both transcripts are expected to be present
    available_after = api.get_available_transcript_languages(self.video_descriptor.edx_video_id)
    self.assertEqual(len(available_after), 2)
    for code in language_codes:
        self.assertTrue(api.is_transcript_available(self.video_descriptor.edx_video_id, code))
def test_migrated_transcripts_without_commit(self):
    """
    Check that the migration command run as a dry-run does not create any transcript.
    """
    # The same expectations hold before (first pass) and after (second pass)
    # the dry-run; the command is issued only between the two passes.
    for command_already_ran in (False, True):
        languages = api.get_available_transcript_languages(self.video_descriptor.edx_video_id)
        self.assertEqual(len(languages), 0)
        for language_code in ('hr', 'ge'):
            self.assertFalse(
                api.is_transcript_available(self.video_descriptor.edx_video_id, language_code)
            )

        if not command_already_ran:
            # no --commit here, so nothing is expected to be persisted
            call_command('migrate_transcripts', '--course-id', unicode(self.course.id))
def async_migrate_transcript(self, course_key, **kwargs):   #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    Arguments:
        course_key: serialized course key of the course to migrate
        **kwargs: options forwarded unchanged to every subtask; must contain
            'force_update' (a KeyError is raised otherwise), which tells
            whether transcripts that are already available get migrated again.

    Returns:
        A failure message string when no course is found for the key,
        otherwise whatever `get()` yields for the chord of subtasks.
    """
    try:
        # Parse the key once; it is needed again for the video lookup below.
        parsed_course_key = CourseKey.from_string(course_key)
        if not modulestore().get_course(parsed_course_key):
            # text_type (already used in this function) replaces the
            # Python 2-only `unicode` builtin.
            raise KeyError(u'Invalid course key: ' + text_type(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key,
            exception=text_type(exc)
        )

    force_update = kwargs['force_update']
    sub_tasks = []

    all_videos = get_videos_from_store(parsed_course_key)
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key,
        len(all_videos)
    )

    for video in all_videos:
        # NOTE(review): this is the video's own transcripts mapping, so adding
        # the 'en' entry below mutates it in place - confirm that is intended.
        all_lang_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_lang_transcripts['en'] = english_transcript

        # Only the language codes are needed, so iterate the keys directly.
        for lang in all_lang_transcripts:
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id),
                lang
            )
            if transcript_already_present and force_update:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, True, **kwargs
                ))
            elif not transcript_already_present:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, False, **kwargs
                ))

    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)

    # Fetch the chord result a single time and reuse it for logging and return.
    results = status.get()
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key,
        len(results)
    )
    return results
def test_migrate_transcripts_availability(self):
    """
    Verify that the 'hr' and 'ge' transcripts become available once the migration is committed.
    """
    def is_available(language_code):
        # Looks the video id up at call time, as every inline check did.
        return api.is_transcript_available(self.video_descriptor.edx_video_id, language_code)

    transcripts_info = self.video_descriptor.get_transcripts_info()
    translations = self.video_descriptor.available_translations(transcripts_info)
    self.assertItemsEqual(translations, ['hr', 'ge'])

    # neither transcript is available before migrating
    for language_code in ('hr', 'ge'):
        self.assertFalse(is_available(language_code))

    # run the migrate_transcripts command with --commit
    call_command('migrate_transcripts', '--course-id', unicode(self.course.id), '--commit')

    # both transcripts are available afterwards
    for language_code in ('hr', 'ge'):
        self.assertTrue(is_available(language_code))
def async_migrate_transcript(self, course_key, **kwargs):   #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    Arguments:
        course_key: serialized course key of the course to migrate
        **kwargs: options forwarded unchanged to every subtask; must contain
            'force_update' (a KeyError is raised otherwise), which tells
            whether transcripts that are already available get migrated again.

    Returns:
        A failure message string when no course is found for the key,
        otherwise whatever `get()` yields for the chord of subtasks.
    """
    try:
        # Parsed once here and reused for the video lookup further down.
        course_locator = CourseKey.from_string(course_key)
        if not modulestore().get_course(course_locator):
            # text_type (already used in this function) instead of the
            # Python 2-only `unicode` builtin.
            raise KeyError(u'Invalid course key: ' + text_type(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key, exception=text_type(exc))

    force_update = kwargs['force_update']
    sub_tasks = []

    all_videos = get_videos_from_store(course_locator)
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key, len(all_videos))

    for video in all_videos:
        # NOTE(review): this is the video's own transcripts mapping, so adding
        # the 'en' entry below mutates it in place - confirm that is intended.
        all_lang_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_lang_transcripts['en'] = english_transcript

        # Only the language codes are needed, so iterate the keys directly.
        for lang in all_lang_transcripts:
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id), lang)

            # An existing transcript is only migrated again on a forced update.
            if transcript_already_present and not force_update:
                continue

            # The third argument is True exactly when an existing transcript is
            # being force-updated and False when none exists yet.
            sub_tasks.append(
                async_migrate_transcript_subtask.s(video, lang, bool(transcript_already_present), **kwargs))

    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)

    # Retrieve the chord result once instead of calling get() for both the
    # log line and the return value.
    results = status.get()
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key, len(results))
    return results