def enqueue_update_thumbnail_tasks(course_videos, videos_per_task, run):
    """
    Split a batch of course videos into chunks and enqueue them as a chord.

    One `task_scrape_youtube_thumbnail` signature is created per chunk of at
    most `videos_per_task` course videos. The signatures are submitted
    together as a chord, with `task_scrape_youtube_thumbnail_callback`
    attached as the chord callback.

    Arguments:
        course_videos: A list of tuples, each containing course ID, video ID and youtube ID.
        videos_per_task: Number of course videos that can be processed by a single celery task.
        run: This tracks the YT thumbnail scraping job runs.
    """
    batch_size = len(course_videos)
    # Round up so that a trailing, partially filled chunk still gets its own task.
    chunks_count = int(ceil(batch_size / float(videos_per_task)))

    scrape_signatures = []
    for chunk_index in range(0, chunks_count):  # pylint: disable=C7620
        offset = chunk_index * videos_per_task
        chunk = course_videos[offset:offset + videos_per_task]
        scrape_signatures.append(task_scrape_youtube_thumbnail.s(chunk, run))

    # Kick off a chord of scraping tasks
    on_complete = task_scrape_youtube_thumbnail_callback.s(
        run=run,
        batch_size=batch_size,
        videos_per_task=videos_per_task,
    )
    chord(scrape_signatures)(on_complete)
def test_overrides():
    """
    Test to ensure ChordableDjangoBackend primitives override as expected.

    Three tasks are declared with `chord_task` (no backend, the current app's
    backend, and an explicit ChordableDjangoBackend) and three chords are
    built (no app, the current app, and the app returned by
    `ChordableDjangoBackend.get_suitable_app`). Every task and every chord
    must end up backed by a ChordableDjangoBackend.
    """
    @chord_task()
    def inner_task1(*args):  # pylint: disable=missing-docstring, unused-argument
        pass

    @chord_task(backend=current_app.backend)
    def inner_task2(*args):  # pylint: disable=missing-docstring, unused-argument
        pass

    @chord_task(backend=ChordableDjangoBackend(current_app))
    def inner_task3(*args):  # pylint: disable=missing-docstring, unused-argument
        pass

    default_app_chord = chord(inner_task1(i) for i in _range(10))
    current_app_chord = chord(
        (inner_task2(i) for i in _range(10)),
        app=current_app
    )
    suitable_app_chord = chord(
        (inner_task3(i) for i in _range(10)),
        app=ChordableDjangoBackend.get_suitable_app(current_app)
    )

    # Tasks first, then chords -- same order as the individual assertions.
    for declared_task in (inner_task1, inner_task2, inner_task3):
        assert isinstance(declared_task.backend, ChordableDjangoBackend)

    for built_chord in (default_app_chord, current_app_chord, suitable_app_chord):
        assert isinstance(built_chord.app.backend, ChordableDjangoBackend)
def async_migrate_transcript(self, course_key, **kwargs):   # pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    One `async_migrate_transcript_subtask` signature is built per
    (video, language) pair, and all of them are submitted as a single chord
    whose callback is `task_status_callback`.

    Arguments:
        course_key: Serialized course key, parsed with `CourseKey.from_string`.
        **kwargs: Must contain `force_update`; the whole mapping is also
            forwarded to every sub-task.

    Raises:
        KeyError: If `force_update` is missing from `kwargs`.
    """
    force_update = kwargs['force_update']
    course_videos = get_course_videos(CourseKey.from_string(course_key))

    LOGGER.info(
        "[%s] process for course %s started. Migrating transcripts from %s videos.",
        MIGRATION_LOGS_PREFIX, course_key, len(course_videos)
    )

    sub_tasks = []
    for video in course_videos:
        # Work on a copy: adding the 'en' entry to `video.transcripts` itself
        # would silently modify the video block's field value. The `or {}`
        # also tolerates a video whose transcripts field is None.
        all_transcripts = dict(video.transcripts or {})
        if video.sub:
            all_transcripts['en'] = video.sub

        # The location is the same for every language of this video.
        video_location = unicode(video.location)
        sub_tasks.extend(
            async_migrate_transcript_subtask.s(video_location, lang, force_update, **kwargs)
            for lang in all_transcripts
        )

    LOGGER.info(
        "[%s] Migrating %s transcripts for course %s.",
        MIGRATION_LOGS_PREFIX, len(sub_tasks), course_key
    )

    callback = task_status_callback.s()
    chord(sub_tasks)(callback)

    LOGGER.info(
        "[%s] task submission for course %s ended.",
        MIGRATION_LOGS_PREFIX, course_key
    )
def enqueue_update_thumbnail_tasks(course_videos, videos_per_task, run):
    """
    Enqueue celery tasks that update video thumbnails from youtube.

    The batch is cut into consecutive chunks of at most `videos_per_task`
    course videos. Each chunk becomes one `task_scrape_youtube_thumbnail`
    signature, and the signatures are submitted as a chord with
    `task_scrape_youtube_thumbnail_callback` as its callback.

    Arguments:
        course_videos: A list of tuples, each containing course ID, video ID and youtube ID.
        videos_per_task: Number of course videos that can be processed by a single celery task.
        run: This tracks the YT thumbnail scraping job runs.
    """
    batch_size = len(course_videos)

    # Number of chunks needed, counting a final chunk that is only partly full.
    chunks_count = int(ceil(batch_size / float(videos_per_task)))

    tasks = [
        task_scrape_youtube_thumbnail.s(
            course_videos[position * videos_per_task:(position + 1) * videos_per_task],
            run
        )
        for position in xrange(chunks_count)
    ]

    # Kick off a chord of scraping tasks
    chord_callback = task_scrape_youtube_thumbnail_callback.s(
        run=run,
        batch_size=batch_size,
        videos_per_task=videos_per_task,
    )
    chord(tasks)(chord_callback)
def async_migrate_transcript(self, course_key, **kwargs):   # pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    For every video of every revision returned by `get_course_videos`, one
    `async_migrate_transcript_subtask` signature is built per transcript
    language. The signatures of a video are submitted as one chord whose
    callback is `task_status_callback`. Videos without any transcript are
    only logged.

    Arguments:
        course_key: Serialized course key, parsed with `CourseKey.from_string`.
        **kwargs: Must contain `force_update` and `command_run`; the whole
            mapping is also forwarded to every sub-task.
    """
    force_update = kwargs['force_update']
    command_run = kwargs['command_run']
    videos_by_revision = get_course_videos(CourseKey.from_string(course_key))

    LOGGER.info(
        "[%s] [run=%s] [video-transcripts-migration-process-started-for-course] [course=%s]",
        MIGRATION_LOGS_PREFIX, command_run, course_key
    )

    for revision, videos in videos_by_revision.items():
        for video in videos:
            # Gather the transcript languages of this video block into a
            # fresh dict so the block's own field value is left untouched.
            if video.transcripts is not None:
                languages = dict(video.transcripts)
            else:
                languages = {}

            if video.sub:
                languages['en'] = video.sub

            video_location = unicode(video.location)
            migrations = [
                async_migrate_transcript_subtask.s(
                    video_location, revision, lang, force_update, **kwargs
                )
                for lang in languages
            ]

            if not migrations:
                LOGGER.info(
                    "[%s] [run=%s] [no-video-transcripts] [course=%s] [revision=%s] [video=%s]",
                    MIGRATION_LOGS_PREFIX, command_run, course_key, revision, video_location
                )
                continue

            on_complete = task_status_callback.s(
                revision=revision,
                course_id=course_key,
                command_run=command_run,
                video_location=video_location
            )
            chord(migrations)(on_complete)

            LOGGER.info(
                ("[%s] [run=%s] [transcripts-migration-tasks-submitted] "
                 "[transcripts_count=%s] [course=%s] [revision=%s] [video=%s]"),
                MIGRATION_LOGS_PREFIX, command_run, len(migrations), course_key, revision, video_location
            )
def enqueue_async_migrate_transcripts_tasks(
        course_keys,
        force_update=DEFAULT_FORCE_UPDATE,
        commit=DEFAULT_COMMIT
):
    """
    Fires new Celery tasks for all the input courses or for all courses.

    One `async_migrate_transcript` signature is created per course key and
    the signatures are submitted as a chord with `task_status_callback` as
    its callback. The function then waits for the chord result and logs one
    entry per element of that result.

    Arguments:
        course_keys: Command line course ids as list of CourseKey objects,
        force_update: Overwrite file in S3. Defaults to `DEFAULT_FORCE_UPDATE`,
        commit: Update S3 or dry-run the command to see which transcripts will be affected.
            Defaults to `DEFAULT_COMMIT`.
    """
    kwargs = {
        'force_update': force_update,
        'commit': commit
    }

    tasks = [
        async_migrate_transcript.s(unicode(course_key), **kwargs)
        for course_key in course_keys
    ]

    callback = task_status_callback.s()
    status = chord(tasks)(callback)

    for res in status.get():
        # A per-course result may be a single message string (e.g. a failure
        # summary) rather than a list of messages. Joining a string would put
        # a newline between every character, so strings are logged as-is.
        message = res if isinstance(res, basestring) else '\n'.join(res)
        LOGGER.info("[Transcript migration] Result: %s", message)
def test_simple_chord():
    """
    Test full chord execution in eager mode, check result.
    """
    header = (chord_subtask.s(i) for i in _range(10))
    chord_result = chord(header)(chord_callback.s())

    # [0, 1, ..., 9] + [0, 1, ..., 9] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
    # 0+2+4+6+8+10+12+14+16+18 = 90
    assert chord_result.result == 90
def enqueue_async_migrate_transcripts_tasks(course_keys, force_update=DEFAULT_FORCE_UPDATE, commit=DEFAULT_COMMIT):
    """
    Fires new Celery tasks for all the input courses or for all courses.

    One `async_migrate_transcript` signature is created per course key and
    the signatures are submitted as a chord with `task_status_callback` as
    its callback. The chord result is not awaited.

    Arguments:
        course_keys: Command line course ids as list of CourseKey objects,
        force_update: Overwrite file in S3. Defaults to `DEFAULT_FORCE_UPDATE`,
        commit: Update S3 or dry-run the command to see which transcripts will be affected.
            Defaults to `DEFAULT_COMMIT`.
    """
    # Options shared by every per-course task.
    task_options = dict(force_update=force_update, commit=commit)

    course_tasks = []
    for course_key in course_keys:
        course_tasks.append(async_migrate_transcript.s(unicode(course_key), **task_options))

    on_complete = task_status_callback.s()
    chord(course_tasks)(on_complete)
def async_migrate_transcript(self, course_key, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    A sub-task is created for each (video, language) pair whose transcript is
    either not available yet, or is available while `force_update` is set.
    The sub-tasks run as a chord with `task_status_callback` as callback, and
    this task waits for the chord result.

    Arguments:
        course_key: Serialized course key, parsed with `CourseKey.from_string`.
        **kwargs: Must contain `force_update`; the whole mapping is also
            forwarded to every sub-task.

    Returns:
        A failure message string when the course cannot be found in the
        modulestore, otherwise the value returned by the chord result.
    """
    try:
        if not modulestore().get_course(CourseKey.from_string(course_key)):
            raise KeyError(u'Invalid course key: ' + unicode(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key,
            exception=text_type(exc)
        )

    force_update = kwargs['force_update']
    all_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key,
        len(all_videos)
    )

    sub_tasks = []
    for video in all_videos:
        # NOTE(review): this aliases and mutates `video.transcripts` in place.
        # It is kept as-is because the video object itself is handed to the
        # sub-task, which may rely on the added 'en' entry -- confirm.
        all_lang_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_lang_transcripts.update({'en': video.sub})

        # Only the language codes are needed, so iterate the keys directly.
        for lang in all_lang_transcripts:
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id),
                lang
            )
            if transcript_already_present and force_update:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, True, **kwargs
                ))
            elif not transcript_already_present:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, False, **kwargs
                ))

    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)

    # Wait for the chord once and reuse the value for both the log line and
    # the return value, instead of blocking on `status.get()` twice.
    # NOTE(review): Celery discourages waiting on a result from inside a task.
    results = status.get()
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key,
        len(results)
    )
    return results
def async_migrate_transcript(self, course_key, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    For each video found by `get_videos_from_store`, every transcript
    language (plus 'en' when the video has a `sub`) is checked with
    `is_transcript_available`. Languages that are already available are
    skipped unless `force_update` is set. The remaining ones are migrated by
    `async_migrate_transcript_subtask` signatures run as a chord.

    Arguments:
        course_key: Serialized course key, parsed with `CourseKey.from_string`.
        **kwargs: Must contain `force_update`; the whole mapping is also
            forwarded to every sub-task.

    Returns:
        A failure message string when the course cannot be found in the
        modulestore, otherwise the value returned by the chord result.
    """
    try:
        if not modulestore().get_course(CourseKey.from_string(course_key)):
            raise KeyError(u'Invalid course key: ' + unicode(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key, exception=text_type(exc))

    force_update = kwargs['force_update']
    sub_tasks = []

    all_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key, len(all_videos))

    for video in all_videos:
        # NOTE(review): `video.transcripts` is updated in place here. The
        # video object is passed on to the sub-task, so the added 'en' entry
        # may be relied upon downstream -- confirm before copying instead.
        all_lang_transcripts = video.transcripts
        if video.sub:
            all_lang_transcripts.update({'en': video.sub})

        for lang in all_lang_transcripts:
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id), lang)

            # An existing transcript is only re-migrated when forced.
            if transcript_already_present and not force_update:
                continue

            # The third argument is True exactly when an existing transcript
            # is being overwritten (the force_update case), False otherwise.
            sub_tasks.append(
                async_migrate_transcript_subtask.s(
                    video, lang, bool(transcript_already_present), **kwargs))

    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)

    # Fetch the chord result a single time; it is used for the log line and
    # returned to the caller.
    # NOTE(review): Celery discourages waiting on a result from inside a task.
    processed = status.get()
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key, len(processed))
    return processed
def _test_chord_internal(callback_signature, failing_subtasks=False):
    """
    "Run" a chord in non-eager mode by mocking a bunch of things out.

    A chord of ten `chord_subtask` signatures is submitted with the given
    callback. Each stored sub-task result is then marked SUCCESS (or FAILURE
    for every third one when `failing_subtasks` is set), given a fake request
    pointing at the chord, and reported to the backend through
    `on_chord_part_return`.

    Arguments:
        callback_signature: Signature used as the chord callback.
        failing_subtasks: When true, sub-tasks at indexes 0, 3, 6, ... are
            reported as failed.

    Returns:
        The ChordData row associated with the chord's callback result.
    """
    # Notice that we don't specify an app kwargs here, the 'chord' override will handle things
    header = (chord_subtask.s(i) for i in _range(10))
    submitted_chord = chord(header)(callback_signature)

    # We now have several "tasks queued for execution" that will never be executed.
    assert TaskMeta.objects.all().count() == 11  # 10 subtasks, 1 callback

    chord_data = ChordData.objects.filter(callback_result__task_id=submitted_chord.id).first()

    for index, part in enumerate(chord_data.completed_results.all()):
        if failing_subtasks and index % 3 == 0:
            part.status = FAILURE
        else:
            part.status = SUCCESS

        # Fake request carrying the id of the chord this part belongs to.
        chord_options = {'options': {'task_id': submitted_chord.id}}
        part.request = MagicMock(id=part.task_id, chord=chord_options)

        chord_subtask.backend.on_chord_part_return(part, part.status, -2)

    return chord_data