Example 1
def enqueue_update_thumbnail_tasks(course_videos, videos_per_task, run):
    """
    Enqueue tasks to update video thumbnails from youtube.

    Arguments:
        course_videos: A list of tuples, each containing course ID, video ID and youtube ID.
        videos_per_task: Number of course videos that can be processed by a single celery task.
        run: This tracks the YT thumbnail scraping job runs.
    """
    tasks = []
    batch_size = len(course_videos)
    # Further slice the course-videos batch into chunks on the
    # basis of number of course-videos per task.
    start = 0
    end = videos_per_task
    chunks_count = int(ceil(batch_size / float(videos_per_task)))
    for __ in range(0, chunks_count):  # pylint: disable=C7620
        course_videos_chunk = course_videos[start:end]
        tasks.append(task_scrape_youtube_thumbnail.s(course_videos_chunk, run))
        start = end
        end += videos_per_task

    # Kick off a chord of scraping tasks
    callback = task_scrape_youtube_thumbnail_callback.s(
        run=run,
        batch_size=batch_size,
        videos_per_task=videos_per_task,
    )
    chord(tasks)(callback)
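
A minimal, standalone sketch of the chunking arithmetic used above: the batch is cut into ceil(batch_size / videos_per_task) consecutive slices by moving a fixed-size window. The helper name and the sample data below are illustrative, not part of the source.

from math import ceil

def split_into_chunks(items, per_task):
    """Yield consecutive slices of `items`, each holding at most `per_task` entries."""
    chunks_count = int(ceil(len(items) / float(per_task)))
    start, end = 0, per_task
    for __ in range(chunks_count):
        yield items[start:end]
        start = end
        end += per_task

# Illustrative data: 7 (course_id, video_id, youtube_id) tuples, 3 per task -> chunks of 3, 3, 1.
sample = [('course-v1:a+b+c', 'video-%d' % i, 'yt-%d' % i) for i in range(7)]
assert [len(chunk) for chunk in split_into_chunks(sample, 3)] == [3, 3, 1]
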
def test_overrides():
    """
    Test to ensure ChordableDjangoBackend primitives override as expected.
    """
    @chord_task()
    def inner_task1(*args):  # pylint: disable=missing-docstring, unused-argument
        pass

    @chord_task(backend=current_app.backend)
    def inner_task2(*args):  # pylint: disable=missing-docstring, unused-argument
        pass

    @chord_task(backend=ChordableDjangoBackend(current_app))
    def inner_task3(*args):  # pylint: disable=missing-docstring, unused-argument
        pass

    test_chord1 = chord(inner_task1(i) for i in _range(10))
    test_chord2 = chord((inner_task2(i) for i in _range(10)), app=current_app)
    test_chord3 = chord(
        (inner_task3(i) for i in _range(10)),
        app=ChordableDjangoBackend.get_suitable_app(current_app))

    assert isinstance(inner_task1.backend, ChordableDjangoBackend)
    assert isinstance(inner_task2.backend, ChordableDjangoBackend)
    assert isinstance(inner_task3.backend, ChordableDjangoBackend)

    assert isinstance(test_chord1.app.backend, ChordableDjangoBackend)
    assert isinstance(test_chord2.app.backend, ChordableDjangoBackend)
    assert isinstance(test_chord3.app.backend, ChordableDjangoBackend)
Example 3
def async_migrate_transcript(self, course_key, **kwargs):   # pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.
    """
    sub_tasks, force_update = [], kwargs['force_update']
    course_videos = get_course_videos(CourseKey.from_string(course_key))

    LOGGER.info(
        "[%s] process for course %s started. Migrating transcripts from %s videos.",
        MIGRATION_LOGS_PREFIX, course_key, len(course_videos)
    )

    for video in course_videos:
        all_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_transcripts.update({'en': video.sub})

        for lang in all_transcripts:
            sub_tasks.append(async_migrate_transcript_subtask.s(
                unicode(video.location), lang, force_update, **kwargs
            ))

    LOGGER.info(
        "[%s] Migrating %s transcripts for course %s.",
        MIGRATION_LOGS_PREFIX, len(sub_tasks), course_key
    )
    callback = task_status_callback.s()
    chord(sub_tasks)(callback)

    LOGGER.info(
        "[%s] task submission for course %s ended.",
        MIGRATION_LOGS_PREFIX, course_key
    )
Example 4
def enqueue_update_thumbnail_tasks(course_videos, videos_per_task, run):
    """
    Enqueue tasks to update video thumbnails from youtube.

    Arguments:
        course_videos: A list of tuples, each containing course ID, video ID and youtube ID.
        videos_per_task: Number of course videos that can be processed by a single celery task.
        run: This tracks the YT thumbnail scraping job runs.
    """
    tasks = []
    batch_size = len(course_videos)
    # Further slice the course-videos batch into chunks on the
    # basis of number of course-videos per task.
    start = 0
    end = videos_per_task
    chunks_count = int(ceil(batch_size / float(videos_per_task)))
    for __ in xrange(0, chunks_count):
        course_videos_chunk = course_videos[start:end]
        tasks.append(task_scrape_youtube_thumbnail.s(
            course_videos_chunk, run
        ))
        start = end
        end += videos_per_task

    # Kick off a chord of scraping tasks
    callback = task_scrape_youtube_thumbnail_callback.s(
        run=run,
        batch_size=batch_size,
        videos_per_task=videos_per_task,
    )
    chord(tasks)(callback)
Example 5
def async_migrate_transcript(self, course_key, **kwargs):   # pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.
    """
    force_update = kwargs['force_update']
    command_run = kwargs['command_run']
    course_videos = get_course_videos(CourseKey.from_string(course_key))

    LOGGER.info(
        "[%s] [run=%s] [video-transcripts-migration-process-started-for-course] [course=%s]",
        MIGRATION_LOGS_PREFIX, command_run, course_key
    )

    for revision, videos in course_videos.items():
        for video in videos:
            # Gather transcripts from a video block.
            all_transcripts = {}
            if video.transcripts is not None:
                all_transcripts.update(video.transcripts)

            english_transcript = video.sub
            if english_transcript:
                all_transcripts.update({'en': video.sub})

            sub_tasks = []
            video_location = unicode(video.location)
            for lang in all_transcripts:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video_location, revision, lang, force_update, **kwargs
                ))

            if sub_tasks:
                callback = task_status_callback.s(
                    revision=revision,
                    course_id=course_key,
                    command_run=command_run,
                    video_location=video_location
                )
                chord(sub_tasks)(callback)

                LOGGER.info(
                    ("[%s] [run=%s] [transcripts-migration-tasks-submitted] "
                     "[transcripts_count=%s] [course=%s] [revision=%s] [video=%s]"),
                    MIGRATION_LOGS_PREFIX, command_run, len(sub_tasks), course_key, revision, video_location
                )
            else:
                LOGGER.info(
                    "[%s] [run=%s] [no-video-transcripts] [course=%s] [revision=%s] [video=%s]",
                    MIGRATION_LOGS_PREFIX, command_run, course_key, revision, video_location
                )
Example 7
def enqueue_async_migrate_transcripts_tasks(
        course_keys,
        force_update=DEFAULT_FORCE_UPDATE,
        commit=DEFAULT_COMMIT
):
    """
    Fires new Celery tasks for all the input courses or for all courses.

    Arguments:
        course_keys: Command line course ids as list of CourseKey objects,
        force_update: Overwrite file in S3. Default is False,
        commit: Update S3 or dry-run the command to see which transcripts will be affected. Default is False.
    """
    kwargs = {
        'force_update': force_update,
        'commit': commit
    }

    tasks = [
        async_migrate_transcript.s(
            unicode(course_key),
            **kwargs
        ) for course_key in course_keys
    ]
    callback = task_status_callback.s()
    status = chord(tasks)(callback)
    for res in status.get():
        LOGGER.info("[Transcript migration] Result: %s", '\n'.join(res))
def test_simple_chord():
    """
    Test full chord execution in eager mode, check result.
    """
    test_chord = chord(chord_subtask.s(i)
                       for i in _range(10))(chord_callback.s())

    # [0, 1, ..., 9] doubled element-wise = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
    # 0+2+4+6+8+10+12+14+16+18 = 90
    assert test_chord.result == 90
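
To make the 90 above concrete, here is a plain-Python sketch of what chord_subtask and chord_callback plausibly do; the bodies are assumptions inferred from the comment (double each input, then sum), not the actual task definitions.

# Hypothetical stand-ins for the tasks used above; in the real test module they
# would be registered as celery tasks (e.g. via the chord_task decorator shown earlier).
def chord_subtask(value):
    # Each header task doubles its input: 0..9 -> 0, 2, 4, ..., 18.
    return value * 2

def chord_callback(results):
    # The chord callback receives all header results and reduces them to a single sum.
    return sum(results)

# Plain-Python check of the arithmetic the eager chord performs: 0+2+...+18 == 90.
assert chord_callback(chord_subtask(i) for i in range(10)) == 90
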
Example 9
def enqueue_async_migrate_transcripts_tasks(course_keys, force_update=DEFAULT_FORCE_UPDATE, commit=DEFAULT_COMMIT):
    """
    Fires new Celery tasks for all the input courses or for all courses.

    Arguments:
        course_keys: Command line course ids as list of CourseKey objects,
        force_update: Overwrite file in S3. Default is False,
        commit: Update S3 or dry-run the command to see which transcripts will be affected. Default is False.
    """
    kwargs = {
        'force_update': force_update,
        'commit': commit
    }

    tasks = [
        async_migrate_transcript.s(
            unicode(course_key),
            **kwargs
        ) for course_key in course_keys
    ]
    callback = task_status_callback.s()
    chord(tasks)(callback)
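
A minimal usage sketch for the function above, assuming course ids arrive as strings; CourseKey.from_string and the keyword defaults come from the surrounding examples, while the course id itself is a hypothetical placeholder.

from opaque_keys.edx.keys import CourseKey

# Hypothetical course id, for illustration only.
course_keys = [CourseKey.from_string('course-v1:edX+DemoX+Demo_Course')]

# Dry run: commit=False reports which transcripts would be migrated without updating S3.
enqueue_async_migrate_transcripts_tasks(course_keys, force_update=False, commit=False)
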
Example 10
def async_migrate_transcript(self, course_key, **kwargs):
    #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.
    """
    try:
        if not modulestore().get_course(CourseKey.from_string(course_key)):
            raise KeyError(u'Invalid course key: ' + unicode(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key,
            exception=text_type(exc)
        )
    force_update = kwargs['force_update']
    sub_tasks = []

    all_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key,
        len(all_videos)
    )
    for video in all_videos:
        all_lang_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_lang_transcripts.update({'en': video.sub})
        for lang, _ in all_lang_transcripts.items():
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id),
                lang
            )
            if transcript_already_present and force_update:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, True, **kwargs
                ))
            elif not transcript_already_present:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, False, **kwargs
                ))
    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key,
        len(status.get())
    )
    return status.get()
Example 11
def async_migrate_transcript(self, course_key, **kwargs):
    #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.
    """
    try:
        if not modulestore().get_course(CourseKey.from_string(course_key)):
            raise KeyError(u'Invalid course key: ' + unicode(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r',
                         text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key, exception=text_type(exc))
    force_update = kwargs['force_update']
    sub_tasks = []

    all_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key, len(all_videos))
    for video in all_videos:
        all_lang_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_lang_transcripts.update({'en': video.sub})
        for lang, _ in all_lang_transcripts.items():
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id), lang)
            if transcript_already_present and force_update:
                sub_tasks.append(
                    async_migrate_transcript_subtask.s(video, lang, True,
                                                       **kwargs))
            elif not transcript_already_present:
                sub_tasks.append(
                    async_migrate_transcript_subtask.s(video, lang, False,
                                                       **kwargs))
    LOGGER.info("[Transcript migration] Migrating %s transcripts",
                len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key, len(status.get()))
    return status.get()
def _test_chord_internal(callback_signature, failing_subtasks=False):
    """
    "Run" a chord in non-eager mode by mocking a bunch of things out.
    """
    # Notice that we don't specify an app kwarg here; the 'chord' override will handle things
    test_chord = chord(chord_subtask.s(i)
                       for i in _range(10))(callback_signature)

    # We now have several "tasks queued for execution" that will never be executed.
    assert TaskMeta.objects.all().count() == 11  # 10 subtasks, 1 callback

    chord_data = ChordData.objects.filter(
        callback_result__task_id=test_chord.id).first()
    for i, subtask in enumerate(chord_data.completed_results.all()):
        subtask.status = FAILURE if i % 3 == 0 and failing_subtasks else SUCCESS
        subtask.request = MagicMock(
            id=subtask.task_id, chord={'options': {
                'task_id': test_chord.id
            }})
        chord_subtask.backend.on_chord_part_return(subtask, subtask.status, -2)

    return chord_data