Example 1
def test_bulk_calculate_course_progress_unlinked_course_error(db, monkeypatch):
    """Tests 'bulk_calculate_course_progress_data' raises ``UnlinkedCourseError``.

    The function under test iterates over a set of course enrollment records,
    so we create a couple of records to iterate over and mock the collect
    function. Unlike the happy-path test, ``get_site_for_course`` is NOT
    patched here, so the course has no site linkage and the error is expected.
    """
    course_overview = CourseOverviewFactory()
    # Loop index is unused; '_' makes that explicit
    course_enrollments = [
        CourseEnrollmentFactory(course_id=course_overview.id) for _ in range(2)
    ]
    mapping = {
        ce.course_id:
        LearnerCourseGradeMetricsFactory(course_id=str(ce.course_id),
                                         user=ce.user,
                                         sections_worked=1,
                                         sections_possible=2)
        for ce in course_enrollments
    }

    def mock_metrics(course_enrollment, **_kwargs):
        return mapping[course_enrollment.course_id]

    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.collect_metrics_for_enrollment',
        mock_metrics)
    # The raised exception is the assertion; the return value never
    # materializes, so do not bind it to an unused variable
    with pytest.raises(UnlinkedCourseError):
        bulk_calculate_course_progress_data(course_overview.id)
Example 2
def test_bulk_calculate_course_progress_data_happy_path(db, monkeypatch):
    """Tests 'bulk_calculate_course_progress_data' returns the expected average.

    The function under test iterates over a set of course enrollment records,
    so we create a couple of records to iterate over and mock the collect
    function. Each learner completed 1 of 2 sections, so the expected
    average progress is 0.5.
    """
    course_overview = CourseOverviewFactory()
    # Loop index is unused; '_' makes that explicit
    course_enrollments = [
        CourseEnrollmentFactory(course_id=course_overview.id) for _ in range(2)
    ]
    mapping = {
        ce.course_id:
        LearnerCourseGradeMetricsFactory(course_id=str(ce.course_id),
                                         user=ce.user,
                                         sections_worked=1,
                                         sections_possible=2)
        for ce in course_enrollments
    }

    def mock_metrics(course_enrollment, **_kwargs):
        return mapping[course_enrollment.course_id]

    # Link the course to a site so the function under test does not raise
    # UnlinkedCourseError (see the unlinked-course test for that path)
    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.get_site_for_course',
        lambda _course_id: SiteFactory())
    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.collect_metrics_for_enrollment',
        mock_metrics)
    data = bulk_calculate_course_progress_data(course_overview.id)
    assert data['average_progress'] == 0.5
    def extract(self, course_id, date_for, **_kwargs):
        """Collect aggregated course-level metrics for one course on one date.

        Args:
            course_id: identifier of the course to collect metrics for;
                passed straight through to the query helpers (presumably a
                course key string or CourseKey — confirm with callers).
            date_for: date for which the metrics are collected.
            **_kwargs: accepted and ignored.

        Returns:
            dict with keys ``date_for``, ``course_id``, ``enrollment_count``,
            ``active_learners_today``, ``average_progress`` (``None`` when
            progress collection fails), ``average_days_to_complete`` and
            ``num_learners_completed``. The dict is shaped to back model
            defaults such as::

                defaults = dict(
                    enrollment_count=data['enrollment_count'],
                    active_learners_today=data['active_learners_today'],
                    average_progress=data.get('average_progress', None),
                    average_days_to_complete=data.get('average_days_to_complete', None),
                    num_learners_completed=data['num_learners_completed'],
                )

        TODO: refactor this class
        Add lazy loading method to load course enrollments
        - Create a method for each metric field
        """

        # We can turn this series of calls into a parallel
        # set of calls defined in a ruleset instead of hardcoded here after
        # retrieving the core querysets

        course_enrollments = get_enrolled_in_exclude_admins(
            course_id, date_for,)

        data = dict(date_for=date_for, course_id=course_id)

        # This is the transform step
        # After we get this working, we can then define them declaratively
        # we can do a lambda for course_enrollments to get the count

        data['enrollment_count'] = course_enrollments.count()

        active_learner_ids_today = get_active_learner_ids_today(
            course_id, date_for,)
        # Guard: the helper may return a falsy value (e.g. None or an empty
        # queryset) when there is no activity, so don't call .count() blindly
        if active_learner_ids_today:
            active_learners_today = active_learner_ids_today.count()
        else:
            active_learners_today = 0
        data['active_learners_today'] = active_learners_today

        # Average progress
        try:
            progress_data = bulk_calculate_course_progress_data(course_id=course_id,
                                                                date_for=date_for)
            data['average_progress'] = progress_data['average_progress']
        except Exception:  # pylint: disable=broad-except
            # Broad exception for starters. Refine as we see what gets caught
            # Make sure we set the average_progress to None so that upstream
            # does not think things are normal
            data['average_progress'] = None
            msg = ('FIGURES:FAIL bulk_calculate_course_progress_data'
                   ' date_for={date_for}, course_id="{course_id}"')
            logger.exception(msg.format(date_for=date_for, course_id=course_id))

        data['average_days_to_complete'] = get_average_days_to_complete(
            course_id, date_for,)

        data['num_learners_completed'] = get_num_learners_completed(
            course_id, date_for,)

        return data
Example 4
def test_bulk_calculate_course_progress_no_enrollments(db, monkeypatch):
    """A brand-new course with zero enrollments reports 0.0 average progress."""
    new_course = CourseOverviewFactory()
    # Link the course to a site so the function under test can proceed
    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.get_site_for_course',
        lambda _course_id: SiteFactory())
    results = bulk_calculate_course_progress_data(new_course.id)
    assert results['average_progress'] == 0.0
Example 5
    def extract(self, course_id, date_for=None, **_kwargs):
        """Collect aggregated course-level metrics for one course on one date.

        Args:
            course_id: identifier of the course to collect metrics for;
                passed straight through to the query helpers.
            date_for: date for which the metrics are collected. Defaults to
                the day before "now" in UTC (via ``prev_day``) when omitted.
            **_kwargs: accepted and ignored.

        Returns:
            dict with keys ``date_for``, ``course_id``, ``enrollment_count``,
            ``active_learners_today``, ``average_progress``,
            ``average_days_to_complete`` and ``num_learners_completed``.
            The dict is shaped to back model defaults such as::

                defaults = dict(
                    enrollment_count=data['enrollment_count'],
                    active_learners_today=data['active_learners_today'],
                    average_progress=data.get('average_progress', None),
                    average_days_to_complete=data.get('average_days_to_complete', None),
                    num_learners_completed=data['num_learners_completed'],
                )

        TODO: refactor this class
        Add lazy loading method to load course enrollments
        - Create a method for each metric field
        """

        # Update args if not assigned: default to yesterday's date in UTC
        if not date_for:
            date_for = prev_day(
                datetime.datetime.utcnow().replace(tzinfo=utc).date())

        # We can turn this series of calls into a parallel
        # set of calls defined in a ruleset instead of hardcoded here after
        # retrieving the core querysets

        course_enrollments = get_enrolled_in_exclude_admins(
            course_id,
            date_for,
        )

        data = dict(date_for=date_for, course_id=course_id)

        # This is the transform step
        # After we get this working, we can then define them declaratively
        # we can do a lambda for course_enrollments to get the count

        data['enrollment_count'] = course_enrollments.count()

        active_learner_ids_today = get_active_learner_ids_today(
            course_id,
            date_for,
        )
        # Guard: the helper may return a falsy value (e.g. None or an empty
        # queryset) when there is no activity, so don't call .count() blindly
        if active_learner_ids_today:
            active_learners_today = active_learner_ids_today.count()
        else:
            active_learners_today = 0
        data['active_learners_today'] = active_learners_today

        # Average progress
        # NOTE(review): unlike the other variants of this method, failures in
        # bulk_calculate_course_progress_data are NOT caught here and will
        # propagate to the caller
        progress_data = bulk_calculate_course_progress_data(
            course_id=course_id, date_for=date_for)
        data['average_progress'] = progress_data['average_progress']

        data['average_days_to_complete'] = get_average_days_to_complete(
            course_id,
            date_for,
        )

        data['num_learners_completed'] = get_num_learners_completed(
            course_id,
            date_for,
        )

        return data
    def extract(self, course_id, date_for, ed_next=False, **_kwargs):
        """Extracts (collects) aggregated course level data

        Args:
            course_id (:obj:`str` or :obj:`CourseKey`): The course for which we collect data
            date_for (str or date): Deprecated. Was to backfill data.
                Specialized TBD backfill data will be called instead.
            ed_next (bool, optional): "Enrollment Data Next" flag. If set to `True`
                then we collect metrics with our updated workflow. See here:
                https://github.com/appsembler/figures/issues/428

        Returns:
            dict with aggregate course level metrics

            ```
            dict(
                enrollment_count=data['enrollment_count'],
                active_learners_today=data['active_learners_today'],
                average_progress=data.get('average_progress', None),
                average_days_to_complete=data.get('average_days_to_complete', None),
                num_learners_completed=data['num_learners_completed'],
            )
            ```

        TODO: refactor this class. It doesn't need to be a class. Can be a
        standalone function
        Add lazy loading method to load course enrollments
        - Create a method for each metric field
        """

        # We can turn this series of calls into a parallel
        # set of calls defined in a ruleset instead of hardcoded here after
        # retrieving the core querysets

        course_enrollments = get_enrolled_in_exclude_admins(
            course_id, date_for,)

        data = dict(date_for=date_for, course_id=course_id)

        # This is the transform step
        # After we get this working, we can then define them declaratively
        # we can do a lambda for course_enrollments to get the count

        data['enrollment_count'] = course_enrollments.count()

        active_learner_ids_today = get_active_learner_ids_today(
            course_id, date_for,)
        # Guard: the helper may return a falsy value when there is no
        # activity, so don't call .count() blindly
        if active_learner_ids_today:
            active_learners_today = active_learner_ids_today.count()
        else:
            active_learners_today = 0
        data['active_learners_today'] = active_learners_today

        # Average progress
        # Progress data cannot be reliable for backfills or for any date prior to yesterday
        # without using StudentModuleHistory so we skip getting this data if running
        # for a day earlier than previous day (i.e., not during daily update of CDMs),
        #  especially since it is so expensive to calculate.
        # Note that Avg() applied across null and decimal vals for aggregate average_progress
        # will correctly ignore nulls
        # TODO: Reconsider this if we implement either StudentModuleHistory-based queries
        # (if so, you will need to add any types you want to
        # StudentModuleHistory.HISTORY_SAVING_TYPES)
        # TODO: Reconsider this once we switch to using Persistent Grades
        if is_past_date(date_for + relativedelta(days=1)):  # more than 1 day in past
            data['average_progress'] = None
            msg = ('FIGURES:PIPELINE:CDM Declining to calculate average progress for a past date'
                   ' date_for={date_for}, course_id="{course_id}"')
            logger.debug(msg.format(date_for=date_for, course_id=course_id))
        else:
            try:
                # This conditional check is an interim solution until we make
                # the progress function configurable and able to run Figures
                # plugins
                if ed_next:
                    progress_data = calculate_course_progress_next(course_id=course_id)
                else:
                    progress_data = bulk_calculate_course_progress_data(course_id=course_id,
                                                                        date_for=date_for)
                data['average_progress'] = progress_data['average_progress']
            except Exception:  # pylint: disable=broad-except
                # Broad exception for starters. Refine as we see what gets caught
                # Make sure we set the average_progress to None so that upstream
                # does not think things are normal
                data['average_progress'] = None

                # Name the failing progress function in the log message
                if ed_next:
                    prog_func = 'calculate_course_progress_next'
                else:
                    prog_func = 'bulk_calculate_course_progress_data'

                msg = ('FIGURES:FAIL {prog_func}'
                       ' date_for={date_for}, course_id="{course_id}"')
                logger.exception(msg.format(prog_func=prog_func,
                                            date_for=date_for,
                                            course_id=course_id))

        data['average_days_to_complete'] = get_average_days_to_complete(
            course_id, date_for,)

        data['num_learners_completed'] = get_num_learners_completed(
            course_id, date_for,)

        return data
Example 7
    def extract(self, course_id, date_for, **_kwargs):
        """Collect aggregated course-level metrics for one course on one date.

        Args:
            course_id: identifier of the course to collect metrics for;
                passed straight through to the query helpers.
            date_for: date for which the metrics are collected. Average
                progress is skipped (set to ``None``) when this is more than
                one day in the past.
            **_kwargs: accepted and ignored.

        Returns:
            dict with keys ``date_for``, ``course_id``, ``enrollment_count``,
            ``active_learners_today``, ``average_progress`` (``None`` when
            skipped or on failure), ``average_days_to_complete`` and
            ``num_learners_completed``. The dict is shaped to back model
            defaults such as::

                defaults = dict(
                    enrollment_count=data['enrollment_count'],
                    active_learners_today=data['active_learners_today'],
                    average_progress=data.get('average_progress', None),
                    average_days_to_complete=data.get('average_days_to_complete', None),
                    num_learners_completed=data['num_learners_completed'],
                )

        TODO: refactor this class
        Add lazy loading method to load course enrollments
        - Create a method for each metric field
        """

        # We can turn this series of calls into a parallel
        # set of calls defined in a ruleset instead of hardcoded here after
        # retrieving the core querysets

        course_enrollments = get_enrolled_in_exclude_admins(
            course_id,
            date_for,
        )

        data = dict(date_for=date_for, course_id=course_id)

        # This is the transform step
        # After we get this working, we can then define them declaratively
        # we can do a lambda for course_enrollments to get the count

        data['enrollment_count'] = course_enrollments.count()

        active_learner_ids_today = get_active_learner_ids_today(
            course_id,
            date_for,
        )
        # Guard: the helper may return a falsy value when there is no
        # activity, so don't call .count() blindly
        if active_learner_ids_today:
            active_learners_today = active_learner_ids_today.count()
        else:
            active_learners_today = 0
        data['active_learners_today'] = active_learners_today

        # Average progress
        # Progress data cannot be reliable for backfills or for any date prior to yesterday
        # without using StudentModuleHistory so we skip getting this data if running
        # for a day earlier than previous day (i.e., not during daily update of CDMs),
        #  especially since it is so expensive to calculate.
        # Note that Avg() applied across null and decimal vals for aggregate average_progress
        # will correctly ignore nulls
        # TODO: Reconsider this if we implement either StudentModuleHistory-based queries
        # (if so, you will need to add any types you want to
        # StudentModuleHistory.HISTORY_SAVING_TYPES)
        # TODO: Reconsider this once we switch to using Persistent Grades
        if is_past_date(date_for +
                        relativedelta(days=1)):  # more than 1 day in past
            data['average_progress'] = None
            msg = (
                'FIGURES:PIPELINE:CDM Declining to calculate average progress for a past date'
                ' date_for={date_for}, course_id="{course_id}"')
            logger.debug(msg.format(date_for=date_for, course_id=course_id))
        else:
            try:
                progress_data = bulk_calculate_course_progress_data(
                    course_id=course_id, date_for=date_for)
                data['average_progress'] = progress_data['average_progress']
            except Exception:  # pylint: disable=broad-except
                # Broad exception for starters. Refine as we see what gets caught
                # Make sure we set the average_progress to None so that upstream
                # does not think things are normal
                data['average_progress'] = None
                msg = ('FIGURES:FAIL bulk_calculate_course_progress_data'
                       ' date_for={date_for}, course_id="{course_id}"')
                logger.exception(
                    msg.format(date_for=date_for, course_id=course_id))

        data['average_days_to_complete'] = get_average_days_to_complete(
            course_id,
            date_for,
        )

        data['num_learners_completed'] = get_num_learners_completed(
            course_id,
            date_for,
        )

        return data