def test_bulk_calculate_course_progress_unlinked_course_error(db, monkeypatch):
    """Tests 'bulk_calculate_course_progress_data' function

    The function under test iterates over a set of course enrollment records,
    so we create a couple of records to iterate over and mock the collect
    function.

    Expects ``UnlinkedCourseError`` to be raised because the course is not
    linked to a site.
    """
    course_overview = CourseOverviewFactory()
    course_enrollments = [
        CourseEnrollmentFactory(course_id=course_overview.id) for _ in range(2)
    ]
    # Map each enrollment's course id to a prebuilt metrics record so the
    # mocked collector returns deterministic data
    mapping = {
        ce.course_id: LearnerCourseGradeMetricsFactory(
            course_id=str(ce.course_id),
            user=ce.user,
            sections_worked=1,
            sections_possible=2)
        for ce in course_enrollments
    }

    def mock_metrics(course_enrollment, **_kwargs):
        return mapping[course_enrollment.course_id]

    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.collect_metrics_for_enrollment',
        mock_metrics)
    # No site link is mocked for the course, so the call must raise
    with pytest.raises(UnlinkedCourseError):
        bulk_calculate_course_progress_data(course_overview.id)
def test_bulk_calculate_course_progress_data_happy_path(db, monkeypatch):
    """Tests 'bulk_calculate_course_progress_data' function

    The function under test iterates over a set of course enrollment records,
    so we create a couple of records to iterate over and mock the collect
    function.

    Each mocked metrics record has 1 of 2 sections worked, so the expected
    average progress across enrollments is 0.5.
    """
    course_overview = CourseOverviewFactory()
    course_enrollments = [
        CourseEnrollmentFactory(course_id=course_overview.id) for _ in range(2)
    ]
    # Map each enrollment's course id to a prebuilt metrics record so the
    # mocked collector returns deterministic data
    mapping = {
        ce.course_id: LearnerCourseGradeMetricsFactory(
            course_id=str(ce.course_id),
            user=ce.user,
            sections_worked=1,
            sections_possible=2)
        for ce in course_enrollments
    }

    def mock_metrics(course_enrollment, **_kwargs):
        return mapping[course_enrollment.course_id]

    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.get_site_for_course',
        lambda val: SiteFactory())
    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.collect_metrics_for_enrollment',
        mock_metrics)
    data = bulk_calculate_course_progress_data(course_overview.id)
    assert data['average_progress'] == 0.5
def extract(self, course_id, date_for, **_kwargs): """ defaults = dict( enrollment_count=data['enrollment_count'], active_learners_today=data['active_learners_today'], average_progress=data.get('average_progress', None), average_days_to_complete=data.get('average_days_to_complete, None'), num_learners_completed=data['num_learners_completed'], ) TODO: refactor this class Add lazy loading method to load course enrollments - Create a method for each metric field """ # We can turn this series of calls into a parallel # set of calls defined in a ruleset instead of hardcoded here after # retrieving the core quersets course_enrollments = get_enrolled_in_exclude_admins( course_id, date_for,) data = dict(date_for=date_for, course_id=course_id) # This is the transform step # After we get this working, we can then define them declaratively # we can do a lambda for course_enrollments to get the count data['enrollment_count'] = course_enrollments.count() active_learner_ids_today = get_active_learner_ids_today( course_id, date_for,) if active_learner_ids_today: active_learners_today = active_learner_ids_today.count() else: active_learners_today = 0 data['active_learners_today'] = active_learners_today # Average progress try: progress_data = bulk_calculate_course_progress_data(course_id=course_id, date_for=date_for) data['average_progress'] = progress_data['average_progress'] except Exception: # pylint: disable=broad-except # Broad exception for starters. Refine as we see what gets caught # Make sure we set the average_progres to None so that upstream # does not think things are normal data['average_progress'] = None msg = ('FIGURES:FAIL bulk_calculate_course_progress_data' ' date_for={date_for}, course_id="{course_id}"') logger.exception(msg.format(date_for=date_for, course_id=course_id)) data['average_days_to_complete'] = get_average_days_to_complete( course_id, date_for,) data['num_learners_completed'] = get_num_learners_completed( course_id, date_for,) return data
def test_bulk_calculate_course_progress_no_enrollments(db, monkeypatch):
    """Verify average progress for a brand new course with no enrollments.

    A course without any enrollments should report an average progress of
    0.0 rather than failing.
    """
    new_course = CourseOverviewFactory()

    def fake_get_site(_course_id):
        return SiteFactory()

    monkeypatch.setattr(
        'figures.pipeline.enrollment_metrics.get_site_for_course',
        fake_get_site)
    results = bulk_calculate_course_progress_data(new_course.id)
    assert results['average_progress'] == 0.0
def extract(self, course_id, date_for=None, **_kwargs):
    """Collect aggregated course-level metrics for one course.

    Builds a dict shaped for use as model ``defaults``::

        defaults = dict(
            enrollment_count=data['enrollment_count'],
            active_learners_today=data['active_learners_today'],
            average_progress=data.get('average_progress', None),
            average_days_to_complete=data.get('average_days_to_complete, None'),
            num_learners_completed=data['num_learners_completed'],
        )

    When ``date_for`` is not provided it defaults to the previous UTC day.

    TODO: refactor this class
    Add lazy loading method to load course enrollments
    - Create a method for each metric field
    """
    # Default to the previous UTC day when no date is supplied
    if not date_for:
        date_for = prev_day(
            datetime.datetime.utcnow().replace(tzinfo=utc).date())

    # We can turn this series of calls into a parallel set of calls defined
    # in a ruleset instead of hardcoded here after retrieving the core
    # querysets
    enrollments = get_enrolled_in_exclude_admins(course_id, date_for)

    metrics = dict(date_for=date_for, course_id=course_id)

    # Transform step. After we get this working, we can define these
    # declaratively (e.g. a lambda over enrollments for the count)
    metrics['enrollment_count'] = enrollments.count()

    active_ids = get_active_learner_ids_today(course_id, date_for)
    metrics['active_learners_today'] = active_ids.count() if active_ids else 0

    # Average progress across enrollments for the given day
    progress = bulk_calculate_course_progress_data(course_id=course_id,
                                                   date_for=date_for)
    metrics['average_progress'] = progress['average_progress']

    metrics['average_days_to_complete'] = get_average_days_to_complete(
        course_id, date_for)
    metrics['num_learners_completed'] = get_num_learners_completed(
        course_id, date_for)
    return metrics
def extract(self, course_id, date_for, ed_next=False, **_kwargs):
    """Extracts (collects) aggregated course level data

    Args:
        course_id (:obj:`str` or :obj:`CourseKey`): The course for which we
            collect data
        date_for (str or date): Deprecated. Was to backfill data. Specialized
            TBD backfill data will be called instead
        ed_next (bool, optional): "Enrollment Data Next" flag. If set to
            `True` then we collect metrics with our updated workflow. See
            here: https://github.com/appsembler/figures/issues/428

    Returns:
        dict with aggregate course level metrics

        ```
        dict(
            enrollment_count=data['enrollment_count'],
            active_learners_today=data['active_learners_today'],
            average_progress=data.get('average_progress', None),
            average_days_to_complete=data.get('average_days_to_complete', None),
            num_learners_completed=data['num_learners_completed'],
        )
        ```

    TODO: refactor this class. It doesn't need to be a class. Can be a
    standalone function
    Add lazy loading method to load course enrollments
    - Create a method for each metric field
    """
    # We can turn this series of calls into a parallel
    # set of calls defined in a ruleset instead of hardcoded here after
    # retrieving the core quersets
    course_enrollments = get_enrolled_in_exclude_admins(
        course_id, date_for,)
    data = dict(date_for=date_for, course_id=course_id)

    # This is the transform step
    # After we get this working, we can then define them declaratively
    # we can do a lambda for course_enrollments to get the count
    data['enrollment_count'] = course_enrollments.count()
    active_learner_ids_today = get_active_learner_ids_today(
        course_id, date_for,)
    if active_learner_ids_today:
        active_learners_today = active_learner_ids_today.count()
    else:
        active_learners_today = 0
    data['active_learners_today'] = active_learners_today

    # Average progress
    # Progress data cannot be reliable for backfills or for any date prior to
    # yesterday without using StudentModuleHistory so we skip getting this
    # data if running for a day earlier than previous day (i.e., not during
    # daily update of CDMs), especially since it is so expensive to calculate.
    # Note that Avg() applied across null and decimal vals for aggregate
    # average_progress will correctly ignore nulls
    # TODO: Reconsider this if we implement either StudentModuleHistory-based
    # queries (if so, you will need to add any types you want to
    # StudentModuleHistory.HISTORY_SAVING_TYPES)
    # TODO: Reconsider this once we switch to using Persistent Grades
    if is_past_date(date_for + relativedelta(days=1)):  # more than 1 day in past
        data['average_progress'] = None
        msg = ('FIGURES:PIPELINE:CDM Declining to calculate average progress for a past date'
               ' date_for={date_for}, course_id="{course_id}"')
        logger.debug(msg.format(date_for=date_for, course_id=course_id))
    else:
        try:
            # This conditional check is an interim solution until we make
            # the progress function configurable and able to run Figures
            # plugins
            if ed_next:
                progress_data = calculate_course_progress_next(course_id=course_id)
            else:
                progress_data = bulk_calculate_course_progress_data(course_id=course_id,
                                                                    date_for=date_for)
            data['average_progress'] = progress_data['average_progress']
        except Exception:  # pylint: disable=broad-except
            # Broad exception for starters. Refine as we see what gets caught
            # Make sure we set the average_progres to None so that upstream
            # does not think things are normal
            data['average_progress'] = None
            if ed_next:
                prog_func = 'calculate_course_progress_next'
            else:
                prog_func = 'bulk_calculate_course_progress_data'
            msg = ('FIGURES:FAIL {prog_func}'
                   ' date_for={date_for}, course_id="{course_id}"')
            logger.exception(msg.format(prog_func=prog_func,
                                        date_for=date_for,
                                        course_id=course_id))

    data['average_days_to_complete'] = get_average_days_to_complete(
        course_id, date_for,)
    data['num_learners_completed'] = get_num_learners_completed(
        course_id, date_for,)
    return data
def extract(self, course_id, date_for, **_kwargs): """ defaults = dict( enrollment_count=data['enrollment_count'], active_learners_today=data['active_learners_today'], average_progress=data.get('average_progress', None), average_days_to_complete=data.get('average_days_to_complete, None'), num_learners_completed=data['num_learners_completed'], ) TODO: refactor this class Add lazy loading method to load course enrollments - Create a method for each metric field """ # We can turn this series of calls into a parallel # set of calls defined in a ruleset instead of hardcoded here after # retrieving the core quersets course_enrollments = get_enrolled_in_exclude_admins( course_id, date_for, ) data = dict(date_for=date_for, course_id=course_id) # This is the transform step # After we get this working, we can then define them declaratively # we can do a lambda for course_enrollments to get the count data['enrollment_count'] = course_enrollments.count() active_learner_ids_today = get_active_learner_ids_today( course_id, date_for, ) if active_learner_ids_today: active_learners_today = active_learner_ids_today.count() else: active_learners_today = 0 data['active_learners_today'] = active_learners_today # Average progress # Progress data cannot be reliable for backfills or for any date prior to yesterday # without using StudentModuleHistory so we skip getting this data if running # for a day earlier than previous day (i.e., not during daily update of CDMs), # especially since it is so expensive to calculate. 
# Note that Avg() applied across null and decimal vals for aggregate average_progress # will correctly ignore nulls # TODO: Reconsider this if we implement either StudentModuleHistory-based queries # (if so, you will need to add any types you want to # StudentModuleHistory.HISTORY_SAVING_TYPES) # TODO: Reconsider this once we switch to using Persistent Grades if is_past_date(date_for + relativedelta(days=1)): # more than 1 day in past data['average_progress'] = None msg = ( 'FIGURES:PIPELINE:CDM Declining to calculate average progress for a past date' ' date_for={date_for}, course_id="{course_id}"') logger.debug(msg.format(date_for=date_for, course_id=course_id)) else: try: progress_data = bulk_calculate_course_progress_data( course_id=course_id, date_for=date_for) data['average_progress'] = progress_data['average_progress'] except Exception: # pylint: disable=broad-except # Broad exception for starters. Refine as we see what gets caught # Make sure we set the average_progres to None so that upstream # does not think things are normal data['average_progress'] = None msg = ('FIGURES:FAIL bulk_calculate_course_progress_data' ' date_for={date_for}, course_id="{course_id}"') logger.exception( msg.format(date_for=date_for, course_id=course_id)) data['average_days_to_complete'] = get_average_days_to_complete( course_id, date_for, ) data['num_learners_completed'] = get_num_learners_completed( course_id, date_for, ) return data