예제 #1
0
def test_populate_single_cdm(transactional_db, monkeypatch):
    """Nominal case for figures.tasks.populate_single_cdm.

    The site lookup and ``CourseDailyMetricsLoader.load`` are patched. The
    fake loader builds its record with ``CourseDailyMetricsFactory``. After a
    single call to the task there must be exactly one CourseDailyMetrics
    record and its ``date_for`` must match the requested date.
    """
    date_for = '2019-01-02'
    course_id = "course-v1:certs-appsembler+001+2019"
    created = False

    def fake_load(self, date_for, **kwargs):
        # Return the (object, created) pair the task unpacks
        metrics = CourseDailyMetricsFactory(date_for=date_for)
        return metrics, created

    # Precondition: the table starts out empty
    assert CourseDailyMetrics.objects.count() == 0

    monkeypatch.setattr(
        'figures.sites.get_site_for_course', lambda val: SiteFactory())
    monkeypatch.setattr(
        'figures.pipeline.course_daily_metrics.CourseDailyMetricsLoader.load',
        fake_load)

    populate_single_cdm(course_id, date_for)

    assert CourseDailyMetrics.objects.count() == 1
    stored = CourseDailyMetrics.objects.first()
    assert as_date(stored.date_for) == as_date(date_for)
예제 #2
0
def test_populate_single_cdm(transactional_db, monkeypatch):
    """Check that populate_single_cdm leaves one CourseDailyMetrics record.

    ``CourseDailyMetricsLoader.load`` is replaced by a stub that builds the
    record through ``CourseDailyMetricsFactory``.
    """
    course_id = "course-v1:certs-appsembler+001+2019"
    date_for = '2019-01-02'
    created = False

    def stub_load(self, date_for, **kwargs):
        return CourseDailyMetricsFactory(date_for=date_for), created

    # Precondition: nothing stored yet
    assert CourseDailyMetrics.objects.count() == 0

    loader_cls = figures.pipeline.course_daily_metrics.CourseDailyMetricsLoader
    monkeypatch.setattr(loader_cls, 'load', stub_load)
    figures.tasks.populate_single_cdm(course_id, date_for)

    assert CourseDailyMetrics.objects.count() == 1
    first_cdm = CourseDailyMetrics.objects.first()
    assert as_date(first_cdm.date_for) == as_date(date_for)
예제 #3
0
def test_pipeline_date_for_rule_get_date_in_past(days_in_past):
    """Run the shared pipeline date rule asserts with a datetime in the past.

    The argument is the current UTC time moved back ``days_in_past`` days; the
    expected value is that same moment converted with ``as_date``.
    """
    utc_now = datetime.utcnow().replace(tzinfo=utc)
    past_datetime = utc_now - timedelta(days=days_in_past)
    run_pipeline_date_for_rule_asserts(past_datetime, as_date(past_datetime))
예제 #4
0
def populate_single_cdm(course_id,
                        date_for=None,
                        ed_next=False,
                        force_update=False):
    """Load the CourseDailyMetrics record for one course and date.

    Nothing is caught here: the calling function is responsible for handling
    errors raised by this function.

    :param course_id: course identifier; converted with ``as_course_key`` for
        the enrollment count and handed as-is to ``CourseDailyMetricsLoader``
    :param date_for: optional date; when truthy it is converted with
        ``as_date`` before being passed to the loader
    :param ed_next: passed through to ``CourseDailyMetricsLoader.load``
    :param force_update: passed through to ``CourseDailyMetricsLoader.load``
    """
    date_for = as_date(date_for) if date_for else date_for

    # The enrollment count only gives context in the celery log
    enrollments = CourseEnrollment.objects.filter(
        course_id=as_course_key(course_id))
    logger.debug(
        'populate_single_cdm. course id = "{}", learner count={}'.format(
            course_id, enrollments.count()))

    started_at = time.time()
    loader = CourseDailyMetricsLoader(course_id)
    cdm_obj, _created = loader.load(date_for=date_for,
                                    ed_next=ed_next,
                                    force_update=force_update)
    elapsed_time = time.time() - started_at

    logger.debug('done. Elapsed time (seconds)={}. cdm_obj={}'.format(
        elapsed_time, cdm_obj))
예제 #5
0
파일: base.py 프로젝트: ponytojas/figures
 def get_date(self, date_str=None):
     """Convert ``date_str`` with ``helpers.as_date``, defaulting to today.

     Today's date is returned when ``helpers.as_date`` raises ``TypeError``
     for the given value.
     """
     try:
         parsed = helpers.as_date(date_str)
     except TypeError:
         parsed = datetime.today().date()
     return parsed
예제 #6
0
def populate_daily_metrics(date_for=None, force_update=False):
    """Populate the course and site daily metrics for one date.

    ``populate_single_cdm`` is called for every ``CourseOverview`` and then
    ``populate_site_daily_metrics`` is called once. Both are invoked as
    immediate calls so that no courses are missed when the site daily metrics
    record is populated.

    NOTE: There is an experimental task that runs the course populators in
    parallel and populates the site metrics when they are all done. See the
    docstring of ``experimental_populate_daily_metrics`` for details.

    TODO: Add error handling and error logging
    TODO: Create and add decorator to assign 'date_for' if None

    :param date_for: optional date; converted with ``as_date`` when truthy,
        otherwise the current UTC date is used
    :param force_update: passed through to both populate calls
    """
    if not date_for:
        date_for = datetime.datetime.utcnow().replace(tzinfo=utc).date()
    else:
        date_for = as_date(date_for)

    logger.info(
        'Starting task "figures.populate_daily_metrics" for date "{}"'.format(
            date_for))

    # Arguments shared by the per-course calls and the site call
    shared_args = dict(date_for=date_for, force_update=force_update)
    for course_overview in CourseOverview.objects.all():
        populate_single_cdm(course_id=course_overview.id, **shared_args)
    populate_site_daily_metrics(**shared_args)

    logger.info(
        'Finished task "figures.populate_daily_metrics" for date "{}"'.format(
            date_for))
예제 #7
0
def get_course_mau_history_metrics(site, course_id, date_for, months_back):
    """Return the course MAU count for each month yielded for ``date_for``.

    Course MAU counterpart of ``get_monthly_history_metric``.

    :param site: passed to ``get_mau_from_site_course``
    :param course_id: passed to ``get_mau_from_site_course``
    :param date_for: converted with ``as_date`` and used as ``month_for``
    :param months_back: passed to ``previous_months_iterator``
    :return: dict with ``history``, a list of ``period``/``value`` dicts
        (period formatted as ``YYYY/MM``), and ``current_month``, the value
        of the last history entry or 0 when the history is empty
    """
    months = previous_months_iterator(month_for=as_date(date_for),
                                      months_back=months_back)
    history = [
        {
            'period': '{year}/{month}'.format(year=year,
                                              month=str(month).zfill(2)),
            'value': get_mau_from_site_course(site=site,
                                              course_id=course_id,
                                              year=year,
                                              month=month).count(),
        }
        for year, month, _ in months
    ]

    # The last entry is the current month; 0 also compares equal to 0.0
    current_month = history[-1]['value'] if history else 0
    return {'current_month': current_month, 'history': history}
예제 #8
0
파일: tasks.py 프로젝트: regisb/figures
def populate_daily_metrics(date_for=None, force_update=False):
    """Populate the daily metrics models for the given date, site by site.

    For every ``Site``, ``populate_single_cdm`` is called for each course
    returned by ``figures.sites.get_courses_for_site`` and then
    ``populate_site_daily_metrics`` is called for that site. These are made
    as immediate calls so that no courses are missed when the site daily
    metrics record is populated.

    An exception raised while populating a single course is logged, recorded
    through ``log_error_to_db`` and does not stop the remaining courses.

    NOTE: There is an experimental task that runs the course populators in
    parallel and populates the site metrics when they are all done. See the
    docstring of ``experimental_populate_daily_metrics`` for details.

    TODO: Create and add decorator to assign 'date_for' if None

    :param date_for: optional date; converted with ``as_date`` when truthy,
        otherwise the current UTC date is used
    :param force_update: passed through to the populate calls
    """
    if not date_for:
        date_for = datetime.datetime.utcnow().replace(tzinfo=utc).date()
    else:
        date_for = as_date(date_for)

    logger.info(
        'Starting task "figures.populate_daily_metrics" for date "{}"'.format(
            date_for))

    failure_msg = 'figures.tasks.populate_daily_metrics failed'
    for site in Site.objects.all():
        for course in figures.sites.get_courses_for_site(site):
            try:
                populate_single_cdm(course_id=course.id,
                                    date_for=date_for,
                                    force_update=force_update)
            except Exception as e:
                logger.exception(failure_msg)
                # Always capture CDM load exceptions to the Figures pipeline
                # error table
                error_data = {
                    'date_for': date_for,
                    'msg': failure_msg,
                    'exception_class': e.__class__.__name__,
                }
                if hasattr(e, 'message_dict'):
                    error_data['message_dict'] = e.message_dict
                log_error_to_db(
                    error_data=error_data,
                    error_type=PipelineError.COURSE_DATA,
                    course_id=str(course.id),
                    site=site,
                    logger=logger,
                    log_pipeline_errors_to_db=True,
                )
        # Site metrics run only after every course of the site was attempted
        populate_site_daily_metrics(site_id=site.id,
                                    date_for=date_for,
                                    force_update=force_update)

    logger.info(
        'Finished task "figures.populate_daily_metrics" for date "{}"'.format(
            date_for))
예제 #9
0
def get_monthly_history_metric(func,
                               site,
                               date_for,
                               months_back,
                               include_current_in_history=True):  # pylint: disable=unused-argument
    """Build a monthly time series for one metric.

    Calls ``func`` once for each month yielded by
    ``previous_months_iterator`` and collects the results.

    :param func: the function called for each time point; it is invoked with
        the keyword arguments ``site``, ``start_date`` and ``end_date``
    :param site: passed through to ``func``
    :param date_for: the most recent date for which data is generated (the
        "current month"); converted with ``as_date``
    :param months_back: how many months back to retrieve data
    :param include_current_in_history: accepted but not used by this function
    :type func: Python function
    :type date_for: datetime.datetime, datetime.date, or date as a string
    :type months_back: integer
    :type include_current_in_history: boolean
    :return: a dict with two keys. ``history`` is a list of dicts, each with
        ``period`` (from ``period_str``) and ``value`` (the result of
        ``func``). ``current_month`` is the value of the last history entry,
        or 0 when the history is empty
    :rtype: dict
    """
    months = previous_months_iterator(month_for=as_date(date_for),
                                      months_back=months_back)
    history = []
    for month in months:
        period = period_str(month)
        # Each item is indexed as (year, month, day); the third element is
        # used as the end day of the period
        first_day = datetime.date(month[0], month[1], 1)
        last_day = datetime.date(month[0], month[1], month[2])
        history.append({
            'period': period,
            'value': func(site=site, start_date=first_day, end_date=last_day),
        })

    # The last entry is the current month; 0 also compares equal to 0.0
    current_month = history[-1]['value'] if history else 0
    return {'current_month': current_month, 'history': history}
def test_most_recent_with_data(db):
    """The manager query returns the newer of two existing records.

    Two LearnerCourseGradeMetrics records are created for the same user and
    course with different ``date_for`` values; the query must return the one
    with the later date.
    """
    user = UserFactory()
    older_date, newer_date = as_date('2020-02-02'), as_date('2020-04-01')
    course_overview = CourseOverviewFactory()
    course_id_str = str(course_overview.id)

    older_lcgm, newer_lcgm = [
        LearnerCourseGradeMetricsFactory(user=user,
                                         course_id=course_id_str,
                                         date_for=day)
        for day in (older_date, newer_date)
    ]
    assert older_lcgm.date_for != newer_lcgm.date_for

    manager = LearnerCourseGradeMetrics.objects
    found = manager.most_recent_for_learner_course(
        user=user, course_id=course_overview.id)
    assert found == newer_lcgm
예제 #11
0
파일: tasks.py 프로젝트: fava9410/figures
def experimental_populate_daily_metrics(date_for=None, force_update=False):
    """Experimental task that populates daily metrics with a celery chord.

    One ``populate_single_cdm`` signature is built per included course and
    ``populate_site_daily_metrics`` is used as the chord callback.

    WARNING: In Ginkgo devstack, this task tends to get stuck in the middle
    of processing course metrics. Not all the courses get processed and the
    site metrics doesn't get called.

    It is kept in the tasks so that debugging can continue. Enabling parallel
    course tasks will improve the pipeline performance.

    :param date_for: optional date; converted with ``as_date`` when truthy,
        otherwise the current UTC date is used. It is passed to the subtasks
        as a ``YYYY-MM-DD`` string
    :param force_update: passed through to the subtasks
    :return: the result of invoking the chord
    """
    def include_course(course_overview, threshold=50):
        """Tell whether the course has at most ``threshold`` enrollments.

        Lets us skip over courses with many enrollments, speeding up testing.
        Do not use for production.
        """
        enrolled = CourseEnrollment.objects.filter(
            course_id=course_overview.id).count()
        return not enrolled > threshold

    if date_for:
        day = as_date(date_for)
    else:
        day = datetime.datetime.utcnow().replace(tzinfo=utc).date()
    date_for = day.strftime("%Y-%m-%d")

    logger.info(
        'Starting task "figures.experimental_populate_daily_metrics" for date "{}"'
        .format(date_for))

    cdm_tasks = []
    for course in CourseOverview.objects.all():
        if not include_course(course):
            continue
        cdm_tasks.append(populate_single_cdm.s(
            course_id=unicode(course.id),  # noqa: F821
            date_for=date_for,
            force_update=force_update))

    workflow = chord(cdm_tasks)
    results = workflow(populate_site_daily_metrics.s(
        date_for=date_for, force_update=force_update))

    # TODO: Are we going to update the SDM for the day if
    # * course records were created, meaning there are data not added to the SDM
    # * the SDM record already exists
    # * force_update is not true

    logger.info(
        'Finished task "figures.experimental_populate_daily_metrics" for date "{}"'
        .format(date_for))

    return results
예제 #12
0
    def student_modules_active_on_date(self, date_for):
        """Filter ``self.student_modules`` to records active on the date.

        A record is active when either its ``created`` or its ``modified``
        field falls on the given date.

        NOTE: Year/month/day lookups are used instead of simply
        ``modified__date=date_for`` because Django 1.8/Ginkgo still has to be
        supported.
        """
        day = as_date(date_for)

        def on_day(field):
            # Q matching ``field`` on the year, month and day of ``day``
            return Q(**{
                field + '__year': day.year,
                field + '__month': day.month,
                field + '__day': day.day,
            })

        return self.student_modules.filter(
            on_day('created') | on_day('modified'))
예제 #13
0
def test_mau_1g_for_month_as_of_day_first_day_next_month(db):
    """
    Test getting live MAU 1G values from StudentModule for the given day

    Quick-n-dirty data setup:

    We want to make sure we get the right records when the query happens on
    the first day of the next month. So we add StudentModule records for

    * the month before the one we want to capture
    * the month we want to capture
    * the month after the one we want to capture

    This mirrors the daily pipeline capturing MAU "as of" yesterday (the last
    day of the previous month): only records of the previous month must be
    captured, not older ones and not those of the "current month".
    """
    date_for = as_date('2020-03-31')
    month_before = [as_datetime('2020-02-02'), as_datetime('2020-02-29')]
    month_after = [as_datetime('2020-04-01'), as_datetime('2020-04-01 12:00')]
    in_month = [
        as_datetime(stamp) for stamp in ('2020-03-01',
                                         '2020-03-15',
                                         '2020-03-31',
                                         '2020-03-31 12:00')
    ]

    # Records outside the month of interest: these must not be returned
    for stamp in month_before + month_after:
        StudentModuleFactory(created=stamp, modified=stamp)

    # Records inside the month of interest
    sm_in = []
    for stamp in in_month:
        sm_in.append(StudentModuleFactory(created=stamp, modified=stamp))
    expected_user_ids = {obj.student_id for obj in sm_in}

    user_ids = mau_1g_for_month_as_of_day(
        sm_queryset=StudentModule.objects.all(), date_for=date_for)

    assert len(user_ids) == len(in_month)
    assert {rec['student__id'] for rec in user_ids} == expected_user_ids
예제 #14
0
def populate_single_cdm(course_id, date_for=None, force_update=False):
    """Load the CourseDailyMetrics record for one course and date.

    :param course_id: course identifier; converted with ``as_course_key`` for
        the enrollment count and handed as-is to ``CourseDailyMetricsLoader``
    :param date_for: optional date; when truthy it is converted with
        ``as_date`` before being passed to the loader
    :param force_update: passed through to ``CourseDailyMetricsLoader.load``
    """
    date_for = as_date(date_for) if date_for else date_for

    # The enrollment count only gives context in the celery log
    enrollments = CourseEnrollment.objects.filter(
        course_id=as_course_key(course_id))
    logger.info(
        'populate_single_cdm. course id = "{}", learner count={}'.format(
            course_id, enrollments.count()))

    started_at = time.time()
    loader = CourseDailyMetricsLoader(course_id)
    cdm_obj, created = loader.load(date_for=date_for,
                                   force_update=force_update)
    elapsed_time = time.time() - started_at

    logger.info('done. Elapsed time (seconds)={}. cdm_obj={}'.format(
        elapsed_time, cdm_obj))
예제 #15
0
파일: tasks.py 프로젝트: ponytojas/figures
def populate_course_mau(site_id, course_id, month_for=None, force_update=False):
    """Collect the MAU for the given site, course, and month.

    :param site_id: id used to look up the ``Site``
    :param course_id: passed to ``collect_course_mau`` as ``courselike``
    :param month_for: optional date; converted with ``as_date`` when truthy,
        otherwise the current UTC date is used
    :param force_update: passed to ``collect_course_mau`` as ``overwrite``
    """
    if not month_for:
        month_for = datetime.datetime.utcnow().date()
    else:
        month_for = as_date(month_for)

    site = Site.objects.get(id=site_id)

    started_at = time.time()
    obj, _created = collect_course_mau(site=site,
                                       courselike=course_id,
                                       month_for=month_for,
                                       overwrite=force_update)
    # A falsy object is reported as a failure but does not raise
    if not obj:
        logger.error('populate_course_mau failed for course {course_id}'.format(
            course_id=str(course_id)))
    elapsed_time = time.time() - started_at

    logger.info('populate_course_mau Elapsed time (seconds)={}. cdm_obj={}'.format(
        elapsed_time, obj))
예제 #16
0
def pipeline_date_for_rule(date_for):
    """Resolve the 'date_for' date used for daily pipeline processing.

    * If 'date_for' is falsy (e.g. 'None') or is today, the result of
      ``prev_day(today)`` is returned
    * If 'date_for' is a date in the past, it is returned converted with
      ``as_date``
    * If 'date_for' is in the future, `DateForCannotBeFutureError` is raised

    "Today" is the current UTC date. As part of normal Figures data
    collection, the pipeline must collect data from the previous calendar
    day, assuming all timestamps are UTC, in order to build a complete
    picture of a 24 hour period.

    This function exists to keep this logic in a single place. The logic is
    specific to the pipeline, so it belongs in Figures' pipeline namespace.
    It may be reworked as a decorator or as part of core functionality in a
    base class from which daily metrics classes can derive.
    """
    today = datetime.utcnow().replace(tzinfo=utc).date()
    if not date_for:
        return prev_day(today)

    # We work on the calendar day, and the daily metrics models use date and
    # not datetime for their 'date_for' fields
    date_for = as_date(date_for)

    if date_for > today:
        msg = 'Attempted pipeline call with future date: "{date_for}"'
        raise DateForCannotBeFutureError(msg.format(date_for=date_for))
    if date_for == today:
        return prev_day(today)

    # Either we are backfilling data (the date is prior to yesterday) or the
    # caller explicitly requested yesterday
    return date_for
예제 #17
0
 def test_get_now_from_unicode(self):
     """as_date converts a ``six.text_type`` 'YYYY-MM-DD' string to a date."""
     date_format = '%Y-%m-%d'
     a_date_str = six.text_type(self.now.strftime(date_format))
     assert isinstance(a_date_str, six.text_type)
     assert as_date(a_date_str) == self.now.date()
예제 #18
0
파일: metrics.py 프로젝트: Bhanditz/figures
def get_monthly_site_metrics(date_for=None, **kwargs):
    """Gets current metrics with history

    :param date_for: if specified, that date is used as the 'current' date,
        otherwise the current UTC date is used. Useful for testing and for
        looking at past days as 'today'
    :param kwargs: ``months_back`` may be given to set how many months of
        history are requested; it defaults to 6
    :return: dict with one entry per metric. Each value is whatever
        ``get_monthly_history_metric`` returns for that metric

    TODO: Add site filter for multi-tenancy

    The returned keys are ``monthly_active_users``, ``total_site_users``,
    ``total_site_coures`` (sic; the spelling is part of the returned data),
    ``total_course_enrollments`` and ``total_course_completions``.

    Intended shape of the data, as sketched in the original notes
    (NOTE(review): the exact keys inside each entry come from
    ``get_monthly_history_metric`` - confirm there):

    {
      "monthly_active_users": {
        "current_month": 1323,
        "history": [
          {
            "period": "April 2018 (best to be some standardised Date format that I can parse)",
            "value": 1022,
          },
          ...
        ]
      },
      "total_site_users": {...},         // registered users for org/site
      "total_site_courses": {...},
      "total_course_enrollments": {...}, // sum of users enrolled in all courses
      "total_course_completions": {...}  // course completions in the month
    }
    """
    if not date_for:
        date_for = datetime.datetime.utcnow().date()
    else:
        date_for = as_date(date_for)

    months_back = kwargs.get('months_back', 6)  # Warning: magic number

    ##
    # Brute force this for now. Later, refactor to define getters externally,
    # and rely more on the serializers to stitch data together to respond
    ##
    # Then, we can put the method calls into a dict, load the dict from
    # settings, for example, or a Django model

    # Metric name -> getter, in the order the metrics are retrieved
    metric_getters = (
        ('monthly_active_users', get_active_users_for_time_period),
        ('total_site_users', get_total_site_users_for_time_period),
        ('total_site_coures', get_total_site_courses_for_time_period),
        ('total_course_enrollments', get_total_enrollments_for_time_period),
        ('total_course_completions',
         get_total_course_completions_for_time_period),
    )

    return {
        name: get_monthly_history_metric(func=getter,
                                         date_for=date_for,
                                         months_back=months_back)
        for name, getter in metric_getters
    }
예제 #19
0
            created,
        )

    monkeypatch.setattr(
        'figures.pipeline.site_daily_metrics.SiteDailyMetricsLoader.load',
        mock_sdm_load)

    populate_single_sdm(site.id, date_for=date_for)

    assert SiteDailyMetrics.objects.count() == 1


@pytest.mark.parametrize(
    'date_for',
    ['2020-12-12',
     as_date('2020-12-12'),
     as_datetime('2020-12-12')])
def test_populate_daily_metrics_for_site_basic(transactional_db, monkeypatch,
                                               date_for):
    """Prepare fakes for a basic per-site daily metrics run.

    Parametrized so that ``date_for`` is supplied as a plain string, as the
    result of ``as_date`` and as the result of ``as_datetime``.

    NOTE(review): the part of the body visible here only builds the fixtures
    and fakes; the call to the function under test and the final assertions
    are not shown in this snippet - confirm against the full test module.
    """
    site = SiteFactory()
    course_ids = ['fake-course-1', 'fake-course-2']
    collected_course_ids = []

    # Fake CDM populator: records every course id it is called with
    def fake_populate_single_cdm(course_id, **_kwargs):
        collected_course_ids.append(course_id)

    # Fake SDM populator: checks it is called for the site created above
    def fake_populate_single_sdm(site_id, **_kwargs):
        assert site_id == site.id

    # Any site lookup through figures.tasks.site_course_ids yields the two
    # fake course ids
    monkeypatch.setattr('figures.tasks.site_course_ids',
                        lambda site: course_ids)
예제 #20
0
def populate_daily_metrics(date_for=None, force_update=False):
    '''Populates the daily metrics models for the given date, site by site

    For every ``Site`` this method
    1. calls ``populate_single_cdm`` for each course returned by
       ``figures.sites.get_courses_for_site``
    2. calls ``populate_site_daily_metrics`` for the site
    3. calls ``update_enrollment_data`` for the site

    ``populate_single_cdm`` and ``populate_site_daily_metrics`` are made as
    immediate calls so that no courses are missed when the site daily metrics
    record is populated.

    Error handling:
    * An exception from a single course is logged, recorded through
      ``log_error_to_db`` and does not stop the remaining courses
    * An exception from ``update_enrollment_data`` is logged
    * Any other exception raised while processing a site is logged and the
      loop moves on to the next site

    NOTE: We have an experimental task that runs the course populators in
    parallel, then when they are all done, populates the site metrics. See
    the function ``experimental_populate_daily_metrics`` docstring for details

    TODO: Create and add decorator to assign 'date_for' if None

    :param date_for: optional date; converted with ``as_date`` when truthy,
        otherwise the current UTC date is used
    :param force_update: passed through to the populate calls
    '''
    if date_for:
        date_for = as_date(date_for)
    else:
        date_for = datetime.datetime.utcnow().replace(tzinfo=utc).date()

    logger.info(
        'Starting task "figures.populate_daily_metrics" for date "{}"'.format(
            date_for))

    sites_count = Site.objects.count()
    # Count sites from 1 so the progress line ends with "Site N of N" rather
    # than "Site N-1 of N"
    for i, site in enumerate(Site.objects.all(), 1):
        try:
            for course in figures.sites.get_courses_for_site(site):
                try:
                    populate_single_cdm(course_id=course.id,
                                        date_for=date_for,
                                        force_update=force_update)
                except Exception as e:  # pylint: disable=broad-except
                    logger.exception(
                        'figures.tasks.populate_daily_metrics failed')
                    # Always capture CDM load exceptions to the Figures pipeline
                    # error table
                    error_data = dict(
                        date_for=date_for,
                        msg='figures.tasks.populate_daily_metrics failed',
                        exception_class=e.__class__.__name__,
                    )
                    if hasattr(e, 'message_dict'):
                        error_data['message_dict'] = e.message_dict  # pylint: disable=no-member
                    log_error_to_db(
                        error_data=error_data,
                        error_type=PipelineError.COURSE_DATA,
                        course_id=str(course.id),
                        site=site,
                        logger=logger,
                        log_pipeline_errors_to_db=True,
                    )
            populate_site_daily_metrics(site_id=site.id,
                                        date_for=date_for,
                                        force_update=force_update)

            # Until we implement signal triggers
            try:
                update_enrollment_data(site_id=site.id)
            except Exception:  # pylint: disable=broad-except
                msg = ('FIGURES:FAIL figures.tasks update_enrollment_data '
                       ' unhandled exception. site[{}]:{}')
                logger.exception(msg.format(site.id, site.domain))

        except Exception:  # pylint: disable=broad-except
            # Keep going: one failing site must not block the other sites
            msg = ('FIGURES:FAIL populate_daily_metrics unhandled site level'
                   ' exception for site[{}]={}')
            logger.exception(msg.format(site.id, site.domain))
        logger.info(
            "figures.populate_daily_metrics: finished Site {:04d} of {:04d}".
            format(i, sites_count))
    logger.info(
        'Finished task "figures.populate_daily_metrics" for date "{}"'.format(
            date_for))
예제 #21
0
파일: tasks.py 프로젝트: ponytojas/figures
def populate_daily_metrics(site_id=None, date_for=None, force_update=False):
    """Runs Figures daily metrics collection

    This is a top level Celery task run every 24 hours to collect metrics.

    For each selected site it calls ``populate_daily_metrics_for_site`` and,
    unless ``date_for`` is before today (backfill), ``update_enrollment_data``.
    An exception raised by either call is logged and processing continues.

    When the ``WAFFLE_DISABLE_PIPELINE`` switch is active, a warning is
    logged and nothing else is done.

    Developer note: Errors need to be handled at each layer in the call chain
    1. Site
    2. Course
    3. Learner
    and for any auxiliary data collection that may be added in the future to
    this task. Those need to be wrapped in `try/except` blocks too

    This function is expected to be reworked so that each site is processed
    on its own.

    :param site_id: when not None, only this site is processed (looked up
        with ``get_sites_by_id``); otherwise all sites from ``get_sites``
    :param date_for: optional date; converted with ``as_date`` when truthy,
        otherwise today's UTC date is used
    :param force_update: passed to ``populate_daily_metrics_for_site``
    :raises DateForCannotBeFutureError: when ``date_for`` is later than
        today's UTC date
    """
    if waffle.switch_is_active(WAFFLE_DISABLE_PIPELINE):
        logger.warning('Figures pipeline is disabled due to %s being active.',
                       WAFFLE_DISABLE_PIPELINE)
        return

    # The date_for handling is very similar to the new rule we have in
    # `figures.pipeline.helpers.pipeline_data_for_rule`
    # The difference is the following code does not set 'date_for' as yesterday
    # So we likely want to rework the pipeline rule function and this code
    # so that we have a generalized date_for rule that can take an optional
    # transform function, like `prev_day`

    today = datetime.datetime.utcnow().replace(tzinfo=utc).date()
    # TODO: Decide if/how we want any special logging if we get an exception
    # on 'casting' the date_for argument as a datetime.date object
    if date_for:
        date_for = as_date(date_for)
        if date_for > today:
            msg = '{prefix}:ERROR - Attempted pipeline call with future date: "{date_for}"'
            raise DateForCannotBeFutureError(msg.format(prefix=FPD_LOG_PREFIX,
                                                        date_for=date_for))
    else:
        date_for = today

    # Don't update enrollment data if we are backfilling (loading data for
    # previous dates) as it is expensive
    do_update_enrollment_data = date_for >= today

    if site_id is not None:
        sites = get_sites_by_id((site_id, ))
    else:
        sites = get_sites()
    sites_count = sites.count()

    # This is our task entry log message
    msg = '{prefix}:START:date_for={date_for}, site_count={site_count}'
    logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                           date_for=date_for,
                           site_count=sites_count))

    if is_past_date(date_for):
        msg = ('{prefix}:INFO - CourseDailyMetrics.average_progress will not be '
               'calculated for past date {date_for}')
        logger.info(msg.format(date_for=date_for, prefix=FPD_LOG_PREFIX))

    # Count sites from 1 so the progress lines run "Site 0001 of N" through
    # "Site N of N" instead of stopping at N-1
    for i, site in enumerate(sites, 1):

        msg = '{prefix}:SITE:START:{id}:{domain} - Site {i:04d} of {n:04d}'
        logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                               id=site.id,
                               domain=site.domain,
                               i=i,
                               n=sites_count))
        try:
            populate_daily_metrics_for_site(site_id=site.id,
                                            date_for=date_for,
                                            force_update=force_update)

        except Exception:  # pylint: disable=broad-except
            # Keep going: one failing site must not block the other sites
            msg = ('{prefix}:FAIL populate_daily_metrics unhandled site level'
                   ' exception for site[{site_id}]={domain}')
            logger.exception(msg.format(prefix=FPD_LOG_PREFIX,
                                        site_id=site.id,
                                        domain=site.domain))

        # Until we implement signal triggers
        if do_update_enrollment_data:
            try:
                update_enrollment_data(site_id=site.id)
            except Exception:  # pylint: disable=broad-except
                msg = ('{prefix}:FAIL figures.tasks update_enrollment_data '
                       ' unhandled exception. site[{site_id}]:{domain}')
                logger.exception(msg.format(prefix=FPD_LOG_PREFIX,
                                            site_id=site.id,
                                            domain=site.domain))

        msg = '{prefix}:SITE:END:{id}:{domain} - Site {i:04d} of {n:04d}'
        logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                               id=site.id,
                               domain=site.domain,
                               i=i,
                               n=sites_count))

    msg = '{prefix}:END:date_for={date_for}, site_count={site_count}'
    logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                           date_for=date_for,
                           site_count=sites_count))
예제 #22
0
 def test_get_now_from_str(self):
     """as_date converts a ``str`` in 'YYYY-MM-DD' form to the matching date."""
     date_format = '%Y-%m-%d'
     a_date_str = self.now.strftime(date_format)
     assert isinstance(a_date_str, str)
     assert as_date(a_date_str) == self.now.date()
예제 #23
0
 def test_get_now_from_invalid_string(self):
     """as_date raises ValueError for a string that is not a date."""
     not_a_date = 'Hello World'
     with pytest.raises(ValueError):
         as_date(not_a_date)
예제 #24
0
 def test_get_now_from_invalid_type(self):
     """as_date raises TypeError when given a dict."""
     unsupported = {'foo': 'bar'}
     with pytest.raises(TypeError):
         as_date(unsupported)
예제 #25
0
def get_monthly_site_metrics(site, date_for=None, **kwargs):
    """Collect the monthly site metrics, each with its history

    Each metric is produced by one call to ``get_monthly_history_metric``.
    All calls share the same ``site``, ``date_for`` and ``months_back``
    arguments and differ only in the getter function passed as ``func``.

    :param site: The site object for which to collect site metrics
    :param date_for: The date for which to collect site metrics. Optional.
                     A falsy value (the default) selects the current UTC date
    :param months_back: Optional keyword argument, forwarded unchanged to
                        ``get_monthly_history_metric``. Defaults to 6
    :type site: django.contrib.sites.models.Site
    :type date_for: datetime.datetime, datetime.date, or date as a string
    :return: Site metrics for a month ending on ``date_for``, or "today" if
             ``date_for`` is not specified
    :rtype: dict

    The returned dict has exactly the five keys shown below. The inner
    structure of each value is built by ``get_monthly_history_metric``, which
    is defined outside this function; the intended shape is:

    {
      "monthly_active_users": {
        "current_month": 1323,
        "history": [
          // "period" is best given in a standardised, parseable date format
          {"period": "April 2018", "value": 1022},
          {"period": "March 2018", "value": 1022},
          ...
        ]
      },
      "total_site_users": {
        // represents total number of registered users for org/site
        "current": 4931,
        "history": [{"period": "April 2018", "value": 4899}, ...]
      },
      "total_site_courses": {
        "current": 19,
        "history": [{"period": "April 2018", "value": 17}, ...]
      },
      "total_course_enrollments": {
        // sum of number of users enrolled in all courses
        "current": 7911,
        "history": [{"period": "April 2018", "value": 5911}, ...]
      },
      "total_course_completions": {
        // number of times user has completed a course in this month
        "current": 129,
        "history": [{"period": "April 2018", "value": 101}, ...]
      }
    }
    """
    end_date = (
        as_date(date_for) if date_for else datetime.datetime.utcnow().date()
    )

    history_depth = kwargs.get('months_back', 6)  # Warning: magic number

    # Brute force for now. The getters are listed here in the order they are
    # evaluated. A later refactor could load this table from settings or a
    # Django model and lean on the serializers to assemble the response
    metric_getters = (
        ('monthly_active_users', get_active_users_for_time_period),
        ('total_site_users', get_total_site_users_for_time_period),
        ('total_site_courses', get_total_site_courses_for_time_period),
        ('total_course_enrollments', get_total_enrollments_for_time_period),
        ('total_course_completions',
         get_total_course_completions_for_time_period),
    )

    return {
        metric_name: get_monthly_history_metric(
            func=getter,
            site=site,
            date_for=end_date,
            months_back=history_depth,
        )
        for metric_name, getter in metric_getters
    }
예제 #26
0
 def test_get_now_from_datetime(self):
     """Check that ``as_date`` returns the date part of a datetime"""
     moment = self.now
     expected_date = moment.date()
     assert isinstance(moment, datetime.datetime)
     assert as_date(moment) == expected_date