Ejemplo n.º 1
0
def run_figures_monthly_metrics():
    """Kick off monthly metrics collection for every site.

    Queues one Celery task per site instead of collecting inline.
    """
    logger.info('Starting figures.tasks.run_figures_monthly_metrics...')
    for current_site in get_sites():
        populate_monthly_metrics_for_site.delay(site_id=current_site.id)
Ejemplo n.º 2
0
def backfill_figures_ed():
    """Backfill enrollment data for every site.

    Returns a dict mapping each site's id to that site's backfill
    results.
    """
    site_results = {}
    for current_site in get_sites():
        print('Backfilling enrollment data for site "{}"'.format(current_site.domain))
        site_results[current_site.id] = backfill_enrollment_data_for_site(current_site)
    return site_results
Ejemplo n.º 3
0
def populate_daily_metrics_next(site_id=None, force_update=False):
    """Next-generation top level Celery task for daily Figures metrics.

    Runs every 24 hours. For each site it updates the per-enrollment
    data first, then collects the daily aggregate metrics.

    Unlike the original task, this one only collects new metrics on an
    ongoing basis — it does not double as a backfill mechanism. The
    driver for the split is performance of the daily Celery jobs.

    :param site_id: optional id of a single site to process; when None,
        every site is processed
    :param force_update: passed through to the per-site collection

    TODO: Draft up public architecture docs and reference them here
    """
    if waffle.switch_is_active(WAFFLE_DISABLE_PIPELINE):
        logger.warning('Figures pipeline is disabled due to %s being active.',
                       WAFFLE_DISABLE_PIPELINE)
        return

    date_for = datetime.datetime.utcnow().date()
    sites = get_sites_by_id((site_id, )) if site_id is not None else get_sites()
    sites_count = sites.count()

    # Task entry log message
    logger.info(
        '{prefix}:START:date_for={date_for}, site_count={site_count}'.format(
            prefix=FPD_LOG_PREFIX,
            date_for=date_for,
            site_count=sites_count))

    for idx, site in enumerate(sites):
        logger.info(
            '{prefix}:SITE:START:{id}:{domain} - Site {i:04d} of {n:04d}'.format(
                prefix=FPD_LOG_PREFIX,
                id=site.id,
                domain=site.domain,
                i=idx,
                n=sites_count))
        try:
            populate_daily_metrics_for_site(site_id=site.id,
                                            date_for=date_for,
                                            ed_next=True,
                                            force_update=force_update)
        except Exception:  # pylint: disable=broad-except
            # Site-level guard: one failing site must not stop the rest
            logger.exception(
                ('{prefix}:FAIL populate_daily_metrics unhandled site level'
                 ' exception for site[{site_id}]={domain}').format(
                     prefix=FPD_LOG_PREFIX,
                     site_id=site.id,
                     domain=site.domain))

    logger.info(
        '{prefix}:END:date_for={date_for}, site_count={site_count}'.format(
            prefix=FPD_LOG_PREFIX,
            date_for=date_for,
            site_count=sites_count))
Ejemplo n.º 4
0
def populate_all_mau():
    """Top level task that kicks off MAU metrics collection.

    Initially, run it every day to observe monthly active user
    accumulation for the month and evaluate the results.
    """
    for current_site in get_sites():
        populate_mau_metrics_for_site(site_id=current_site.id, force_update=False)
    def handle(self, *args, **options):
        """Backfill Figures metrics for one named site or for all sites."""
        print('BEGIN: Backfill Figures Metrics')

        target_sites = [get_site(options['site'])] if options['site'] else get_sites()
        for current_site in target_sites:
            backfill_site(current_site, overwrite=options['overwrite'])

        print('DONE: Backfill Figures Metrics')
Ejemplo n.º 6
0
def run_figures_monthly_metrics():
    """Populate monthly metrics for all sites.

    Does nothing when the pipeline-disabling waffle switch is active;
    otherwise dispatches one Celery task per site as a single group.
    """
    if waffle.switch_is_active(WAFFLE_DISABLE_PIPELINE):
        logger.info('Figures pipeline is disabled due to %s being active.',
                    WAFFLE_DISABLE_PIPELINE)
        return

    logger.info('Starting figures.tasks.run_figures_monthly_metrics...')
    monthly_jobs = group(
        populate_monthly_metrics_for_site.s(site.id) for site in get_sites())
    monthly_jobs.delay()
Ejemplo n.º 7
0
    def handle(self, *args, **options):
        """Update Figures EnrollmentData per site, inline or via Celery."""
        print('BEGIN: Update Figures EnrollmentData')

        if options['site']:
            target_sites = [get_site(options['site'])]
        else:
            target_sites = get_sites()
        for current_site in target_sites:
            print('Updating EnrollmentData for site "{}"'.format(current_site.domain))
            # '--no-delay' runs the task synchronously for debugging
            if options['no_delay']:
                update_enrollment_data(site_id=current_site.id)
            else:
                update_enrollment_data.delay(
                    site_id=current_site.id)  # pragma: no cover

        print('DONE: Update Figures EnrollmentData')
Ejemplo n.º 8
0
 def get_site_ids(self, identifier=None):
     """Resolve an optional site id or domain to a list of Site ids.

     With no identifier, every available Site is returned. Lookup with a
     bad identifier is allowed to fail loudly. The returned ids are
     meant for passing to Celery tasks; note that at present none of
     the tasks handle more than one specified Site.
     """
     if identifier:
         # Numeric identifiers are primary keys, anything else a domain
         try:
             lookup = dict(pk=int(identifier))
         except ValueError:
             lookup = dict(domain=identifier)
         found = Site.objects.filter(**lookup)
     else:
         found = get_sites()
     return [match.id for match in found]
Ejemplo n.º 9
0
def populate_daily_metrics(site_id=None, date_for=None, force_update=False):
    """Runs Figures daily metrics collection

    This is a top level Celery task run every 24 hours to collect metrics.

    It iterates over each site to populate CourseDailyMetrics records for the
    courses in each site, then populates that site's SiteDailyMetrics record.

    :param site_id: optional id of a single site to process. When None,
        metrics are collected for every site.
    :param date_for: optional date (or value castable via ``as_date``) to
        collect metrics for. Defaults to today (UTC). A future date raises
        ``DateForCannotBeFutureError``.
    :param force_update: passed through to the per-site collection.

    Developer note: Errors need to be handled at each layer in the call chain
    1. Site
    2. Course
    3. Learner
    and for any auxiliary data collection that may be added in the future to
    this task. Those need to be wrapped in `try/except` blocks too

    This function will get reworked so that each site runs in its own
    """
    if waffle.switch_is_active(WAFFLE_DISABLE_PIPELINE):
        logger.warning('Figures pipeline is disabled due to %s being active.',
                       WAFFLE_DISABLE_PIPELINE)
        return

    # The date_for handling is very similar to the new rule we have in
    # `figures.pipeline.helpers.pipeline_data_for_rule`
    # The difference is the following code does not set 'date_for' as yesterday
    # So we likely want to rework the pipeline rule function and this code
    # so that we have a generalized date_for rule that can take an optional
    # transform function, like `prev_day`

    today = datetime.datetime.utcnow().replace(tzinfo=utc).date()
    # TODO: Decide if/how we want any special logging if we get an exception
    # on 'casting' the date_for argument as a datetime.date object
    if date_for:
        date_for = as_date(date_for)
        if date_for > today:
            msg = '{prefix}:ERROR - Attempted pipeline call with future date: "{date_for}"'
            raise DateForCannotBeFutureError(msg.format(prefix=FPD_LOG_PREFIX,
                                                        date_for=date_for))
    else:
        date_for = today

    # Don't update enrollment data if we are backfilling (loading data for
    # previous dates) as it is expensive
    do_update_enrollment_data = date_for >= today

    if site_id is not None:
        sites = get_sites_by_id((site_id, ))
    else:
        sites = get_sites()
    sites_count = sites.count()

    # This is our task entry log message
    msg = '{prefix}:START:date_for={date_for}, site_count={site_count}'
    logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                           date_for=date_for,
                           site_count=sites_count))

    if is_past_date(date_for):
        msg = ('{prefix}:INFO - CourseDailyMetrics.average_progress will not be '
               'calculated for past date {date_for}')
        logger.info(msg.format(date_for=date_for, prefix=FPD_LOG_PREFIX))

    for i, site in enumerate(sites):

        msg = '{prefix}:SITE:START:{id}:{domain} - Site {i:04d} of {n:04d}'
        logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                               id=site.id,
                               domain=site.domain,
                               i=i,
                               n=sites_count))
        try:
            populate_daily_metrics_for_site(site_id=site.id,
                                            date_for=date_for,
                                            force_update=force_update)

        except Exception:  # pylint: disable=broad-except
            # Site-level guard: one failing site must not stop the rest
            msg = ('{prefix}:FAIL populate_daily_metrics unhandled site level'
                   ' exception for site[{site_id}]={domain}')
            logger.exception(msg.format(prefix=FPD_LOG_PREFIX,
                                        site_id=site.id,
                                        domain=site.domain))

        # Until we implement signal triggers
        if do_update_enrollment_data:
            try:
                update_enrollment_data(site_id=site.id)
            except Exception:  # pylint: disable=broad-except
                msg = ('{prefix}:FAIL figures.tasks update_enrollment_data '
                       ' unhandled exception. site[{site_id}]:{domain}')
                logger.exception(msg.format(prefix=FPD_LOG_PREFIX,
                                            site_id=site.id,
                                            domain=site.domain))

        msg = '{prefix}:SITE:END:{id}:{domain} - Site {i:04d} of {n:04d}'
        logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                               id=site.id,
                               domain=site.domain,
                               i=i,
                               n=sites_count))

    msg = '{prefix}:END:date_for={date_for}, site_count={site_count}'
    logger.info(msg.format(prefix=FPD_LOG_PREFIX,
                           date_for=date_for,
                           site_count=sites_count))