def backfill_monthly_metrics_for_site(site, overwrite=False, use_raw_sql=False):
    """Backfill historical monthly site metrics for the given site.

    Starting at the first day of the month in which the site's earliest
    student module record was `created`, calls `fill_month` once per monthly
    occurrence up to (at most) one month before the current UTC time.

    Args:
        site: the site to backfill.
        overwrite: passed through to `fill_month`.
        use_raw_sql: passed to `fill_month` as its `use_raw` argument.

    Returns:
        None when the site has no student modules; otherwise a list of dicts,
        one per month processed, with the keys `obj`, `created` and `dt`.
    """
    student_modules = get_student_modules_for_site(site)
    if not student_modules:
        return None

    earliest = student_modules.order_by('created').first().created
    # Anchor the recurrence on day 1 of the earliest month
    range_start = datetime(year=earliest.year,
                           month=earliest.month,
                           day=1,
                           tzinfo=utc)
    range_end = datetime.utcnow().replace(tzinfo=utc) - relativedelta(months=1)

    results = []
    for month_start in rrule(freq=MONTHLY, dtstart=range_start, until=range_end):
        metrics_obj, was_created = fill_month(site=site,
                                              month_for=month_start,
                                              student_modules=student_modules,
                                              overwrite=overwrite,
                                              use_raw=use_raw_sql)
        results.append(dict(obj=metrics_obj,
                            created=was_created,
                            dt=month_start))
    return results
def store_mau_metrics(site, overwrite=False):
    """Save a "snapshot" of the current month's MAU metrics for a site.

    One `SiteMauMetrics` record is saved for the site as a whole, then one
    `CourseMauMetrics` record for each course key returned by
    `get_course_keys_for_site`. All records are dated with the current UTC
    date.

    Args:
        site: the site to collect metrics for.
        overwrite: passed through to each `save_metrics` call.

    Returns:
        A dict with `smo` (the site metrics object) and `cmos` (a list of the
        course metrics objects, in course key iteration order).
    """
    now = datetime.utcnow()
    snapshot_date = now.date()

    # Site-wide numbers
    site_sm = get_student_modules_for_site(site)
    site_users = get_mau_from_student_modules(student_modules=site_sm,
                                              year=now.year,
                                              month=now.month)
    site_record, _created = SiteMauMetrics.save_metrics(
        site=site,
        date_for=snapshot_date,
        data=dict(mau=site_users.count()),
        overwrite=overwrite)

    # Per-course numbers, narrowed from the site's student modules
    course_records = []
    for course_key in get_course_keys_for_site(site):
        course_users = get_mau_from_student_modules(
            student_modules=site_sm.filter(course_id=course_key),
            year=now.year,
            month=now.month)
        course_record, _created = CourseMauMetrics.save_metrics(
            site=site,
            course_id=str(course_key),
            date_for=snapshot_date,
            data=dict(mau=course_users.count()),
            overwrite=overwrite)
        course_records.append(course_record)

    return dict(smo=site_record, cmos=course_records)
def test_get_mau_from_sm_for_site(sm_test_data):
    """Check the October 2019 MAU against the site's distinct student ids.

    The fixture supplies the site under the `site` key. The set of values
    yielded by `get_mau_from_student_modules` must equal the set of distinct
    `student__id` values across the site's student modules.
    """
    site_sm = get_student_modules_for_site(sm_test_data['site'])
    mau_user_ids = get_mau_from_student_modules(student_modules=site_sm,
                                                year=2019,
                                                month=10)
    expected_user_ids = site_sm.values_list('student__id', flat=True).distinct()
    assert set(mau_user_ids) == set(expected_user_ids)
def site_mau_1g_for_month_as_of_day(site, date_for):
    """Get the MAU for the given site, as of `date_for` within its month.

    Convenience wrapper: looks up the student modules for the site and hands
    them, together with `date_for`, to `mau_1g_for_month_as_of_day`.

    Returns whatever `mau_1g_for_month_as_of_day` returns (described by the
    original author as a queryset of distinct user ids).
    """
    return mau_1g_for_month_as_of_day(
        sm_queryset=get_student_modules_for_site(site),
        date_for=date_for)
def retrieve_live_site_mau_data(site):
    """Retrieve the live count of unique active users for the whole site.

    The count covers the current UTC year and month and is computed from the
    site's student modules at call time.

    Returns:
        A dict with `count` (the active user count), `month_for` (the current
        UTC date) and `domain` (the site's domain).
    """
    site_sm = get_student_modules_for_site(site)
    now = datetime.utcnow()
    active_users = get_mau_from_student_modules(student_modules=site_sm,
                                                year=now.year,
                                                month=now.month)
    user_count = active_users.count()
    return dict(count=user_count, month_for=now.date(), domain=site.domain)
def get_site_active_users_for_date(site, date_for):
    """Get the ids of the users active on the given site on the given date.

    Takes the site's student modules, keeps the records whose `modified`
    value falls on `date_for`, and returns the distinct `student__id` values
    as a flat values list.
    """
    site_sm = get_student_modules_for_site(site)

    # For Ginkgo backward compatibility: Django 1.8 does not support
    # `modified__date=<some_value>` in filters, so year, month and day are
    # each matched separately
    same_day = dict(modified__year=date_for.year,
                    modified__month=date_for.month,
                    modified__day=date_for.day)
    modified_on_date = site_sm.filter(**same_day)
    return modified_on_date.values_list('student__id', flat=True).distinct()
def fill_month(site, month_for, student_modules=None, overwrite=False, use_raw=False):
    """Fill one month's site monthly metrics for the given site.

    Args:
        site: the site the metrics belong to.
        month_for: object providing the `year` and `month` to fill.
        student_modules: student modules to count from. When falsy (None or
            empty) they are looked up with `get_student_modules_for_site`.
        overwrite: passed through to `SiteMonthlyMetrics.add_month`.
        use_raw: when true, the count comes from the raw SQL statement built
            by `_get_fill_month_raw_sql_for_month` instead of the ORM.

    Returns:
        The `(obj, created)` pair produced by `SiteMonthlyMetrics.add_month`.
    """
    if not student_modules:
        student_modules = get_student_modules_for_site(site)

    if not student_modules:
        # Nothing recorded for the site, so there are no active users
        active_users = 0
    elif use_raw:
        # Despite the historical "site ids" naming, these are the `id` values
        # of the student module records
        if RELEASE_LINE == 'ginkgo':
            id_values = student_modules.values_list('id', flat=True).distinct()
            sm_ids = tuple(int(sm_id) for sm_id in id_values)
        else:
            # make sure we get integers and not longints from db
            from django.db.models.functions import Cast
            as_ints = student_modules.annotate(
                id_as_int=Cast('id', IntegerField()))
            sm_ids = tuple(
                as_ints.values_list('id_as_int', flat=True).distinct())

        sql = _get_fill_month_raw_sql_for_month(sm_ids, month_for)
        with connection.cursor() as cursor:
            cursor.execute(sql)
            first_row = cursor.fetchone()
            active_users = first_row[0]
    else:
        # ORM path: distinct students with a record modified in the month
        modified_in_month = student_modules.filter(
            modified__year=month_for.year,
            modified__month=month_for.month)
        active_users = modified_in_month.values_list(
            'student_id', flat=True).distinct().count()

    record, was_created = SiteMonthlyMetrics.add_month(
        site=site,
        year=month_for.year,
        month=month_for.month,
        active_user_count=active_users,
        overwrite=overwrite)
    return record, was_created
def backfill_monthly_metrics_for_site(site, overwrite):
    """Quick hack function to backfill all historical site metrics for a site.

    The returned records are deliberately verbose to help testing and
    validation, since this function was quickly put together.

    Args:
        site: the site to backfill.
        overwrite: passed through to `SiteMonthlyMetrics.add_month`.

    Returns:
        None when the site has no student modules; otherwise a list of dicts,
        one per month processed, with the keys `obj`, `created`, `dt` and
        `mau`.
    """
    student_modules = get_student_modules_for_site(site)
    if not student_modules:
        return None

    earliest = student_modules.order_by('created').first().created

    # The start is pinned to day 1 because there _might_ be a bug in
    # `dateutil.rrule`: it was skipping over February when the `created`
    # field was used directly as the start
    range_start = datetime(year=earliest.year,
                           month=earliest.month,
                           day=1).replace(tzinfo=utc)
    range_end = datetime.utcnow().replace(tzinfo=utc) - relativedelta(months=1)

    results = []
    for month_start in rrule(freq=MONTHLY, dtstart=range_start, until=range_end):
        month_mau = get_mau_from_student_modules(
            student_modules=student_modules,
            year=month_start.year,
            month=month_start.month)

        # NOTE(review): the stored count comes from records *created* in the
        # month; `month_mau` above is only returned, never stored. Confirm
        # this is intended.
        created_in_month = student_modules.filter(
            created__year=month_start.year,
            created__month=month_start.month)
        learner_ids = created_in_month.values_list(
            'student__id', flat=True).distinct()

        metrics_obj, was_created = SiteMonthlyMetrics.add_month(
            site=site,
            year=month_start.year,
            month=month_start.month,
            active_user_count=learner_ids.count(),
            overwrite=overwrite)

        results.append(dict(obj=metrics_obj,
                            created=was_created,
                            dt=month_start,
                            mau=month_mau))
    return results