def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    raise_if_reindex_in_progress('amo')

    # Aggregate download counts per (addon, collection) pair.
    totals = (
        AddonCollectionCount.objects
        .values('addon', 'collection')
        .annotate(sum=Sum('count'))
    )

    # Fan the aggregated rows out to celery in chunks of 100.
    signatures = [
        tasks.update_addons_collections_downloads.subtask(args=[chunk])
        for chunk in chunked(totals, 100)
    ]
    group(signatures).apply_async()
def update_addons_collections_downloads():
    """Update addons+collections download totals.

    Aggregates per-(addon, collection) download counts and dispatches the
    aggregated rows to celery tasks in chunks of 100.
    """
    raise_if_reindex_in_progress('amo')

    d = (AddonCollectionCount.objects.values('addon', 'collection')
         .annotate(sum=Sum('count')))

    ts = [tasks.update_addons_collections_downloads.subtask(args=[chunk])
          for chunk in chunked(d, 100)]
    # TaskSet was deprecated and removed in Celery 4; group() is the
    # supported drop-in replacement for fire-and-forget dispatch (and is
    # what the other cron jobs in this file already use).
    group(ts).apply_async()
def index_latest_stats(index=None):
    """Index stats from the most recently indexed date through today."""
    def fmt(d):
        return d.strftime('%Y-%m-%d')

    raise_if_reindex_in_progress('amo')

    # Find the newest date already present in the index; default to
    # yesterday when the index is empty.
    latest = DownloadCount.search(index).order_by('-date').values_dict('date')
    if latest:
        latest = latest[0]['date']
    else:
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        latest = fmt(yesterday)

    date_range = '%s:%s' % (latest, fmt(datetime.date.today()))
    log.info('index_stats --date=%s' % date_range)
    call_command('index_stats', addons=None, date=date_range)
def index_latest_stats(index=None):
    """Index stats from the most recently indexed date through today."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    def fmt(d):
        return d.strftime('%Y-%m-%d')

    raise_if_reindex_in_progress('amo')

    # Newest date already indexed; fall back to yesterday when empty.
    latest = UpdateCount.search(index).order_by('-date').values_dict('date')
    if latest:
        latest = latest[0]['date']
    else:
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        latest = fmt(yesterday)

    date_range = '%s:%s' % (latest, fmt(datetime.date.today()))
    cron_log.info('index_stats --date=%s' % date_range)
    call_command('index_stats', addons=None, date=date_range)
def weekly_downloads():
    """
    Update 7-day add-on download counts.
    """
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    raise_if_reindex_in_progress('amo')

    # Sum download counts per add-on over the trailing 7 days.
    with connection.cursor() as cursor:
        cursor.execute("""
            SELECT addon_id, SUM(count) AS weekly_count
            FROM download_counts
            WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
            GROUP BY addon_id
            ORDER BY addon_id""")
        counts = cursor.fetchall()

    addon_ids = [r[0] for r in counts]

    # Nothing downloaded this week: nothing to update.
    if not addon_ids:
        return

    with connection.cursor() as cursor:
        # Add-ons with no downloads this week get an explicit 0 so their
        # stale weekly totals are reset rather than left as-is.
        cursor.execute(
            """
            SELECT id, 0
            FROM addons
            WHERE id NOT IN %s""",
            (addon_ids, ))
        counts += cursor.fetchall()

        # Bulk-update via a temporary table joined to `addons` — one
        # UPDATE instead of one statement per add-on.
        cursor.execute("""
            CREATE TEMPORARY TABLE tmp_wd
            (addon_id INT PRIMARY KEY, count INT)""")
        cursor.execute(
            'INSERT INTO tmp_wd VALUES %s'
            % ','.join(['(%s,%s)'] * len(counts)),
            list(itertools.chain(*counts)))

        cursor.execute("""
            UPDATE addons INNER JOIN tmp_wd
                ON addons.id = tmp_wd.addon_id
            SET weeklydownloads = tmp_wd.count""")
        cursor.execute("DROP TABLE IF EXISTS tmp_wd")
def weekly_downloads():
    """
    Update 7-day add-on download counts.

    Totals `download_counts` rows per add-on for the trailing 7 days,
    zeroes out add-ons with no recent downloads, and bulk-updates
    `addons.weeklydownloads` via a temporary table.
    """
    if not waffle.switch_is_active("local-statistics-processing"):
        return False

    raise_if_reindex_in_progress("amo")

    # Use the cursor as a context manager so it is always closed — the
    # original raw cursor leaked on the early `return` below and on any
    # query error.
    with connection.cursor() as cursor:
        cursor.execute(
            """
            SELECT addon_id, SUM(count) AS weekly_count
            FROM download_counts
            WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
            GROUP BY addon_id
            ORDER BY addon_id"""
        )
        counts = cursor.fetchall()

    addon_ids = [r[0] for r in counts]

    if not addon_ids:
        return

    with connection.cursor() as cursor:
        # Add-ons with no downloads this week get an explicit 0 so stale
        # totals are reset rather than left untouched.
        cursor.execute(
            """
            SELECT id, 0
            FROM addons
            WHERE id NOT IN %s""",
            (addon_ids,),
        )
        counts += cursor.fetchall()

        # Bulk-update via a temporary table joined to `addons`.
        cursor.execute(
            """
            CREATE TEMPORARY TABLE tmp_wd
            (addon_id INT PRIMARY KEY, count INT)"""
        )
        cursor.execute(
            "INSERT INTO tmp_wd VALUES %s"
            % ",".join(["(%s,%s)"] * len(counts)),
            list(itertools.chain(*counts)),
        )

        cursor.execute(
            """
            UPDATE addons INNER JOIN tmp_wd
                ON addons.id = tmp_wd.addon_id
            SET weeklydownloads = tmp_wd.count"""
        )
        cursor.execute("DROP TABLE IF EXISTS tmp_wd")
def update_global_totals(date=None):
    """Update global statistics totals.

    Args:
        date: optional 'YYYY-MM-DD' string. When omitted, daily jobs run
            for yesterday and metrics jobs use the newest UpdateCount date.
    """
    raise_if_reindex_in_progress('amo')

    if date:
        date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    # Assume that we want to populate yesterday's stats by default.
    today = date or datetime.date.today() - datetime.timedelta(days=1)
    today_jobs = [dict(job=job, date=today)
                  for job in tasks._get_daily_jobs(date)]

    max_update = date or UpdateCount.objects.aggregate(max=Max('date'))['max']
    metrics_jobs = [dict(job=job, date=max_update)
                    for job in tasks._get_metrics_jobs(date)]

    ts = [tasks.update_global_totals.subtask(kwargs=kw)
          for kw in today_jobs + metrics_jobs]
    # TaskSet was deprecated and removed in Celery 4; group() is the
    # supported drop-in replacement (and is what the sibling cron jobs
    # in this file already use).
    group(ts).apply_async()
def update_global_totals(date=None):
    """Update global statistics totals."""
    raise_if_reindex_in_progress('amo')

    if date:
        date = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    # Default to populating yesterday's stats when no date is given.
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    daily_date = date or yesterday
    daily_jobs = [{'job': job, 'date': daily_date}
                  for job in tasks._get_daily_jobs(date)]

    latest_update = (
        date or UpdateCount.objects.aggregate(max=Max('date'))['max'])
    metrics_jobs = [{'job': job, 'date': latest_update}
                    for job in tasks._get_metrics_jobs(date)]

    signatures = [tasks.update_global_totals.subtask(kwargs=kwargs)
                  for kwargs in daily_jobs + metrics_jobs]
    group(signatures).apply_async()
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active("local-statistics-processing"):
        return False

    raise_if_reindex_in_progress("amo")

    # Heavy aggregate query — read from a replica.
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    try:
        cursor.execute(q)
        d = cursor.fetchall()
    finally:
        # Always release the cursor — the original leaked it when the
        # query raised.
        cursor.close()

    ts = [_update_addon_average_daily_users.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    # TaskSet was deprecated and removed in Celery 4; group() is the
    # supported drop-in replacement.
    group(ts).apply_async()
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    raise_if_reindex_in_progress('amo')

    # Average per-add-on update counts over the trailing 13 days,
    # reading from a replica.
    cursor = connections[multidb.get_slave()].cursor()
    cursor.execute(
        """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY)
           GROUP BY addon_id
           ORDER BY addon_id""")
    rows = cursor.fetchall()
    cursor.close()

    signatures = []
    for chunk in chunked(rows, 250):
        signatures.append(
            _update_addon_average_daily_users.subtask(args=[chunk]))
    group(signatures).apply_async()
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    # Chunk payloads are (pk, count) pairs by default; the BigQuery
    # branch below switches to (guid, count).
    kwargs = {'id_field': 'pk'}
    if waffle.switch_is_active('use-bigquery-for-addon-adu'):
        # BigQuery does not have data for add-ons with type other than those in
        # `ADDON_TYPES_WITH_STATS` so we use download counts instead.
        # See: https://github.com/mozilla/addons-server/issues/14609
        amo_counts = dict(
            Addon.objects.exclude(type__in=amo.ADDON_TYPES_WITH_STATS).exclude(
                guid__isnull=True).exclude(guid__exact='').annotate(
                count=Coalesce(Sum('downloadcount__count'),
                               0)).values_list('guid', 'count')
            # Just to make order predictable in tests, we order by id. This
            # matches the GROUP BY being generated so it should be safe.
            .order_by('id'))
        counts = dict(get_addons_and_average_daily_users_from_bigquery())
        # AMO-derived counts win over BigQuery's for the excluded types.
        counts.update(amo_counts)
        counts = list(counts.items())

        # BigQuery stores GUIDs, not AMO primary keys.
        kwargs['id_field'] = 'guid'
    else:
        raise_if_reindex_in_progress('amo')
        # Heavy aggregate query — read from a replica.
        cursor = connections[multidb.get_replica()].cursor()
        q = """SELECT addon_id, AVG(`count`)
               FROM update_counts
               WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY)
               GROUP BY addon_id
               ORDER BY addon_id"""
        cursor.execute(q)
        counts = cursor.fetchall()
        cursor.close()

    # Fan out to celery in chunks of 250 (pk, count)/(guid, count) pairs.
    ts = [
        _update_addon_average_daily_users.subtask(args=[chunk], kwargs=kwargs)
        for chunk in chunked(counts, 250)
    ]
    group(ts).apply_async()
def update_addon_weekly_downloads(chunk_size=250):
    """
    Update 7-day add-on download counts.
    """
    if waffle.switch_is_active('use-bigquery-for-download-stats-cron'):
        counts = dict(
            # In order to reset the `weekly_downloads` values of add-ons that
            # don't exist in BigQuery, we prepare a set of `(guid, 0)` for most
            # add-ons.
            Addon.objects.filter(type__in=amo.ADDON_TYPES_WITH_STATS).exclude(
                guid__isnull=True
            ).exclude(guid__exact=''
                      ).exclude(weekly_downloads=0
                                ).annotate(count=Value(0, IntegerField())
                                           ).values_list('guid', 'count'))
        # Update the `counts` with values from BigQuery; BigQuery values
        # override the zero placeholders built above.
        counts.update(get_addons_and_weekly_downloads_from_bigquery())
        counts = list(counts.items())

        log.info('Preparing update of `weekly_downloads` for %s add-ons.',
                 len(counts))

        create_chunked_tasks_signatures(_update_addon_weekly_downloads,
                                        counts, chunk_size).apply_async()
    else:
        raise_if_reindex_in_progress('amo')

        # Sum download counts per add-on over the trailing 7 days.
        with connection.cursor() as cursor:
            cursor.execute("""
                SELECT addon_id, SUM(count) AS weekly_count
                FROM download_counts
                WHERE `date` >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
                GROUP BY addon_id
                ORDER BY addon_id""")
            counts = cursor.fetchall()

        addon_ids = [r[0] for r in counts]

        if not addon_ids:
            return

        with connection.cursor() as cursor:
            # Add-ons with no downloads this week get an explicit 0 so
            # stale totals are reset rather than left untouched.
            cursor.execute(
                """
                SELECT id, 0
                FROM addons
                WHERE id NOT IN %s""",
                (addon_ids, ))
            counts += cursor.fetchall()

            # Bulk-update via a temporary table joined to `addons` — one
            # UPDATE instead of one statement per add-on.
            cursor.execute("""
                CREATE TEMPORARY TABLE tmp_wd
                (addon_id INT PRIMARY KEY, count INT)""")
            cursor.execute(
                'INSERT INTO tmp_wd VALUES %s'
                % ','.join(['(%s,%s)'] * len(counts)),
                list(itertools.chain(*counts)))

            cursor.execute("""
                UPDATE addons INNER JOIN tmp_wd
                    ON addons.id = tmp_wd.addon_id
                SET weeklydownloads = tmp_wd.count""")
            cursor.execute("DROP TABLE IF EXISTS tmp_wd")