def test_create_client(self, bigquery_client_mock):
    """The BigQuery client is created from the configured credentials path."""
    credentials_path = 'path/to/credentials.json'
    bigquery_client_mock.from_service_account_json.return_value = (
        self.create_mock_client()
    )

    # The path is read from settings, so override it for the duration of the
    # call under test.
    with override_settings(GOOGLE_APPLICATION_CREDENTIALS=credentials_path):
        get_addons_and_average_daily_users_from_bigquery()

    client_factory = bigquery_client_mock.from_service_account_json
    client_factory.assert_called_once_with(credentials_path)
def test_create_query(self, bigquery_client_mock):
    """The expected SQL is sent to BigQuery exactly once."""
    mock_client = self.create_mock_client()
    bigquery_client_mock.from_service_account_json.return_value = mock_client

    # NOTE(review): this literal is compared verbatim against the query that
    # was issued, so its internal whitespace must match the query built by
    # the function under test - confirm before reformatting it.
    expected_sql = f""" SELECT addon_id, AVG(dau) AS count FROM `project.dataset.{AMO_STATS_DAU_VIEW}` WHERE submission_date > DATE_SUB(CURRENT_DATE(), INTERVAL 13 DAY) GROUP BY addon_id"""

    get_addons_and_average_daily_users_from_bigquery()

    mock_client.query.assert_called_once_with(expected_sql)
def update_addon_average_daily_users(chunk_size=250):
    """Schedule the refresh of add-ons `average_daily_users` values.

    Returns False without doing anything when the
    `local-statistics-processing` waffle switch is off. Otherwise the
    collected pairs are passed, together with `chunk_size`, to
    `create_chunked_tasks_signatures()` and the result is applied
    asynchronously; nothing is returned in that case.
    """
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    # Baseline of `(guid, 0)` pairs for add-ons whose type has stats, that
    # have a non-empty guid and a non-zero `average_daily_users`. It lets us
    # reset the value of add-ons that do not exist in BigQuery.
    # Ordering by id is only there to keep the order predictable in tests.
    addons_to_reset = (
        Addon.objects.filter(type__in=amo.ADDON_TYPES_WITH_STATS)
        .exclude(guid__isnull=True)
        .exclude(guid__exact='')
        .exclude(average_daily_users=0)
        .annotate(count=Value(0, IntegerField()))
        .values_list('guid', 'count')
        .order_by('id')
    )
    totals = dict(addons_to_reset)

    # Values coming from BigQuery override the zeroed baseline.
    totals.update(get_addons_and_average_daily_users_from_bigquery())
    counts = list(totals.items())

    log.info(
        'Preparing update of `average_daily_users` for %s add-ons.',
        len(counts),
    )

    signatures = create_chunked_tasks_signatures(
        _update_addon_average_daily_users, counts, chunk_size
    )
    signatures.apply_async()
def update_addon_average_daily_users():
    """Schedule the refresh of add-ons ADU totals.

    Returns False without doing anything when the
    `local-statistics-processing` waffle switch is off. Otherwise the
    `(guid, count)` pairs are split into chunks of 250 and one
    `_update_addon_average_daily_users` subtask per chunk is applied
    asynchronously as a group.
    """
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    # BigQuery does not have data for add-ons with type other than those in
    # `ADDON_TYPES_WITH_STATS` so we use download counts instead.
    # See: https://github.com/mozilla/addons-server/issues/14609
    # Ordering by id is only there to keep the order predictable in tests.
    addons_without_stats = (
        Addon.objects.exclude(type__in=amo.ADDON_TYPES_WITH_STATS)
        .exclude(guid__isnull=True)
        .exclude(guid__exact='')
        .annotate(count=Coalesce(Sum('downloadcount__count'), 0))
        .values_list('guid', 'count')
        .order_by('id')
    )
    amo_counts = dict(addons_without_stats)

    # Start from the BigQuery values; download-based counts win on conflict.
    merged = dict(get_addons_and_average_daily_users_from_bigquery())
    merged.update(amo_counts)
    pairs = list(merged.items())

    subtasks = []
    for chunk in chunked(pairs, 250):
        subtasks.append(
            _update_addon_average_daily_users.subtask(args=[chunk])
        )
    group(subtasks).apply_async()
def test_returned_results(self, bigquery_client_mock):
    """Rows from BigQuery are returned as `(addon_id, count)` tuples."""
    rows = [
        self.create_bigquery_row({'addon_id': addon_id, 'count': count})
        for addon_id, count in ((1, 123), (2, 456))
    ]
    bigquery_client_mock.from_service_account_json.return_value = (
        self.create_mock_client(results=rows)
    )

    actual = get_addons_and_average_daily_users_from_bigquery()

    assert actual == [(1, 123), (2, 456)]
def update_addon_average_daily_users():
    """Update add-ons ADU totals.

    Returns False without doing anything when the
    `local-statistics-processing` waffle switch is off.

    The counts come from one of two sources, selected by the
    `use-bigquery-for-addon-adu` waffle switch:

    - switch on: BigQuery results merged with download-based counts for
      add-ons whose type is not in `ADDON_TYPES_WITH_STATS`; the pairs are
      keyed by guid.
    - switch off: a 13-day average read from the `update_counts` table on a
      replica database; the pairs are keyed by add-on id.

    The pairs are then split into chunks of 250 and one
    `_update_addon_average_daily_users` subtask per chunk is applied
    asynchronously as a group, with `id_field` telling the task which kind of
    identifier the pairs carry.

    Raises whatever `raise_if_reindex_in_progress('amo')` raises in the
    non-BigQuery path.
    """
    if not waffle.switch_is_active('local-statistics-processing'):
        return False

    kwargs = {'id_field': 'pk'}
    if waffle.switch_is_active('use-bigquery-for-addon-adu'):
        # BigQuery does not have data for add-ons with type other than those in
        # `ADDON_TYPES_WITH_STATS` so we use download counts instead.
        # See: https://github.com/mozilla/addons-server/issues/14609
        amo_counts = dict(
            Addon.objects.exclude(type__in=amo.ADDON_TYPES_WITH_STATS)
            .exclude(guid__isnull=True)
            .exclude(guid__exact='')
            .annotate(count=Coalesce(Sum('downloadcount__count'), 0))
            .values_list('guid', 'count')
            # Just to make order predictable in tests, we order by id.
            .order_by('id')
        )
        # Download-based counts override BigQuery values on conflict.
        counts = dict(get_addons_and_average_daily_users_from_bigquery())
        counts.update(amo_counts)
        counts = list(counts.items())
        # BigQuery stores GUIDs, not AMO primary keys.
        kwargs['id_field'] = 'guid'
    else:
        raise_if_reindex_in_progress('amo')
        q = """SELECT addon_id, AVG(`count`) FROM update_counts WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY) GROUP BY addon_id ORDER BY addon_id"""
        # Use the cursor as a context manager so it is closed even when
        # `execute()` or `fetchall()` raises.
        with connections[multidb.get_replica()].cursor() as cursor:
            cursor.execute(q)
            counts = cursor.fetchall()

    ts = [
        _update_addon_average_daily_users.subtask(args=[chunk], kwargs=kwargs)
        for chunk in chunked(counts, 250)
    ]
    group(ts).apply_async()