Ejemplo n.º 1
0
    def test_create_query(self, bigquery_client_mock):
        """The base query is issued once, with both DATE parameters bound.

        With ``today`` set to 2020-05-31, the helper is expected to bind
        ``one_week_date`` to 2020-05-24 and ``four_weeks_date`` to
        2020-05-03.
        """
        bq_client = self.create_mock_client()
        bigquery_client_mock.from_service_account_json.return_value = bq_client
        expected_parameters = [
            {
                'parameterType': {'type': 'DATE'},
                'parameterValue': {'value': '2020-05-24'},
                'name': 'one_week_date',
            },
            {
                'parameterType': {'type': 'DATE'},
                'parameterValue': {'value': '2020-05-03'},
                'name': 'four_weeks_date',
            },
        ]

        get_averages_by_addon_from_bigquery(today=date(2020, 5, 31))

        # The job config itself is opaque here; its named parameters are
        # inspected separately below.
        bq_client.query.assert_called_once_with(
            self.expected_base_query, job_config=mock.ANY
        )
        actual_parameters = self.get_job_config_named_parameters(
            bq_client.query
        )
        assert actual_parameters == expected_parameters
Ejemplo n.º 2
0
    def test_create_query_with_excluded_guids(self, bigquery_client_mock):
        """Passing ``exclude`` appends a WHERE clause and an ARRAY parameter.

        The two DATE parameters are still expected first, followed by the
        ``excluded_addon_ids`` array holding every excluded GUID as a STRING.
        """
        bq_client = self.create_mock_client()
        bigquery_client_mock.from_service_account_json.return_value = bq_client
        guids = ['guid-1', 'guid-2']
        expected_query = (
            f'{self.expected_base_query} '
            'WHERE addon_id NOT IN UNNEST(@excluded_addon_ids)'
        )
        expected_parameters = [
            {
                'parameterType': {'type': 'DATE'},
                'parameterValue': {'value': '2020-05-24'},
                'name': 'one_week_date',
            },
            {
                'parameterType': {'type': 'DATE'},
                'parameterValue': {'value': '2020-05-03'},
                'name': 'four_weeks_date',
            },
            {
                'parameterType': {
                    'type': 'ARRAY',
                    'arrayType': {'type': 'STRING'},
                },
                'parameterValue': {
                    'arrayValues': [{'value': guid} for guid in guids],
                },
                'name': 'excluded_addon_ids',
            },
        ]

        get_averages_by_addon_from_bigquery(
            today=date(2020, 5, 31), exclude=guids
        )

        bq_client.query.assert_called_once_with(
            expected_query, job_config=mock.ANY
        )
        actual_parameters = self.get_job_config_named_parameters(
            bq_client.query
        )
        assert actual_parameters == expected_parameters
Ejemplo n.º 3
0
    def test_create_client(self, bigquery_client_mock):
        """The client is built from the configured credentials file.

        ``GOOGLE_APPLICATION_CREDENTIALS`` is overridden for the duration of
        the call, and the factory must receive exactly that path, once.
        """
        factory = bigquery_client_mock.from_service_account_json
        factory.return_value = self.create_mock_client()
        credentials_path = 'path/to/credentials.json'

        with override_settings(
            GOOGLE_APPLICATION_CREDENTIALS=credentials_path
        ):
            get_averages_by_addon_from_bigquery(today=date.today())

        factory.assert_called_once_with(credentials_path)
Ejemplo n.º 4
0
    def test_returned_values(self, bigquery_client_mock):
        """Rows are returned keyed by add-on GUID; rows without one are dropped."""
        raw_rows = [
            {
                'addon_id': 'guid',
                'avg_this_week': 123,
                'avg_three_weeks_before': 456,
            },
            {
                'addon_id': 'guid2',
                'avg_this_week': 45,
                'avg_three_weeks_before': 40,
            },
            # This should be skipped because `addon_id` is `None`.
            {
                'addon_id': None,
                'avg_this_week': 123,
                'avg_three_weeks_before': 456,
            },
        ]
        results = [self.create_bigquery_row(raw_row) for raw_row in raw_rows]
        bq_client = self.create_mock_client(results=results)
        bigquery_client_mock.from_service_account_json.return_value = bq_client

        averages = get_averages_by_addon_from_bigquery(today=date(2020, 5, 6))

        expected_averages = {
            'guid': {'avg_this_week': 123, 'avg_three_weeks_before': 456},
            'guid2': {'avg_this_week': 45, 'avg_three_weeks_before': 40},
        }
        assert averages == expected_averages
Ejemplo n.º 5
0
def deliver_hotness():
    """
    Recompute the hotness of every reviewed, non-frozen add-on that has
    averages in BigQuery.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    threshold = 250 if addon type is theme, else 1000
    hotness = (a-b) / b if a > threshold and b > 1 else 0
    """
    frozen_ids = {frozen.addon_id for frozen in FrozenAddon.objects.all()}
    averages = get_averages_by_addon_from_bigquery(today=date.today())
    candidates = (
        Addon.objects.filter(guid__in=averages.keys())
        .filter(status__in=amo.REVIEWED_STATUSES)
        .exclude(id__in=frozen_ids)
    )

    for addon in candidates:
        stats = averages.get(addon.guid)
        current = stats['avg_this_week']
        previous = stats['avg_three_weeks_before']

        threshold = 250 if addon.type == amo.ADDON_STATICTHEME else 1000
        if current > threshold and previous > 1:
            new_hotness = (current - previous) / float(previous)
        else:
            new_hotness = 0

        # Only write when the score actually changes: an update would
        # otherwise cause an unnecessary re-index.
        if addon.hotness != new_hotness:
            addon.update(hotness=new_hotness)

        # Pause between add-ons to give the database some room.
        time.sleep(10)
Ejemplo n.º 6
0
    def test_create_query(self, bigquery_client_mock):
        client = self.create_mock_client()
        bigquery_client_mock.from_service_account_json.return_value = client
        expected_query = f"""
WITH
  this_week AS (
  SELECT
    addon_id,
    AVG(dau) AS avg_this_week
  FROM
    `project.dataset.{AMO_STATS_DAU_VIEW}`
  WHERE
    submission_date >= @one_week_date
  GROUP BY
    addon_id),
  three_weeks_before_this_week AS (
  SELECT
    addon_id,
    AVG(dau) AS avg_three_weeks_before
  FROM
    `project.dataset.{AMO_STATS_DAU_VIEW}`
  WHERE
    submission_date BETWEEN @four_weeks_date AND @one_week_date
  GROUP BY
    addon_id)
SELECT
  *
FROM
  this_week
JOIN
  three_weeks_before_this_week
USING
  (addon_id)
"""

        get_averages_by_addon_from_bigquery(today=date(2020, 5, 31))

        client.query.assert_called_once_with(
            expected_query, job_config=mock.ANY
        )
Ejemplo n.º 7
0
def deliver_hotness(chunk_size=300):
    """
    Schedule chunked tasks that recompute the hotness of all add-ons.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    threshold = 250 if addon type is theme, else 1000
    hotness = (a-b) / b if a > threshold and b > 1 else 0

    Frozen add-ons are excluded from the BigQuery query. Reviewed add-ons
    that have a positive hotness but no BigQuery averages are handed to a
    separate reset task.
    """
    frozen_guids = list(
        {frozen.addon.guid for frozen in FrozenAddon.objects.all()}
    )
    averages = get_averages_by_addon_from_bigquery(
        today=date.today(), exclude=frozen_guids
    )
    update_tasks = create_chunked_tasks_signatures(
        update_addon_hotness, averages.items(), chunk_size
    )
    update_tasks.apply_async()

    # Reset add-ons that won't be returned by BigQuery.
    stale_addon_ids = (
        Addon.objects.filter(status__in=amo.REVIEWED_STATUSES)
        .filter(hotness__gt=0)
        .exclude(guid__in=averages.keys())
        .values_list('id', flat=True)
    )
    reset_tasks = create_chunked_tasks_signatures(
        reset_addon_hotness, stale_addon_ids, chunk_size
    )
    reset_tasks.apply_async()
Ejemplo n.º 8
0
def update_addon_hotness(chunk_size=300):
    """
    Schedule chunked tasks that recompute the hotness of all add-ons.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    threshold = 250 if addon type is theme, else 1000
    hotness = (a-b) / b if a > threshold and b > 1 else 0

    Every non-frozen add-on with a GUID and a non-zero hotness is first given
    placeholder averages of 1/1; averages returned by BigQuery then override
    those placeholders before the work is chunked.
    """
    frozen_guids = list(
        {
            frozen.addon.guid
            for frozen in FrozenAddon.objects.all()
            if frozen.addon.guid
        }
    )
    log.info('Found %s frozen add-on GUIDs.', len(frozen_guids))

    # Add-ons known to AMO that currently carry a hotness score.
    db_guids = (
        Addon.objects.exclude(guid__in=frozen_guids)
        .exclude(guid__isnull=True)
        .exclude(guid__exact='')
        .exclude(hotness=0)
        .values_list('guid', flat=True)
    )
    # Placeholder values; by the formula above they fail both threshold
    # checks (presumably so add-ons missing from BigQuery end up at 0).
    averages = {}
    for guid in db_guids:
        averages[guid] = {'avg_this_week': 1, 'avg_three_weeks_before': 1}
    log.info('Found %s add-on GUIDs in AMO DB.', len(averages))

    bq_averages = get_averages_by_addon_from_bigquery(
        today=date.today(), exclude=frozen_guids
    )
    log.info('Found %s add-on GUIDs with averages in BigQuery.', len(bq_averages))

    # Real averages take precedence over the placeholders.
    averages.update(bq_averages)
    log.info('Preparing update of `hotness` for %s add-ons.', len(averages))

    task_signatures = create_chunked_tasks_signatures(
        _update_addon_hotness, averages.items(), chunk_size
    )
    task_signatures.apply_async()