Ejemplo n.º 1
0
 def test_update_locale(self):
     """Check that unknown locales are ignored and known ones are stored."""
     update_count = UpdateCount(addon_id=3615)

     # A locale that does not exist must leave the counter untouched.
     self.command.update_locale(update_count, 'foobar', 123)
     assert not update_count.locales

     # Taken from the language pack index.
     known_locales = [
         'ach', 'af', 'ak', 'an', 'ar', 'as', 'ast', 'ast-ES', 'az',
         'bb-BK', 'be', 'bg', 'bn', 'br', 'bs', 'ca', 'ca-valencia', 'cs',
         'csb', 'cy', 'cy-GB', 'da', 'de', 'dsb', 'el', 'en-GB', 'en-ZA',
         'eo', 'es-AR', 'es-CL', 'es-ES', 'es-MX', 'et', 'eu', 'fa', 'ff',
         'fi', 'fj-FJ', 'fr', 'fur-IT', 'fy-NL', 'ga-IE', 'gd', 'gl',
         'gu-IN', 'he', 'hi', 'hi-IN', 'hr', 'hsb', 'hu', 'hy-AM', 'id',
         'is', 'it', 'ja', 'kk', 'km', 'kn', 'ko', 'ku', 'lg', 'lij', 'lt',
         'lv', 'mai', 'mg', 'mk', 'ml', 'mr', 'ms', 'nb-NO', 'nl', 'nn-NO',
         'nr', 'nso', 'or', 'pa-IN', 'pl', 'pt-BR', 'pt-PT', 'rm', 'ro',
         'ru', 'si', 'sk', 'sl', 'son', 'sq', 'sr', 'ss', 'st', 'sv-SE',
         'sw', 'sw-TZ', 'ta', 'ta-IN', 'ta-LK', 'te', 'th', 'tn', 'tr',
         'ts', 'uk', 'ur', 've', 'vi', 'wa', 'wo-SN', 'xh', 'zap-MX-diiste',
         'zh-CN', 'zh-TW', 'zu'
     ]
     # Each known locale is reported once; all of them must be kept.
     for code in known_locales:
         self.command.update_locale(update_count, code, 1)
     assert len(update_count.locales) == len(known_locales)
Ejemplo n.º 2
0
    def setUp(self):
        """Create four add-ons and give each of them 21 days of counts."""
        self.persona = addon_factory(type=amo.ADDON_PERSONA)
        self.extension = addon_factory()
        self.static_theme = addon_factory(type=amo.ADDON_STATICTHEME)
        self.awaiting_review = addon_factory(status=amo.STATUS_NOMINATED)

        today = datetime.date.today()

        # Update counts ordered by age: the first entry is for yesterday,
        # the last one for 21 days ago.
        counts_by_age = (
            827080, 787930, 995860, 1044260, 105431, 106065, 980930,
            817100, 78843, 993830, 104431, 105943, 105039, 100183,
            82265, 100183, 82265, 100183, 82265, 100183, 82265,
        )
        stats = [
            (today - datetime.timedelta(days=age), amount)
            for age, amount in enumerate(counts_by_age, start=1)
        ]

        # Every add-on receives the exact same series.
        targets = (self.persona, self.extension, self.static_theme,
                   self.awaiting_review)
        for target in targets:
            rows = [
                UpdateCount(addon=target, date=when, count=amount)
                for when, amount in stats
            ]
            UpdateCount.objects.bulk_create(rows)
Ejemplo n.º 3
0
 def test_update_app(self):
     """Check that only known apps with well formed versions are stored."""
     firefox_guid = '{ec8030f7-c20a-464f-9b0e-13a3a9e97384}'
     update_count = UpdateCount(addon_id=3615)

     # Non-existent app.
     self.command.update_app(update_count, 'foobar', '1.0', 123)
     assert not update_count.applications

     # Malformed versions.
     for bad_version in ('3.0.1.2', '3.0123', '3.0c2', 'a.b.c'):
         self.command.update_app(
             update_count, firefox_guid, bad_version, 123)
     assert not update_count.applications

     # Well formed versions.
     accepted = (
         ('1.0', 123),
         ('1.0.1', 124),
         ('1.0a1', 125),
         ('1.0b2', 126),
     )
     for version, amount in accepted:
         self.command.update_app(
             update_count, firefox_guid, version, amount)
     assert update_count.applications == {firefox_guid: dict(accepted)}
Ejemplo n.º 4
0
    def test_13_day_window(self):
        """Check the average computed by update_addon_average_daily_users.

        Fifteen days of counts are stored; the expected average is the mean
        of the counts for the days 1 through 12 in the past.
        """
        addon = Addon.objects.get(pk=3615)

        # can't use a fixed date since we are relying on
        # mysql to get us the `CURDATE()`
        today = datetime.date.today()

        # data is coming from `tab groups` add-on from
        # jun 11 till may 29th 2017
        # (first entry is for yesterday, last one for 15 days ago)
        daily_counts = (
            82708, 78793, 99586, 104426, 105431,
            106065, 98093, 81710, 78843, 99383,
            104431, 105943, 105039, 100183, 82265,
        )
        stats = [
            (today - datetime.timedelta(days=age), amount)
            for age, amount in enumerate(daily_counts, start=1)
        ]

        rows = [
            UpdateCount(addon=addon, date=when, count=amount)
            for when, amount in stats
        ]
        UpdateCount.objects.bulk_create(rows)

        # Start from a known value so the cron has to overwrite it.
        addon.update(average_daily_users=0)
        cron.update_addon_average_daily_users()
        addon.refresh_from_db()

        # Sanity check of the expected value itself.
        assert sum(daily_counts[:12]) / 12 == 95451

        assert addon.average_daily_users == 95451
Ejemplo n.º 5
0
 def test_update_status(self):
     """Check that an unknown status is ignored and a known one stored."""
     update_count = UpdateCount(addon_id=3615)

     # Non-existent status.
     self.command.update_status(update_count, 'foobar', 123)
     assert not update_count.statuses

     self.command.update_status(update_count, 'userEnabled', 123)
     expected = {'userEnabled': 123}
     assert update_count.statuses == expected
Ejemplo n.º 6
0
 def test_update_os(self):
     """Check that an unknown OS is ignored and a known one stored."""
     update_count = UpdateCount(addon_id=3615)

     # Non-existent OS.
     self.command.update_os(update_count, 'foobar', 123)
     assert not update_count.oses

     self.command.update_os(update_count, 'WINNT', 123)
     expected = {'WINNT': 123}
     assert update_count.oses == expected
Ejemplo n.º 7
0
    def handle(self, *args, **options):
        """Import one day of theme update counts from a stats dump.

        Options read: ``separator`` (field separator of the dump), ``date``
        (defaults to yesterday, formatted ``%Y-%m-%d``), ``stats_source``
        (``'s3'`` or ``'file'``) and, for the ``'file'`` source,
        ``folder_name``.

        Existing ``ThemeUpdateCount`` rows for the day are deleted, then one
        row per known public persona is created in bulk. Counts of personas
        that were migrated to static themes are also added to the
        ``UpdateCount`` of the matching static theme.

        Raises:
            CommandError: if ``stats_source`` is not supported, or if the
                file contains data for another day than the requested one.
        """
        sep = options['separator']
        start = datetime.now()  # Measure the time it takes to run the script.
        day = options['date']
        if not day:
            day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

        stats_source = options['stats_source']
        if stats_source == 's3':
            filepath = 's3://' + '/'.join([settings.AWS_STATS_S3_BUCKET,
                                           'amo_stats', 'theme_update_counts',
                                           day, '000000_0'])

        elif stats_source == 'file':
            folder = options['folder_name']
            folder = path.join(settings.TMP_PATH, folder, day)
            filepath = path.join(folder, 'theme_update_counts.hive')

        else:
            # Fail explicitly: `filepath` would otherwise be unbound below.
            raise CommandError('Unsupported stats source: %s' % stats_source)

        # Make sure we're not trying to update with mismatched data.
        if get_date(filepath, sep) != day:
            raise CommandError('%s file contains data for another day' %
                               filepath)

        # First, make sure we don't have any existing counts for the same day,
        # or it would just increment again the same data.
        ThemeUpdateCount.objects.filter(date=day).delete()

        theme_update_counts = {}
        new_stheme_update_counts = {}

        # Preload a set containing the ids of all the persona Add-on objects
        # that we care about. When looping, if we find an id that is not in
        # that set, we'll reject it.
        addons = set(Addon.objects.filter(type=amo.ADDON_PERSONA,
                                          status=amo.STATUS_PUBLIC,
                                          persona__isnull=False)
                                  .values_list('id', flat=True))
        # Preload a dict of persona to static theme ids that are migrated.
        migrated_personas = dict(
            MigratedLWT.objects.values_list(
                'lightweight_theme_id', 'static_theme_id')
        )
        # UpdateCount objects that already exist for the static themes the
        # personas were migrated to, keyed by add-on id.
        existing_stheme_update_counts = {
            uc.addon_id: uc for uc in UpdateCount.objects.filter(
                addon_id__in=migrated_personas.values())}
        # Preload all the Personas once and for all. This builds a dict where
        # each key (the persona_id we get from the hive query) has the addon_id
        # as value.
        persona_to_addon = dict(Persona.objects.values_list('persona_id',
                                                            'addon_id'))

        count_file = get_stats_data(filepath)
        # Keep `index` bound for the final log even if nothing is yielded.
        index = -1
        for index, line in enumerate(count_file):
            if index and (index % 1000000) == 0:
                log.info('Processed %s lines' % index)

            # Drop the last character of the line (presumably the newline)
            # before splitting it into fields.
            splitted = line[:-1].split(sep)

            if len(splitted) != 4:
                log.debug('Badly formatted row: %s' % line)
                continue

            # NOTE: this rebinds `day` to the value found in the row; the
            # objects created below use that value as their date.
            day, id_, src, count = splitted
            try:
                id_, count = int(id_), int(count)
            except ValueError:  # Badly formatted? Drop.
                continue

            if src:
                src = src.strip()

            # If src is 'gp', it's an old request for the persona id.
            if id_ not in persona_to_addon and src == 'gp':
                continue  # No such persona.
            addon_id = persona_to_addon[id_] if src == 'gp' else id_

            # Is the persona already migrated to static theme?
            if addon_id in migrated_personas:
                mig_addon_id = migrated_personas[addon_id]
                if mig_addon_id in existing_stheme_update_counts:
                    # Already in the database: increment and save right away.
                    existing_stheme_update_counts[mig_addon_id].count += count
                    existing_stheme_update_counts[mig_addon_id].save()
                elif mig_addon_id in new_stheme_update_counts:
                    new_stheme_update_counts[mig_addon_id].count += count
                else:
                    new_stheme_update_counts[mig_addon_id] = UpdateCount(
                        addon_id=mig_addon_id, date=day, count=count)
                # No `continue` here: the persona itself is still counted
                # below when it is part of `addons`.

            # Does this addon exist?
            if addon_id not in addons:
                continue

            # Memoize the ThemeUpdateCount.
            if addon_id in theme_update_counts:
                tuc = theme_update_counts[addon_id]
            else:
                tuc = ThemeUpdateCount(addon_id=addon_id, date=day,
                                       count=0)
                theme_update_counts[addon_id] = tuc

            # We can now fill the ThemeUpdateCount object.
            tuc.count += count

        # Create in bulk: this is much faster.
        ThemeUpdateCount.objects.bulk_create(theme_update_counts.values(), 100)
        UpdateCount.objects.bulk_create(new_stheme_update_counts.values(), 100)

        log.info('Processed a total of %s lines' % (index + 1))
        log.debug('Total processing time: %s' % (datetime.now() - start))

        # Clean up file.
        if stats_source == 'file':
            log.debug('Deleting {path}'.format(path=filepath))
            unlink(filepath)
    def handle(self, *args, **options):
        """Import one day of add-on update counts from the stats dumps.

        One dump is read per group (``app``, ``locale``, ``os``, ``status``
        and ``version``). Options read: ``separator`` (field separator of
        the dumps), ``date`` (defaults to yesterday, formatted
        ``%Y-%m-%d``), ``stats_source`` (``'s3'`` or ``'file'``) and, for
        the ``'file'`` source, ``folder_name``.

        Existing ``UpdateCount`` rows for the day are deleted, then one row
        per known add-on guid is built from all the dumps and created in
        bulk. With the ``'file'`` source, the dumps are deleted afterwards.

        Raises:
            CommandError: if ``stats_source`` is not supported, or if one of
                the files contains data for another day than the requested
                one.
        """
        sep = options['separator']

        start = datetime.now()  # Measure the time it takes to run the script.
        day = options['date']
        if not day:
            day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

        stats_source = options['stats_source']
        groups = ('app', 'locale', 'os', 'status', 'version')
        group_filepaths = []
        # Make sure we're not trying to update with mismatched data.
        for group in groups:
            if stats_source == 's3':
                filepath = 's3://' + '/'.join([
                    settings.AWS_STATS_S3_BUCKET, settings.AWS_STATS_S3_PREFIX,
                    'update_counts_by_%s' % group, day, '000000_0'
                ])

            elif stats_source == 'file':
                folder = options['folder_name']
                folder = path.join(settings.TMP_PATH, folder, day)
                filepath = path.join(folder,
                                     'update_counts_by_%s.hive' % group)

            else:
                # Fail explicitly: `filepath` would otherwise be unbound.
                raise CommandError(
                    'Unsupported stats source: %s' % stats_source)

            if get_date(filepath, sep) != day:
                raise CommandError('%s file contains data for another day' %
                                   filepath)
            group_filepaths.append((group, filepath))

        # First, make sure we don't have any existing counts for the same day,
        # or it would just increment again the same data.
        UpdateCount.objects.filter(date=day).delete()

        # Memoize the addons and the UpdateCounts.
        update_counts = {}
        # Perf: preload all the addons once and for all.
        # This builds a dict where each key (the addon guid we get from the
        # hive query) has the addon_id as value.
        guids_to_addon = (
            dict(
                Addon.unfiltered.exclude(status=amo.STATUS_NULL).exclude(
                    guid__isnull=True)
                # Shouldn't be necessary to exclude _ADDON_PERSONA now but we've
                # still got a huge number of deleted LWT in the database.
                .exclude(type=9).values_list('guid', 'id')))

        # Keep `index` bound for the final log even if nothing is yielded.
        index = -1
        for group, filepath in group_filepaths:
            count_file = get_stats_data(filepath)
            for index, line in enumerate(count_file):
                if index and (index % 1000000) == 0:
                    log.info('Processed %s lines' % index)

                # Drop the last character of the line (presumably the
                # newline) before splitting it into fields.
                splitted = line[:-1].split(sep)

                # Rows of the 'app' group have one more field than the
                # others (app id + app version instead of a single value).
                if ((group == 'app' and len(splitted) != 6)
                        or (group != 'app' and len(splitted) != 5)):
                    log.debug('Badly formatted row: %s' % line)
                    continue

                # NOTE: this rebinds `day` to the value found in the row;
                # the UpdateCount created below uses it as its date.
                if group == 'app':
                    day, addon_guid, app_id, app_ver, count, \
                        update_type = splitted
                else:
                    day, addon_guid, data, count, update_type = splitted

                addon_guid = addon_guid.strip()
                if update_type:
                    # str.strip() returns a new string: the result has to be
                    # kept, or padded values such as ' NULL' would not match
                    # the empty-like values below.
                    update_type = update_type.strip()

                # Old versions of Firefox don't provide the update type.
                # All the following are "empty-like" values.
                if update_type in [
                        '0', 'NULL', 'None', '', '\\N', '%UPDATE_TYPE%'
                ]:
                    update_type = None

                try:
                    count = int(count)
                    if update_type:
                        update_type = int(update_type)
                except ValueError:  # Badly formatted? Drop.
                    continue

                # The following is magic that I don't understand. I've just
                # been told that this is the way we can make sure a request
                # is valid:
                # > the lower bits for updateType (eg 112) should add to
                # > 16, if not, ignore the request.
                # > updateType & 31 == 16 == valid request.
                if update_type and update_type & 31 != 16:
                    log.debug("Update type doesn't add to 16: %s" %
                              update_type)
                    continue

                # Does this addon exist?
                if addon_guid and addon_guid in guids_to_addon:
                    addon_id = guids_to_addon[addon_guid]
                else:
                    log.debug(u"Addon {guid} doesn't exist.".format(
                        guid=addon_guid.strip()))
                    continue

                # Memoize the UpdateCount.
                if addon_guid in update_counts:
                    uc = update_counts[addon_guid]
                else:
                    uc = UpdateCount(date=day, addon_id=addon_id, count=0)
                    update_counts[addon_guid] = uc

                # We can now fill the UpdateCount object.
                if group == 'version':
                    self.update_version(uc, data, count)
                elif group == 'status':
                    self.update_status(uc, data, count)
                    if data == UPDATE_COUNT_TRIGGER:
                        # Use this count to compute the global number
                        # of daily users for this addon.
                        uc.count += count
                elif group == 'app':
                    self.update_app(uc, app_id, app_ver, count)
                elif group == 'os':
                    self.update_os(uc, data, count)
                elif group == 'locale':
                    self.update_locale(uc, data, count)

        # Make sure the locales and versions fields aren't too big to fit in
        # the database. Those two fields are the only ones that are not fully
        # validated, so we could end up with just anything in there (spam,
        # buffer overflow attempts and the like).
        # We don't care that they will increase the numbers, but we do not want
        # those to break the process because of a "Data too long for column
        # 'version'" error.
        # The database field (TEXT), can hold up to 2^16 = 64k characters.
        # If the field is longer than that, we drop the least used items
        # (with the lower count) until the field fits.
        for update_count in update_counts.values():
            self.trim_field(update_count.locales)
            self.trim_field(update_count.versions)

        # Create in bulk: this is much faster.
        UpdateCount.objects.bulk_create(update_counts.values(), 100)

        # NOTE: `index` only reflects the last file that yielded lines.
        log.info('Processed a total of %s lines' % (index + 1))
        log.debug('Total processing time: %s' % (datetime.now() - start))

        # Clean up files.
        if stats_source == 'file':
            for _, filepath in group_filepaths:
                log.debug('Deleting {path}'.format(path=filepath))
                unlink(filepath)
Ejemplo n.º 9
0
    def setUp(self):
        """Create seven add-ons and give each of them 21 days of counts.

        Three different series of update counts are used: a large one, an
        "unpopular" one and a "barely popular" one.
        """
        self.extension = addon_factory()
        self.static_theme = addon_factory(type=amo.ADDON_STATICTHEME)
        self.unpopular_extension = addon_factory()
        self.unpopular_theme = addon_factory(type=amo.ADDON_STATICTHEME)
        self.barely_popular_theme = addon_factory(type=amo.ADDON_STATICTHEME)
        self.same_stats_as_barely_popular_theme = addon_factory()
        self.awaiting_review = addon_factory(status=amo.STATUS_NOMINATED)

        today = datetime.date.today()

        def dated(counts_by_age):
            # Pair each count with its date: the first entry is for
            # yesterday, the last one for 21 days ago.
            return [
                (today - datetime.timedelta(days=age), amount)
                for age, amount in enumerate(counts_by_age, start=1)
            ]

        stats = dated((
            827080, 787930, 995860, 1044260, 105431, 106065, 980930,
            817100, 78843, 993830, 104431, 105943, 105039, 100183,
            82265, 100183, 82265, 100183, 82265, 100183, 82265,
        ))

        unpopular_stats = dated((
            99, 76, 25, 32, 289, 34, 45,
            25, 78, 36, 25, 100, 156, 24,
            9, 267, 176, 16, 156, 187, 149,
        ))

        barely_popular_stats = dated((
            399, 276, 215, 312, 289, 234, 345,
            205, 178, 336, 325, 400, 456, 324,
            290, 267, 276, 216, 256, 287, 249,
        ))

        # Which add-ons receive which series, in insertion order.
        assignments = (
            ((self.extension, self.static_theme, self.awaiting_review),
             stats),
            ((self.unpopular_extension, self.unpopular_theme),
             unpopular_stats),
            ((self.barely_popular_theme,
              self.same_stats_as_barely_popular_theme),
             barely_popular_stats),
        )
        for targets, series in assignments:
            for target in targets:
                rows = [
                    UpdateCount(addon=target, date=when, count=amount)
                    for when, amount in series
                ]
                UpdateCount.objects.bulk_create(rows)