Example #1
0
 def setUp(self):
     """Build the fixtures used by the tests.

     Stores on the test case: the user looked up by email, an API key
     created for that user, the add-on with primary key 3615 and an unsaved
     ThemeUpdateCount for that same add-on id.
     """
     addon_pk = 3615

     user = UserProfile.objects.get(email='*****@*****.**')
     self.user = user

     # The second argument is the user's primary key followed by ':f'.
     key_name = str(user.pk) + ':f'
     self.api_key = self.create_api_key(user, key_name)

     self.addon = Addon.objects.get(pk=addon_pk)
     self.theme_update_count = ThemeUpdateCount(
         addon_id=addon_pk, date='2016-01-18', count=123)
def test_save_stats_to_file(mock_ContentFile, mock_storage):
    """save_stats_to_file calls the storage mock once with the dated path.

    Both arguments are mocks (presumably injected by patch decorators that
    are not visible here -- confirm against the full test module).
    """
    # Arrange: whatever ContentFile builds is replaced by a sentinel.
    content = mock.sentinel.content
    mock_ContentFile.return_value = content
    stats = ThemeUpdateCount(addon_id=321, date='2016-01-18', count=123)

    # Act.
    save_stats_to_file(stats)

    # Assert: the path is made of the add-on id and the date of the stats.
    expected_path = '321/2016/01/2016_01_18_themeupdatecount.json'
    mock_storage.assert_called_once_with(expected_path, content)
def test_stats_from_model_theme_update_count():
    """serialize_stats output decodes to the date, addon id and count."""
    stats = ThemeUpdateCount(addon_id=321, date='2016-01-18', count=123)
    expected = {
        'addon': 321,
        'count': 123,
        'date': '2016-01-18',
    }

    decoded = json.loads(serialize_stats(stats))

    assert decoded == expected
Example #4
0
    def handle(self, *args, **options):
        """Load one day of theme update counts from a hive export.

        The export is read from S3 or from a local file depending on
        ``options['stats_source']``. Existing ThemeUpdateCount rows for the
        day are deleted and recreated from the export; counts of personas
        that were migrated to a static theme are additionally added to the
        UpdateCount of that static theme.

        Options read: ``separator`` (field separator of the export),
        ``date`` (``YYYY-MM-DD``; yesterday when empty), ``stats_source``
        (``'s3'`` or ``'file'``) and, for the file source, ``folder_name``.

        Raises:
            CommandError: if ``stats_source`` is neither ``'s3'`` nor
                ``'file'``, or if the export holds data for another day.
        """
        sep = options['separator']
        start = datetime.now()  # Measure the time it takes to run the script.
        day = options['date']
        if not day:
            day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

        stats_source = options['stats_source']
        if stats_source == 's3':
            filepath = 's3://' + '/'.join([settings.AWS_STATS_S3_BUCKET,
                                           'amo_stats', 'theme_update_counts',
                                           day, '000000_0'])

        elif stats_source == 'file':
            folder = options['folder_name']
            folder = path.join(settings.TMP_PATH, folder, day)
            filepath = path.join(folder, 'theme_update_counts.hive')

        else:
            # Without this guard `filepath` would be unbound below and the
            # command would die with an UnboundLocalError.
            raise CommandError('Unknown stats source: %s' % stats_source)

        # Make sure we're not trying to update with mismatched data.
        if get_date(filepath, sep) != day:
            raise CommandError('%s file contains data for another day' %
                               filepath)

        # First, make sure we don't have any existing counts for the same day,
        # or it would just increment again the same data.
        ThemeUpdateCount.objects.filter(date=day).delete()

        theme_update_counts = {}
        new_stheme_update_counts = {}

        # Preload a set containing the ids of all the persona Add-on objects
        # that we care about. When looping, if we find an id that is not in
        # that set, we'll reject it.
        addons = set(Addon.objects.filter(type=amo.ADDON_PERSONA,
                                          status=amo.STATUS_PUBLIC,
                                          persona__isnull=False)
                                  .values_list('id', flat=True))
        # Preload a dict of persona to static theme ids that are migrated.
        migrated_personas = dict(
            MigratedLWT.objects.values_list(
                'lightweight_theme_id', 'static_theme_id')
        )
        # NOTE(review): this query is not restricted to `day`, so it loads
        # the UpdateCount rows of every date and keeps a single one per
        # addon_id -- confirm that this is intended.
        existing_stheme_update_counts = {
            uc.addon_id: uc for uc in UpdateCount.objects.filter(
                addon_id__in=migrated_personas.values())}
        # Ids of the already stored static theme counts modified by this run.
        # They are saved once after the loop instead of once per input row.
        updated_stheme_ids = set()
        # Preload all the Personas once and for all. This builds a dict where
        # each key (the persona_id we get from the hive query) has the addon_id
        # as value.
        persona_to_addon = dict(Persona.objects.values_list('persona_id',
                                                            'addon_id'))

        count_file = get_stats_data(filepath)
        # Stays at -1 when the export yields no line at all, so the summary
        # below reports 0 lines instead of failing on an unbound name.
        index = -1
        for index, line in enumerate(count_file):
            if index and (index % 1000000) == 0:
                log.info('Processed %s lines' % index)

            # Only strip the line terminator: blindly slicing off the last
            # character would eat a digit of the count when the final line
            # has no trailing newline.
            splitted = line.rstrip('\r\n').split(sep)

            if len(splitted) != 4:
                log.debug('Badly formatted row: %s' % line)
                continue

            # The date of the row is kept apart from the requested `day`.
            row_day, id_, src, count = splitted
            try:
                id_, count = int(id_), int(count)
            except ValueError:  # Badly formatted? Drop.
                continue

            if src:
                src = src.strip()

            # If src is 'gp', it's an old request for the persona id.
            if id_ not in persona_to_addon and src == 'gp':
                continue  # No such persona.
            addon_id = persona_to_addon[id_] if src == 'gp' else id_

            # Is the persona already migrated to static theme?
            if addon_id in migrated_personas:
                mig_addon_id = migrated_personas[addon_id]
                if mig_addon_id in existing_stheme_update_counts:
                    existing_stheme_update_counts[mig_addon_id].count += count
                    updated_stheme_ids.add(mig_addon_id)
                elif mig_addon_id in new_stheme_update_counts:
                    new_stheme_update_counts[mig_addon_id].count += count
                else:
                    new_stheme_update_counts[mig_addon_id] = UpdateCount(
                        addon_id=mig_addon_id, date=row_day, count=count)

            # Does this addon exist?
            if addon_id not in addons:
                continue

            # Memoize the ThemeUpdateCount.
            if addon_id in theme_update_counts:
                tuc = theme_update_counts[addon_id]
            else:
                tuc = ThemeUpdateCount(addon_id=addon_id, date=row_day,
                                       count=0)
                theme_update_counts[addon_id] = tuc

            # We can now fill the ThemeUpdateCount object.
            tuc.count += count

        # Persist the accumulated totals of the pre-existing static theme
        # counts, one write per modified row.
        for mig_addon_id in updated_stheme_ids:
            existing_stheme_update_counts[mig_addon_id].save()

        # Create in bulk: this is much faster.
        ThemeUpdateCount.objects.bulk_create(theme_update_counts.values(), 100)
        UpdateCount.objects.bulk_create(new_stheme_update_counts.values(), 100)

        log.info('Processed a total of %s lines' % (index + 1))
        log.debug('Total processing time: %s' % (datetime.now() - start))

        # Clean up file.
        if stats_source == 'file':
            log.debug('Deleting {path}'.format(path=filepath))
            unlink(filepath)
Example #5
0
    def handle(self, *args, **options):
        """Load one day of theme update counts from a local hive export.

        Reads ``theme_update_counts.hive`` from
        ``<TMP_PATH>/<folder>/<day>``, where the folder is the first
        positional argument (``'hive_results'`` when absent) and the day is
        ``options['date']`` (yesterday when empty). Existing
        ThemeUpdateCount rows for the day are deleted and recreated from the
        file, every created count is passed to ``save_stats_to_file`` and
        the export file is removed at the end.

        Raises:
            CommandError: if the file holds data for another day.
        """
        start = datetime.now()  # Measure the time it takes to run the script.
        day = options['date']
        if not day:
            day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
        folder = args[0] if args else 'hive_results'
        folder = path.join(settings.TMP_PATH, folder, day)
        sep = options['separator']
        filepath = path.join(folder, 'theme_update_counts.hive')
        # Make sure we're not trying to update with mismatched data.
        if get_date_from_file(filepath, sep) != day:
            raise CommandError('%s file contains data for another day' %
                               filepath)
        # First, make sure we don't have any existing counts for the same day,
        # or it would just increment again the same data.
        ThemeUpdateCount.objects.filter(date=day).delete()

        theme_update_counts = {}

        # Memoize the addon ids.
        addons = set(Addon.objects.values_list('id', flat=True))
        # Perf: preload all the Personas once and for all.
        # This builds a dict where each key (the persona_id we get from the
        # hive query) has the addon_id as value.
        persona_to_addon = dict(
            Persona.objects.values_list('persona_id', 'addon_id'))

        # Stays at -1 when the file has no line at all, so the summary below
        # reports 0 lines instead of failing on an unbound name.
        index = -1
        with codecs.open(filepath, encoding='utf8') as count_file:
            for index, line in enumerate(count_file):
                if index and (index % 1000000) == 0:
                    log.info('Processed %s lines' % index)

                # Only strip the line terminator: blindly slicing off the
                # last character would eat a digit of the count when the
                # final line has no trailing newline.
                splitted = line.rstrip('\r\n').split(sep)

                if len(splitted) != 4:
                    log.debug('Badly formatted row: %s' % line)
                    continue

                # The date of the row is kept apart from the requested `day`.
                row_day, id_, src, count = splitted
                try:
                    id_, count = int(id_), int(count)
                except ValueError:  # Badly formatted? Drop.
                    continue

                if src:
                    src = src.strip()

                # If src is 'gp', it's an old request for the persona id.
                if id_ not in persona_to_addon and src == 'gp':
                    continue  # No such persona.
                addon_id = persona_to_addon[id_] if src == 'gp' else id_

                # Does this addon exist?
                if addon_id not in addons:
                    continue

                # Memoize the ThemeUpdateCount.
                if addon_id in theme_update_counts:
                    tuc = theme_update_counts[addon_id]
                else:
                    tuc = ThemeUpdateCount(addon_id=addon_id,
                                           date=row_day,
                                           count=0)
                    theme_update_counts[addon_id] = tuc

                # We can now fill the ThemeUpdateCount object.
                tuc.count += count

        # Create in bulk: this is much faster.
        ThemeUpdateCount.objects.bulk_create(theme_update_counts.values(), 100)
        for theme_update_count in theme_update_counts.values():
            save_stats_to_file(theme_update_count)
        log.info('Processed a total of %s lines' % (index + 1))
        log.debug('Total processing time: %s' % (datetime.now() - start))

        # Clean up file.
        log.debug('Deleting {path}'.format(path=filepath))
        unlink(filepath)