def setUp(self):
    """Prepare the objects shared by the tests.

    Looks up the test user and add-on 3615, creates an API key for the
    user, and instantiates a sample ``ThemeUpdateCount`` for that add-on.
    """
    addon_pk = 3615

    self.user = UserProfile.objects.get(email='*****@*****.**')
    key_secret = str(self.user.pk) + ':f'
    self.api_key = self.create_api_key(self.user, key_secret)

    self.addon = Addon.objects.get(pk=addon_pk)
    self.theme_update_count = ThemeUpdateCount(
        addon_id=addon_pk,
        date='2016-01-18',
        count=123,
    )
def test_save_stats_to_file(mock_ContentFile, mock_storage):
    """Check that saving a count stores its content under a dated path."""
    stored_content = mock.sentinel.content
    expected_path = '321/2016/01/2016_01_18_themeupdatecount.json'
    mock_ContentFile.return_value = stored_content

    stats = ThemeUpdateCount(addon_id=321, date='2016-01-18', count=123)
    save_stats_to_file(stats)

    # The storage mock must have been called exactly once, with the
    # per-add-on, per-date path and the content built by ContentFile.
    mock_storage.assert_called_once_with(expected_path, stored_content)
def test_stats_from_model_theme_update_count():
    """Check the JSON produced by ``serialize_stats`` for a count model."""
    model = ThemeUpdateCount(addon_id=321, date='2016-01-18', count=123)
    expected = {
        'date': '2016-01-18',
        'addon': 321,
        'count': 123,
    }

    serialized = serialize_stats(model)

    assert json.loads(serialized) == expected
def handle(self, *args, **options):
    """Import one day of theme update counts from a hive results file.

    The counts are read from S3 or from a local folder, depending on
    ``options['stats_source']``. Each row is expected to hold four
    ``separator``-delimited fields: day, id, source and count. Counts are
    summed per add-on into new ``ThemeUpdateCount`` objects; for personas
    listed in ``MigratedLWT`` the count is also added to the
    ``UpdateCount`` of the matching static theme.

    Options read: ``separator``, ``date`` (defaults to yesterday),
    ``stats_source`` and, for the ``file`` source, ``folder_name``.

    Raises:
        CommandError: if ``stats_source`` is neither ``s3`` nor ``file``,
            or if the file contains data for another day than requested.
    """
    sep = options['separator']
    start = datetime.now()  # Measure the time it takes to run the script.

    day = options['date']
    if not day:
        day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

    stats_source = options['stats_source']
    if stats_source == 's3':
        filepath = 's3://' + '/'.join([
            settings.AWS_STATS_S3_BUCKET, 'amo_stats',
            'theme_update_counts', day, '000000_0'])
    elif stats_source == 'file':
        folder = path.join(settings.TMP_PATH, options['folder_name'], day)
        filepath = path.join(folder, 'theme_update_counts.hive')
    else:
        # Fail early and explicitly instead of hitting an unbound
        # `filepath` a few lines below.
        raise CommandError('Unknown stats source: %s' % stats_source)

    # Make sure we're not trying to update with mismatched data.
    if get_date(filepath, sep) != day:
        raise CommandError(
            '%s file contains data for another day' % filepath)

    # First, make sure we don't have any existing counts for the same day,
    # or it would just increment again the same data.
    ThemeUpdateCount.objects.filter(date=day).delete()

    theme_update_counts = {}
    new_stheme_update_counts = {}

    # Preload a set containing the ids of all the persona Add-on objects
    # that we care about. When looping, if we find an id that is not in
    # that set, we'll reject it.
    addons = set(
        Addon.objects.filter(type=amo.ADDON_PERSONA,
                             status=amo.STATUS_PUBLIC,
                             persona__isnull=False)
        .values_list('id', flat=True))

    # Preload a dict of persona to static theme ids that are migrated.
    migrated_personas = dict(
        MigratedLWT.objects.values_list(
            'lightweight_theme_id', 'static_theme_id'))

    # NOTE(review): this query is not restricted to `day`; if an add-on has
    # several UpdateCount rows, the dict keeps whichever one the queryset
    # yields last. Confirm that this is intended.
    existing_stheme_update_counts = {
        uc.addon_id: uc for uc in UpdateCount.objects.filter(
            addon_id__in=migrated_personas.values())}
    # Ids of pre-existing UpdateCount objects modified below. They are
    # saved once after the loop rather than once per input line.
    updated_existing_ids = set()

    # Preload all the Personas once and for all. This builds a dict where
    # each key (the persona_id we get from the hive query) has the addon_id
    # as value.
    persona_to_addon = dict(
        Persona.objects.values_list('persona_id', 'addon_id'))

    count_file = get_stats_data(filepath)
    # Stays at -1 when the stream yields nothing, so the summary log below
    # reports 0 lines instead of failing on an unbound name.
    index = -1
    for index, line in enumerate(count_file):
        if index and (index % 1000000) == 0:
            log.info('Processed %s lines' % index)

        # Only strip the line terminator: a final line without a trailing
        # newline must not lose the last digit of its count.
        fields = line.rstrip('\n').split(sep)
        if len(fields) != 4:
            log.debug('Badly formatted row: %s' % line)
            continue

        # The date stored on new objects is the one found on the row.
        line_day, id_, src, count = fields
        try:
            id_, count = int(id_), int(count)
        except ValueError:  # Badly formatted? Drop.
            continue

        if src:
            src = src.strip()

        # If src is 'gp', it's an old request for the persona id.
        if id_ not in persona_to_addon and src == 'gp':
            continue  # No such persona.
        addon_id = persona_to_addon[id_] if src == 'gp' else id_

        # Is the persona already migrated to static theme?
        if addon_id in migrated_personas:
            mig_addon_id = migrated_personas[addon_id]
            if mig_addon_id in existing_stheme_update_counts:
                existing_stheme_update_counts[mig_addon_id].count += count
                updated_existing_ids.add(mig_addon_id)
            elif mig_addon_id in new_stheme_update_counts:
                new_stheme_update_counts[mig_addon_id].count += count
            else:
                new_stheme_update_counts[mig_addon_id] = UpdateCount(
                    addon_id=mig_addon_id, date=line_day, count=count)

        # Does this addon exist?
        if addon_id not in addons:
            continue

        # Memoize the ThemeUpdateCount.
        if addon_id in theme_update_counts:
            tuc = theme_update_counts[addon_id]
        else:
            tuc = ThemeUpdateCount(
                addon_id=addon_id, date=line_day, count=0)
            theme_update_counts[addon_id] = tuc

        # We can now fill the ThemeUpdateCount object.
        tuc.count += count

    # Persist the pre-existing static theme counts that were incremented.
    for mig_addon_id in updated_existing_ids:
        existing_stheme_update_counts[mig_addon_id].save()

    # Create in bulk: this is much faster.
    ThemeUpdateCount.objects.bulk_create(theme_update_counts.values(), 100)
    UpdateCount.objects.bulk_create(new_stheme_update_counts.values(), 100)

    log.info('Processed a total of %s lines' % (index + 1))
    log.debug('Total processing time: %s' % (datetime.now() - start))

    # Clean up file.
    if stats_source == 'file':
        log.debug('Deleting {path}'.format(path=filepath))
        unlink(filepath)
def handle(self, *args, **options):
    """Import one day of theme update counts from a local hive file.

    The file ``theme_update_counts.hive`` is read from
    ``TMP_PATH/<folder>/<day>``, where ``<folder>`` is the first positional
    argument (``hive_results`` when none is given) and ``<day>`` is
    ``options['date']`` (yesterday when empty). Each row is expected to
    hold four ``separator``-delimited fields: day, id, source and count.
    Counts are summed per add-on into new ``ThemeUpdateCount`` objects,
    which are bulk-created and then passed to ``save_stats_to_file``.
    The input file is deleted at the end.

    Raises:
        CommandError: if the file contains data for another day than the
            requested one.
    """
    start = datetime.now()  # Measure the time it takes to run the script.

    day = options['date']
    if not day:
        day = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

    folder = args[0] if args else 'hive_results'
    folder = path.join(settings.TMP_PATH, folder, day)
    sep = options['separator']
    filepath = path.join(folder, 'theme_update_counts.hive')

    # Make sure we're not trying to update with mismatched data.
    if get_date_from_file(filepath, sep) != day:
        raise CommandError(
            '%s file contains data for another day' % filepath)

    # First, make sure we don't have any existing counts for the same day,
    # or it would just increment again the same data.
    ThemeUpdateCount.objects.filter(date=day).delete()

    theme_update_counts = {}

    # Memoize the addon ids.
    addons = set(Addon.objects.values_list('id', flat=True))

    # Perf: preload all the Personas once and for all.
    # This builds a dict where each key (the persona_id we get from the
    # hive query) has the addon_id as value.
    persona_to_addon = dict(
        Persona.objects.values_list('persona_id', 'addon_id'))

    # Stays at -1 when the file has no lines, so the summary log below
    # reports 0 lines instead of failing on an unbound name.
    index = -1
    with codecs.open(filepath, encoding='utf8') as count_file:
        for index, line in enumerate(count_file):
            if index and (index % 1000000) == 0:
                log.info('Processed %s lines' % index)

            # Only strip the line terminator: a final line without a
            # trailing newline must not lose the last digit of its count.
            fields = line.rstrip('\n').split(sep)
            if len(fields) != 4:
                log.debug('Badly formatted row: %s' % line)
                continue

            # The date stored on new objects is the one found on the row.
            line_day, id_, src, count = fields
            try:
                id_, count = int(id_), int(count)
            except ValueError:  # Badly formatted? Drop.
                continue

            if src:
                src = src.strip()

            # If src is 'gp', it's an old request for the persona id.
            if id_ not in persona_to_addon and src == 'gp':
                continue  # No such persona.
            addon_id = persona_to_addon[id_] if src == 'gp' else id_

            # Does this addon exist?
            if addon_id not in addons:
                continue

            # Memoize the ThemeUpdateCount.
            if addon_id in theme_update_counts:
                tuc = theme_update_counts[addon_id]
            else:
                tuc = ThemeUpdateCount(
                    addon_id=addon_id, date=line_day, count=0)
                theme_update_counts[addon_id] = tuc

            # We can now fill the ThemeUpdateCount object.
            tuc.count += count

    # Create in bulk: this is much faster.
    ThemeUpdateCount.objects.bulk_create(theme_update_counts.values(), 100)
    for theme_update_count in theme_update_counts.values():
        save_stats_to_file(theme_update_count)

    log.info('Processed a total of %s lines' % (index + 1))
    log.debug('Total processing time: %s' % (datetime.now() - start))

    # Clean up file.
    log.debug('Deleting {path}'.format(path=filepath))
    unlink(filepath)