def handle(self, *args, **options):
    """Dispatch the requested task over all matching add-ons in chunks."""
    task = tasks.get(options.get('task'))
    if not task:
        raise CommandError('Unknown task provided. Options are: %s' %
                           ', '.join(tasks.keys()))
    # Include unlisted add-ons only when explicitly requested.
    if options.get('with_unlisted'):
        manager = Addon.with_unlisted
    else:
        manager = Addon.objects
    pks = (manager.filter(*task['qs'])
           .values_list('pk', flat=True)
           .order_by('-last_updated'))
    if 'pre' in task:
        # This is run in process to ensure its run before the tasks.
        pks = task['pre'](pks)
    if pks:
        kw = task.get('kwargs', {})
        # All the remaining tasks go in one group.
        grouping = [task['method'].subtask(args=[chunk], kwargs=kw)
                    for chunk in chunked(pks, 100)]
        if 'post' in task:
            # Run the post task once, after every chunk has finished.
            post = task['post'].subtask(args=[], kwargs=kw, immutable=True)
            ts = chord(grouping, post)
        else:
            ts = group(grouping)
        ts.apply_async()
def deliver_hotness():
    """
    Calculate hotness of all add-ons.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    hotness = (a-b) / b if a > 1000 and b > 1 else 0
    """
    frozen = {f.id for f in FrozenAddon.objects.all()}
    all_ids = list(Addon.objects.exclude(type=amo.ADDON_PERSONA)
                   .values_list("id", flat=True))
    now = datetime.now()
    one_week = now - timedelta(days=7)
    four_weeks = now - timedelta(days=28)
    for ids in chunked(all_ids, 300):
        addons = Addon.objects.no_cache().filter(id__in=ids).no_transforms()
        # Frozen add-ons are pinned and never get a hotness score.
        ids = [addon.id for addon in addons if addon.id not in frozen]
        qs = (UpdateCount.objects.filter(addon__in=ids)
              .values_list("addon").annotate(Avg("count")))
        thisweek = dict(qs.filter(date__gte=one_week))
        threeweek = dict(qs.filter(date__range=(four_weeks, one_week)))
        for addon in addons:
            this = thisweek.get(addon.id, 0)
            three = threeweek.get(addon.id, 0)
            if this > 1000 and three > 1:
                addon.update(hotness=(this - three) / float(three))
            else:
                addon.update(hotness=0)
        # Let the database catch its breath.
        time.sleep(10)
def handle(self, *args, **options):
    """Queue add_versionlog over every review-queue activity log entry."""
    pks = (ActivityLog.objects.review_queue()
           .values_list('pk', flat=True)
           .order_by('id'))
    subtasks = []
    for chunk in chunked(pks, 100):
        subtasks.append(add_versionlog.subtask(args=[chunk]))
    TaskSet(subtasks).apply_async()
def update_user_ratings():
    """Update add-on author's ratings."""
    cursor = connections[multidb.get_slave()].cursor()
    # We build this query ahead of time because the cursor complains about data
    # truncation if it does the parameters. Also, this query is surprisingly
    # quick, <1sec for 6100 rows returned
    q = """ SELECT addons_users.user_id as user_id, AVG(rating) as avg_rating FROM reviews INNER JOIN versions INNER JOIN addons_users INNER JOIN addons ON reviews.version_id = versions.id AND addons.id = versions.addon_id AND addons_users.addon_id = addons.id WHERE reviews.reply_to IS NULL AND reviews.rating > 0 AND addons.status IN (%s) GROUP BY addons_users.user_id """ % (",".join(map(str, VALID_STATUSES)))
    cursor.execute(q)
    rows = cursor.fetchall()
    cursor.close()
    subtasks = [update_user_ratings_task.subtask(args=[chunk])
                for chunk in chunked(rows, 1000)]
    TaskSet(subtasks).apply_async()
def reindex_users(index=None):
    """Reindex every user profile into ES, 150 ids per subtask."""
    from . import tasks
    ids = UserProfile.objects.values_list('id', flat=True)
    subtasks = []
    for chunk in chunked(sorted(list(ids)), 150):
        subtasks.append(
            tasks.index_users.subtask(args=[chunk], kwargs=dict(index=index)))
    TaskSet(subtasks).apply_async()
def handle(self, *args, **options):
    """Queue fix_let_scope_bustage tasks for the given add-on ids."""
    if not args:
        raise CommandError('Please provide at least one add-on id to fix.')
    addon_ids = [int(arg) for arg in args]
    for chunk in chunked(addon_ids, 100):
        fix_let_scope_bustage_in_addons.delay(chunk)
def handle(self, *args, **options):
    """Run the named task over all matching add-ons, chunked into subtasks."""
    task = tasks.get(options.get("task"))
    if not task:
        raise CommandError("Unknown task provided. Options are: %s" %
                           ", ".join(tasks.keys()))
    if options.get("with_unlisted"):
        manager = Addon.with_unlisted
    else:
        manager = Addon.objects
    pks = (manager.filter(*task["qs"])
           .values_list("pk", flat=True)
           .order_by("-last_updated"))
    if "pre" in task:
        # This is run in process to ensure its run before the tasks.
        pks = task["pre"](pks)
    if pks:
        kw = task.get("kwargs", {})
        # All the remaining tasks go in one group.
        grouping = [task["method"].subtask(args=[chunk], kwargs=kw)
                    for chunk in chunked(pks, 100)]
        if "post" in task:
            # Chain a chord so the post task runs after every chunk.
            post = task["post"].subtask(args=[], kwargs=kw, immutable=True)
            ts = chord(grouping, post)
        else:
            ts = group(grouping)
        ts.apply_async()
def reindex_addons(index=None, addon_type=None):
    """Reindex add-ons (optionally only one type) into ES."""
    from . import tasks
    ids = Addon.unfiltered.values_list('id', flat=True)
    if addon_type:
        ids = ids.filter(type=addon_type)
    subtasks = [tasks.index_addons.subtask(args=[chunk],
                                           kwargs=dict(index=index))
                for chunk in chunked(sorted(list(ids)), 150)]
    group(subtasks).apply_async()
def reindex_collections(index=None):
    """Reindex all non-synchronized collections into ES."""
    from . import tasks
    # Synchronized collections are system-managed and are not indexed.
    ids = (Collection.objects.exclude(type=amo.COLLECTION_SYNCHRONIZED)
           .values_list('id', flat=True))
    subtasks = [tasks.index_collections.subtask(args=[chunk],
                                                kwargs=dict(index=index))
                for chunk in chunked(sorted(list(ids)), 150)]
    group(subtasks).apply_async()
def create_subtasks(task, qs, chunk_size, *args):
    """
    Split a task over a queryset into subtasks of the given chunk_size.

    Each subtask receives one chunk of the queryset plus any optional
    additional positional arguments, and the whole set is applied at once.
    """
    subtasks = []
    for chunk in chunked(qs, chunk_size):
        subtasks.append(task.subtask(args=(chunk,) + args))
    TaskSet(subtasks).apply_async()
def update_all_appsupport():
    """Recompute appsupport rows for every add-on that has one, in-process."""
    from .tasks import update_appsupport
    ids = sorted(set(AppSupport.objects.values_list("addon", flat=True)))
    task_log.info("Updating appsupport for %s addons." % len(ids))
    for idx, chunk in enumerate(chunked(ids, 100)):
        # Log progress every 10 chunks (i.e. every 1000 add-ons).
        if idx % 10 == 0:
            task_log.info("[%s/%s] Updating appsupport." %
                          (idx * 100, len(ids)))
        update_appsupport(chunk)
def featured_grid(context, addons, src=None, dl_src=None, pagesize=3, cols=3):
    # NOTE: new_context(**locals()) exports every local name into the
    # template context, so no helper variables are introduced here.
    if not src:
        src = context.get('src')
    # dl_src is an optional src parameter just for the download links
    if not dl_src:
        dl_src = src
    pages = chunked(addons, pagesize)
    columns = 'three-col' if cols == 3 else ''
    return new_context(**locals())
def update_collections_total():
    """Update collections downloads totals."""
    rows = (CollectionCount.objects.values('collection_id')
            .annotate(sum=Sum('count')))
    subtasks = [tasks.update_collections_total.subtask(args=[chunk])
                for chunk in chunked(rows, 50)]
    TaskSet(subtasks).apply_async()
def addon_grid(context, addons, src=None, dl_src=None, pagesize=6, cols=2,
               vital_summary="rating", vital_more="adu"):
    # Careful: new_context(**locals()) exports every local, so only the
    # names the template expects may be bound in this body.
    if not src:
        src = context.get("src")
    # dl_src is an optional src parameter just for the download links
    if not dl_src:
        dl_src = context.get("dl_src", src)
    pages = chunked(addons, pagesize)
    columns = "cols-%d" % cols
    return new_context(**locals())
def update_collections_subscribers():
    """Update collections subscribers totals."""
    # Only count watches created today.
    rows = (CollectionWatcher.objects.values('collection_id')
            .annotate(count=Count('collection'))
            .extra(where=['DATE(created)=%s'], params=[date.today()]))
    subtasks = [_update_collections_subscribers.subtask(args=[chunk])
                for chunk in chunked(rows, 1000)]
    group(subtasks).apply_async()
def hide_disabled_files():
    # If an add-on or a file is disabled, it should be moved to
    # GUARDED_ADDONS_PATH so it's not publicly visible.
    addon_disabled = (Q(version__addon__status=amo.STATUS_DISABLED) |
                      Q(version__addon__disabled_by_user=True))
    ids = (File.objects
           .filter(addon_disabled | Q(status=amo.STATUS_DISABLED))
           .values_list("id", flat=True))
    for chunk in chunked(ids, 300):
        files = (File.objects.no_cache().filter(id__in=chunk)
                 .select_related("version"))
        for f in files:
            f.hide_disabled_file()
def addon_grid(context, addons, src=None, dl_src=None, pagesize=6, cols=2,
               vital_summary='rating', vital_more='adu'):
    # NOTE: new_context(**locals()) forwards every local to the template
    # context — do not introduce helper variables in this body.
    if not src:
        src = context.get('src')
    # dl_src is an optional src parameter just for the download links
    if not dl_src:
        dl_src = context.get('dl_src', src)
    pages = chunked(addons, pagesize)
    columns = 'cols-%d' % cols
    return new_context(**locals())
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    raise_if_reindex_in_progress('amo')
    rows = (AddonCollectionCount.objects.values('addon', 'collection')
            .annotate(sum=Sum('count')))
    subtasks = [tasks.update_addons_collections_downloads.subtask(args=[chunk])
                for chunk in chunked(rows, 100)]
    TaskSet(subtasks).apply_async()
def update_collections_votes():
    """Update collection's votes."""
    def todays_votes(value):
        # Today's votes with the given value (+1 = up, -1 = down).
        return (CollectionVote.objects.values('collection_id')
                .annotate(count=Count('collection'))
                .filter(vote=value)
                .extra(where=['DATE(created)=%s'], params=[date.today()]))

    up = todays_votes(1)
    down = todays_votes(-1)
    group([_update_collections_votes.subtask(args=[chunk, 'new_votes_up'])
           for chunk in chunked(up, 1000)]).apply_async()
    group([_update_collections_votes.subtask(args=[chunk, 'new_votes_down'])
           for chunk in chunked(down, 1000)]).apply_async()
def update_addon_appsupport():
    # Find all the add-ons that need their app support details updated.
    newish = (Q(last_updated__gte=F("appsupport__created")) |
              Q(appsupport__created__isnull=True))
    # Search providers don't list supported apps.
    has_app = Q(versions__apps__isnull=False) | Q(type=amo.ADDON_SEARCH)
    has_file = Q(versions__files__status__in=amo.VALID_FILE_STATUSES)
    good = Q(has_app, has_file) | Q(type=amo.ADDON_PERSONA)
    ids = (Addon.objects.valid().distinct()
           .filter(newish, good)
           .values_list("id", flat=True))
    task_log.info("Updating appsupport for %d new-ish addons." % len(ids))
    subtasks = [_update_appsupport.subtask(args=[chunk])
                for chunk in chunked(ids, 20)]
    TaskSet(subtasks).apply_async()
def handle(self, *args, **options):
    """Re-sign the given file ids against the configured signing servers."""
    if not args:
        raise CommandError('Please provide at least one file id to resign')
    # Command-line servers override the settings defaults.
    full_server = options.get('signing_server') or settings.SIGNING_SERVER
    prelim_server = (options.get('preliminary_signing_server') or
                     settings.PRELIMINARY_SIGNING_SERVER)
    file_ids = [int(file_id) for file_id in args]
    with override_settings(SIGNING_SERVER=full_server,
                           PRELIMINARY_SIGNING_SERVER=prelim_server):
        for chunk in chunked(file_ids, 100):
            resign_files.delay(chunk)
def update_addon_download_totals():
    """Update add-on total and average downloads."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False
    qs = (Addon.objects
          .annotate(sum_download_count=Sum('downloadcount__count'))
          .values_list('id', 'sum_download_count')
          .order_by('id'))
    group([_update_addon_download_totals.subtask(args=[chunk])
           for chunk in chunked(qs, 250)]).apply_async()
def handle(self, *args, **options):
    """Queue permission extraction for webextension files."""
    files = File.objects.filter(is_webextension=True)
    if not options['force']:
        # Without --force, skip files already processed.
        files = files.filter(_webext_permissions=None)
    pks = files.values_list('pk', flat=True)
    print('pks count %s' % pks.count())
    if pks:
        subtasks = [extract_webext_permissions.subtask(args=[chunk])
                    for chunk in chunked(pks, 100)]
        group(subtasks).apply_async()
def email_devs(request):
    """Admin view: bulk-email add-on developers, with a preview-only mode."""
    form = DevMailerForm(request.POST or None)
    preview = EmailPreviewTopic(topic='email-devs')
    if preview.filter().count():
        preview_csv = reverse('zadmin.email_preview_csv',
                              args=[preview.topic])
    else:
        preview_csv = None
    if request.method == 'POST' and form.is_valid():
        data = form.cleaned_data
        qs = (AddonUser.objects
              .filter(role__in=(amo.AUTHOR_ROLE_DEV, amo.AUTHOR_ROLE_OWNER))
              .exclude(user__email=None)
              .filter(addon__status__in=amo.LISTED_STATUSES))
        recipients = data['recipients']
        if recipients == 'eula':
            qs = qs.exclude(addon__eula=None)
        elif recipients == 'sdk':
            qs = qs.exclude(addon__versions__files__jetpack_version=None)
        elif recipients == 'all_extensions':
            qs = qs.filter(addon__type=amo.ADDON_EXTENSION)
        elif recipients == 'fxa':
            # We're limiting to logins since 2014 to limit the list
            # dramatically. There was an import from getpersonas.com in 2013
            # and if you haven't logged in within the last two years you
            # likely won't migrate.
            qs = qs.filter(
                Q(user__fxa_id__isnull=True) | Q(user__fxa_id=''),
                user__last_login__gte=datetime(year=2014, month=1, day=1))
        else:
            raise NotImplementedError('If you want to support emailing other '
                                      'types of developers, do it here!')
        if data['preview_only']:
            # Clear out the last batch of previewed emails.
            preview.filter().delete()
        total = 0
        for emails in chunked(set(qs.values_list('user__email', flat=True)),
                              100):
            total += len(emails)
            tasks.admin_email.delay(emails, data['subject'], data['message'],
                                    preview_only=data['preview_only'],
                                    preview_topic=preview.topic)
        msg = 'Emails queued for delivery: %s' % total
        if data['preview_only']:
            msg = '%s (for preview only, emails not sent!)' % msg
        messages.success(request, msg)
        return redirect('zadmin.email_devs')
    return render(request, 'zadmin/email-devs.html',
                  dict(form=form, preview_csv=preview_csv))
def handle(self, *args, **options):
    """Bulk approve and sign the add-ons matching the given guids.

    Prompts interactively for confirmation before touching anything.
    """
    guids = options['addon_guid']
    # Fix: the prompt used to report len(args), but the command actually
    # processes options['addon_guid'] — count the guids we will act on.
    confirm = raw_input(
        u'Are you sure you want to bulk approve and sign all those {0} '
        u'addons? (yes/no)'.format(len(guids)))
    if confirm != 'yes':
        raise CommandError(u'Aborted.')
    for chunk in chunked(guids, 100):
        files = get_files(chunk)
        log.info(u'Bulk approving chunk of %s files', len(files))
        files_with_review_type = [
            (file_, get_review_type(file_)) for file_ in files]
        approve_files(files_with_review_type)
def change(): files = list(File.objects.values_list('pk', flat=True) .filter(version__addon__type=amo.ADDON_SEARCH) .exclude(platform=amo.PLATFORM_ALL.id)) k = 0 print 'Changing %s files' % len(files) for chunk in chunked(files, 100): for file in File.objects.no_cache().filter(pk__in=chunk): file.platform = amo.PLATFORM_ALL.id if not file.datestatuschanged: file.datestatuschanged = datetime.now() file.save() k += 1 if not k % 50: print '... done %s' % k
def email_devs(request):
    """Admin view to bulk-email developer groups, optionally preview-only."""
    form = DevMailerForm(request.POST or None)
    preview = EmailPreviewTopic(topic='email-devs')
    if preview.filter().count():
        preview_csv = reverse('zadmin.email_preview_csv',
                              args=[preview.topic])
    else:
        preview_csv = None
    if request.method == 'POST' and form.is_valid():
        data = form.cleaned_data
        qs = (AddonUser.objects
              .filter(role__in=(amo.AUTHOR_ROLE_DEV, amo.AUTHOR_ROLE_OWNER))
              .exclude(user__email=None)
              .filter(addon__status__in=amo.VALID_ADDON_STATUSES))
        recipients = data['recipients']
        if recipients == 'eula':
            qs = qs.exclude(addon__eula=None)
        elif recipients == 'sdk':
            qs = qs.exclude(addon__versions__files__jetpack_version=None)
        elif recipients == 'all_extensions':
            qs = qs.filter(addon__type=amo.ADDON_EXTENSION)
        elif recipients == 'depreliminary':
            # Authors whose preliminary add-ons were migrated and who asked
            # to be emailed about it.
            addon_logs = AddonLog.objects.filter(
                activity_log__action=amo.LOG.PRELIMINARY_ADDON_MIGRATED.id,
                activity_log___details__contains='"email": true')
            addons = addon_logs.values_list('addon', flat=True)
            qs = qs.filter(addon__in=addons)
        else:
            raise NotImplementedError('If you want to support emailing other '
                                      'types of developers, do it here!')
        if data['preview_only']:
            # Clear out the last batch of previewed emails.
            preview.filter().delete()
        total = 0
        for emails in chunked(set(qs.values_list('user__email', flat=True)),
                              100):
            total += len(emails)
            tasks.admin_email.delay(emails, data['subject'], data['message'],
                                    preview_only=data['preview_only'],
                                    preview_topic=preview.topic)
        msg = 'Emails queued for delivery: %s' % total
        if data['preview_only']:
            msg = '%s (for preview only, emails not sent!)' % msg
        messages.success(request, msg)
        return redirect('zadmin.email_devs')
    return render(request, 'zadmin/email-devs.html',
                  dict(form=form, preview_csv=preview_csv))
def create_subtasks(task, qs, chunk_size, countdown=None, task_args=None):
    """
    Split a task over a queryset into subtasks of the given chunk_size.

    Each subtask receives one chunk plus any optional extra positional
    arguments; the resulting group is applied, optionally delayed by
    `countdown` seconds.
    """
    if task_args is None:
        task_args = ()
    signatures = [task.subtask(args=(chunk,) + task_args)
                  for chunk in chunked(qs, chunk_size)]
    job = group(signatures)
    if countdown is not None:
        job.apply_async(countdown=countdown)
    else:
        job.apply_async()
def unhide_disabled_files():
    """
    Move files (on filesystem) belonging to public files (in database) to the
    correct place if necessary, so they are publicly accessible.

    See also hide_disabled_files().
    """
    disabled = (Q(version__addon__status=amo.STATUS_DISABLED) |
                Q(version__addon__disabled_by_user=True) |
                Q(status=amo.STATUS_DISABLED))
    ids = File.objects.exclude(disabled).values_list('id', flat=True)
    for chunk in chunked(ids, 300):
        files = File.objects.select_related('version').filter(id__in=chunk)
        for file_ in files:
            # This tries to move the file to the public location. If it
            # didn't exist at the source, it will catch the exception, log it
            # and continue.
            file_.unhide_disabled_file()
def update_addon_total_downloads():
    """Update add-on total and average downloads."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False
    if waffle.switch_is_active('use-bigquery-for-download-stats-cron'):
        # BigQuery is the source of truth when its switch is on.
        log.info('Not running `update_addon_total_downloads()` because waffle '
                 'switch is active.')
        return
    qs = (Addon.objects
          .annotate(sum_download_count=Sum('downloadcount__count'))
          .values_list('id', 'sum_download_count')
          .order_by('id'))
    subtasks = [_update_addon_total_downloads.subtask(args=[chunk])
                for chunk in chunked(qs, 250)]
    group(subtasks).apply_async()
def report():
    """Print search-tool versions that have more than one file attached."""
    versions = list(Version.objects.values_list('pk', flat=True)
                    .annotate(files_count=Count('files'))
                    .filter(addon__type=amo.ADDON_SEARCH)
                    .filter(files_count__gt=1))
    for chunk in chunked(versions, 100):
        for version in Version.objects.no_cache().filter(pk__in=chunk):
            print('Addon: %s, %s' % (version.addon.pk, version.addon.name))
            print('Version: %s - %s files' % (version.pk,
                                              version.files.count()))
            print('URL: %s' % reverse('devhub.versions.edit',
                                      args=[version.addon.slug, version.pk]))
            seen_hashes = []
            for file in version.all_files:
                print('File: %s, %s' % (file.filename, file.hash))
                # Flag duplicate hashes within the same version.
                if file.hash in seen_hashes:
                    print('...this hash is repeated, same file?')
                seen_hashes.append(file.hash)
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active("local-statistics-processing"):
        return False
    raise_if_reindex_in_progress("amo")
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`) FROM update_counts WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY) GROUP BY addon_id ORDER BY addon_id"""
    cursor.execute(q)
    averages = cursor.fetchall()
    cursor.close()
    subtasks = [_update_addon_average_daily_users.subtask(args=[chunk])
                for chunk in chunked(averages, 250)]
    TaskSet(subtasks).apply_async()
def handle(self, *args, **options):
    """Look up the requested task and fan it out over matching add-ons."""
    task = tasks.get(options.get('task'))
    if not task:
        raise CommandError('Unknown task provided. Options are: %s' %
                           ', '.join(tasks.keys()))
    if options.get('with_deleted'):
        addon_manager = Addon.unfiltered
    else:
        addon_manager = Addon.objects
    if options.get('ids'):
        # Restrict to an explicit comma-separated id list.
        ids_list = options.get('ids').split(',')
        addon_manager = addon_manager.filter(id__in=ids_list)
    pks = (addon_manager.filter(*task['qs'])
           .values_list('pk', flat=True)
           .order_by('id'))
    if task.get('distinct'):
        pks = pks.distinct()
    if options.get('limit'):
        pks = pks[:options.get('limit')]
    if 'pre' in task:
        # This is run in process to ensure its run before the tasks.
        pks = task['pre'](pks)
    if pks:
        kwargs = task.get('kwargs', {})
        if task.get('allowed_kwargs'):
            # Forward whitelisted command-line options to the task.
            kwargs.update({arg: options.get(arg, None)
                           for arg in task['allowed_kwargs']})
        # All the remaining tasks go in one group.
        grouping = [task['method'].subtask(args=[chunk], kwargs=kwargs)
                    for chunk in chunked(pks, 100)]
        if 'post' in task:
            # Chain a chord so the post task runs after every chunk.
            post = task['post'].subtask(args=[], kwargs=kwargs, immutable=True)
            ts = chord(grouping, post)
        else:
            ts = group(grouping)
        ts.apply_async()
def email_devs(request):
    """Admin view: bulk-email developer groups, with a preview-only mode."""
    form = DevMailerForm(request.POST or None)
    preview = EmailPreviewTopic(topic='email-devs')
    if preview.filter().count():
        preview_csv = reverse('zadmin.email_preview_csv',
                              args=[preview.topic])
    else:
        preview_csv = None
    if request.method == 'POST' and form.is_valid():
        data = form.cleaned_data
        qs = (AddonUser.objects
              .filter(role__in=(amo.AUTHOR_ROLE_DEV, amo.AUTHOR_ROLE_OWNER))
              .exclude(user__email=None)
              .filter(addon__status__in=amo.LISTED_STATUSES))
        recipients = data['recipients']
        if recipients == 'eula':
            qs = qs.exclude(addon__eula=None)
        elif recipients == 'sdk':
            qs = qs.exclude(addon__versions__files__jetpack_version=None)
        elif recipients == 'all_extensions':
            qs = qs.filter(addon__type=amo.ADDON_EXTENSION)
        elif recipients == 'fxa':
            # Developers who have not yet linked a Firefox Account.
            qs = qs.filter(Q(user__fxa_id__isnull=True) | Q(user__fxa_id=''))
        else:
            raise NotImplementedError('If you want to support emailing other '
                                      'types of developers, do it here!')
        if data['preview_only']:
            # Clear out the last batch of previewed emails.
            preview.filter().delete()
        total = 0
        for emails in chunked(set(qs.values_list('user__email', flat=True)),
                              100):
            total += len(emails)
            tasks.admin_email.delay(emails, data['subject'], data['message'],
                                    preview_only=data['preview_only'],
                                    preview_topic=preview.topic)
        msg = 'Emails queued for delivery: %s' % total
        if data['preview_only']:
            msg = '%s (for preview only, emails not sent!)' % msg
        messages.success(request, msg)
        return redirect('zadmin.email_devs')
    return render(request, 'zadmin/email-devs.html',
                  dict(form=form, preview_csv=preview_csv))
def find_files(job):
    # This is a first pass, we know we don't want any addons in the states
    # STATUS_NULL and STATUS_DISABLED.
    current = job.curr_max_version.version_int
    target = job.target_version.version_int
    addons = (Addon.objects
              .filter(status__in=amo.VALID_ADDON_STATUSES,
                      disabled_by_user=False,
                      versions__apps__application=job.application,
                      versions__apps__max__version_int__gte=current,
                      versions__apps__max__version_int__lt=target)
              # Exclude lang packs and themes.
              .exclude(type__in=[amo.ADDON_LPAPP, amo.ADDON_THEME])
              # Exclude WebExtensions (see #1499).
              .exclude(versions__files__is_webextension=True)
              .no_transforms()
              .values_list("pk", flat=True)
              .distinct())
    for pks in chunked(addons, 100):
        tasks.add_validation_jobs.delay(pks, job.pk)
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False
    raise_if_reindex_in_progress('amo')
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`) FROM update_counts WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY) GROUP BY addon_id ORDER BY addon_id"""
    cursor.execute(q)
    averages = cursor.fetchall()
    cursor.close()
    group([_update_addon_average_daily_users.subtask(args=[chunk])
           for chunk in chunked(averages, 250)]).apply_async()
def handle(self, *args, **options):
    """Bulk approve and sign the add-ons whose guids are given as args."""
    if not args:  # No GUID provided?
        raise CommandError(
            u'Please provide at least one add-on guid to approve.')
    confirm = raw_input(
        u'Are you sure you want to bulk approve and sign all those {0} '
        u'addons? (yes/no)'.format(len(args)))
    if confirm != 'yes':
        raise CommandError(u'Aborted.')
    for chunk in chunked(args, 100):
        files = get_files(chunk)
        log.info(u'Bulk approving chunk of %s files', len(files))
        files_with_review_type = [(file_, get_review_type(file_))
                                  for file_ in files]
        approve_files(files_with_review_type)
def handle(self, *args, **options):
    """Create/update Blocks for every guid listed in the input file."""
    with open(options.get('guids_input'), 'r') as guid_file:
        input_guids = guid_file.read()
    guids = splitlines(input_guids)
    # Only forward the block properties that were actually supplied.
    block_args = {prop: options.get(prop)
                  for prop in ('min_version', 'max_version', 'reason', 'url')
                  if options.get(prop)}
    block_args['updated_by'] = get_task_user()
    block_args['include_in_legacy'] = False
    submission = BlocklistSubmission(**block_args)
    for guids_chunk in chunked(guids, 100):
        blocks = save_guids_to_blocks(guids_chunk, submission)
        print(
            f'Added/Updated {len(blocks)} blocks from {len(guids_chunk)} '
            'guids')
def save_to_block_objects(self):
    """Persist this submission's guids as Block objects and publish it."""
    assert self.is_submission_ready
    assert self.action == self.ACTION_ADDCHANGE
    fields_to_set = ['min_version', 'max_version', 'url', 'reason',
                     'updated_by']
    guids = [entry['guid'] for entry in self.to_block]
    for guids_chunk in chunked(guids, 100):
        save_guids_to_blocks(guids_chunk, self, fields_to_set=fields_to_set)
        # Save after each chunk so progress survives interruption.
        self.save()
    self.update(signoff_state=self.SIGNOFF_PUBLISHED)
def create_chunked_tasks_signatures(task, items, chunk_size, task_args=None,
                                    task_kwargs=None):
    """
    Split a task over a list of items into signatures of the given chunk_size,
    passing one chunk plus the optional extra args/kwargs to each.

    Return the group of task signatures without executing it.
    """
    from olympia.amo.utils import chunked
    # `is None` checks (not truthiness) so explicit empty containers pass
    # through unchanged.
    if task_args is None:
        task_args = ()
    if task_kwargs is None:
        task_kwargs = {}
    signatures = [task.si(chunk, *task_args, **task_kwargs)
                  for chunk in chunked(items, chunk_size)]
    return group(signatures)
def deliver_hotness():
    """
    Calculate hotness of all add-ons.

    a = avg(users this week)
    b = avg(users three weeks before this week)
    threshold = 250 if addon type is theme, else 1000
    hotness = (a-b) / b if a > threshold and b > 1 else 0
    """
    frozen = set(f.id for f in FrozenAddon.objects.all())
    all_ids = list(
        (Addon.objects.filter(status__in=amo.REVIEWED_STATUSES).values_list(
            'id', flat=True)))
    now = datetime.now()
    one_week = now - timedelta(days=7)
    four_weeks = now - timedelta(days=28)
    for ids in chunked(all_ids, 300):
        addons = Addon.objects.filter(id__in=ids).no_transforms()
        # Frozen add-ons are pinned and never get a hotness score.
        ids = [a.id for a in addons if a.id not in frozen]
        qs = (UpdateCount.objects.filter(
            addon__in=ids).values_list('addon').annotate(Avg('count')))
        thisweek = dict(qs.filter(date__gte=one_week))
        threeweek = dict(qs.filter(date__range=(four_weeks, one_week)))
        for addon in addons:
            this = thisweek.get(addon.id, 0)
            three = threeweek.get(addon.id, 0)
            # Static themes have a lower activity threshold.
            threshold = 250 if addon.type == amo.ADDON_STATICTHEME else 1000
            if this > threshold and three > 1:
                hotness = (this - three) / float(three)
            else:
                hotness = 0
            # Fix: reuse the computed value instead of recomputing the ratio
            # in the update() call; only write when the score changed, to
            # avoid unnecessary re-indexes.
            if addon.hotness != hotness:
                addon.update(hotness=hotness)
        # Let the database catch its breath.
        time.sleep(10)
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False
    kwargs = {'id_field': 'pk'}
    if waffle.switch_is_active('use-bigquery-for-addon-adu'):
        # BigQuery does not have data for add-ons with type other than those in
        # `ADDON_TYPES_WITH_STATS` so we use download counts instead.
        # See: https://github.com/mozilla/addons-server/issues/14609
        amo_counts = dict(
            Addon.objects
            .exclude(type__in=amo.ADDON_TYPES_WITH_STATS)
            .exclude(guid__isnull=True)
            .exclude(guid__exact='')
            .annotate(count=Coalesce(Sum('downloadcount__count'), 0))
            .values_list('guid', 'count')
            # Just to make order predictable in tests, we order by id. This
            # matches the GROUP BY being generated so it should be safe.
            .order_by('id'))
        counts = dict(get_addons_and_average_daily_users_from_bigquery())
        counts.update(amo_counts)
        counts = list(counts.items())
        # BigQuery stores GUIDs, not AMO primary keys.
        kwargs['id_field'] = 'guid'
    else:
        raise_if_reindex_in_progress('amo')
        cursor = connections[multidb.get_replica()].cursor()
        q = """SELECT addon_id, AVG(`count`) FROM update_counts WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 13 DAY) GROUP BY addon_id ORDER BY addon_id"""
        cursor.execute(q)
        counts = cursor.fetchall()
        cursor.close()
    group([
        _update_addon_average_daily_users.subtask(args=[chunk], kwargs=kwargs)
        for chunk in chunked(counts, 250)
    ]).apply_async()
def handle(self, *args, **options):
    """Resolve the named task and queue it over the selected add-on pks."""
    tasks = self.get_tasks()
    task_info = tasks.get(options.get('task_info'))
    if not task_info:
        raise CommandError('Unknown task provided. Options are: %s' %
                           ', '.join(tasks.keys()))
    base_qs = self.get_base_queryset(options)
    pks = self.get_pks(base_qs, task_info['queryset_filters'],
                       distinct=task_info.get('distinct'))
    if options.get('limit'):
        pks = pks[:options.get('limit')]
    if 'pre' in task_info:
        # This is run in process to ensure its run before the tasks.
        pks = task_info['pre'](pks)
    if pks:
        kwargs = task_info.get('kwargs', {})
        if task_info.get('allowed_kwargs'):
            # Forward whitelisted command-line options to the task.
            kwargs.update({arg: options.get(arg, None)
                           for arg in task_info['allowed_kwargs']})
        # All the remaining tasks go in one group.
        grouping = [task_info['task'].subtask(args=[chunk], kwargs=kwargs)
                    for chunk in chunked(pks, options.get('batch_size'))]
        if 'post' in task_info:
            # Add the post task on to the end via a chord.
            post = task_info['post'].subtask(args=[], kwargs=kwargs,
                                             immutable=True)
            ts = chord(grouping, post)
        else:
            ts = group(grouping)
        ts.apply_async()
def update_addon_download_totals():
    """Update add-on total and average downloads."""
    if not waffle.switch_is_active('local-statistics-processing'):
        return False
    cursor = connections[multidb.get_slave()].cursor()
    # We need to use SQL for this until
    # http://code.djangoproject.com/ticket/11003 is resolved
    q = """SELECT addon_id, AVG(count), SUM(count) FROM download_counts USE KEY (`addon_and_count`) JOIN addons ON download_counts.addon_id=addons.id WHERE addons.status != %s GROUP BY addon_id ORDER BY addon_id"""
    cursor.execute(q, [amo.STATUS_DELETED])
    totals = cursor.fetchall()
    cursor.close()
    group([_update_addon_download_totals.subtask(args=[chunk])
           for chunk in chunked(totals, 250)]).apply_async()
def gc(test_result=True):
    """Site-wide garbage collections."""
    def days_ago(days):
        return datetime.today() - timedelta(days=days)

    log.debug('Collecting data to delete')
    logs = (ActivityLog.objects.filter(created__lt=days_ago(90))
            .exclude(action__in=amo.LOG_KEEP).values_list('id', flat=True))
    # Paypal only keeps retrying to verify transactions for up to 3 days. If we
    # still have an unverified transaction after 6 days, we might as well get
    # rid of it.
    contributions_to_delete = (
        Contribution.objects
        .filter(transaction_id__isnull=True, created__lt=days_ago(6))
        .values_list('id', flat=True))
    collections_to_delete = (
        Collection.objects
        .filter(created__lt=days_ago(2), type=amo.COLLECTION_ANONYMOUS)
        .values_list('id', flat=True))
    for chunk in chunked(logs, 100):
        tasks.delete_logs.delay(chunk)
    for chunk in chunked(contributions_to_delete, 100):
        tasks.delete_stale_contributions.delay(chunk)
    for chunk in chunked(collections_to_delete, 100):
        tasks.delete_anonymous_collections.delay(chunk)
    # Incomplete addons cannot be deleted here because when an addon is
    # rejected during a review it is marked as incomplete. See bug 670295.
    log.debug('Cleaning up test results extraction cache.')
    # lol at check for '/'
    if settings.MEDIA_ROOT and settings.MEDIA_ROOT != '/':
        cmd = ('find', settings.MEDIA_ROOT, '-maxdepth', '1', '-name',
               'validate-*', '-mtime', '+7', '-type', 'd',
               '-exec', 'rm', '-rf', "{}", ';')
        output = Popen(cmd, stdout=PIPE).communicate()[0]
        for line in output.split("\n"):
            log.debug(line)
    else:
        log.warning('MEDIA_ROOT not defined.')
    if user_media_path('collection_icons'):
        log.debug('Cleaning up uncompressed icons.')
        cmd = ('find', user_media_path('collection_icons'),
               '-name', '*__unconverted', '-mtime', '+1', '-type', 'f',
               '-exec', 'rm', '{}', ';')
        output = Popen(cmd, stdout=PIPE).communicate()[0]
        for line in output.split("\n"):
            log.debug(line)
    USERPICS_PATH = user_media_path('userpics')
    if USERPICS_PATH:
        log.debug('Cleaning up uncompressed userpics.')
        cmd = ('find', USERPICS_PATH,
               '-name', '*__unconverted', '-mtime', '+1', '-type', 'f',
               '-exec', 'rm', '{}', ';')
        output = Popen(cmd, stdout=PIPE).communicate()[0]
        for line in output.split("\n"):
            log.debug(line)
    # Delete stale FileUploads.
    stale_uploads = FileUpload.objects.filter(
        created__lte=days_ago(180)).order_by('id')
    for file_upload in stale_uploads:
        log.debug(u'[FileUpload:{uuid}] Removing file: {path}'
                  .format(uuid=file_upload.uuid, path=file_upload.path))
        if file_upload.path:
            # Best-effort removal: the file may already be gone.
            try:
                storage.delete(file_upload.path)
            except OSError:
                pass
        file_upload.delete()
def notify_compatibility(job, params):
    """Run the post-validation phase of a bulk compatibility job.

    Two passes: (1) bump max-version on versions updated by the job,
    (2) e-mail each author a summary of their passing/failing add-ons.
    Both passes fan work out to celery tasks in chunks of 100.

    :param job: the validation job (a model instance; only .pk is used here).
    :param params: dict of job options; must contain 'preview_only', which is
        forwarded to the tasks as the dry-run flag.
    """
    dry_run = params['preview_only']
    log.info('[@None] Starting validation email/update process for job %d.'
             ' dry_run=%s.' % (job.pk, dry_run))
    log.info('[@None] Starting validation version bumps for job %d.' % job.pk)

    version_list = updated_versions(job)
    total = version_list.count()

    # Pass 1: version bumps, chunked to keep task payloads small.
    for chunk in chunked(version_list, 100):
        log.info('[%d@%d] Updating versions for job %d.'
                 % (len(chunk), total, job.pk))
        update_maxversions.delay(chunk, job.pk, params)

    log.info('[@None] Starting validation email run for job %d.' % job.pk)

    # Pass 2: author notifications.
    updated_authors = completed_version_authors(job)
    total = updated_authors.count()
    for chunk in chunked(updated_authors, 100):
        log.info('[%d@%d] Notifying authors for validation job %d'
                 % (len(chunk), total, job.pk))

        # There are times when you want to punch django's ORM in
        # the face. This may be one of those times.
        # (user_pk, addon_pk) pairs for every addon of every author in this
        # chunk that has a completed version in this job.
        users_addons = list(
            UserProfile.objects.filter(pk__in=chunk).filter(
                **_completed_versions(job, 'addons__versions')).values_list(
                    'pk', 'addons__pk').distinct())

        users = list(
            UserProfile.objects.filter(pk__in=set(u for u, a in users_addons)))

        # Annotate fails in tests when using cached results
        # Each addon is annotated with the total error count of its files'
        # validation results for this job.
        addons = (Addon.objects.no_cache().filter(
            **{
                'pk__in': set(a for u, a in users_addons),
                'versions__files__'
                'validation_results__validation_job': job
            }).annotate(
                errors=Sum('versions__files__validation_results__errors')))
        addons = dict((a.id, a) for a in addons)

        # Re-shape the pair list into {user_pk: [annotated Addon, ...]}.
        # sorted_groupby groups the (user, addon) pairs by user pk.
        users_addons = dict(
            (u, [addons[a] for u, a in row])
            for u, row in sorted_groupby(users_addons, lambda k: k[0]))

        log.info('[%d@%d] Notifying %d authors about %d addons for '
                 'validation job %d'
                 % (len(chunk), total, len(users), len(addons.keys()), job.pk))

        # Attach the split lists directly onto the user objects so the
        # notification task can read them.  NOTE(review): `addons` is
        # deliberately rebound here, shadowing the dict above — it is not
        # used as a dict after this point.
        for u in users:
            addons = users_addons[u.pk]
            u.passing_addons = [a for a in addons if a.errors == 0]
            u.failing_addons = [a for a in addons if a.errors > 0]

        notify_compatibility_chunk.delay(users, job, params)

    log.info('[@None] Completed validation email/update process '
             'for job %d. dry_run=%s.' % (job.pk, dry_run))
def impala_persona_grid(context, personas, src=None, pagesize=6, cols=3):
    """Template helper: split `personas` into pages for a grid layout.

    Returns the template context extended with `pages` (the personas in
    groups of `pagesize`) and a `columns` CSS class for the column count.
    """
    ctx = dict(context.items())
    column_class = 'cols-%d' % cols
    # dict(..., **ctx) preserves the original's behavior of raising on a
    # duplicate 'pages'/'columns' key in the incoming context.
    return dict(pages=chunked(personas, pagesize), columns=column_class,
                **ctx)
def upload(self, **params):
    """Create a FileUpload from self.data, posted in 3-character chunks."""
    pieces = []
    for segment in chunked(self.data, 3):
        pieces.append(''.join(segment))
    return FileUpload.from_post(pieces, 'filename.xpi', len(self.data),
                                **params)
def collection_grid(context, collections, src=None, pagesize=4, cols=2):
    """Template helper: paginate `collections` into a grid.

    NOTE: new_context(**locals()) forwards *every* local name as a context
    key (context, collections, src, pagesize, cols, pages, columns) — do
    not rename any local variable here without updating the templates.
    """
    pages = chunked(collections, pagesize)
    columns = 'cols-%d' % cols
    return new_context(**locals())
def handle(self, *args, **options):
    """Queue dedupe_approvals tasks covering every Addon, 100 pks at a time."""
    addon_pks = (Addon.objects
                 .values_list('pk', flat=True)
                 .order_by('id'))
    for batch in chunked(addon_pks, 100):
        dedupe_approvals.delay(batch)
def handle(self, *args, **options):
    """Queue add_commentlog tasks over the review-queue ActivityLogs."""
    log_pks = (ActivityLog.objects
               .review_queue()
               .values_list('pk', flat=True)
               .order_by('id'))
    for batch in chunked(log_pks, 100):
        add_commentlog.delay(batch)
def compatibility_report(index=None):
    """Build the Firefox compatibility report and bulk-index it into ES.

    For every add-on with Firefox support and recent update pings, collects
    usage counts, per-version success/failure compat reports, and top-95%
    usage flags, then sends one document per add-on to Elasticsearch.

    :param index: optional index name passed to get_indices(); None means
        the default index set.
    """
    docs = defaultdict(dict)
    indices = get_indices(index)

    # Gather all the data for the index.
    log.info(u'Generating Firefox compat report.')
    # Only consider update counts from the most recent day we have data for.
    latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
    qs = UpdateCount.objects.filter(addon__appsupport__app=amo.FIREFOX.id,
                                    addon__disabled_by_user=False,
                                    addon__status__in=amo.VALID_ADDON_STATUSES,
                                    addon___current_version__isnull=False,
                                    date=latest)
    # {addon_id: update count} — the driving data set for the whole report.
    updates = dict(qs.values_list('addon', 'count'))
    for chunk in chunked(updates.items(), 50):
        chunk = dict(chunk)
        for addon in Addon.objects.filter(id__in=chunk):
            if (amo.FIREFOX not in addon.compatible_apps
                    or addon.compatible_apps[amo.FIREFOX] is None):
                # Ignore this add-on if it does not have compat information
                # for Firefox.
                continue

            current_version = {
                'id': addon.current_version.pk,
                'version': addon.current_version.version,
            }
            doc = docs[addon.id]
            doc.update(id=addon.id, slug=addon.slug, guid=addon.guid,
                       binary=addon.binary_components,
                       name=unicode(addon.name), created=addon.created,
                       current_version=current_version)
            doc['count'] = chunk[addon.id]
            doc['usage'] = updates[addon.id]
            doc['top_95'] = {}

            # Populate with default counts for all versions.
            doc['works'] = {
                vint(version['main']): {
                    'success': 0,
                    'failure': 0,
                    'total': 0,
                    'failure_ratio': 0.0,
                }
                for version in FIREFOX_COMPAT
            }

            # Group reports by `major`.`minor` app version.
            reports = (CompatReport.objects.filter(
                guid=addon.guid, app_guid=amo.FIREFOX.guid).values_list(
                    'app_version', 'works_properly').annotate(Count('id')))
            for ver, works_properly, cnt in reports:
                ver = vint(floor_version(ver))
                # Map the reported version into the FIREFOX_COMPAT bucket
                # whose (previous, main] range contains it.
                major = [
                    v['main'] for v in FIREFOX_COMPAT
                    if vint(v['previous']) < ver <= vint(v['main'])
                ]
                if major:
                    w = doc['works'][vint(major[0])]
                    # Tally number of success and failure reports.
                    w['success' if works_properly else 'failure'] += cnt
                    w['total'] += cnt

                    # Calculate % of incompatibility reports.
                    w['failure_ratio'] = w['failure'] / float(w['total'])

            compat = addon.compatible_apps[amo.FIREFOX]
            doc['support'] = {
                'min': compat.min.version_int,
                'max': compat.max.version_int
            }
            doc['max_version'] = compat.max.version

    total = sum(updates.values())
    # Remember the total so we can show % of usage later.
    compat_total, created = CompatTotals.objects.safer_get_or_create(
        defaults={'total': total})
    if not created:
        compat_total.update(total=total)

    # Figure out which add-ons are in the top 95%.
    # Walk add-ons by descending usage; an add-on is "top 95%" while the
    # cumulative usage stays below 95% of the grand total.
    running_total = 0
    for addon, count in sorted(updates.items(), key=lambda x: x[1],
                               reverse=True):
        # Ignore the updates we skipped because of bad app compatibility.
        if addon in docs:
            running_total += count
            docs[addon]['top_95_all'] = running_total < (.95 * total)

    # Mark the top 95% of add-ons compatible with the previous version for
    # each version.
    for compat in FIREFOX_COMPAT:
        version = vint(compat['previous'])
        # Find all the docs that have a max_version compatible with version.
        supported = [
            compat_doc for compat_doc in docs.values()
            if compat_doc['support']['max'] >= version
        ]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        # NOTE: `total` is rebound here to the per-version sum, shadowing
        # the grand total used above.
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][version] = running_total < (.95 * total)

    # Send it all to ES.
    bulk = []
    for id_, doc in docs.items():
        for index in set(indices):
            bulk.append({
                "_source": doc,
                "_id": id_,
                "_type": AppCompat.get_mapping_type(),
                "_index": index or AppCompat._get_index(),
            })

    es = amo_search.get_es()
    log.info('Bulk indexing %s compat docs on %s indices' %
             (len(docs), len(indices)))
    elasticsearch.helpers.bulk(es, bulk, chunk_size=150)
    es.indices.refresh()
def compatibility_report(index=None):
    """Build the per-app compatibility report and index it.

    Older multi-app variant: iterates every app in amo.APP_USAGE, keying
    the usage/works/support sub-documents by app id, and indexes each doc
    individually via AppCompat.index().

    :param index: optional index name passed to get_indices(); None means
        the default index set.
    """
    docs = defaultdict(dict)
    indices = get_indices(index)

    # Gather all the data for the index.
    for app in amo.APP_USAGE:
        # Compat version buckets defined for this app.
        versions = [c for c in amo.COMPAT if c['app'] == app.id]

        log.info(u'Making compat report for %s.' % app.pretty)
        # Only consider update counts from the most recent day of data.
        latest = UpdateCount.objects.aggregate(d=Max('date'))['d']
        qs = UpdateCount.objects.filter(addon__appsupport__app=app.id,
                                        addon__disabled_by_user=False,
                                        addon__status__in=amo.VALID_STATUSES,
                                        addon___current_version__isnull=False,
                                        date=latest)
        # {addon_id: update count} for this app.
        updates = dict(qs.values_list('addon', 'count'))
        for chunk in chunked(updates.items(), 50):
            chunk = dict(chunk)
            for addon in Addon.objects.filter(id__in=chunk):
                current_version = {
                    'id': addon.current_version.pk,
                    'version': addon.current_version.version,
                }
                doc = docs[addon.id]
                doc.update(id=addon.id, slug=addon.slug, guid=addon.guid,
                           binary=addon.binary_components,
                           name=unicode(addon.name), created=addon.created,
                           current_version=current_version)
                doc['count'] = chunk[addon.id]
                # Per-app sub-documents; setdefault keeps data from a
                # previous app iteration intact.
                doc.setdefault('top_95',
                               defaultdict(lambda: defaultdict(dict)))
                doc.setdefault('top_95_all', {})
                doc.setdefault('usage', {})[app.id] = updates[addon.id]
                doc.setdefault('works', {}).setdefault(app.id, {})

                # Populate with default counts for all app versions.
                for ver in versions:
                    doc['works'][app.id][vint(ver['main'])] = {
                        'success': 0,
                        'failure': 0,
                        'total': 0,
                        'failure_ratio': 0.0,
                    }

                # Group reports by `major`.`minor` app version.
                reports = (CompatReport.objects.filter(
                    guid=addon.guid, app_guid=app.guid).values_list(
                        'app_version', 'works_properly').annotate(Count('id')))
                for ver, works_properly, cnt in reports:
                    ver = vint(floor_version(ver))
                    # Find the version bucket whose (previous, main] range
                    # contains the reported version.
                    major = [
                        v['main'] for v in versions
                        if vint(v['previous']) < ver <= vint(v['main'])
                    ]
                    if major:
                        w = doc['works'][app.id][vint(major[0])]
                        # Tally number of success and failure reports.
                        w['success' if works_properly else 'failure'] += cnt
                        w['total'] += cnt

                        # Calculate % of incompatibility reports.
                        w['failure_ratio'] = w['failure'] / float(w['total'])

                if app not in addon.compatible_apps:
                    continue

                compat = addon.compatible_apps[app]
                d = {
                    'min': compat.min.version_int,
                    'max': compat.max.version_int
                }
                doc.setdefault('support', {})[app.id] = d
                doc.setdefault('max_version', {})[app.id] = compat.max.version

        total = sum(updates.values())
        # Remember the total so we can show % of usage later.
        compat_total, created = CompatTotals.objects.safer_get_or_create(
            app=app.id, defaults={'total': total})
        if not created:
            compat_total.update(total=total)

        # Figure out which add-ons are in the top 95% for this app.
        running_total = 0
        for addon, count in sorted(updates.items(), key=lambda x: x[1],
                                   reverse=True):
            running_total += count
            docs[addon]['top_95_all'][app.id] = running_total < (.95 * total)

    # Mark the top 95% of add-ons compatible with the previous version for
    # each app + version combo.
    for compat in amo.COMPAT:
        app, ver = compat['app'], vint(compat['previous'])
        # Find all the docs that have a max_version compatible with ver.
        supported = [
            compat_doc for compat_doc in docs.values()
            if (app in compat_doc.get('support', {})
                and compat_doc['support'][app]['max'] >= ver)
        ]
        # Sort by count so we can get the top 95% most-used add-ons.
        supported = sorted(supported, key=lambda d: d['count'], reverse=True)
        total = sum(doc['count'] for doc in supported)
        # Figure out which add-ons are in the top 95% for this app + version.
        running_total = 0
        for doc in supported:
            running_total += doc['count']
            doc['top_95'][app][ver] = running_total < (.95 * total)

    # Send it all to the index.
    for chunk in chunked(docs.values(), 150):
        for doc in chunk:
            for index in indices:
                AppCompat.index(doc, id=doc['id'], refresh=False, index=index)

    es = amo_search.get_es()
    es.indices.refresh()
def upload(self, **params):
    """Create a FileUpload from self.data, posted as 3-byte chunks."""
    pieces = []
    for segment in chunked(self.data, 3):
        pieces.append(bytes(bytearray(segment)))
    return FileUpload.from_post(pieces, 'filenamé.xpi', len(self.data),
                                **params)