def test_apply(self):
    applied = [0]

    class mocksubtask(subtask):
        def apply(self, *args, **kwargs):
            applied[0] += 1

    ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
    ts.apply()
    self.assertEqual(applied[0], 3)
def update_collections_total():
    """Update collections downloads totals."""
    d = (CollectionCount.objects.values('collection_id')
         .annotate(sum=Sum('count')))
    ts = [tasks.update_collections_total.subtask(args=[chunk])
          for chunk in chunked(d, 50)]
    TaskSet(ts).apply_async()
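# Hedged sketch (not from the source) of the chunk-and-dispatch pattern the
# cron jobs in this collection rely on. The real `chunked` helper is imported
# by those modules; it is assumed here to behave like this stand-in: yield
# successive n-sized slices of a sequence so each slice becomes one subtask
# that the TaskSet fans out to workers.
def chunked(seq, n):
    """Yield successive n-sized chunks from seq (illustrative stand-in)."""
    seq = list(seq)
    for i in range(0, len(seq), n):
        yield seq[i:i + n]


# e.g. list(chunked(range(7), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]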
def update_monolith_stats(date=None):
    """Update monolith statistics."""
    if date:
        date = datetime.datetime.strptime(date, '%Y-%m-%d').date()
    today = date or datetime.date.today()
    jobs = [{'metric': metric, 'date': today}
            for metric in tasks._get_monolith_jobs(date)]
    ts = [tasks.update_monolith_stats.subtask(kwargs=kw) for kw in jobs]
    TaskSet(ts).apply_async()
def handle(self, *args, **options):
    if len(args) != 1:
        regions = ', '.join(dict(REGIONS_CHOICES_SLUG[1:]).keys())
        raise CommandError('You must enter a single region slug. '
                           'Available choices: %s' % regions)
    region_slug = args[0]
    ids = (Webapp.objects
           .filter(premium_type__in=amo.ADDON_HAS_PAYMENTS)
           .exclude(status__in=amo.WEBAPPS_EXCLUDED_STATUSES)
           .values_list('id', flat=True))
    ts = [new_payments_region_email.subtask(args=[chunk, region_slug])
          for chunk in chunked(ids, 100)]
    TaskSet(ts).apply_async()
def update_collections_subscribers():
    """Update collections subscribers totals."""
    d = (CollectionWatcher.objects.values('collection_id')
         .annotate(count=Count('collection'))
         .extra(where=['DATE(created)=%s'], params=[date.today()]))
    ts = [_update_collections_subscribers.subtask(args=[chunk])
          for chunk in chunked(d, 1000)]
    TaskSet(ts).apply_async()
def update_daily_theme_user_counts():
    """Store the day's theme popularity counts into ThemeUserCount."""
    raise_if_reindex_in_progress('amo')
    d = Persona.objects.values_list('addon', 'popularity').order_by('id')
    # Use the month directive ('%m'), not the minute ('%M'), to stamp the
    # date the counts belong to.
    date = datetime.now().strftime('%m-%d-%y')
    ts = [_update_daily_theme_user_counts.subtask(args=[chunk],
                                                  kwargs={'date': date})
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()
def run(self, set, body, interval=1, max_retries=None, **kwargs):
    if not isinstance(set, TaskSet):
        set = TaskSet(set)
    r = []
    setid = gen_unique_id()
    for task in set.tasks:
        uuid = gen_unique_id()
        task.options.update(task_id=uuid, chord=body)
        r.append(current_app.AsyncResult(uuid))
    current_app.TaskSetResult(setid, r).save()
    self.backend.on_chord_apply(setid, body, interval, max_retries)
    return set.apply_async(taskset_id=setid)
def update_weekly_downloads():
    """Update the weekly "downloads" from the users_install table."""
    interval = datetime.today() - timedelta(days=7)
    counts = (Installed.objects.values('addon')
              .filter(created__gte=interval, addon__type=amo.ADDON_WEBAPP)
              .annotate(count=Count('addon')))
    ts = [webapp_update_weekly_downloads.subtask(args=[chunk])
          for chunk in chunked(counts, 1000)]
    TaskSet(ts).apply_async()
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    raise_if_reindex_in_progress('amo')
    d = (AddonCollectionCount.objects.values('addon', 'collection')
         .annotate(sum=Sum('count')))
    ts = [tasks.update_addons_collections_downloads.subtask(args=[chunk])
          for chunk in chunked(d, 100)]
    TaskSet(ts).apply_async()
def update_documents(self, documents, count):
    sys.stdout.write('Graph size is {0:d} nodes.\n'.format(count))
    sys.stdout.flush()
    processed_count = 0
    subtasks = []
    timings = []
    average_per_s = 0
    if self.index == 'concurrently':
        index_during_subtask = True
    else:
        index_during_subtask = False
    for doc in documents:
        processed_count += 1
        if processed_count % 10000 == 0:
            # Send the commit every 10000 times.
            self.si.commit()
        subtasks.append(update_document.subtask((doc, index_during_subtask)))
        if processed_count % 1000 == 1:
            t1 = time.time()
        if processed_count % 1000 == 0:
            t2 = time.time()
            timings.append(t2 - t1)
            average_per_s = 1000 / (sum(timings) / float(len(timings)))
        sys.stdout.write(
            "\rProcessing items in Celery queue: {:.0%} ({}/{}, {:.1f}/s)".format(
                processed_count * 1.0 / count,
                processed_count,
                count,
                average_per_s,
            ))
        sys.stdout.flush()
        last_document = (count == processed_count)
        if (processed_count % 500 == 0) or last_document:
            # Every 500 documents, we send the subtasks off for processing.
            # Poll to see when they're done.
            job = TaskSet(tasks=subtasks)
            result = job.apply_async()
            while not result.ready():
                time.sleep(0.5)
            # The jobs finished - clean things up for the next round.
            subtasks = []
    if self.index == 'all_at_end':
        call_command(
            'cl_update_index',
            update_mode=True,
            everything=True,
            solr_url='http://127.0.0.1:8983/solr/collection1',
        )
    elif self.index == 'false':
        sys.stdout.write("Solr index not updated after running citation "
                         "finder. You may want to do so manually.")
def update_addons_collections_downloads():
    """Update addons+collections download totals."""
    if settings.IGNORE_NON_CRITICAL_CRONS:
        return
    d = (AddonCollectionCount.objects.values('addon', 'collection')
         .annotate(sum=Sum('count')))
    ts = [tasks.update_addons_collections_downloads.subtask(args=[chunk])
          for chunk in chunked(d, 600)]
    TaskSet(ts).apply_async()
def reindex_addons():
    from . import tasks
    # Make sure our mapping is up to date.
    search.setup_mapping()
    ids = (Addon.objects.values_list('id', flat=True)
           .filter(_current_version__isnull=False,
                   status__in=amo.VALID_STATUSES,
                   disabled_by_user=False))
    ts = [tasks.index_addons.subtask(args=[chunk])
          for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(ts).apply_async()
def update_collections_votes():
    """Update collections' votes."""
    up = (CollectionVote.objects.values('collection_id')
          .annotate(count=Count('collection'))
          .filter(vote=1)
          .extra(where=['DATE(created)=%s'], params=[date.today()]))
    down = (CollectionVote.objects.values('collection_id')
            .annotate(count=Count('collection'))
            .filter(vote=-1)
            .extra(where=['DATE(created)=%s'], params=[date.today()]))
    ts = [_update_collections_votes.subtask(args=[chunk, 'new_votes_up'])
          for chunk in chunked(up, 1000)]
    TaskSet(ts).apply_async()
    ts = [_update_collections_votes.subtask(args=[chunk, 'new_votes_down'])
          for chunk in chunked(down, 1000)]
    TaskSet(ts).apply_async()
def reindex_apps():
    """Apps do get indexed by `reindex_addons`, but run this for apps only."""
    from . import tasks
    search.setup_mapping()
    ids = (Addon.objects.values_list('id', flat=True)
           .filter(type=amo.ADDON_WEBAPP,
                   status__in=amo.VALID_STATUSES,
                   disabled_by_user=False))
    ts = [tasks.index_addons.subtask(args=[chunk])
          for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(ts).apply_async()
def _exec_callbacks(callback):
    """
    Execute the callback or list of callbacks.

    Return asynchronous results as a TaskSetResult object.
    """
    async_result = None
    if callback:
        if not isinstance(callback, (list, tuple)):  # not iterable
            callback = [callback, ]
        taskset = TaskSet(tasks=callback)
        async_result = taskset.apply_async()
    return async_result
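# Hypothetical usage note for _exec_callbacks (the task names below are
# illustrative and not from the source). A single subtask signature or a
# list of them can be passed; None comes back when there is nothing to
# dispatch, so a caller might do:
#
#     result = _exec_callbacks([send_report.subtask((report_id,)),
#                               cleanup_tmp_files.subtask()])
#     if result is not None:
#         result.join()  # wait for every callback in the TaskSet to finish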
def _chunk_queryset_into_tasks(self, items, count, chunksize=5000,
                               bundle_size=250):
    """Chunks the queryset passed in, and dispatches it to Celery for
    adding to the index.

    Potential performance improvements:
     - Postgres is quiescent when Solr is popping tasks from Celery,
       instead, it should be fetching the next 1,000
     - The wait loop (while not result.ready()) polls for the results, at
       a 1s interval. Could this be reduced or somehow eliminated while
       keeping Celery's tasks list from running away?
    """
    processed_count = 0
    subtasks = []
    item_bundle = []
    for item in items:
        last_item = (count == processed_count + 1)
        if self.verbosity >= 2:
            self.stdout.write('Indexing item %s' % item.pk)
        item_bundle.append(item)
        if (processed_count % bundle_size == 0) or last_item:
            # Every bundle_size documents we create a subtask
            subtasks.append(
                add_or_update_items.subtask((item_bundle, self.solr_url)))
            item_bundle = []
        processed_count += 1
        if (processed_count % chunksize == 0) or last_item:
            # Every chunksize items, we send the subtasks for processing
            job = TaskSet(tasks=subtasks)
            result = job.apply_async()
            while not result.ready():
                time.sleep(1)
            subtasks = []
        if (processed_count % 50000 == 0) or last_item:
            # Do a commit every 50000 items, for good measure.
            self.stdout.write("...running commit command...")
            self.si.commit()
        sys.stdout.write("\rProcessed {}/{} ({:.0%})".format(
            processed_count, count, processed_count * 1.0 / count,
        ))
        self.stdout.flush()
    self.stdout.write('\n')
def update_global_totals(date=None):
    """Update global statistics totals."""
    today = date or datetime.date.today()
    today_jobs = [dict(job=job, date=today)
                  for job in tasks._get_daily_jobs(date)]
    max_update = date or UpdateCount.objects.aggregate(max=Max('date'))['max']
    metrics_jobs = [dict(job=job, date=max_update)
                    for job in tasks._get_metrics_jobs(date)]
    ts = [tasks.update_global_totals.subtask(kwargs=kw)
          for kw in today_jobs + metrics_jobs]
    TaskSet(ts).apply_async()
def update_addons_current_version():
    """Update the current_version field of the addons."""
    # Candidate for deletion - Bug 750510
    if not waffle.switch_is_active('current_version_crons'):
        return
    d = (Addon.objects.filter(disabled_by_user=False,
                              status__in=amo.VALID_STATUSES)
         .exclude(type=amo.ADDON_PERSONA)
         .values_list('id'))
    ts = [_update_addons_current_version.subtask(args=[chunk])
          for chunk in chunked(d, 100)]
    TaskSet(ts).apply_async()
def reindex_addons(index=None, addon_type=None):
    from . import tasks
    ids = (Addon.objects.values_list('id', flat=True)
           .filter(_current_version__isnull=False,
                   status__in=amo.VALID_STATUSES,
                   is_listed=True,
                   disabled_by_user=False))
    if addon_type:
        ids = ids.filter(type=addon_type)
    ts = [tasks.index_addons.subtask(args=[chunk], kwargs=dict(index=index))
          for chunk in chunked(sorted(list(ids)), 150)]
    TaskSet(ts).apply_async()
def update_addon_appsupport():
    # Find all the add-ons that need their app support details updated.
    newish = (Q(last_updated__gte=F('appsupport__created')) |
              Q(appsupport__created__isnull=True))
    # Search providers don't list supported apps.
    has_app = Q(versions__apps__isnull=False) | Q(type=amo.ADDON_SEARCH)
    has_file = Q(versions__files__status__in=amo.VALID_STATUSES)
    good = Q(has_app, has_file) | Q(type=amo.ADDON_PERSONA)
    ids = (Addon.objects.valid().distinct()
           .filter(newish, good).values_list('id', flat=True))
    ts = [_update_appsupport.subtask(args=[chunk])
          for chunk in chunked(ids, 20)]
    TaskSet(ts).apply_async()
def _run_galaxy_workflow(analysis_uuid):
    """Import files into Galaxy and execute Galaxy Workflow"""
    analysis = _get_analysis(analysis_uuid)
    analysis_status = _get_analysis_status(analysis_uuid)

    if not analysis_status.galaxy_import_task_group_id:
        logger.debug("Starting analysis execution in Galaxy")
        try:
            analysis.prepare_galaxy()
        except (requests.exceptions.ConnectionError,
                galaxy.client.ConnectionError):
            error_msg = "Analysis '{}' failed during preparation in " \
                        "Galaxy".format(analysis)
            logger.error(error_msg)
            analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
            analysis.send_email()
            get_taskset_result(
                analysis_status.refinery_import_task_group_id).delete()
            return
        galaxy_import_tasks = [
            _start_galaxy_analysis.subtask((analysis.uuid, )),
        ]
        galaxy_import_taskset = TaskSet(
            tasks=galaxy_import_tasks).apply_async()
        galaxy_import_taskset.save()
        analysis_status.galaxy_import_task_group_id = \
            galaxy_import_taskset.taskset_id
        analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # check if data files were successfully imported into Galaxy
    galaxy_import_taskset = get_taskset_result(
        analysis_status.galaxy_import_task_group_id)
    if not galaxy_import_taskset.ready():
        logger.debug("Analysis '%s' pending in Galaxy", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not galaxy_import_taskset.successful():
        error_msg = "Analysis '{}' failed in Galaxy".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        get_taskset_result(
            analysis_status.refinery_import_task_group_id).delete()
        galaxy_import_taskset.delete()
        analysis.galaxy_cleanup()
        return
def _run(fn_name, xs):
    fn = getattr(tasks, fn_name)
    # `apply` is the Python 2 builtin: apply(fn.subtask, (x,)) == fn.subtask(x)
    job = TaskSet(tasks=[apply(fn.subtask, (x,)) for x in xs])
    result = job.apply_async()
    out = []
    if wait:
        with _close_taskset(result):
            while not result.ready():
                time.sleep(5)
            if result.failed():
                raise ValueError("Failed distributed task; cleaning up")
            for x in result.join():
                if x:
                    out.extend(x)
    return out
def update_addon_average_daily_users():
    """Update add-ons ADU totals."""
    raise_if_reindex_in_progress('amo')
    cursor = connections[multidb.get_slave()].cursor()
    q = """SELECT addon_id, AVG(`count`)
           FROM update_counts
           WHERE `date` > DATE_SUB(CURDATE(), INTERVAL 7 DAY)
           GROUP BY addon_id
           ORDER BY addon_id"""
    cursor.execute(q)
    d = cursor.fetchall()
    cursor.close()
    ts = [_update_addon_average_daily_users.subtask(args=[chunk])
          for chunk in chunked(d, 250)]
    TaskSet(ts).apply_async()
def _attach_workflow_outputs(analysis_uuid):
    """Attach the resulting files from the Galaxy workflow execution
    to our Analysis
    """
    analysis = _get_analysis(analysis_uuid)
    if analysis.workflow.type == Workflow.ANALYSIS_TYPE:
        tasks = analysis.attach_derived_nodes_to_dataset()
        logger.info("Starting auxiliary file creation for analysis '%s'",
                    analysis)
        TaskSet(tasks=tasks).apply_async()
    elif analysis.workflow.type == Workflow.DOWNLOAD_TYPE:
        analysis.attach_outputs_downloads()
    else:
        logger.warning("Unknown workflow type '%s' in analysis '%s'",
                       analysis.workflow.type, analysis.name)
def get_clients_with_prepaid_accounts():
    sql = text("""
        SELECT DISTINCT(leads_id)
        FROM subcontractors
        WHERE prepaid = 'yes'
    """)
    conn = engine.connect()
    r = conn.execute(sql).fetchall()
    client_ids = []
    for x in r:
        client_ids.append(x[0])

    # Retrieve the leads_id of timesheet records for the past 90 days. This
    # is so that subcontractor records that were recently terminated still
    # show up on the adjustment.
    now = get_ph_time()
    prev_date = now - timedelta(days=90)
    now_string = now.strftime('%F %T')
    prev_date_string = prev_date.strftime('%F %T')
    sql = text("""
        SELECT DISTINCT(t.leads_id)
        FROM timesheet AS t
        JOIN subcontractors AS s ON t.leads_id = s.leads_id
        WHERE s.prepaid = 'yes'
        AND month_year BETWEEN :prev_date_string AND :now_string
    """)
    r = conn.execute(sql, prev_date_string=prev_date_string,
                     now_string=now_string)
    for x in r:
        client_id = x[0]
        if client_id not in client_ids:
            client_ids.append(client_id)

    tasks = []
    for client_id in client_ids:
        y = get_client_details.subtask((client_id, ))
        tasks.append(y)
    job = TaskSet(tasks=tasks)
    result = job.apply_async()
    data = result.join()
    conn.close()
    return data
def test_apply_async(self):
    applied = [0]

    class mocksubtask(subtask):
        def apply_async(self, *args, **kwargs):
            applied[0] += 1

    ts = TaskSet([mocksubtask(MockTask, (i, i)) for i in (2, 4, 8)])
    ts.apply_async()
    self.assertEqual(applied[0], 3)

    class Publisher(object):
        def send(self, *args, **kwargs):
            pass

    ts.apply_async(publisher=Publisher())
def submit_and_retry_if_celery_fails(self, submissions):
    submission_task = TaskSet(
        self.send_parallel_submissions_task.subtask(
            (self, submission),
            retry=True,
            retry_policy={
                'max_retries':
                    settings.CELERY_NUMBER_OF_RETRIES_IN_CASE_OF_FAILURE,
                'interval_start':
                    settings.CELERY_TIME_TO_WAIT_BEFORE_RETRYING_SUBMISSION,
            })
        for submission in submissions)
    submission_job = submission_task.apply_async()
    wait_until_its_done = submission_job.get()
    return submission_job
def test_interface__compat(self):
    with catch_warnings(record=True) as log:
        ts = TaskSet(MockTask, [[(2, 2)], [(4, 4)], [(8, 8)]])
        self.assertListEqual(ts.tasks,
                             [MockTask.subtask((i, i)) for i in (2, 4, 8)])
        self.assertIn("Using this invocation of TaskSet is deprecated",
                      log[0].message.args[0])
        log[:] = []
        self.assertEqual(ts.task, registry.tasks[MockTask.name])
        self.assertTrue(log)
        self.assertIn("TaskSet.task is deprecated",
                      log[0].message.args[0])
        log[:] = []
        self.assertEqual(ts.task_name, MockTask.name)
        self.assertTrue(log)
        self.assertIn("TaskSet.task_name is deprecated",
                      log[0].message.args[0])
def apply_async(self):
    tasks = []
    for node in self.children:
        func = node.func
        args = node.args
        kwargs = node.kwargs
        callback = kwargs.pop('callback', [])
        if not isinstance(callback, (list, tuple)):
            callback = [callback]
        subtasks = node._get_child_tasks()
        callback += subtasks
        kwargs = dict(callback=callback, **kwargs)
        _task = func.subtask(args=args, kwargs=kwargs)
        tasks.append(_task)
    taskset = TaskSet(tasks)
    result = taskset.apply_async()
    return result
def _run_galaxy_workflow(analysis_uuid):
    """Create DataSetCollection objects in galaxy, and invoke the
    workflow belonging to our tool.
    """
    analysis = _get_analysis(analysis_uuid)
    analysis_status = _get_analysis_status(analysis_uuid)
    tool = _get_workflow_tool(analysis_uuid)

    if not analysis_status.galaxy_workflow_task_group_id:
        logger.debug("Starting workflow execution in Galaxy")
        tool.update_file_relationships_with_galaxy_history_data()
        galaxy_workflow_tasks = [
            _invoke_galaxy_workflow.subtask((analysis_uuid, ))
        ]
        galaxy_workflow_taskset = TaskSet(
            tasks=galaxy_workflow_tasks).apply_async()
        galaxy_workflow_taskset.save()
        analysis_status.set_galaxy_workflow_task_group_id(
            galaxy_workflow_taskset.taskset_id)
        analysis_status.set_galaxy_history_state(AnalysisStatus.PROGRESS)
        run_analysis.retry(countdown=RETRY_INTERVAL)

    # Check on the status of the running galaxy workflow
    galaxy_workflow_taskset = get_taskset_result(
        analysis_status.galaxy_workflow_task_group_id)
    if not galaxy_workflow_taskset.ready():
        logger.debug("Analysis '%s' pending in Galaxy", analysis)
        run_analysis.retry(countdown=RETRY_INTERVAL)
    elif not galaxy_workflow_taskset.successful():
        error_msg = "Analysis '{}' failed in Galaxy".format(analysis)
        logger.error(error_msg)
        analysis.set_status(Analysis.FAILURE_STATUS, error_msg)
        analysis_status.set_galaxy_history_state(AnalysisStatus.ERROR)
        analysis.send_email()
        get_taskset_result(
            analysis_status.refinery_import_task_group_id).delete()
        galaxy_workflow_taskset.delete()
        analysis.galaxy_cleanup()
        return