def postcommit_after_request(response, base_status_error_code=500):
    """Run the queued post-commit work for a request and return the response.

    If ``response.status_code`` is at or above ``base_status_error_code``,
    both queues stored on ``_local`` are replaced with empty ``OrderedDict``
    instances and nothing is executed.

    Otherwise:

    * every callable in ``postcommit_queue()`` is spawned on a 30-slot
      ``Pool``, which is then joined with a 5 second timeout and
      ``raise_error=True``;
    * every entry in ``postcommit_celery_queue()`` is either rebuilt into a
      ``Signature`` and dispatched with ``apply_async()`` (when
      ``settings.USE_CELERY`` is truthy) or simply called.

    An ``AttributeError`` raised anywhere in that work is swallowed; it is
    logged as an uninitialized queue unless ``settings.DEBUG_MODE`` is set.

    Args:
        response: Object exposing ``status_code``; handed back unchanged.
        base_status_error_code: Lowest status code treated as an error.

    Returns:
        The ``response`` argument.
    """
    request_failed = response.status_code >= base_status_error_code
    if request_failed:
        # Discard everything that was queued while handling the request.
        _local.postcommit_queue = OrderedDict()
        _local.postcommit_celery_queue = OrderedDict()
        return response

    try:
        if postcommit_queue():
            # one db connection per greenlet, let's share
            # NOTE(review): Pool is presumably gevent.pool.Pool -- confirm.
            worker_pool = Pool(30)
            for job in postcommit_queue().values():
                worker_pool.spawn(job)
            # 5 second timeout and reraise exceptions
            worker_pool.join(timeout=5.0, raise_error=True)

        if postcommit_celery_queue():
            dispatch_via_celery = settings.USE_CELERY
            for entry in postcommit_celery_queue().values():
                if dispatch_via_celery:
                    # Entries are serialized signatures in this mode.
                    Signature.from_dict(entry).apply_async()
                else:
                    # Entries are plain callables; run them inline.
                    entry()
    except AttributeError as ex:
        if not settings.DEBUG_MODE:
            message = 'Post commit task queue not initialized: {}'.format(ex)
            logger.error(message)

    return response
def setup_cluster(task, *args, **kwargs):
    """Make sure a cluster exists, then trigger the follow-on task, if any.

    ``kwargs['cluster']`` is required. When it already contains an ``'_id'``
    key the cluster is reused as-is. Otherwise ``create_ec2_cluster`` is
    called with ``kwargs.get('profile')`` and ``kwargs['image_spec']`` and its
    return value replaces the cluster; ``kwargs['machine']`` is also set from
    the cluster's ``'machine'`` entry in that case.

    When ``kwargs`` contains ``'next'``, that serialized signature is rebuilt
    and started with ``delay(*args, **kwargs)``; ``kwargs['cluster']`` is
    updated to the (possibly newly created) cluster first. Note that the
    forwarded ``kwargs`` still contain the ``'next'`` entry itself.

    Args:
        task: Object exposing ``logger`` and ``taskflow.logger``.
        *args: Positional arguments forwarded to the follow-on task.
        **kwargs: Must contain ``'cluster'``; may contain ``'profile'``,
            ``'image_spec'`` (required when creating a cluster) and ``'next'``.

    Raises:
        KeyError: If ``'cluster'`` is missing, or ``'image_spec'`` is missing
            when a new cluster has to be created.
    """
    cluster = kwargs['cluster']

    if '_id' in cluster:
        task.taskflow.logger.info(
            'We are using an existing cluster: %s' % cluster['name'])
    else:
        task.taskflow.logger.info('We are creating an EC2 cluster.')
        task.logger.info('Cluster name %s' % cluster['name'])
        kwargs['machine'] = cluster.get('machine')
        profile = kwargs.get('profile')
        cluster = create_ec2_cluster(
            task, cluster, profile, kwargs['image_spec'])
        task.logger.info('Cluster started.')

    # Call any follow on task
    if 'next' not in kwargs:
        return

    kwargs['cluster'] = cluster
    # Named next_task (not ``next``) so the builtin is not shadowed.
    next_task = Signature.from_dict(kwargs['next'])

    if next_task.task == 'celery.chain':
        # If we are dealing with a chain we want to update the args and
        # kwargs passed into the chain: merge them into its first task.
        first_task = next_task.kwargs['tasks'][0]
        if first_task:
            if args:
                first_task.args = tuple(args) + tuple(first_task.args)
            if kwargs:
                first_task.kwargs = dict(first_task.kwargs, **kwargs)

    next_task.delay(*args, **kwargs)
def postcommit_after_request(response, base_status_error_code=500):
    """Flush the post-commit queues once a request has been handled.

    Error responses (``response.status_code >= base_status_error_code``) reset
    ``_local.postcommit_queue`` and ``_local.postcommit_celery_queue`` to
    empty ``OrderedDict`` instances and skip all queued work.

    For any other response, the callables in ``postcommit_queue()`` are
    spawned on a ``Pool`` of size 30 and joined (5 second timeout,
    ``raise_error=True``). The entries of ``postcommit_celery_queue()`` are
    then processed: with ``settings.USE_CELERY`` truthy each entry is turned
    into a ``Signature`` and sent via ``apply_async()``; otherwise each entry
    is called directly.

    ``AttributeError`` raised during that work is caught and, unless
    ``settings.DEBUG_MODE`` is set, logged as an uninitialized queue.

    Args:
        response: Object exposing ``status_code``; returned unchanged.
        base_status_error_code: Status codes at or above this value count as
            errors.

    Returns:
        The ``response`` argument.
    """
    if response.status_code >= base_status_error_code:
        # Failed request: drop all pending post-commit work.
        _local.postcommit_queue = OrderedDict()
        _local.postcommit_celery_queue = OrderedDict()
        return response

    try:
        if postcommit_queue():
            # one db connection per greenlet, let's share
            pool_size = 30
            greenlets = Pool(pool_size)
            for queued_func in postcommit_queue().values():
                greenlets.spawn(queued_func)
            # 5 second timeout and reraise exceptions
            greenlets.join(timeout=5.0, raise_error=True)

        if postcommit_celery_queue():
            if not settings.USE_CELERY:
                # Celery disabled: entries are callables, run them in place.
                for queued_task in postcommit_celery_queue().values():
                    queued_task()
            else:
                # Entries are serialized signatures; rebuild before sending.
                for serialized in postcommit_celery_queue().values():
                    signature = Signature.from_dict(serialized)
                    signature.apply_async()
    except AttributeError as ex:
        if not settings.DEBUG_MODE:
            logger.error(
                'Post commit task queue not initialized: {}'.format(ex))

    return response
def execute_callback(self):
    """
    Execute the callback stored in ``self.serialized_callback``.

    Called via on_chord_part_return. There are no parameters, as everything
    we need is already serialized in the model somewhere.

    Failed subtasks are dealt with first. If the callback's ``propagate``
    option (defaulting to ``current_app.conf.CELERY_CHORD_PROPAGATES``) is
    truthy, a ``ChordError`` listing the failed task ids is passed to
    ``self.mark_error`` and the callback is not run. Otherwise the failed
    results are removed from ``self.completed_results`` and deleted.

    The callback is then either called in-process with the results iterator
    (``use_iterator`` option, default ``True``), after which
    ``self.callback_result`` is marked as ``SUCCESS``, or dispatched with
    ``apply_async`` and a materialized list of results.
    """
    callback = Signature.from_dict(json.loads(self.serialized_callback))

    has_failures = any(
        subtask.status == FAILURE for subtask in self.completed_results.all()
    )
    if has_failures:
        # we either remove the failures and only return results from
        # successful subtasks, or fail the entire chord
        propagate = callback.get('options', {}).get(
            'propagate', current_app.conf.CELERY_CHORD_PROPAGATES
        )
        if propagate:
            failed_ids = [
                subtask.task_id
                for subtask in self.completed_results.all()
                if subtask.status == FAILURE
            ]
            # The error is raised and caught right away rather than just
            # instantiated -- presumably so mark_error gets an exception that
            # carries a traceback; confirm against mark_error.
            try:
                raise ChordError("Error in subtasks! Ids: {}".format(failed_ids))
            except ChordError as error:
                self.mark_error(error, is_subtask=True)
                return

        # Dev note: this doesn't *quite* match the behavior of the default
        # backend. According to
        # http://www.pythondoc.com/celery-3.1.11/configuration.html#celery-chord-propagates,
        # the 2 options are to either propagate through (as done above), or to
        # forward the Exception result into callback (versus this approach of
        # dropping error results). It seems silly to ask callbacks to expect
        # exception results as input though, so we drop them.
        for subtask in self.completed_results.all():
            if subtask.status == FAILURE:
                self.completed_results.remove(subtask)  # pylint: disable=no-member
                subtask.delete()

    use_iterator = callback.get('options', {}).get('use_iterator', True)
    if not use_iterator:
        # Hand the callback a fully materialized list via the task queue.
        results_list = [subtask.result for subtask in self.completed_results.all()]
        callback.id = self.callback_result.task_id
        callback.apply_async((results_list, ), {})
        return

    # If we're using an iterator, it's assumed to be because there are size
    # concerns with the results. Thus, the callback_result TaskMeta object
    # will have a null 'result' in the database, because you stored those
    # results someplace else as part of the callback function, right?
    # NOTE(review): the bound ``iterator`` method itself is passed (it is not
    # called here) -- confirm callbacks expect a callable.
    try:
        callback(self.completed_results.values_list('result', flat=True).iterator)
    except Exception as error:  # pylint: disable=broad-except
        self.mark_error(error, is_subtask=False)
        return

    self.callback_result.status = SUCCESS
    self.callback_result.date_done = datetime.now()
    self.callback_result.save()
def setup_cluster(task, *args, **kwargs):
    """Make sure a cluster exists, then trigger the follow-on task, if any.

    ``kwargs['cluster']`` is required. When it already contains an ``'_id'``
    key the cluster is reused as-is. Otherwise ``create_ec2_cluster`` is
    called with ``kwargs.get('profile')`` and ``kwargs.get('ami')`` and its
    return value replaces the cluster; ``kwargs['machine']`` is also set from
    the cluster's ``'machine'`` entry in that case.

    When ``kwargs`` contains ``'next'``, that serialized signature is rebuilt
    and started with ``delay(*args, **kwargs)``; ``kwargs['cluster']`` is
    updated to the (possibly newly created) cluster first. Note that the
    forwarded ``kwargs`` still contain the ``'next'`` entry itself.

    Args:
        task: Object exposing ``logger`` and ``taskflow.logger``.
        *args: Positional arguments forwarded to the follow-on task.
        **kwargs: Must contain ``'cluster'``; may contain ``'profile'``,
            ``'ami'`` and ``'next'``.

    Raises:
        KeyError: If ``'cluster'`` is missing.
    """
    cluster = kwargs['cluster']

    if '_id' in cluster:
        task.taskflow.logger.info('We are using an existing cluster: %s' % cluster['name'])
    else:
        task.taskflow.logger.info('We are creating an EC2 cluster.')
        task.logger.info('Cluster name %s' % cluster['name'])
        kwargs['machine'] = cluster.get('machine')
        ami = kwargs.get('ami')
        profile = kwargs.get('profile')
        cluster = create_ec2_cluster(task, cluster, profile, ami)
        task.logger.info('Cluster started.')

    # Call any follow on task
    if 'next' in kwargs:
        kwargs['cluster'] = cluster
        # Named next_task (not ``next``) so the builtin is not shadowed.
        next_task = Signature.from_dict(kwargs['next'])
        next_task.delay(*args, **kwargs)
def inner(*args, **kwargs):
    """Call ``f`` and then update the bookkeeping of its taskset, if any.

    ``f`` is a free variable defined outside this block (presumably the
    callable being wrapped). The optional ``_taskset`` keyword is popped
    before ``f`` is called, so ``f`` never sees it.

    When ``_taskset`` was supplied, the matching ``TaskSetMeta`` row is
    selected for update inside ``transaction.atomic()``, its ``count`` is
    decremented and saved. If the count reached zero, the serialized callback
    in ``taskset["callback"]`` is rebuilt and sent with ``apply_async()``
    after the atomic block has exited.

    Returns:
        Whatever ``f(*args, **kwargs)`` returned.
    """
    taskset = kwargs.pop("_taskset", None)
    rv = f(*args, **kwargs)
    if taskset is None:
        return rv

    finished = False
    with transaction.atomic():
        taskset_id = taskset["taskset_id"]
        rows = TaskSetMeta.objects.select_for_update().filter(
            id=taskset_id).all()
        if rows:
            assert len(rows) == 1
            row = rows[0]
            row.count -= 1
            row.save()
            finished = row.count == 0
            if finished:
                logger.info("Finished taskset id %i" % taskset_id)
            else:
                logger.info("Taskset %i has %i tasks remaining" % (taskset_id, row.count))

    # Dispatched only after the atomic block above has exited.
    if finished:
        callback = Signature.from_dict(taskset["callback"])
        callback.apply_async()
    return rv
def inner(*args, **kwargs):
    """Run ``f`` and count the call against its taskset when one is given.

    ``f`` comes from an enclosing scope that is not part of this block
    (presumably the wrapped callable). ``_taskset`` is removed from ``kwargs``
    before ``f`` runs.

    With a taskset, the ``TaskSetMeta`` row whose id is
    ``taskset["taskset_id"]`` is fetched with ``select_for_update()`` inside
    ``transaction.atomic()``; its ``count`` is decreased by one and saved.
    Once the count hits zero the callback serialized in
    ``taskset["callback"]`` is rebuilt and sent with ``apply_async()`` after
    leaving the atomic block. If no row matches, nothing else happens.

    Returns:
        The return value of ``f(*args, **kwargs)``.
    """
    taskset = kwargs.pop("_taskset", None)
    rv = f(*args, **kwargs)
    if taskset is None:
        return rv

    all_done = False
    with transaction.atomic():
        taskset_id = taskset["taskset_id"]
        locked_rows = TaskSetMeta.objects.select_for_update().filter(
            id=taskset_id).all()
        if locked_rows:
            assert len(locked_rows) == 1
            meta = locked_rows[0]
            meta.count -= 1
            meta.save()
            if meta.count != 0:
                logger.info("Taskset %i has %i tasks remaining" % (taskset_id, meta.count))
            else:
                logger.info("Finished taskset id %i" % taskset_id)
                all_done = True

    if not all_done:
        return rv

    # Last task of the set: fire the callback outside the atomic block.
    callback = Signature.from_dict(taskset["callback"])
    callback.apply_async()
    return rv
def postcommit_celery_task_wrapper(queue):
    """Rebuild every queued signature and run them as one chain.

    Args:
        queue: Mapping whose values are serialized (dict) task signatures.
    """
    # celery serialized signatures into dictionaries, so we need to
    # deserialize here
    # https://sentry.cos.io/sentry/osf-iy/issues/289209/
    signatures = []
    for serialized in queue.values():
        signatures.append(Signature.from_dict(serialized))

    # chain.apply calls the tasks synchronously without re-enqueuing each one
    # http://stackoverflow.com/questions/34177131/how-to-solve-python-celery-error-when-using-chain-encodeerrorruntimeerrormaxi?answertab=votes#tab-top
    chain(signatures).apply()
def setup_cluster(task, *args, **kwargs):
    """Make sure a cluster (and optional volume) exists, then run any follow-on.

    Steps, in order:

    1. Cluster: if ``kwargs['cluster']`` contains ``'_id'`` it is reused.
       Otherwise it is created with ``create_ec2_cluster`` and launched with
       ``launch_ec2_cluster``; ``kwargs['machine']`` is set from the cluster's
       ``'machine'`` entry, and when a volume was requested the cluster's
       ``config['jobOutputDir']`` is set to ``'/data'`` beforehand.
    2. Volume: a volume containing ``'_id'`` is reused; any other truthy
       ``kwargs['volume']`` is created with ``create_volume``.
    3. Provisioning (new clusters only): ``check_girder_cluster_status`` is
       called, the volume (if any) is attached and its ``'path'`` is passed as
       ``master_nfs_exports_extra``, then ``provision_ec2_cluster`` is called.
    4. Follow-on: when ``kwargs`` contains ``'next'``, that serialized
       signature is rebuilt and started with ``delay(*args, **kwargs)``, with
       ``kwargs['cluster']`` updated to the resulting cluster. The forwarded
       ``kwargs`` still contain the ``'next'`` entry itself.

    Args:
        task: Object exposing ``logger``, ``taskflow.logger`` and
            ``taskflow.girder_token``.
        *args: Positional arguments forwarded to the follow-on task.
        **kwargs: Must contain ``'cluster'``; may contain ``'profile'``,
            ``'volume'``, ``'image_spec'`` (required when creating a cluster)
            and ``'next'``.

    Raises:
        KeyError: If ``'cluster'`` is missing, or ``'image_spec'`` is missing
            when a new cluster has to be created.
    """
    cluster = kwargs['cluster']
    profile = kwargs.get('profile')
    volume = kwargs.get('volume')
    new = False

    if '_id' in cluster:
        task.taskflow.logger.info('We are using an existing cluster: %s' % cluster['name'])
    else:
        new = True
        task.taskflow.logger.info('We are creating an EC2 cluster.')
        task.logger.info('Cluster name %s' % cluster['name'])
        kwargs['machine'] = cluster.get('machine')

        if volume:
            config = cluster.setdefault('config', {})
            config['jobOutputDir'] = '/data'

        # Create the model in Girder
        cluster = create_ec2_cluster(task, cluster, profile, kwargs['image_spec'])

        # Now launch the cluster
        cluster = launch_ec2_cluster(task, cluster, profile)

        task.logger.info('Cluster started.')

    if volume and '_id' in volume:
        task.taskflow.logger.info('We are using an existing volume: %s' % volume['name'])
    elif volume:
        task.taskflow.logger.info('We are creating a new volume: "%s"' % volume['name'])
        volume = create_volume(task, volume, profile)

    # Now provision
    if new:
        provision_params = {}
        girder_token = task.taskflow.girder_token
        check_girder_cluster_status(cluster, girder_token, 'provisioning')

        # attach volume
        if volume:
            volume = _attach_volume(task, profile, volume, cluster)
            path = volume.get('path')
            if path:
                provision_params['master_nfs_exports_extra'] = [path]

        cluster = provision_ec2_cluster(task, cluster, profile, provision_params)

    # Call any follow on task
    if 'next' not in kwargs:
        return

    kwargs['cluster'] = cluster
    # Named next_task (not ``next``) so the builtin is not shadowed.
    next_task = Signature.from_dict(kwargs['next'])

    if next_task.task == 'celery.chain':
        # If we are dealing with a chain we want to update the args and
        # kwargs passed into the chain: merge them into its first task.
        first_task = next_task.kwargs['tasks'][0]
        if first_task:
            if args:
                first_task.args = tuple(args) + tuple(first_task.args)
            if kwargs:
                first_task.kwargs = dict(first_task.kwargs, **kwargs)

    next_task.delay(*args, **kwargs)