def apply_async(self, *args, **kwargs):
    """
    A wrapper around the Celery apply_async method. It allows us to accept a few more
    parameters than Celery does for our own purposes, listed below. It also allows us to
    create and update task status which can be used to track status of this task during
    its lifetime.

    :param queue: The queue that the task has been placed into (optional, defaults to
                  the general Celery queue.)
    :type  queue: basestring
    :param tags:  A list of tags (strings) to place onto the task, used for searching
                  for tasks by tag
    :type  tags:  list
    :return:      An AsyncResult instance as returned by Celery's apply_async
    :rtype:       celery.result.AsyncResult
    """
    routing_key = kwargs.get('routing_key',
                             defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
    tags = kwargs.pop('tags', [])
    async_result = super(Task, self).apply_async(*args, **kwargs)
    async_result.tags = tags

    # Create a new task status with the task id and tags.
    task_status = TaskStatus(
        task_id=async_result.id, task_type=self.name,
        state=constants.CALL_WAITING_STATE, worker_name=routing_key, tags=tags)
    # To avoid the race condition where the __call__ method below is called before this
    # change is propagated to all db nodes, we use an 'upsert' here and set the task
    # state to 'waiting' only on an insert.
    task_status.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])
    return async_result
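# A minimal sketch of the upsert semantics that save_with_set_on_insert relies on,
# written against raw pymongo. The collection handle, the field layout, and the helper
# name are assumptions for illustration here, not Pulp's actual implementation.
from pymongo import MongoClient

task_status_collection = MongoClient()['pulp_database']['task_status']


def save_with_set_on_insert_sketch(doc, fields_to_set_on_insert):
    """
    Upsert ``doc`` by task_id. Fields named in fields_to_set_on_insert are written only
    when the upsert inserts a new document; an existing document keeps its current
    values for them, which is what protects 'state' and 'start_time' from being reset
    once __call__ has started updating the record.
    """
    doc = dict(doc)
    task_id = doc.pop('task_id')
    # MongoDB's $setOnInsert applies its fields only on the insert half of an upsert.
    set_on_insert = {k: doc.pop(k) for k in fields_to_set_on_insert if k in doc}
    update = {}
    if doc:
        update['$set'] = doc
    if set_on_insert:
        update['$setOnInsert'] = set_on_insert
    task_status_collection.update_one({'task_id': task_id}, update, upsert=True)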
def apply_async(self, *args, **kwargs):
    """
    A wrapper around the PulpTask apply_async method. It allows us to accept a few more
    parameters than Celery does for our own purposes, listed below. It also allows us to
    create and update task status which can be used to track status of this task during
    its lifetime.

    :param queue:    The queue that the task has been placed into (optional, defaults
                     to the general Celery queue.)
    :type  queue:    basestring
    :param tags:     A list of tags (strings) to place onto the task, used for
                     searching for tasks by tag
    :type  tags:     list
    :param group_id: The id that identifies which group of tasks a task belongs to
    :type  group_id: uuid.UUID
    :return:         An AsyncResult instance as returned by Celery's apply_async
    :rtype:          celery.result.AsyncResult
    """
    routing_key = kwargs.get('routing_key',
                             defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
    tag_list = kwargs.pop('tags', [])
    group_id = kwargs.pop('group_id', None)
    async_result = super(Task, self).apply_async(*args, **kwargs)
    async_result.tags = tag_list

    # Create a new task status with the task id and tags.
    task_status = TaskStatus(
        task_id=async_result.id, task_type=self.name,
        state=constants.CALL_WAITING_STATE, worker_name=routing_key, tags=tag_list,
        group_id=group_id)
    # To avoid the race condition where the __call__ method below is called before this
    # change is propagated to all db nodes, we use an 'upsert' here and set the task
    # state to 'waiting' only on an insert.
    task_status.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])
    return async_result
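# A hypothetical direct dispatch showing the extra parameters this wrapper accepts;
# some_task and the tag values are illustrative, not from the source. group_id ties
# related dispatches together so they can later be queried as a unit.
import uuid

group = uuid.uuid4()
async_result = some_task.apply_async(
    args=['repo-id'], tags=['pulp:action:sync'], group_id=group)
# Later, every task dispatched with this group id can be found in one query:
TaskStatus.objects(group_id=group)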
def test_save_update_with_set_on_insert(self):
    """
    Test the save method with set on insert arguments when the object is already in the
    database.
    """
    task_id = str(uuid4())
    worker_name = 'worker_name'
    tags = ['tag_1', 'tag_2']
    state = constants.CALL_ACCEPTED_STATE
    spawned_tasks = ['foo']
    error = {'error': 'some_error'}
    progress_report = {'what do we want?': 'progress!', 'when do we want it?': 'now!'}
    task_type = 'some.task'
    old_start_time = start_time = datetime.now()
    finish_time = start_time + timedelta(minutes=5)
    start_time = dateutils.format_iso8601_datetime(start_time)
    finish_time = dateutils.format_iso8601_datetime(finish_time)
    result = None
    ts = TaskStatus(
        task_id, worker_name, tags, state, spawned_tasks=spawned_tasks, error=error,
        progress_report=progress_report, task_type=task_type, start_time=start_time,
        finish_time=finish_time, result=result)
    # Put the object in the database, and then change some of its settings.
    ts.save()
    new_worker_name = 'a different_worker'
    new_state = constants.CALL_SUSPENDED_STATE
    new_start_time = old_start_time + timedelta(minutes=10)
    new_start_time = dateutils.format_iso8601_datetime(new_start_time)
    ts.worker_name = new_worker_name
    ts.state = new_state
    ts.start_time = new_start_time

    # This should update the worker_name on ts in the database, but should not update
    # the state or start_time
    ts.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])

    ts = TaskStatus.objects()
    # There should only be one TaskStatus in the db
    self.assertEqual(len(ts), 1)
    ts = ts[0]
    # Make sure all the attributes are correct
    self.assertEqual(ts['task_id'], task_id)
    # worker_name should have been updated
    self.assertEqual(ts['worker_name'], new_worker_name)
    self.assertEqual(ts['tags'], tags)
    # state should not have been updated
    self.assertEqual(ts['state'], state)
    self.assertEqual(ts['error'], error)
    self.assertEqual(ts['spawned_tasks'], spawned_tasks)
    self.assertEqual(ts['progress_report'], progress_report)
    self.assertEqual(ts['task_type'], task_type)
    # start_time should not have been updated
    self.assertEqual(ts['start_time'], start_time)
    self.assertEqual(ts['finish_time'], finish_time)
    self.assertEqual(ts['result'], result)
    # These are always None
    self.assertEqual(ts['traceback'], None)
    self.assertEqual(ts['exception'], None)
def apply_async_with_reservation(self, resource_type, resource_id, *args, **kwargs):
    """
    This method allows the caller to schedule the ReservedTask to run asynchronously
    just like Celery's apply_async(), while also reserving the named resource. No two
    tasks that claim the same resource reservation can execute concurrently. It accepts
    the type and id of a resource and combines them to form a single reservation key.

    This does not dispatch the task directly, but instead promises to dispatch it later
    by encapsulating the desired task through a call to a _queue_reserved_task task. See
    the docblock on _queue_reserved_task for more information on this.

    This method creates a TaskStatus as a placeholder for later updates. Pulp expects to
    poll on a task just after calling this method, so a TaskStatus entry needs to exist
    for it before it returns.

    For a list of parameters accepted by the *args and **kwargs parameters, please see
    the docblock for the apply_async() method.

    :param resource_type: A string that identifies the type of a resource
    :type  resource_type: basestring
    :param resource_id:   A string that identifies some named resource, guaranteeing
                          that only one task reserving this same string can happen at a
                          time.
    :type  resource_id:   basestring
    :param tags:          A list of tags (strings) to place onto the task, used for
                          searching for tasks by tag
    :type  tags:          list
    :param group_id:      The id to identify which group of tasks a task belongs to
    :type  group_id:      uuid.UUID
    :return:              An AsyncResult instance as returned by Celery's apply_async
    :rtype:               celery.result.AsyncResult
    """
    # Form a resource_id for reservation by combining the given resource type and id.
    # This way, two different resources having the same id will not block each other.
    resource_id = ":".join((resource_type, resource_id))
    inner_task_id = str(uuid.uuid4())
    task_name = self.name
    tag_list = kwargs.get('tags', [])
    group_id = kwargs.get('group_id', None)

    # Create a new task status with the task id and tags.
    task_status = TaskStatus(task_id=inner_task_id, task_type=task_name,
                             state=constants.CALL_WAITING_STATE, tags=tag_list,
                             group_id=group_id)
    # To avoid the race condition where the __call__ method below is called before this
    # change is propagated to all db nodes, we use an 'upsert' here and set the task
    # state to 'waiting' only on an insert.
    task_status.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])
    try:
        _queue_reserved_task.apply_async(
            args=[task_name, inner_task_id, resource_id, args, kwargs],
            queue=RESOURCE_MANAGER_QUEUE)
    except Exception:
        TaskStatus.objects(task_id=task_status.task_id).update(
            state=constants.CALL_ERROR_STATE)
        raise
    return AsyncResult(inner_task_id)
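# A hypothetical dispatch through the reservation system; 'repository' and 'zoo-repo'
# are illustrative values, and sync_task stands in for any Task bound to this method.
# The reservation key becomes 'repository:zoo-repo', so a concurrent publish of the
# same repository would queue behind this sync rather than run alongside it.
async_result = sync_task.apply_async_with_reservation(
    'repository', 'zoo-repo',
    tags=['pulp:repository:zoo-repo', 'pulp:action:sync'])
# Callers can begin polling immediately, since the TaskStatus already exists:
TaskStatus.objects(task_id=async_result.id)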
def apply_async(self, *args, **kwargs):
    """
    A wrapper around the PulpTask apply_async method. It allows us to accept a few more
    parameters than Celery does for our own purposes, listed below. It also allows us to
    create and update task status which can be used to track status of this task during
    its lifetime.

    :param queue:    The queue that the task has been placed into (optional, defaults
                     to the general Celery queue.)
    :type  queue:    basestring
    :param tags:     A list of tags (strings) to place onto the task, used for
                     searching for tasks by tag
    :type  tags:     list
    :param group_id: The id that identifies which group of tasks a task belongs to
    :type  group_id: uuid.UUID
    :return:         An AsyncResult instance as returned by Celery's apply_async
    :rtype:          celery.result.AsyncResult
    """
    if celery_version.startswith('4'):
        routing_key = kwargs.get(
            'routing_key', defaults.NAMESPACES['task']['default_routing_key'].default)
    else:
        routing_key = kwargs.get(
            'routing_key', defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
    tag_list = kwargs.pop('tags', [])
    group_id = kwargs.pop('group_id', None)

    try:
        async_result = super(Task, self).apply_async(*args, **kwargs)
    except Exception:
        if 'task_id' in kwargs:
            TaskStatus.objects(task_id=kwargs['task_id']).update(
                state=constants.CALL_ERROR_STATE)
        raise

    async_result.tags = tag_list

    # Create a new task status with the task id and tags.
    task_status = TaskStatus(
        task_id=async_result.id, task_type=self.name,
        state=constants.CALL_WAITING_STATE, worker_name=routing_key, tags=tag_list,
        group_id=group_id)
    # We're now racing with __call__, on_failure and on_success, any of which may have
    # completed by now. To avoid overwriting TaskStatus updates from those callbacks,
    # we do an upsert and only touch the fields listed below if we inserted the object.
    task_status.save_with_set_on_insert(fields_to_set_on_insert=[
        'state', 'start_time', 'finish_time', 'result', 'error', 'spawned_tasks',
        'traceback'])
    return async_result
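# Why the widened fields_to_set_on_insert list above matters: the worker can finish the
# task before apply_async's upsert runs. A contrived sequence using the hypothetical
# save_with_set_on_insert_sketch from earlier; all values here are illustrative.
task_status_collection.update_one(
    {'task_id': 'abc-123'},
    {'$set': {'state': 'finished', 'result': 42,
              'finish_time': '2015-01-01T00:00:05Z'}},
    upsert=True)  # on_success has already recorded completion

save_with_set_on_insert_sketch(
    {'task_id': 'abc-123', 'task_type': 'some.task', 'state': 'waiting',
     'start_time': None, 'finish_time': None, 'result': None},
    fields_to_set_on_insert=['state', 'start_time', 'finish_time', 'result'])
# The upsert matches the existing document, so every field listed in
# fields_to_set_on_insert is skipped and the completed status is not clobbered
# back to 'waiting'.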
def test_save_insert_with_set_on_insert(self):
    """
    Test the save method with set on insert arguments when the object is not already in
    the database.
    """
    task_id = str(uuid4())
    worker_name = 'some_worker'
    tags = ['tag_1', 'tag_2']
    state = constants.CALL_RUNNING_STATE
    spawned_tasks = ['foo']
    error = {'error': 'some_error'}
    progress_report = {'what do we want?': 'progress!', 'when do we want it?': 'now!'}
    task_type = 'some.task'
    start_time = datetime.now()
    finish_time = start_time + timedelta(minutes=5)
    start_time = dateutils.format_iso8601_datetime(start_time)
    finish_time = dateutils.format_iso8601_datetime(finish_time)
    result = None
    ts = TaskStatus(
        task_id, worker_name, tags, state, spawned_tasks=spawned_tasks, error=error,
        progress_report=progress_report, task_type=task_type, start_time=start_time,
        finish_time=finish_time, result=result)

    # This should cause ts to be in the database
    ts.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])

    ts = TaskStatus.objects()
    # There should only be one TaskStatus in the db
    self.assertEqual(len(ts), 1)
    ts = ts[0]
    # Make sure all the attributes are correct
    self.assertEqual(ts['task_id'], task_id)
    self.assertEqual(ts['worker_name'], worker_name)
    self.assertEqual(ts['tags'], tags)
    self.assertEqual(ts['state'], state)
    self.assertEqual(ts['error'], error)
    self.assertEqual(ts['spawned_tasks'], spawned_tasks)
    self.assertEqual(ts['progress_report'], progress_report)
    self.assertEqual(ts['task_type'], task_type)
    self.assertEqual(ts['start_time'], start_time)
    self.assertEqual(ts['finish_time'], finish_time)
    self.assertEqual(ts['result'], result)
    # These are always None
    self.assertEqual(ts['traceback'], None)
    self.assertEqual(ts['exception'], None)
def _apply_async_inner(self, reservation, *args, **kwargs):
    """
    This method allows the caller to schedule the ReservedTask to run asynchronously
    just like Celery's apply_async(), while also locking named resource(s). No two tasks
    that claim the same named resource(s) can execute concurrently.

    It accepts a list of strings, each of the form 'resource-type:resource-id'.

    This does not dispatch the task directly, but instead promises to dispatch it later.
    If the reservation list contains a single resource, the desired task is encapsulated
    by a call to _queue_reserved_task; otherwise, by a call to _queue_reserved_task_list.
    See the docblocks on _queue_reserved_task and _queue_reserved_task_list for more
    information.

    This method creates a TaskStatus as a placeholder for later updates. Pulp expects to
    poll on a task just after calling this method, so a TaskStatus entry needs to exist
    for it before it returns.

    For a list of parameters accepted by the *args and **kwargs parameters, please see
    the docblock for the apply_async() method.

    :param reservation: A list of strings that identify a set of named resources,
                        guaranteeing that only one task reserving any resource id in
                        this list can happen at a time.
    :type  reservation: list
    :param tags:        A list of tags (strings) to place onto the task, used for
                        searching for tasks by tag
    :type  tags:        list
    :param group_id:    The id to identify which group of tasks a task belongs to
    :type  group_id:    uuid.UUID
    :return:            An AsyncResult instance as returned by Celery's apply_async
    :rtype:             celery.result.AsyncResult
    """
    inner_task_id = str(uuid.uuid4())
    task_name = self.name
    tag_list = kwargs.get('tags', [])
    group_id = kwargs.get('group_id', None)

    # Create a new task status with the task id and tags.
    task_status = TaskStatus(task_id=inner_task_id, task_type=task_name,
                             state=constants.CALL_WAITING_STATE, tags=tag_list,
                             group_id=group_id)
    # To avoid the race condition where the __call__ method below is called before this
    # change is propagated to all db nodes, we use an 'upsert' here and set the task
    # state to 'waiting' only on an insert.
    task_status.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])
    try:
        # Decide what to call based on how many reservations we are being asked to make.
        if len(reservation) == 1:
            _queue_reserved_task.apply_async(
                args=[task_name, inner_task_id, reservation[0], args, kwargs],
                queue=RESOURCE_MANAGER_QUEUE)
        else:
            _queue_reserved_task_list.apply_async(
                args=[task_name, inner_task_id, reservation, args, kwargs],
                queue=RESOURCE_MANAGER_QUEUE)
    except Exception:
        TaskStatus.objects(task_id=task_status.task_id).update(
            state=constants.CALL_ERROR_STATE)
        raise
    return AsyncResult(inner_task_id)
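# A hypothetical multi-resource dispatch; the resource ids are invented, and some_task
# stands in for a Task that exposes this private method through a public wrapper. With
# more than one entry in the list, dispatch routes through _queue_reserved_task_list,
# which must hold every listed reservation before the inner task may run.
reservation = ['repository:zoo-repo', 'repo_group:all-zoos']
async_result = some_task._apply_async_inner(
    reservation, tags=['pulp:action:associate'])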