class Task(object):
    """Task base class.

    Note:
        When called tasks apply the :meth:`run` method.  This method must
        be defined by all tasks (that is unless the :meth:`__call__` method
        is overridden).

    Tasks are bound to an app lazily: :meth:`bind` is invoked the first
    time the :attr:`app` attribute is resolved (see :meth:`_get_app`), and
    that is when attributes left as :const:`None` are filled in from the
    app configuration (see :attr:`from_config`).
    """

    __trace__ = None
    __v2_compat__ = False  # set by old base in celery.task.base

    # Re-exported on the class so they can be reached as
    # ``self.MaxRetriesExceededError`` / ``self.OperationalError``.
    MaxRetriesExceededError = MaxRetriesExceededError
    OperationalError = OperationalError

    #: Execution strategy used, or the qualified name of one.
    Strategy = 'celery.worker.strategy:default'

    #: Request class used, or the qualified name of one.
    Request = 'celery.worker.request:Request'

    #: The application instance associated with this task class.
    _app = None

    #: Name of the task.
    name = None

    #: Enable argument checking.
    #: You can set this to false if you don't want the signature to be
    #: checked when calling the task.
    #: Defaults to :attr:`app.strict_typing <@Celery.strict_typing>`.
    typing = None

    #: Maximum number of retries before giving up.  If set to :const:`None`,
    #: it will **never** stop retrying.
    max_retries = 3

    #: Default time in seconds before a retry of the task should be
    #: executed.  3 minutes by default.
    default_retry_delay = 3 * 60

    #: Rate limit for this task type.  Examples: :const:`None` (no rate
    #: limit), `'100/s'` (hundred tasks a second), `'100/m'` (hundred tasks
    #: a minute), `'100/h'` (hundred tasks an hour).
    #: Defaults to the :setting:`task_default_rate_limit` setting.
    rate_limit = None

    #: If enabled the worker won't store task state and return values
    #: for this task.  Defaults to the :setting:`task_ignore_result`
    #: setting.
    ignore_result = None

    #: If enabled the request will keep track of subtasks started by
    #: this task, and this information will be sent with the result
    #: (``result.children``).
    trail = True

    #: If enabled the worker will send monitoring events related to
    #: this task (but only if the worker is configured to send
    #: task related events).
    #: Note that this has no effect on the task-failure event case
    #: where a task is not registered (as it will have no task class
    #: to check this flag).
    send_events = True

    #: When enabled errors will be stored even if the task is otherwise
    #: configured to ignore results.
    #: Defaults to the :setting:`task_store_errors_even_if_ignored` setting.
    store_errors_even_if_ignored = None

    #: The name of a serializer that is registered with
    #: :mod:`kombu.serialization.registry`.
    #: Defaults to the :setting:`task_serializer` setting.
    serializer = None

    #: Hard time limit.
    #: Defaults to the :setting:`task_time_limit` setting.
    time_limit = None

    #: Soft time limit.
    #: Defaults to the :setting:`task_soft_time_limit` setting.
    soft_time_limit = None

    #: The result store backend used for this task.
    # NOTE: this name is rebound to the ``backend`` property defined at the
    # end of the class body; the stored value lives in ``_backend``.
    backend = None

    #: If disabled this task won't be registered automatically.
    autoregister = True

    #: If enabled the task will report its status as 'started' when the task
    #: is executed by a worker.  Disabled by default as the normal behavior
    #: is to not report that level of granularity.  Tasks are either pending,
    #: finished, or waiting to be retried.
    #:
    #: Having a 'started' status can be useful for when there are long
    #: running tasks and there's a need to report what task is currently
    #: running.
    #:
    #: The application default can be overridden using the
    #: :setting:`task_track_started` setting.
    track_started = None

    #: When enabled messages for this task will be acknowledged **after**
    #: the task has been executed, and not *just before* (the
    #: default behavior).
    #:
    #: Please note that this means the task may be executed twice if the
    #: worker crashes mid execution.
    #:
    #: The application default can be overridden with the
    #: :setting:`task_acks_late` setting.
    acks_late = None

    #: When enabled messages for this task will be acknowledged even if it
    #: fails or times out.
    #:
    #: Configuring this setting only applies to tasks that are
    #: acknowledged **after** they have been executed and only if
    #: :setting:`task_acks_late` is enabled.
    #:
    #: The application default can be overridden with the
    #: :setting:`task_acks_on_failure_or_timeout` setting.
    acks_on_failure_or_timeout = None

    #: Even if :attr:`acks_late` is enabled, the worker will
    #: acknowledge tasks when the worker process executing them abruptly
    #: exits or is signaled (e.g., :sig:`KILL`/:sig:`INT`, etc).
    #:
    #: Setting this to true allows the message to be re-queued instead,
    #: so that the task will execute again by the same worker, or another
    #: worker.
    #:
    #: Warning: Enabling this can cause message loops; make sure you know
    #: what you're doing.
    #:
    #: Defaults to the :setting:`task_reject_on_worker_lost` setting.
    reject_on_worker_lost = None

    #: Tuple of expected exceptions.
    #:
    #: These are errors that are expected in normal operation
    #: and that shouldn't be regarded as a real error by the worker.
    #: Currently this means that the state will be updated to an error
    #: state, but the worker won't log the event as an error.
    throws = ()

    #: Default task expiry time.
    expires = None

    #: Default task priority.
    #: Defaults to the :setting:`task_default_priority` setting.
    priority = None

    #: Max length of result representation used in logs and events.
    resultrepr_maxsize = 1024

    #: Task request stack, the current request will be the topmost.
    #: Created by :meth:`bind` the first time the class is bound.
    request_stack = None

    #: Some may expect a request to exist even if the task hasn't been
    #: called.  This should probably be deprecated.
    _default_request = None

    #: Deprecated attribute ``abstract`` here for compatibility.
    abstract = True

    #: Cache for :meth:`_get_exec_options`; reset by :meth:`bind`.
    _exec_options = None

    #: Set to true by :meth:`bind` once the class has been bound to an app.
    __bound__ = False

    #: Pairs of ``(task attribute, configuration key)``.  :meth:`bind`
    #: copies the configuration value onto every listed attribute that is
    #: still :const:`None` on the class.
    from_config = (
        ('serializer', 'task_serializer'),
        ('rate_limit', 'task_default_rate_limit'),
        ('priority', 'task_default_priority'),
        ('track_started', 'task_track_started'),
        ('acks_late', 'task_acks_late'),
        ('acks_on_failure_or_timeout', 'task_acks_on_failure_or_timeout'),
        ('reject_on_worker_lost', 'task_reject_on_worker_lost'),
        ('ignore_result', 'task_ignore_result'),
        ('store_errors_even_if_ignored', 'task_store_errors_even_if_ignored'),
    )

    _backend = None  # set by backend property.

    # - Tasks are lazily bound, so that configuration is not set
    # - until the task is actually used

    @classmethod
    def bind(cls, app):
        """Bind this task class to ``app``.

        Marks the class as bound, stores the app, clears the cached
        execution options, and fills in :attr:`typing` and every attribute
        listed in :attr:`from_config` that is still :const:`None`.
        The first time the class is bound it additionally applies the
        configured annotations and creates a new request stack.
        :meth:`on_bound` is called on every bind.

        Arguments:
            app: The application to bind to; must provide ``conf`` and
                ``strict_typing``.

        Returns:
            The ``app`` argument, unchanged.
        """
        # Remember the previous state so one-time setup below only runs once.
        was_bound, cls.__bound__ = cls.__bound__, True
        cls._app = app
        conf = app.conf
        cls._exec_options = None  # clear option cache

        if cls.typing is None:
            cls.typing = app.strict_typing

        for attr_name, config_name in cls.from_config:
            if getattr(cls, attr_name, None) is None:
                setattr(cls, attr_name, conf[config_name])

        # decorate with annotations from config.
        if not was_bound:
            cls.annotate()

            from celery.utils.threads import LocalStack
            cls.request_stack = LocalStack()

        # PeriodicTask uses this to add itself to the PeriodicTask schedule.
        cls.on_bound(app)

        return app

    @classmethod
    def on_bound(cls, app):
        """Called when the task is bound to an app.

        Note:
            This class method can be defined to do additional actions when
            the task class is bound to an app.
        """

    @classmethod
    def _get_app(cls):
        """Return the app for this task class, binding on first use.

        Falls back to ``current_app`` when no app has been set, and calls
        :meth:`bind` if the class hasn't been bound yet.
        """
        if cls._app is None:
            cls._app = current_app
        if not cls.__bound__:
            # The app property's __set__  method is not called
            # if Task.app is set (on the class), so must bind on use.
            cls.bind(cls._app)
        return cls._app
    # Reading ``app`` goes through _get_app; assigning goes through bind.
    app = class_property(_get_app, bind)

    @classmethod
    def annotate(cls):
        """Apply the annotations configured on the app to this class.

        For every annotation mapping resolved from ``cls.app.annotations``:
        keys starting with ``@`` wrap the named attribute using
        :meth:`add_around`; any other key is set as a class attribute.
        """
        for d in resolve_all_annotations(cls.app.annotations, cls):
            for key, value in items(d):
                if key.startswith('@'):
                    cls.add_around(key[1:], value)
                else:
                    setattr(cls, key, value)

    @classmethod
    def add_around(cls, attr, around):
        """Replace attribute ``attr`` with ``around(original)``.

        Arguments:
            attr (str): Name of the class attribute to wrap.
            around (Callable): Called with the original attribute, must
                return the replacement.
        """
        orig = getattr(cls, attr)
        # If the attribute was already wrapped, wrap the original again
        # rather than stacking wrappers.
        if getattr(orig, '__wrapped__', None):
            orig = orig.__wrapped__
        meth = around(orig)
        meth.__wrapped__ = orig
        setattr(cls, attr, meth)

    def __call__(self, *args, **kwargs):
        """Call :meth:`run` directly with a fresh request context.

        The task is pushed onto the task stack and a new request built from
        ``args``/``kwargs`` is pushed for the duration of the call; both
        are popped again even if :meth:`run` raises.
        """
        _task_stack.push(self)
        self.push_request(args=args, kwargs=kwargs)
        try:
            return self.run(*args, **kwargs)
        finally:
            self.pop_request()
            _task_stack.pop()

    def __reduce__(self):
        """Pickle support: reduce the task to its name and module name."""
        # - tasks are pickled into the name of the task only, and the receiver
        # - simply grabs it from the local registry.
        # - in later versions the module of the task is also included,
        # - and the receiving side tries to import that module so that
        # - it will work even if the task hasn't been registered.
        mod = type(self).__module__
        # Only include the module name if it's actually loaded here.
        mod = mod if mod and mod in sys.modules else None
        return (_unpickle_task_v2, (self.name, mod), None)

    def run(self, *args, **kwargs):
        """The body of the task executed by workers."""
        raise NotImplementedError('Tasks must define the run method.')

    def start_strategy(self, app, consumer, **kwargs):
        """Instantiate :attr:`Strategy` for this task.

        The strategy is created with ``(self, app, consumer, **kwargs)``.
        """
        return instantiate(self.Strategy, self, app, consumer, **kwargs)

    def delay(self, *args, **kwargs):
        """Star argument version of :meth:`apply_async`.

        Does not support the extra options enabled by :meth:`apply_async`.

        Arguments:
            *args (Any): Positional arguments passed on to the task.
            **kwargs (Any): Keyword arguments passed on to the task.
        Returns:
            celery.result.AsyncResult: Future promise.
        """
        return self.apply_async(args, kwargs)

    def apply_async(self, args=None, kwargs=None, task_id=None, producer=None,
                    link=None, link_error=None, shadow=None, **options):
        """Apply tasks asynchronously by sending a message.

        Arguments:
            args (Tuple): The positional arguments to pass on to the task.

            kwargs (Dict): The keyword arguments to pass on to the task.

            countdown (float): Number of seconds into the future that the
                task should execute.  Defaults to immediate execution.

            eta (~datetime.datetime): Absolute time and date of when the task
                should be executed.  May not be specified if `countdown`
                is also supplied.

            expires (float, ~datetime.datetime): Datetime or
                seconds in the future at which the task should expire.
                The task won't be executed after the expiration time.

            shadow (str): Override task name used in logs/monitoring.
                Default is retrieved from :meth:`shadow_name`.

            connection (kombu.Connection): Re-use existing broker connection
                instead of acquiring one from the connection pool.

            retry (bool): If enabled sending of the task message will be
                retried in the event of connection loss or failure.
                Default is taken from the :setting:`task_publish_retry`
                setting.  Note that you need to handle the
                producer/connection manually for this to work.

            retry_policy (Mapping): Override the retry policy used.
                See the :setting:`task_publish_retry_policy` setting.

            queue (str, kombu.Queue): The queue to route the task to.
                This must be a key present in :setting:`task_queues`, or
                :setting:`task_create_missing_queues` must be
                enabled.  See :ref:`guide-routing` for more
                information.

            exchange (str, kombu.Exchange): Named custom exchange to send the
                task to.  Usually not used in combination with the ``queue``
                argument.

            routing_key (str): Custom routing key used to route the task to a
                worker server.  If in combination with a ``queue`` argument
                only used to specify custom routing keys to topic exchanges.

            priority (int): The task priority, a number between 0 and 9.
                Defaults to the :attr:`priority` attribute.

            serializer (str): Serialization method to use.
                Can be `pickle`, `json`, `yaml`, `msgpack` or any custom
                serialization method that's been registered
                with :mod:`kombu.serialization.registry`.
                Defaults to the :attr:`serializer` attribute.

            compression (str): Optional compression method
                to use.  Can be one of ``zlib``, ``bzip2``,
                or any custom compression methods registered with
                :func:`kombu.compression.register`.
                Defaults to the :setting:`task_compression` setting.

            link (Signature): A single, or a list of task signatures
                to apply if the task returns successfully.

            link_error (Signature): A single, or a list of task signatures
                to apply if an error occurs while executing the task.

            producer (kombu.Producer): custom producer to use when publishing
                the task.

            add_to_parent (bool): If set to True (default) and the task
                is applied while executing another task, then the result
                will be appended to the parent tasks ``request.children``
                attribute.  Trailing can also be disabled by default using the
                :attr:`trail` attribute

            publisher (kombu.Producer): Deprecated alias to ``producer``.

            headers (Dict): Message headers to be included in the message.

        Returns:
            celery.result.AsyncResult: Promise of future evaluation.

        Raises:
            TypeError: If not enough arguments are passed, or too many
                arguments are passed.  Note that signature checks may
                be disabled by specifying ``@task(typing=False)``.
            kombu.exceptions.OperationalError: If a connection to the
               transport cannot be made, or if the connection is lost.

        Note:
            Also supports all keyword arguments supported by
            :meth:`kombu.Producer.publish`.
        """
        # Validate the call signature up front when argument checking is on.
        if self.typing:
            try:
                check_arguments = self.__header__
            except AttributeError:  # pragma: no cover
                pass
            else:
                check_arguments(*(args or ()), **(kwargs or {}))

        app = self._get_app()
        if app.conf.task_always_eager:
            # Eager mode: run locally instead of sending a message, but
            # still round-trip the arguments through the serializer.
            with app.producer_or_acquire(producer) as eager_producer:
                serializer = options.get(
                    'serializer',
                    (eager_producer.serializer if eager_producer.serializer
                     else app.conf.task_serializer))
                body = args, kwargs
                content_type, content_encoding, data = serialization.dumps(
                    body, serializer,
                )
                args, kwargs = serialization.loads(data, content_type,
                                                   content_encoding,
                                                   accept=[content_type])
            with denied_join_result():
                return self.apply(args, kwargs, task_id=task_id or uuid(),
                                  link=link, link_error=link_error, **options)

        # The v2-compat flavor of shadow_name takes an extra leading argument.
        if self.__v2_compat__:
            shadow = shadow or self.shadow_name(self(), args, kwargs, options)
        else:
            shadow = shadow or self.shadow_name(args, kwargs, options)

        # Explicit call options take precedence over the task's defaults.
        preopts = self._get_exec_options()
        options = dict(preopts, **options) if options else preopts

        options.setdefault('ignore_result', self.ignore_result)
        # NOTE(review): truthiness test, so a priority of 0 (like None) is
        # not applied by this branch -- confirm that is intended.
        if self.priority:
            options.setdefault('priority', self.priority)

        return app.send_task(self.name, args, kwargs, task_id=task_id,
                             producer=producer, link=link,
                             link_error=link_error,
                             result_cls=self.AsyncResult, shadow=shadow,
                             task_type=self, **options)

    def shadow_name(self, args, kwargs, options):
        """Override for custom task name in worker logs/monitoring.

        Example:
            .. code-block:: python

                from celery.utils.imports import qualname

                def shadow_name(task, args, kwargs, options):
                    return qualname(args[0])

                @app.task(shadow_name=shadow_name, serializer='pickle')
                def apply_function_async(fun, *args, **kwargs):
                    return fun(*args, **kwargs)

        Arguments:
            args (Tuple): Task positional arguments.
            kwargs (Dict): Task keyword arguments.
            options (Dict): Task execution options.
        """

    def signature_from_request(self, request=None, args=None, kwargs=None,
                               queue=None, **extra_options):
        """Create a signature for this task based on a request.

        Arguments:
            request: Request to copy from.  Defaults to the current
                :attr:`request`.
            args (Tuple): Positional arguments; defaults to ``request.args``.
            kwargs (Dict): Keyword arguments; defaults to
                ``request.kwargs``.
            queue: If truthy, used as the ``queue`` option instead of the
                routing information found in ``request.delivery_info``.
            **extra_options (Any): Passed on to :meth:`signature`.

        Returns:
            The signature created by :meth:`signature`, with options taken
            from ``request.as_execution_options()``.
        """
        request = self.request if request is None else request
        args = request.args if args is None else args
        kwargs = request.kwargs if kwargs is None else kwargs
        options = request.as_execution_options()
        if queue:
            options['queue'] = queue
        else:
            delivery_info = request.delivery_info or {}
            exchange = delivery_info.get('exchange')
            routing_key = delivery_info.get('routing_key')
            if exchange == '' and routing_key:
                # sent to anon-exchange
                options['queue'] = routing_key
            else:
                options.update(delivery_info)
        return self.signature(args, kwargs, options, type=self,
                              **extra_options)
    subtask_from_request = signature_from_request  # XXX compat

    def retry(self, args=None, kwargs=None, exc=None, throw=True,
              eta=None, countdown=None, max_retries=None, **options):
        """Retry the task, adding it to the back of the queue.

        Example:
            >>> from imaginary_twitter_lib import Twitter
            >>> from proj.celery import app

            >>> @app.task(bind=True)
            ... def tweet(self, auth, message):
            ...     twitter = Twitter(oauth=auth)
            ...     try:
            ...         twitter.post_status_update(message)
            ...     except twitter.FailWhale as exc:
            ...         # Retry in 5 minutes.
            ...         raise self.retry(countdown=60 * 5, exc=exc)

        Note:
            Although the task will never return above as `retry` raises an
            exception to notify the worker, we use `raise` in front of the
            retry to convey that the rest of the block won't be executed.

        Arguments:
            args (Tuple): Positional arguments to retry with.
            kwargs (Dict): Keyword arguments to retry with.
            exc (Exception): Custom exception to report when the max retry
                limit has been exceeded (default:
                :exc:`~@MaxRetriesExceededError`).

                If this argument is set and retry is called while
                an exception was raised (``sys.exc_info()`` is set)
                it will attempt to re-raise the current exception.

                If no exception was raised it will raise the ``exc``
                argument provided.
            countdown (float): Time in seconds to delay the retry for.
            eta (~datetime.datetime): Explicit time and date to run the
                retry at.
            max_retries (int): If set, overrides the default retry limit for
                this execution.  Changes to this parameter don't propagate to
                subsequent task retry attempts.  A value of :const:`None`,
                means "use the default", so if you want infinite retries you'd
                have to set the :attr:`max_retries` attribute of the task to
                :const:`None` first.
            time_limit (int): If set, overrides the default time limit.
            soft_time_limit (int): If set, overrides the default soft
                time limit.
            throw (bool): If this is :const:`False`, don't raise the
                :exc:`~@Retry` exception, that tells the worker to mark
                the task as being retried.  Note that this means the task
                will be marked as failed if the task raises an exception,
                or successful if it returns after the retry call.
            **options (Any): Extra options to pass on to :meth:`apply_async`.

        Raises:

            celery.exceptions.Retry:
                To tell the worker that the task has been re-sent for retry.
                This always happens, unless the `throw` keyword argument
                has been explicitly set to :const:`False`, and is considered
                normal operation.
        """
        request = self.request
        retries = request.retries + 1
        max_retries = self.max_retries if max_retries is None else max_retries

        # Not in worker or emulated by (apply/always_eager),
        # so just raise the original exception.
        if request.called_directly:
            # raises orig stack if PyErr_Occurred,
            # and augments with exc' if that argument is defined.
            raise_with_context(exc or Retry('Task can be retried', None))

        if not eta and countdown is None:
            countdown = self.default_retry_delay

        is_eager = request.is_eager
        # Built before the limit check because the error message below
        # reports the signature's args/kwargs.
        S = self.signature_from_request(
            request, args, kwargs,
            countdown=countdown, eta=eta, retries=retries,
            **options)

        if max_retries is not None and retries > max_retries:
            if exc:
                # On Py3: will augment any current exception with
                # the exc' argument provided (raise exc from orig)
                raise_with_context(exc)
            raise self.MaxRetriesExceededError(
                "Can't retry {0}[{1}] args:{2} kwargs:{3}".format(
                    self.name, request.id, S.args, S.kwargs),
                task_args=S.args, task_kwargs=S.kwargs)

        ret = Retry(exc=exc, when=eta or countdown)

        if is_eager:
            # if task was executed eagerly using apply(),
            # then the retry must also be executed eagerly.
            S.apply().get()
            if throw:
                raise ret
            return ret

        try:
            S.apply_async()
        # NOTE: this rebinds the ``exc`` argument; ``ret`` was already
        # created above and we raise immediately, so nothing reads it after.
        except Exception as exc:
            raise Reject(exc, requeue=False)
        if throw:
            raise ret
        return ret

    def apply(self, args=None, kwargs=None,
              link=None, link_error=None,
              task_id=None, retries=None, throw=None,
              logfile=None, loglevel=None, headers=None, **options):
        """Execute this task locally, by blocking until the task returns.

        Arguments:
            args (Tuple): positional arguments passed on to the task.
            kwargs (Dict): keyword arguments passed on to the task.
            throw (bool): Re-raise task exceptions.
                Defaults to the :setting:`task_eager_propagates` setting.

        Returns:
            celery.result.EagerResult: pre-evaluated result.
        """
        # trace imports Task, so need to import inline.
        from celery.app.trace import build_tracer

        app = self._get_app()
        args = args or ()
        kwargs = kwargs or {}
        task_id = task_id or uuid()
        retries = retries or 0
        if throw is None:
            throw = app.conf.task_eager_propagates

        # Make sure we get the task instance, not class.
        task = app._tasks[self.name]

        # Minimal request description handed to the tracer for eager runs.
        request = {
            'id': task_id,
            'retries': retries,
            'is_eager': True,
            'logfile': logfile,
            'loglevel': loglevel or 0,
            'hostname': gethostname(),
            'callbacks': maybe_list(link),
            'errbacks': maybe_list(link_error),
            'headers': headers,
            'delivery_info': {'is_eager': True},
        }
        tb = None
        tracer = build_tracer(
            task.name, task, eager=True,
            propagate=throw, app=self._get_app(),
        )
        ret = tracer(task_id, args, kwargs, request)
        retval = ret.retval
        # On failure the tracer returns an ExceptionInfo; unpack it so the
        # result carries the exception and its traceback separately.
        if isinstance(retval, ExceptionInfo):
            retval, tb = retval.exception, retval.traceback
        state = states.SUCCESS if ret.info is None else ret.info.state
        return EagerResult(task_id, retval, state, traceback=tb)

    def AsyncResult(self, task_id, **kwargs):
        """Get AsyncResult instance for this kind of task.

        Arguments:
            task_id (str): Task id to get result for.
        """
        return self._get_app().AsyncResult(task_id, backend=self.backend,
                                           task_name=self.name, **kwargs)

    def signature(self, args=None, *starargs, **starkwargs):
        """Create signature.

        Returns:
            :class:`~celery.signature`:  object for
                this task, wrapping arguments and execution options
                for a single task invocation.
        """
        starkwargs.setdefault('app', self.app)
        return signature(self, args, *starargs, **starkwargs)
    subtask = signature

    def s(self, *args, **kwargs):
        """Create signature.

        Shortcut for ``.s(*a, **k) -> .signature(a, k)``.
        """
        return self.signature(args, kwargs)

    def si(self, *args, **kwargs):
        """Create immutable signature.

        Shortcut for ``.si(*a, **k) -> .signature(a, k, immutable=True)``.
        """
        return self.signature(args, kwargs, immutable=True)

    def chunks(self, it, n):
        """Create a :class:`~celery.canvas.chunks` task for this task."""
        from celery import chunks
        return chunks(self.s(), it, n, app=self.app)

    def map(self, it):
        """Create a :class:`~celery.canvas.xmap` task from ``it``."""
        from celery import xmap
        return xmap(self.s(), it, app=self.app)

    def starmap(self, it):
        """Create a :class:`~celery.canvas.xstarmap` task from ``it``."""
        from celery import xstarmap
        return xstarmap(self.s(), it, app=self.app)

    def send_event(self, type_, retry=True, retry_policy=None, **fields):
        """Send monitoring event message.

        This can be used to add custom event types in :pypi:`Flower`
        and other monitors.

        Arguments:
            type_ (str):  Type of event, e.g. ``"task-failed"``.

        Keyword Arguments:
            retry (bool):  Retry sending the message
                if the connection is lost.  Enabled by default.
            retry_policy (Mapping): Retry settings.  Default is taken
                from the :setting:`task_publish_retry_policy` setting.
            **fields (Any): Map containing information about the event.
                Must be JSON serializable.
        """
        req = self.request
        if retry_policy is None:
            retry_policy = self.app.conf.task_publish_retry_policy
        with self.app.events.default_dispatcher(hostname=req.hostname) as d:
            return d.send(type_, uuid=req.id, retry=retry,
                          retry_policy=retry_policy, **fields)

    def replace(self, sig):
        """Replace this task, with a new task inheriting the task id.

        .. versionadded:: 4.0

        Arguments:
            sig (~@Signature): signature to replace with.

        Raises:
            ~@Ignore: This is always raised, so the best practice
            is to always use ``raise self.replace(...)`` to convey
            to the reader that the task won't continue after being replaced.
            ~@ImproperlyConfigured: If ``sig`` already has a ``chord``
            option set.
        """
        chord = self.request.chord
        if 'chord' in sig.options:
            raise ImproperlyConfigured(
                "A signature replacing a task must not be part of a chord")

        if isinstance(sig, group):
            # Chain the group into the accumulate task, which inherits this
            # request's callbacks and errbacks.
            sig |= self.app.tasks['celery.accumulate'].s(index=0).set(
                link=self.request.callbacks,
                link_error=self.request.errbacks,
            )

        # Append whatever remains of the current chain to the replacement.
        if self.request.chain:
            for t in reversed(self.request.chain):
                sig |= signature(t, app=self.app)

        sig.set(
            chord=chord,
            group_id=self.request.group,
            root_id=self.request.root_id,
        )
        # Freeze with the current id so the replacement inherits it.
        sig.freeze(self.request.id)

        sig.delay()
        raise Ignore('Replaced by new task')

    def add_to_chord(self, sig, lazy=False):
        """Add signature to the chord the current task is a member of.

        .. versionadded:: 4.0

        Currently only supported by the Redis result backend.

        Arguments:
            sig (~@Signature): Signature to extend chord with.
            lazy (bool): If enabled the new task won't actually be called,
                and ``sig.delay()`` must be called manually.

        Raises:
            ValueError: If the current request isn't part of a chord.
        """
        if not self.request.chord:
            raise ValueError('Current task is not member of any chord')
        sig.set(
            group_id=self.request.group,
            chord=self.request.chord,
            root_id=self.request.root_id,
        )
        result = sig.freeze()
        self.backend.add_to_chord(self.request.group, result)
        return sig.delay() if not lazy else sig

    def update_state(self, task_id=None, state=None, meta=None, **kwargs):
        """Update task state.

        Arguments:
            task_id (str): Id of the task to update.
                Defaults to the id of the current task.
            state (str): New state.
            meta (Dict): State meta-data.
            **kwargs (Any): Passed on to the backend's ``store_result``.
        """
        if task_id is None:
            task_id = self.request.id
        self.backend.store_result(task_id, meta, state, **kwargs)

    def on_success(self, retval, task_id, args, kwargs):
        """Success handler.

        Run by the worker if the task executes successfully.

        Arguments:
            retval (Any): The return value of the task.
            task_id (str): Unique id of the executed task.
            args (Tuple): Original arguments for the executed task.
            kwargs (Dict): Original keyword arguments for the executed task.

        Returns:
            None: The return value of this handler is ignored.
        """

    def on_retry(self, exc, task_id, args, kwargs, einfo):
        """Retry handler.

        This is run by the worker when the task is to be retried.

        Arguments:
            exc (Exception): The exception sent to :meth:`retry`.
            task_id (str): Unique id of the retried task.
            args (Tuple): Original arguments for the retried task.
            kwargs (Dict): Original keyword arguments for the retried task.
            einfo (~billiard.einfo.ExceptionInfo): Exception information.

        Returns:
            None: The return value of this handler is ignored.
        """

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """Error handler.

        This is run by the worker when the task fails.

        Arguments:
            exc (Exception): The exception raised by the task.
            task_id (str): Unique id of the failed task.
            args (Tuple): Original arguments for the task that failed.
            kwargs (Dict): Original keyword arguments for the task that failed.
            einfo (~billiard.einfo.ExceptionInfo): Exception information.

        Returns:
            None: The return value of this handler is ignored.
        """

    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        """Handler called after the task returns.

        Arguments:
            status (str): Current task state.
            retval (Any): Task return value/exception.
            task_id (str): Unique id of the task.
            args (Tuple): Original arguments for the task.
            kwargs (Dict): Original keyword arguments for the task.
            einfo (~billiard.einfo.ExceptionInfo): Exception information.

        Returns:
            None: The return value of this handler is ignored.
        """

    def add_trail(self, result):
        """Record ``result`` as a child of the current request.

        Nothing is recorded when :attr:`trail` is disabled.

        Returns:
            The ``result`` argument, unchanged.
        """
        if self.trail:
            self.request.children.append(result)
        return result

    def push_request(self, *args, **kwargs):
        """Push a new request context, built from the arguments given."""
        self.request_stack.push(Context(*args, **kwargs))

    def pop_request(self):
        """Pop the topmost request off the request stack."""
        self.request_stack.pop()

    def __repr__(self):
        """``repr(task)``."""
        return _reprtask(self, R_INSTANCE)

    def _get_request(self):
        """Get current request object."""
        req = self.request_stack.top
        if req is None:
            # task was not called, but some may still expect a request
            # to be there, perhaps that should be deprecated.
            if self._default_request is None:
                self._default_request = Context()
            return self._default_request
        return req
    request = property(_get_request)

    def _get_exec_options(self):
        """Return the task's default execution options.

        The options are extracted from the task once and then cached in
        ``_exec_options``.
        """
        if self._exec_options is None:
            self._exec_options = extract_exec_options(self)
        return self._exec_options

    @property
    def backend(self):
        """The result backend for this task.

        Returns the backend explicitly assigned to the task, falling back
        to the app's backend when none has been set.
        """
        backend = self._backend
        if backend is None:
            return self.app.backend
        return backend

    @backend.setter
    def backend(self, value):  # noqa
        self._backend = value

    @property
    def __name__(self):
        """Name of the task's class."""
        return self.__class__.__name__
class JobtasticTask(Task):
    """
    A base ``Celery.Task`` class that provides some common niceties for running
    tasks that return some kind of result for which you need to wait.

    To create a task that uses these helpers, use ``JobtasticTask`` as a
    subclass and define a ``calculate_result`` method which returns a
    dictionary to be turned in to JSON.

    You will also need to define the following class variables:

    * ``significant_kwargs`` The kwarg values that will be converted to strings
      and hashed to determine if two versions of the same task are equivalent.
      This is a list of 2-tuples with the first item being the kwarg string and
      the second being a callable that converts the value to a hashable string.
      If no second item is given, it's assumed that calling ``str()`` on the
      value works just fine.
    * ``herd_avoidance_timeout`` Number of seconds to hold a lock on this task
      for other equivalent runs. Generally, this should be set to the longest
      estimated amount of time the task could consume.

    The following class members are optional:

    * ``cache_prefix`` A unique string representing this task. Eg.
      ``foo.bar.tasks.BazzTask``
    * ``cache_duration`` The number of seconds for which the result of this
      task should be cached, meaning subsequent equivalent runs will skip
      computation. The default is to do no result caching.
    * ``memleak_threshold`` When a single run of a Task increases the resident
      process memory usage by more than this number of MegaBytes, a warning is
      logged to the logger. This is useful for finding tasks that are behaving
      badly under certain conditions. By default, no logging is performed.
      Set this value to 0 to log all RAM changes and -1 to disable logging.

    Provided are helpers for:

    1. Handling failures to connect the task broker by either directly
       running the task (`delay_or_run`) or by returning a task that
       contains the connection error (`delay_or_fail`). This minimizes
       the user-facing impact of a dead task broker.
    2. Defeating any thundering herd issues by ensuring only one of a task with
       specific arguments can be running at a time by directing subsequent
       calls to latch on to the appropriate result.
    3. Caching the final result for a designated time period so that subsequent
       equivalent calls return quickly.
    4. Returning the results as JSON, so that they can be processed easily by
       client-side javascript.
    5. Returning time-based, continually updating progress estimates to
       front-end code so that users know what to expect.
    """
    abstract = True

    #: The shared cache used for locking and thundering herd protection
    _cache = None

    @classmethod
    def async_or_eager(self, **options):
        """
        Attempt to call self.apply_async, or if that fails because of a problem
        with the broker, run the task eagerly and return an EagerResult.

        ``args`` and ``kwargs`` are taken out of ``options`` (both default to
        ``None``); everything else is forwarded untouched.
        """
        args = options.pop("args", None)
        kwargs = options.pop("kwargs", None)
        possible_broker_errors = self._get_possible_broker_errors_tuple()
        try:
            return self.apply_async(args, kwargs, **options)
        except possible_broker_errors:
            return self.apply(args, kwargs, **options)

    @classmethod
    def async_or_fail(self, **options):
        """
        Attempt to call self.apply_async, but if that fails with an exception,
        we fake the task completion using the exception as the result. This
        allows us to seamlessly handle errors on task creation the same way we
        handle errors when a task runs, simplifying the user interface.
        """
        args = options.pop("args", None)
        kwargs = options.pop("kwargs", None)
        possible_broker_errors = self._get_possible_broker_errors_tuple()
        try:
            return self.apply_async(args, kwargs, **options)
        except possible_broker_errors as e:
            return self.simulate_async_error(e)

    @classmethod
    def delay_or_eager(self, *args, **kwargs):
        """
        Wrap async_or_eager with a convenience signature like delay
        """
        return self.async_or_eager(args=args, kwargs=kwargs)

    @classmethod
    def delay_or_run(self, *args, **kwargs):
        """
        Attempt to call self.delay, or if that fails, call self.run.

        Returns a tuple, (result, required_fallback). ``result`` is the result
        of calling delay or run. ``required_fallback`` is True if the broker
        failed we had to resort to `self.run`.
        """
        warnings.warn(
            "delay_or_run is deprecated. Please use delay_or_eager",
            DeprecationWarning,
            # Attribute the warning to the caller, not to this method.
            stacklevel=2,
        )
        possible_broker_errors = self._get_possible_broker_errors_tuple()
        try:
            result = self.apply_async(args=args, kwargs=kwargs)
            required_fallback = False
        except possible_broker_errors:
            result = self().run(*args, **kwargs)
            required_fallback = True
        return result, required_fallback

    @classmethod
    def delay_or_fail(self, *args, **kwargs):
        """
        Wrap async_or_fail with a convenience signature like delay
        """
        return self.async_or_fail(args=args, kwargs=kwargs)

    @classmethod
    def _get_possible_broker_errors_tuple(self):
        """
        Build the tuple of exception classes that are treated as "the broker
        is unavailable" by the ``*_or_eager`` / ``*_or_fail`` helpers.
        """
        if hasattr(self.app, 'connection'):
            dummy_conn = self.app.connection()
        else:
            # Celery 2.5 uses `broker_connection` instead
            dummy_conn = self.app.broker_connection()

        possible_broker_errors = (dummy_conn.connection_errors +
                                  dummy_conn.channel_errors)
        try:
            from kombu.exceptions import OperationalError
            # In celery 4.x when a broker is not connected it throws an
            # OperationalError from kombu. It should also be noted that the
            # newest version of kombu (4.1.0) actually hangs forever. So we
            # need to peg versions of kombu until that gets fixed.
            possible_broker_errors += (OperationalError, )
        except ImportError:
            pass
        return possible_broker_errors

    @classmethod
    def simulate_async_error(self, exception):
        """
        Take this exception and store it as an error in the result backend.
        This unifies the handling of broker-connection errors with any other
        type of error that might occur when running the task. So the same
        error-handling that might retry a task or display a useful message to
        the user can also handle this error.

        The traceback is read from ``sys.exc_info()``, so this is meant to be
        called from inside the ``except`` block that caught ``exception``.
        """
        task_id = gen_unique_id()
        async_result = self.AsyncResult(task_id)
        einfo = ExceptionInfo(sys.exc_info())

        async_result.backend.mark_as_failure(
            task_id,
            exception,
            traceback=einfo.traceback,
        )

        return async_result

    @classmethod
    def apply_async(self, args=None, kwargs=None, **options):
        """
        Put this task on the Celery queue as a singleton. Only one of this type
        of task with its distinguishing args/kwargs will be allowed on the
        queue at a time. Subsequent duplicate tasks called while this task is
        still running will just latch on to the results of the running task by
        synchronizing the task uuid. Additionally, identical task calls will
        return those results for the next ``cache_duration`` seconds.

        ``args`` and ``kwargs`` may be ``None``; they are forwarded to the
        parent ``apply_async`` exactly as given.
        """
        self._validate_required_class_vars()

        # ``async_or_eager``/``async_or_fail`` hand us ``kwargs=None`` when
        # the caller supplied none, and ``**None`` raises a TypeError.
        cache_key = self._get_cache_key(**(kwargs or {}))
        herd_key = 'herd:%s' % cache_key

        # Check for an already-computed and cached result
        task_id = self.cache.get(cache_key)  # Check for the cached result
        if task_id:
            # We've already built this result, just latch on to the task that
            # did the work
            logging.info('Found existing cached and completed task: %s',
                         task_id)
            return self.AsyncResult(task_id)

        # Check for an in-progress equivalent task to avoid duplicating work
        task_id = self.cache.get(herd_key)
        if task_id:
            logging.info('Found existing in-progress task: %s', task_id)
            return self.AsyncResult(task_id)

        # It's not cached and it's not already running. Use an atomic lock to
        # start the task, ensuring there isn't a race condition that could
        # result in multiple identical tasks being fired at once.
        with self.cache.lock('lock:%s' % cache_key):
            # Another caller may have fired the task while we were waiting
            # for the lock; without this second look both callers would
            # queue their own copy one after the other.
            task_id = self.cache.get(herd_key)
            if task_id:
                logging.info('Found existing in-progress task: %s', task_id)
                return self.AsyncResult(task_id)

            task_meta = super(JobtasticTask, self).apply_async(
                args,
                kwargs,
                **options
            )
            logging.info('Current status: %s', task_meta.status)
            if task_meta.status in (PROGRESS, PENDING):
                self.cache.set(
                    herd_key,
                    task_meta.task_id,
                    timeout=self.herd_avoidance_timeout)
                logging.info(
                    'Setting herd-avoidance cache for task: %s', cache_key)
        return task_meta

    def calc_progress(self, completed_count, total_count):
        """
        Calculate the percentage progress and estimated remaining time based on
        the current number of items completed of the total.

        Returns a tuple of ``(percentage_complete, seconds_remaining)``:

        * ``(100, 1)`` when there is nothing to do (``total_count == 0``) or
          everything is done (one second is left to finish up).
        * ``(0, -1)`` when nothing has completed yet, since no estimate can
          be made; ``-1`` is the same "unknown" marker ``run`` reports when
          the task starts.
        """
        self.logger.debug(
            "calc_progress(%s, %s)",
            completed_count,
            total_count,
        )
        current_time = time.time()

        time_spent = current_time - self.start_time
        self.logger.debug("Progress time spent: %s", time_spent)

        if total_count == 0:
            return 100, 1

        # Force true division so integer counts don't truncate on Python 2.
        completion_fraction = float(completed_count) / total_count
        if completion_fraction == 0:
            # Treating "no progress" as a fraction of 1 would report the task
            # as 100% complete before any work has been done.
            return 0, -1

        total_time = time_spent / completion_fraction
        time_remaining = total_time - time_spent

        completion_display = completion_fraction * 100
        if completion_display == 100:
            return 100, 1  # 1 second to finish up

        return completion_display, time_remaining

    def update_progress(
        self,
        completed_count,
        total_count,
        update_frequency=1,
    ):
        """
        Update the task backend with both an estimated percentage complete and
        number of seconds remaining until completion.

        ``completed_count`` Number of task "units" that have been completed out
        of ``total_count`` total "units."
        ``update_frequency`` Only actually store the updated progress in the
        background at most every ``N`` ``completed_count``.
        """
        if completed_count - self._last_update_count < update_frequency:
            # We've updated the progress too recently. Don't stress out the
            # result backend
            return
        # Store progress for display
        progress_percent, time_remaining = self.calc_progress(
            completed_count, total_count)
        self.logger.debug("Updating progress: %s percent, %s remaining",
                          progress_percent, time_remaining)
        if self.request.id:
            self._last_update_count = completed_count
            self.update_state(None, PROGRESS, {
                "progress_percent": progress_percent,
                "time_remaining": time_remaining,
            })

    def run(self, *args, **kwargs):
        """
        Run ``calculate_result`` with progress, caching, herd-avoidance and
        memory-leak bookkeeping wrapped around it, and return its result.

        Any exception from ``calculate_result`` is re-raised after the
        herd-avoidance entry has been cleared.
        """
        if get_task_logger:
            self.logger = get_task_logger(self.__class__.__name__)
        else:
            # Celery 2.X fallback
            self.logger = self.get_logger(**kwargs)
        self.logger.info("Starting %s", self.__class__.__name__)

        self.cache_key = self._get_cache_key(**kwargs)

        # Record start time to give estimated time remaining estimates
        self.start_time = time.time()

        # Keep track of progress updates for update_frequency tracking
        self._last_update_count = 0

        # Report to the backend that work has been started.
        if self.request.id:
            self.update_state(None, PROGRESS, {
                "progress_percent": 0,
                "time_remaining": -1,
            })

        memleak_threshold = int(getattr(self, 'memleak_threshold', -1))
        if memleak_threshold >= 0:
            begining_memory_usage = self._get_memory_usage()

        self.logger.info("Calculating result")
        try:
            task_result = self.calculate_result(*args, **kwargs)
        except Exception:
            # Don't want other tasks waiting for this task to finish, since it
            # won't
            self._break_thundering_herd_cache()
            raise  # We can use normal celery exception handling for this

        # By default, don't cache
        cache_duration = getattr(self, 'cache_duration', -1)
        if cache_duration >= 0:
            # If we're configured to cache this result, do so.
            self.cache.set(self.cache_key, self.request.id, cache_duration)

        # Now that the task is finished, we can stop all of the thundering herd
        # avoidance
        self._break_thundering_herd_cache()

        if memleak_threshold >= 0:
            self._warn_if_leaking_memory(
                begining_memory_usage,
                self._get_memory_usage(),
                memleak_threshold,
                task_kwargs=kwargs,
            )

        return task_result

    def calculate_result(self, *args, **kwargs):
        """
        Do the actual work of the task. Subclasses must override this; the
        base implementation always raises ``NotImplementedError``.
        """
        raise NotImplementedError((
            "Tasks using JobtasticTask must implement "
            "their own calculate_result"
        ))

    @classmethod
    def _validate_required_class_vars(self):
        """
        Ensure that this subclass has defined all of the required class
        variables.
        """
        required_members = (
            'significant_kwargs',
            'herd_avoidance_timeout',
        )
        for required_member in required_members:
            if not hasattr(self, required_member):
                raise Exception(
                    "JobtasticTask's must define a %s" % required_member)

    def on_success(self, retval, task_id, args, kwargs):
        """
        Store results in the backend even if we're always eager. This ensures
        the `delay_or_run` calls always at least have results.
        """
        if self.request.is_eager:
            # Store the result because celery wouldn't otherwise
            self.update_state(task_id, SUCCESS, retval)

    def _break_thundering_herd_cache(self):
        """
        Delete the herd-avoidance entry for the cache key set by ``run``.
        """
        self.cache.delete('herd:%s' % self.cache_key)

    @classmethod
    def _get_cache(self):
        """
        Return the cache to use for thundering herd protection, etc.
        """
        # Compare against None rather than truthiness: a configured cache
        # object may legitimately be falsy (e.g. an empty dict-like cache)
        # and must not be replaced behind the caller's back.
        if self._cache is None:
            self._cache = get_cache(self.app)
        return self._cache

    @classmethod
    def _set_cache(self, cache):
        """
        Set the Jobtastic Cache for the Task

        The cache must support get/set (with timeout)/delete/lock (as a context
        manager).
        """
        self._cache = cache

    cache = class_property(_get_cache, _set_cache)

    @classmethod
    def _get_cache_key(self, **kwargs):
        """
        Take this task's configured ``significant_kwargs`` and build a hash
        that all equivalent task calls will match.

        Takes in kwargs and returns a string.

        Each ``significant_kwargs`` entry is either a ``(key, to_str)`` pair,
        a 1-item ``(key,)`` sequence, or a bare key; when no converter is
        given ``str`` is used.

        To change the way the cache key is generated or do more in-depth
        processing, override this method.
        """
        m = md5()
        for significant_kwarg in self.significant_kwargs:
            if isinstance(significant_kwarg, (tuple, list)):
                key = significant_kwarg[0]
                if len(significant_kwarg) > 1:
                    to_str = significant_kwarg[1]
                else:
                    to_str = str
            else:
                key, to_str = significant_kwarg, str

            # Convert once, outside the try, so a TypeError raised by the
            # converter itself is not mistaken for an encoding problem.
            value = to_str(kwargs[key])
            try:
                m.update(value)
            except TypeError:
                # Python 3.x strings aren't accepted by hash.update().
                # String should be byte-encoded first.
                m.update(value.encode('utf-8'))

        cache_prefix = getattr(
            self,
            'cache_prefix',
            '%s.%s' % (self.__module__, self.__name__),
        )
        return '%s:%s' % (cache_prefix, m.hexdigest())

    def _get_memory_usage(self):
        """
        Return the resident set size reported by ``psutil`` for this process.
        """
        current_process = psutil.Process(os.getpid())
        usage = current_process.memory_info()

        return usage.rss

    def _warn_if_leaking_memory(
        self, begining_usage, ending_usage, threshold, task_kwargs,
    ):
        """
        Call ``warn_of_memory_leak`` when memory grew by more than
        ``threshold`` megabytes (counted as 1,000,000 bytes each).
        """
        growth = ending_usage - begining_usage

        threshold_in_bytes = threshold * 1000000

        if growth > threshold_in_bytes:
            self.warn_of_memory_leak(
                growth,
                begining_usage,
                ending_usage,
                task_kwargs,
            )

    def warn_of_memory_leak(
        self, growth, begining_usage, ending_usage, task_kwargs,
    ):
        """
        Log the memory growth (as a warning) followed by the start/end usage
        and the task kwargs (as info). Byte counts are reported in MB.
        """
        self.logger.warning(
            "Jobtastic:memleak memleak_detected. memory_increase=%05d unit=MB",
            growth / 1000000,
        )
        self.logger.info(
            "Jobtastic:memleak memory_usage_start=%05d unit=MB",
            begining_usage / 1000000,
        )
        self.logger.info(
            "Jobtastic:memleak memory_usage_end=%05d unit=MB",
            ending_usage / 1000000,
        )
        self.logger.info(
            "Jobtastic:memleak task_kwargs=%s",
            repr(task_kwargs),
        )