class Huey(object): """ Huey executes tasks by exposing function decorators that cause the function call to be enqueued for execution by the consumer. Typically your application will only need one Huey instance, but you can have as many as you like -- the only caveat is that one consumer process must be executed for each Huey instance. :param name: a name for the task queue. :param bool result_store: whether to store task results. :param bool events: whether to enable consumer-sent events. :param store_none: Flag to indicate whether tasks that return ``None`` should store their results in the result store. :param always_eager: Useful for testing, this will execute all tasks immediately, without enqueueing them. :param store_errors: Flag to indicate whether task errors should be stored. :param global_registry: Use a global registry for tasks. Example usage:: from huey import RedisHuey # Create a huey instance and disable consumer-sent events. huey = RedisHuey('my-app', events=False) @huey.task() def slow_function(some_arg): # ... do something ... return some_arg @huey.periodic_task(crontab(minute='0', hour='3')) def backup(): # do a backup every day at 3am return """ def __init__(self, name='huey', result_store=True, events=True, store_none=False, always_eager=False, store_errors=True, blocking=False, global_registry=True, **storage_kwargs): self.name = name self.result_store = result_store self.events = events self.store_none = store_none self.always_eager = always_eager self.store_errors = store_errors self.blocking = blocking self.storage = self.get_storage(**storage_kwargs) if global_registry: self.registry = registry else: self.registry = TaskRegistry() def get_storage(self, **kwargs): raise NotImplementedError('Storage API not implemented in the base ' 'Huey class. Use `RedisHuey` instead.') def task(self, retries=0, retry_delay=0, retries_as_argument=False, include_task=False, name=None): def decorator(func): """ Decorator to execute a function out-of-band via the consumer. """ klass = create_task(QueueTask, func, retries_as_argument, name, include_task) self.registry.register(klass) def schedule(args=None, kwargs=None, eta=None, delay=None, convert_utc=True, task_id=None): if delay and eta: raise ValueError('Both a delay and an eta cannot be ' 'specified at the same time') if delay: eta = (datetime.datetime.now() + datetime.timedelta(seconds=delay)) if eta: if is_naive(eta) and convert_utc: eta = local_to_utc(eta) elif is_aware(eta) and convert_utc: eta = aware_to_utc(eta) elif is_aware(eta) and not convert_utc: eta = make_naive(eta) cmd = klass((args or (), kwargs or {}), execute_time=eta, retries=retries, retry_delay=retry_delay, task_id=task_id) return self.enqueue(cmd) func.schedule = schedule func.task_class = klass @wraps(func) def inner_run(*args, **kwargs): cmd = klass((args, kwargs), retries=retries, retry_delay=retry_delay) return self.enqueue(cmd) inner_run.call_local = func return inner_run return decorator def periodic_task(self, validate_datetime, name=None): """ Decorator to execute a function on a specific schedule. """ def decorator(func): def method_validate(self, dt): return validate_datetime(dt) klass = create_task( PeriodicQueueTask, func, task_name=name, validate_datetime=method_validate, ) self.registry.register(klass) func.task_class = klass def _revoke(revoke_until=None, revoke_once=False): self.revoke(klass(), revoke_until, revoke_once) func.revoke = _revoke def _is_revoked(dt=None, peek=True): return self.is_revoked(klass(), dt, peek) func.is_revoked = _is_revoked def _restore(): return self.restore(klass()) func.restore = _restore return func return decorator def _wrapped_operation(exc_class): def decorator(fn): def inner(*args, **kwargs): try: return fn(*args, **kwargs) except (KeyboardInterrupt, RuntimeError): raise except: wrap_exception(exc_class) return inner return decorator @_wrapped_operation(QueueWriteException) def _enqueue(self, msg): self.storage.enqueue(msg) @_wrapped_operation(QueueReadException) def _dequeue(self): return self.storage.dequeue() @_wrapped_operation(QueueRemoveException) def _unqueue(self, msg): return self.queue.unqueue(msg) @_wrapped_operation(DataStoreGetException) def _get_data(self, key, peek=False): if peek: return self.storage.peek_data(key) else: return self.storage.pop_data(key) @_wrapped_operation(DataStorePutException) def _put_data(self, key, value): return self.storage.put_data(key, value) @_wrapped_operation(DataStorePutException) def _put_error(self, metadata): self.storage.put_error(metadata) @_wrapped_operation(DataStoreGetException) def _get_errors(self, limit=None, offset=0): return self.storage.get_errors(limit=limit, offset=offset) @_wrapped_operation(ScheduleAddException) def _add_to_schedule(self, data, ts): self.storage.add_to_schedule(data, ts) @_wrapped_operation(ScheduleReadException) def _read_schedule(self, ts): return self.storage.read_schedule(ts) def emit(self, message): try: self.storage.emit(message) except: # Events always fail silently since they are treated as a non- # critical component. pass def enqueue(self, task): if self.always_eager: return task.execute() self._enqueue(self.registry.get_message_for_task(task)) if self.result_store: return TaskResultWrapper(self, task) def dequeue(self): message = self._dequeue() if message: return self.registry.get_task_for_message(message) def _format_time(self, dt): if dt is None: return None return time.mktime(dt.timetuple()) def _get_task_metadata(self, task, error=False, include_data=False): metadata = { 'id': task.task_id, 'task': type(task).__name__, 'retries': task.retries, 'retry_delay': task.retry_delay, 'execute_time': self._format_time(task.execute_time) } if include_data and not isinstance(task, PeriodicQueueTask): targs, tkwargs = task.get_data() if tkwargs.get("task") and isinstance(tkwargs["task"], QueueTask): del (tkwargs['task']) metadata['data'] = (targs, tkwargs) return metadata def emit_status(self, status, error=False, **data): if self.events: metadata = {'status': status, 'error': error} if error: metadata['traceback'] = traceback.format_exc() metadata.update(data) self.emit(json.dumps(metadata)) def emit_task(self, status, task, error=False, **data): if self.events: metadata = self._get_task_metadata(task) metadata.update(data) self.emit_status(status, error=error, **metadata) def execute(self, task): if not isinstance(task, QueueTask): raise TypeError('Unknown object: %s' % task) try: result = task.execute() except Exception as exc: if self.result_store and self.store_errors: metadata = self._get_task_metadata(task, True) metadata['error'] = exc metadata['traceback'] = traceback.format_exc() self._put_error(pickle.dumps(metadata)) raise if result is None and not self.store_none: return if self.result_store and not isinstance(task, PeriodicQueueTask): self._put_data(task.task_id, pickle.dumps(result)) return result def revoke(self, task, revoke_until=None, revoke_once=False): serialized = pickle.dumps((revoke_until, revoke_once)) self._put_data(task.revoke_id, serialized) def restore(self, task): # Return value indicates whether the task was in fact revoked. return self._get_data(task.revoke_id) is not EmptyData def revoke_by_id(self, task_id, revoke_until=None, revoke_once=False): return self.revoke(QueueTask(task_id=task_id), revoke_until, revoke_once) def restore_by_id(self, task_id): return self.restore(QueueTask(task_id=task_id)) def is_revoked(self, task, dt=None, peek=True): if not isinstance(task, QueueTask): task = QueueTask(task_id=task) res = self._get_data(task.revoke_id, peek=True) if res is EmptyData: return False revoke_until, revoke_once = pickle.loads(res) if revoke_once: # This task *was* revoked for one run, but now it should be # restored to normal execution. if not peek: self.restore(task) return True return revoke_until is None or revoke_until > dt def add_schedule(self, task): msg = self.registry.get_message_for_task(task) ex_time = task.execute_time or datetime.datetime.fromtimestamp(0) self._add_to_schedule(msg, ex_time) def read_schedule(self, ts): return [ self.registry.get_task_for_message(m) for m in self._read_schedule(ts) ] def read_periodic(self, ts): periodic = self.registry.get_periodic_tasks() return [task for task in periodic if task.validate_datetime(ts)] def ready_to_run(self, cmd, dt=None): dt = dt or datetime.datetime.utcnow() return cmd.execute_time is None or cmd.execute_time <= dt def pending(self, limit=None): return [ self.registry.get_task_for_message(m) for m in self.storage.enqueued_items(limit) ] def pending_count(self): return self.storage.queue_size() def scheduled(self, limit=None): return [ self.registry.get_task_for_message(m) for m in self.storage.scheduled_items(limit) ] def scheduled_count(self): return self.storage.schedule_size() def all_results(self): return self.storage.result_items() def result_count(self): return self.storage.result_store_size() def errors(self, limit=None, offset=0): return [ pickle.loads(error) for error in self.storage.get_errors(limit, offset) ] def __len__(self): return self.pending_count() def flush(self): self.storage.flush_all() def get_tasks(self): return sorted(self.registry._registry.keys()) def get_periodic_tasks(self): return [ name for name, task in self.registry._registry.items() if hasattr(task, 'validate_datetime') ] def get_regular_tasks(self): periodic = set(self.get_periodic_tasks()) return [task for task in self.get_tasks() if task not in periodic] def result(self, task_id, blocking=False, timeout=None, backoff=1.15, max_delay=1.0, revoke_on_timeout=False, preserve=False): """ Retrieve the results of a task, given the task's ID. This method accepts the same parameters and has the same behavior as the :py:class:`TaskResultWrapper` object. """ if not blocking: result = self._get_data(task_id, peek=preserve) if result is not EmptyData: return pickle.loads(result) else: task_result = TaskResultWrapper(self, QueueTask(task_id=task_id)) return task_result.get(blocking=blocking, timeout=timeout, backoff=backoff, max_delay=max_delay, revoke_on_timeout=revoke_on_timeout, preserve=preserve)
class Huey(object): """ Huey executes tasks by exposing function decorators that cause the function call to be enqueued for execution by the consumer. Typically your application will only need one Huey instance, but you can have as many as you like -- the only caveat is that one consumer process must be executed for each Huey instance. :param name: a name for the task queue. :param bool result_store: whether to store task results. :param bool events: whether to enable consumer-sent events. :param store_none: Flag to indicate whether tasks that return ``None`` should store their results in the result store. :param always_eager: Useful for testing, this will execute all tasks immediately, without enqueueing them. :param store_errors: Flag to indicate whether task errors should be stored. :param global_registry: Use a global registry for tasks. Example usage:: from huey import RedisHuey # Create a huey instance and disable consumer-sent events. huey = RedisHuey('my-app', events=False) @huey.task() def slow_function(some_arg): # ... do something ... return some_arg @huey.periodic_task(crontab(minute='0', hour='3')) def backup(): # do a backup every day at 3am return """ def __init__(self, name='huey', result_store=True, events=True, store_none=False, always_eager=False, store_errors=True, blocking=False, global_registry=True, **storage_kwargs): self.name = name self.result_store = result_store self.events = events self.store_none = store_none self.always_eager = always_eager self.store_errors = store_errors self.blocking = blocking self.storage = self.get_storage(**storage_kwargs) self.pre_execute_hooks = OrderedDict() self.post_execute_hooks = OrderedDict() self.startup_hooks = OrderedDict() self._locks = set() if global_registry: self.registry = registry else: self.registry = TaskRegistry() def get_storage(self, **kwargs): raise NotImplementedError('Storage API not implemented in the base ' 'Huey class. Use `RedisHuey` instead.') def create_consumer(self, **config): return Consumer(self, **config) def _normalize_execute_time(self, eta=None, delay=None, convert_utc=True): if delay and eta: raise ValueError('Both a delay and an eta cannot be ' 'specified at the same time') elif delay: method = (convert_utc and datetime.datetime.utcnow or datetime.datetime.now) return method() + datetime.timedelta(seconds=delay) elif eta: if is_naive(eta) and convert_utc: eta = local_to_utc(eta) elif is_aware(eta) and convert_utc: eta = aware_to_utc(eta) elif is_aware(eta) and not convert_utc: eta = make_naive(eta) return eta def task(self, retries=0, retry_delay=0, retries_as_argument=False, include_task=False, name=None, **task_settings): def decorator(func): """ Decorator to execute a function out-of-band via the consumer. """ return TaskWrapper( self, func.func if isinstance(func, TaskWrapper) else func, retries=retries, retry_delay=retry_delay, retries_as_argument=retries_as_argument, include_task=include_task, name=name, **task_settings) return decorator # We specify retries and retry_delay as 0 because they become the default # values as class attributes on the derived PeriodicQueueTask instance. # Since the values the class is instantiated with will always be `None`, # we want the fallback behavior to be 0 by default. def periodic_task(self, validate_datetime, name=None, retries=0, retry_delay=0, **task_settings): """ Decorator to execute a function on a specific schedule. """ def decorator(func): def method_validate(self, dt): return validate_datetime(dt) return TaskWrapper( self, func.func if isinstance(func, TaskWrapper) else func, name=name, task_base=PeriodicQueueTask, default_retries=retries, default_retry_delay=retry_delay, validate_datetime=method_validate, **task_settings) return decorator def register_pre_execute(self, name, fn): """ Register a pre-execute hook. The callback will be executed before the execution of all tasks. Execution of the task can be cancelled by raising a :py:class:`CancelExecution` exception. Uncaught exceptions will be logged but will not cause the task itself to be cancelled. The callback function should accept a single task instance, the return value is ignored. :param name: Name for the hook. :param fn: Callback function that accepts task to be executed. """ self.pre_execute_hooks[name] = fn def unregister_pre_execute(self, name): del self.pre_execute_hooks[name] def pre_execute(self, name=None): """ Decorator for registering a pre-execute hook. """ def decorator(fn): self.register_pre_execute(name or fn.__name__, fn) return fn return decorator def register_post_execute(self, name, fn): """ Register a post-execute hook. The callback will be executed after the execution of all tasks. Uncaught exceptions will be logged but will have no other effect on the overall operation of the consumer. The callback function should accept: * a task instance * the return value from the execution of the task (which may be None) * any exception that was raised during the execution of the task (which will be None for tasks that executed normally). The return value of the callback itself is ignored. :param name: Name for the hook. :param fn: Callback function that accepts task that was executed and the tasks return value (or None). """ self.post_execute_hooks[name] = fn def unregister_post_execute(self, name): del self.post_execute_hooks[name] def post_execute(self, name=None): """ Decorator for registering a post-execute hook. """ def decorator(fn): self.register_post_execute(name or fn.__name__, fn) return fn return decorator def register_startup(self, name, fn): """ Register a startup hook. The callback will be executed whenever a worker comes online. Uncaught exceptions will be logged but will have no other effect on the overall operation of the worker. The callback function must not accept any parameters. This API is provided to simplify setting up global resources that, for whatever reason, should not be created as import-time side-effects. For example, your tasks need to write data into a Postgres database. If you create the connection at import-time, before the worker processes are spawned, you'll likely run into errors when attempting to use the connection from the child (worker) processes. To avoid this problem, you can register a startup hook which is executed by the worker process as part of its initialization. :param name: Name for the hook. :param fn: Callback function. """ self.startup_hooks[name] = fn def unregister_startup(self, name): del self.startup_hooks[name] def on_startup(self, name=None): """ Decorator for registering a startup hook. """ def decorator(fn): self.register_startup(name or fn.__name__, fn) return fn return decorator def _wrapped_operation(exc_class): def decorator(fn): def inner(*args, **kwargs): try: return fn(*args, **kwargs) except (KeyboardInterrupt, RuntimeError): raise except: wrap_exception(exc_class) return inner return decorator @_wrapped_operation(QueueWriteException) def _enqueue(self, msg): self.storage.enqueue(msg) @_wrapped_operation(QueueReadException) def _dequeue(self): return self.storage.dequeue() @_wrapped_operation(QueueRemoveException) def _unqueue(self, msg): return self.queue.unqueue(msg) @_wrapped_operation(DataStoreGetException) def _get_data(self, key, peek=False): if peek: return self.storage.peek_data(key) else: return self.storage.pop_data(key) @_wrapped_operation(DataStorePutException) def _put_data(self, key, value): return self.storage.put_data(key, value) @_wrapped_operation(DataStorePutException) def _put_if_empty(self, key, value): return self.storage.put_if_empty(key, value) @_wrapped_operation(DataStorePutException) def _put_error(self, metadata): self.storage.put_error(metadata) @_wrapped_operation(DataStoreGetException) def _get_errors(self, limit=None, offset=0): return self.storage.get_errors(limit=limit, offset=offset) @_wrapped_operation(ScheduleAddException) def _add_to_schedule(self, data, ts): self.storage.add_to_schedule(data, ts) @_wrapped_operation(ScheduleReadException) def _read_schedule(self, ts): return self.storage.read_schedule(ts) def emit(self, message): try: self.storage.emit(message) except: # Events always fail silently since they are treated as a non- # critical component. pass def _execute_always_eager(self, task): accum = [] failure_exc = None while task is not None: for name, callback in self.pre_execute_hooks.items(): callback(task) try: result = task.execute() except Exception as exc: result = None failure_exc = task_exc = exc else: task_exc = None result_wrapper = EagerTaskResultWrapper(self, task) result_wrapper.set_result(result) accum.append(result_wrapper) for name, callback in self.post_execute_hooks.items(): callback(task, result, task_exc) if task.on_complete: task = task.on_complete task.extend_data(result) else: task = None if failure_exc is not None: raise failure_exc return accum[0] if len(accum) == 1 else accum def enqueue(self, task): if self.always_eager: return self._execute_always_eager(task) self._enqueue(self.registry.get_message_for_task(task)) if not self.result_store: return if task.on_complete: q = [task] result_wrappers = [] while q: current = q.pop() result_wrappers.append(TaskResultWrapper(self, current)) if current.on_complete: q.append(current.on_complete) return result_wrappers else: return TaskResultWrapper(self, task) def dequeue(self): message = self._dequeue() if message: return self.registry.get_task_for_message(message) def put(self, key, value): return self._put_data(key, pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) def get(self, key, peek=False): data = self._get_data(key, peek=peek) if data is EmptyData: return else: return pickle.loads(data) def put_error(self, metadata): return self._put_error(pickle.dumps(metadata)) def emit_status(self, status, error=False, timestamp=None, **data): if self.events: if timestamp is not None: data['timestamp'] = time.mktime(timestamp.timetuple()) metadata = {'status': status, 'error': error} if error: metadata['traceback'] = traceback.format_exc() metadata.update(data) self.emit(json.dumps(metadata)) def emit_task(self, status, task, error=False, **data): if self.events: metadata = task.get_metadata() metadata.update(data) self.emit_status(status, error=error, **metadata) def execute(self, task): if not isinstance(task, QueueTask): raise TypeError('Unknown object: %s' % task) try: result = task.execute() except Exception as exc: if self.store_errors: metadata = task.get_metadata() metadata['error'] = repr(exc) metadata['traceback'] = traceback.format_exc() self.put(task.task_id, Error(metadata)) self.put_error(metadata) raise if self.result_store and not isinstance(task, PeriodicQueueTask): if result is not None or self.store_none: self.put(task.task_id, result) if task.on_complete: next_task = task.on_complete next_task.extend_data(result) self.enqueue(next_task) return result def revoke_all(self, task_class, revoke_until=None, revoke_once=False): self.put('rt:%s' % task_class.__name__, (revoke_until, revoke_once)) def restore_all(self, task_class): return self._get_data('rt:%s' % task_class.__name__) is not EmptyData def revoke(self, task, revoke_until=None, revoke_once=False): self.put(task.revoke_id, (revoke_until, revoke_once)) def restore(self, task): # Return value indicates whether the task was in fact revoked. return self._get_data(task.revoke_id) is not EmptyData def revoke_by_id(self, task_id, revoke_until=None, revoke_once=False): return self.revoke(QueueTask(task_id=task_id), revoke_until, revoke_once) def restore_by_id(self, task_id): return self.restore(QueueTask(task_id=task_id)) def _check_revoked(self, revoke_id, dt=None, peek=True): """ Checks if a task is revoked, returns a 2-tuple indicating: 1. Is task revoked? 2. Should task be restored? """ res = self.get(revoke_id, peek=True) if res is None: return False, False revoke_until, revoke_once = res if revoke_once: # This task *was* revoked for one run, but now it should be # restored to normal execution (unless we are just peeking). return True, not peek elif revoke_until is not None and revoke_until <= dt: # Task is no longer revoked and can be restored. return False, True else: # Task is still revoked. Do not restore. return True, False def is_revoked(self, task, dt=None, peek=True): if isclass(task) and issubclass(task, QueueTask): revoke_id = 'rt:%s' % task.__name__ is_revoked, can_restore = self._check_revoked(revoke_id, dt, peek) if can_restore: self.restore_all(task) return is_revoked if not isinstance(task, QueueTask): task = QueueTask(task_id=task) is_revoked, can_restore = self._check_revoked(task.revoke_id, dt, peek) if can_restore: self.restore(task) if not is_revoked: is_revoked = self.is_revoked(type(task), dt, peek) return is_revoked def add_schedule(self, task): msg = self.registry.get_message_for_task(task) ex_time = task.execute_time or datetime.datetime.fromtimestamp(0) self._add_to_schedule(msg, ex_time) def read_schedule(self, ts): return [self.registry.get_task_for_message(m) for m in self._read_schedule(ts)] def read_periodic(self, ts): periodic = self.registry.get_periodic_tasks() return [task for task in periodic if task.validate_datetime(ts)] def ready_to_run(self, cmd, dt=None): dt = dt or datetime.datetime.utcnow() return cmd.execute_time is None or cmd.execute_time <= dt def pending(self, limit=None): return [self.registry.get_task_for_message(m) for m in self.storage.enqueued_items(limit)] def pending_count(self): return self.storage.queue_size() def scheduled(self, limit=None): return [self.registry.get_task_for_message(m) for m in self.storage.scheduled_items(limit)] def scheduled_count(self): return self.storage.schedule_size() def all_results(self): return self.storage.result_items() def result_count(self): return self.storage.result_store_size() def errors(self, limit=None, offset=0): return [ pickle.loads(error) for error in self.storage.get_errors(limit, offset)] def __len__(self): return self.pending_count() def flush(self): self.storage.flush_all() def get_tasks(self): return sorted(self.registry._registry.keys()) def get_periodic_tasks(self): return [name for name, task in self.registry._registry.items() if hasattr(task, 'validate_datetime')] def get_regular_tasks(self): periodic = set(self.get_periodic_tasks()) return [task for task in self.get_tasks() if task not in periodic] def lock_task(self, lock_name): """ Utilize the Storage key/value APIs to implement simple locking. This lock is designed to be used to prevent multiple invocations of a task from running concurrently. Can be used as either a context-manager or as a task decorator. If using as a decorator, place it directly above the function declaration. If a second invocation occurs and the lock cannot be acquired, then a special exception is raised, which is handled by the consumer. The task will not be executed and an ``EVENT_LOCKED`` will be emitted. If the task is configured to be retried, then it will be retried normally, but the failure to acquire the lock is not considered an error. Examples: @huey.periodic_task(crontab(minute='*/5')) @huey.lock_task('reports-lock') def generate_report(): # If a report takes longer than 5 minutes to generate, we do # not want to kick off another until the previous invocation # has finished. run_report() @huey.periodic_task(crontab(minute='0')) def backup(): # Generate backup of code do_code_backup() # Generate database backup. Since this may take longer than an # hour, we want to ensure that it is not run concurrently. with huey.lock_task('db-backup'): do_db_backup() """ return TaskLock(self, lock_name) def flush_locks(self): """ Flush any stale locks (for example, when restarting the consumer). :return: List of any stale locks that were cleared. """ flushed = set() for lock_key in self._locks: if self._get_data(lock_key) is not EmptyData: flushed.add(lock_key.split('.lock.', 1)[-1]) return flushed def result(self, task_id, blocking=False, timeout=None, backoff=1.15, max_delay=1.0, revoke_on_timeout=False, preserve=False): """ Retrieve the results of a task, given the task's ID. This method accepts the same parameters and has the same behavior as the :py:class:`TaskResultWrapper` object. """ task_result = TaskResultWrapper(self, QueueTask(task_id=task_id)) return task_result.get( blocking=blocking, timeout=timeout, backoff=backoff, max_delay=max_delay, revoke_on_timeout=revoke_on_timeout, preserve=preserve)
class Huey(object): """ Huey executes tasks by exposing function decorators that cause the function call to be enqueued for execution by the consumer. Typically your application will only need one Huey instance, but you can have as many as you like -- the only caveat is that one consumer process must be executed for each Huey instance. :param name: a name for the task queue. :param bool result_store: whether to store task results. :param bool events: whether to enable consumer-sent events. :param store_none: Flag to indicate whether tasks that return ``None`` should store their results in the result store. :param always_eager: Useful for testing, this will execute all tasks immediately, without enqueueing them. :param store_errors: Flag to indicate whether task errors should be stored. :param global_registry: Use a global registry for tasks. Example usage:: from huey import RedisHuey # Create a huey instance and disable consumer-sent events. huey = RedisHuey('my-app', events=False) @huey.task() def slow_function(some_arg): # ... do something ... return some_arg @huey.periodic_task(crontab(minute='0', hour='3')) def backup(): # do a backup every day at 3am return """ def __init__(self, name='huey', result_store=True, events=True, store_none=False, always_eager=False, store_errors=True, blocking=False, global_registry=True, **storage_kwargs): self.name = name self.result_store = result_store self.events = events self.store_none = store_none self.always_eager = always_eager self.store_errors = store_errors self.blocking = blocking self.storage = self.get_storage(**storage_kwargs) self.pre_execute_hooks = OrderedDict() self.post_execute_hooks = OrderedDict() self._locks = set() if global_registry: self.registry = registry else: self.registry = TaskRegistry() def get_storage(self, **kwargs): raise NotImplementedError('Storage API not implemented in the base ' 'Huey class. Use `RedisHuey` instead.') def create_consumer(self, **config): return Consumer(self, **config) def _normalize_execute_time(self, eta=None, delay=None, convert_utc=True): if delay and eta: raise ValueError('Both a delay and an eta cannot be ' 'specified at the same time') elif delay: method = (convert_utc and datetime.datetime.utcnow or datetime.datetime.now) return method() + datetime.timedelta(seconds=delay) elif eta: if is_naive(eta) and convert_utc: eta = local_to_utc(eta) elif is_aware(eta) and convert_utc: eta = aware_to_utc(eta) elif is_aware(eta) and not convert_utc: eta = make_naive(eta) return eta def task(self, retries=0, retry_delay=0, retries_as_argument=False, include_task=False, name=None, **task_settings): def decorator(func): """ Decorator to execute a function out-of-band via the consumer. """ return TaskWrapper( self, func.func if isinstance(func, TaskWrapper) else func, retries=retries, retry_delay=retry_delay, retries_as_argument=retries_as_argument, include_task=include_task, name=name, **task_settings) return decorator # We specify retries and retry_delay as 0 because they become the default # values as class attributes on the derived PeriodicQueueTask instance. # Since the values the class is instantiated with will always be `None`, # we want the fallback behavior to be 0 by default. def periodic_task(self, validate_datetime, name=None, retries=0, retry_delay=0, **task_settings): """ Decorator to execute a function on a specific schedule. """ def decorator(func): def method_validate(self, dt): return validate_datetime(dt) return TaskWrapper( self, func.func if isinstance(func, TaskWrapper) else func, name=name, task_base=PeriodicQueueTask, default_retries=retries, default_retry_delay=retry_delay, validate_datetime=method_validate, **task_settings) return decorator def register_pre_execute(self, name, fn): """ Register a pre-execute hook. The callback will be executed before the execution of all tasks. Execution of the task can be cancelled by raising a :py:class:`CancelExecution` exception. Uncaught exceptions will be logged but will not cause the task itself to be cancelled. The callback function should accept a single task instance, the return value is ignored. :param name: Name for the hook. :param fn: Callback function that accepts task to be executed. """ self.pre_execute_hooks[name] = fn def unregister_pre_execute(self, name): del self.pre_execute_hooks[name] def pre_execute(self, name=None): """ Decorator for registering a pre-execute hook. """ def decorator(fn): self.register_pre_execute(name or fn.__name__, fn) return fn return decorator def register_post_execute(self, name, fn): """ Register a post-execute hook. The callback will be executed after the execution of all tasks. Uncaught exceptions will be logged but will have no other effect on the overall operation of the consumer. The callback function should accept: * a task instance * the return value from the execution of the task (which may be None) * any exception that was raised during the execution of the task (which will be None for tasks that executed normally). The return value of the callback itself is ignored. :param name: Name for the hook. :param fn: Callback function that accepts task that was executed and the tasks return value (or None). """ self.post_execute_hooks[name] = fn def unregister_post_execute(self, name): del self.post_execute_hooks[name] def post_execute(self, name=None): """ Decorator for registering a post-execute hook. """ def decorator(fn): self.register_post_execute(name or fn.__name__, fn) return fn return decorator def _wrapped_operation(exc_class): def decorator(fn): def inner(*args, **kwargs): try: return fn(*args, **kwargs) except (KeyboardInterrupt, RuntimeError): raise except: wrap_exception(exc_class) return inner return decorator @_wrapped_operation(QueueWriteException) def _enqueue(self, msg): self.storage.enqueue(msg) @_wrapped_operation(QueueReadException) def _dequeue(self): return self.storage.dequeue() @_wrapped_operation(QueueRemoveException) def _unqueue(self, msg): return self.queue.unqueue(msg) @_wrapped_operation(DataStoreGetException) def _get_data(self, key, peek=False): if peek: return self.storage.peek_data(key) else: return self.storage.pop_data(key) @_wrapped_operation(DataStorePutException) def _put_data(self, key, value): return self.storage.put_data(key, value) @_wrapped_operation(DataStorePutException) def _put_if_empty(self, key, value): return self.storage.put_if_empty(key, value) @_wrapped_operation(DataStorePutException) def _put_error(self, metadata): self.storage.put_error(metadata) @_wrapped_operation(DataStoreGetException) def _get_errors(self, limit=None, offset=0): return self.storage.get_errors(limit=limit, offset=offset) @_wrapped_operation(ScheduleAddException) def _add_to_schedule(self, data, ts): self.storage.add_to_schedule(data, ts) @_wrapped_operation(ScheduleReadException) def _read_schedule(self, ts): return self.storage.read_schedule(ts) def emit(self, message): try: self.storage.emit(message) except: # Events always fail silently since they are treated as a non- # critical component. pass def enqueue(self, task): if self.always_eager: return task.execute() self._enqueue(self.registry.get_message_for_task(task)) if not self.result_store: return if task.on_complete: q = [task] result_wrappers = [] while q: current = q.pop() result_wrappers.append(TaskResultWrapper(self, current)) if current.on_complete: q.append(current.on_complete) return result_wrappers else: return TaskResultWrapper(self, task) def dequeue(self): message = self._dequeue() if message: return self.registry.get_task_for_message(message) def put(self, key, value): return self._put_data(key, pickle.dumps(value, pickle.HIGHEST_PROTOCOL)) def get(self, key, peek=False): data = self._get_data(key, peek=peek) if data is EmptyData: return else: return pickle.loads(data) def put_error(self, metadata): return self._put_error(pickle.dumps(metadata)) def _format_time(self, dt): if dt is None: return None return time.mktime(dt.timetuple()) def _get_task_metadata(self, task, error=False, include_data=False): metadata = { 'id': task.task_id, 'task': type(task).__name__, 'retries': task.retries, 'retry_delay': task.retry_delay, 'execute_time': self._format_time(task.execute_time)} if include_data and not isinstance(task, PeriodicQueueTask): targs, tkwargs = task.get_data() if tkwargs.get("task") and isinstance(tkwargs["task"], QueueTask): del(tkwargs['task']) metadata['data'] = (targs, tkwargs) return metadata def emit_status(self, status, error=False, **data): if self.events: metadata = {'status': status, 'error': error} if error: metadata['traceback'] = traceback.format_exc() metadata.update(data) self.emit(json.dumps(metadata)) def emit_task(self, status, task, error=False, **data): if self.events: metadata = self._get_task_metadata(task) metadata.update(data) self.emit_status(status, error=error, **metadata) def execute(self, task): if not isinstance(task, QueueTask): raise TypeError('Unknown object: %s' % task) try: result = task.execute() except Exception as exc: if self.store_errors: metadata = self._get_task_metadata(task, True) metadata['error'] = repr(exc) metadata['traceback'] = traceback.format_exc() self.put(task.task_id, Error(metadata)) self.put_error(metadata) raise if self.result_store and not isinstance(task, PeriodicQueueTask): if result is not None or self.store_none: self.put(task.task_id, result) if task.on_complete: next_task = task.on_complete next_task.extend_data(result) self.enqueue(next_task) return result def revoke_all(self, task_class, revoke_until=None, revoke_once=False): self.put('rt:%s' % task_class.__name__, (revoke_until, revoke_once)) def restore_all(self, task_class): return self._get_data('rt:%s' % task_class.__name__) is not EmptyData def revoke(self, task, revoke_until=None, revoke_once=False): self.put(task.revoke_id, (revoke_until, revoke_once)) def restore(self, task): # Return value indicates whether the task was in fact revoked. return self._get_data(task.revoke_id) is not EmptyData def revoke_by_id(self, task_id, revoke_until=None, revoke_once=False): return self.revoke(QueueTask(task_id=task_id), revoke_until, revoke_once) def restore_by_id(self, task_id): return self.restore(QueueTask(task_id=task_id)) def _check_revoked(self, revoke_id, dt=None, peek=True): """ Checks if a task is revoked, returns a 2-tuple indicating: 1. Is task revoked? 2. Should task be restored? """ res = self.get(revoke_id, peek=True) if res is None: return False, False revoke_until, revoke_once = res if revoke_once: # This task *was* revoked for one run, but now it should be # restored to normal execution (unless we are just peeking). return True, not peek elif revoke_until is not None and revoke_until <= dt: # Task is no longer revoked and can be restored. return False, True else: # Task is still revoked. Do not restore. return True, False def is_revoked(self, task, dt=None, peek=True): if isclass(task) and issubclass(task, QueueTask): revoke_id = 'rt:%s' % task.__name__ is_revoked, can_restore = self._check_revoked(revoke_id, dt, peek) if can_restore: self.restore_all(task) return is_revoked if not isinstance(task, QueueTask): task = QueueTask(task_id=task) is_revoked, can_restore = self._check_revoked(task.revoke_id, dt, peek) if can_restore: self.restore(task) if not is_revoked: is_revoked = self.is_revoked(type(task), dt, peek) return is_revoked def add_schedule(self, task): msg = self.registry.get_message_for_task(task) ex_time = task.execute_time or datetime.datetime.fromtimestamp(0) self._add_to_schedule(msg, ex_time) def read_schedule(self, ts): return [self.registry.get_task_for_message(m) for m in self._read_schedule(ts)] def read_periodic(self, ts): periodic = self.registry.get_periodic_tasks() return [task for task in periodic if task.validate_datetime(ts)] def ready_to_run(self, cmd, dt=None): dt = dt or datetime.datetime.utcnow() return cmd.execute_time is None or cmd.execute_time <= dt def pending(self, limit=None): return [self.registry.get_task_for_message(m) for m in self.storage.enqueued_items(limit)] def pending_count(self): return self.storage.queue_size() def scheduled(self, limit=None): return [self.registry.get_task_for_message(m) for m in self.storage.scheduled_items(limit)] def scheduled_count(self): return self.storage.schedule_size() def all_results(self): return self.storage.result_items() def result_count(self): return self.storage.result_store_size() def errors(self, limit=None, offset=0): return [ pickle.loads(error) for error in self.storage.get_errors(limit, offset)] def __len__(self): return self.pending_count() def flush(self): self.storage.flush_all() def get_tasks(self): return sorted(self.registry._registry.keys()) def get_periodic_tasks(self): return [name for name, task in self.registry._registry.items() if hasattr(task, 'validate_datetime')] def get_regular_tasks(self): periodic = set(self.get_periodic_tasks()) return [task for task in self.get_tasks() if task not in periodic] def lock_task(self, lock_name): """ Utilize the Storage key/value APIs to implement simple locking. This lock is designed to be used to prevent multiple invocations of a task from running concurrently. Can be used as either a context-manager or as a task decorator. If using as a decorator, place it directly above the function declaration. If a second invocation occurs and the lock cannot be acquired, then a special exception is raised, which is handled by the consumer. The task will not be executed and an ``EVENT_LOCKED`` will be emitted. If the task is configured to be retried, then it will be retried normally, but the failure to acquire the lock is not considered an error. Examples: @huey.periodic_task(crontab(minute='*/5')) @huey.lock_task('reports-lock') def generate_report(): # If a report takes longer than 5 minutes to generate, we do # not want to kick off another until the previous invocation # has finished. run_report() @huey.periodic_task(crontab(minute='0')) def backup(): # Generate backup of code do_code_backup() # Generate database backup. Since this may take longer than an # hour, we want to ensure that it is not run concurrently. with huey.lock_task('db-backup'): do_db_backup() """ return TaskLock(self, lock_name) def flush_locks(self): """ Flush any stale locks (for example, when restarting the consumer). :return: List of any stale locks that were cleared. """ flushed = set() for lock_key in self._locks: if self._get_data(lock_key) is not EmptyData: flushed.add(lock_key.split('.lock.', 1)[-1]) return flushed def result(self, task_id, blocking=False, timeout=None, backoff=1.15, max_delay=1.0, revoke_on_timeout=False, preserve=False): """ Retrieve the results of a task, given the task's ID. This method accepts the same parameters and has the same behavior as the :py:class:`TaskResultWrapper` object. """ if not blocking: return self.get(task_id, peek=preserve) else: task_result = TaskResultWrapper(self, QueueTask(task_id=task_id)) return task_result.get( blocking=blocking, timeout=timeout, backoff=backoff, max_delay=max_delay, revoke_on_timeout=revoke_on_timeout, preserve=preserve)