Example #1
File: api.py Project: Kopei/huey
class Huey(object):
    """
    Huey executes tasks by exposing function decorators that cause the function
    call to be enqueued for execution by the consumer.

    Typically your application will only need one Huey instance, but you can
    have as many as you like -- the only caveat is that one consumer process
    must be executed for each Huey instance.

    :param name: a name for the task queue.
    :param bool result_store: whether to store task results.
    :param bool events: whether to enable consumer-sent events.
    :param store_none: Flag to indicate whether tasks that return ``None``
        should store their results in the result store.
    :param always_eager: Useful for testing, this will execute all tasks
        immediately, without enqueueing them.
    :param store_errors: Flag to indicate whether task errors should be stored.
    :param global_registry: Use a global registry for tasks.

    Example usage::

        from huey import RedisHuey

        # Create a huey instance and disable consumer-sent events.
        huey = RedisHuey('my-app', events=False)

        @huey.task()
        def slow_function(some_arg):
            # ... do something ...
            return some_arg

        @huey.periodic_task(crontab(minute='0', hour='3'))
        def backup():
            # do a backup every day at 3am
            return
    """
    def __init__(self,
                 name='huey',
                 result_store=True,
                 events=True,
                 store_none=False,
                 always_eager=False,
                 store_errors=True,
                 blocking=False,
                 global_registry=True,
                 **storage_kwargs):
        self.name = name
        self.result_store = result_store
        self.events = events
        self.store_none = store_none
        self.always_eager = always_eager
        self.store_errors = store_errors
        self.blocking = blocking
        self.storage = self.get_storage(**storage_kwargs)
        if global_registry:
            self.registry = registry
        else:
            self.registry = TaskRegistry()

    def get_storage(self, **kwargs):
        raise NotImplementedError('Storage API not implemented in the base '
                                  'Huey class. Use `RedisHuey` instead.')
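
    # A minimal sketch of wiring in a storage backend by overriding the hook
    # above (hedged; `MyStorage` is a hypothetical storage implementation):
    #
    #     class MyHuey(Huey):
    #         def get_storage(self, **kwargs):
    #             return MyStorage(name=self.name, **kwargs)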

    def task(self,
             retries=0,
             retry_delay=0,
             retries_as_argument=False,
             include_task=False,
             name=None):
        def decorator(func):
            """
            Decorator to execute a function out-of-band via the consumer.
            """
            klass = create_task(QueueTask, func, retries_as_argument, name,
                                include_task)
            self.registry.register(klass)

            def schedule(args=None,
                         kwargs=None,
                         eta=None,
                         delay=None,
                         convert_utc=True,
                         task_id=None):
                if delay and eta:
                    raise ValueError('Both a delay and an eta cannot be '
                                     'specified at the same time')
                if delay:
                    eta = (datetime.datetime.now() +
                           datetime.timedelta(seconds=delay))
                if eta:
                    if is_naive(eta) and convert_utc:
                        eta = local_to_utc(eta)
                    elif is_aware(eta) and convert_utc:
                        eta = aware_to_utc(eta)
                    elif is_aware(eta) and not convert_utc:
                        eta = make_naive(eta)
                cmd = klass((args or (), kwargs or {}),
                            execute_time=eta,
                            retries=retries,
                            retry_delay=retry_delay,
                            task_id=task_id)
                return self.enqueue(cmd)

            func.schedule = schedule
            func.task_class = klass

            @wraps(func)
            def inner_run(*args, **kwargs):
                cmd = klass((args, kwargs),
                            retries=retries,
                            retry_delay=retry_delay)
                return self.enqueue(cmd)

            inner_run.call_local = func
            return inner_run

        return decorator
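
    # A usage sketch for the decorator above (hedged; `huey` is a configured
    # RedisHuey instance and `add` is a hypothetical task function):
    #
    #     @huey.task(retries=2, retry_delay=10)
    #     def add(a, b):
    #         return a + b
    #
    #     res = add(1, 2)                      # enqueues; returns TaskResultWrapper
    #     add.schedule(args=(3, 4), delay=60)  # execute roughly 60 seconds from now
    #     add.call_local(1, 2)                 # run synchronously, bypassing the queue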

    def periodic_task(self, validate_datetime, name=None):
        """
        Decorator to execute a function on a specific schedule.
        """
        def decorator(func):
            def method_validate(self, dt):
                return validate_datetime(dt)

            klass = create_task(
                PeriodicQueueTask,
                func,
                task_name=name,
                validate_datetime=method_validate,
            )
            self.registry.register(klass)

            func.task_class = klass

            def _revoke(revoke_until=None, revoke_once=False):
                self.revoke(klass(), revoke_until, revoke_once)

            func.revoke = _revoke

            def _is_revoked(dt=None, peek=True):
                return self.is_revoked(klass(), dt, peek)

            func.is_revoked = _is_revoked

            def _restore():
                return self.restore(klass())

            func.restore = _restore

            return func

        return decorator
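
    # A sketch of the revoke helpers attached above (hedged; `backup` is the
    # hypothetical periodic task from the class docstring):
    #
    #     backup.revoke(revoke_once=True)  # skip only the next scheduled run
    #     backup.is_revoked()              # True while the revocation is active
    #     backup.restore()                 # lift the revocation early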

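    # Note: `_wrapped_operation` is a decorator factory evaluated at class
    # definition time, so it deliberately takes no `self`; it converts storage
    # failures into the given exception class via `wrap_exception`.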
    def _wrapped_operation(exc_class):
        def decorator(fn):
            def inner(*args, **kwargs):
                try:
                    return fn(*args, **kwargs)
                except (KeyboardInterrupt, RuntimeError):
                    raise
                except:
                    wrap_exception(exc_class)

            return inner

        return decorator

    @_wrapped_operation(QueueWriteException)
    def _enqueue(self, msg):
        self.storage.enqueue(msg)

    @_wrapped_operation(QueueReadException)
    def _dequeue(self):
        return self.storage.dequeue()

    @_wrapped_operation(QueueRemoveException)
    def _unqueue(self, msg):
        return self.storage.unqueue(msg)

    @_wrapped_operation(DataStoreGetException)
    def _get_data(self, key, peek=False):
        if peek:
            return self.storage.peek_data(key)
        else:
            return self.storage.pop_data(key)

    @_wrapped_operation(DataStorePutException)
    def _put_data(self, key, value):
        return self.storage.put_data(key, value)

    @_wrapped_operation(DataStorePutException)
    def _put_error(self, metadata):
        self.storage.put_error(metadata)

    @_wrapped_operation(DataStoreGetException)
    def _get_errors(self, limit=None, offset=0):
        return self.storage.get_errors(limit=limit, offset=offset)

    @_wrapped_operation(ScheduleAddException)
    def _add_to_schedule(self, data, ts):
        self.storage.add_to_schedule(data, ts)

    @_wrapped_operation(ScheduleReadException)
    def _read_schedule(self, ts):
        return self.storage.read_schedule(ts)

    def emit(self, message):
        try:
            self.storage.emit(message)
        except:
            # Events always fail silently since they are treated as a non-
            # critical component.
            pass

    def enqueue(self, task):
        if self.always_eager:
            return task.execute()

        self._enqueue(self.registry.get_message_for_task(task))

        if self.result_store:
            return TaskResultWrapper(self, task)

    def dequeue(self):
        message = self._dequeue()
        if message:
            return self.registry.get_task_for_message(message)

    def _format_time(self, dt):
        if dt is None:
            return None
        return time.mktime(dt.timetuple())

    def _get_task_metadata(self, task, error=False, include_data=False):
        metadata = {
            'id': task.task_id,
            'task': type(task).__name__,
            'retries': task.retries,
            'retry_delay': task.retry_delay,
            'execute_time': self._format_time(task.execute_time)
        }
        if include_data and not isinstance(task, PeriodicQueueTask):
            targs, tkwargs = task.get_data()
            if tkwargs.get("task") and isinstance(tkwargs["task"], QueueTask):
                del (tkwargs['task'])
            metadata['data'] = (targs, tkwargs)

        return metadata

    def emit_status(self, status, error=False, **data):
        if self.events:
            metadata = {'status': status, 'error': error}
            if error:
                metadata['traceback'] = traceback.format_exc()
            metadata.update(data)
            self.emit(json.dumps(metadata))

    def emit_task(self, status, task, error=False, **data):
        if self.events:
            metadata = self._get_task_metadata(task)
            metadata.update(data)
            self.emit_status(status, error=error, **metadata)

    def execute(self, task):
        if not isinstance(task, QueueTask):
            raise TypeError('Unknown object: %s' % task)

        try:
            result = task.execute()
        except Exception as exc:
            if self.result_store and self.store_errors:
                metadata = self._get_task_metadata(task, True)
                metadata['error'] = exc
                metadata['traceback'] = traceback.format_exc()
                self._put_error(pickle.dumps(metadata))
            raise

        if result is None and not self.store_none:
            return

        if self.result_store and not isinstance(task, PeriodicQueueTask):
            self._put_data(task.task_id, pickle.dumps(result))

        return result

    def revoke(self, task, revoke_until=None, revoke_once=False):
        serialized = pickle.dumps((revoke_until, revoke_once))
        self._put_data(task.revoke_id, serialized)

    def restore(self, task):
        # Return value indicates whether the task was in fact revoked.
        return self._get_data(task.revoke_id) is not EmptyData

    def revoke_by_id(self, task_id, revoke_until=None, revoke_once=False):
        return self.revoke(QueueTask(task_id=task_id), revoke_until,
                           revoke_once)

    def restore_by_id(self, task_id):
        return self.restore(QueueTask(task_id=task_id))

    def is_revoked(self, task, dt=None, peek=True):
        if not isinstance(task, QueueTask):
            task = QueueTask(task_id=task)
        res = self._get_data(task.revoke_id, peek=True)
        if res is EmptyData:
            return False
        revoke_until, revoke_once = pickle.loads(res)
        if revoke_once:
            # This task *was* revoked for one run, but now it should be
            # restored to normal execution.
            if not peek:
                self.restore(task)
            return True
        return revoke_until is None or revoke_until > dt

    def add_schedule(self, task):
        msg = self.registry.get_message_for_task(task)
        ex_time = task.execute_time or datetime.datetime.fromtimestamp(0)
        self._add_to_schedule(msg, ex_time)

    def read_schedule(self, ts):
        return [
            self.registry.get_task_for_message(m)
            for m in self._read_schedule(ts)
        ]

    def read_periodic(self, ts):
        periodic = self.registry.get_periodic_tasks()
        return [task for task in periodic if task.validate_datetime(ts)]

    def ready_to_run(self, cmd, dt=None):
        dt = dt or datetime.datetime.utcnow()
        return cmd.execute_time is None or cmd.execute_time <= dt

    def pending(self, limit=None):
        return [
            self.registry.get_task_for_message(m)
            for m in self.storage.enqueued_items(limit)
        ]

    def pending_count(self):
        return self.storage.queue_size()

    def scheduled(self, limit=None):
        return [
            self.registry.get_task_for_message(m)
            for m in self.storage.scheduled_items(limit)
        ]

    def scheduled_count(self):
        return self.storage.schedule_size()

    def all_results(self):
        return self.storage.result_items()

    def result_count(self):
        return self.storage.result_store_size()

    def errors(self, limit=None, offset=0):
        return [
            pickle.loads(error)
            for error in self.storage.get_errors(limit, offset)
        ]

    def __len__(self):
        return self.pending_count()

    def flush(self):
        self.storage.flush_all()

    def get_tasks(self):
        return sorted(self.registry._registry.keys())

    def get_periodic_tasks(self):
        return [
            name for name, task in self.registry._registry.items()
            if hasattr(task, 'validate_datetime')
        ]

    def get_regular_tasks(self):
        periodic = set(self.get_periodic_tasks())
        return [task for task in self.get_tasks() if task not in periodic]

    def result(self,
               task_id,
               blocking=False,
               timeout=None,
               backoff=1.15,
               max_delay=1.0,
               revoke_on_timeout=False,
               preserve=False):
        """
        Retrieve the result of a task, given the task's ID. This method
        accepts the same parameters and has the same behavior as
        :py:meth:`TaskResultWrapper.get`.
        """
        if not blocking:
            result = self._get_data(task_id, peek=preserve)
            if result is not EmptyData:
                return pickle.loads(result)
        else:
            task_result = TaskResultWrapper(self, QueueTask(task_id=task_id))
            return task_result.get(blocking=blocking,
                                   timeout=timeout,
                                   backoff=backoff,
                                   max_delay=max_delay,
                                   revoke_on_timeout=revoke_on_timeout,
                                   preserve=preserve)
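
# A short usage sketch for the listing above (hedged; assumes the `huey`
# instance and `slow_function` task from the class docstring):
#
#     res = slow_function('some data')    # returns a TaskResultWrapper
#     res.get(blocking=True, timeout=5)   # block until the consumer finishes
#     huey.result(task_id)                # or fetch by a saved task ID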
Example #2
class Huey(object):
    """
    Huey executes tasks by exposing function decorators that cause the function
    call to be enqueued for execution by the consumer.

    Typically your application will only need one Huey instance, but you can
    have as many as you like -- the only caveat is that one consumer process
    must be executed for each Huey instance.

    :param name: a name for the task queue.
    :param bool result_store: whether to store task results.
    :param bool events: whether to enable consumer-sent events.
    :param store_none: Flag to indicate whether tasks that return ``None``
        should store their results in the result store.
    :param always_eager: Useful for testing, this will execute all tasks
        immediately, without enqueueing them.
    :param store_errors: Flag to indicate whether task errors should be stored.
    :param global_registry: Use a global registry for tasks.

    Example usage::

        from huey import RedisHuey

        # Create a huey instance and disable consumer-sent events.
        huey = RedisHuey('my-app', events=False)

        @huey.task()
        def slow_function(some_arg):
            # ... do something ...
            return some_arg

        @huey.periodic_task(crontab(minute='0', hour='3'))
        def backup():
            # do a backup every day at 3am
            return
    """
    def __init__(self, name='huey', result_store=True, events=True,
                 store_none=False, always_eager=False, store_errors=True,
                 blocking=False, global_registry=True, **storage_kwargs):
        self.name = name
        self.result_store = result_store
        self.events = events
        self.store_none = store_none
        self.always_eager = always_eager
        self.store_errors = store_errors
        self.blocking = blocking
        self.storage = self.get_storage(**storage_kwargs)
        self.pre_execute_hooks = OrderedDict()
        self.post_execute_hooks = OrderedDict()
        self.startup_hooks = OrderedDict()
        self._locks = set()
        if global_registry:
            self.registry = registry
        else:
            self.registry = TaskRegistry()

    def get_storage(self, **kwargs):
        raise NotImplementedError('Storage API not implemented in the base '
                                  'Huey class. Use `RedisHuey` instead.')

    def create_consumer(self, **config):
        return Consumer(self, **config)

    def _normalize_execute_time(self, eta=None, delay=None, convert_utc=True):
        if delay and eta:
            raise ValueError('Both a delay and an eta cannot be '
                             'specified at the same time')
        elif delay:
            method = (convert_utc and datetime.datetime.utcnow or
                      datetime.datetime.now)
            return method() + datetime.timedelta(seconds=delay)
        elif eta:
            if is_naive(eta) and convert_utc:
                eta = local_to_utc(eta)
            elif is_aware(eta) and convert_utc:
                eta = aware_to_utc(eta)
            elif is_aware(eta) and not convert_utc:
                eta = make_naive(eta)
            return eta
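
    # A sketch of the normalization above (hedged; `dt` is a datetime object):
    #
    #     huey._normalize_execute_time(delay=60)          # utcnow() + 60 seconds
    #     huey._normalize_execute_time(eta=dt)            # naive dt: local -> UTC
    #     huey._normalize_execute_time(eta=dt, delay=60)  # raises ValueError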

    def task(self, retries=0, retry_delay=0, retries_as_argument=False,
             include_task=False, name=None, **task_settings):
        def decorator(func):
            """
            Decorator to execute a function out-of-band via the consumer.
            """
            return TaskWrapper(
                self,
                func.func if isinstance(func, TaskWrapper) else func,
                retries=retries,
                retry_delay=retry_delay,
                retries_as_argument=retries_as_argument,
                include_task=include_task,
                name=name,
                **task_settings)
        return decorator

    # retries and retry_delay default to 0 here because they are stored as
    # class-level defaults on the derived PeriodicQueueTask. Instances are
    # always created with `None` for these values, so the class attributes
    # provide the 0 fallback.
    def periodic_task(self, validate_datetime, name=None, retries=0,
                      retry_delay=0, **task_settings):
        """
        Decorator to execute a function on a specific schedule.
        """
        def decorator(func):
            def method_validate(self, dt):
                return validate_datetime(dt)

            return TaskWrapper(
                self,
                func.func if isinstance(func, TaskWrapper) else func,
                name=name,
                task_base=PeriodicQueueTask,
                default_retries=retries,
                default_retry_delay=retry_delay,
                validate_datetime=method_validate,
                **task_settings)

        return decorator

    def register_pre_execute(self, name, fn):
        """
        Register a pre-execute hook. The callback will be executed before the
        execution of all tasks. Execution of the task can be cancelled by
        raising a :py:class:`CancelExecution` exception. Uncaught exceptions
        will be logged but will not cause the task itself to be cancelled.

        The callback function should accept a single task instance; its return
        value is ignored.

        :param name: Name for the hook.
        :param fn: Callback function that accepts the task to be executed.
        """
        self.pre_execute_hooks[name] = fn

    def unregister_pre_execute(self, name):
        del self.pre_execute_hooks[name]

    def pre_execute(self, name=None):
        """
        Decorator for registering a pre-execute hook.
        """
        def decorator(fn):
            self.register_pre_execute(name or fn.__name__, fn)
            return fn
        return decorator

    def register_post_execute(self, name, fn):
        """
        Register a post-execute hook. The callback will be executed after the
        execution of all tasks. Uncaught exceptions will be logged but will
        have no other effect on the overall operation of the consumer.

        The callback function should accept:

        * a task instance
        * the return value from the execution of the task (which may be None)
        * any exception that was raised during the execution of the task (which
          will be None for tasks that executed normally).

        The return value of the callback itself is ignored.

        :param name: Name for the hook.
        :param fn: Callback function that accepts the task that was executed,
                   its return value (or ``None``), and any exception that was
                   raised (or ``None``).
        """
        self.post_execute_hooks[name] = fn

    def unregister_post_execute(self, name):
        del self.post_execute_hooks[name]

    def post_execute(self, name=None):
        """
        Decorator for registering a post-execute hook.
        """
        def decorator(fn):
            self.register_post_execute(name or fn.__name__, fn)
            return fn
        return decorator

    def register_startup(self, name, fn):
        """
        Register a startup hook. The callback will be executed whenever a
        worker comes online. Uncaught exceptions will be logged but will
        have no other effect on the overall operation of the worker.

        The callback function must not accept any parameters.

        This API is provided to simplify setting up global resources that, for
        whatever reason, should not be created as import-time side-effects. For
        example, suppose your tasks need to write data into a Postgres
        database. If you create the connection at import-time, before the
        worker processes are spawned, you'll likely run into errors when
        attempting to use the
        connection from the child (worker) processes. To avoid this problem,
        you can register a startup hook which is executed by the worker process
        as part of its initialization.

        :param name: Name for the hook.
        :param fn: Callback function.
        """
        self.startup_hooks[name] = fn

    def unregister_startup(self, name):
        del self.startup_hooks[name]

    def on_startup(self, name=None):
        """
        Decorator for registering a startup hook.
        """
        def decorator(fn):
            self.register_startup(name or fn.__name__, fn)
            return fn
        return decorator
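
    # A combined sketch of the three hook decorators above (hedged; names such
    # as `maintenance_mode`, `logger` and `create_db_connection` are
    # hypothetical):
    #
    #     @huey.pre_execute()
    #     def abort_during_maintenance(task):
    #         if maintenance_mode:
    #             raise CancelExecution()
    #
    #     @huey.post_execute()
    #     def log_outcome(task, result, exc):
    #         logger.info('%s -> %r (error: %r)', task.task_id, result, exc)
    #
    #     @huey.on_startup()
    #     def open_connection():
    #         global db_conn
    #         db_conn = create_db_connection()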

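    # Note: `_wrapped_operation` is a decorator factory evaluated at class
    # definition time, so it deliberately takes no `self`; it converts storage
    # failures into the given exception class via `wrap_exception`.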
    def _wrapped_operation(exc_class):
        def decorator(fn):
            def inner(*args, **kwargs):
                try:
                    return fn(*args, **kwargs)
                except (KeyboardInterrupt, RuntimeError):
                    raise
                except:
                    wrap_exception(exc_class)
            return inner
        return decorator

    @_wrapped_operation(QueueWriteException)
    def _enqueue(self, msg):
        self.storage.enqueue(msg)

    @_wrapped_operation(QueueReadException)
    def _dequeue(self):
        return self.storage.dequeue()

    @_wrapped_operation(QueueRemoveException)
    def _unqueue(self, msg):
        return self.storage.unqueue(msg)

    @_wrapped_operation(DataStoreGetException)
    def _get_data(self, key, peek=False):
        if peek:
            return self.storage.peek_data(key)
        else:
            return self.storage.pop_data(key)

    @_wrapped_operation(DataStorePutException)
    def _put_data(self, key, value):
        return self.storage.put_data(key, value)

    @_wrapped_operation(DataStorePutException)
    def _put_if_empty(self, key, value):
        return self.storage.put_if_empty(key, value)

    @_wrapped_operation(DataStorePutException)
    def _put_error(self, metadata):
        self.storage.put_error(metadata)

    @_wrapped_operation(DataStoreGetException)
    def _get_errors(self, limit=None, offset=0):
        return self.storage.get_errors(limit=limit, offset=offset)

    @_wrapped_operation(ScheduleAddException)
    def _add_to_schedule(self, data, ts):
        self.storage.add_to_schedule(data, ts)

    @_wrapped_operation(ScheduleReadException)
    def _read_schedule(self, ts):
        return self.storage.read_schedule(ts)

    def emit(self, message):
        try:
            self.storage.emit(message)
        except:
            # Events always fail silently since they are treated as a non-
            # critical component.
            pass

    def _execute_always_eager(self, task):
        accum = []
        failure_exc = None
        while task is not None:
            for name, callback in self.pre_execute_hooks.items():
                callback(task)
            try:
                result = task.execute()
            except Exception as exc:
                result = None
                failure_exc = task_exc = exc
            else:
                task_exc = None

            result_wrapper = EagerTaskResultWrapper(self, task)
            result_wrapper.set_result(result)
            accum.append(result_wrapper)

            for name, callback in self.post_execute_hooks.items():
                callback(task, result, task_exc)
            if task.on_complete:
                task = task.on_complete
                task.extend_data(result)
            else:
                task = None

        if failure_exc is not None:
            raise failure_exc

        return accum[0] if len(accum) == 1 else accum

    def enqueue(self, task):
        if self.always_eager:
            return self._execute_always_eager(task)

        self._enqueue(self.registry.get_message_for_task(task))
        if not self.result_store:
            return

        if task.on_complete:
            q = [task]
            result_wrappers = []
            while q:
                current = q.pop()
                result_wrappers.append(TaskResultWrapper(self, current))
                if current.on_complete:
                    q.append(current.on_complete)
            return result_wrappers
        else:
            return TaskResultWrapper(self, task)
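
    # A sketch of the pipeline handling above (hedged; the `.s()` helper for
    # building a bare QueueTask is hypothetical here):
    #
    #     task = add.s(1, 2)                 # build a task without enqueueing it
    #     task.on_complete = mul.s(10)       # then run mul(add_result, 10)
    #     r_add, r_mul = huey.enqueue(task)  # one wrapper per task in the chain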

    def dequeue(self):
        message = self._dequeue()
        if message:
            return self.registry.get_task_for_message(message)

    def put(self, key, value):
        return self._put_data(key,
                              pickle.dumps(value, pickle.HIGHEST_PROTOCOL))

    def get(self, key, peek=False):
        data = self._get_data(key, peek=peek)
        if data is EmptyData:
            return
        else:
            return pickle.loads(data)

    def put_error(self, metadata):
        return self._put_error(pickle.dumps(metadata))

    def emit_status(self, status, error=False, timestamp=None, **data):
        if self.events:
            if timestamp is not None:
                data['timestamp'] = time.mktime(timestamp.timetuple())

            metadata = {'status': status, 'error': error}
            if error:
                metadata['traceback'] = traceback.format_exc()
            metadata.update(data)
            self.emit(json.dumps(metadata))

    def emit_task(self, status, task, error=False, **data):
        if self.events:
            metadata = task.get_metadata()
            metadata.update(data)
            self.emit_status(status, error=error, **metadata)

    def execute(self, task):
        if not isinstance(task, QueueTask):
            raise TypeError('Unknown object: %s' % task)

        try:
            result = task.execute()
        except Exception as exc:
            if self.store_errors:
                metadata = task.get_metadata()
                metadata['error'] = repr(exc)
                metadata['traceback'] = traceback.format_exc()
                self.put(task.task_id, Error(metadata))
                self.put_error(metadata)
            raise

        if self.result_store and not isinstance(task, PeriodicQueueTask):
            if result is not None or self.store_none:
                self.put(task.task_id, result)

        if task.on_complete:
            next_task = task.on_complete
            next_task.extend_data(result)
            self.enqueue(next_task)

        return result

    def revoke_all(self, task_class, revoke_until=None, revoke_once=False):
        self.put('rt:%s' % task_class.__name__, (revoke_until, revoke_once))

    def restore_all(self, task_class):
        return self._get_data('rt:%s' % task_class.__name__) is not EmptyData

    def revoke(self, task, revoke_until=None, revoke_once=False):
        self.put(task.revoke_id, (revoke_until, revoke_once))

    def restore(self, task):
        # Return value indicates whether the task was in fact revoked.
        return self._get_data(task.revoke_id) is not EmptyData

    def revoke_by_id(self, task_id, revoke_until=None, revoke_once=False):
        return self.revoke(QueueTask(task_id=task_id), revoke_until,
                           revoke_once)

    def restore_by_id(self, task_id):
        return self.restore(QueueTask(task_id=task_id))

    def _check_revoked(self, revoke_id, dt=None, peek=True):
        """
        Checks if a task is revoked and returns a 2-tuple indicating:

        1. Is task revoked?
        2. Should task be restored?
        """
        res = self.get(revoke_id, peek=True)
        if res is None:
            return False, False

        revoke_until, revoke_once = res
        if revoke_once:
            # This task *was* revoked for one run, but now it should be
            # restored to normal execution (unless we are just peeking).
            return True, not peek
        elif revoke_until is not None and revoke_until <= dt:
            # Task is no longer revoked and can be restored.
            return False, True
        else:
            # Task is still revoked. Do not restore.
            return True, False

    def is_revoked(self, task, dt=None, peek=True):
        if isclass(task) and issubclass(task, QueueTask):
            revoke_id = 'rt:%s' % task.__name__
            is_revoked, can_restore = self._check_revoked(revoke_id, dt, peek)
            if can_restore:
                self.restore_all(task)
            return is_revoked

        if not isinstance(task, QueueTask):
            task = QueueTask(task_id=task)

        is_revoked, can_restore = self._check_revoked(task.revoke_id, dt, peek)
        if can_restore:
            self.restore(task)
        if not is_revoked:
            is_revoked = self.is_revoked(type(task), dt, peek)

        return is_revoked

    def add_schedule(self, task):
        msg = self.registry.get_message_for_task(task)
        ex_time = task.execute_time or datetime.datetime.fromtimestamp(0)
        self._add_to_schedule(msg, ex_time)

    def read_schedule(self, ts):
        return [self.registry.get_task_for_message(m)
                for m in self._read_schedule(ts)]

    def read_periodic(self, ts):
        periodic = self.registry.get_periodic_tasks()
        return [task for task in periodic
                if task.validate_datetime(ts)]

    def ready_to_run(self, cmd, dt=None):
        dt = dt or datetime.datetime.utcnow()
        return cmd.execute_time is None or cmd.execute_time <= dt

    def pending(self, limit=None):
        return [self.registry.get_task_for_message(m)
                for m in self.storage.enqueued_items(limit)]

    def pending_count(self):
        return self.storage.queue_size()

    def scheduled(self, limit=None):
        return [self.registry.get_task_for_message(m)
                for m in self.storage.scheduled_items(limit)]

    def scheduled_count(self):
        return self.storage.schedule_size()

    def all_results(self):
        return self.storage.result_items()

    def result_count(self):
        return self.storage.result_store_size()

    def errors(self, limit=None, offset=0):
        return [
            pickle.loads(error)
            for error in self.storage.get_errors(limit, offset)]

    def __len__(self):
        return self.pending_count()

    def flush(self):
        self.storage.flush_all()

    def get_tasks(self):
        return sorted(self.registry._registry.keys())

    def get_periodic_tasks(self):
        return [name for name, task in self.registry._registry.items()
                if hasattr(task, 'validate_datetime')]

    def get_regular_tasks(self):
        periodic = set(self.get_periodic_tasks())
        return [task for task in self.get_tasks() if task not in periodic]

    def lock_task(self, lock_name):
        """
        Utilize the Storage key/value APIs to implement simple locking.

        This lock is designed to prevent multiple invocations of a task from
        running concurrently. It can be used either as a context manager or as
        a task decorator. If used as a decorator, place it directly above the
        function declaration.

        If a second invocation occurs and the lock cannot be acquired, then a
        special exception is raised, which is handled by the consumer. The task
        will not be executed and an ``EVENT_LOCKED`` event will be emitted. If
        the task is configured to be retried, then it will be retried normally,
        but the failure to acquire the lock is not considered an error.

        Examples:

            @huey.periodic_task(crontab(minute='*/5'))
            @huey.lock_task('reports-lock')
            def generate_report():
                # If a report takes longer than 5 minutes to generate, we do
                # not want to kick off another until the previous invocation
                # has finished.
                run_report()

            @huey.periodic_task(crontab(minute='0'))
            def backup():
                # Generate backup of code
                do_code_backup()

                # Generate database backup. Since this may take longer than an
                # hour, we want to ensure that it is not run concurrently.
                with huey.lock_task('db-backup'):
                    do_db_backup()
        """
        return TaskLock(self, lock_name)

    def flush_locks(self):
        """
        Flush any stale locks (for example, when restarting the consumer).

        :return: List of any stale locks that were cleared.
        """
        flushed = set()
        for lock_key in self._locks:
            if self._get_data(lock_key) is not EmptyData:
                flushed.add(lock_key.split('.lock.', 1)[-1])
        return flushed

    def result(self, task_id, blocking=False, timeout=None, backoff=1.15,
               max_delay=1.0, revoke_on_timeout=False, preserve=False):
        """
        Retrieve the result of a task, given the task's ID. This method
        accepts the same parameters and has the same behavior as
        :py:meth:`TaskResultWrapper.get`.
        """
        task_result = TaskResultWrapper(self, QueueTask(task_id=task_id))
        return task_result.get(
            blocking=blocking,
            timeout=timeout,
            backoff=backoff,
            max_delay=max_delay,
            revoke_on_timeout=revoke_on_timeout,
            preserve=preserve)
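
# A sketch of eager mode for testing against the listing above (hedged; `add`
# is a hypothetical task):
#
#     huey.always_eager = True
#     wrapper = add(1, 2)  # runs inline via _execute_always_eager()
#     wrapper.get()        # result available immediately, no consumer needed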
Example #3
File: api.py Project: blakev/huey
class Huey(object):
    """
    Huey executes tasks by exposing function decorators that cause the function
    call to be enqueued for execution by the consumer.

    Typically your application will only need one Huey instance, but you can
    have as many as you like -- the only caveat is that one consumer process
    must be executed for each Huey instance.

    :param name: a name for the task queue.
    :param bool result_store: whether to store task results.
    :param bool events: whether to enable consumer-sent events.
    :param store_none: Flag to indicate whether tasks that return ``None``
        should store their results in the result store.
    :param always_eager: Useful for testing, this will execute all tasks
        immediately, without enqueueing them.
    :param store_errors: Flag to indicate whether task errors should be stored.
    :param global_registry: Use a global registry for tasks.

    Example usage::

        from huey import RedisHuey

        # Create a huey instance and disable consumer-sent events.
        huey = RedisHuey('my-app', events=False)

        @huey.task()
        def slow_function(some_arg):
            # ... do something ...
            return some_arg

        @huey.periodic_task(crontab(minute='0', hour='3'))
        def backup():
            # do a backup every day at 3am
            return
    """
    def __init__(self, name='huey', result_store=True, events=True,
                 store_none=False, always_eager=False, store_errors=True,
                 blocking=False, global_registry=True, **storage_kwargs):
        self.name = name
        self.result_store = result_store
        self.events = events
        self.store_none = store_none
        self.always_eager = always_eager
        self.store_errors = store_errors
        self.blocking = blocking
        self.storage = self.get_storage(**storage_kwargs)
        self.pre_execute_hooks = OrderedDict()
        self.post_execute_hooks = OrderedDict()
        self._locks = set()
        if global_registry:
            self.registry = registry
        else:
            self.registry = TaskRegistry()

    def get_storage(self, **kwargs):
        raise NotImplementedError('Storage API not implemented in the base '
                                  'Huey class. Use `RedisHuey` instead.')

    def create_consumer(self, **config):
        return Consumer(self, **config)

    def _normalize_execute_time(self, eta=None, delay=None, convert_utc=True):
        if delay and eta:
            raise ValueError('Both a delay and an eta cannot be '
                             'specified at the same time')
        elif delay:
            method = (convert_utc and datetime.datetime.utcnow or
                      datetime.datetime.now)
            return method() + datetime.timedelta(seconds=delay)
        elif eta:
            if is_naive(eta) and convert_utc:
                eta = local_to_utc(eta)
            elif is_aware(eta) and convert_utc:
                eta = aware_to_utc(eta)
            elif is_aware(eta) and not convert_utc:
                eta = make_naive(eta)
            return eta

    def task(self, retries=0, retry_delay=0, retries_as_argument=False,
             include_task=False, name=None, **task_settings):
        def decorator(func):
            """
            Decorator to execute a function out-of-band via the consumer.
            """
            return TaskWrapper(
                self,
                func.func if isinstance(func, TaskWrapper) else func,
                retries=retries,
                retry_delay=retry_delay,
                retries_as_argument=retries_as_argument,
                include_task=include_task,
                name=name,
                **task_settings)
        return decorator

    # retries and retry_delay default to 0 here because they are stored as
    # class-level defaults on the derived PeriodicQueueTask. Instances are
    # always created with `None` for these values, so the class attributes
    # provide the 0 fallback.
    def periodic_task(self, validate_datetime, name=None, retries=0,
                      retry_delay=0, **task_settings):
        """
        Decorator to execute a function on a specific schedule.
        """
        def decorator(func):
            def method_validate(self, dt):
                return validate_datetime(dt)

            return TaskWrapper(
                self,
                func.func if isinstance(func, TaskWrapper) else func,
                name=name,
                task_base=PeriodicQueueTask,
                default_retries=retries,
                default_retry_delay=retry_delay,
                validate_datetime=method_validate,
                **task_settings)

        return decorator

    def register_pre_execute(self, name, fn):
        """
        Register a pre-execute hook. The callback will be executed before the
        execution of all tasks. Execution of the task can be cancelled by
        raising a :py:class:`CancelExecution` exception. Uncaught exceptions
        will be logged but will not cause the task itself to be cancelled.

        The callback function should accept a single task instance; its return
        value is ignored.

        :param name: Name for the hook.
        :param fn: Callback function that accepts the task to be executed.
        """
        self.pre_execute_hooks[name] = fn

    def unregister_pre_execute(self, name):
        del self.pre_execute_hooks[name]

    def pre_execute(self, name=None):
        """
        Decorator for registering a pre-execute hook.
        """
        def decorator(fn):
            self.register_pre_execute(name or fn.__name__, fn)
            return fn
        return decorator

    def register_post_execute(self, name, fn):
        """
        Register a post-execute hook. The callback will be executed after the
        execution of all tasks. Uncaught exceptions will be logged but will
        have no other effect on the overall operation of the consumer.

        The callback function should accept:

        * a task instance
        * the return value from the execution of the task (which may be None)
        * any exception that was raised during the execution of the task (which
          will be None for tasks that executed normally).

        The return value of the callback itself is ignored.

        :param name: Name for the hook.
        :param fn: Callback function that accepts the task that was executed,
                   its return value (or ``None``), and any exception that was
                   raised (or ``None``).
        """
        self.post_execute_hooks[name] = fn

    def unregister_post_execute(self, name):
        del self.post_execute_hooks[name]

    def post_execute(self, name=None):
        """
        Decorator for registering a post-execute hook.
        """
        def decorator(fn):
            self.register_post_execute(name or fn.__name__, fn)
            return fn
        return decorator

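    # Note: `_wrapped_operation` is a decorator factory evaluated at class
    # definition time, so it deliberately takes no `self`; it converts storage
    # failures into the given exception class via `wrap_exception`.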
    def _wrapped_operation(exc_class):
        def decorator(fn):
            def inner(*args, **kwargs):
                try:
                    return fn(*args, **kwargs)
                except (KeyboardInterrupt, RuntimeError):
                    raise
                except:
                    wrap_exception(exc_class)
            return inner
        return decorator

    @_wrapped_operation(QueueWriteException)
    def _enqueue(self, msg):
        self.storage.enqueue(msg)

    @_wrapped_operation(QueueReadException)
    def _dequeue(self):
        return self.storage.dequeue()

    @_wrapped_operation(QueueRemoveException)
    def _unqueue(self, msg):
        return self.storage.unqueue(msg)

    @_wrapped_operation(DataStoreGetException)
    def _get_data(self, key, peek=False):
        if peek:
            return self.storage.peek_data(key)
        else:
            return self.storage.pop_data(key)

    @_wrapped_operation(DataStorePutException)
    def _put_data(self, key, value):
        return self.storage.put_data(key, value)

    @_wrapped_operation(DataStorePutException)
    def _put_if_empty(self, key, value):
        return self.storage.put_if_empty(key, value)

    @_wrapped_operation(DataStorePutException)
    def _put_error(self, metadata):
        self.storage.put_error(metadata)

    @_wrapped_operation(DataStoreGetException)
    def _get_errors(self, limit=None, offset=0):
        return self.storage.get_errors(limit=limit, offset=offset)

    @_wrapped_operation(ScheduleAddException)
    def _add_to_schedule(self, data, ts):
        self.storage.add_to_schedule(data, ts)

    @_wrapped_operation(ScheduleReadException)
    def _read_schedule(self, ts):
        return self.storage.read_schedule(ts)

    def emit(self, message):
        try:
            self.storage.emit(message)
        except:
            # Events always fail silently since they are treated as a non-
            # critical component.
            pass

    def enqueue(self, task):
        if self.always_eager:
            return task.execute()

        self._enqueue(self.registry.get_message_for_task(task))
        if not self.result_store:
            return

        if task.on_complete:
            q = [task]
            result_wrappers = []
            while q:
                current = q.pop()
                result_wrappers.append(TaskResultWrapper(self, current))
                if current.on_complete:
                    q.append(current.on_complete)
            return result_wrappers
        else:
            return TaskResultWrapper(self, task)

    def dequeue(self):
        message = self._dequeue()
        if message:
            return self.registry.get_task_for_message(message)

    def put(self, key, value):
        return self._put_data(key,
                              pickle.dumps(value, pickle.HIGHEST_PROTOCOL))

    def get(self, key, peek=False):
        data = self._get_data(key, peek=peek)
        if data is EmptyData:
            return
        else:
            return pickle.loads(data)

    def put_error(self, metadata):
        return self._put_error(pickle.dumps(metadata))

    def _format_time(self, dt):
        if dt is None:
            return None
        return time.mktime(dt.timetuple())

    def _get_task_metadata(self, task, error=False, include_data=False):
        metadata = {
            'id': task.task_id,
            'task': type(task).__name__,
            'retries': task.retries,
            'retry_delay': task.retry_delay,
            'execute_time': self._format_time(task.execute_time)}
        if include_data and not isinstance(task, PeriodicQueueTask):
            targs, tkwargs = task.get_data()
            if tkwargs.get("task") and isinstance(tkwargs["task"], QueueTask):
                del(tkwargs['task'])
            metadata['data'] = (targs, tkwargs)

        return metadata

    def emit_status(self, status, error=False, **data):
        if self.events:
            metadata = {'status': status, 'error': error}
            if error:
                metadata['traceback'] = traceback.format_exc()
            metadata.update(data)
            self.emit(json.dumps(metadata))

    def emit_task(self, status, task, error=False, **data):
        if self.events:
            metadata = self._get_task_metadata(task)
            metadata.update(data)
            self.emit_status(status, error=error, **metadata)

    def execute(self, task):
        if not isinstance(task, QueueTask):
            raise TypeError('Unknown object: %s' % task)

        try:
            result = task.execute()
        except Exception as exc:
            if self.store_errors:
                metadata = self._get_task_metadata(task, True)
                metadata['error'] = repr(exc)
                metadata['traceback'] = traceback.format_exc()
                self.put(task.task_id, Error(metadata))
                self.put_error(metadata)
            raise

        if self.result_store and not isinstance(task, PeriodicQueueTask):
            if result is not None or self.store_none:
                self.put(task.task_id, result)

        if task.on_complete:
            next_task = task.on_complete
            next_task.extend_data(result)
            self.enqueue(next_task)

        return result

    def revoke_all(self, task_class, revoke_until=None, revoke_once=False):
        self.put('rt:%s' % task_class.__name__, (revoke_until, revoke_once))

    def restore_all(self, task_class):
        return self._get_data('rt:%s' % task_class.__name__) is not EmptyData

    def revoke(self, task, revoke_until=None, revoke_once=False):
        self.put(task.revoke_id, (revoke_until, revoke_once))

    def restore(self, task):
        # Return value indicates whether the task was in fact revoked.
        return self._get_data(task.revoke_id) is not EmptyData

    def revoke_by_id(self, task_id, revoke_until=None, revoke_once=False):
        return self.revoke(QueueTask(task_id=task_id), revoke_until,
                           revoke_once)

    def restore_by_id(self, task_id):
        return self.restore(QueueTask(task_id=task_id))

    def _check_revoked(self, revoke_id, dt=None, peek=True):
        """
        Checks if a task is revoked and returns a 2-tuple indicating:

        1. Is task revoked?
        2. Should task be restored?
        """
        res = self.get(revoke_id, peek=True)
        if res is None:
            return False, False

        revoke_until, revoke_once = res
        if revoke_once:
            # This task *was* revoked for one run, but now it should be
            # restored to normal execution (unless we are just peeking).
            return True, not peek
        elif revoke_until is not None and revoke_until <= dt:
            # Task is no longer revoked and can be restored.
            return False, True
        else:
            # Task is still revoked. Do not restore.
            return True, False

    def is_revoked(self, task, dt=None, peek=True):
        if isclass(task) and issubclass(task, QueueTask):
            revoke_id = 'rt:%s' % task.__name__
            is_revoked, can_restore = self._check_revoked(revoke_id, dt, peek)
            if can_restore:
                self.restore_all(task)
            return is_revoked

        if not isinstance(task, QueueTask):
            task = QueueTask(task_id=task)

        is_revoked, can_restore = self._check_revoked(task.revoke_id, dt, peek)
        if can_restore:
            self.restore(task)
        if not is_revoked:
            is_revoked = self.is_revoked(type(task), dt, peek)

        return is_revoked
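
    # A sketch of class-wide vs. per-instance revocation handled above (hedged;
    # `send_email` is a hypothetical TaskWrapper exposing `task_class`):
    #
    #     huey.revoke_all(send_email.task_class, revoke_once=True)
    #     huey.is_revoked(send_email.task_class)  # True for all pending instances
    #     huey.revoke_by_id(task_id)              # revoke one enqueued instance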

    def add_schedule(self, task):
        msg = self.registry.get_message_for_task(task)
        ex_time = task.execute_time or datetime.datetime.fromtimestamp(0)
        self._add_to_schedule(msg, ex_time)

    def read_schedule(self, ts):
        return [self.registry.get_task_for_message(m)
                for m in self._read_schedule(ts)]

    def read_periodic(self, ts):
        periodic = self.registry.get_periodic_tasks()
        return [task for task in periodic
                if task.validate_datetime(ts)]

    def ready_to_run(self, cmd, dt=None):
        dt = dt or datetime.datetime.utcnow()
        return cmd.execute_time is None or cmd.execute_time <= dt

    def pending(self, limit=None):
        return [self.registry.get_task_for_message(m)
                for m in self.storage.enqueued_items(limit)]

    def pending_count(self):
        return self.storage.queue_size()

    def scheduled(self, limit=None):
        return [self.registry.get_task_for_message(m)
                for m in self.storage.scheduled_items(limit)]

    def scheduled_count(self):
        return self.storage.schedule_size()

    def all_results(self):
        return self.storage.result_items()

    def result_count(self):
        return self.storage.result_store_size()

    def errors(self, limit=None, offset=0):
        return [
            pickle.loads(error)
            for error in self.storage.get_errors(limit, offset)]

    def __len__(self):
        return self.pending_count()

    def flush(self):
        self.storage.flush_all()

    def get_tasks(self):
        return sorted(self.registry._registry.keys())

    def get_periodic_tasks(self):
        return [name for name, task in self.registry._registry.items()
                if hasattr(task, 'validate_datetime')]

    def get_regular_tasks(self):
        periodic = set(self.get_periodic_tasks())
        return [task for task in self.get_tasks() if task not in periodic]

    def lock_task(self, lock_name):
        """
        Utilize the Storage key/value APIs to implement simple locking.

        This lock is designed to prevent multiple invocations of a task from
        running concurrently. It can be used either as a context manager or as
        a task decorator. If used as a decorator, place it directly above the
        function declaration.

        If a second invocation occurs and the lock cannot be acquired, then a
        special exception is raised, which is handled by the consumer. The task
        will not be executed and an ``EVENT_LOCKED`` event will be emitted. If
        the task is configured to be retried, then it will be retried normally,
        but the failure to acquire the lock is not considered an error.

        Examples:

            @huey.periodic_task(crontab(minute='*/5'))
            @huey.lock_task('reports-lock')
            def generate_report():
                # If a report takes longer than 5 minutes to generate, we do
                # not want to kick off another until the previous invocation
                # has finished.
                run_report()

            @huey.periodic_task(crontab(minute='0'))
            def backup():
                # Generate backup of code
                do_code_backup()

                # Generate database backup. Since this may take longer than an
                # hour, we want to ensure that it is not run concurrently.
                with huey.lock_task('db-backup'):
                    do_db_backup()
        """
        return TaskLock(self, lock_name)

    def flush_locks(self):
        """
        Flush any stale locks (for example, when restarting the consumer).

        :return: List of any stale locks that were cleared.
        """
        flushed = set()
        for lock_key in self._locks:
            if self._get_data(lock_key) is not EmptyData:
                flushed.add(lock_key.split('.lock.', 1)[-1])
        return flushed

    def result(self, task_id, blocking=False, timeout=None, backoff=1.15,
               max_delay=1.0, revoke_on_timeout=False, preserve=False):
        """
        Retrieve the result of a task, given the task's ID. This method
        accepts the same parameters and has the same behavior as
        :py:meth:`TaskResultWrapper.get`.
        """
        if not blocking:
            return self.get(task_id, peek=preserve)
        else:
            task_result = TaskResultWrapper(self, QueueTask(task_id=task_id))
            return task_result.get(
                blocking=blocking,
                timeout=timeout,
                backoff=backoff,
                max_delay=max_delay,
                revoke_on_timeout=revoke_on_timeout,
                preserve=preserve)
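
# A closing sketch of result retrieval with this version (hedged):
#
#     value = huey.result(task_id)  # non-blocking; pops the stored result
#     value = huey.result(task_id, blocking=True, timeout=30,
#                         revoke_on_timeout=True)  # revoke if it never arrives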