def named_queue(url):
    """Named queues facilitate discrete queues on a single backend."""
    foo_calls = []

    def foo_func(arg):
        foo_calls.append(arg)

    bar_calls = []

    def bar_func(arg):
        bar_calls.append(arg)

    foo_broker = get_broker(url, 'foo')
    foo_broker.expose(foo_func)
    bar_broker = get_broker(url, 'bar')
    bar_broker.expose(bar_func)
    with thread_worker(foo_broker), thread_worker(bar_broker):
        # -- task-invoking code, usually another process --
        foo_queue = get_queue(url, 'foo')
        foo_queue.foo_func(1)
        bar_queue = get_queue(url, 'bar')
        bar_queue.bar_func(2)

        # Each task lands only in its own queue's worker.
        eventually(lambda: (foo_calls, bar_calls), ([1], [2]))
def named_queue(url):
    """Named queues facilitate discrete queues on a single backend."""
    foo_state, bar_state = [], []

    def foo_func(arg):
        foo_state.append(arg)

    def bar_func(arg):
        bar_state.append(arg)

    foo_broker = get_broker(url, 'foo')
    foo_broker.expose(foo_func)
    bar_broker = get_broker(url, 'bar')
    bar_broker.expose(bar_func)
    with thread_worker(foo_broker), thread_worker(bar_broker):
        # -- task-invoking code, usually another process --
        get_queue(url, 'foo').foo_func(1)
        get_queue(url, 'bar').bar_func(2)
        eventually(lambda: (foo_state, bar_state), ([1], [2]))
def expose_method(url):
    """Object methods can be exposed too, not just functions."""
    class Database(object):
        """stateful storage"""
        value = None

        def update(self, value):
            self.value = value

    class TaskObj(object):
        """object with task definitions"""

        def __init__(self, db):
            self.db = db

        def update_value(self, value):
            self.db.update(value)

    db = Database()
    broker = get_broker(url)
    broker.expose(TaskObj(db).update_value)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        q.update_value(2)
        eventually(lambda: db.value, 2)
def test_Queue_default_options(url):
    """Queue-level options apply to every task invoked through it."""
    def func(arg=3):
        if isinstance(arg, int) and arg < 2:
            raise ValueError('too low')
        return str(arg)

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # ignore_result=True yields no deferred at all
        quiet = get_queue(url, ignore_result=True)
        eq_(quiet.func(), None)

        # on_error=Task.PASS passes the failure through as a value
        passing = get_queue(url, on_error=Task.PASS)
        failed = passing.func(1)
        res = passing.func(failed)
        assert res.wait(WAIT), repr(res)
        eq_(res.value, 'func [default:%s] ValueError: too low' % failed.id)
def task_with_failed_deferred_arguments(url):
    """TaskFailure can be passed to the final task.

    By default, a task fails if any of its deferred arguments fail.
    However, creating a ``Task`` with ``on_error=Task.PASS`` will cause
    a ``TaskFailure`` to be passed as the result of any task that fails.
    """
    def func(arg):
        if arg == 0:
            raise Exception('zero fail!')
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        deferred = [q.func(1), q.func(0), q.func(2)]
        res = Task(q.func, on_error=Task.PASS)(deferred)
        res.wait(timeout=WAIT)

        fail = TaskFailure(
            'func', 'default', deferred[1].id, 'Exception: zero fail!')
        eq_(res.value, [1, fail, 2])
def result_status(url):
    """Deferred results can be queried for task status.

    A lock is used to control state interactions between the producer
    and the worker for illustration purposes only. This type of
    lock-step interaction is not normally needed or even desired.
    """
    lock = TimeoutLock(locked=True)

    def func(arg):
        lock.acquire()
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker, lock):
        # -- task-invoking code, usually another process --
        result = get_queue(url).func('arg')

        eventually(lambda: result.status, const.ENQUEUED)
        eq_(repr(result), "<Deferred func [default:%s] enqueued>" % result.id)

        lock.release()
        eventually(lambda: result.status, const.PROCESSING)
        eq_(repr(result), "<Deferred func [default:%s] processing>" % result.id)

        lock.release()
        assert result.wait(WAIT), repr(result)
        eq_(repr(result), "<Deferred func [default:%s] success>" % result.id)
        eq_(result.value, 'arg')
def result_status(url):
    """Deferred results can be queried for task status.

    A lock is used to control state interactions between the producer
    and the worker for illustration purposes only. This type of
    lock-step interaction is not normally needed or even desired.
    """
    lock = TimeoutLock(locked=True)

    def func(arg):
        lock.acquire()
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker, lock):
        # -- task-invoking code, usually another process --
        queue = get_queue(url)
        res = queue.func('arg')

        # enqueued -> processing -> success, gated by the lock
        eventually(lambda: res.status, const.ENQUEUED)
        eq_(repr(res), "<Deferred func [default:%s] enqueued>" % res.id)
        lock.release()
        eventually(lambda: res.status, const.PROCESSING)
        eq_(repr(res), "<Deferred func [default:%s] processing>" % res.id)
        lock.release()
        assert res.wait(WAIT), repr(res)
        eq_(repr(res), "<Deferred func [default:%s] success>" % res.id)
        eq_(res.value, 'arg')
def more_deferred_arguments(url):
    """Deferred results may themselves be arguments to further tasks."""
    from operator import add

    def func(arg):
        return arg

    broker = get_broker(url)
    broker.expose(func)
    broker.expose(sum)
    broker.expose(add)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        sum_123 = q.sum([q.func(1), q.func(2), q.func(3)])
        sum_1234 = q.add(sum_123, q.func(4))
        # BUG FIX: the assertion message referenced undefined name ``res``,
        # which raised NameError (masking the failure) when wait() timed out.
        assert sum_1234.wait(WAIT), repr(sum_1234)
        eq_(sum_1234.value, 10)
def task_namespaces(url):
    """Task namespaces are used to arrange tasks similar to the Python
    package/module hierarchy.
    """
    state = set()
    ts = TaskSpace('module.path')

    @ts.task
    def foo():
        state.add('foo')

    @ts.task
    def bar(arg):
        state.add(arg)

    broker = get_broker(url)
    broker.expose(ts)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        q.module.path.foo()
        q.module.path.bar(1)
        eventually(lambda: state, {'foo', 1})
def task_with_failed_deferred_arguments(url):
    """TaskFailure can be passed to the final task.

    By default, a task fails if any of its deferred arguments fail.
    However, creating a ``Task`` with ``on_error=Task.PASS`` will cause
    a ``TaskFailure`` to be passed as the result of any task that fails.
    """
    def func(arg):
        if arg == 0:
            raise Exception('zero fail!')
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        items = [
            q.func(1),
            q.func(0),
            q.func(2),
        ]
        pass_task = Task(q.func, on_error=Task.PASS)
        res = pass_task(items)
        res.wait(timeout=WAIT)

        # the middle argument failed; its failure is passed through
        fail = TaskFailure('func', 'default', items[1].id,
                           'Exception: zero fail!')
        eq_(res.value, [1, fail, 2])
def task_with_deferred_arguments(url):
    """A deferred result may be passed as an argument to another task.

    Tasks receiving deferred arguments will not be invoked until the
    deferred value is available. Notice that the value of the deferred
    argument, not the Deferred object itself, is passed to ``sum`` in
    this example.
    """
    def func(arg):
        return arg

    broker = get_broker(url)
    broker.expose(func)
    broker.expose(sum)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        total = q.sum([q.func(n) for n in (1, 2, 3)])
        assert total.wait(WAIT), repr(total)
        eq_(total.value, 6)
def test_Queue_len(url):
    """len(queue) tracks the number of incomplete tasks."""
    lock = TimeoutLock(locked=True)

    def func(arg=None):
        pass

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker, lock):
        q = get_queue(url)
        eq_(len(q), 0)
        first = q.func()
        eq_(len(q), 1)
        second = q.func()
        third = q.func(second)
        eq_(len(q), 3)

        eventually(lambda: lock.locked, True)
        lock.release()
        assert first.wait(timeout=WAIT), repr(first)
        eq_(len(q), 2)

        eventually(lambda: lock.locked, True)
        lock.release()
        eventually(lambda: lock.locked, True)
        lock.release()
        assert third.wait(timeout=WAIT), repr(third)
        eq_(len(q), 0)
def test_Queue_len(url):
    """len(queue) tracks the number of incomplete tasks."""
    lock = TimeoutLock(locked=True)

    def func(arg=None):
        pass

    def step():
        # wait for the worker to block on the lock, then let it finish
        eventually(lambda: lock.locked, True)
        lock.release()

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker, lock):
        q = get_queue(url)
        eq_(len(q), 0)
        r0 = q.func()
        eq_(len(q), 1)
        r1 = q.func()
        r2 = q.func(r1)
        eq_(len(q), 3)
        step()
        assert r0.wait(timeout=WAIT), repr(r0)
        eq_(len(q), 2)
        step()
        step()
        assert r2.wait(timeout=WAIT), repr(r2)
        eq_(len(q), 0)
def task_namespaces(url):
    """Task namespaces are used to arrange tasks similar to the Python
    package/module hierarchy.
    """
    seen = set()
    __name__ = 'module.path'
    ts = TaskSpace(__name__)

    @ts.task
    def foo():
        seen.add('foo')

    @ts.task
    def bar(arg):
        seen.add(arg)

    broker = get_broker(url)
    broker.expose(ts)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        queue = get_queue(url)
        queue.module.path.foo()
        queue.module.path.bar(1)
        eventually(lambda: seen, {'foo', 1})
def expose_method(url):
    """Object methods can be exposed too, not just functions."""
    class Database(object):
        """stateful storage"""
        value = None

        def update(self, value):
            self.value = value

    class TaskObj(object):
        """object with task definitions"""

        def __init__(self, db):
            self.db = db

        def update_value(self, value):
            self.db.update(value)

    store = Database()
    tasks = TaskObj(store)
    broker = get_broker(url)
    broker.expose(tasks.update_value)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        queue = get_queue(url)
        queue.update_value(2)
        eventually(lambda: store.value, 2)
def dependency_graph(url):
    r"""Dependency graph: three independent (left, right) pairs each feed
    a ``catch`` task, and all catches feed a single ``combine``.

        left right   left right   left right
          \  /         \  /         \  /
         catch        catch        catch
             \          |          /
              \_________|_________/
                        |
                     combine
    """
    ts = TaskSpace()

    @ts.task
    def left(num):
        return ('left', num)

    @ts.task
    def right(num):
        return ('right', num)

    @ts.task
    def catch(left, right, num):
        return [num, left, right]

    @ts.task
    def combine(items):
        return {item[0]: item[1:] for item in items}

    broker = get_broker(url)
    broker.expose(ts)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        catches = [q.catch(q.left(n), q.right(n), n) for n in [1, 2, 3]]
        res = q.combine(catches)
        assert res.wait(WAIT), repr(res)
        eq_(res.value, {
            1: [('left', 1), ('right', 1)],
            2: [('left', 2), ('right', 2)],
            3: [('left', 3), ('right', 3)],
        })
def dependency_graph(url):
    r"""Dependency graph: for each of three numbers, a left and a right
    task feed a catch task; all catch results feed one combine task.
    """
    ts = TaskSpace()

    @ts.task
    def left(num):
        return ('left', num)

    @ts.task
    def right(num):
        return ('right', num)

    @ts.task
    def catch(left, right, num):
        return [num, left, right]

    @ts.task
    def combine(items):
        return {i[0]: i[1:] for i in items}

    broker = get_broker(url)
    broker.expose(ts)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        queue = get_queue(url)
        catches = []
        for num in [1, 2, 3]:
            lhs = queue.left(num)
            rhs = queue.right(num)
            catches.append(queue.catch(lhs, rhs, num))
        res = queue.combine(catches)
        assert res.wait(WAIT), repr(res)
        expected = {
            1: [('left', 1), ('right', 1)],
            2: [('left', 2), ('right', 2)],
            3: [('left', 3), ('right', 3)],
        }
        eq_(res.value, expected)
def test_WorkerPool_heartrate(url):
    """A worker that dies mid-task causes the result to expire."""
    broker = get_broker(url)
    pool = WorkerPool(broker, WorkerPool_heartrate_init, workers=1)
    with start_pool(pool):
        q = get_queue(url)
        task = Task(q.suicide_worker, heartrate=0.1, result_timeout=5)
        res = task()
        assert res.wait(WAIT), repr(res)
        print(repr(res))
        with assert_raises(TaskExpired):
            res.value
def test_WorkerPool_max_worker_tasks(url):
    """A worker is recycled after max_worker_tasks tasks."""
    broker = get_broker(url)
    pool = WorkerPool(broker, WorkerPool_max_worker_tasks_init,
        workers=1, max_worker_tasks=3)
    with start_pool(pool):
        q = get_queue(url)
        res = q.results([q.func() for _ in range(4)])
        assert res.wait(WAIT), repr(res)
        results = res.value
        assert isinstance(results, list), results
        # per-worker task counter resets when the worker is replaced
        eq_([count for _, count in results], [1, 2, 3, 1])
        # two distinct worker identities served the four tasks
        eq_(len({ident for ident, _ in results}), 2)
def test_clear_Queue(url):
    """``del q[:]`` clears the queue; partial slices are rejected."""
    queue = get_queue(url)
    eq_(len(queue), 0)
    queue.func()
    queue.func()
    eq_(len(queue), 2)

    del queue[:]
    eq_(len(queue), 0)

    msg = 'delitem is only valid with a full slice ([:])'
    with assert_raises(ValueError, msg=msg):
        del queue[:2]
def no_such_task(url):
    """Invoking an unexposed task name fails with 'no such task'."""
    broker = get_broker(url)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        res = q.func('arg')
        assert res.wait(WAIT), repr(res)
        eq_(repr(res), '<Deferred func [default:%s] failed>' % res.id)
        expect = 'func [default:%s] no such task' % res.id
        with assert_raises(TaskFailure, expect):
            res.value
def test_WorkerPool_crashed_worker(url):
    """A crashed worker is replaced by a new one (different pid)."""
    broker = get_broker(url)
    pool = WorkerPool(broker, WorkerPool_crashed_worker_init, workers=1)
    with start_pool(pool):
        q = get_queue(url)
        res = q.getpid()
        assert res.wait(WAIT), repr(res)
        original_pid = res.value

        q.kill_worker()

        res = q.getpid()
        assert res.wait(WAIT), repr(res)
        assert res.value != original_pid, original_pid
def test_Broker_task_failed(url):
    """Marking a task failed via the broker expires its result."""
    lock = TimeoutLock(locked=True)

    def func():
        lock.acquire()

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        queue = get_queue(url)
        res = queue.func()
        broker.task_failed(res)
        assert res.wait(timeout=WAIT), repr(res)
        lock.release()
        with assert_raises(TaskExpired):
            res.value
def test_completed_Deferred_as_argument(url):
    """An already-completed deferred can be passed as a task argument;
    the task receives its value, not the Deferred object.
    """
    def func(arg):
        eq_(arg, 1)
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        q = get_queue(url)
        eq_(len(q), 0)
        r0 = q.func(1)
        assert r0.wait(timeout=WAIT), repr(r0)
        eq_(r0.value, 1)

        r1 = q.func(r0)
        assert r1.wait(timeout=WAIT), repr(r1)
        # BUG FIX: the final assertion checked r0.value again instead of
        # r1.value, so the second task's result was never verified.
        eq_(r1.value, 1)
def simple(url):
    """A simple example demonstrating WorQ mechanics"""
    received = []

    def func(arg):
        received.append(arg)

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        q.func('arg')
        eventually(lambda: received, ['arg'])
def simple(url):
    """A simple example demonstrating WorQ mechanics"""
    calls = []

    def func(arg):
        calls.append(arg)

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        get_queue(url).func('arg')
        eventually(lambda: calls, ['arg'])
def task_error(url):
    """An exception raised in a task surfaces as a TaskFailure."""
    def func(arg):
        raise Exception('fail!')

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        res = q.func('arg')
        assert res.wait(WAIT), repr(res)
        eq_(repr(res), '<Deferred func [default:%s] failed>' % res.id)
        expect = 'func [default:%s] Exception: fail!' % res.id
        with assert_raises(TaskFailure, expect):
            res.value
def more_namespaces(url):
    """Nested task namespaces; same task name may exist in each space."""
    state = set()
    foo = TaskSpace('foo')
    bar = TaskSpace('foo.bar')
    baz = TaskSpace('foo.bar.baz')

    @foo.task
    def join(arg):
        state.add('foo-join %s' % arg)

    @bar.task
    def kick(arg):
        state.add('bar-kick %s' % arg)

    @baz.task
    def join(arg):
        state.add('baz-join %s' % arg)

    @baz.task
    def kick(arg):
        state.add('baz-kick %s' % arg)

    broker = get_broker(url)
    for space in (foo, bar, baz):
        broker.expose(space)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        q.foo.join(1)
        q.foo.bar.kick(2)
        q.foo.bar.baz.join(3)
        q.foo.bar.baz.kick(4)
        eventually(lambda: state, {
            'foo-join 1',
            'bar-kick 2',
            'baz-join 3',
            'baz-kick 4',
        })
def more_namespaces(url):
    """Deeply nested namespaces each hold their own tasks."""
    seen = set()
    foo = TaskSpace('foo')
    bar = TaskSpace('foo.bar')
    baz = TaskSpace('foo.bar.baz')

    @foo.task
    def join(arg):
        seen.add('foo-join %s' % arg)

    @bar.task
    def kick(arg):
        seen.add('bar-kick %s' % arg)

    @baz.task
    def join(arg):
        seen.add('baz-join %s' % arg)

    @baz.task
    def kick(arg):
        seen.add('baz-kick %s' % arg)

    broker = get_broker(url)
    broker.expose(foo)
    broker.expose(bar)
    broker.expose(baz)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        queue = get_queue(url)
        queue.foo.join(1)
        queue.foo.bar.kick(2)
        queue.foo.bar.baz.join(3)
        queue.foo.bar.baz.kick(4)
        expected = {
            'foo-join 1',
            'bar-kick 2',
            'baz-join 3',
            'baz-kick 4',
        }
        eventually(lambda: seen, expected)
def test_worker_interrupted(url):
    """KeyboardInterrupt inside a task is reported as a TaskFailure."""
    def func(arg):
        raise KeyboardInterrupt()

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        res = q.func('arg')
        assert res.wait(WAIT), repr(res)
        eq_(repr(res), '<Deferred func [default:%s] failed>' % res.id)
        expect = 'func [default:%s] KeyboardInterrupt: ' % res.id
        with assert_raises(TaskFailure, expect):
            res.value
def test_deferred_task_fail_on_error(url):
    """By default a task fails when any deferred argument fails."""
    def func(arg):
        if arg == 0:
            raise Exception('zero fail!')
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        res = q.func([q.func(1), q.func(0), q.func(2)])
        res.wait(timeout=WAIT)
        # the second deferred argument (value 0) is the one that failed
        failed = res.task.args[0][1]
        msg = 'func [default:%s] Exception: zero fail!' % failed.id
        with assert_raises(TaskFailure, msg):
            res.value
def test_WorkerPool_sigterm(url):
    # Verify graceful shutdown: after SIGTERM the pool finishes the
    # in-flight task before exiting.
    with tempdir() as tmp:
        logpath = join(tmp, 'output.log')
        # run the pool in a separate process so it can be signaled
        proc = run_in_subprocess(worker_pool, url,
            WorkerPool_sigterm_init, (tmp, logpath), workers=3)
        with printlog(logpath), force_kill_on_exit(proc):
            q = get_queue(url)
            q.func('text')
            # wait until the task is actually running in the pool
            eventually(reader(tmp, 'func.started'), '')
            proc.terminate()  # signal pool shutdown
            touch(join(tmp, 'func.unlock'))  # allow func to proceed
            # the task completed despite the shutdown signal
            eventually(reader(tmp, 'func.out'), 'text')
            eventually(verify_shutdown(proc), True, timeout=WAIT)
def ignore_result(url):
    """Tell the queue to ignore the task result when the result is not
    important. This is done by creating a ``Task`` object with custom
    options for more efficient queue operation.
    """
    state = []

    def func(arg):
        state.append(arg)

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        q = get_queue(url)
        fire_and_forget = Task(q.func, ignore_result=True)
        res = fire_and_forget(3)
        eq_(res, None)  # verify that we did not get a deferred result
        eventually(lambda: state, [3])
def ignore_result(url):
    """Tell the queue to ignore the task result when the result is not
    important. This is done by creating a ``Task`` object with custom
    options for more efficient queue operation.
    """
    seen = []

    def func(arg):
        seen.append(arg)

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        queue = get_queue(url)
        task = Task(queue.func, ignore_result=True)
        outcome = task(3)
        # verify that we did not get a deferred result
        eq_(outcome, None)
        eventually(lambda: seen, [3])
def test_WorkerPool_worker_shutdown_on_parent_die(url):
    # Verify that pool worker processes exit on their own when the pool
    # master process is killed (no orderly shutdown signal).
    with tempdir() as tmp:
        logpath = join(tmp, 'output.log')
        proc = run_in_subprocess(worker_pool, url,
            WorkerPool_worker_shutdown_on_parent_die_init, (tmp, logpath))
        with printlog(logpath), force_kill_on_exit(proc):
            # learn the worker's pid before killing the master
            res = get_queue(url).getpid()
            assert res.wait(WAIT), repr(res)
            os.kill(proc.pid, signal.SIGKILL)  # force kill pool master
            eventually(proc.is_alive, False, timeout=WAIT)
            try:
                # the orphaned worker should notice and exit
                eventually(pid_running(res.value), False,
                    timeout=WAIT, poll_interval=0.1)
            except Exception:
                os.kill(res.value, signal.SIGTERM)  # clean up
                raise
def Broker_duplicate_task_id(url, identifier):
    """Enqueuing a task whose id is already in use raises DuplicateTask.

    The id becomes available again once the original task completes.
    (Removed unused local ``state`` list from the original.)
    """
    lock = TimeoutLock(locked=True)

    def func(arg):
        lock.acquire()
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker, lock):
        q = get_queue(url)
        task = Task(q.func, id=identifier)
        res = task(1)
        eventually(lambda: res.status, const.ENQUEUED)
        msg = 'func [default:int] cannot enqueue task with duplicate id'
        with assert_raises(DuplicateTask, msg):
            task(2)  # rejected: still enqueued
        lock.release()
        eventually(lambda: res.status, const.PROCESSING)
        msg = 'func [default:int] cannot enqueue task with duplicate id'
        with assert_raises(DuplicateTask, msg):
            task(3)  # rejected: still processing
        lock.release()
        assert res.wait(timeout=WAIT), repr(res)
        eq_(res.value, 1)

        # the id is free again after the first task completed
        res = task(4)
        eventually(lambda: res.status, const.ENQUEUED)
        lock.release()
        eventually(lambda: res.status, const.PROCESSING)
        lock.release()
        assert res.wait(timeout=WAIT), repr(res)
        eq_(res.value, 4)
def Broker_duplicate_task_id(url, identifier):
    """Enqueuing a task whose id is already in use raises DuplicateTask.

    The id becomes available again once the original task completes.
    (Removed unused local ``state`` list from the original.)
    """
    lock = TimeoutLock(locked=True)

    def func(arg):
        lock.acquire()
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker, lock):
        q = get_queue(url)
        task = Task(q.func, id=identifier)
        res = task(1)
        eventually(lambda: res.status, const.ENQUEUED)
        msg = 'func [default:int] cannot enqueue task with duplicate id'
        with assert_raises(DuplicateTask, msg):
            task(2)  # rejected while the first task is enqueued
        lock.release()
        eventually(lambda: res.status, const.PROCESSING)
        msg = 'func [default:int] cannot enqueue task with duplicate id'
        with assert_raises(DuplicateTask, msg):
            task(3)  # rejected while the first task is processing
        lock.release()
        assert res.wait(timeout=WAIT), repr(res)
        eq_(res.value, 1)

        # once complete, the same id can be reused
        res = task(4)
        eventually(lambda: res.status, const.ENQUEUED)
        lock.release()
        eventually(lambda: res.status, const.PROCESSING)
        lock.release()
        assert res.wait(timeout=WAIT), repr(res)
        eq_(res.value, 4)
def wait_for_result(url):
    """Efficiently wait for (block on) a task result.

    Use this feature wisely. Waiting for a result in a WorQ task
    could deadlock the queue.
    """
    def func(arg):
        return arg

    broker = get_broker(url)
    broker.expose(func)
    with thread_worker(broker):
        # -- task-invoking code, usually another process --
        res = get_queue(url).func('arg')
        assert res.wait(WAIT), repr(res)
        eq_(res.value, 'arg')
        eq_(repr(res), "<Deferred func [default:%s] success>" % res.id)
def create_worker_pool(queue_url, thread_count, **kw):
    """Create and start a WorkerPool serving the ``ts`` tasks.

    :param queue_url: broker/queue URL for this pool.
    :param thread_count: number of pool workers.
    :param kw: extra keyword arguments forwarded to ``pool.start``.
    :returns: the started ``WorkerPool``.
    """
    broker = get_broker(queue_url)
    broker.expose(ts)
    pool = WorkerPool(broker, workers=thread_count)
    pool.start(**kw)
    return pool


visited_links = [args.url]

# Separate queues for HTML pages and page resources.
html_queue_url = "memory://html"
html_pool = create_worker_pool(html_queue_url, args.threads, timeout=2)
html_queue = get_queue(html_queue_url)

res_queue_url = "memory://res"
resource_pool = create_worker_pool(res_queue_url, args.resource_threads,
                                   timeout=2)
resource_queue = get_queue(res_queue_url)

# Seed the crawl with the starting URL.
html_queue.tasks.crawl_page(args.url)

# Poll until both queues drain, then shut the pools down.
while True:
    time.sleep(30)
    if len(html_queue) == 0 and len(resource_queue) == 0:
        html_pool.stop()
        resource_pool.stop()
        logger.info(' ------> FINISHED <-----')
        # BUG FIX: the original loop never exited after stopping the
        # pools, leaving the process sleeping forever.
        break
def run_scans(app, target_list, profile=None, prefs=None, num_workers=4,
              targets_per_worker=50, worq_url="memory://", get_certs=False,
              timeout=10, progress_callback=None):
    # Scan target_list in parallel chunks via a WorQ worker pool and
    # return the collected scan results.
    #
    # progress_callback, if given, is called periodically with an
    # (approximate) count of newly finished targets; a final correction
    # call reconciles the approximation with the actual result count.
    global logger, pool
    pool = start_pool(worq_url, timeout=1, num_workers=num_workers)
    # split targets into fixed-size chunks, one queue task per chunk
    chunks = __as_chunks(target_list, targets_per_worker)
    try:
        queue = get_queue(worq_url, target=__name__)
        # Enqueue tasks to be executed in parallel
        scan_results = [
            queue.scan_urls(app, targets, profile=profile, prefs=prefs,
                            get_certs=get_certs, timeout=timeout)
            for targets in chunks
        ]
        result = queue.collect(scan_results)
        queue_len = len(queue)
        logged_len = 0  # Required to correct for "overlogging" due to chunking
        while True:
            finished = result.wait(timeout=10)
            # progress is inferred from how much the queue shrank
            current_queue_len = len(queue)
            chunks_done = queue_len - current_queue_len
            logger.debug("After queue wait: %d old - %d new = %d done"
                         % (queue_len, current_queue_len, chunks_done))
            queue_len = current_queue_len
            # Check finished first to ensure that the final chunk is not logged,
            # because the final chunk might not have the full chunk size.
            if finished:
                break
            if progress_callback is not None and chunks_done > 0:
                # We must assume the maximum chunk size here to calculate the number of results
                progress_callback(chunks_done * targets_per_worker)
                logged_len += chunks_done * targets_per_worker
    except KeyboardInterrupt:
        logger.critical("Ctrl-C received. Winding down workers...")
        stop()
        logger.debug("Signaled workers to quit")
        raise KeyboardInterrupt
    finally:
        # NOTE(review): stop() also runs after the KeyboardInterrupt
        # handler above — presumably stop() is idempotent; confirm.
        stop()
    # Log the results of the final chunk
    if progress_callback is not None:
        actual_len = len(result.value)
        logger.debug(
            "Chunkwise logging reported on %d results, actually received %d"
            % (logged_len, actual_len))
        len_correction = actual_len - logged_len
        if len_correction != 0:
            logger.debug("Logging correction for %d results"
                         % len_correction)
            progress_callback(len_correction)
    return result.value