def test_ping_retry(self):
    """
    Every ping raises; the worker must nevertheless keep on pinging.

    Relies on real wall-clock sleeping so that the keep-alive thread gets a chance to run.
    """
    # stored on the test instance so the nested function below can increment it
    self._total_pings = 0

    def fail_ping(worker):
        # invoked from within the keep-alive thread
        self._total_pings += 1
        raise Exception("Some random exception")

    sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    sch.ping = fail_ping

    # a tiny ping interval keeps the test fast
    pinging_worker = Worker(scheduler=sch, worker_id="foo", ping_interval=0.01)
    with pinging_worker:
        # give the keep-alive thread some time to ping repeatedly
        time.sleep(0.1)

    failure_msg = "Didn't retry pings (%d pings performed)" % (self._total_pings,)
    self.assertTrue(self._total_pings > 1, msg=failure_msg)
def test_ping_retry(self):
    """
    The scheduler's ``ping`` is replaced by a function that always raises.

    The test passes when more than one ping attempt was counted, i.e. the
    worker kept trying. It sleeps for real to let the keep-alive thread work.
    """
    scheduler = Scheduler(
        retry_delay=100,
        remove_delay=1000,
        worker_disconnect_delay=10,
    )

    # attribute of the test instance, updated by fail_ping
    self._total_pings = 0

    def fail_ping(worker):
        # runs inside the keep-alive thread
        self._total_pings += 1
        raise Exception("Some random exception")

    scheduler.ping = fail_ping

    worker_options = dict(
        scheduler=scheduler,
        worker_id="foo",
        ping_interval=0.01,  # short interval between pings to keep the test quick
    )
    with Worker(**worker_options):
        # let the keep-alive thread run for a bit
        time.sleep(0.1)

    pings_seen = self._total_pings
    self.assertTrue(pings_seen > 1,
                    msg="Didn't retry pings (%d pings performed)" % (pings_seen,))
def run(self, result=None):
    """
    Run the test with a scheduler, an assistant worker and a regular worker in place.

    The fixtures are created here because worker ``X`` is used as a context
    manager that has to stay open for the whole duration of the test.

    :param result: optional result object, forwarded to the parent ``run``.
    :return: whatever the parent ``run`` returns.
    """
    self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)
    with Worker(scheduler=self.sch, worker_id='X') as w:
        self.w = w
        # pass the result back instead of silently dropping it
        return super(AssistantTest, self).run(result)
class AssistantTest(unittest.TestCase):
    """Tests for a worker created with ``assistant=True`` running tasks added by another worker."""

    def run(self, result=None):
        """
        Run the test with a scheduler, an assistant worker and a regular worker in place.

        :param result: optional result object, forwarded to the parent ``run``.
        :return: whatever the parent ``run`` returns.
        """
        self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)
        with Worker(scheduler=self.sch, worker_id='X') as w:
            self.w = w
            # pass the result back instead of silently dropping it
            return super(AssistantTest, self).run(result)

    def test_get_work(self):
        """A task added by worker ``X`` is completed once the assistant has run."""
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family is not run and ends up in the FAILED list."""
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [d.task_id])

    def test_unimported_job_type(self):
        """The assistant fails a task without module info and runs it once the module is given."""
        MODULE_CONTENTS = b'''
import luigi


class UnimportedTask(luigi.Task):
    def complete(self):
        return False
'''

        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [task.task_id])

        # check that it can import with the right module
        with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module:
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [task.task_id])
def run(self, result=None):
    """
    Run the test with a default scheduler and a worker ``X`` that pings every second.

    Besides the worker itself (``self.w``), the scheduler-side record for that
    worker is stored as ``self.sw``.

    :param result: optional result object, forwarded to the parent ``run``.
    :return: whatever the parent ``run`` returns.
    """
    self.sch = Scheduler()

    with Worker(scheduler=self.sch, worker_id='X', ping_interval=1, max_reschedules=0) as w:
        self.w = w

        # also save scheduler's worker struct
        self.sw = self.sch._state.get_worker(self.w._id)

        # pass the result back instead of silently dropping it
        return super(WorkerSchedulerCommunicationTest, self).run(result)
def run(self, result=None):
    """
    Common setup code. Due to the context manager the normal ``setUp`` can't be used.

    :param result: optional result object, forwarded to the parent ``run``.
    :return: whatever the parent ``run`` returns.
    """
    self.sch = Scheduler(retry_delay=0.00000001, retry_count=2)

    with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1, wait_jitter=0) as w:
        self.w = w
        # pass the result back instead of silently dropping it
        return super(WorkerKeepAliveUpstreamTest, self).run(result)
def run(self, result=None):
    """
    Run the test with a scheduler and two workers (``X`` and ``Y``) in place.

    The current ``time.time`` is remembered up front and put back afterwards,
    so a test that replaced it cannot leak the replacement into other tests.

    :param result: optional result object, forwarded to the parent ``run``.
    :return: whatever the parent ``run`` returns.
    """
    self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    self.time = time.time
    with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2:
        self.w = w
        self.w2 = w2
        try:
            return super(WorkerTest, self).run(result)
        finally:
            # restore the clock even if the parent run() raised (e.g. KeyboardInterrupt)
            if time.time is not self.time:
                time.time = self.time
def run(self, result=None):
    """
    Run the test with ``self.worker`` bound to an active worker.

    :param result: optional result object, forwarded to the parent ``run``.
    :return: whatever the parent ``run`` returns.
    """
    # NOTE(review): setUp() is called explicitly before the worker is created;
    # presumably the base fixture has to exist at that point - confirm.
    super(WorkerEmailTest, self).setUp()
    sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    with Worker(scheduler=sch, worker_id="foo") as self.worker:
        # pass the result back instead of silently dropping it
        return super(WorkerEmailTest, self).run(result)
def test_requires_exception(self):
    """
    ``B`` requires ``(C, A)`` and ``C`` requires ``D``; ``A.requires()`` raises.

    Adding ``B`` is expected to report failure, while ``C`` and ``D`` still run.
    """
    class A(DummyTask):
        def requires(self):
            raise Exception("doh")

    broken = A()

    class D(DummyTask):
        pass

    leaf = D()

    class C(DummyTask):
        def requires(self):
            return leaf

    middle = C()

    class B(DummyTask):
        def requires(self):
            return middle, broken

    root = B()

    scheduler = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    with Worker(scheduler=scheduler, worker_id="foo") as worker:
        added = worker.add(root)
        self.assertFalse(added)
        ran = worker.run()
        self.assertTrue(ran)

        self.assertFalse(root.has_run)
        self.assertTrue(middle.has_run)
        self.assertTrue(leaf.has_run)
        self.assertFalse(broken.has_run)
def test_complete_exception(self):
    """Tests that a task is still scheduled if its sister task crashes in the complete() method"""
    class A(DummyTask):
        def complete(self):
            raise Exception("doh")

    crashing = A()

    class C(DummyTask):
        pass

    sibling = C()

    class B(DummyTask):
        def requires(self):
            return crashing, sibling

    parent = B()

    scheduler = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    with Worker(scheduler=scheduler, worker_id="foo") as worker:
        added = worker.add(parent)
        self.assertFalse(added)
        ran = worker.run()
        self.assertTrue(ran)

        # only the healthy sibling is expected to have run
        self.assertFalse(parent.has_run)
        self.assertTrue(sibling.has_run)
        self.assertFalse(crashing.has_run)
def test_interleaved_workers2(self):
    """
    Two dependency-free tasks sharing the family ``B``: one regular, one external.

    The worker holding the external variant runs first and leaves ``B``
    incomplete; the worker holding the regular variant then completes it.
    """
    class B(DummyTask):
        pass

    class ExternalB(ExternalTask):
        task_family = "B"

        def complete(self):
            return False

    regular = B()
    external = ExternalB()

    self.assertEqual(str(external), "B()")

    scheduler = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker_x = Worker(scheduler=scheduler, worker_id='X')
    worker_y = Worker(scheduler=scheduler, worker_id='Y')
    with worker_x as w, worker_y as w2:
        self.assertTrue(w2.add(external))
        self.assertTrue(w.add(regular))

        self.assertTrue(w2.run())
        self.assertFalse(regular.complete())
        self.assertTrue(w.run())
        self.assertTrue(regular.complete())
def __init__(self, scheduler=None, worker_id=None, worker_processes=1, assistant=False, **kwargs):
    """
    Set up the worker's configuration and bookkeeping state.

    :param scheduler: scheduler to use; a new ``Scheduler()`` is created when ``None``.
    :param worker_id: identifier of this worker; when falsy, one is built from the
                      key/value pairs returned by ``_generate_worker_info()``.
    :param worker_processes: number of worker processes; converted with ``int()``.
    :param assistant: stored as ``self._assistant``.
    :param kwargs: forwarded to the ``worker`` config class.
    :raises AssertionError: if the configured ``wait_interval`` is smaller than
                            ``_WAIT_INTERVAL_EPS`` or ``wait_jitter`` is negative.
    """
    if scheduler is None:
        scheduler = Scheduler()

    self.worker_processes = int(worker_processes)
    self._worker_info = self._generate_worker_info()

    if not worker_id:
        worker_id = 'Worker(%s)' % ', '.join(['%s=%s' % (k, v) for k, v in self._worker_info])

    self._config = worker(**kwargs)

    assert self._config.wait_interval >= _WAIT_INTERVAL_EPS, "[worker] wait_interval must be positive"
    assert self._config.wait_jitter >= 0.0, "[worker] wait_jitter must be equal or greater than zero"

    self._id = worker_id
    self._scheduler = scheduler
    self._assistant = assistant
    self._stop_requesting_work = False

    self.host = socket.gethostname()
    self._scheduled_tasks = {}
    self._suspended_tasks = {}
    self._batch_running_tasks = {}
    self._batch_families_sent = set()

    self._first_task = None

    self.add_succeeded = True
    self.run_succeeded = True

    self.unfulfilled_counts = collections.defaultdict(int)

    # note that ``signal.signal(signal.SIGUSR1, fn)`` only works inside the main execution thread, which is why we
    # provide the ability to conditionally install the hook.
    if not self._config.no_install_shutdown_handler:
        try:
            signal.signal(signal.SIGUSR1, self.handle_interrupt)
            signal.siginterrupt(signal.SIGUSR1, False)
        except AttributeError:
            # deliberate best effort: the signal module lacks these names on some systems
            pass

    # Keep info about what tasks are running (could be in other processes).
    # Decide on the int-converted value so that e.g. worker_processes="1"
    # is treated the same as worker_processes=1.
    if self.worker_processes == 1:
        self._task_result_queue = DequeQueue()
    else:
        self._task_result_queue = multiprocessing.Queue()

    self._running_tasks = {}

    # Stuff for execution_summary
    self._add_task_history = []
    self._get_work_response_history = []
def run(self, result=None):
    """
    Common setup code. Due to the context manager the normal ``setUp`` can't be used.

    :param result: optional result object, forwarded to the parent ``run``.
    :return: whatever the parent ``run`` returns.
    """
    self.sch = Scheduler(retry_delay=0.00000001, disable_failures=2)

    with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1, wait_jitter=0) as w:
        self.w = w
        # pass the result back instead of silently dropping it
        return super(WorkerKeepAliveUpstreamTest, self).run(result)
def run(api_port=8082, address=None, unix_socket=None, scheduler=None, responder=None):
    """
    Runs one instance of the API server.

    Loads the scheduler state, initialises the API, schedules a periodic
    prune, installs shutdown handlers and then starts the tornado IO loop.

    :param api_port: forwarded to ``_init_api``.
    :param address: forwarded to ``_init_api``.
    :param unix_socket: forwarded to ``_init_api``.
    :param scheduler: scheduler to serve; a new ``Scheduler()`` is created when ``None``.
    :param responder: forwarded to ``_init_api``.
    """
    if scheduler is None:
        scheduler = Scheduler()

    # load scheduler state
    scheduler.load()

    _init_api(
        scheduler=scheduler,
        responder=responder,
        api_port=api_port,
        address=address,
        unix_socket=unix_socket,
    )

    # prune work DAG every 60 seconds
    pruner = tornado.ioloop.PeriodicCallback(scheduler.prune, 60000)
    pruner.start()

    # Mutable cell so the nested handler can record that shutdown already
    # completed. Without it, a signal-triggered shutdown would dump the state
    # twice: once from shutdown_handler and once more via atexit.
    shutdown_state = {'done': False}

    def shutdown_handler(signum, frame):
        exit_handler()
        sys.exit(0)

    @atexit.register
    def exit_handler():
        if shutdown_state['done']:
            return
        logger.info("Scheduler instance shutting down")
        scheduler.dump()
        stop()
        # only mark as done after a complete shutdown, so a failed attempt is retried at exit
        shutdown_state['done'] = True

    signal.signal(signal.SIGINT, shutdown_handler)
    signal.signal(signal.SIGTERM, shutdown_handler)
    if os.name == 'nt':
        signal.signal(signal.SIGBREAK, shutdown_handler)
    else:
        signal.signal(signal.SIGQUIT, shutdown_handler)

    logger.info("Scheduler starting up")

    tornado.ioloop.IOLoop.instance().start()
def test_send_event_on_task_disabled(self):
    """Handling a disabled task must create exactly one error event with the expected payload."""
    s = Scheduler(metrics_collector=MetricsCollectors.datadog, disable_persist=10, retry_count=2, disable_window=2)
    task = self.startTask(scheduler=s)
    self.collector.handle_task_disabled(task, s._config)

    expected_tags = [
        'task_name:DDTaskName',
        'task_state:DISABLED',
        'environment:development',
        'application:luigi',
    ]
    expected_text = (
        'A task has been disabled in the pipeline named: DDTaskName. '
        'The task has failed 2 times in the last 2 seconds'
        ', so it is being disabled for 10 seconds.'
    )

    self.mock_create.assert_called_once_with(
        alert_type='error',
        priority='normal',
        tags=expected_tags,
        text=expected_text,
        title='Luigi: A task has been disabled!',
    )
def test_interleaved_workers3(self):
    """
    ``B`` requires ``A``; worker ``X`` holds ``A`` and worker ``Y`` holds ``B``.

    ``X`` runs in a background thread while ``Y`` runs in the test thread;
    afterwards both tasks must be complete.
    """
    class A(DummyTask):
        def run(self):
            logging.debug('running A')
            time.sleep(0.1)
            super(A, self).run()

    upstream = A()

    class B(DummyTask):
        def requires(self):
            return upstream

        def run(self):
            logging.debug('running B')
            super(B, self).run()

    downstream = B()

    scheduler = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker_x = Worker(scheduler=scheduler, worker_id='X', keep_alive=True, count_uniques=True)
    worker_y = Worker(scheduler=scheduler, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

    with worker_x as w, worker_y as w2:
        self.assertTrue(w.add(upstream))
        self.assertTrue(w2.add(downstream))

        background = threading.Thread(target=w.run)
        background.start()
        self.assertTrue(w2.run())

        self.assertTrue(upstream.complete())
        self.assertTrue(downstream.complete())
def test_die_for_non_unique_pending(self):
    """
    Both workers add ``B`` (which requires ``A``); worker ``X`` fetches ``A`` but never runs it.

    Worker ``Y`` is expected to return from ``run()`` although neither task completes.
    """
    class A(DummyTask):
        def run(self):
            logging.debug('running A')
            time.sleep(0.1)
            super(A, self).run()

    upstream = A()

    class B(DummyTask):
        def requires(self):
            return upstream

        def run(self):
            logging.debug('running B')
            super(B, self).run()

    downstream = B()

    scheduler = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker_x = Worker(scheduler=scheduler, worker_id='X', keep_alive=True, count_uniques=True)
    worker_y = Worker(scheduler=scheduler, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

    with worker_x as w, worker_y as w2:
        self.assertTrue(w.add(downstream))
        self.assertTrue(w2.add(downstream))

        # worker X takes A from the scheduler without executing it
        fetched = w._get_work()
        self.assertEqual(fetched[0], upstream.task_id)
        self.assertTrue(w2.run())

        self.assertFalse(upstream.complete())
        self.assertFalse(downstream.complete())
def test_interleaved_workers(self):
    """
    ``B`` requires ``A``; a second worker knows ``B`` only as an incomplete external task.

    Running the second worker first must leave both tasks incomplete;
    running the first worker afterwards must complete both.
    """
    class A(DummyTask):
        pass

    upstream = A()

    class B(DummyTask):
        def requires(self):
            return upstream

    class ExternalB(ExternalTask):
        task_family = "B"

        def complete(self):
            return False

    regular = B()
    external = ExternalB()

    self.assertEqual(str(external), "B()")

    scheduler = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker_x = Worker(scheduler=scheduler, worker_id='X')
    worker_y = Worker(scheduler=scheduler, worker_id='Y')
    with worker_x as w, worker_y as w2:
        self.assertTrue(w.add(regular))
        self.assertTrue(w2.add(external))

        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(upstream.complete())
        self.assertFalse(regular.complete())

        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(upstream.complete())
        self.assertTrue(regular.complete())
class PerTaskRetryPolicyBehaviorTest(LuigiTestCase):
    """Checks that a ``retry_count`` set on a task class is used instead of the scheduler default."""

    def setUp(self):
        super(PerTaskRetryPolicyBehaviorTest, self).setUp()
        self.per_task_retry_count = 2
        self.default_retry_count = 1
        self.sch = Scheduler(retry_delay=0.1, retry_count=self.default_retry_count, prune_on_get_work=True)

    def _task_ids(self, status, upstream_status):
        # ids of the tasks the scheduler lists for the given status filters
        return list(self.sch.task_list(status, upstream_status).keys())

    def _failure_count(self, task):
        # number of failures the scheduler has recorded for ``task``
        return self.sch._state.get_task(task.task_id).failures.num_failures()

    def _new_worker(self, worker_id):
        # keep-alive worker with a short wait interval, bound to the test scheduler
        return Worker(scheduler=self.sch, worker_id=worker_id, keep_alive=True, wait_interval=0.1)

    def test_with_all_disabled_with_single_worker(self):
        """
        A wrapper task (TestWrapperTask) requires two tasks (TestErrorTask1, TestErrorTask2) that both fail.

        TestErrorTask1 uses the default retry_count, which is 1, while TestErrorTask2 sets
        retry_count to 2 at task level.

        This test runs on a single worker.
        """
        class TestErrorTask1(DummyErrorTask):
            pass

        default_failing = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        custom_failing = TestErrorTask2()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [custom_failing, default_failing]

        wrapper = TestWrapperTask()

        with self._new_worker('X') as w1:
            self.assertTrue(w1.add(wrapper))

            self.assertFalse(w1.run())

            self.assertEqual([wrapper.task_id], self._task_ids('PENDING', 'UPSTREAM_DISABLED'))

            self.assertEqual(sorted([default_failing.task_id, custom_failing.task_id]),
                             sorted(self._task_ids('DISABLED', '')))

            self.assertEqual(0, self._failure_count(wrapper))
            self.assertEqual(self.per_task_retry_count, self._failure_count(custom_failing))
            self.assertEqual(self.default_retry_count, self._failure_count(default_failing))

    def test_with_all_disabled_with_multiple_worker(self):
        """
        A wrapper task (TestWrapperTask) requires two tasks (TestErrorTask1, TestErrorTask2) that both fail.

        TestErrorTask1 uses the default retry_count, which is 1, while TestErrorTask2 sets
        retry_count to 2 at task level.

        This test runs on multiple workers.
        """
        class TestErrorTask1(DummyErrorTask):
            pass

        default_failing = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        custom_failing = TestErrorTask2()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [custom_failing, default_failing]

        wrapper = TestWrapperTask()

        with self._new_worker('X') as w1, self._new_worker('Y') as w2, self._new_worker('Z') as w3:
            self.assertTrue(w1.add(wrapper))
            self.assertTrue(w2.add(custom_failing))
            self.assertTrue(w3.add(default_failing))

            self.assertFalse(w3.run())
            self.assertFalse(w2.run())
            self.assertFalse(w1.run())

            self.assertEqual([wrapper.task_id], self._task_ids('PENDING', 'UPSTREAM_DISABLED'))

            self.assertEqual(sorted([default_failing.task_id, custom_failing.task_id]),
                             sorted(self._task_ids('DISABLED', '')))

            self.assertEqual(0, self._failure_count(wrapper))
            self.assertEqual(self.per_task_retry_count, self._failure_count(custom_failing))
            self.assertEqual(self.default_retry_count, self._failure_count(default_failing))

    def test_with_includes_success_with_single_worker(self):
        """
        A wrapper task (TestWrapperTask) requires one failing task (TestErrorTask1) and one
        succeeding task (TestSuccessTask1).

        TestSuccessTask1 ends up DONE, while TestErrorTask1 fails and has retry_count set to 2
        at task level.

        This test runs on a single worker.
        """
        class TestSuccessTask1(DummyTask):
            pass

        succeeding = TestSuccessTask1()

        class TestErrorTask1(DummyErrorTask):
            retry_count = self.per_task_retry_count

        failing = TestErrorTask1()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [failing, succeeding]

        wrapper = TestWrapperTask()

        with self._new_worker('X') as w1:
            self.assertTrue(w1.add(wrapper))

            self.assertFalse(w1.run())

            self.assertEqual([wrapper.task_id], self._task_ids('PENDING', 'UPSTREAM_DISABLED'))
            self.assertEqual([failing.task_id], self._task_ids('DISABLED', ''))
            self.assertEqual([succeeding.task_id], self._task_ids('DONE', ''))

            self.assertEqual(0, self._failure_count(wrapper))
            self.assertEqual(self.per_task_retry_count, self._failure_count(failing))
            self.assertEqual(0, self._failure_count(succeeding))

    def test_with_includes_success_with_multiple_worker(self):
        """
        A wrapper task (TestWrapperTask) requires one failing task (TestErrorTask1) and one
        succeeding task (TestSuccessTask1).

        TestSuccessTask1 ends up DONE, while TestErrorTask1 fails and has retry_count set to 2
        at task level.

        This test runs on multiple workers.
        """
        class TestSuccessTask1(DummyTask):
            pass

        succeeding = TestSuccessTask1()

        class TestErrorTask1(DummyErrorTask):
            retry_count = self.per_task_retry_count

        failing = TestErrorTask1()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [failing, succeeding]

        wrapper = TestWrapperTask()

        with self._new_worker('X') as w1, self._new_worker('Y') as w2, self._new_worker('Z') as w3:
            self.assertTrue(w1.add(wrapper))
            self.assertTrue(w2.add(failing))
            self.assertTrue(w3.add(succeeding))

            self.assertTrue(w3.run())
            self.assertFalse(w2.run())
            self.assertFalse(w1.run())

            self.assertEqual([wrapper.task_id], self._task_ids('PENDING', 'UPSTREAM_DISABLED'))
            self.assertEqual([failing.task_id], self._task_ids('DISABLED', ''))
            self.assertEqual([succeeding.task_id], self._task_ids('DONE', ''))

            self.assertEqual(0, self._failure_count(wrapper))
            self.assertEqual(self.per_task_retry_count, self._failure_count(failing))
            self.assertEqual(0, self._failure_count(succeeding))

    def test_with_dynamic_dependencies_with_single_worker(self):
        """
        The wrapper yields two dynamic dependencies (TestErrorTask1, TestErrorTask2) that both fail.

        TestErrorTask1 uses the default retry_count, which is 1, while TestErrorTask2 sets
        retry_count to 2 at task level.

        This test runs on a single worker.
        """
        class TestErrorTask1(DummyErrorTask):
            pass

        default_failing = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        custom_failing = TestErrorTask2()

        class TestSuccessTask1(DummyTask):
            pass

        succeeding = TestSuccessTask1()

        class TestWrapperTask(DummyTask):
            def requires(self):
                return [succeeding]

            def run(self):
                super(TestWrapperTask, self).run()
                yield custom_failing, default_failing

        wrapper = TestWrapperTask()

        with self._new_worker('X') as w1:
            self.assertTrue(w1.add(wrapper))

            self.assertFalse(w1.run())

            self.assertEqual([wrapper.task_id], self._task_ids('PENDING', 'UPSTREAM_DISABLED'))

            self.assertEqual(sorted([default_failing.task_id, custom_failing.task_id]),
                             sorted(self._task_ids('DISABLED', '')))

            self.assertEqual(0, self._failure_count(wrapper))
            self.assertEqual(0, self._failure_count(succeeding))
            self.assertEqual(self.per_task_retry_count, self._failure_count(custom_failing))
            self.assertEqual(self.default_retry_count, self._failure_count(default_failing))

    def test_with_dynamic_dependencies_with_multiple_workers(self):
        """
        The wrapper yields two dynamic dependencies (TestErrorTask1, TestErrorTask2) that both fail.

        TestErrorTask1 uses the default retry_count, which is 1, while TestErrorTask2 sets
        retry_count to 2 at task level.

        This test runs on multiple workers.
        """
        class TestErrorTask1(DummyErrorTask):
            pass

        default_failing = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        custom_failing = TestErrorTask2()

        class TestSuccessTask1(DummyTask):
            pass

        succeeding = TestSuccessTask1()

        class TestWrapperTask(DummyTask):
            def requires(self):
                return [succeeding]

            def run(self):
                super(TestWrapperTask, self).run()
                yield custom_failing, default_failing

        wrapper = TestWrapperTask()

        with self._new_worker('X') as w1, self._new_worker('Y') as w2:
            self.assertTrue(w1.add(wrapper))
            self.assertTrue(w2.add(succeeding))

            self.assertTrue(w2.run())
            self.assertFalse(w1.run())

            self.assertEqual([wrapper.task_id], self._task_ids('PENDING', 'UPSTREAM_DISABLED'))

            self.assertEqual(sorted([default_failing.task_id, custom_failing.task_id]),
                             sorted(self._task_ids('DISABLED', '')))

            self.assertEqual(0, self._failure_count(wrapper))
            self.assertEqual(0, self._failure_count(succeeding))
            self.assertEqual(self.per_task_retry_count, self._failure_count(custom_failing))
            self.assertEqual(self.default_retry_count, self._failure_count(default_failing))
class WorkerSchedulerCommunicationTest(LuigiTestCase):
    """
    Tests related to communication between Worker and Scheduler that is based on the ping polling.

    See https://github.com/spotify/luigi/pull/1993
    """

    def run(self, result=None):
        """
        Run the test with a default scheduler and a worker ``X`` that pings every second.

        :param result: optional result object, forwarded to the parent ``run``.
        :return: whatever the parent ``run`` returns.
        """
        self.sch = Scheduler()

        with Worker(scheduler=self.sch, worker_id='X', ping_interval=1, max_reschedules=0) as w:
            self.w = w

            # also save scheduler's worker struct
            self.sw = self.sch._state.get_worker(self.w._id)

            # pass the result back instead of silently dropping it
            return super(WorkerSchedulerCommunicationTest, self).run(result)

    def wrapper_task(test_self):
        """
        Build a ``Wrapper`` task class whose outputs live in a new temporary directory.

        :return: tuple ``(Wrapper, tmp)``; the caller is responsible for removing ``tmp``.
        """
        tmp = tempfile.mkdtemp()

        class MyTask(luigi.Task):

            n = luigi.IntParameter()
            delay = 3

            def output(self):
                basename = "%s_%s.txt" % (self.__class__.__name__, self.n)
                return luigi.LocalTarget(os.path.join(tmp, basename))

            def run(self):
                time.sleep(self.delay)
                with self.output().open('w') as f:
                    f.write("content\n")

        class Wrapper(MyTask):

            delay = 0

            def requires(self):
                return [MyTask(n=n) for n in range(self.n)]

        return Wrapper, tmp

    def test_message_handling(self):
        # add some messages for that worker
        for i in range(10):
            self.sw.add_rpc_message('foo', i=i)
        self.assertEqual(10, len(self.sw.rpc_messages))
        self.assertEqual(9, self.sw.rpc_messages[-1]['kwargs']['i'])

        # fetch
        msgs = self.sw.fetch_rpc_messages()
        self.assertEqual(0, len(self.sw.rpc_messages))
        self.assertEqual(9, msgs[-1]['kwargs']['i'])

    def test_ping_content(self):
        # add some messages for that worker
        for i in range(10):
            self.sw.add_rpc_message('foo', i=i)

        # ping the scheduler and check the result
        res = self.sch.ping(worker=self.w._id)
        self.assertIn('rpc_messages', res)
        msgs = res['rpc_messages']
        self.assertEqual(10, len(msgs))
        self.assertEqual('foo', msgs[-1]['name'])
        self.assertEqual(9, msgs[-1]['kwargs']['i'])

        # there should be no message left
        self.assertEqual(0, len(self.sw.rpc_messages))

    @contextlib.contextmanager
    def run_wrapper(self, n):
        """
        Add a ``Wrapper(n=n)`` task to the worker and run the worker in a background thread.

        Yields ``(wrapper, thread)``. When the ``with`` body finishes normally,
        the thread is asserted to be done. The temporary directory is removed
        in every case, including when the body or an assertion raises.
        """
        # assign the wrapper task to the worker
        Wrapper, tmp = self.wrapper_task()
        try:
            wrapper = Wrapper(n=n)
            self.assertTrue(self.w.add(wrapper))

            # check the initial number of worker processes
            self.assertEqual(1, self.w.worker_processes)

            # run the task in a thread and while running, increase the number of worker processes
            # via an rpc message
            t = threading.Thread(target=self.w.run)
            t.start()

            # yield
            yield wrapper, t

            # finally, check that thread is done
            self.assertFalse(t.is_alive())
        finally:
            # cleanup the tmp dir, also when something above failed
            shutil.rmtree(tmp)

    def test_dispatch_valid_message(self):
        with self.run_wrapper(3) as (wrapper, t):
            # each of the wrapper task's tasks runs 3 seconds, and the ping/message dispatch
            # interval is 1 second, so it should be safe to wait 1 second here, add the message
            # which is then fetched by the keep alive thread and dispatched, so after additional 3
            # seconds, the worker will have a changed number of processes
            t.join(1)
            self.sch.set_worker_processes(self.w._id, 2)

            t.join(3)
            self.assertEqual(2, self.w.worker_processes)

            # after additional 3 seconds, the wrapper task + all required tasks should be completed
            t.join(3)
            self.assertTrue(all(task.complete() for task in wrapper.requires()))
            self.assertTrue(wrapper.complete())

    def test_dispatch_invalid_message(self):
        # this test is identical to test_dispatch_valid_message, except that the number of processes
        # is not increased during running as we send an invalid rpc message
        # in addition, the wrapper will only have two requirements
        with self.run_wrapper(2) as (wrapper, t):
            # timing info as above
            t.join(1)
            self.sw.add_rpc_message('set_worker_processes_not_there', n=2)

            t.join(3)
            self.assertEqual(1, self.w.worker_processes)

            # after additional 3 seconds, the wrapper task and all required tasks should be completed
            t.join(3)
            self.assertTrue(all(task.complete() for task in wrapper.requires()))
            self.assertTrue(wrapper.complete())

    def test_dispatch_unregistered_message(self):
        # this test is identical to test_dispatch_valid_message, except that the number of processes
        # is not increased during running as we disable the particular callback to work as a
        # callback, so we want to achieve sth like
        # self.w.set_worker_processes.is_rpc_message_callback = False
        # but this is not possible in py 2 due to wrapped method lookup, see
        # http://stackoverflow.com/questions/9523370/adding-attributes-to-instance-methods-in-python
        set_worker_processes_orig = self.w.set_worker_processes

        def set_worker_processes_replacement(*args, **kwargs):
            return set_worker_processes_orig(*args, **kwargs)

        self.w.set_worker_processes = set_worker_processes_replacement
        self.assertFalse(getattr(self.w.set_worker_processes, "is_rpc_message_callback", False))

        with self.run_wrapper(2) as (wrapper, t):
            # timing info as above
            t.join(1)
            self.sw.add_rpc_message('set_worker_processes', n=2)

            t.join(3)
            self.assertEqual(1, self.w.worker_processes)

            # after additional 3 seconds, the wrapper task and all required tasks should be completed
            t.join(3)
            self.assertTrue(all(task.complete() for task in wrapper.requires()))
            self.assertTrue(wrapper.complete())
class WorkerTest(unittest.TestCase):
    """Exercise ``Worker`` against an in-process ``Scheduler``.

    Every test runs with two entered workers, ``self.w`` (id ``'X'``) and
    ``self.w2`` (id ``'Y'``), both attached to the scheduler ``self.sch``.
    """

    def run(self, result=None):
        """Run the test case inside the two worker contexts.

        The workers are used as context managers, so the fixture wraps ``run``
        itself.  ``time.time`` is remembered before the run and put back
        afterwards because ``setTime`` replaces it.
        """
        self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.time = time.time
        try:
            with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2:
                self.w = w
                self.w2 = w2
                super(WorkerTest, self).run(result)
        finally:
            # Restore the real clock even if the run above raised, so a patched
            # ``time.time`` never leaks out of this test case.
            if time.time != self.time:
                time.time = self.time

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns ``t``."""
        time.time = lambda: t

    def test_dep(self):
        """A task and the task it requires are both run."""
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        """After ``handle_interrupt(SIGUSR1)`` a worker run leaves the added task incomplete."""
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        try:
            self.w.handle_interrupt(signal.SIGUSR1, None)
        except AttributeError:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
        self.w.run()
        self.assertFalse(d.complete())

    def test_disabled_shutdown_hook(self):
        """With ``no_install_shutdown_handler=True`` the task still ends up DONE despite SIGUSR1."""
        w = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True)
        with w:
            try:
                # try to kill the worker!
                os.kill(os.getpid(), signal.SIGUSR1)
            except AttributeError:
                raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
            # try to kill the worker... AGAIN!
            t = SuicidalWorker(signal.SIGUSR1)
            w.add(t)
            w.run()
            # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [t.task_id])

    @with_config({"worker": {"no_install_shutdown_handler": "True"}})
    def test_can_run_luigi_in_thread(self):
        """``luigi.build`` completes a task when called from a non-main thread."""
        class A(DummyTask):
            pass

        task = A()
        # Note that ``signal.signal(signal.SIGUSR1, fn)`` can only be called in the main thread.
        # So if we do not disable the shutdown handler, this would fail.
        t = threading.Thread(target=lambda: luigi.build([task], local_scheduler=True))
        t.start()
        t.join()
        self.assertTrue(task.complete())

    def test_external_dep(self):
        """A task whose external dependency is incomplete is not run."""
        class A(ExternalTask):

            def complete(self):
                return False

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_externalized_dep(self):
        """A dependency wrapped in ``luigi.task.externalize`` is not run, nor is its dependent."""
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(A):

            def requires(self):
                return luigi.task.externalize(a)

        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_legacy_externalized_dep(self):
        """A dependency whose ``run`` is set to ``NotImplemented`` is not run, nor is its dependent."""
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()
        a.run = NotImplemented

        class B(A):

            def requires(self):
                return a

        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_type_error_in_tracking_run_deprecated(self):
        """A ``TypeError`` raised inside ``run`` fails the task after exactly one call."""
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, a.num_runs)

    def test_tracking_url(self):
        """A tracking URL set from ``run`` appears in the scheduler's DONE task list."""
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self):
                self.set_tracking_url(tracking_url)
                self.has_run = True

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertTrue(self.w.run())
        tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(tasks))
        self.assertEqual(tracking_url, tasks[a.task_id]['tracking_url'])

    def test_fail(self):
        """When a dependency raises in ``run``, the worker run fails and the dependent is not run."""
        class CustomException(BaseException):

            def __init__(self, msg):
                self.msg = msg

        class A(Task):

            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        """A second worker re-scheduling B with another dependency must not make the first worker run B."""
        # see related test_remove_dep test (grep for it)
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        """A dependency that turns incomplete after scheduling is still completed by the run."""
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_gets_missed_work(self):
        """A task already handed out by ``get_work`` is still run by the worker."""
        class A(Task):
            done = False

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        a = A()
        self.assertTrue(self.w.add(a))

        # simulate a missed get_work response
        self.assertEqual(a.task_id, self.sch.get_work(worker='X')['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(a.complete())

    def test_avoid_infinite_reschedule(self):
        """Tasks that never report complete make the run return False rather than loop."""
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_fails_registering_signal(self):
        """Constructing a ``Worker`` works when ``luigi.worker.signal`` lacks ``SIGUSR1``."""
        with mock.patch('luigi.worker.signal', spec=['signal']):
            # mock will raise an attribute error getting signal.SIGUSR1
            Worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        """With ``max_reschedules=1`` twenty two-pass dependencies and their dependent all complete."""
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        """A worker holding only the external ``B`` runs nothing; the other worker then runs A and B."""
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2:
            self.assertTrue(w.add(b))
            self.assertTrue(w2.add(eb))
            logging.debug("RUNNING BROKEN WORKER")
            self.assertTrue(w2.run())
            self.assertFalse(a.complete())
            self.assertFalse(b.complete())
            logging.debug("RUNNING FUNCTIONAL WORKER")
            self.assertTrue(w.run())
            self.assertTrue(a.complete())
            self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        """Same as ``test_interleaved_workers`` but without a dependency between the tasks."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2:
            self.assertTrue(w2.add(eb))
            self.assertTrue(w.add(b))

            self.assertTrue(w2.run())
            self.assertFalse(b.complete())
            self.assertTrue(w.run())
            self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        """Worker Y, which only added B, finishes B while worker X runs A in another thread."""
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                self.assertTrue(w.add(a))
                self.assertTrue(w2.add(b))

                threading.Thread(target=w.run).start()
                self.assertTrue(w2.run())

                self.assertTrue(a.complete())
                self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        """Worker Y's run returns without completing anything once X has taken A via ``_get_work``."""
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                self.assertTrue(w.add(b))
                self.assertTrue(w2.add(b))

                self.assertEqual(w._get_work()[0], a.task_id)
                self.assertTrue(w2.run())

                self.assertFalse(a.complete())
                self.assertFalse(b.complete())

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertFalse(a.has_run)

    def test_requires_exception(self):
        """A sibling chain (C -> D) still runs when another dependency raises in ``requires()``."""
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class D(DummyTask):
            pass

        d = D()

        class C(DummyTask):

            def requires(self):
                return d

        c = C()

        class B(DummyTask):

            def requires(self):
                return c, a

        b = B()
        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertTrue(d.has_run)
            self.assertFalse(a.has_run)
class WorkerTest(unittest.TestCase):
    """Exercise ``Worker`` against an in-process ``Scheduler``.

    Every test runs with two entered workers, ``self.w`` (id ``'X'``) and
    ``self.w2`` (id ``'Y'``), both attached to the scheduler ``self.sch``.
    """

    def run(self, result=None):
        """Run the test case inside the two worker contexts.

        The workers are used as context managers, so the fixture wraps ``run``
        itself.  ``time.time`` is remembered before the run and put back
        afterwards because ``setTime`` replaces it.
        """
        self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.time = time.time
        try:
            with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2:
                self.w = w
                self.w2 = w2
                super(WorkerTest, self).run(result)
        finally:
            # Restore the real clock even if the run above raised, so a patched
            # ``time.time`` never leaks out of this test case.
            if time.time != self.time:
                time.time = self.time

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns ``t``."""
        time.time = lambda: t

    def test_dep(self):
        """A task and the task it requires are both run."""
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        """After ``handle_interrupt(SIGUSR1)`` a worker run leaves the added task incomplete."""
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        try:
            self.w.handle_interrupt(signal.SIGUSR1, None)
        except AttributeError:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
        self.w.run()
        self.assertFalse(d.complete())

    def test_disabled_shutdown_hook(self):
        """With ``no_install_shutdown_handler=True`` the task still ends up DONE despite SIGUSR1."""
        w = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True)
        with w:
            try:
                # try to kill the worker!
                os.kill(os.getpid(), signal.SIGUSR1)
            except AttributeError:
                raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
            # try to kill the worker... AGAIN!
            t = SuicidalWorker(signal.SIGUSR1)
            w.add(t)
            w.run()
            # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [t.task_id])

    @with_config({"worker": {"no_install_shutdown_handler": "True"}})
    def test_can_run_luigi_in_thread(self):
        """``luigi.build`` completes a task when called from a non-main thread."""
        class A(DummyTask):
            pass

        task = A()
        # Note that ``signal.signal(signal.SIGUSR1, fn)`` can only be called in the main thread.
        # So if we do not disable the shutdown handler, this would fail.
        t = threading.Thread(target=lambda: luigi.build([task], local_scheduler=True))
        t.start()
        t.join()
        self.assertTrue(task.complete())

    def test_external_dep(self):
        """A task whose external dependency is incomplete is not run."""
        class A(ExternalTask):

            def complete(self):
                return False

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_externalized_dep(self):
        """A dependency wrapped in ``luigi.task.externalize`` is not run, nor is its dependent."""
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(A):

            def requires(self):
                return luigi.task.externalize(a)

        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_legacy_externalized_dep(self):
        """A dependency whose ``run`` is set to ``NotImplemented`` is not run, nor is its dependent."""
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()
        a.run = NotImplemented

        class B(A):

            def requires(self):
                return a

        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_type_error_in_tracking_run_deprecated(self):
        """A ``TypeError`` raised inside ``run`` fails the task after exactly one call."""
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, a.num_runs)

    def test_tracking_url(self):
        """A tracking URL set from ``run`` appears in the scheduler's DONE task list."""
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self):
                self.set_tracking_url(tracking_url)
                self.has_run = True

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertTrue(self.w.run())
        tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(tasks))
        self.assertEqual(tracking_url, tasks[a.task_id]['tracking_url'])

    def test_fail(self):
        """When a dependency raises in ``run``, the worker run fails and the dependent is not run."""
        class CustomException(BaseException):

            def __init__(self, msg):
                self.msg = msg

        class A(Task):

            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        """A second worker re-scheduling B with another dependency must not make the first worker run B."""
        # see related test_remove_dep test (grep for it)
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        """A dependency that turns incomplete after scheduling is still completed by the run."""
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_gets_missed_work(self):
        """A task already handed out by ``get_work`` is still run by the worker."""
        class A(Task):
            done = False

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        a = A()
        self.assertTrue(self.w.add(a))

        # simulate a missed get_work response
        self.assertEqual(a.task_id, self.sch.get_work(worker='X')['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(a.complete())

    def test_avoid_infinite_reschedule(self):
        """Tasks that never report complete make the run return False rather than loop."""
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_fails_registering_signal(self):
        """Constructing a ``Worker`` works when ``luigi.worker.signal`` lacks ``SIGUSR1``."""
        with mock.patch('luigi.worker.signal', spec=['signal']):
            # mock will raise an attribute error getting signal.SIGUSR1
            Worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        """With ``max_reschedules=1`` twenty two-pass dependencies and their dependent all complete."""
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        """A worker holding only the external ``B`` runs nothing; the other worker then runs A and B."""
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2:
            self.assertTrue(w.add(b))
            self.assertTrue(w2.add(eb))
            logging.debug("RUNNING BROKEN WORKER")
            self.assertTrue(w2.run())
            self.assertFalse(a.complete())
            self.assertFalse(b.complete())
            logging.debug("RUNNING FUNCTIONAL WORKER")
            self.assertTrue(w.run())
            self.assertTrue(a.complete())
            self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        """Same as ``test_interleaved_workers`` but without a dependency between the tasks."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2:
            self.assertTrue(w2.add(eb))
            self.assertTrue(w.add(b))

            self.assertTrue(w2.run())
            self.assertFalse(b.complete())
            self.assertTrue(w.run())
            self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        """Worker Y, which only added B, finishes B while worker X runs A in another thread."""
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                self.assertTrue(w.add(a))
                self.assertTrue(w2.add(b))

                threading.Thread(target=w.run).start()
                self.assertTrue(w2.run())

                self.assertTrue(a.complete())
                self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        """Worker Y's run returns without completing anything once X has taken A via ``_get_work``."""
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                self.assertTrue(w.add(b))
                self.assertTrue(w2.add(b))

                self.assertEqual(w._get_work()[0], a.task_id)
                self.assertTrue(w2.run())

                self.assertFalse(a.complete())
                self.assertFalse(b.complete())

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertFalse(a.has_run)

    def test_requires_exception(self):
        """A sibling chain (C -> D) still runs when another dependency raises in ``requires()``."""
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class D(DummyTask):
            pass

        d = D()

        class C(DummyTask):

            def requires(self):
                return d

        c = C()

        class B(DummyTask):

            def requires(self):
                return c, a

        b = B()
        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertTrue(d.has_run)
            self.assertFalse(a.has_run)
class WorkerKeepAliveUpstreamTest(LuigiTestCase):
    """
    Tests related to how the worker stays alive after upstream status changes.

    See https://github.com/spotify/luigi/pull/1789
    """

    def run(self, result=None):
        """
        Common setup code. The worker is a context manager, so normal setUp cannot be used.
        """
        self.sch = Scheduler(retry_delay=0.00000001, disable_failures=2)

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1, wait_jitter=0) as w:
            self.w = w
            super(WorkerKeepAliveUpstreamTest, self).run(result)

    def test_alive_while_has_failure(self):
        """
        One dependency disables and one fails
        """
        class Disabler(luigi.Task):
            pass

        class Failer(luigi.Task):
            did_run = False

            def run(self):
                self.did_run = True

        class Wrapper(luigi.WrapperTask):

            def requires(self):
                return (Disabler(), Failer())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        failer = Failer().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Disable it
        self.sch.add_task(failer, 'FAILED', worker='X')  # Fail it

        # Start the thread before entering ``try`` so the cleanup below only
        # ever joins a thread that was actually started.
        t = threading.Thread(target=self.w.run)
        t.start()
        try:
            t.join(timeout=1)  # Wait 1 second
            self.assertTrue(t.is_alive())  # It shouldn't stop trying, the failed task should be retried!
            self.assertFalse(Failer.did_run)  # It should never have run, the cooldown is longer than a second.
        finally:
            self.sch.prune()  # Make it, like die. Couldn't find a more forceful way to do this.
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())

    def test_alive_while_has_success(self):
        """
        One dependency disables and one succeeds
        """
        # TODO: Fix copy paste mess
        class Disabler(luigi.Task):
            pass

        class Succeeder(luigi.Task):
            did_run = False

            def run(self):
                self.did_run = True

        class Wrapper(luigi.WrapperTask):

            def requires(self):
                return (Disabler(), Succeeder())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        succeeder = Succeeder().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Disable it
        self.sch.add_task(succeeder, 'DONE', worker='X')  # Mark it as already done

        # Start the thread before entering ``try`` so the cleanup below only
        # ever joins a thread that was actually started.
        t = threading.Thread(target=self.w.run)
        t.start()
        try:
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())  # The worker should think that it should stop ...
            # ... because in this case the only work remaining depends on DISABLED tasks,
            # hence it's not worth considering the wrapper task as a PENDING task to
            # keep the worker alive anymore.
            self.assertFalse(Succeeder.did_run)  # It should never have run, it succeeded already
        finally:
            self.sch.prune()  # This shouldn't be necessary in this version, but whatevs
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())
def get_app(self):
    """Return the luigi server app for a scheduler built from ``self.get_scheduler_config()``."""
    return luigi.server.app(Scheduler(**self.get_scheduler_config()))
class WorkerKeepAliveUpstreamTest(LuigiTestCase):
    """
    Tests related to how the worker stays alive after upstream status changes.

    See https://github.com/spotify/luigi/pull/1789
    """

    def run(self, result=None):
        """
        Common setup code. The worker is a context manager, so normal setUp cannot be used.
        """
        self.sch = Scheduler(retry_delay=0.00000001, retry_count=2)

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1, wait_jitter=0) as w:
            self.w = w
            super(WorkerKeepAliveUpstreamTest, self).run(result)

    def test_alive_while_has_failure(self):
        """
        One dependency disables and one fails
        """
        class Disabler(luigi.Task):
            pass

        class Failer(luigi.Task):
            did_run = False

            def run(self):
                self.did_run = True

        class Wrapper(luigi.WrapperTask):

            def requires(self):
                return (Disabler(), Failer())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        failer = Failer().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Disable it
        self.sch.add_task(failer, 'FAILED', worker='X')  # Fail it

        # Start the thread before entering ``try`` so the cleanup below only
        # ever joins a thread that was actually started.
        t = threading.Thread(target=self.w.run)
        t.start()
        try:
            t.join(timeout=1)  # Wait 1 second
            self.assertTrue(t.is_alive())  # It shouldn't stop trying, the failed task should be retried!
            self.assertFalse(Failer.did_run)  # It should never have run, the cooldown is longer than a second.
        finally:
            self.sch.prune()  # Make it, like die. Couldn't find a more forceful way to do this.
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())

    def test_alive_while_has_success(self):
        """
        One dependency disables and one succeeds
        """
        # TODO: Fix copy paste mess
        class Disabler(luigi.Task):
            pass

        class Succeeder(luigi.Task):
            did_run = False

            def run(self):
                self.did_run = True

        class Wrapper(luigi.WrapperTask):

            def requires(self):
                return (Disabler(), Succeeder())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        succeeder = Succeeder().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Disable it
        self.sch.add_task(succeeder, 'DONE', worker='X')  # Mark it as already done

        # Start the thread before entering ``try`` so the cleanup below only
        # ever joins a thread that was actually started.
        t = threading.Thread(target=self.w.run)
        t.start()
        try:
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())  # The worker should think that it should stop ...
            # ... because in this case the only work remaining depends on DISABLED tasks,
            # hence it's not worth considering the wrapper task as a PENDING task to
            # keep the worker alive anymore.
            self.assertFalse(Succeeder.did_run)  # It should never have run, it succeeded already
        finally:
            self.sch.prune()  # This shouldn't be necessary in this version, but whatevs
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())
def setUp(self):
    """Set the default (1) and per-task (2) retry counts, then build the scheduler from the default."""
    super(PerTaskRetryPolicyBehaviorTest, self).setUp()
    self.per_task_retry_count = 2
    self.default_retry_count = 1
    # Collect the scheduler options first so the call itself stays short.
    scheduler_options = dict(
        retry_delay=0.1,
        retry_count=self.default_retry_count,
        prune_on_get_work=True,
    )
    self.sch = Scheduler(**scheduler_options)
def setUp(self):
    """Create the Prometheus collector and scheduler, plus the gauge name and labels the tests look up."""
    self.collector = PrometheusMetricsCollector()
    self.s = Scheduler(metrics_collector=MetricsCollectors.prometheus)
    # Plain data used when reading samples back from the collector's registry.
    self.labels = dict(family=TASK_FAMILY)
    self.gauge_name = 'luigi_task_execution_time_seconds'
class PrometheusMetricTest(unittest.TestCase):
    """Check the counter and gauge samples ``PrometheusMetricsCollector`` records per task event."""

    def setUp(self):
        """Create the collector, a scheduler configured for Prometheus, and the shared lookup keys."""
        self.collector = PrometheusMetricsCollector()
        self.s = Scheduler(metrics_collector=MetricsCollectors.prometheus)
        self.gauge_name = 'luigi_task_execution_time_seconds'
        self.labels = {'family': TASK_FAMILY}

    def startTask(self):
        """Add the test task to the scheduler and return it with fixed ``time_running``/``updated`` values."""
        self.s.add_task(worker=WORKER, task_id=TASK_ID, family=TASK_FAMILY)
        task = self.s._state.get_task(TASK_ID)
        task.time_running = 0
        task.updated = 5
        return task

    def _sample(self, name):
        """Return the registry sample called ``name`` for this test's labels."""
        return self.collector.registry.get_sample_value(name, labels=self.labels)

    def test_handle_task_started(self):
        """Starting a task bumps the started counter and sets the execution-time gauge to 0."""
        task = self.startTask()
        self.collector.handle_task_started(task)

        # assertEqual (not bare ``assert``) so the check survives ``python -O``
        # and reports both values on failure.
        self.assertEqual(1, self._sample('luigi_task_started_total'))
        self.assertEqual(0, self._sample(self.gauge_name))

    def test_handle_task_failed(self):
        """Failing a task bumps the failed counter and sets the gauge to ``updated - time_running``."""
        task = self.startTask()
        self.collector.handle_task_failed(task)

        self.assertEqual(1, self._sample('luigi_task_failed_total'))
        self.assertEqual(task.updated - task.time_running, self._sample(self.gauge_name))

    def test_handle_task_disabled(self):
        """Disabling a task bumps the disabled counter and sets the gauge to ``updated - time_running``."""
        task = self.startTask()
        self.collector.handle_task_disabled(task, self.s._config)

        self.assertEqual(1, self._sample('luigi_task_disabled_total'))
        self.assertEqual(task.updated - task.time_running, self._sample(self.gauge_name))

    def test_handle_task_done(self):
        """Finishing a task bumps the done counter and sets the gauge to ``updated - time_running``."""
        task = self.startTask()
        self.collector.handle_task_done(task)

        self.assertEqual(1, self._sample('luigi_task_done_total'))
        self.assertEqual(task.updated - task.time_running, self._sample(self.gauge_name))
def setUp(self):
    """Create a ``Scheduler`` with no arguments and store it on ``self.sch``."""
    scheduler = Scheduler()
    self.sch = scheduler
def get_app(self):
    """Return the luigi server app backed by a ``Scheduler`` constructed with no arguments."""
    scheduler = Scheduler()
    return luigi.server.app(scheduler)
def setUp(self):
    """Call ``self.mockDatadog()``, remember ``time.time``, then build the collector and scheduler."""
    # Statement order is kept exactly: the mock is set up before anything else is created.
    self.mockDatadog()
    self.time = time.time
    collector = DatadogMetricsCollector()
    self.collector = collector
    collector_kind = MetricsCollectors.datadog
    self.s = Scheduler(metrics_collector=collector_kind)
class WorkerSchedulerCommunicationTest(LuigiTestCase):
    """
    Tests related to communication between Worker and Scheduler that is based on the ping polling.

    See https://github.com/spotify/luigi/pull/1993
    """

    def run(self, result=None):
        """Run the test case with an entered worker ``self.w`` and its scheduler-side struct ``self.sw``."""
        self.sch = Scheduler()

        with Worker(scheduler=self.sch, worker_id='X', ping_interval=1, max_reschedules=0) as w:
            self.w = w

            # also save scheduler's worker struct
            self.sw = self.sch._state.get_worker(self.w._id)

            super(WorkerSchedulerCommunicationTest, self).run(result)

    def wrapper_task(test_self):
        """Create a fresh temp dir and return ``(Wrapper, tmp)``.

        ``Wrapper(n=n)`` requires ``MyTask(n=0) .. MyTask(n=n-1)``; each
        ``MyTask`` sleeps ``delay`` seconds (3, or 0 for the wrapper itself)
        and then writes its output file inside ``tmp``.  The caller is
        responsible for removing ``tmp``.
        """
        tmp = tempfile.mkdtemp()

        class MyTask(luigi.Task):

            n = luigi.IntParameter()
            delay = 3

            def output(self):
                basename = "%s_%s.txt" % (self.__class__.__name__, self.n)
                return luigi.LocalTarget(os.path.join(tmp, basename))

            def run(self):
                time.sleep(self.delay)
                with self.output().open('w') as f:
                    f.write("content\n")

        class Wrapper(MyTask):

            delay = 0

            def requires(self):
                return [MyTask(n=n) for n in range(self.n)]

        return Wrapper, tmp

    def test_message_handling(self):
        """Messages queued on the scheduler's worker struct are returned and cleared by a fetch."""
        # add some messages for that worker
        for i in range(10):
            self.sw.add_rpc_message('foo', i=i)
        self.assertEqual(10, len(self.sw.rpc_messages))
        self.assertEqual(9, self.sw.rpc_messages[-1]['kwargs']['i'])

        # fetch
        msgs = self.sw.fetch_rpc_messages()
        self.assertEqual(0, len(self.sw.rpc_messages))
        self.assertEqual(9, msgs[-1]['kwargs']['i'])

    def test_ping_content(self):
        """A scheduler ping returns the queued rpc messages and empties the queue."""
        # add some messages for that worker
        for i in range(10):
            self.sw.add_rpc_message('foo', i=i)

        # ping the scheduler and check the result
        res = self.sch.ping(worker=self.w._id)
        self.assertIn('rpc_messages', res)
        msgs = res['rpc_messages']
        self.assertEqual(10, len(msgs))
        self.assertEqual('foo', msgs[-1]['name'])
        self.assertEqual(9, msgs[-1]['kwargs']['i'])

        # there should be no message left
        self.assertEqual(0, len(self.sw.rpc_messages))

    @contextlib.contextmanager
    def run_wrapper(self, n):
        """Run ``Wrapper(n=n)`` on the worker in a background thread.

        Yields ``(wrapper, thread)``.  When the ``with`` body finishes, the
        thread must no longer be alive.  The temp directory is removed whether
        or not the body (or a check in here) raised.
        """
        # assign the wrapper task to the worker
        Wrapper, tmp = self.wrapper_task()
        try:
            wrapper = Wrapper(n=n)
            self.assertTrue(self.w.add(wrapper))

            # check the initial number of worker processes
            self.assertEqual(1, self.w.worker_processes)

            # run the task in a thread and while running, increase the number of worker processes
            # via an rpc message
            t = threading.Thread(target=self.w.run)
            t.start()

            # yield
            yield wrapper, t

            # finally, check that thread is done
            self.assertFalse(t.is_alive())
        finally:
            # cleanup the tmp dir, also when an assertion above or in the with-body failed
            shutil.rmtree(tmp)

    def test_dispatch_valid_message(self):
        """A ``set_worker_processes`` request sent through the scheduler changes the worker while it runs."""
        with self.run_wrapper(3) as (wrapper, t):
            # each of the wrapper task's tasks runs 3 seconds, and the ping/message dispatch
            # interval is 1 second, so it should be safe to wait 1 second here, add the message
            # which is then fetched by the keep alive thread and dispatched, so after additional 3
            # seconds, the worker will have a changed number of processes
            t.join(1)
            self.sch.set_worker_processes(self.w._id, 2)

            t.join(3)
            self.assertEqual(2, self.w.worker_processes)

            # after additional 3 seconds, the wrapper task + all required tasks should be completed
            t.join(3)
            self.assertTrue(all(task.complete() for task in wrapper.requires()))
            self.assertTrue(wrapper.complete())

    def test_dispatch_invalid_message(self):
        """An rpc message with an unknown name leaves the number of worker processes unchanged."""
        # this test is identical to test_dispatch_valid_message, except that the number of processes
        # is not increased during running as we send an invalid rpc message
        # in addition, the wrapper will only have two requirements
        with self.run_wrapper(2) as (wrapper, t):
            # timing info as above
            t.join(1)
            self.sw.add_rpc_message('set_worker_processes_not_there', n=2)

            t.join(3)
            self.assertEqual(1, self.w.worker_processes)

            # after additional 3 seconds, the wrapper task and all required tasks should be completed
            t.join(3)
            self.assertTrue(all(task.complete() for task in wrapper.requires()))
            self.assertTrue(wrapper.complete())

    def test_dispatch_unregistered_message(self):
        """A message naming a method not flagged as an rpc callback leaves the worker unchanged."""
        # this test is identical to test_dispatch_valid_message, except that the number of processes
        # is not increased during running as we disable the particular callback to work as a
        # callback, so we want to achieve sth like
        # self.w.set_worker_processes.is_rpc_message_callback = False
        # but this is not possible in py 2 due to wrapped method lookup, see
        # http://stackoverflow.com/questions/9523370/adding-attributes-to-instance-methods-in-python
        set_worker_processes_orig = self.w.set_worker_processes

        def set_worker_processes_replacement(*args, **kwargs):
            return set_worker_processes_orig(*args, **kwargs)

        self.w.set_worker_processes = set_worker_processes_replacement
        self.assertFalse(getattr(self.w.set_worker_processes, "is_rpc_message_callback", False))

        with self.run_wrapper(2) as (wrapper, t):
            # timing info as above
            t.join(1)
            self.sw.add_rpc_message('set_worker_processes', n=2)

            t.join(3)
            self.assertEqual(1, self.w.worker_processes)

            # after additional 3 seconds, the wrapper task and all required tasks should be completed
            t.join(3)
            self.assertTrue(all(task.complete() for task in wrapper.requires()))
            self.assertTrue(wrapper.complete())
def _make_worker(self):
    """Create a scheduler with ``prune_on_get_work=True``, keep it on ``self.scheduler``, and return a one-process worker for it."""
    self.scheduler = Scheduler(prune_on_get_work=True)
    worker_options = dict(scheduler=self.scheduler, worker_processes=1)
    return luigi.worker.Worker(**worker_options)
class PrometheusMetricTest(unittest.TestCase):
    """
    Checks the samples recorded by ``PrometheusMetricsCollector`` for task events.

    Each ``handle_task_*`` test verifies the event counter for the task family and the
    ``luigi_task_execution_time_seconds`` gauge. Checks use ``self.assertEqual`` rather than
    bare ``assert`` so they are not stripped when Python runs with ``-O``.
    """

    def setUp(self):
        self.collector = PrometheusMetricsCollector()
        self.s = Scheduler(metrics_collector=MetricsCollectors.prometheus)
        self.gauge_name = 'luigi_task_execution_time_seconds'
        self.labels = {'family': TASK_FAMILY}

    def startTask(self):
        """
        Add the test task to the scheduler and return it with fixed timestamps.

        ``time_running`` is set to 0 and ``updated`` to 5 so that the tests can compare the
        gauge against ``task.updated - task.time_running``.
        """
        self.s.add_task(worker=WORKER, task_id=TASK_ID, family=TASK_FAMILY)
        task = self.s._state.get_task(TASK_ID)
        task.time_running = 0
        task.updated = 5
        return task

    def _sample(self, metric_name):
        """Return the collector registry's sample for ``metric_name`` with the test labels."""
        return self.collector.registry.get_sample_value(metric_name, labels=self.labels)

    def _assert_metrics(self, counter_name, expected_gauge):
        """Check that ``counter_name`` equals 1 and the execution-time gauge equals ``expected_gauge``."""
        self.assertEqual(self._sample(counter_name), 1)
        self.assertEqual(self._sample(self.gauge_name), expected_gauge)

    def test_handle_task_started(self):
        task = self.startTask()
        self.collector.handle_task_started(task)

        # the gauge is expected to read 0 after a start event
        self._assert_metrics('luigi_task_started_total', 0)

    def test_handle_task_failed(self):
        task = self.startTask()
        self.collector.handle_task_failed(task)

        self._assert_metrics('luigi_task_failed_total', task.updated - task.time_running)

    def test_handle_task_disabled(self):
        task = self.startTask()
        self.collector.handle_task_disabled(task, self.s._config)

        self._assert_metrics('luigi_task_disabled_total', task.updated - task.time_running)

    def test_handle_task_done(self):
        task = self.startTask()
        self.collector.handle_task_done(task)

        self._assert_metrics('luigi_task_done_total', task.updated - task.time_running)

    def test_configure_http_handler(self):
        mock_http_handler = mock.MagicMock()
        self.collector.configure_http_handler(mock_http_handler)

        # the collector must set the Prometheus exposition content type on the handler
        mock_http_handler.set_header.assert_called_once_with('Content-Type', CONTENT_TYPE_LATEST)