def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], 'A()') self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) w2.stop()
def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") w.add(b) w.run() self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) w.stop() w2.stop()
def test_ping_retry(self): """ Worker ping fails once. Ping continues to try to connect to scheduler Kind of ugly since it uses actual timing with sleep to test the thread """ sch = CentralPlannerScheduler( retry_delay=100, remove_delay=1000, worker_disconnect_delay=10, ) self._total_pings = 0 # class var so it can be accessed from fail_ping def fail_ping(worker): # this will be called from within keep-alive thread... self._total_pings += 1 raise Exception("Some random exception") sch.ping = fail_ping w = Worker( scheduler=sch, worker_id="foo", ping_interval=0.01 # very short between pings to make test fast ) # let the keep-alive thread run for a bit... time.sleep( 0.1) # yes, this is ugly but it's exactly what we need to test w.stop() self.assertTrue(self._total_pings > 1, msg="Didn't retry pings (%d pings performed)" % (self._total_pings, ))
def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, wait_interval=0.1) w.add(a) w2.add(b) threading.Thread(target=w.run).start() w2.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop()
def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
def test_ping_retry(self): """ Worker ping fails once. Ping continues to try to connect to scheduler Kind of ugly since it uses actual timing with sleep to test the thread """ sch = CentralPlannerScheduler( retry_delay=100, remove_delay=1000, worker_disconnect_delay=10, ) self._total_pings = 0 # class var so it can be accessed from fail_ping def fail_ping(worker): # this will be called from within keep-alive thread... self._total_pings += 1 raise Exception("Some random exception") sch.ping = fail_ping w = Worker( scheduler=sch, worker_id="foo", ping_interval=0.01 # very short between pings to make test fast ) # let the keep-alive thread run for a bit... time.sleep(0.1) # yes, this is ugly but it's exactly what we need to test w.stop() self.assertTrue( self._total_pings > 1, msg="Didn't retry pings (%d pings performed)" % (self._total_pings,) )
def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w2.add(eb) w.add(b) w2.run() self.assertFalse(b.complete()) w.run() self.assertTrue(b.complete()) w.stop() w2.stop()
class MultiprocessWorkerTest(unittest.TestCase): def setUp(self): self.scheduler = RemoteScheduler() self.scheduler.add_worker = Mock() self.scheduler.add_task = Mock() self.worker = Worker(scheduler=self.scheduler, worker_id='X', worker_processes=2) def tearDown(self): self.worker.stop() def test_positive_path(self): a = DummyTask("a") b = DummyTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock( side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]) self.assertTrue(self.worker.run()) self.assertTrue(c.has_run) def test_path_with_task_failures(self): class FailingTask(DummyTask): def run(self): raise Exception("I am failing") a = FailingTask("a") b = FailingTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock( side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]) self.assertFalse(self.worker.run())
class MultiprocessWorkerTest(unittest.TestCase): def setUp(self): self.scheduler = RemoteScheduler() self.scheduler.add_worker = Mock() self.scheduler.add_task = Mock() self.worker = Worker(scheduler=self.scheduler, worker_id='X', worker_processes=2) def tearDown(self): self.worker.stop() def test_positive_path(self): a = DummyTask("a") b = DummyTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock(side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]) self.assertTrue(self.worker.run()) self.assertTrue(c.has_run) def test_path_with_task_failures(self): class FailingTask(DummyTask): def run(self): raise Exception("I am failing") a = FailingTask("a") b = FailingTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock(side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]) self.assertFalse(self.worker.run())
def test_stop_worker_kills_subprocesses(self): w = Worker(worker_processes=2) hung_task = HungWorker() w.add(hung_task) w._run_task(hung_task.task_id) pids = [p.pid for p in w._running_tasks.values()] self.assertEqual(1, len(pids)) pid = pids[0] def is_running(): return pid in {p.pid for p in psutil.Process().children()} self.assertTrue(is_running()) w.stop() self.assertFalse(is_running())
def invoke_task(godzilla_task, task_id=None, **kwargs): print godzilla_task, task_id, kwargs task = tasks[godzilla_task] kwargs['task_id'] = task_id task = task.as_luigi(**kwargs) w = Worker(scheduler=sch) w.add(task) try: w.run() if not w.run_succeeded: raise Exception("job failed") return task.output().path finally: w.stop()
def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop()
def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w.add(b) w2.add(eb) logging.debug("RUNNING BROKEN WORKER") w2.run() self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop()
def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop()
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_stop_getting_new_work(self): d = DummyTask() self.w.add(d) self.assertFalse(d.complete()) self.w.handle_interrupt(signal.SIGUSR1, None) self.w.run() self.assertFalse(d.complete()) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse(self.w.run()) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_allow_reschedule_with_many_missing_deps(self): class A(Task): """ Task that must run twice to succeed """ i = luigi.IntParameter() runs = 0 def complete(self): return self.runs >= 2 def run(self): self.runs += 1 class B(Task): done = False def requires(self): return map(A, range(20)) def complete(self): return self.done def run(self): self.done = True b = B() w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1) self.assertTrue(w.add(b)) self.assertFalse(w.run()) # For b to be done, we must have rescheduled its dependencies to run them twice self.assertTrue(b.complete()) self.assertTrue(all(a.complete() for a in b.deps())) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], 'A()') self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop() def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.w.add(b_a) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.w2.add(b_c) self.w.run() # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w.add(b) w2.add(eb) logging.debug("RUNNING BROKEN WORKER") w2.run() self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w2.add(eb) w.add(b) w2.run() self.assertFalse(b.complete()) w.run() self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, wait_interval=0.1) w.add(a) w2.add(b) threading.Thread(target=w.run).start() w2.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") w.add(b) w.run() self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
class WorkerEmailTest(EmailTest): def setUp(self): super(WorkerEmailTest, self).setUp() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.worker = Worker(scheduler=sch, worker_id="foo") def tearDown(self): self.worker.stop() @with_config(EMAIL_CONFIG) def test_connection_error(self): sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337) worker = Worker(scheduler=sch) self.waits = 0 def dummy_wait(): self.waits += 1 sch._wait = dummy_wait class A(DummyTask): pass a = A() self.assertEquals(self.last_email, None) worker.add(a) self.assertEquals(self.waits, 2) # should attempt to add it 3 times self.assertNotEquals(self.last_email, None) self.assertEquals(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a,)) worker.stop() @with_config(EMAIL_CONFIG) def test_complete_error(self): class A(DummyTask): def complete(self): raise Exception("b0rk") a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0]) self.worker.run() self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0]) self.assertFalse(a.has_run) @with_config(EMAIL_CONFIG) def test_complete_return_value(self): class A(DummyTask): def complete(self): pass # no return value should be an error a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0]) self.worker.run() self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0]) self.assertFalse(a.has_run) @with_config(EMAIL_CONFIG) def test_run_error(self): class A(luigi.Task): def complete(self): return False def run(self): raise Exception("b0rk") a = A() self.worker.add(a) self.assertEquals(self.last_email, None) self.worker.run() self.assertEquals(("Luigi: %s FAILED" % (a,)), self.last_email[0]) def test_no_error(self): class A(DummyTask): pass a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(self.last_email, None) self.worker.run() self.assertEquals(self.last_email, None) self.assertTrue(a.complete())
def test_ping_thread_shutdown(self): w = Worker(ping_interval=0.01) self.assertTrue(w._keep_alive_thread.is_alive()) w.stop() # should stop within 0.01 s self.assertFalse(w._keep_alive_thread.is_alive())
class WorkerTaskGlobalEventHandlerTests(unittest.TestCase): @with_config(dict(worker_history=dict(record_worker_history_sqs='true', sqs_queue_name='name', aws_access_key_id='key', aws_secret_access_key='secret_key'), worker_metadata=dict(meta1='data1'))) def setUp(self): try: from luigi.sqs_history import SqsHistory, SqsTaskHistory, SqsWorkerHistory except ImportError as e: raise unittest.SkipTest('Could not test WorkerTaskGlobalEventHandlerTests: %s' % e) # Replace _config method with one that uses our dummy queue. def fake_config(s, *args): s._queue = DummyQueue() SqsHistory._config = fake_config # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def _parse_task_events(self, messages): results = {} for m in messages: event = m.get('event') if not event: continue messages = results.get(event, []) messages.append(m) results[event] = messages return results def test_dep(self): class A(Task): param_a = luigi.Parameter(default="a") def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) sent_messages = [json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages] event_messages = self._parse_task_events(sent_messages) self.assertEquals(4, len(event_messages)) # Check started events: started_events = event_messages.get(Event.START) self.assertEquals(2, len(started_events)) self.assertEquals('A(param_a=a)', started_events[0]['task']['id']) self.assertEquals('B()', started_events[1]['task']['id']) # Check success events success_events = event_messages.get(Event.SUCCESS) self.assertEquals(2, len(success_events)) self.assertEquals('A(param_a=a)', success_events[0]['task']['id']) self.assertEquals('B()', success_events[1]['task']['id']) # Check processing time events processing_events = event_messages.get(Event.PROCESSING_TIME) self.assertEquals(2, len(processing_events)) self.assertEquals('A(param_a=a)', processing_events[0]['task']['id']) self.assertTrue('processing_time' in processing_events[0]) self.assertEquals('B()', processing_events[1]['task']['id']) self.assertTrue('processing_time' in processing_events[1]) # Check dependency event dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED) self.assertEquals(1, len(dependency_event)) self.assertEquals('B()', dependency_event[0]['task']['id']) self.assertEquals('A(param_a=a)', dependency_event[0]['dependency_task']['id']) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) sent_messages = [json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages] event_messages = self._parse_task_events(sent_messages) self.assertEquals(2, len(event_messages)) # Check dependency event dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED) self.assertEquals(1, len(dependency_event)) self.assertEquals('B()', dependency_event[0]['task']['id']) self.assertEquals('A()', dependency_event[0]['dependency_task']['id']) # Check dependency missing event dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING) self.assertEquals(1, len(dependency_missing_event)) self.assertEquals('A()', dependency_missing_event[0]['task']['id']) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() a.has_run = False self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) sent_messages = [json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages] event_messages = self._parse_task_events(sent_messages) self.assertEquals(3, len(event_messages)) # Check failure event failure_event = event_messages.get(Event.FAILURE) self.assertEquals(1, len(failure_event)) self.assertEquals('A()', failure_event[0]['task']['id']) self.assertEquals('Exception()', failure_event[0]['exception']) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) self.assertTrue(self.w2.add(b_c)) sent_messages = [json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages] event_messages = self._parse_task_events(sent_messages) # Verify missing event dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING) self.assertEquals(1, len(dependency_missing_event)) self.assertEquals('A()', dependency_missing_event[0]['task']['id']) sent_messages2 = [json.loads(m.get_body()) for m in self.w2._worker_history_impl._queue.messages] event_messages2 = self._parse_task_events(sent_messages2) # Verify present event dependency_present_event = event_messages2.get(Event.DEPENDENCY_PRESENT) self.assertEquals(1, len(dependency_present_event)) self.assertEquals('C()', dependency_present_event[0]['task']['id']) def test_broken_task(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(object): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = False def complete(self): return self.has_run b = B() b.has_run = False try: self.assertTrue(self.w.add(b)) except: pass sent_messages = [json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages] event_messages = self._parse_task_events(sent_messages) # Verify broken event broken_event = event_messages.get(Event.BROKEN_TASK) self.assertEquals(1, len(broken_event)) self.assertEquals('B()', broken_event[0]['task']['id']) self.assertEquals("Exception('requires() must return Task objects',)", broken_event[0]['exception'])
class ExceptionFormatTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler() self.w = Worker(scheduler=self.sch) def tear_down(self): self.w.stop() def test_fail_run(self): task = FailRunTask(foo='foo', bar='bar') self._run_task(task) def test_fail_run_html(self): task = FailRunTask(foo='foo', bar='bar') self._run_task_html(task) def test_fail_schedule(self): task = FailSchedulingTask(foo='foo', bar='bar') self._run_task(task) def test_fail_schedule_html(self): task = FailSchedulingTask(foo='foo', bar='bar') self._run_task_html(task) @with_config({'core': {'error-email': '*****@*****.**', 'email-prefix': '[TEST] '}}) @mock.patch('luigi.notifications.send_error_email') def _run_task(self, task, mock_send): self.w.add(task) self.w.run() self.assertEqual(mock_send.call_count, 1) args, kwargs = mock_send.call_args self._check_subject(args[0], task) self._check_body(args[1], task, html=False) @with_config({'core': {'error-email': '*****@*****.**', 'email-prefix': '[TEST] ', 'email-type': 'html'}}) @mock.patch('luigi.notifications.send_error_email') def _run_task_html(self, task, mock_send): self.w.add(task) self.w.run() self.assertEqual(mock_send.call_count, 1) args, kwargs = mock_send.call_args self._check_subject(args[0], task) self._check_body(args[1], task, html=True) def _check_subject(self, subject, task): self.assertIn(task.task_id, subject) def _check_body(self, body, task, html=False): if html: self.assertIn('<th>name</th><td>{}</td>'.format(task.task_family), body) self.assertIn('<div class="highlight"', body) self.assertIn('Oops!', body) for param, value in task.param_kwargs.items(): self.assertIn('<th>{}</th><td>{}</td>'.format(param, value), body) else: self.assertIn('Name: {}\n'.format(task.task_family), body) self.assertIn('Parameters:\n', body) self.assertIn('TestException: Oops!', body) for param, value in task.param_kwargs.items(): self.assertIn('{}: {}\n'.format(param, value), body) @with_config({"core": {"error-email": "[email protected]"}}) def testEmailRecipients(self): six.assertCountEqual(self, notifications._email_recipients(), ["[email protected]"]) six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]", "[email protected]"]) six.assertCountEqual(self, notifications._email_recipients(["[email protected]", "[email protected]"]), ["[email protected]", "[email protected]", "[email protected]"]) @with_config({"core": {}}, replace_sections=True) def testEmailRecipientsNoConfig(self): six.assertCountEqual(self, notifications._email_recipients(), []) six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]"]) six.assertCountEqual(self, notifications._email_recipients(["[email protected]", "[email protected]"]), ["[email protected]", "[email protected]"])
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse(self.w.run()) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_dynamic_dependencies(self): class DynamicRequires(Task): p = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(self.p, 'parent')) def run(self): dummy_targets = yield [DynamicDummyTask(os.path.join(self.p, str(i))) for i in range(5)] dummy_targets += yield [DynamicDummyTask(os.path.join(self.p, str(i))) for i in range(5, 7)] with self.output().open('w') as f: for i, d in enumerate(dummy_targets): for line in d.open('r'): print >>f, '%d: %s' % (i, line.strip()) t = DynamicRequires(p=tempfile.mktemp()) luigi.build([t], local_scheduler=True) self.assertTrue(t.complete()) # loop through output and verify f = t.output().open('r') for i in xrange(7): self.assertEqual(f.readline().strip(), '%d: Done!' % i) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], 'A()') self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop() def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
class WorkerEmailTest(unittest.TestCase): def setUp(self): super(WorkerEmailTest, self).setUp() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.worker = Worker(scheduler=sch, worker_id="foo") def tearDown(self): self.worker.stop() @email_patch def test_connection_error(self, emails): sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1) worker = Worker(scheduler=sch) self.waits = 0 def dummy_wait(): self.waits += 1 sch._wait = dummy_wait class A(DummyTask): pass a = A() self.assertEqual(emails, []) worker.add(a) self.assertEqual(self.waits, 2) # should attempt to add it 3 times self.assertNotEqual(emails, []) self.assertTrue( emails[0].find("Luigi: Framework error while scheduling %s" % (a, )) != -1) worker.stop() @email_patch def test_complete_error(self, emails): class A(DummyTask): def complete(self): raise Exception("b0rk") a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.assertFalse(a.has_run) @email_patch def test_complete_return_value(self, emails): class A(DummyTask): def complete(self): pass # no return value should be an error a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.assertFalse(a.has_run) @email_patch def test_run_error(self, emails): class A(luigi.Task): def complete(self): return False def run(self): raise Exception("b0rk") a = A() self.worker.add(a) self.assertEqual(emails, []) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s FAILED" % (a, )) != -1) @email_patch def test_no_error(self, emails): class A(DummyTask): pass a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertEqual(emails, []) self.worker.run() self.assertEqual(emails, []) self.assertTrue(a.complete())
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_stop_getting_new_work(self): d = DummyTask() self.w.add(d) self.assertFalse(d.complete()) self.w.handle_interrupt(signal.SIGUSR1, None) self.w.run() self.assertFalse(d.complete()) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_tracking_url(self): tracking_url = 'http://test_url.com/' class A(Task): has_run = False def complete(self): return self.has_run def run(self, tracking_url_callback=None): if tracking_url_callback is not None: tracking_url_callback(tracking_url) self.has_run = True a = A() self.assertTrue(self.w.add(a)) self.assertTrue(self.w.run()) tasks = self.sch.task_list('DONE', '') self.assertEqual(1, len(tasks)) self.assertEqual(tracking_url, tasks['A()']['tracking_url']) def test_type_error_in_tracking_run(self): class A(Task): num_runs = 0 def complete(self): return False def run(self, tracking_url_callback=None): self.num_runs += 1 raise TypeError('bad type') a = A() self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) # Should only run and fail once, not retry because of the type error self.assertEqual(1, a.num_runs) def test_fail(self): class CustomException(BaseException): def __init__(self, msg): self.msg = msg class A(Task): def run(self): self.has_run = True raise CustomException('bad things') def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse( self.w.run() ) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_allow_reschedule_with_many_missing_deps(self): class A(Task): """ Task that must run twice to succeed """ i = luigi.IntParameter() runs = 0 def complete(self): return self.runs >= 2 def run(self): self.runs += 1 class B(Task): done = False def requires(self): return map(A, range(20)) def complete(self): return self.done def run(self): self.done = True b = B() w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1) self.assertTrue(w.add(b)) self.assertFalse(w.run()) # For b to be done, we must have rescheduled its dependencies to run them twice self.assertTrue(b.complete()) self.assertTrue(all(a.complete() for a in b.deps())) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], 'A()') self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop() def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_stop_getting_new_work(self): d = DummyTask() self.w.add(d) self.assertFalse(d.complete()) try: self.w.handle_interrupt(signal.SIGUSR1, None) except AttributeError: raise unittest.SkipTest('signal.SIGUSR1 not found on this system') self.w.run() self.assertFalse(d.complete()) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_tracking_url(self): tracking_url = 'http://test_url.com/' class A(Task): has_run = False def complete(self): return self.has_run def run(self, tracking_url_callback=None): if tracking_url_callback is not None: tracking_url_callback(tracking_url) self.has_run = True a = A() self.assertTrue(self.w.add(a)) self.assertTrue(self.w.run()) tasks = self.sch.task_list('DONE', '') self.assertEqual(1, len(tasks)) self.assertEqual(tracking_url, tasks['A()']['tracking_url']) def test_type_error_in_tracking_run(self): class A(Task): num_runs = 0 def complete(self): return False def run(self, tracking_url_callback=None): self.num_runs += 1 raise TypeError('bad type') a = A() self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) # Should only run and fail once, not retry because of the type error self.assertEqual(1, a.num_runs) def test_fail(self): class CustomException(BaseException): def __init__(self, msg): self.msg = msg class A(Task): def run(self): self.has_run = True raise CustomException('bad things') def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse(self.w.run()) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_gets_missed_work(self): class A(Task): done = False def complete(self): return self.done def run(self): self.done = True a = A() self.assertTrue(self.w.add(a)) # simulate a missed get_work response self.assertEqual('A()', self.sch.get_work(worker='X')['task_id']) self.assertTrue(self.w.run()) self.assertTrue(a.complete()) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_fails_registering_signal(self): with mock.patch('luigi.worker.signal', spec=['signal']): # mock will raise an attribute error getting signal.SIGUSR1 Worker() def test_allow_reschedule_with_many_missing_deps(self): class A(Task): """ Task that must run twice to succeed """ i = luigi.IntParameter() runs = 0 def complete(self): return self.runs >= 2 def run(self): self.runs += 1 class B(Task): done = False def requires(self): return map(A, range(20)) def complete(self): return self.done def run(self): self.done = True b = B() w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1) self.assertTrue(w.add(b)) self.assertFalse(w.run()) # For b to be done, we must have rescheduled its dependencies to run them twice self.assertTrue(b.complete()) self.assertTrue(all(a.complete() for a in b.deps())) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], 'A()') self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop() def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class D(DummyTask): pass d = D() class C(DummyTask): def requires(self): return d c = C() class B(DummyTask): def requires(self): return c, a b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertTrue(d.has_run) self.assertFalse(a.has_run) w.stop()
class WorkerEmailTest(EmailTest): def setUp(self): super(WorkerEmailTest, self).setUp() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.worker = Worker(scheduler=sch, worker_id="foo") def tearDown(self): self.worker.stop() def test_connection_error(self): sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337) worker = Worker(scheduler=sch) self.waits = 0 def dummy_wait(): self.waits += 1 sch._wait = dummy_wait class A(DummyTask): pass a = A() self.assertEquals(self.last_email, None) worker.add(a) self.assertEquals(self.waits, 2) # should attempt to add it 3 times self.assertNotEquals(self.last_email, None) self.assertEquals(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a, )) worker.stop() def test_complete_error(self): class A(DummyTask): def complete(self): raise Exception("b0rk") a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.worker.run() self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.assertFalse(a.has_run) def test_complete_return_value(self): class A(DummyTask): def complete(self): pass # no return value should be an error a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.worker.run() self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.assertFalse(a.has_run) def test_run_error(self): class A(luigi.Task): def complete(self): return False def run(self): raise Exception("b0rk") a = A() self.worker.add(a) self.assertEquals(self.last_email, None) self.worker.run() self.assertEquals(("Luigi: %s FAILED" % (a, )), self.last_email[0]) def test_no_error(self): class A(DummyTask): pass a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(self.last_email, None) self.worker.run() self.assertEquals(self.last_email, None) self.assertTrue(a.complete())
class WorkerTaskGlobalEventHandlerTests(unittest.TestCase): @with_config( dict(worker_history=dict(record_worker_history_sqs='true', sqs_queue_name='name', aws_access_key_id='key', aws_secret_access_key='secret_key'), worker_metadata=dict(meta1='data1'))) def setUp(self): try: from luigi.sqs_history import SqsHistory, SqsTaskHistory, SqsWorkerHistory except ImportError as e: raise unittest.SkipTest( 'Could not test WorkerTaskGlobalEventHandlerTests: %s' % e) # Replace _config method with one that uses our dummy queue. def fake_config(s, *args): s._queue = DummyQueue() SqsHistory._config = fake_config # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def _parse_task_events(self, messages): results = {} for m in messages: event = m.get('event') if not event: continue messages = results.get(event, []) messages.append(m) results[event] = messages return results def test_dep(self): class A(Task): param_a = luigi.Parameter(default="a") def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) self.assertEquals(4, len(event_messages)) # Check started events: started_events = event_messages.get(Event.START) self.assertEquals(2, len(started_events)) self.assertEquals('A(param_a=a)', started_events[0]['task']['id']) self.assertEquals('B()', started_events[1]['task']['id']) # Check success events success_events = event_messages.get(Event.SUCCESS) self.assertEquals(2, len(success_events)) self.assertEquals('A(param_a=a)', success_events[0]['task']['id']) self.assertEquals('B()', success_events[1]['task']['id']) # Check processing time events processing_events = event_messages.get(Event.PROCESSING_TIME) self.assertEquals(2, len(processing_events)) self.assertEquals('A(param_a=a)', processing_events[0]['task']['id']) self.assertTrue('processing_time' in processing_events[0]) self.assertEquals('B()', processing_events[1]['task']['id']) self.assertTrue('processing_time' in processing_events[1]) # Check dependency event dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED) self.assertEquals(1, len(dependency_event)) self.assertEquals('B()', dependency_event[0]['task']['id']) self.assertEquals('A(param_a=a)', dependency_event[0]['dependency_task']['id']) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) self.assertEquals(2, len(event_messages)) # Check dependency event dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED) self.assertEquals(1, len(dependency_event)) self.assertEquals('B()', dependency_event[0]['task']['id']) self.assertEquals('A()', dependency_event[0]['dependency_task']['id']) # Check dependency missing event dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING) self.assertEquals(1, len(dependency_missing_event)) self.assertEquals('A()', dependency_missing_event[0]['task']['id']) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() a.has_run = False self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) self.assertEquals(3, len(event_messages)) # Check failure event failure_event = event_messages.get(Event.FAILURE) self.assertEquals(1, len(failure_event)) self.assertEquals('A()', failure_event[0]['task']['id']) self.assertEquals('Exception()', failure_event[0]['exception']) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) self.assertTrue(self.w2.add(b_c)) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) # Verify missing event dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING) self.assertEquals(1, len(dependency_missing_event)) self.assertEquals('A()', dependency_missing_event[0]['task']['id']) sent_messages2 = [ json.loads(m.get_body()) for m in self.w2._worker_history_impl._queue.messages ] event_messages2 = self._parse_task_events(sent_messages2) # Verify present event dependency_present_event = event_messages2.get( Event.DEPENDENCY_PRESENT) self.assertEquals(1, len(dependency_present_event)) self.assertEquals('C()', dependency_present_event[0]['task']['id']) def test_broken_task(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(object): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = False def complete(self): return self.has_run b = B() b.has_run = False try: self.assertTrue(self.w.add(b)) except: pass sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) # Verify broken event broken_event = event_messages.get(Event.BROKEN_TASK) self.assertEquals(1, len(broken_event)) self.assertEquals('B()', broken_event[0]['task']['id']) self.assertEquals("Exception('requires() must return Task objects',)", broken_event[0]['exception'])
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.w.add(b_a) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.w2.add(b_c) self.w.run( ) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w.add(b) w2.add(eb) logging.debug("RUNNING BROKEN WORKER") w2.run() self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w2.add(eb) w.add(b) w2.run() self.assertFalse(b.complete()) w.run() self.assertTrue(b.complete()) w.stop() w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") w.add(b) w.run() self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()
class WorkerEmailTest(unittest.TestCase): def setUp(self): super(WorkerEmailTest, self).setUp() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.worker = Worker(scheduler=sch, worker_id="foo") def tearDown(self): self.worker.stop() @email_patch def test_connection_error(self, emails): sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1) worker = Worker(scheduler=sch) self.waits = 0 def dummy_wait(): self.waits += 1 sch._wait = dummy_wait class A(DummyTask): pass a = A() self.assertEqual(emails, []) worker.add(a) self.assertEqual(self.waits, 2) # should attempt to add it 3 times self.assertNotEquals(emails, []) self.assertTrue(emails[0].find("Luigi: Framework error while scheduling %s" % (a,)) != -1) worker.stop() @email_patch def test_complete_error(self, emails): class A(DummyTask): def complete(self): raise Exception("b0rk") a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1) self.assertFalse(a.has_run) @email_patch def test_complete_return_value(self, emails): class A(DummyTask): def complete(self): pass # no return value should be an error a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a,)) != -1) self.assertFalse(a.has_run) @email_patch def test_run_error(self, emails): class A(luigi.Task): def complete(self): return False def run(self): raise Exception("b0rk") a = A() self.worker.add(a) self.assertEqual(emails, []) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s FAILED" % (a,)) != -1) @email_patch def test_no_error(self, emails): class A(DummyTask): pass a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertEqual(emails, []) self.worker.run() self.assertEqual(emails, []) self.assertTrue(a.complete())
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse( self.w.run() ) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_dynamic_dependencies(self): class DynamicRequires(Task): p = luigi.Parameter() def output(self): return luigi.LocalTarget(os.path.join(self.p, 'parent')) def run(self): dummy_targets = yield [ DynamicDummyTask(os.path.join(self.p, str(i))) for i in range(5) ] dummy_targets += yield [ DynamicDummyTask(os.path.join(self.p, str(i))) for i in range(5, 7) ] with self.output().open('w') as f: for i, d in enumerate(dummy_targets): for line in d.open('r'): print >> f, '%d: %s' % (i, line.strip()) p = tempfile.mkdtemp() try: t = DynamicRequires(p=p) luigi.build([t], local_scheduler=True) self.assertTrue(t.complete()) # loop through output and verify f = t.output().open('r') for i in xrange(7): self.assertEqual(f.readline().strip(), '%d: Done!' % i) finally: shutil.rmtree(p) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_allow_reschedule_with_many_missing_deps(self): class A(Task): """ Task that must run twice to succeed """ i = luigi.IntParameter() runs = 0 def complete(self): return self.runs >= 2 def run(self): self.runs += 1 class B(Task): done = False def requires(self): return map(A, range(20)) def complete(self): return self.done def run(self): self.done = True b = B() w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1) self.assertTrue(w.add(b)) self.assertFalse(w.run()) # For b to be done, we must have rescheduled its dependencies to run them twice self.assertTrue(b.complete()) self.assertTrue(all(a.complete() for a in b.deps())) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEqual(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_die_for_non_unique_pending(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) self.assertTrue(w.add(b)) self.assertTrue(w2.add(b)) self.assertEqual(w._get_work()[0], 'A()') self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop() def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()