def test_interleaved_workers(self):
    # Worker X registers the real task B (which requires A); worker Y registers
    # ExternalB, which shares B's task id but never reports complete.
    # Running Y first must leave A and B incomplete; running X afterwards must
    # complete both.
    class A(DummyTask):
        pass

    a = A()

    class B(DummyTask):
        def requires(self):
            return a

    class ExternalB(ExternalTask):
        # Same task family as B, so both map to the task id "B()".
        task_family = "B"

        def complete(self):
            return False

    b = B()
    eb = ExternalB()
    # assertEqual instead of assertEquals: the latter is a deprecated alias
    # that no longer exists on recent Python versions.
    self.assertEqual(eb.task_id, "B()")

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    w = Worker(scheduler=sch, worker_id='X')
    w2 = Worker(scheduler=sch, worker_id='Y')

    w.add(b)
    w2.add(eb)

    logging.debug("RUNNING BROKEN WORKER")
    w2.run()
    self.assertFalse(a.complete())
    self.assertFalse(b.complete())

    logging.debug("RUNNING FUNCTIONAL WORKER")
    w.run()
    self.assertTrue(a.complete())
    self.assertTrue(b.complete())
def test_worker_executes_fork_handler(self):
    # Runs a never-complete stub task on a two-process worker, then drains
    # three entries from each of self.worker_queue / self.child_queue and
    # checks which markers were recorded.
    class Stub(luigi.Task):
        def complete(self):
            return False

        def run(self):
            pass

    scheduler = CentralPlannerScheduler()
    worker = Worker(scheduler=scheduler, worker_processes=2)
    worker.add(Stub())
    worker.run()

    # NOTE(review): presumably gives the handlers time to enqueue - confirm.
    time.sleep(0.1)

    from_worker = [self.worker_queue.get() for _ in range(3)]
    self.assertIn("both", from_worker)
    self.assertIn("master", from_worker)

    from_child = [self.child_queue.get() for _ in range(3)]
    self.assertIn("both", from_child)
    self.assertIn("child", from_child)

    # Nothing beyond the three expected entries may be left on either queue.
    self.assertTrue(self.worker_queue.empty())
    self.assertTrue(self.child_queue.empty())
def test_interleaved_workers3(self):
    # Two keep-alive workers share one scheduler: X is given A, Y is given B
    # (which requires A). X runs on a background thread while Y runs in the
    # calling thread; both tasks must be complete once Y's run() returns.
    class A(DummyTask):
        def run(self):
            logging.debug('running A')
            time.sleep(0.1)
            super(A, self).run()

    task_a = A()

    class B(DummyTask):
        def requires(self):
            return task_a

        def run(self):
            logging.debug('running B')
            super(B, self).run()

    task_b = B()

    scheduler = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker_x = Worker(scheduler=scheduler, worker_id='X', keep_alive=True)
    worker_y = Worker(scheduler=scheduler, worker_id='Y', keep_alive=True, wait_interval=0.1)

    worker_x.add(task_a)
    worker_y.add(task_b)

    background = threading.Thread(target=worker_x.run)
    background.start()
    worker_y.run()

    self.assertTrue(task_a.complete())
    self.assertTrue(task_b.complete())

    worker_x.stop()
    worker_y.stop()
def test_complete_exception(self):
    """Tests that a task is still scheduled if its sister task crashes in the complete() method"""
    class A(DummyTask):
        def complete(self):
            raise Exception("doh")

    crashing = A()

    class C(DummyTask):
        pass

    sister = C()

    class B(DummyTask):
        def requires(self):
            return crashing, sister

    parent = B()

    scheduler = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker = Worker(scheduler=scheduler, worker_id="foo")
    worker.add(parent)
    worker.run()

    # Only the healthy sister task C is expected to have run.
    self.assertFalse(parent.has_run)
    self.assertTrue(sister.has_run)
    self.assertFalse(crashing.has_run)
    worker.stop()
def test_interleaved_workers2(self):
    # two tasks without dependencies, one external, one not
    class B(DummyTask):
        pass

    class ExternalB(ExternalTask):
        # Same family as B, hence the same task id.
        task_family = "B"

        def complete(self):
            return False

    real_b = B()
    external_b = ExternalB()
    self.assertEqual(external_b.task_id, "B()")

    scheduler = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker_x = Worker(scheduler=scheduler, worker_id='X')
    worker_y = Worker(scheduler=scheduler, worker_id='Y')

    # The external variant is registered first, then the runnable one.
    self.assertTrue(worker_y.add(external_b))
    self.assertTrue(worker_x.add(real_b))

    # Y's run reports success but must not have run B.
    self.assertTrue(worker_y.run())
    self.assertFalse(real_b.complete())

    # X's run is expected to complete B.
    self.assertTrue(worker_x.run())
    self.assertTrue(real_b.complete())

    worker_x.stop()
    worker_y.stop()
def _run(self, dt, interval):
    """Schedule the generated tasks plus their follow-up handlers, then run them.

    Every task returned by ``self.gen_tasks(dt, interval)`` is added to a new
    Worker bound to ``self.sch``. For each of those tasks that is a
    ``RunMetricRules`` instance a follow-up task is added as well:

    * handler ``'save_to_mongodb'``: a ``SaveMetrics`` task, placed behind a
      ``FilterMetrics`` task when the rule task has truthy ``filter_rules``;
    * any other handler: a ``MetricsHandler`` task that receives the handler
      as ``handler_func``.

    :param dt: date value forwarded to ``gen_tasks`` and to the follow-up tasks.
    :param interval: interval value forwarded the same way.
    """
    w = Worker(scheduler=self.sch)
    tasks = self.gen_tasks(dt, interval)

    # Explicit loop rather than map(): on Python 3 map() is lazy, so using it
    # only for its side effect would never call w.add at all.
    for task in tasks:
        w.add(task)

    for t in tasks:
        if not isinstance(t, RunMetricRules):
            continue

        if t.handler == 'save_to_mongodb':
            if t.filter_rules:
                # filter
                filter_metrics_args = {"require": t, 'filter_rules': t.filter_rules}
                filter_metrics = FilterMetrics(**filter_metrics_args)
                require_task = filter_metrics
                w.add(filter_metrics)
            else:
                require_task = t

            # save
            kwargs = dict(**self.mongo_conf)  # copy, so the shared config is not mutated
            kwargs.update({"require": require_task, "date": dt, 'app_id': self.app_id, 'interval': interval})
            save_metrics = SaveMetrics(**kwargs)
            w.add(save_metrics)
        else:
            kwargs = dict(**self.mongo_conf)  # copy, so the shared config is not mutated
            kwargs.update({"require": t, "date": dt, 'app_id': self.app_id, 'interval': interval,
                           'handler_func': t.handler})
            metric_handler = MetricsHandler(**kwargs)
            w.add(metric_handler)

    w.run()
def test_task_limit_exceeded(self):
    # Runs a ForkBombTask(3, 2) tree on a default Worker and expects the root
    # to stay incomplete while exactly three of the four listed leaf tasks
    # finished.
    w = Worker()
    t = ForkBombTask(3, 2)
    w.add(t)
    w.run()
    self.assertFalse(t.complete())

    leaf_branches = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1)]
    leaf_tasks = [ForkBombTask(3, 2, branch) for branch in leaf_branches]
    # assertEqual instead of the deprecated assertEquals alias; the generator
    # variable is named `leaf` so it does not shadow the root task `t`.
    self.assertEqual(3, sum(leaf.complete() for leaf in leaf_tasks),
                     "should have gracefully completed as much as possible even though the single last leaf didn't get scheduled")
class AssistantTest(unittest.TestCase):
    """Tests for a Worker created with ``assistant=True``."""

    def run(self, result=None):
        """Run one test with a fresh scheduler, assistant and scheduling worker.

        The scheduling worker 'X' is used as a context manager around the
        actual test execution. The result of ``TestCase.run`` is passed
        through to the caller instead of being dropped.
        """
        self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)
        with Worker(scheduler=self.sch, worker_id='X') as w:
            self.w = w
            return super(AssistantTest, self).run(result)

    def test_get_work(self):
        # A task scheduled by worker X is run to completion by the assistant.
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        # The task advertises a family name different from its class name;
        # the assistant's run must fail and the task must be listed as FAILED.
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [d.task_id])

    def test_unimported_job_type(self):
        # Source of a module that defines the task family used below.
        MODULE_CONTENTS = b'''
import luigi

class UnimportedTask(luigi.Task):
    def complete(self):
        return False
'''

        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [task.task_id])

        # check that it can import with the right module
        with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module:
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [task.task_id])
class MultiprocessWorkerTest(unittest.TestCase):
    """Runs a two-process Worker against a RemoteScheduler with mocked calls."""

    def setUp(self):
        # add_worker / add_task are replaced by mocks on the scheduler instance.
        remote = RemoteScheduler()
        remote.add_worker = Mock()
        remote.add_task = Mock()
        self.scheduler = remote
        self.worker = Worker(scheduler=self.scheduler, worker_id='X', worker_processes=2)

    def tearDown(self):
        self.worker.stop()

    def test_positive_path(self):
        # All tasks succeed, so the worker's run must report success.
        a = DummyTask("a")
        b = DummyTask("b")

        class MultipleRequirementTask(DummyTask):
            def requires(self):
                return [a, b]

        c = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(c))

        # Scripted get_work responses: a, b, c, then two "nothing left" answers.
        scripted_work = [(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]
        self.scheduler.get_work = Mock(side_effect=scripted_work)

        self.assertTrue(self.worker.run())
        self.assertTrue(c.has_run)

    def test_path_with_task_failures(self):
        # Both requirements raise in run(), so the worker's run must report failure.
        class FailingTask(DummyTask):
            def run(self):
                raise Exception("I am failing")

        a = FailingTask("a")
        b = FailingTask("b")

        class MultipleRequirementTask(DummyTask):
            def requires(self):
                return [a, b]

        c = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(c))

        scripted_work = [(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]
        self.scheduler.get_work = Mock(side_effect=scripted_work)

        self.assertFalse(self.worker.run())
def test_disabled_shutdown_hook(self):
    # A worker built with no_install_shutdown_handler=True must still finish
    # its task although SIGUSR1 is sent to this process.
    worker = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True)
    with worker:
        try:
            # try to kill the worker!
            os.kill(os.getpid(), signal.SIGUSR1)
        except AttributeError:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')

        # try to kill the worker... AGAIN!
        suicidal = SuicidalWorker(signal.SIGUSR1)
        worker.add(suicidal)
        worker.run()

        # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
        done_ids = list(self.sch.task_list('DONE', '').keys())
        self.assertEqual(done_ids, [suicidal.task_id])
def test_die_for_non_unique_pending(self):
    # Both workers register B (which requires A). Worker X fetches A through
    # _get_work() but never runs it; worker Y's run() must still return True
    # while A and B both remain incomplete.
    class A(DummyTask):
        def run(self):
            logging.debug('running A')
            time.sleep(0.1)
            super(A, self).run()

    a = A()

    class B(DummyTask):
        def requires(self):
            return a

        def run(self):
            logging.debug('running B')
            super(B, self).run()

    b = B()

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
    w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

    self.assertTrue(w.add(b))
    self.assertTrue(w2.add(b))

    self.assertEqual(w._get_work()[0], 'A()')
    self.assertTrue(w2.run())

    self.assertFalse(a.complete())
    self.assertFalse(b.complete())

    # Stop both workers; previously only w2 was stopped and w was leaked.
    w.stop()
    w2.stop()
def test_allow_reschedule_with_many_missing_deps(self):
    # B requires twenty A tasks, each of which only reports complete after
    # its second run. With max_reschedules=1 the worker's run() is expected
    # to return False while B and all of its dependencies end up complete.
    class A(Task):

        """ Task that must run twice to succeed """
        i = luigi.IntParameter()

        runs = 0

        def complete(self):
            return self.runs >= 2

        def run(self):
            self.runs += 1

    class B(Task):
        done = False

        def requires(self):
            return map(A, range(20))

        def complete(self):
            return self.done

        def run(self):
            self.done = True

    parent = B()
    worker = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
    self.assertTrue(worker.add(parent))
    self.assertFalse(worker.run())

    # For b to be done, we must have rescheduled its dependencies to run them twice
    self.assertTrue(parent.complete())
    self.assertTrue(all(dep.complete() for dep in parent.deps()))
def test_requires_exception(self):
    # A raises from requires(); adding B (which requires A and C) must
    # report failure, yet the healthy sibling C is still expected to run.
    class A(DummyTask):
        def requires(self):
            raise Exception("doh")

    broken = A()

    class C(DummyTask):
        pass

    sibling = C()

    class B(DummyTask):
        def requires(self):
            return broken, sibling

    parent = B()

    scheduler = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    worker = Worker(scheduler=scheduler, worker_id="foo")

    self.assertFalse(worker.add(parent))
    self.assertTrue(worker.run())

    self.assertFalse(parent.has_run)
    self.assertTrue(sibling.has_run)
    self.assertFalse(broken.has_run)
    worker.stop()
def test_single_threaded_worker_doesnot_execute_fork_handler(self):
    # With worker_processes=1 neither self.worker_queue nor self.child_queue
    # may receive anything.
    class Stub(luigi.Task):
        def complete(self):
            return False

        def run(self):
            pass

    scheduler = CentralPlannerScheduler()
    worker = Worker(scheduler=scheduler, worker_processes=1)
    worker.add(Stub())
    worker.run()

    # NOTE(review): presumably leaves time for any handler output to arrive - confirm.
    time.sleep(0.1)

    self.assertTrue(self.worker_queue.empty())
    self.assertTrue(self.child_queue.empty())
def invoke_task(godzilla_task, task_id=None, **kwargs):
    """Run one registered task on a fresh Worker and return its output path.

    The task is looked up in the module-level ``tasks`` mapping, converted
    with ``as_luigi(**kwargs)`` (``task_id`` is injected into the keyword
    arguments) and executed on a Worker bound to the module-level ``sch``.

    :param godzilla_task: key into ``tasks``.
    :param task_id: value passed to ``as_luigi`` as ``task_id``.
    :param kwargs: further keyword arguments forwarded to ``as_luigi``.
    :returns: ``task.output().path`` of the executed task.
    :raises Exception: with message "job failed" when ``w.run_succeeded`` is
        falsy after the run.
    """
    # Single-argument form prints the same text on Python 2 and Python 3;
    # the former print statement is a syntax error on Python 3.
    print("%s %s %s" % (godzilla_task, task_id, kwargs))

    task = tasks[godzilla_task]
    kwargs['task_id'] = task_id
    task = task.as_luigi(**kwargs)

    w = Worker(scheduler=sch)
    w.add(task)
    try:
        w.run()
        if not w.run_succeeded:
            raise Exception("job failed")
        return task.output().path
    finally:
        # Always stop the worker, whether the job succeeded or not.
        w.stop()
class AssistantTest(unittest.TestCase):
    """Tests for a Worker created with ``assistant=True``."""

    def setUp(self):
        # Worker X is entered as a context manager here and exited in tearDown.
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        scheduling_worker = Worker(scheduler=self.sch, worker_id='X')
        self.w = scheduling_worker.__enter__()
        self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)

    def tearDown(self):
        self.w.__exit__(None, None, None)

    def test_get_work(self):
        # A task scheduled by worker X is run to completion by the assistant.
        dummy = Dummy2Task('123')
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        self.assistant.run()
        self.assertTrue(dummy.complete())

    def test_bad_job_type(self):
        # The task advertises a family name different from its class name;
        # the assistant's run must fail and the task must be listed as FAILED.
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        dummy = Dummy3Task('123')
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(dummy.complete())

        failed_ids = list(self.sch.task_list('FAILED', '').keys())
        self.assertEqual(failed_ids, [str(dummy)])

    def test_unimported_job_type(self):
        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        failed_ids = list(self.sch.task_list('FAILED', '').keys())
        self.assertEqual(failed_ids, ['UnimportedTask()'])

        # check that it can import with the right module
        task.task_module = 'dummy_test_module.not_imported'
        self.w.add(task)
        self.assertTrue(self.assistant.run())
        done_ids = list(self.sch.task_list('DONE', '').keys())
        self.assertEqual(done_ids, ['UnimportedTask()'])
def _test_context_manager(self, force_multiprocessing):
    """Check that a configured task-process context manager wraps the task run.

    A temporary module providing ``MyContextManager`` is created, its dotted
    name is handed to the Worker as ``task_process_context``, and events on
    the task record that enter happened before run and exit after it.

    :param force_multiprocessing: forwarded unchanged to the Worker.
    """
    CONTEXT_MANAGER_MODULE = b'''
class MyContextManager(object):
    def __init__(self, task_process):
        self.task = task_process.task
    def __enter__(self):
        assert not self.task.run_event.is_set(), "the task should not have run yet"
        self.task.enter_event.set()
        return self
    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        assert self.task.run_event.is_set(), "the task should have run"
        self.task.exit_event.set()
'''

    class DummyEventRecordingTask(luigi.Task):
        def __init__(self, *args, **kwargs):
            # The events are created before the base initialiser runs.
            self.enter_event = multiprocessing.Event()
            self.exit_event = multiprocessing.Event()
            self.run_event = multiprocessing.Event()
            super(DummyEventRecordingTask, self).__init__(*args, **kwargs)

        def run(self):
            assert self.enter_event.is_set(), "the context manager should have been entered"
            assert not self.exit_event.is_set(), "the context manager should not have been exited yet"
            assert not self.run_event.is_set(), "the task should not have run yet"
            self.run_event.set()

        def complete(self):
            return self.run_event.is_set()

    with temporary_unloaded_module(CONTEXT_MANAGER_MODULE) as module_name:
        recording_task = DummyEventRecordingTask()
        context_path = module_name + '.MyContextManager'
        worker = Worker(task_process_context=context_path,
                        force_multiprocessing=force_multiprocessing)
        worker.add(recording_task)

        self.assertTrue(worker.run())
        self.assertTrue(recording_task.complete())
        self.assertTrue(recording_task.enter_event.is_set())
        self.assertTrue(recording_task.exit_event.is_set())
class WorkerEmailTest(EmailTest):
    """Checks which error e-mails (read from ``self.last_email``) a Worker sends.

    Uses assertEqual / assertNotEqual throughout; the assertEquals /
    assertNotEquals spellings are deprecated aliases.
    """

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def test_connection_error(self):
        # Scheduling against an unreachable remote scheduler must produce a
        # framework-error mail after the scheduler's waits between attempts.
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            # Counts the waits instead of actually sleeping.
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(self.last_email, None)
        worker.add(a)
        self.assertEqual(self.waits, sch._attempts - 1)
        self.assertNotEqual(self.last_email, None)
        self.assertEqual(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a,))

    def test_complete_error(self):
        # complete() raising must yield a "failed scheduling" mail, and the
        # task must not be run afterwards.
        class A(DummyTask):
            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(self.last_email, None)
        self.worker.add(a)
        self.assertEqual(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.worker.run()
        self.assertEqual(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.assertFalse(a.has_run)

    # We removed the non-boolean check due to bug, this should be brought back in after Christmas /nyman
    # def test_complete_return_value(self):
    #     class A(DummyTask):
    #         def complete(self):
    #             return
    #
    #     a = A()
    #     self.assertEquals(self.last_email, None)
    #     self.worker.add(a)
    #     self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
    #     self.worker.run()
    #     self.assertEquals(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
    #     self.assertFalse(a.has_run)

    def test_run_error(self):
        # run() raising must yield a "FAILED" mail, but only once the worker runs.
        class A(luigi.Task):
            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(self.last_email, None)
        self.worker.run()
        self.assertEqual(("Luigi: %s FAILED" % (a,)), self.last_email[0])

    def test_no_error(self):
        # A healthy task must not trigger any mail at all.
        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(self.last_email, None)
        self.worker.add(a)
        self.assertEqual(self.last_email, None)
        self.worker.run()
        self.assertEqual(self.last_email, None)
        self.assertTrue(a.complete())
def luigi_run(task):
    """Add ``task`` to a new Worker backed by a default RemoteScheduler and run it."""
    remote = RemoteScheduler()
    worker = Worker(scheduler=remote)
    worker.add(task)
    worker.run()
def test_task_limit_not_exceeded(self):
    # A ForkBombTask(3, 2) tree run on a default Worker must finish completely.
    worker = Worker()
    root = ForkBombTask(3, 2)
    worker.add(root)
    worker.run()
    self.assertTrue(root.complete())
class WorkerTest(unittest.TestCase):
    """Worker behaviour tests against a CentralPlannerScheduler.

    setUp provides two workers ('X' and 'Y') on one shared scheduler; several
    tests additionally build their own scheduler and workers.
    """

    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        # Remember the real clock so tearDown can undo setTime().
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Replace time.time with a function that always returns ``t``."""
        time.time = lambda: t

    def test_dep(self):
        # B requires A; running the worker must run both.
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_external_dep(self):
        # B requires an external task that is never complete; nothing may run.
        class A(ExternalTask):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        # A raises in run() after setting its flag; B must therefore not run.
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            # Builds a never-complete B instance that requires ``dep``.
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.w.add(b_a)
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.w2.add(b_c)

        self.w.run()  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)

        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_interleaved_workers(self):
        # Worker X registers the real B (requires A); worker Y registers
        # ExternalB, which shares B's task id but is never complete. Running
        # Y first must leave A and B incomplete; X then completes both.
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w.add(b)
        w2.add(eb)
        logging.debug("RUNNING BROKEN WORKER")
        w2.run()
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w2.add(eb)
        w.add(b)

        w2.run()
        self.assertFalse(b.complete())
        w.run()
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers3(self):
        # Keep-alive workers: X gets A and runs on a background thread, Y gets
        # B (requires A) and runs in this thread; both tasks must complete.
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, wait_interval=0.1)

        w.add(a)
        w2.add(b)

        threading.Thread(target=w.run).start()
        w2.run()

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()

    def test_complete_exception(self):
        """Tests that a task is still scheduled if its sister task crashes in the complete() method"""
        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        w.add(b)
        w.run()
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
class WorkerTest(unittest.TestCase):
    """Worker behaviour tests against a CentralPlannerScheduler.

    setUp provides two workers ('X' and 'Y') on one shared scheduler; several
    tests additionally build their own workers, which they stop themselves.
    """

    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        # Remember the real clock so tearDown can undo setTime().
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Replace time.time with a function that always returns ``t``."""
        time.time = lambda: t

    def test_dep(self):
        # B requires A; add() and run() must succeed and both tasks must run.
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        # After handle_interrupt(SIGUSR1) the worker must not run the task.
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        self.w.handle_interrupt(signal.SIGUSR1, None)
        self.w.run()
        self.assertFalse(d.complete())

    def test_external_dep(self):
        # B requires an external task that is never complete; nothing may
        # run, yet add() and run() both report success.
        class A(ExternalTask):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        # A raises in run() after setting its flag; run() must report failure
        # and B must not run.
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            # Builds a never-complete B instance that requires ``dep``.
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)

        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        # A looks done while B is being added and is flipped back to not-done
        # before the run; both must be complete afterwards.
        class A(Task):
            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):
                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_avoid_infinite_reschedule(self):
        # Neither task ever becomes complete; run() must return (with False)
        # instead of rescheduling forever.
        class A(Task):
            def complete(self):
                return False

        class B(Task):
            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

        # This worker is local to the test, so tearDown does not stop it.
        w.stop()

    def test_interleaved_workers(self):
        # Worker X registers the real B (requires A); worker Y registers
        # ExternalB, which shares B's task id but is never complete. Running
        # Y first must leave A and B incomplete; X then completes both.
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()

    def test_interleaved_workers3(self):
        # Keep-alive workers: X gets A and runs on a background thread, Y gets
        # B (requires A) and runs in this thread; both tasks must complete.
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        w.stop()
        w2.stop()

    def test_die_for_non_unique_pending(self):
        # Both workers register B (requires A). Worker X fetches A through
        # _get_work() but never runs it; worker Y's run() must still return
        # True while A and B remain incomplete.
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

        # Stop both workers; previously only w2 was stopped and w was leaked.
        w.stop()
        w2.stop()

    def test_complete_exception(self):
        """Tests that a task is still scheduled if its sister task crashes in the complete() method"""
        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()

    def test_requires_exception(self):
        # Same setup as test_complete_exception, except that A raises from
        # requires() instead of complete().
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
        w.stop()
def test_no_task_limit(self):
    # A ForkBombTask(4, 2) tree run on a default Worker must finish completely.
    worker = Worker()
    root = ForkBombTask(4, 2)
    worker.add(root)
    worker.run()
    self.assertTrue(root.complete())
class WorkerEmailTest(EmailTest):
    """Checks which error e-mails (read from ``self.last_email``) a Worker sends.

    Uses assertEqual / assertNotEqual throughout; the assertEquals /
    assertNotEquals spellings are deprecated aliases.
    """

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        self.worker.stop()

    @with_config(EMAIL_CONFIG)
    def test_connection_error(self):
        # Scheduling against an unreachable remote scheduler must produce a
        # framework-error mail after two waits between attempts.
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337)
        worker = Worker(scheduler=sch)

        self.waits = 0

        def dummy_wait():
            # Counts the waits instead of actually sleeping.
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(self.last_email, None)
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEqual(self.last_email, None)
        self.assertEqual(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a,))
        worker.stop()

    @with_config(EMAIL_CONFIG)
    def test_complete_error(self):
        # complete() raising must yield a "failed scheduling" mail, and the
        # task must not be run afterwards.
        class A(DummyTask):
            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(self.last_email, None)
        self.worker.add(a)
        self.assertEqual(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.worker.run()
        self.assertEqual(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.assertFalse(a.has_run)

    @with_config(EMAIL_CONFIG)
    def test_complete_return_value(self):
        class A(DummyTask):
            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(self.last_email, None)
        self.worker.add(a)
        self.assertEqual(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.worker.run()
        self.assertEqual(("Luigi: %s failed scheduling" % (a,)), self.last_email[0])
        self.assertFalse(a.has_run)

    @with_config(EMAIL_CONFIG)
    def test_run_error(self):
        # run() raising must yield a "FAILED" mail, but only once the worker runs.
        class A(luigi.Task):
            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(self.last_email, None)
        self.worker.run()
        self.assertEqual(("Luigi: %s FAILED" % (a,)), self.last_email[0])

    def test_no_error(self):
        # A healthy task must not trigger any mail at all.
        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(self.last_email, None)
        self.worker.add(a)
        self.assertEqual(self.last_email, None)
        self.worker.run()
        self.assertEqual(self.last_email, None)
        self.assertTrue(a.complete())
class ExceptionFormatTest(unittest.TestCase):
    """Check the subject/body of error e-mails and the recipient list helper.

    ``luigi.notifications.send_error_email`` is patched in the ``_run_task*``
    helpers, so the tests inspect the arguments it was called with.
    """

    def setUp(self):
        self.sch = CentralPlannerScheduler()
        self.w = Worker(scheduler=self.sch)

    def tearDown(self):
        # Was spelled ``tear_down``, which unittest never calls, so the
        # worker created in setUp was never stopped.
        self.w.stop()

    def test_fail_run(self):
        task = FailRunTask(foo='foo', bar='bar')
        self._run_task(task)

    def test_fail_run_html(self):
        task = FailRunTask(foo='foo', bar='bar')
        self._run_task_html(task)

    def test_fail_schedule(self):
        task = FailSchedulingTask(foo='foo', bar='bar')
        self._run_task(task)

    def test_fail_schedule_html(self):
        task = FailSchedulingTask(foo='foo', bar='bar')
        self._run_task_html(task)

    @with_config({'core': {'error-email': '*****@*****.**', 'email-prefix': '[TEST] '}})
    @mock.patch('luigi.notifications.send_error_email')
    def _run_task(self, task, mock_send):
        """Schedule and run ``task``; expect exactly one plain-text error e-mail."""
        self.w.add(task)
        self.w.run()

        self.assertEqual(mock_send.call_count, 1)
        args, kwargs = mock_send.call_args
        self._check_subject(args[0], task)
        self._check_body(args[1], task, html=False)

    @with_config({'core': {'error-email': '*****@*****.**', 'email-prefix': '[TEST] ', 'email-type': 'html'}})
    @mock.patch('luigi.notifications.send_error_email')
    def _run_task_html(self, task, mock_send):
        """Schedule and run ``task``; expect exactly one HTML error e-mail."""
        self.w.add(task)
        self.w.run()

        self.assertEqual(mock_send.call_count, 1)
        args, kwargs = mock_send.call_args
        self._check_subject(args[0], task)
        self._check_body(args[1], task, html=True)

    def _check_subject(self, subject, task):
        """Assert that the subject mentions the task id."""
        self.assertIn(task.task_id, subject)

    def _check_body(self, body, task, html=False):
        """Assert that the body names the task, its parameters and the error text."""
        if html:
            self.assertIn('<th>name</th><td>{}</td>'.format(task.task_family), body)
            self.assertIn('<div class="highlight"', body)
            self.assertIn('Oops!', body)

            for param, value in task.param_kwargs.items():
                self.assertIn('<th>{}</th><td>{}</td>'.format(param, value), body)
        else:
            self.assertIn('Name: {}\n'.format(task.task_family), body)
            self.assertIn('Parameters:\n', body)
            self.assertIn('TestException: Oops!', body)

            for param, value in task.param_kwargs.items():
                self.assertIn('{}: {}\n'.format(param, value), body)

    # NOTE(review): every address literal below is the same placeholder text,
    # which looks like redaction of originally distinct addresses -- confirm
    # the intended values before relying on these assertions.
    @with_config({"core": {"error-email": "[email protected]"}})
    def testEmailRecipients(self):
        six.assertCountEqual(self, notifications._email_recipients(), ["[email protected]"])
        six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]", "[email protected]"])
        six.assertCountEqual(self, notifications._email_recipients(["[email protected]", "[email protected]"]),
                             ["[email protected]", "[email protected]", "[email protected]"])

    @with_config({"core": {}}, replace_sections=True)
    def testEmailRecipientsNoConfig(self):
        six.assertCountEqual(self, notifications._email_recipients(), [])
        six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]"])
        six.assertCountEqual(self, notifications._email_recipients(["[email protected]", "[email protected]"]),
                             ["[email protected]", "[email protected]"])
class WorkerTest(unittest.TestCase):
    """Scheduling/running tests for ``Worker`` against a ``CentralPlannerScheduler``.

    ``setUp`` creates two workers ('X' and 'Y') sharing one scheduler; tests
    that need differently configured workers create them through
    ``_managed_worker`` so they are always stopped.
    """

    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        # Undo setTime() if a test replaced the clock.
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Freeze ``time.time`` at ``t`` (restored in tearDown)."""
        time.time = lambda: t

    def _managed_worker(self, *args, **kwargs):
        """Create a Worker that is stopped on cleanup, even if the test fails."""
        worker = Worker(*args, **kwargs)
        self.addCleanup(worker.stop)
        return worker

    def test_dep(self):
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        # Check for the signal explicitly rather than catching AttributeError
        # around the call, which would also hide errors raised inside
        # handle_interrupt itself.
        if not hasattr(signal, 'SIGUSR1'):
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')

        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        self.w.handle_interrupt(signal.SIGUSR1, None)
        self.w.run()
        self.assertFalse(d.complete())

    def test_external_dep(self):
        class A(ExternalTask):

            def complete(self):
                return False

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_tracking_url(self):
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self, tracking_url_callback=None):
                if tracking_url_callback is not None:
                    tracking_url_callback(tracking_url)
                self.has_run = True

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertTrue(self.w.run())
        tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(tasks))
        self.assertEqual(tracking_url, tasks['A()']['tracking_url'])

    def test_type_error_in_tracking_run(self):
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, a.num_runs)

    def test_fail(self):
        class CustomException(BaseException):
            def __init__(self, msg):
                self.msg = msg

        class A(Task):

            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        # The dependency looked done while scheduling; flip it afterwards.
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_gets_missed_work(self):
        class A(Task):
            done = False

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        a = A()
        self.assertTrue(self.w.add(a))

        # simulate a missed get_work response
        self.assertEqual('A()', self.sch.get_work(worker='X')['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(a.complete())

    def test_avoid_infinite_reschedule(self):
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_fails_registering_signal(self):
        with mock.patch('luigi.worker.signal', spec=['signal']):
            # mock will raise an attribute error getting signal.SIGUSR1
            self._managed_worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = self._managed_worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id='X')
        w2 = self._managed_worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id='X')
        w2 = self._managed_worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = self._managed_worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = self._managed_worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True,
                                  wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        # Worker X runs A in the background while worker Y waits for B.
        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = self._managed_worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = self._managed_worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True,
                                  wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        # Worker X takes A but never runs it.
        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)

    def test_requires_exception(self):
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class D(DummyTask):
            pass

        d = D()

        class C(DummyTask):
            def requires(self):
                return d

        c = C()

        class B(DummyTask):

            def requires(self):
                return c, a

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertTrue(d.has_run)
        self.assertFalse(a.has_run)
class WorkerEmailTest(unittest.TestCase):
    """Check which error e-mails a worker sends.

    Each test receives an ``emails`` list from the ``email_patch`` decorator
    and inspects its entries as strings.
    """

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        self.worker.stop()

    @email_patch
    def test_connection_error(self, emails):
        """An unreachable remote scheduler yields a framework-error e-mail."""
        sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)
        worker = Worker(scheduler=sch)
        # Registered up front so the worker is stopped even when an
        # assertion below fails.
        self.addCleanup(worker.stop)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        # Replace the scheduler's wait hook with a counter.
        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEqual(emails, [])
        self.assertIn("Luigi: Framework error while scheduling %s" % (a,), emails[0])

    @email_patch
    def test_complete_error(self, emails):
        """A ``complete()`` that raises is reported as a scheduling failure."""
        class A(DummyTask):

            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.worker.run()
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_complete_return_value(self, emails):
        """A ``complete()`` returning nothing is reported as a scheduling failure."""
        class A(DummyTask):

            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.worker.run()
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_run_error(self, emails):
        """A ``run()`` that raises produces a FAILED e-mail only once the worker runs."""
        class A(luigi.Task):

            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertIn("Luigi: %s FAILED" % (a,), emails[0])

    @email_patch
    def test_no_error(self, emails):
        """A task that schedules and runs cleanly sends no e-mail at all."""
        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertEqual(emails, [])
        self.assertTrue(a.complete())
class WorkerEmailTest(unittest.TestCase):
    """Check which error e-mails a worker sends.

    Each test receives an ``emails`` list from its patch decorator and
    inspects its entries as strings. The shared worker is entered as a
    context manager in ``setUp`` and exited in ``tearDown``.
    """

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo").__enter__()

    def tearDown(self):
        self.worker.__exit__(None, None, None)

    @email_patch
    def test_connection_error(self, emails):
        """An unreachable remote scheduler yields a framework-error e-mail."""
        sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        # Replace the scheduler's wait hook with a counter.
        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        with Worker(scheduler=sch) as worker:
            worker.add(a)
            self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
            self.assertNotEqual(emails, [])
            # assertIn reports both strings on failure, unlike find() != -1.
            self.assertIn("Luigi: Framework error while scheduling %s" % (a,), emails[0])

    @email_patch
    def test_complete_error(self, emails):
        """A ``complete()`` that raises is reported as a scheduling failure."""
        class A(DummyTask):

            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.worker.run()
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_requires_error(self, emails):
        """A ``requires()`` that raises is reported as a scheduling failure."""
        class A(DummyTask):

            def requires(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.worker.run()
        self.assertFalse(a.has_run)

    @email_patch
    def test_complete_return_value(self, emails):
        """A ``complete()`` returning nothing is reported as a scheduling failure."""
        class A(DummyTask):

            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.worker.run()
        self.assertIn("Luigi: %s failed scheduling" % (a,), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_run_error(self, emails):
        """A ``run()`` that raises produces a FAILED e-mail only once the worker runs."""
        class A(luigi.Task):

            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertIn("Luigi: %s FAILED" % (a,), emails[0])

    @email_patch
    def test_no_error(self, emails):
        """A task that schedules and runs cleanly sends no e-mail at all."""
        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertEqual(emails, [])
        self.assertTrue(a.complete())

    @custom_email_patch({"core": {"error-email": "not-a-real-email-address-for-test-only", 'email-type': 'none'}})
    def test_disable_emails(self, emails):
        """With ``email-type`` set to 'none', a failing ``complete()`` sends nothing."""
        class A(luigi.Task):

            def complete(self):
                raise Exception("b0rk")

        self.worker.add(A())
        self.assertEqual(emails, [])
class WorkerTest(unittest.TestCase):
    """Scheduling/running tests for ``Worker`` against a ``CentralPlannerScheduler``.

    ``setUp`` creates two workers ('X' and 'Y') sharing one scheduler; tests
    that need differently configured workers create them through
    ``_managed_worker`` so they are always stopped.
    """

    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        # Undo setTime() if a test replaced the clock.
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Freeze ``time.time`` at ``t`` (restored in tearDown)."""
        time.time = lambda: t

    def _managed_worker(self, *args, **kwargs):
        """Create a Worker that is stopped on cleanup, even if the test fails."""
        worker = Worker(*args, **kwargs)
        self.addCleanup(worker.stop)
        return worker

    def test_dep(self):
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        # SIGUSR1 does not exist on every platform; skip instead of erroring
        # out with AttributeError there.
        if not hasattr(signal, 'SIGUSR1'):
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')

        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        self.w.handle_interrupt(signal.SIGUSR1, None)
        self.w.run()
        self.assertFalse(d.complete())

    def test_external_dep(self):
        class A(ExternalTask):

            def complete(self):
                return False

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        class A(Task):

            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        # The dependency looked done while scheduling; flip it afterwards.
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_avoid_infinite_reschedule(self):
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = self._managed_worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id='X')
        w2 = self._managed_worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id='X')
        w2 = self._managed_worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = self._managed_worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = self._managed_worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True,
                                  wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        # Worker X runs A in the background while worker Y waits for B.
        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = self._managed_worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = self._managed_worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True,
                                  wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        # Worker X takes A but never runs it.
        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)

    def test_requires_exception(self):
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
class WorkerEmailTest(unittest.TestCase):
    """Check which error e-mails a worker sends.

    Each test receives an ``emails`` list from the ``email_patch`` decorator
    and inspects its entries as strings.
    """

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        self.worker.stop()

    @email_patch
    def test_connection_error(self, emails):
        """An unreachable remote scheduler yields a framework-error e-mail."""
        sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337, connect_timeout=1)
        worker = Worker(scheduler=sch)
        # Registered up front so the worker is stopped even when an
        # assertion below fails.
        self.addCleanup(worker.stop)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        # Replace the scheduler's wait hook with a counter.
        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEqual(emails, [])
        self.assertIn("Luigi: Framework error while scheduling %s" % (a, ), emails[0])

    @email_patch
    def test_complete_error(self, emails):
        """A ``complete()`` that raises is reported as a scheduling failure."""
        class A(DummyTask):

            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.worker.run()
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_complete_return_value(self, emails):
        """A ``complete()`` returning nothing is reported as a scheduling failure."""
        class A(DummyTask):

            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.worker.run()
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_run_error(self, emails):
        """A ``run()`` that raises produces a FAILED e-mail only once the worker runs."""
        class A(luigi.Task):

            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertIn("Luigi: %s FAILED" % (a, ), emails[0])

    @email_patch
    def test_no_error(self, emails):
        """A task that schedules and runs cleanly sends no e-mail at all."""
        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertEqual(emails, [])
        self.assertTrue(a.complete())
class WorkerTest(unittest.TestCase):
    """Scheduling/running tests for ``Worker`` against a ``CentralPlannerScheduler``.

    ``setUp`` creates two workers ('X' and 'Y') sharing one scheduler; tests
    that need differently configured workers create them through
    ``_managed_worker`` so they are always stopped.
    """

    def setUp(self):
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        # Undo setTime() if a test replaced the clock.
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Freeze ``time.time`` at ``t`` (restored in tearDown)."""
        time.time = lambda: t

    def _managed_worker(self, *args, **kwargs):
        """Create a Worker that is stopped on cleanup, even if the test fails."""
        worker = Worker(*args, **kwargs)
        self.addCleanup(worker.stop)
        return worker

    def test_dep(self):
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_external_dep(self):
        class A(ExternalTask):

            def complete(self):
                return False

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        class A(Task):

            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        # The dependency looked done while scheduling; flip it afterwards.
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_dynamic_dependencies(self):
        class DynamicRequires(Task):
            p = luigi.Parameter()

            def output(self):
                return luigi.LocalTarget(os.path.join(self.p, 'parent'))

            def run(self):
                # Each yield hands a batch of tasks to the worker and gets
                # back something that is iterated below as opened targets.
                dummy_targets = yield [DynamicDummyTask(os.path.join(self.p, str(i)))
                                       for i in range(5)]
                dummy_targets += yield [DynamicDummyTask(os.path.join(self.p, str(i)))
                                        for i in range(5, 7)]
                with self.output().open('w') as f:
                    for i, d in enumerate(dummy_targets):
                        for line in d.open('r'):
                            # write() instead of the Python 2 only
                            # ``print >>f`` statement; same bytes on disk.
                            f.write('%d: %s\n' % (i, line.strip()))

        # NOTE(review): mktemp only returns a path name and nothing here
        # removes the files written below it -- consider cleaning up.
        t = DynamicRequires(p=tempfile.mktemp())
        luigi.build([t], local_scheduler=True)
        self.assertTrue(t.complete())

        # loop through output and verify
        f = t.output().open('r')
        try:
            for i in range(7):
                self.assertEqual(f.readline().strip(), '%d: Done!' % i)
        finally:
            f.close()

    def test_avoid_infinite_reschedule(self):
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id='X')
        w2 = self._managed_worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id='X')
        w2 = self._managed_worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = self._managed_worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = self._managed_worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True,
                                  wait_interval=0.1)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        # Worker X runs A in the background while worker Y waits for B.
        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        w = self._managed_worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
        w2 = self._managed_worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True,
                                  wait_interval=0.1)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(b))

        # Worker X takes A but never runs it.
        self.assertEqual(w._get_work()[0], 'A()')
        self.assertTrue(w2.run())

        self.assertFalse(a.complete())
        self.assertFalse(b.complete())

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)

    def test_requires_exception(self):
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = self._managed_worker(scheduler=sch, worker_id="foo")
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)
class WorkerTest(unittest.TestCase):
    """Tests of Worker behaviour against a CentralPlannerScheduler.

    Covers running a dependency chain, external (never-complete)
    dependencies, failing tasks, and two workers interleaving on one
    scheduler.
    """

    def setUp(self):
        """Create one scheduler shared by workers 'X' and 'Y'; remember time.time."""
        # InstanceCache.disable()
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        """Restore the real time.time if a test replaced it via setTime()."""
        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        """Monkeypatch time.time so that it always returns ``t``."""
        time.time = lambda: t

    def test_dep(self):
        """B requires A: adding B and running the worker runs both tasks."""
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_external_dep(self):
        """B requires an ExternalTask that is never complete: nothing runs."""
        class A(ExternalTask):

            def complete(self):
                return False

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        """A's run() raises: A was attempted, but dependent B is not run."""
        class A(Task):

            def run(self):
                # Mark the attempt before failing so the test can observe it.
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        """Two workers schedule the same B with different dependencies."""
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            # Build a fresh B task class/instance whose only requirement is `dep`.
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.w.add(b_a)
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.w2.add(b_c)

        self.w.run()  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_interleaved_workers(self):
        """A worker that only knows B as an external task must not run it.

        Worker Y registers an ExternalTask with the same task id as B and
        runs first; neither A nor B may complete.  Worker X, which holds the
        real B (requiring A), then runs and completes both.
        """
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w.add(b)
        w2.add(eb)
        logging.debug("RUNNING BROKEN WORKER")
        w2.run()
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        """Same as test_interleaved_workers but B has no dependencies."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w2.add(eb)
        w.add(b)

        # The worker holding only the external stub must not complete B ...
        w2.run()
        self.assertFalse(b.complete())
        # ... while the worker holding the real task does.
        w.run()
        self.assertTrue(b.complete())
class MultiprocessWorkerTest(unittest.TestCase):
    """Runs a two-process Worker against a RemoteScheduler with mocked calls."""

    def setUp(self):
        """Build a RemoteScheduler with mocked add_worker/add_task and enter a Worker."""
        scheduler = RemoteScheduler()
        scheduler.add_worker = Mock()
        scheduler.add_task = Mock()
        self.scheduler = scheduler

        worker = Worker(scheduler=scheduler, worker_id='X', worker_processes=2)
        self.worker = worker.__enter__()

    def tearDown(self):
        """Leave the worker context entered in setUp."""
        self.worker.__exit__(None, None, None)

    def gw_res(self, pending, task_id):
        """Return a fake get_work response for `task_id` with `pending` tasks left."""
        return {
            'n_pending_tasks': pending,
            'task_id': task_id,
            'running_tasks': 0,
            'n_unique_pending': 0,
        }

    def test_positive_path(self):
        """All tasks succeed: run() is truthy and the top-level task has run."""
        first = DummyTask("a")
        second = DummyTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [first, second]

        top = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(top))

        # (pending count, task id) pairs handed out by successive get_work calls.
        schedule = [
            (3, str(first)),
            (2, str(second)),
            (1, str(top)),
            (0, None),
            (0, None),
        ]
        responses = [self.gw_res(count, task_id) for count, task_id in schedule]
        self.scheduler.get_work = Mock(side_effect=responses)

        self.assertTrue(self.worker.run())
        self.assertTrue(top.has_run)

    def test_path_with_task_failures(self):
        """Both requirements raise in run(): worker.run() must be falsy."""
        class FailingTask(DummyTask):

            def run(self):
                raise Exception("I am failing")

        first = FailingTask("a")
        second = FailingTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [first, second]

        top = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(top))

        # (pending count, task id) pairs handed out by successive get_work calls.
        schedule = [
            (3, str(first)),
            (2, str(second)),
            (1, str(top)),
            (0, None),
            (0, None),
        ]
        responses = [self.gw_res(count, task_id) for count, task_id in schedule]
        self.scheduler.get_work = Mock(side_effect=responses)

        self.assertFalse(self.worker.run())