def test_complete_exception(self):
    "Tests that a task is still scheduled if its sister task crashes in the complete() method"
    class A(DummyTask):
        def complete(self):
            raise Exception("doh")
    a = A()

    class C(DummyTask):
        pass
    c = C()

    class B(DummyTask):
        def requires(self):
            return a, c
    b = B()

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    w = Worker(scheduler=sch, worker_id="foo")
    self.assertFalse(w.add(b))
    self.assertTrue(w.run())
    self.assertFalse(b.has_run)
    self.assertTrue(c.has_run)
    self.assertFalse(a.has_run)
    w.stop()
def test_ping_retry(self):
    """
    Worker ping fails once. Ping continues to try to connect to scheduler

    Kind of ugly since it uses actual timing with sleep to test the thread
    """
    sch = CentralPlannerScheduler(
        retry_delay=100,
        remove_delay=1000,
        worker_disconnect_delay=10,
    )

    self._total_pings = 0  # class var so it can be accessed from fail_ping

    def fail_ping(worker):
        # this will be called from within keep-alive thread...
        self._total_pings += 1
        raise Exception("Some random exception")

    sch.ping = fail_ping

    w = Worker(
        scheduler=sch,
        worker_id="foo",
        ping_interval=0.01  # very short between pings to make test fast
    )

    # let the keep-alive thread run for a bit...
    time.sleep(0.1)  # yes, this is ugly but it's exactly what we need to test
    w.stop()

    self.assertTrue(
        self._total_pings > 1,
        msg="Didn't retry pings (%d pings performed)" % (self._total_pings,)
    )
def test_requires_exception(self):
    class A(DummyTask):
        def requires(self):
            raise Exception("doh")
    a = A()

    class C(DummyTask):
        pass
    c = C()

    class B(DummyTask):
        def requires(self):
            return a, c
    b = B()

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    w = Worker(scheduler=sch, worker_id="foo")
    self.assertFalse(w.add(b))
    self.assertTrue(w.run())
    self.assertFalse(b.has_run)
    self.assertTrue(c.has_run)
    self.assertFalse(a.has_run)
    w.stop()
def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w.add(b) w2.add(eb) logging.debug("RUNNING BROKEN WORKER") w2.run() self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete())
def setUp(self):
    # InstanceCache.disable()
    self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    self.w = Worker(scheduler=self.sch, worker_id='X')
    self.w_raise = Worker(scheduler=self.sch, worker_id='X_raise', raise_on_error=True)
    self.w2 = Worker(scheduler=self.sch, worker_id='Y')
    self.time = time.time
def test_worker_executes_fork_handler(self):
    class Stub(luigi.Task):
        def complete(self):
            return False

        def run(self):
            pass

    s = CentralPlannerScheduler()
    w = Worker(scheduler=s, worker_processes=2)
    w.add(Stub())
    w.run()
    time.sleep(0.1)

    worker_res = [self.worker_queue.get(), self.worker_queue.get(), self.worker_queue.get()]
    self.assertIn("both", worker_res)
    self.assertIn("master", worker_res)

    child_res = [self.child_queue.get(), self.child_queue.get(), self.child_queue.get()]
    self.assertIn("both", child_res)
    self.assertIn("child", child_res)

    self.assertTrue(self.worker_queue.empty())
    self.assertTrue(self.child_queue.empty())
def test_allow_reschedule_with_many_missing_deps(self):
    class A(Task):
        """ Task that must run twice to succeed """
        i = luigi.IntParameter()

        runs = 0

        def complete(self):
            return self.runs >= 2

        def run(self):
            self.runs += 1

    class B(Task):
        done = False

        def requires(self):
            return map(A, range(20))

        def complete(self):
            return self.done

        def run(self):
            self.done = True

    b = B()
    w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
    self.assertTrue(w.add(b))
    self.assertFalse(w.run())

    # For b to be done, we must have rescheduled its dependencies to run them twice
    self.assertTrue(b.complete())
    self.assertTrue(all(a.complete() for a in b.deps()))
def test_interleaved_workers2(self):
    # two tasks without dependencies, one external, one not
    class B(DummyTask):
        pass

    class ExternalB(ExternalTask):
        task_family = "B"

        def complete(self):
            return False

    b = B()
    eb = ExternalB()
    self.assertEqual(eb.task_id, "B()")

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    w = Worker(scheduler=sch, worker_id='X')
    w2 = Worker(scheduler=sch, worker_id='Y')

    self.assertTrue(w2.add(eb))
    self.assertTrue(w.add(b))

    self.assertTrue(w2.run())
    self.assertFalse(b.complete())
    self.assertTrue(w.run())
    self.assertTrue(b.complete())

    w.stop()
    w2.stop()
def test_task_limit_exceeded(self):
    w = Worker()
    t = ForkBombTask(3, 2)
    w.add(t)
    w.run()
    self.assertFalse(t.complete())
    leaf_tasks = [ForkBombTask(3, 2, branch) for branch in [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1)]]
    self.assertEquals(3, sum(t.complete() for t in leaf_tasks),
                      "should have gracefully completed as much as possible even though the single last leaf didn't get scheduled")
def _run(self, dt, interval):
    w = Worker(scheduler=self.sch)
    tasks = self.gen_tasks(dt, interval)
    for task in tasks:
        w.add(task)
    for t in filter(lambda x: isinstance(x, RunMetricRules), tasks):
        if t.handler == 'save_to_mongodb':
            if t.filter_rules:
                filter_metrics_args = {"require": t, 'filter_rules': t.filter_rules}
                # filter
                filter_metrics = FilterMetrics(**filter_metrics_args)
                require_task = filter_metrics
                w.add(filter_metrics)
            else:
                require_task = t
            # save
            kwargs = dict(**self.mongo_conf)
            kwargs.update({"require": require_task, "date": dt, 'app_id': self.app_id, 'interval': interval})
            save_metrics = SaveMetrics(**kwargs)
            w.add(save_metrics)
        else:
            kwargs = dict(**self.mongo_conf)
            kwargs.update({"require": t, "date": dt, 'app_id': self.app_id,
                           'interval': interval, 'handler_func': t.handler})
            metric_handler = MetricsHandler(**kwargs)
            w.add(metric_handler)
    w.run()
def test_interleaved_workers3(self):
    class A(DummyTask):
        def run(self):
            logging.debug('running A')
            time.sleep(0.1)
            super(A, self).run()

    a = A()

    class B(DummyTask):
        def requires(self):
            return a

        def run(self):
            logging.debug('running B')
            super(B, self).run()

    b = B()

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

    w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
    w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

    self.assertTrue(w.add(a))
    self.assertTrue(w2.add(b))

    threading.Thread(target=w.run).start()
    self.assertTrue(w2.run())

    self.assertTrue(a.complete())
    self.assertTrue(b.complete())

    w.stop()
    w2.stop()
class AssistantTest(unittest.TestCase): def run(self, result=None): self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True) with Worker(scheduler=self.sch, worker_id='X') as w: self.w = w super(AssistantTest, self).run(result) def test_get_work(self): d = Dummy2Task('123') self.w.add(d) self.assertFalse(d.complete()) self.assistant.run() self.assertTrue(d.complete()) def test_bad_job_type(self): class Dummy3Task(Dummy2Task): task_family = 'UnknownTaskFamily' d = Dummy3Task('123') self.w.add(d) self.assertFalse(d.complete()) self.assertFalse(self.assistant.run()) self.assertFalse(d.complete()) self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [d.task_id]) def test_unimported_job_type(self): MODULE_CONTENTS = b''' import luigi class UnimportedTask(luigi.Task): def complete(self): return False ''' class NotImportedTask(luigi.Task): task_family = 'UnimportedTask' task_module = None task = NotImportedTask() # verify that it can't run the task without the module info necessary to import it self.w.add(task) self.assertFalse(self.assistant.run()) self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [task.task_id]) # check that it can import with the right module with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module: self.w.add(task) self.assertTrue(self.assistant.run()) self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [task.task_id])
def test_wait_jitter_default(self, mock_sleep, mock_random):
    """ verify default jitter is as expected """
    mock_random.return_value = 1.0
    w = Worker()
    x = w._sleeper()
    six.next(x)
    mock_random.assert_called_with(0, 5.0)
    mock_sleep.assert_called_with(2.0)

    mock_random.return_value = 3.3
    six.next(x)
    mock_random.assert_called_with(0, 5.0)
    mock_sleep.assert_called_with(4.3)
def test_disabled_shutdown_hook(self):
    w = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True)
    with w:
        try:
            # try to kill the worker!
            os.kill(os.getpid(), signal.SIGUSR1)
        except AttributeError:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
        # try to kill the worker... AGAIN!
        t = SuicidalWorker(signal.SIGUSR1)
        w.add(t)
        w.run()
        # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
        self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [t.task_id])
def test_die_for_non_unique_pending(self):
    class A(DummyTask):
        def run(self):
            logging.debug('running A')
            time.sleep(0.1)
            super(A, self).run()

    a = A()

    class B(DummyTask):
        def requires(self):
            return a

        def run(self):
            logging.debug('running B')
            super(B, self).run()

    b = B()

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

    w = Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True)
    w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1)

    self.assertTrue(w.add(b))
    self.assertTrue(w2.add(b))

    self.assertEqual(w._get_work()[0], 'A()')
    self.assertTrue(w2.run())

    self.assertFalse(a.complete())
    self.assertFalse(b.complete())

    w2.stop()
def test_wait_jitter(self, mock_sleep, mock_random):
    """ verify configured jitter amount """
    mock_random.return_value = 1.0
    w = Worker()
    x = w._sleeper()
    six.next(x)
    mock_random.assert_called_with(0, 10.0)
    mock_sleep.assert_called_with(2.0)

    mock_random.return_value = 2.0
    six.next(x)
    mock_random.assert_called_with(0, 10.0)
    mock_sleep.assert_called_with(3.0)
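# The two jitter tests above patch random.uniform and time.sleep and then step the
# worker's sleep generator. For orientation, here is a minimal sketch of the kind of
# jittered sleep generator they exercise. The function name make_sleeper and the
# wait_interval / wait_jitter parameters are illustrative assumptions, not Luigi's
# actual Worker._sleeper implementation.
import random
import time


def make_sleeper(wait_interval=1.0, wait_jitter=5.0):
    """Return an endless generator that sleeps wait_interval plus a random jitter per step."""
    def sleeper():
        while True:
            jitter = random.uniform(0, wait_jitter)  # the call the tests assert on, e.g. uniform(0, 5.0)
            time.sleep(wait_interval + jitter)       # e.g. 1.0 + 1.0 -> sleep(2.0), as the default test expects
            yield
    return sleeper()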
class ParallelSchedulingTest(unittest.TestCase): def setUp(self): self.sch = mock.Mock() self.w = Worker(scheduler=self.sch, worker_id='x') def added_tasks(self, status): return [kw['task_id'] for args, kw in self.sch.add_task.call_args_list if kw['status'] == status] def test_multiprocess_scheduling_with_overlapping_dependencies(self): self.w.add(OverlappingSelfDependenciesTask(5, 2), True) self.assertEqual(15, self.sch.add_task.call_count) self.assertEqual(set(( 'OverlappingSelfDependenciesTask(n=1, k=1)', 'OverlappingSelfDependenciesTask(n=2, k=1)', 'OverlappingSelfDependenciesTask(n=2, k=2)', 'OverlappingSelfDependenciesTask(n=3, k=1)', 'OverlappingSelfDependenciesTask(n=3, k=2)', 'OverlappingSelfDependenciesTask(n=4, k=1)', 'OverlappingSelfDependenciesTask(n=4, k=2)', 'OverlappingSelfDependenciesTask(n=5, k=2)', )), set(self.added_tasks('PENDING'))) self.assertEqual(set(( 'OverlappingSelfDependenciesTask(n=0, k=0)', 'OverlappingSelfDependenciesTask(n=0, k=1)', 'OverlappingSelfDependenciesTask(n=1, k=0)', 'OverlappingSelfDependenciesTask(n=1, k=2)', 'OverlappingSelfDependenciesTask(n=2, k=0)', 'OverlappingSelfDependenciesTask(n=3, k=0)', 'OverlappingSelfDependenciesTask(n=4, k=0)', )), set(self.added_tasks('DONE'))) @mock.patch('luigi.notifications.send_error_email') def test_raise_exception_in_complete(self, send): self.w.add(ExceptionCompleteTask(), multiprocess=True) send.check_called_once() self.assertEqual(0, self.sch.add_task.call_count) self.assertTrue('assert False' in send.call_args[0][1]) @mock.patch('luigi.notifications.send_error_email') def test_raise_unpicklable_exception_in_complete(self, send): # verify exception can't be pickled self.assertRaises(Exception, UnpicklableExceptionTask().complete) try: UnpicklableExceptionTask().complete() except Exception as e: ex = e self.assertRaises(pickle.PicklingError, pickle.dumps, ex) # verify this can run async self.w.add(UnpicklableExceptionTask(), multiprocess=True) send.check_called_once() self.assertEqual(0, self.sch.add_task.call_count) self.assertTrue('raise UnpicklableException()' in send.call_args[0][1]) @mock.patch('luigi.notifications.send_error_email') def test_raise_exception_in_requires(self, send): self.w.add(ExceptionRequiresTask(), multiprocess=True) send.check_called_once() self.assertEqual(0, self.sch.add_task.call_count)
def setUp(self):
    try:
        from luigi.sqs_history import SqsHistory, SqsTaskHistory, SqsWorkerHistory
    except ImportError as e:
        raise unittest.SkipTest('Could not test WorkerTaskGlobalEventHandlerTests: %s' % e)

    # Replace _config method with one that uses our dummy queue.
    def fake_config(s, *args):
        s._queue = DummyQueue()

    SqsHistory._config = fake_config

    # InstanceCache.disable()
    self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    self.w = Worker(scheduler=self.sch, worker_id='X')
    self.w2 = Worker(scheduler=self.sch, worker_id='Y')
    self.time = time.time
def test_purge_hung_worker_override_timeout_time(self, mock_time):
    w = Worker(worker_processes=2, wait_interval=0.01, timeout=5)
    mock_time.time.return_value = 0
    w.add(HungWorker(10))
    w._run_task('HungWorker(worker_timeout=10)')

    mock_time.time.return_value = 10
    w._handle_next_task()
    self.assertEqual(1, len(w._running_tasks))

    mock_time.time.return_value = 11
    w._handle_next_task()
    self.assertEqual(0, len(w._running_tasks))
def test_purge_hung_worker_default_timeout_time(self, mock_time):
    w = Worker(worker_processes=2, wait_interval=0.01, worker_timeout=5)
    mock_time.time.return_value = 0
    w.add(HungWorker())
    w._run_task('HungWorker(worker_timeout=None)')

    mock_time.time.return_value = 5
    w._handle_next_task()
    self.assertEqual(1, len(w._running_tasks))

    mock_time.time.return_value = 6
    w._handle_next_task()
    self.assertEqual(0, len(w._running_tasks))
def test_single_threaded_worker_doesnot_execute_fork_handler(self):
    class Stub(luigi.Task):
        def complete(self):
            return False

        def run(self):
            pass

    s = CentralPlannerScheduler()
    w = Worker(scheduler=s, worker_processes=1)
    w.add(Stub())
    w.run()
    time.sleep(0.1)

    self.assertTrue(self.worker_queue.empty())
    self.assertTrue(self.child_queue.empty())
def test_purge_multiple_workers(self):
    w = Worker(worker_processes=2, wait_interval=0.01)
    t1 = SuicidalWorker(signal.SIGTERM)
    t2 = SuicidalWorker(signal.SIGKILL)
    w.add(t1)
    w.add(t2)
    w._run_task(t1.task_id)
    w._run_task(t2.task_id)
    time.sleep(1.0)

    w._handle_next_task()
    w._handle_next_task()
    w._handle_next_task()
class AssistantTest(unittest.TestCase): def run(self, result=None): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True) with Worker(scheduler=self.sch, worker_id='X') as w: self.w = w super(AssistantTest, self).run(result) def test_get_work(self): d = Dummy2Task('123') self.w.add(d) self.assertFalse(d.complete()) self.assistant.run() self.assertTrue(d.complete()) def test_bad_job_type(self): class Dummy3Task(Dummy2Task): task_family = 'UnknownTaskFamily' d = Dummy3Task('123') self.w.add(d) self.assertFalse(d.complete()) self.assertFalse(self.assistant.run()) self.assertFalse(d.complete()) self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [str(d)]) def test_unimported_job_type(self): class NotImportedTask(luigi.Task): task_family = 'UnimportedTask' task_module = None task = NotImportedTask() # verify that it can't run the task without the module info necessary to import it self.w.add(task) self.assertFalse(self.assistant.run()) self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['UnimportedTask()']) # check that it can import with the right module task.task_module = 'dummy_test_module.not_imported' self.w.add(task) self.assertTrue(self.assistant.run()) self.assertEqual(list(self.sch.task_list('DONE', '').keys()), ['UnimportedTask()'])
def test_purge_hung_worker_default_timeout_time(self, mock_time):
    w = Worker(worker_processes=2, wait_interval=0.01, timeout=5)
    mock_time.time.return_value = 0
    task = HangTheWorkerTask()
    w.add(task)
    w._run_task(task.task_id)

    mock_time.time.return_value = 5
    w._handle_next_task()
    self.assertEqual(1, len(w._running_tasks))

    mock_time.time.return_value = 6
    w._handle_next_task()
    self.assertEqual(0, len(w._running_tasks))
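# The hung-worker tests above advance a mocked time.time() past the task's timeout and
# then call _handle_next_task(). The helper below is only an illustrative sketch of that
# bookkeeping (start times checked against a per-task timeout); the TimeoutTracker name
# and its attributes are assumptions for illustration, not the Worker's real internals,
# which track TaskProcess objects and terminate the child process on timeout.
import time


class TimeoutTracker(object):
    """Track per-task start times and drop tasks that run past their timeout."""

    def __init__(self, default_timeout):
        self.default_timeout = default_timeout
        self._running = {}  # task_id -> (start_time, timeout)

    def start(self, task_id, timeout=None):
        self._running[task_id] = (time.time(), timeout or self.default_timeout)

    def purge_hung(self):
        # A task is purged only once its runtime strictly exceeds its timeout,
        # matching the 5-seconds-kept / 6-seconds-purged pattern in the tests above.
        now = time.time()
        for task_id, (started, timeout) in list(self._running.items()):
            if now - started > timeout:
                del self._running[task_id]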
def _test_context_manager(self, force_multiprocessing):
    CONTEXT_MANAGER_MODULE = b'''
class MyContextManager(object):
    def __init__(self, task_process):
        self.task = task_process.task

    def __enter__(self):
        assert not self.task.run_event.is_set(), "the task should not have run yet"
        self.task.enter_event.set()
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        assert self.task.run_event.is_set(), "the task should have run"
        self.task.exit_event.set()
'''

    class DummyEventRecordingTask(luigi.Task):
        def __init__(self, *args, **kwargs):
            self.enter_event = multiprocessing.Event()
            self.exit_event = multiprocessing.Event()
            self.run_event = multiprocessing.Event()
            super(DummyEventRecordingTask, self).__init__(*args, **kwargs)

        def run(self):
            assert self.enter_event.is_set(), "the context manager should have been entered"
            assert not self.exit_event.is_set(), "the context manager should not have been exited yet"
            assert not self.run_event.is_set(), "the task should not have run yet"
            self.run_event.set()

        def complete(self):
            return self.run_event.is_set()

    with temporary_unloaded_module(CONTEXT_MANAGER_MODULE) as module_name:
        t = DummyEventRecordingTask()
        w = Worker(task_process_context=module_name + '.MyContextManager',
                   force_multiprocessing=force_multiprocessing)
        w.add(t)
        self.assertTrue(w.run())
        self.assertTrue(t.complete())
        self.assertTrue(t.enter_event.is_set())
        self.assertTrue(t.exit_event.is_set())
def test_stop_worker_kills_subprocesses(self):
    w = Worker(worker_processes=2)
    hung_task = HungWorker()
    w.add(hung_task)
    w._run_task(hung_task.task_id)

    pids = [p.pid for p in w._running_tasks.values()]
    self.assertEqual(1, len(pids))
    pid = pids[0]

    def is_running():
        return pid in {p.pid for p in psutil.Process().children()}

    self.assertTrue(is_running())
    w.stop()
    self.assertFalse(is_running())
def invoke_task(godzilla_task, task_id=None, **kwargs):
    print(godzilla_task, task_id, kwargs)
    task = tasks[godzilla_task]
    kwargs['task_id'] = task_id
    task = task.as_luigi(**kwargs)
    w = Worker(scheduler=sch)
    w.add(task)
    try:
        w.run()
        if not w.run_succeeded:
            raise Exception("job failed")
        return task.output().path
    finally:
        w.stop()
class MultiprocessWorkerTest(unittest.TestCase): def setUp(self): self.scheduler = RemoteScheduler() self.scheduler.add_worker = Mock() self.scheduler.add_task = Mock() self.worker = Worker(scheduler=self.scheduler, worker_id='X', worker_processes=2) def tearDown(self): self.worker.stop() def test_positive_path(self): a = DummyTask("a") b = DummyTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock(side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]) self.assertTrue(self.worker.run()) self.assertTrue(c.has_run) def test_path_with_task_failures(self): class FailingTask(DummyTask): def run(self): raise Exception("I am failing") a = FailingTask("a") b = FailingTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock(side_effect=[(3, str(a)), (2, str(b)), (1, str(c)), (0, None), (0, None)]) self.assertFalse(self.worker.run())
def test_process_killed_handler(self, task_proc):
    result = []

    @HangTheWorkerTask.event_handler(Event.PROCESS_FAILURE)
    def store_task(t, error_msg):
        self.assertTrue(error_msg)
        result.append(t)

    w = Worker()
    task = HangTheWorkerTask()
    task_process = mock.MagicMock(is_alive=lambda: False, exitcode=-14, task=task)
    task_proc.return_value = task_process
    w.add(task)
    w._run_task(task.task_id)
    w._handle_next_task()

    self.assertEqual(result, [task])
def test_timeout_handler(self, mock_time):
    result = []

    @HangTheWorkerTask.event_handler(Event.TIMEOUT)
    def store_task(t, error_msg):
        self.assertTrue(error_msg)
        result.append(t)

    w = Worker(worker_processes=2, wait_interval=0.01, timeout=5)
    mock_time.time.return_value = 0
    task = HangTheWorkerTask(worker_timeout=1)
    w.add(task)
    w._run_task(task.task_id)

    mock_time.time.return_value = 3
    w._handle_next_task()

    self.assertEqual(result, [task])
def test_fails_registering_signal(self):
    with mock.patch('luigi.worker.signal', spec=['signal']):
        # mock will raise an attribute error getting signal.SIGUSR1
        Worker()
def test_asserts_for_worker(self):
    """ Test that Worker() asserts that it's sanely configured """
    Worker(wait_interval=1)  # This shouldn't raise
    self.assertRaises(AssertionError, Worker, wait_interval=0)
def test_interleaved_workers2(self):
    # two tasks without dependencies, one external, one not
    class B(DummyTask):
        pass

    class ExternalB(ExternalTask):
        task_family = "B"

        def complete(self):
            return False

    b = B()
    eb = ExternalB()
    self.assertEquals(eb.task_id, "B()")

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    w = Worker(scheduler=sch, worker_id='X')
    w2 = Worker(scheduler=sch, worker_id='Y')

    self.assertTrue(w2.add(eb))
    self.assertTrue(w.add(b))

    self.assertTrue(w2.run())
    self.assertFalse(b.complete())
    self.assertTrue(w.run())
    self.assertTrue(b.complete())

    w.stop()
    w2.stop()
class WorkerEmailTest(EmailTest): def setUp(self): super(WorkerEmailTest, self).setUp() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.worker = Worker(scheduler=sch, worker_id="foo") def tearDown(self): self.worker.stop() @with_config(EMAIL_CONFIG) def test_connection_error(self): sch = RemoteScheduler(host="this_host_doesnt_exist", port=1337, connect_timeout=1) worker = Worker(scheduler=sch) self.waits = 0 def dummy_wait(): self.waits += 1 sch._wait = dummy_wait class A(DummyTask): pass a = A() self.assertEquals(self.last_email, None) worker.add(a) self.assertEquals(self.waits, 2) # should attempt to add it 3 times self.assertNotEquals(self.last_email, None) self.assertEquals(self.last_email[0], "Luigi: Framework error while scheduling %s" % (a, )) worker.stop() @with_config(EMAIL_CONFIG) def test_complete_error(self): class A(DummyTask): def complete(self): raise Exception("b0rk") a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.worker.run() self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.assertFalse(a.has_run) @with_config(EMAIL_CONFIG) def test_complete_return_value(self): class A(DummyTask): def complete(self): pass # no return value should be an error a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.worker.run() self.assertEquals(("Luigi: %s failed scheduling" % (a, )), self.last_email[0]) self.assertFalse(a.has_run) @with_config(EMAIL_CONFIG) def test_run_error(self): class A(luigi.Task): def complete(self): return False def run(self): raise Exception("b0rk") a = A() self.worker.add(a) self.assertEquals(self.last_email, None) self.worker.run() self.assertEquals(("Luigi: %s FAILED" % (a, )), self.last_email[0]) def test_no_error(self): class A(DummyTask): pass a = A() self.assertEquals(self.last_email, None) self.worker.add(a) self.assertEquals(self.last_email, None) self.worker.run() self.assertEquals(self.last_email, None) self.assertTrue(a.complete())
class WorkerTaskGlobalEventHandlerTests(unittest.TestCase): @with_config( dict(worker_history=dict(record_worker_history_sqs='true', sqs_queue_name='name', aws_access_key_id='key', aws_secret_access_key='secret_key'), worker_metadata=dict(meta1='data1'))) def setUp(self): try: from luigi.sqs_history import SqsHistory, SqsTaskHistory, SqsWorkerHistory except ImportError as e: raise unittest.SkipTest( 'Could not test WorkerTaskGlobalEventHandlerTests: %s' % e) # Replace _config method with one that uses our dummy queue. def fake_config(s, *args): s._queue = DummyQueue() SqsHistory._config = fake_config # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def _parse_task_events(self, messages): results = {} for m in messages: event = m.get('event') if not event: continue messages = results.get(event, []) messages.append(m) results[event] = messages return results def test_dep(self): class A(Task): param_a = luigi.Parameter(default="a") def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) self.assertEquals(4, len(event_messages)) # Check started events: started_events = event_messages.get(Event.START) self.assertEquals(2, len(started_events)) self.assertEquals('A(param_a=a)', started_events[0]['task']['id']) self.assertEquals('B()', started_events[1]['task']['id']) # Check success events success_events = event_messages.get(Event.SUCCESS) self.assertEquals(2, len(success_events)) self.assertEquals('A(param_a=a)', success_events[0]['task']['id']) self.assertEquals('B()', success_events[1]['task']['id']) # Check processing time events processing_events = event_messages.get(Event.PROCESSING_TIME) self.assertEquals(2, len(processing_events)) self.assertEquals('A(param_a=a)', processing_events[0]['task']['id']) self.assertTrue('processing_time' in processing_events[0]) self.assertEquals('B()', processing_events[1]['task']['id']) self.assertTrue('processing_time' in processing_events[1]) # Check dependency event dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED) self.assertEquals(1, len(dependency_event)) self.assertEquals('B()', dependency_event[0]['task']['id']) self.assertEquals('A(param_a=a)', dependency_event[0]['dependency_task']['id']) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) self.assertEquals(2, 
len(event_messages)) # Check dependency event dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED) self.assertEquals(1, len(dependency_event)) self.assertEquals('B()', dependency_event[0]['task']['id']) self.assertEquals('A()', dependency_event[0]['dependency_task']['id']) # Check dependency missing event dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING) self.assertEquals(1, len(dependency_missing_event)) self.assertEquals('A()', dependency_missing_event[0]['task']['id']) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() a.has_run = False self.assertTrue(self.w.add(a)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) self.assertEquals(3, len(event_messages)) # Check failure event failure_event = event_messages.get(Event.FAILURE) self.assertEquals(1, len(failure_event)) self.assertEquals('A()', failure_event[0]['task']['id']) self.assertEquals('Exception()', failure_event[0]['exception']) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) self.assertTrue(self.w2.add(b_c)) sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) # Verify missing event dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING) self.assertEquals(1, len(dependency_missing_event)) self.assertEquals('A()', dependency_missing_event[0]['task']['id']) sent_messages2 = [ json.loads(m.get_body()) for m in self.w2._worker_history_impl._queue.messages ] event_messages2 = self._parse_task_events(sent_messages2) # Verify present event dependency_present_event = event_messages2.get( Event.DEPENDENCY_PRESENT) self.assertEquals(1, len(dependency_present_event)) self.assertEquals('C()', dependency_present_event[0]['task']['id']) def test_broken_task(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(object): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = False def complete(self): return self.has_run b = B() b.has_run = False try: self.assertTrue(self.w.add(b)) except: pass sent_messages = [ json.loads(m.get_body()) for m in self.w._worker_history_impl._queue.messages ] event_messages = self._parse_task_events(sent_messages) # Verify broken event broken_event = event_messages.get(Event.BROKEN_TASK) self.assertEquals(1, len(broken_event)) self.assertEquals('B()', broken_event[0]['task']['id']) self.assertEquals("Exception('requires() must return Task objects',)", broken_event[0]['exception'])
class ParallelSchedulingTest(unittest.TestCase): def setUp(self): self.sch = mock.Mock() self.w = Worker(scheduler=self.sch, worker_id='x') def added_tasks(self, status): return [ kw['task_id'] for args, kw in self.sch.add_task.call_args_list if kw['status'] == status ] def test_number_of_processes(self): import multiprocessing real_pool = multiprocessing.Pool(1) with mock.patch('multiprocessing.Pool') as mocked_pool: mocked_pool.return_value = real_pool self.w.add(OverlappingSelfDependenciesTask(n=1, k=1), multiprocess=True, processes=1234) mocked_pool.assert_called_once_with(processes=1234) def test_zero_processes(self): import multiprocessing real_pool = multiprocessing.Pool(1) with mock.patch('multiprocessing.Pool') as mocked_pool: mocked_pool.return_value = real_pool self.w.add(OverlappingSelfDependenciesTask(n=1, k=1), multiprocess=True, processes=0) mocked_pool.assert_called_once_with(processes=None) def test_children_terminated(self): before_children = running_children() with pause_gc(): self.w.add( OverlappingSelfDependenciesTask(5, 2), multiprocess=True, ) self.assertLessEqual(running_children(), before_children) def test_multiprocess_scheduling_with_overlapping_dependencies(self): self.w.add(OverlappingSelfDependenciesTask(5, 2), True) self.assertEqual(15, self.sch.add_task.call_count) self.assertEqual( set(( OverlappingSelfDependenciesTask(n=1, k=1).task_id, OverlappingSelfDependenciesTask(n=2, k=1).task_id, OverlappingSelfDependenciesTask(n=2, k=2).task_id, OverlappingSelfDependenciesTask(n=3, k=1).task_id, OverlappingSelfDependenciesTask(n=3, k=2).task_id, OverlappingSelfDependenciesTask(n=4, k=1).task_id, OverlappingSelfDependenciesTask(n=4, k=2).task_id, OverlappingSelfDependenciesTask(n=5, k=2).task_id, )), set(self.added_tasks('PENDING'))) self.assertEqual( set(( OverlappingSelfDependenciesTask(n=0, k=0).task_id, OverlappingSelfDependenciesTask(n=0, k=1).task_id, OverlappingSelfDependenciesTask(n=1, k=0).task_id, OverlappingSelfDependenciesTask(n=1, k=2).task_id, OverlappingSelfDependenciesTask(n=2, k=0).task_id, OverlappingSelfDependenciesTask(n=3, k=0).task_id, OverlappingSelfDependenciesTask(n=4, k=0).task_id, )), set(self.added_tasks('DONE'))) @mock.patch('luigi.notifications.send_error_email') def test_raise_exception_in_complete(self, send): self.w.add(ExceptionCompleteTask(), multiprocess=True) send.check_called_once() self.assertEqual(UNKNOWN, self.sch.add_task.call_args[1]['status']) self.assertFalse(self.sch.add_task.call_args[1]['runnable']) self.assertTrue('assert False' in send.call_args[0][1]) @mock.patch('luigi.notifications.send_error_email') def test_raise_unpicklable_exception_in_complete(self, send): # verify exception can't be pickled self.assertRaises(Exception, UnpicklableExceptionTask().complete) try: UnpicklableExceptionTask().complete() except Exception as e: ex = e self.assertRaises((pickle.PicklingError, AttributeError), pickle.dumps, ex) # verify this can run async self.w.add(UnpicklableExceptionTask(), multiprocess=True) send.check_called_once() self.assertEqual(UNKNOWN, self.sch.add_task.call_args[1]['status']) self.assertFalse(self.sch.add_task.call_args[1]['runnable']) self.assertTrue('raise UnpicklableException()' in send.call_args[0][1]) @mock.patch('luigi.notifications.send_error_email') def test_raise_exception_in_requires(self, send): self.w.add(ExceptionRequiresTask(), multiprocess=True) send.check_called_once() self.assertEqual(UNKNOWN, self.sch.add_task.call_args[1]['status']) 
self.assertFalse(self.sch.add_task.call_args[1]['runnable'])
def test_default_multiprocessing_behavior(self):
    with Worker(worker_processes=1) as worker:
        task = DummyTask("a")
        task_process = worker._create_task_process(task)
        self.assertFalse(task_process.use_multiprocessing)
def test_force_multiprocessing(self):
    with Worker(worker_processes=1, force_multiprocessing=True) as worker:
        task = DummyTask("a")
        task_process = worker._create_task_process(task)
        self.assertTrue(task_process.use_multiprocessing)
def test_interleaved_workers(self):
    class A(DummyTask):
        pass

    a = A()

    class B(DummyTask):
        def requires(self):
            return a

    class ExternalB(ExternalTask):
        task_family = "B"

        def complete(self):
            return False

    b = B()
    eb = ExternalB()
    self.assertEquals(eb.task_id, "B()")

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
    w = Worker(scheduler=sch, worker_id='X')
    w2 = Worker(scheduler=sch, worker_id='Y')

    self.assertTrue(w.add(b))
    self.assertTrue(w2.add(eb))

    logging.debug("RUNNING BROKEN WORKER")
    self.assertTrue(w2.run())
    self.assertFalse(a.complete())
    self.assertFalse(b.complete())

    logging.debug("RUNNING FUNCTIONAL WORKER")
    self.assertTrue(w.run())
    self.assertTrue(a.complete())
    self.assertTrue(b.complete())

    w.stop()
    w2.stop()
def test_no_task_limit(self):
    w = Worker()
    t = ForkBombTask(4, 2)
    w.add(t)
    w.run()
    self.assertTrue(t.complete())
class ExceptionFormatTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler() self.w = Worker(scheduler=self.sch) def tear_down(self): self.w.stop() def test_fail_run(self): task = FailRunTask(foo='foo', bar='bar') self._run_task(task) def test_fail_run_html(self): task = FailRunTask(foo='foo', bar='bar') self._run_task_html(task) def test_fail_schedule(self): task = FailSchedulingTask(foo='foo', bar='bar') self._run_task(task) def test_fail_schedule_html(self): task = FailSchedulingTask(foo='foo', bar='bar') self._run_task_html(task) @with_config({ 'core': { 'error-email': '*****@*****.**', 'email-prefix': '[TEST] ' } }) @mock.patch('luigi.notifications.send_error_email') def _run_task(self, task, mock_send): self.w.add(task) self.w.run() self.assertEqual(mock_send.call_count, 1) args, kwargs = mock_send.call_args self._check_subject(args[0], task) self._check_body(args[1], task, html=False) @with_config({ 'core': { 'error-email': '*****@*****.**', 'email-prefix': '[TEST] ', 'email-type': 'html' } }) @mock.patch('luigi.notifications.send_error_email') def _run_task_html(self, task, mock_send): self.w.add(task) self.w.run() self.assertEqual(mock_send.call_count, 1) args, kwargs = mock_send.call_args self._check_subject(args[0], task) self._check_body(args[1], task, html=True) def _check_subject(self, subject, task): self.assertIn(task.task_id, subject) def _check_body(self, body, task, html=False): if html: self.assertIn('<th>name</th><td>{}</td>'.format(task.task_family), body) self.assertIn('<div class="highlight"', body) self.assertIn('Oops!', body) for param, value in task.param_kwargs.items(): self.assertIn('<th>{}</th><td>{}</td>'.format(param, value), body) else: self.assertIn('Name: {}\n'.format(task.task_family), body) self.assertIn('Parameters:\n', body) self.assertIn('TestException: Oops!', body) for param, value in task.param_kwargs.items(): self.assertIn('{}: {}\n'.format(param, value), body) @with_config({"core": {"error-email": "[email protected]"}}) def testEmailRecipients(self): six.assertCountEqual(self, notifications._email_recipients(), ["[email protected]"]) six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]", "[email protected]"]) six.assertCountEqual( self, notifications._email_recipients(["[email protected]", "[email protected]"]), ["[email protected]", "[email protected]", "[email protected]"]) @with_config({"core": {}}, replace_sections=True) def testEmailRecipientsNoConfig(self): six.assertCountEqual(self, notifications._email_recipients(), []) six.assertCountEqual(self, notifications._email_recipients("[email protected]"), ["[email protected]"]) six.assertCountEqual( self, notifications._email_recipients(["[email protected]", "[email protected]"]), ["[email protected]", "[email protected]"])
def test_task_limit_not_exceeded(self):
    w = Worker()
    t = ForkBombTask(3, 2)
    w.add(t)
    w.run()
    self.assertTrue(t.complete())
class ParallelSchedulingTest(unittest.TestCase): def setUp(self): self.sch = mock.Mock() self.w = Worker(scheduler=self.sch, worker_id='x') def added_tasks(self, status): return [ kw['task_id'] for args, kw in self.sch.add_task.call_args_list if kw['status'] == status ] def test_multiprocess_scheduling_with_overlapping_dependencies(self): self.w.add(OverlappingSelfDependenciesTask(5, 2), True) self.assertEqual(15, self.sch.add_task.call_count) self.assertEqual( set(( 'OverlappingSelfDependenciesTask(n=1, k=1)', 'OverlappingSelfDependenciesTask(n=2, k=1)', 'OverlappingSelfDependenciesTask(n=2, k=2)', 'OverlappingSelfDependenciesTask(n=3, k=1)', 'OverlappingSelfDependenciesTask(n=3, k=2)', 'OverlappingSelfDependenciesTask(n=4, k=1)', 'OverlappingSelfDependenciesTask(n=4, k=2)', 'OverlappingSelfDependenciesTask(n=5, k=2)', )), set(self.added_tasks('PENDING'))) self.assertEqual( set(( 'OverlappingSelfDependenciesTask(n=0, k=0)', 'OverlappingSelfDependenciesTask(n=0, k=1)', 'OverlappingSelfDependenciesTask(n=1, k=0)', 'OverlappingSelfDependenciesTask(n=1, k=2)', 'OverlappingSelfDependenciesTask(n=2, k=0)', 'OverlappingSelfDependenciesTask(n=3, k=0)', 'OverlappingSelfDependenciesTask(n=4, k=0)', )), set(self.added_tasks('DONE'))) @mock.patch('luigi.notifications.send_error_email') def test_raise_exception_in_complete(self, send): self.w.add(ExceptionCompleteTask(), multiprocess=True) send.check_called_once() self.assertEqual(0, self.sch.add_task.call_count) self.assertTrue('assert False' in send.call_args[0][1]) @mock.patch('luigi.notifications.send_error_email') def test_raise_unpicklable_exception_in_complete(self, send): # verify exception can't be pickled self.assertRaises(Exception, UnpicklableExceptionTask().complete) try: UnpicklableExceptionTask().complete() except Exception as e: ex = e self.assertRaises(pickle.PicklingError, pickle.dumps, ex) # verify this can run async self.w.add(UnpicklableExceptionTask(), multiprocess=True) send.check_called_once() self.assertEqual(0, self.sch.add_task.call_count) self.assertTrue('raise UnpicklableException()' in send.call_args[0][1]) @mock.patch('luigi.notifications.send_error_email') def test_raise_exception_in_requires(self, send): self.w.add(ExceptionRequiresTask(), multiprocess=True) send.check_called_once() self.assertEqual(0, self.sch.add_task.call_count)
class MultiprocessWorkerTest(unittest.TestCase): def setUp(self): self.scheduler = RemoteScheduler() self.scheduler.add_worker = Mock() self.scheduler.add_task = Mock() self.worker = Worker(scheduler=self.scheduler, worker_id='X', worker_processes=2) def tearDown(self): self.worker.stop() def gw_res(self, pending, task_id): return dict(n_pending_tasks=pending, task_id=task_id, running_tasks=0, n_unique_pending=0) def test_positive_path(self): a = DummyTask("a") b = DummyTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock(side_effect=[ self.gw_res(3, str(a)), self.gw_res(2, str(b)), self.gw_res(1, str(c)), self.gw_res(0, None), self.gw_res(0, None) ]) self.assertTrue(self.worker.run()) self.assertTrue(c.has_run) def test_path_with_task_failures(self): class FailingTask(DummyTask): def run(self): raise Exception("I am failing") a = FailingTask("a") b = FailingTask("b") class MultipleRequirementTask(DummyTask): def requires(self): return [a, b] c = MultipleRequirementTask("C") self.assertTrue(self.worker.add(c)) self.scheduler.get_work = Mock(side_effect=[ self.gw_res(3, str(a)), self.gw_res(2, str(b)), self.gw_res(1, str(c)), self.gw_res(0, None), self.gw_res(0, None) ]) self.assertFalse(self.worker.run())
class AssistantTest(unittest.TestCase): def setUp(self): self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True) def test_get_work(self): d = Dummy2Task('123') self.w.add(d) self.assertFalse(d.complete()) self.assistant.run() self.assertTrue(d.complete()) def test_bad_job_type(self): class Dummy3Task(Dummy2Task): task_family = 'UnknownTaskFamily' d = Dummy3Task('123') self.w.add(d) self.assertFalse(d.complete()) self.assertFalse(self.assistant.run()) self.assertFalse(d.complete()) self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [str(d)]) def test_unimported_job_type(self): class NotImportedTask(luigi.Task): task_family = 'UnimportedTask' task_module = None task = NotImportedTask() # verify that it can't run the task without the module info necessary to import it self.w.add(task) self.assertFalse(self.assistant.run()) self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['UnimportedTask()']) # check that it can import with the right module task.task_module = 'dummy_test_module.not_imported' self.w.add(task) self.assertTrue(self.assistant.run()) self.assertEqual(list(self.sch.task_list('DONE', '').keys()), ['UnimportedTask()'])
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.w.add(b) self.w.run() self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.w.add(b_a) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.w2.add(b_c) self.w.run( ) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? # self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w.add(b) w2.add(eb) logging.debug("RUNNING BROKEN WORKER") w2.run() self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') w2.add(eb) w.add(b) w2.run() self.assertFalse(b.complete()) w.run() self.assertTrue(b.complete())
def test_interleaved_workers3(self):
    class A(DummyTask):
        def run(self):
            logging.debug('running A')
            time.sleep(0.1)
            super(A, self).run()

    a = A()

    class B(DummyTask):
        def requires(self):
            return a

        def run(self):
            logging.debug('running B')
            super(B, self).run()

    b = B()

    sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

    w = Worker(scheduler=sch, worker_id='X', keep_alive=True)
    w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, wait_interval=0.1)

    self.assertTrue(w.add(a))
    self.assertTrue(w2.add(b))

    threading.Thread(target=w.run).start()
    self.assertTrue(w2.run())

    self.assertTrue(a.complete())
    self.assertTrue(b.complete())

    w.stop()
    w2.stop()
class WorkerEmailTest(unittest.TestCase): def setUp(self): super(WorkerEmailTest, self).setUp() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.worker = Worker(scheduler=sch, worker_id="foo") def tearDown(self): self.worker.stop() @email_patch def test_connection_error(self, emails): sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1) worker = Worker(scheduler=sch) self.waits = 0 def dummy_wait(): self.waits += 1 sch._wait = dummy_wait class A(DummyTask): pass a = A() self.assertEqual(emails, []) worker.add(a) self.assertEqual(self.waits, 2) # should attempt to add it 3 times self.assertNotEquals(emails, []) self.assertTrue( emails[0].find("Luigi: Framework error while scheduling %s" % (a, )) != -1) worker.stop() @email_patch def test_complete_error(self, emails): class A(DummyTask): def complete(self): raise Exception("b0rk") a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.assertFalse(a.has_run) @email_patch def test_complete_return_value(self, emails): class A(DummyTask): def complete(self): pass # no return value should be an error a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s failed scheduling" % (a, )) != -1) self.assertFalse(a.has_run) @email_patch def test_run_error(self, emails): class A(luigi.Task): def complete(self): return False def run(self): raise Exception("b0rk") a = A() self.worker.add(a) self.assertEqual(emails, []) self.worker.run() self.assertTrue(emails[0].find("Luigi: %s FAILED" % (a, )) != -1) @email_patch def test_no_error(self, emails): class A(DummyTask): pass a = A() self.assertEqual(emails, []) self.worker.add(a) self.assertEqual(emails, []) self.worker.run() self.assertEqual(emails, []) self.assertTrue(a.complete())
def test_ping_thread_shutdown(self):
    with Worker(ping_interval=0.01) as w:
        self.assertTrue(w._keep_alive_thread.is_alive())
    self.assertFalse(w._keep_alive_thread.is_alive())
def test_ping_thread_shutdown(self):
    w = Worker(ping_interval=0.01)
    self.assertTrue(w._keep_alive_thread.is_alive())
    w.stop()  # should stop within 0.01 s
    self.assertFalse(w._keep_alive_thread.is_alive())
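# test_ping_retry and the ping-thread shutdown tests above exercise a keep-alive thread
# that pings the scheduler on a fixed interval, keeps retrying after a failed ping, and
# exits promptly when the worker stops. Below is a rough, self-contained sketch of such
# a loop, assuming a scheduler exposing ping(worker=...) and a threading.Event used for
# shutdown; the worker's real keep-alive thread may differ in detail.
import logging
import threading


def keep_alive(scheduler, worker_id, ping_interval, stop_event):
    """Ping scheduler every ping_interval seconds until stop_event is set."""
    # Event.wait returns True once the event is set, which ends the loop promptly.
    while not stop_event.wait(timeout=ping_interval):
        try:
            scheduler.ping(worker=worker_id)
        except Exception:
            # A failed ping is logged and retried on the next interval,
            # which is the behaviour test_ping_retry asserts on.
            logging.exception("ping to scheduler failed; will retry")


# Usage sketch:
#     stop = threading.Event()
#     t = threading.Thread(target=keep_alive, args=(sch, 'foo', 0.01, stop))
#     t.daemon = True
#     t.start()
#     ...
#     stop.set()
#     t.join()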
class WorkerTest(unittest.TestCase): def setUp(self): # InstanceCache.disable() self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) self.w = Worker(scheduler=self.sch, worker_id='X') self.w2 = Worker(scheduler=self.sch, worker_id='Y') self.time = time.time def tearDown(self): if time.time != self.time: time.time = self.time self.w.stop() self.w2.stop() def setTime(self, t): time.time = lambda: t def test_dep(self): class A(Task): def run(self): self.has_run = True def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertTrue(a.has_run) self.assertTrue(b.has_run) def test_external_dep(self): class A(ExternalTask): def complete(self): return False a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertTrue(self.w.run()) self.assertFalse(a.has_run) self.assertFalse(b.has_run) def test_fail(self): class A(Task): def run(self): self.has_run = True raise Exception() def complete(self): return self.has_run a = A() class B(Task): def requires(self): return a def run(self): self.has_run = True def complete(self): return self.has_run b = B() a.has_run = False b.has_run = False self.assertTrue(self.w.add(b)) self.assertFalse(self.w.run()) self.assertTrue(a.has_run) self.assertFalse(b.has_run) def test_unknown_dep(self): # see central_planner_test.CentralPlannerTest.test_remove_dep class A(ExternalTask): def complete(self): return False class C(Task): def complete(self): return True def get_b(dep): class B(Task): def requires(self): return dep def run(self): self.has_run = True def complete(self): return False b = B() b.has_run = False return b b_a = get_b(A()) b_c = get_b(C()) self.assertTrue(self.w.add(b_a)) # So now another worker goes in and schedules C -> B # This should remove the dep A -> B but will screw up the first worker self.assertTrue(self.w2.add(b_c)) self.assertFalse( self.w.run() ) # should not run anything - the worker should detect that A is broken self.assertFalse(b_a.has_run) # not sure what should happen?? 
# self.w2.run() # should run B since C is fulfilled # self.assertTrue(b_c.has_run) def test_unfulfilled_dep(self): class A(Task): def complete(self): return self.done def run(self): self.done = True def get_b(a): class B(A): def requires(self): return a b = B() b.done = False a.done = True return b a = A() b = get_b(a) self.assertTrue(self.w.add(b)) a.done = False self.w.run() self.assertTrue(a.complete()) self.assertTrue(b.complete()) def test_avoid_infinite_reschedule(self): class A(Task): def complete(self): return False class B(Task): def complete(self): return False def requires(self): return A() self.assertTrue(self.w.add(B())) self.assertFalse(self.w.run()) def test_interleaved_workers(self): class A(DummyTask): pass a = A() class B(DummyTask): def requires(self): return a class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w.add(b)) self.assertTrue(w2.add(eb)) logging.debug("RUNNING BROKEN WORKER") self.assertTrue(w2.run()) self.assertFalse(a.complete()) self.assertFalse(b.complete()) logging.debug("RUNNING FUNCTIONAL WORKER") self.assertTrue(w.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers2(self): # two tasks without dependencies, one external, one not class B(DummyTask): pass class ExternalB(ExternalTask): task_family = "B" def complete(self): return False b = B() eb = ExternalB() self.assertEquals(eb.task_id, "B()") sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X') w2 = Worker(scheduler=sch, worker_id='Y') self.assertTrue(w2.add(eb)) self.assertTrue(w.add(b)) self.assertTrue(w2.run()) self.assertFalse(b.complete()) self.assertTrue(w.run()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_interleaved_workers3(self): class A(DummyTask): def run(self): logging.debug('running A') time.sleep(0.1) super(A, self).run() a = A() class B(DummyTask): def requires(self): return a def run(self): logging.debug('running B') super(B, self).run() b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id='X', keep_alive=True) w2 = Worker(scheduler=sch, worker_id='Y', keep_alive=True, wait_interval=0.1) self.assertTrue(w.add(a)) self.assertTrue(w2.add(b)) threading.Thread(target=w.run).start() self.assertTrue(w2.run()) self.assertTrue(a.complete()) self.assertTrue(b.complete()) w.stop() w2.stop() def test_complete_exception(self): "Tests that a task is still scheduled if its sister task crashes in the complete() method" class A(DummyTask): def complete(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop() def test_requires_exception(self): class A(DummyTask): def requires(self): raise Exception("doh") a = A() class C(DummyTask): pass c = C() class B(DummyTask): def requires(self): return a, c b = B() sch = 
CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10) w = Worker(scheduler=sch, worker_id="foo") self.assertFalse(w.add(b)) self.assertTrue(w.run()) self.assertFalse(b.has_run) self.assertTrue(c.has_run) self.assertFalse(a.has_run) w.stop()