class WorkerKeepAliveUpstreamTest(LuigiTestCase):
    """
    Tests related to how the worker stays alive after upstream status changes.

    See https://github.com/spotify/luigi/pull/1789
    """

    def run(self, result=None):
        """
        Common setup code.

        Overrides :meth:`unittest.TestCase.run` instead of using ``setUp``
        because the ``Worker`` context manager has to stay open for the whole
        duration of each test.
        """
        self.sch = Scheduler(retry_delay=0.00000001, disable_failures=2)
        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1, wait_jitter=0) as w:
            self.w = w
            super(WorkerKeepAliveUpstreamTest, self).run(result)

    def test_alive_while_has_failure(self):
        """
        One dependency becomes disabled and one fails: the keep-alive worker
        must stay alive, because the failed task will become retryable.
        """
        class Disabler(luigi.Task):
            pass

        class Failer(luigi.Task):
            # Sentinel checked by the assertion below; must stay False.
            did_run = False

            def run(self):
                # NOTE(review): this sets an *instance* attribute, while the
                # assertion below reads the *class* attribute ``Failer.did_run``,
                # which is never written -- confirm this check can actually
                # trip if the task does run.
                self.did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return (Disabler(), Failer())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        failer = Failer().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Second failure within the window: disable it
        self.sch.add_task(failer, 'FAILED', worker='X')  # Fail it (retryable after retry_delay)
        try:
            t = threading.Thread(target=self.w.run)
            t.start()
            t.join(timeout=1)  # Wait 1 second
            self.assertTrue(t.is_alive())  # It shouldn't stop trying: the failed task should be retried!
            self.assertFalse(Failer.did_run)  # It should never have run; the cooldown is longer than a second.
        finally:
            self.sch.prune()  # Make the worker die. Couldn't find a more forceful way to do this.
            t.join(timeout=1)  # Wait 1 second
            assert not t.is_alive()

    def test_alive_while_has_success(self):
        """
        One dependency becomes disabled and one succeeds: the worker should
        shut down, since no remaining work can ever become runnable.
        """
        # TODO: Fix copy paste mess
        class Disabler(luigi.Task):
            pass

        class Succeeder(luigi.Task):
            # Sentinel checked by the assertion below; must stay False.
            did_run = False

            def run(self):
                self.did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return (Disabler(), Succeeder())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        succeeder = Succeeder().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Second failure within the window: disable it
        self.sch.add_task(succeeder, 'DONE', worker='X')  # Mark it done
        try:
            t = threading.Thread(target=self.w.run)
            t.start()
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())  # The worker should think that it should stop ...
            # ... because in this case the only work remaining depends on DISABLED tasks,
            # hence it's not worth considering the wrapper task as a PENDING task to
            # keep the worker alive anymore.
            self.assertFalse(Succeeder.did_run)  # It should never have run; it succeeded already
        finally:
            self.sch.prune()  # This shouldn't be necessary in this version, but whatever
            t.join(timeout=1)  # Wait 1 second
            assert not t.is_alive()
class PrometheusMetricTest(unittest.TestCase):
    """Tests for the Prometheus metrics collector's task-event handlers."""

    def setUp(self):
        self.collector = PrometheusMetricsCollector()
        self.s = Scheduler(metrics_collector=MetricsCollectors.prometheus)
        self.gauge_name = 'luigi_task_execution_time_seconds'
        self.labels = {'family': TASK_FAMILY}

    def startTask(self):
        """Register TASK_ID with the scheduler and stamp a fixed 5-second runtime.

        Returns the scheduler's task object so handlers can be invoked on it.
        """
        self.s.add_task(worker=WORKER, task_id=TASK_ID, family=TASK_FAMILY)
        task = self.s._state.get_task(TASK_ID)
        task.time_running = 0
        task.updated = 5
        return task

    def _assert_metrics(self, counter_name, expected_gauge):
        """Assert the event counter was bumped to 1 and the execution-time
        gauge holds ``expected_gauge`` seconds for our task family.

        Extracted to remove the four-fold copy-paste across the handler tests.
        """
        registry = self.collector.registry
        self.assertEqual(registry.get_sample_value(counter_name, labels=self.labels), 1)
        self.assertEqual(registry.get_sample_value(self.gauge_name, labels=self.labels), expected_gauge)

    def test_handle_task_started(self):
        task = self.startTask()
        self.collector.handle_task_started(task)
        # Starting a task records a zero execution time.
        self._assert_metrics('luigi_task_started_total', 0)

    def test_handle_task_failed(self):
        task = self.startTask()
        self.collector.handle_task_failed(task)
        self._assert_metrics('luigi_task_failed_total', task.updated - task.time_running)

    def test_handle_task_disabled(self):
        task = self.startTask()
        self.collector.handle_task_disabled(task, self.s._config)
        self._assert_metrics('luigi_task_disabled_total', task.updated - task.time_running)

    def test_handle_task_done(self):
        task = self.startTask()
        self.collector.handle_task_done(task)
        self._assert_metrics('luigi_task_done_total', task.updated - task.time_running)

    def test_configure_http_handler(self):
        mock_http_handler = mock.MagicMock()
        self.collector.configure_http_handler(mock_http_handler)
        # The collector must announce the Prometheus exposition content type.
        mock_http_handler.set_header.assert_called_once_with('Content-Type', CONTENT_TYPE_LATEST)
class PrometheusMetricTest(unittest.TestCase):
    """Exercise each task-event hook of the Prometheus metrics collector."""

    def setUp(self):
        self.collector = PrometheusMetricsCollector()
        self.s = Scheduler(metrics_collector=MetricsCollectors.prometheus)
        self.gauge_name = 'luigi_task_execution_time_seconds'
        self.labels = {'family': TASK_FAMILY}

    def startTask(self):
        """Register the test task with the scheduler and give it a fixed
        5-second runtime, then return the scheduler's task object."""
        self.s.add_task(worker=WORKER, task_id=TASK_ID, family=TASK_FAMILY)
        task = self.s._state.get_task(TASK_ID)
        task.time_running = 0
        task.updated = 5
        return task

    def test_handle_task_started(self):
        task = self.startTask()
        self.collector.handle_task_started(task)
        sample = self.collector.registry.get_sample_value
        assert sample('luigi_task_started_total', labels=self.labels) == 1
        # A freshly-started task has not accumulated any execution time.
        assert sample(self.gauge_name, labels=self.labels) == 0

    def test_handle_task_failed(self):
        task = self.startTask()
        self.collector.handle_task_failed(task)
        sample = self.collector.registry.get_sample_value
        runtime = task.updated - task.time_running
        assert sample('luigi_task_failed_total', labels=self.labels) == 1
        assert sample(self.gauge_name, labels=self.labels) == runtime

    def test_handle_task_disabled(self):
        task = self.startTask()
        self.collector.handle_task_disabled(task, self.s._config)
        sample = self.collector.registry.get_sample_value
        runtime = task.updated - task.time_running
        assert sample('luigi_task_disabled_total', labels=self.labels) == 1
        assert sample(self.gauge_name, labels=self.labels) == runtime

    def test_handle_task_done(self):
        task = self.startTask()
        self.collector.handle_task_done(task)
        sample = self.collector.registry.get_sample_value
        runtime = task.updated - task.time_running
        assert sample('luigi_task_done_total', labels=self.labels) == 1
        assert sample(self.gauge_name, labels=self.labels) == runtime
class WorkerKeepAliveUpstreamTest(LuigiTestCase):
    """
    Tests related to how the worker stays alive after upstream status changes.

    See https://github.com/spotify/luigi/pull/1789
    """

    def run(self, result=None):
        """
        Shared fixture: build a scheduler and wrap the whole test run in a
        Worker context manager (a plain ``setUp`` cannot hold the context
        open for the duration of the test).
        """
        self.sch = Scheduler(retry_delay=0.00000001, retry_count=2)
        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1, wait_jitter=0) as worker:
            self.w = worker
            super(WorkerKeepAliveUpstreamTest, self).run(result)

    def test_alive_while_has_failure(self):
        """
        One dependency becomes disabled while the other fails: the keep-alive
        worker must keep running, since the failure will become retryable.
        """
        class Disabler(luigi.Task):
            pass

        class Failer(luigi.Task):
            # Sentinel the assertion below expects to stay False.
            did_run = False

            def run(self):
                self.did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return Disabler(), Failer()

        self.w.add(Wrapper())
        disabler_id = Disabler().task_id
        failer_id = Failer().task_id
        # First failure, then prune so the scheduler un-fails it ...
        self.sch.add_task(disabler_id, 'FAILED', worker='X')
        self.sch.prune()
        # ... and a second failure pushes it over retry_count: DISABLED.
        self.sch.add_task(disabler_id, 'FAILED', worker='X')
        # The other dependency simply fails (retryable after retry_delay).
        self.sch.add_task(failer_id, 'FAILED', worker='X')
        worker_thread = threading.Thread(target=self.w.run)
        try:
            worker_thread.start()
            worker_thread.join(timeout=1)
            # The worker must not give up: the failed task will be retried.
            self.assertTrue(worker_thread.is_alive())
            # The failer must never have executed; its cooldown exceeds 1s.
            self.assertFalse(Failer.did_run)
        finally:
            # Pruning is the only (indirect) way we have to kill the worker.
            self.sch.prune()
            worker_thread.join(timeout=1)
            assert not worker_thread.is_alive()

    def test_alive_while_has_success(self):
        """
        One dependency becomes disabled while the other succeeds: the worker
        should shut down, because nothing runnable remains.
        """
        # TODO: Fix copy paste mess
        class Disabler(luigi.Task):
            pass

        class Succeeder(luigi.Task):
            # Sentinel the assertion below expects to stay False.
            did_run = False

            def run(self):
                self.did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return Disabler(), Succeeder()

        self.w.add(Wrapper())
        disabler_id = Disabler().task_id
        succeeder_id = Succeeder().task_id
        # Fail, prune (un-fail), fail again -> the disabler ends up DISABLED.
        self.sch.add_task(disabler_id, 'FAILED', worker='X')
        self.sch.prune()
        self.sch.add_task(disabler_id, 'FAILED', worker='X')
        # Mark the other dependency as already completed.
        self.sch.add_task(succeeder_id, 'DONE', worker='X')
        worker_thread = threading.Thread(target=self.w.run)
        try:
            worker_thread.start()
            worker_thread.join(timeout=1)
            # The worker should decide to stop: the only remaining work
            # depends on DISABLED tasks, so the PENDING wrapper is not worth
            # staying alive for.
            self.assertFalse(worker_thread.is_alive())
            # The succeeder is already DONE, so it must never have executed.
            self.assertFalse(Succeeder.did_run)
        finally:
            # Shouldn't be needed in this version, but harmless.
            self.sch.prune()
            worker_thread.join(timeout=1)
            assert not worker_thread.is_alive()