Example #1
0
class WorkerKeepAliveUpstreamTest(LuigiTestCase):
    """
    Tests related to how the worker stays alive after upstream status changes.

    See https://github.com/spotify/luigi/pull/1789
    """
    def run(self, result=None):
        """
        Common setup code. Due to the context manager we can't use the normal setUp.

        Builds a scheduler and a keep-alive worker 'X', then runs the test
        while the worker's context is open.

        :param result: optional result object, forwarded to the parent ``run``.
        :return: whatever the parent ``run`` returns.
        """
        self.sch = Scheduler(retry_delay=0.00000001, disable_failures=2)

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1, wait_jitter=0) as w:
            self.w = w
            # Propagate the parent's return value so callers of run() still get it.
            return super(WorkerKeepAliveUpstreamTest, self).run(result)

    def _disable_task(self, task_id):
        """
        Report ``task_id`` as FAILED twice as worker 'X', pruning in between.

        With ``disable_failures=2`` on the scheduler this is expected to
        leave the task disabled.
        """
        self.sch.add_task(task_id, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(task_id, 'FAILED', worker='X')  # Disable it

    def test_alive_while_has_failure(self):
        """
        One dependency disables and one fails
        """
        class Disabler(luigi.Task):
            pass

        class Failer(luigi.Task):
            did_run = False

            def run(self):
                # Record on the class: the test reads Failer.did_run, which
                # an instance attribute would never change.
                type(self).did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return (Disabler(), Failer())

        self.w.add(Wrapper())
        self._disable_task(Disabler().task_id)
        self.sch.add_task(Failer().task_id, 'FAILED', worker='X')  # Fail it

        # Create and start the thread before the try block so the cleanup in
        # ``finally`` never touches an unbound or unstarted thread.
        worker_thread = threading.Thread(target=self.w.run)
        worker_thread.start()
        try:
            worker_thread.join(timeout=1)  # Wait 1 second
            # It shouldn't stop trying, the failed task should be retried!
            self.assertTrue(worker_thread.is_alive())
            # It should never have run, the cooldown is longer than a second.
            self.assertFalse(Failer.did_run)
        finally:
            self.sch.prune()  # Make it, like die. Couldn't find a more forceful way to do this.
            worker_thread.join(timeout=1)  # Wait 1 second
            self.assertFalse(worker_thread.is_alive())

    def test_alive_while_has_success(self):
        """
        One dependency disables and one succeeds
        """
        class Disabler(luigi.Task):
            pass

        class Succeeder(luigi.Task):
            did_run = False

            def run(self):
                # Record on the class: the test reads Succeeder.did_run, which
                # an instance attribute would never change.
                type(self).did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return (Disabler(), Succeeder())

        self.w.add(Wrapper())
        self._disable_task(Disabler().task_id)
        self.sch.add_task(Succeeder().task_id, 'DONE', worker='X')  # Mark it as already done

        # Create and start the thread before the try block so the cleanup in
        # ``finally`` never touches an unbound or unstarted thread.
        worker_thread = threading.Thread(target=self.w.run)
        worker_thread.start()
        try:
            worker_thread.join(timeout=1)  # Wait 1 second
            # The worker should think that it should stop ...
            # ... because in this case the only work remaining depends on DISABLED tasks,
            # hence it's not worth considering the wrapper task as a PENDING task to
            # keep the worker alive anymore.
            self.assertFalse(worker_thread.is_alive())
            # It should never have run, it succeeded already
            self.assertFalse(Succeeder.did_run)
        finally:
            self.sch.prune()  # This shouldn't be necessary in this version, but whatevs
            worker_thread.join(timeout=1)  # Wait 1 second
            self.assertFalse(worker_thread.is_alive())
Example #2
0
class PrometheusMetricTest(unittest.TestCase):
    """Checks the samples PrometheusMetricsCollector records when handling task events."""

    def setUp(self):
        """Create the collector, a scheduler, and the gauge name / labels the tests share."""
        self.collector = PrometheusMetricsCollector()
        self.s = Scheduler(metrics_collector=MetricsCollectors.prometheus)
        self.gauge_name = 'luigi_task_execution_time_seconds'
        self.labels = {'family': TASK_FAMILY}

    def startTask(self):
        """Add the test task to the scheduler and return it with fixed timing fields."""
        self.s.add_task(worker=WORKER, task_id=TASK_ID, family=TASK_FAMILY)
        started = self.s._state.get_task(TASK_ID)
        started.time_running, started.updated = 0, 5
        return started

    def _sample(self, metric_name):
        """Look up ``metric_name`` in the collector's registry using the shared labels."""
        return self.collector.registry.get_sample_value(metric_name, labels=self.labels)

    def test_handle_task_started(self):
        """A started task bumps the started counter and leaves the gauge at 0."""
        self.collector.handle_task_started(self.startTask())

        assert self._sample('luigi_task_started_total') == 1
        assert self._sample(self.gauge_name) == 0

    def test_handle_task_failed(self):
        """A failed task bumps the failed counter and sets the gauge to updated - time_running."""
        task = self.startTask()
        self.collector.handle_task_failed(task)

        assert self._sample('luigi_task_failed_total') == 1
        assert self._sample(self.gauge_name) == task.updated - task.time_running

    def test_handle_task_disabled(self):
        """A disabled task bumps the disabled counter and sets the gauge to updated - time_running."""
        task = self.startTask()
        self.collector.handle_task_disabled(task, self.s._config)

        assert self._sample('luigi_task_disabled_total') == 1
        assert self._sample(self.gauge_name) == task.updated - task.time_running

    def test_handle_task_done(self):
        """A done task bumps the done counter and sets the gauge to updated - time_running."""
        task = self.startTask()
        self.collector.handle_task_done(task)

        assert self._sample('luigi_task_done_total') == 1
        assert self._sample(self.gauge_name) == task.updated - task.time_running

    def test_configure_http_handler(self):
        """configure_http_handler sets the Content-Type header exactly once."""
        handler = mock.MagicMock()
        self.collector.configure_http_handler(handler)
        handler.set_header.assert_called_once_with('Content-Type', CONTENT_TYPE_LATEST)
Example #3
0
class PrometheusMetricTest(unittest.TestCase):
    """Exercises the task-event handlers of PrometheusMetricsCollector."""

    def setUp(self):
        """Prepare a collector, a scheduler and the shared gauge name and labels."""
        self.collector = PrometheusMetricsCollector()
        self.s = Scheduler(metrics_collector=MetricsCollectors.prometheus)
        self.gauge_name = 'luigi_task_execution_time_seconds'
        self.labels = {'family': TASK_FAMILY}

    def startTask(self):
        """Add the test task to the scheduler, pin its timing fields and return it."""
        self.s.add_task(worker=WORKER, task_id=TASK_ID, family=TASK_FAMILY)
        scheduled = self.s._state.get_task(TASK_ID)
        scheduled.time_running = 0
        scheduled.updated = 5
        return scheduled

    def test_handle_task_started(self):
        """Started counter is 1 and the execution-time gauge is 0."""
        task = self.startTask()
        self.collector.handle_task_started(task)

        read = self.collector.registry.get_sample_value
        started_total = read('luigi_task_started_total', labels=self.labels)
        assert started_total == 1
        execution_time = read(self.gauge_name, labels=self.labels)
        assert execution_time == 0

    def test_handle_task_failed(self):
        """Failed counter is 1 and the gauge equals updated - time_running."""
        task = self.startTask()
        self.collector.handle_task_failed(task)

        read = self.collector.registry.get_sample_value
        failed_total = read('luigi_task_failed_total', labels=self.labels)
        assert failed_total == 1
        execution_time = read(self.gauge_name, labels=self.labels)
        assert execution_time == task.updated - task.time_running

    def test_handle_task_disabled(self):
        """Disabled counter is 1 and the gauge equals updated - time_running."""
        task = self.startTask()
        self.collector.handle_task_disabled(task, self.s._config)

        read = self.collector.registry.get_sample_value
        disabled_total = read('luigi_task_disabled_total', labels=self.labels)
        assert disabled_total == 1
        execution_time = read(self.gauge_name, labels=self.labels)
        assert execution_time == task.updated - task.time_running

    def test_handle_task_done(self):
        """Done counter is 1 and the gauge equals updated - time_running."""
        task = self.startTask()
        self.collector.handle_task_done(task)

        read = self.collector.registry.get_sample_value
        done_total = read('luigi_task_done_total', labels=self.labels)
        assert done_total == 1
        execution_time = read(self.gauge_name, labels=self.labels)
        assert execution_time == task.updated - task.time_running
class WorkerKeepAliveUpstreamTest(LuigiTestCase):
    """
    Tests related to how the worker stays alive after upstream status changes.

    See https://github.com/spotify/luigi/pull/1789
    """
    def run(self, result=None):
        """
        Common setup code. Due to the context manager we can't use the normal setUp.

        Builds a scheduler and a keep-alive worker 'X', then runs the test
        while the worker's context is open.

        :param result: optional result object, forwarded to the parent ``run``.
        :return: whatever the parent ``run`` returns.
        """
        self.sch = Scheduler(retry_delay=0.00000001, retry_count=2)

        with Worker(scheduler=self.sch,
                    worker_id='X',
                    keep_alive=True,
                    wait_interval=0.1,
                    wait_jitter=0) as w:
            self.w = w
            # Propagate the parent's return value so callers of run() still get it.
            return super(WorkerKeepAliveUpstreamTest, self).run(result)

    def test_alive_while_has_failure(self):
        """
        One dependency disables and one fails
        """
        class Disabler(luigi.Task):
            pass

        class Failer(luigi.Task):
            did_run = False

            def run(self):
                # Record on the class: the test reads Failer.did_run, which
                # an instance attribute would never change.
                type(self).did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return (Disabler(), Failer())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        failer = Failer().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Disable it
        self.sch.add_task(failer, 'FAILED', worker='X')  # Fail it

        # Create and start the thread before the try block so the cleanup in
        # ``finally`` never touches an unbound or unstarted thread.
        t = threading.Thread(target=self.w.run)
        t.start()
        try:
            t.join(timeout=1)  # Wait 1 second
            # It shouldn't stop trying, the failed task should be retried!
            self.assertTrue(t.is_alive())
            # It should never have run, the cooldown is longer than a second.
            self.assertFalse(Failer.did_run)
        finally:
            # Make it, like die. Couldn't find a more forceful way to do this.
            self.sch.prune()
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())

    def test_alive_while_has_success(self):
        """
        One dependency disables and one succeeds
        """

        # TODO: Fix copy paste mess
        class Disabler(luigi.Task):
            pass

        class Succeeder(luigi.Task):
            did_run = False

            def run(self):
                # Record on the class: the test reads Succeeder.did_run, which
                # an instance attribute would never change.
                type(self).did_run = True

        class Wrapper(luigi.WrapperTask):
            def requires(self):
                return (Disabler(), Succeeder())

        self.w.add(Wrapper())
        disabler = Disabler().task_id
        succeeder = Succeeder().task_id
        self.sch.add_task(disabler, 'FAILED', worker='X')
        self.sch.prune()  # Make scheduler unfail the disabled task
        self.sch.add_task(disabler, 'FAILED', worker='X')  # Disable it
        self.sch.add_task(succeeder, 'DONE', worker='X')  # Mark it as already done

        # Create and start the thread before the try block so the cleanup in
        # ``finally`` never touches an unbound or unstarted thread.
        t = threading.Thread(target=self.w.run)
        t.start()
        try:
            t.join(timeout=1)  # Wait 1 second
            # The worker should think that it should stop ...
            # ... because in this case the only work remaining depends on DISABLED tasks,
            # hence it's not worth considering the wrapper task as a PENDING task to
            # keep the worker alive anymore.
            self.assertFalse(t.is_alive())
            # It should never have run, it succeeded already
            self.assertFalse(Succeeder.did_run)
        finally:
            # This shouldn't be necessary in this version, but whatevs
            self.sch.prune()
            t.join(timeout=1)  # Wait 1 second
            self.assertFalse(t.is_alive())