Example #1
0
class AssistantTest(unittest.TestCase):
    def run(self, result=None):
        self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)
        with Worker(scheduler=self.sch, worker_id='X') as w:
            self.w = w
            super(AssistantTest, self).run(result)

    def test_get_work(self):
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [d.task_id])

    def test_unimported_job_type(self):
        MODULE_CONTENTS = b'''
import luigi


class UnimportedTask(luigi.Task):
    def complete(self):
        return False
'''

        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), [task.task_id])

        # check that it can import with the right module
        with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module:
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [task.task_id])
Example #2
0
class PerTaskRetryPolicyBehaviorTest(LuigiTestCase):
    def setUp(self):
        super(PerTaskRetryPolicyBehaviorTest, self).setUp()
        self.per_task_retry_count = 2
        self.default_retry_count = 1
        self.sch = Scheduler(retry_delay=0.1, retry_count=self.default_retry_count, prune_on_get_work=True)

    def test_with_all_disabled_with_single_worker(self):
        """
            With this test, a case which has a task (TestWrapperTask), requires two another tasks (TestErrorTask1,TestErrorTask1) which both is failed, is
            tested.

            Task TestErrorTask1 has default retry_count which is 1, but Task TestErrorTask2 has retry_count at task level as 2.

            This test is running on single worker
        """

        class TestErrorTask1(DummyErrorTask):
            pass

        e1 = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        e2 = TestErrorTask2()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [e2, e1]

        wt = TestWrapperTask()

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1) as w1:
            self.assertTrue(w1.add(wt))

            self.assertFalse(w1.run())

            self.assertEqual([wt.task_id], list(self.sch.task_list('PENDING', 'UPSTREAM_DISABLED').keys()))

            self.assertEqual(sorted([e1.task_id, e2.task_id]), sorted(self.sch.task_list('DISABLED', '').keys()))

            self.assertEqual(0, self.sch._state.get_task(wt.task_id).failures.num_failures())
            self.assertEqual(self.per_task_retry_count, self.sch._state.get_task(e2.task_id).failures.num_failures())
            self.assertEqual(self.default_retry_count, self.sch._state.get_task(e1.task_id).failures.num_failures())

    def test_with_all_disabled_with_multiple_worker(self):
        """
            With this test, a case which has a task (TestWrapperTask), requires two another tasks (TestErrorTask1,TestErrorTask1) which both is failed, is
            tested.

            Task TestErrorTask1 has default retry_count which is 1, but Task TestErrorTask2 has retry_count at task level as 2.

            This test is running on multiple worker
        """

        class TestErrorTask1(DummyErrorTask):
            pass

        e1 = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        e2 = TestErrorTask2()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [e2, e1]

        wt = TestWrapperTask()

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1) as w1:
            with Worker(scheduler=self.sch, worker_id='Y', keep_alive=True, wait_interval=0.1) as w2:
                with Worker(scheduler=self.sch, worker_id='Z', keep_alive=True, wait_interval=0.1) as w3:
                    self.assertTrue(w1.add(wt))
                    self.assertTrue(w2.add(e2))
                    self.assertTrue(w3.add(e1))

                    self.assertFalse(w3.run())
                    self.assertFalse(w2.run())
                    self.assertFalse(w1.run())

                    self.assertEqual([wt.task_id], list(self.sch.task_list('PENDING', 'UPSTREAM_DISABLED').keys()))

                    self.assertEqual(sorted([e1.task_id, e2.task_id]), sorted(self.sch.task_list('DISABLED', '').keys()))

                    self.assertEqual(0, self.sch._state.get_task(wt.task_id).failures.num_failures())
                    self.assertEqual(self.per_task_retry_count, self.sch._state.get_task(e2.task_id).failures.num_failures())
                    self.assertEqual(self.default_retry_count, self.sch._state.get_task(e1.task_id).failures.num_failures())

    def test_with_includes_success_with_single_worker(self):
        """
            With this test, a case which has a task (TestWrapperTask), requires one (TestErrorTask1) FAILED and one (TestSuccessTask1) SUCCESS, is tested.

            Task TestSuccessTask1 will be DONE successfully, but Task TestErrorTask1 will be failed and it has retry_count at task level as 2.

            This test is running on single worker
        """

        class TestSuccessTask1(DummyTask):
            pass

        s1 = TestSuccessTask1()

        class TestErrorTask1(DummyErrorTask):
            retry_count = self.per_task_retry_count

        e1 = TestErrorTask1()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [e1, s1]

        wt = TestWrapperTask()

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1) as w1:
            self.assertTrue(w1.add(wt))

            self.assertFalse(w1.run())

            self.assertEqual([wt.task_id], list(self.sch.task_list('PENDING', 'UPSTREAM_DISABLED').keys()))
            self.assertEqual([e1.task_id], list(self.sch.task_list('DISABLED', '').keys()))
            self.assertEqual([s1.task_id], list(self.sch.task_list('DONE', '').keys()))

            self.assertEqual(0, self.sch._state.get_task(wt.task_id).failures.num_failures())
            self.assertEqual(self.per_task_retry_count, self.sch._state.get_task(e1.task_id).failures.num_failures())
            self.assertEqual(0, self.sch._state.get_task(s1.task_id).failures.num_failures())

    def test_with_includes_success_with_multiple_worker(self):
        """
            With this test, a case which has a task (TestWrapperTask), requires one (TestErrorTask1) FAILED and one (TestSuccessTask1) SUCCESS, is tested.

            Task TestSuccessTask1 will be DONE successfully, but Task TestErrorTask1 will be failed and it has retry_count at task level as 2.

            This test is running on multiple worker
        """

        class TestSuccessTask1(DummyTask):
            pass

        s1 = TestSuccessTask1()

        class TestErrorTask1(DummyErrorTask):
            retry_count = self.per_task_retry_count

        e1 = TestErrorTask1()

        class TestWrapperTask(luigi.WrapperTask):
            def requires(self):
                return [e1, s1]

        wt = TestWrapperTask()

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1) as w1:
            with Worker(scheduler=self.sch, worker_id='Y', keep_alive=True, wait_interval=0.1) as w2:
                with Worker(scheduler=self.sch, worker_id='Z', keep_alive=True, wait_interval=0.1) as w3:
                    self.assertTrue(w1.add(wt))
                    self.assertTrue(w2.add(e1))
                    self.assertTrue(w3.add(s1))

                    self.assertTrue(w3.run())
                    self.assertFalse(w2.run())
                    self.assertFalse(w1.run())

                    self.assertEqual([wt.task_id], list(self.sch.task_list('PENDING', 'UPSTREAM_DISABLED').keys()))
                    self.assertEqual([e1.task_id], list(self.sch.task_list('DISABLED', '').keys()))
                    self.assertEqual([s1.task_id], list(self.sch.task_list('DONE', '').keys()))

                    self.assertEqual(0, self.sch._state.get_task(wt.task_id).failures.num_failures())
                    self.assertEqual(self.per_task_retry_count, self.sch._state.get_task(e1.task_id).failures.num_failures())
                    self.assertEqual(0, self.sch._state.get_task(s1.task_id).failures.num_failures())

    def test_with_dynamic_dependencies_with_single_worker(self):
        """
            With this test, a case includes dependency tasks(TestErrorTask1,TestErrorTask2) which both are failed.

            Task TestErrorTask1 has default retry_count which is 1, but Task TestErrorTask2 has retry_count at task level as 2.

            This test is running on single worker
        """

        class TestErrorTask1(DummyErrorTask):
            pass

        e1 = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        e2 = TestErrorTask2()

        class TestSuccessTask1(DummyTask):
            pass

        s1 = TestSuccessTask1()

        class TestWrapperTask(DummyTask):
            def requires(self):
                return [s1]

            def run(self):
                super(TestWrapperTask, self).run()
                yield e2, e1

        wt = TestWrapperTask()

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1) as w1:
            self.assertTrue(w1.add(wt))

            self.assertFalse(w1.run())

            self.assertEqual([wt.task_id], list(self.sch.task_list('PENDING', 'UPSTREAM_DISABLED').keys()))

            self.assertEqual(sorted([e1.task_id, e2.task_id]), sorted(self.sch.task_list('DISABLED', '').keys()))

            self.assertEqual(0, self.sch._state.get_task(wt.task_id).failures.num_failures())
            self.assertEqual(0, self.sch._state.get_task(s1.task_id).failures.num_failures())
            self.assertEqual(self.per_task_retry_count, self.sch._state.get_task(e2.task_id).failures.num_failures())
            self.assertEqual(self.default_retry_count, self.sch._state.get_task(e1.task_id).failures.num_failures())

    def test_with_dynamic_dependencies_with_multiple_workers(self):
        """
            With this test, a case includes dependency tasks(TestErrorTask1,TestErrorTask2) which both are failed.

            Task TestErrorTask1 has default retry_count which is 1, but Task TestErrorTask2 has retry_count at task level as 2.

            This test is running on multiple worker
        """

        class TestErrorTask1(DummyErrorTask):
            pass

        e1 = TestErrorTask1()

        class TestErrorTask2(DummyErrorTask):
            retry_count = self.per_task_retry_count

        e2 = TestErrorTask2()

        class TestSuccessTask1(DummyTask):
            pass

        s1 = TestSuccessTask1()

        class TestWrapperTask(DummyTask):
            def requires(self):
                return [s1]

            def run(self):
                super(TestWrapperTask, self).run()
                yield e2, e1

        wt = TestWrapperTask()

        with Worker(scheduler=self.sch, worker_id='X', keep_alive=True, wait_interval=0.1) as w1:
            with Worker(scheduler=self.sch, worker_id='Y', keep_alive=True, wait_interval=0.1) as w2:
                self.assertTrue(w1.add(wt))
                self.assertTrue(w2.add(s1))

                self.assertTrue(w2.run())
                self.assertFalse(w1.run())

                self.assertEqual([wt.task_id], list(self.sch.task_list('PENDING', 'UPSTREAM_DISABLED').keys()))

                self.assertEqual(sorted([e1.task_id, e2.task_id]), sorted(self.sch.task_list('DISABLED', '').keys()))

                self.assertEqual(0, self.sch._state.get_task(wt.task_id).failures.num_failures())
                self.assertEqual(0, self.sch._state.get_task(s1.task_id).failures.num_failures())
                self.assertEqual(self.per_task_retry_count, self.sch._state.get_task(e2.task_id).failures.num_failures())
                self.assertEqual(self.default_retry_count, self.sch._state.get_task(e1.task_id).failures.num_failures())
Example #3
0
class WorkerTest(unittest.TestCase):

    def run(self, result=None):
        self.sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.time = time.time
        with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2:
            self.w = w
            self.w2 = w2
            super(WorkerTest, self).run(result)

        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run
        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        try:
            self.w.handle_interrupt(signal.SIGUSR1, None)
        except AttributeError:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
        self.w.run()
        self.assertFalse(d.complete())

    def test_disabled_shutdown_hook(self):
        w = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True)
        with w:
            try:
                # try to kill the worker!
                os.kill(os.getpid(), signal.SIGUSR1)
            except AttributeError:
                raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
            # try to kill the worker... AGAIN!
            t = SuicidalWorker(signal.SIGUSR1)
            w.add(t)
            w.run()
            # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()), [t.task_id])

    @with_config({"worker": {"no_install_shutdown_handler": "True"}})
    def test_can_run_luigi_in_thread(self):
        class A(DummyTask):
            pass
        task = A()
        # Note that ``signal.signal(signal.SIGUSR1, fn)`` can only be called in the main thread.
        # So if we do not disable the shutdown handler, this would fail.
        t = threading.Thread(target=lambda: luigi.build([task], local_scheduler=True))
        t.start()
        t.join()
        self.assertTrue(task.complete())

    def test_external_dep(self):
        class A(ExternalTask):

            def complete(self):
                return False
        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_externalized_dep(self):
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run
        a = A()

        class B(A):
            def requires(self):
                return luigi.task.externalize(a)
        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_legacy_externalized_dep(self):
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run
        a = A()
        a.run = NotImplemented

        class B(A):
            def requires(self):
                return a
        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_type_error_in_tracking_run_deprecated(self):
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, a.num_runs)

    def test_tracking_url(self):
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self):
                self.set_tracking_url(tracking_url)
                self.has_run = True

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertTrue(self.w.run())
        tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(tasks))
        self.assertEqual(tracking_url, tasks[a.task_id]['tracking_url'])

    def test_fail(self):
        class CustomException(BaseException):
            def __init__(self, msg):
                self.msg = msg

        class A(Task):

            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):

            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see related test_remove_dep test (grep for it)
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a
            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_gets_missed_work(self):
        class A(Task):
            done = False

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        a = A()
        self.assertTrue(self.w.add(a))

        # simulate a missed get_work response
        self.assertEqual(a.task_id, self.sch.get_work(worker='X')['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(a.complete())

    def test_avoid_infinite_reschedule(self):
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_fails_registering_signal(self):
        with mock.patch('luigi.worker.signal', spec=['signal']):
            # mock will raise an attribute error getting signal.SIGUSR1
            Worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2:
            self.assertTrue(w.add(b))
            self.assertTrue(w2.add(eb))
            logging.debug("RUNNING BROKEN WORKER")
            self.assertTrue(w2.run())
            self.assertFalse(a.complete())
            self.assertFalse(b.complete())
            logging.debug("RUNNING FUNCTIONAL WORKER")
            self.assertTrue(w.run())
            self.assertTrue(a.complete())
            self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w, Worker(scheduler=sch, worker_id='Y') as w2:
            self.assertTrue(w2.add(eb))
            self.assertTrue(w.add(b))

            self.assertTrue(w2.run())
            self.assertFalse(b.complete())
            self.assertTrue(w.run())
            self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                self.assertTrue(w.add(a))
                self.assertTrue(w2.add(b))

                threading.Thread(target=w.run).start()
                self.assertTrue(w2.run())

                self.assertTrue(a.complete())
                self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                self.assertTrue(w.add(b))
                self.assertTrue(w2.add(b))

                self.assertEqual(w._get_work()[0], a.task_id)
                self.assertTrue(w2.run())

                self.assertFalse(a.complete())
                self.assertFalse(b.complete())

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertFalse(a.has_run)

    def test_requires_exception(self):
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        a = A()

        class D(DummyTask):
            pass

        d = D()

        class C(DummyTask):
            def requires(self):
                return d

        c = C()

        class B(DummyTask):

            def requires(self):
                return c, a

        b = B()
        sch = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertTrue(d.has_run)
            self.assertFalse(a.has_run)
Example #4
0
class WorkerTest(unittest.TestCase):
    def run(self, result=None):
        self.sch = Scheduler(retry_delay=100,
                             remove_delay=1000,
                             worker_disconnect_delay=10)
        self.time = time.time
        with Worker(scheduler=self.sch,
                    worker_id='X') as w, Worker(scheduler=self.sch,
                                                worker_id='Y') as w2:
            self.w = w
            self.w2 = w2
            super(WorkerTest, self).run(result)

        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        time.time = lambda: t

    def test_dep(self):
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_stop_getting_new_work(self):
        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        try:
            self.w.handle_interrupt(signal.SIGUSR1, None)
        except AttributeError:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
        self.w.run()
        self.assertFalse(d.complete())

    def test_disabled_shutdown_hook(self):
        w = Worker(scheduler=self.sch,
                   keep_alive=True,
                   no_install_shutdown_handler=True)
        with w:
            try:
                # try to kill the worker!
                os.kill(os.getpid(), signal.SIGUSR1)
            except AttributeError:
                raise unittest.SkipTest(
                    'signal.SIGUSR1 not found on this system')
            # try to kill the worker... AGAIN!
            t = SuicidalWorker(signal.SIGUSR1)
            w.add(t)
            w.run()
            # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()),
                             [t.task_id])

    @with_config({"worker": {"no_install_shutdown_handler": "True"}})
    def test_can_run_luigi_in_thread(self):
        class A(DummyTask):
            pass

        task = A()
        # Note that ``signal.signal(signal.SIGUSR1, fn)`` can only be called in the main thread.
        # So if we do not disable the shutdown handler, this would fail.
        t = threading.Thread(
            target=lambda: luigi.build([task], local_scheduler=True))
        t.start()
        t.join()
        self.assertTrue(task.complete())

    def test_external_dep(self):
        class A(ExternalTask):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_externalized_dep(self):
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(A):
            def requires(self):
                return luigi.task.externalize(a)

        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_legacy_externalized_dep(self):
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()
        a.run = NotImplemented

        class B(A):
            def requires(self):
                return a

        b = B()

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_type_error_in_tracking_run_deprecated(self):
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, a.num_runs)

    def test_tracking_url(self):
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self):
                self.set_tracking_url(tracking_url)
                self.has_run = True

        a = A()
        self.assertTrue(self.w.add(a))
        self.assertTrue(self.w.run())
        tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(tasks))
        self.assertEqual(tracking_url, tasks[a.task_id]['tracking_url'])

    def test_fail(self):
        class CustomException(BaseException):
            def __init__(self, msg):
                self.msg = msg

        class A(Task):
            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        # see related test_remove_dep test (grep for it)
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(
            self.w.run()
        )  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        class A(Task):
            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):
                def requires(self):
                    return a

            b = B()
            b.done = False
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_gets_missed_work(self):
        class A(Task):
            done = False

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        a = A()
        self.assertTrue(self.w.add(a))

        # simulate a missed get_work response
        self.assertEqual(a.task_id, self.sch.get_work(worker='X')['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(a.complete())

    def test_avoid_infinite_reschedule(self):
        class A(Task):
            def complete(self):
                return False

        class B(Task):
            def complete(self):
                return False

            def requires(self):
                return A()

        self.assertTrue(self.w.add(B()))
        self.assertFalse(self.w.run())

    def test_fails_registering_signal(self):
        with mock.patch('luigi.worker.signal', spec=['signal']):
            # mock will raise an attribute error getting signal.SIGUSR1
            Worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        class A(Task):
            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(w.add(b))
        self.assertFalse(w.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(b.complete())
        self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100,
                        remove_delay=1000,
                        worker_disconnect_delay=10)
        with Worker(scheduler=sch,
                    worker_id='X') as w, Worker(scheduler=sch,
                                                worker_id='Y') as w2:
            self.assertTrue(w.add(b))
            self.assertTrue(w2.add(eb))
            logging.debug("RUNNING BROKEN WORKER")
            self.assertTrue(w2.run())
            self.assertFalse(a.complete())
            self.assertFalse(b.complete())
            logging.debug("RUNNING FUNCTIONAL WORKER")
            self.assertTrue(w.run())
            self.assertTrue(a.complete())
            self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(str(eb), "B()")

        sch = Scheduler(retry_delay=100,
                        remove_delay=1000,
                        worker_disconnect_delay=10)
        with Worker(scheduler=sch,
                    worker_id='X') as w, Worker(scheduler=sch,
                                                worker_id='Y') as w2:
            self.assertTrue(w2.add(eb))
            self.assertTrue(w.add(b))

            self.assertTrue(w2.run())
            self.assertFalse(b.complete())
            self.assertTrue(w.run())
            self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100,
                        remove_delay=1000,
                        worker_disconnect_delay=10)

        with Worker(scheduler=sch,
                    worker_id='X',
                    keep_alive=True,
                    count_uniques=True) as w:
            with Worker(scheduler=sch,
                        worker_id='Y',
                        keep_alive=True,
                        count_uniques=True,
                        wait_interval=0.1) as w2:
                self.assertTrue(w.add(a))
                self.assertTrue(w2.add(b))

                threading.Thread(target=w.run).start()
                self.assertTrue(w2.run())

                self.assertTrue(a.complete())
                self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = Scheduler(retry_delay=100,
                        remove_delay=1000,
                        worker_disconnect_delay=10)

        with Worker(scheduler=sch,
                    worker_id='X',
                    keep_alive=True,
                    count_uniques=True) as w:
            with Worker(scheduler=sch,
                        worker_id='Y',
                        keep_alive=True,
                        count_uniques=True,
                        wait_interval=0.1) as w2:
                self.assertTrue(w.add(b))
                self.assertTrue(w2.add(b))

                self.assertEqual(w._get_work()[0], a.task_id)
                self.assertTrue(w2.run())

                self.assertFalse(a.complete())
                self.assertFalse(b.complete())

    def test_complete_exception(self):
        "Tests that a task is still scheduled if its sister task crashes in the complete() method"

        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = Scheduler(retry_delay=100,
                        remove_delay=1000,
                        worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertFalse(a.has_run)

    def test_requires_exception(self):
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        a = A()

        class D(DummyTask):
            pass

        d = D()

        class C(DummyTask):
            def requires(self):
                return d

        c = C()

        class B(DummyTask):
            def requires(self):
                return c, a

        b = B()
        sch = Scheduler(retry_delay=100,
                        remove_delay=1000,
                        worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            self.assertFalse(w.add(b))
            self.assertTrue(w.run())
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertTrue(d.has_run)
            self.assertFalse(a.has_run)
Example #5
0
class AssistantTest(unittest.TestCase):
    def run(self, result=None):
        self.sch = Scheduler(retry_delay=100,
                             remove_delay=1000,
                             worker_disconnect_delay=10)
        self.assistant = Worker(scheduler=self.sch,
                                worker_id='Y',
                                assistant=True)
        with Worker(scheduler=self.sch, worker_id='X') as w:
            self.w = w
            super(AssistantTest, self).run(result)

    def test_get_work(self):
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()),
                         [d.task_id])

    def test_unimported_job_type(self):
        MODULE_CONTENTS = b'''
import luigi


class UnimportedTask(luigi.Task):
    def complete(self):
        return False
'''

        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()),
                         [task.task_id])

        # check that it can import with the right module
        with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module:
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()),
                             [task.task_id])