Example #1
0
    def test_ping_retry(self):
        """Check that pinging continues after a ping to the scheduler fails.

        The scheduler's ``ping`` is replaced with a stub that counts its calls
        and always raises.  The test relies on real elapsed time (a short
        sleep) so that the worker's keep-alive thread gets a chance to run.
        """
        sch = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )

        # instance attribute so the nested stub below can update the counter
        self._total_pings = 0

        def fail_ping(worker):
            # this will be called from within keep-alive thread...
            self._total_pings += 1
            raise Exception("Some random exception")

        sch.ping = fail_ping

        w = Worker(
            scheduler=sch,
            worker_id="foo",
            ping_interval=0.01  # very short between pings to make test fast
        )
        try:
            # let the keep-alive thread run for a bit; real time is the point here
            time.sleep(0.1)
        finally:
            # stop the worker even if the wait is interrupted
            w.stop()

        self.assertGreater(
            self._total_pings, 1,
            msg="Didn't retry pings (%d pings performed)" % (self._total_pings,)
        )
Example #2
0
class AssistantTest(unittest.TestCase):
    """Exercise an assistant worker that shares a scheduler with a regular worker."""

    def run(self, result=None):
        """Wrap the standard test run in a fresh scheduler/worker fixture.

        Worker "X" is entered as a context manager for the duration of the
        run; the assistant worker "Y" is only constructed.
        """
        self.sch = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        self.assistant = Worker(scheduler=self.sch, worker_id="Y", assistant=True)
        with Worker(scheduler=self.sch, worker_id="X") as regular_worker:
            self.w = regular_worker
            super(AssistantTest, self).run(result)

    def _task_ids(self, status):
        """Return the keys of the scheduler's task list for ``status``."""
        listing = self.sch.task_list(status, "")
        return list(listing.keys())

    def test_get_work(self):
        """A task added by the regular worker is complete after the assistant runs."""
        dummy = Dummy2Task("123")
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        self.assistant.run()
        self.assertTrue(dummy.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family ends up listed as FAILED."""
        class Dummy3Task(Dummy2Task):
            task_family = "UnknownTaskFamily"

        dummy = Dummy3Task("123")
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        assistant_result = self.assistant.run()
        self.assertFalse(assistant_result)
        self.assertFalse(dummy.complete())
        self.assertEqual(self._task_ids("FAILED"), [dummy.task_id])

    def test_unimported_job_type(self):
        """The assistant fails without module info and succeeds once it is set."""
        MODULE_CONTENTS = b"""
import luigi


class UnimportedTask(luigi.Task):
    def complete(self):
        return False
"""

        class NotImportedTask(luigi.Task):
            task_family = "UnimportedTask"
            task_module = None

        task = NotImportedTask()
        expected_ids = [task.task_id]

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(self._task_ids("FAILED"), expected_ids)

        # check that it can import with the right module
        with temporary_unloaded_module(MODULE_CONTENTS) as module_name:
            task.task_module = module_name
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            self.assertEqual(self._task_ids("DONE"), [task.task_id])
Example #3
0
class TestParameterSplit(unittest.TestCase):
    """Smoke test that calls the scheduler's ``_get_task_params`` on sample task ids."""

    # Sample task ids, including nested braces, spaces and date-time values.
    task_id_examples = [
        "TrackIsrcs()",
        "CrazyTask(foo=foo_table_id, bar={'keyName': 'com.my.org', 'parameters': {'this.is.tricky': '1'}}, what_is_dis=foo bar, oh hippo)",
        "MyOldDateHourTask(datehour=2013-07-21 11:00:00)",
        "MyOldDateHourTask(datehour=2013-07-21T11:00:00)",
    ]

    def setUp(self):
        """Create a scheduler with default settings for each test."""
        self.sch = CentralPlannerScheduler()

    def test_parameter_split(self):
        """Every example must be accepted without raising; results are not inspected."""
        for example in self.task_id_examples:
            self.sch._get_task_params(example)
Example #4
0
class AssistantTest(unittest.TestCase):
    """Tests for a worker in assistant mode sharing a scheduler with a regular worker."""

    def setUp(self):
        """Create the scheduler, a regular worker "X" and an assistant worker "Y".

        Both workers are stopped again through ``addCleanup`` so that they do
        not outlive the test that created them.
        """
        self.sch = CentralPlannerScheduler(retry_delay=100,
                                           remove_delay=1000,
                                           worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        # registered right after construction so it runs even if setUp fails later
        self.addCleanup(self.w.stop)
        self.assistant = Worker(scheduler=self.sch,
                                worker_id='Y',
                                assistant=True)
        self.addCleanup(self.assistant.stop)

    def test_get_work(self):
        """A task added by the regular worker is complete after the assistant runs."""
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family ends up listed as FAILED."""
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()),
                         [str(d)])

    def test_unimported_job_type(self):
        """The assistant fails without module info and succeeds once it is set."""
        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()),
                         ['UnimportedTask()'])

        # check that it can import with the right module
        task.task_module = 'dummy_test_module.not_imported'
        self.w.add(task)
        self.assertTrue(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('DONE', '').keys()),
                         ['UnimportedTask()'])
Example #5
0
 def test_search_results_beyond_limit(self):
     """Search results above ``max_shown_tasks`` are reported as a count only."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     for suffix in 'abcd':
         sch.add_task(worker=WORKER, task_id='task_' + suffix)

     # four hits expected for 'a', more than the limit of three
     broad = sch.task_list('PENDING', '', search='a')
     self.assertEqual({'num_tasks': 4}, broad)

     # a single hit expected for '_a', so the task itself is listed
     narrow = sch.task_list('PENDING', '', search='_a')
     self.assertEqual(['task_a'], list(narrow.keys()))
Example #6
0
 def test_task_lists_some_beyond_limit(self):
     """Only the status list that exceeds ``max_shown_tasks`` collapses to a count."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     done_ids, pending_ids = 'ABCD', 'EFG'
     for task_id in done_ids:
         sch.add_task(worker=WORKER, task_id=task_id, status=DONE)
     for task_id in pending_ids:
         sch.add_task(worker=WORKER, task_id=task_id)

     # three pending tasks: listed individually
     pending = sch.task_list('PENDING', '')
     self.assertEqual(set(pending_ids), set(pending.keys()))
     # four done tasks: only the count is returned
     self.assertEqual({'num_tasks': 4}, sch.task_list('DONE', ''))
 def setUp(self):
     """Create the scheduler under test with short retry/removal/disable settings."""
     scheduler_options = dict(
         retry_delay=0.01,
         remove_delay=3,
         worker_disconnect_delay=3,
         disable_persist=3,
         disable_window=5,
         disable_failures=2,
     )
     self.scheduler = CentralPlannerScheduler(**scheduler_options)
Example #8
0
    def test_interleaved_workers3(self):
        """Two keep-alive workers finish A and B when X runs in a background thread."""
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        task_a = A()

        class B(DummyTask):

            def requires(self):
                return task_a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        task_b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        # options shared by both workers
        common = dict(scheduler=sch, keep_alive=True, count_uniques=True)

        with Worker(worker_id='X', **common) as w, \
                Worker(worker_id='Y', wait_interval=0.1, **common) as w2:
            self.assertTrue(w.add(task_a))
            self.assertTrue(w2.add(task_b))

            # X runs in the background while Y runs in this thread
            background = threading.Thread(target=w.run)
            background.start()
            self.assertTrue(w2.run())

            self.assertTrue(task_a.complete())
            self.assertTrue(task_b.complete())
Example #9
0
    def test_interleaved_workers(self):
        """Worker Y, holding an external B, completes nothing; worker X then builds A and B."""
        class A(DummyTask):
            pass

        task_a = A()

        class B(DummyTask):

            def requires(self):
                return task_a

        class ExternalB(ExternalTask):
            # same family as B, so both stringify to "B()"
            task_family = "B"

            def complete(self):
                return False

        task_b = B()
        external_b = ExternalB()
        self.assertEqual(str(external_b), "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w:
            with Worker(scheduler=sch, worker_id='Y') as w2:
                self.assertTrue(w.add(task_b))
                self.assertTrue(w2.add(external_b))

                logging.debug("RUNNING BROKEN WORKER")
                self.assertTrue(w2.run())
                for task in (task_a, task_b):
                    self.assertFalse(task.complete())

                logging.debug("RUNNING FUNCTIONAL WORKER")
                self.assertTrue(w.run())
                for task in (task_a, task_b):
                    self.assertTrue(task.complete())
Example #10
0
    def __init__(self, scheduler=None, worker_id=None, worker_processes=1, assistant=False, **kwargs):
        """Initialise worker state and start the keep-alive thread.

        :param scheduler: scheduler to use; a ``CentralPlannerScheduler`` is created when ``None``.
        :param worker_id: id of this worker; when falsy, one is built from the generated worker info.
        :param worker_processes: converted with ``int`` and stored as ``self.worker_processes``.
        :param assistant: stored as ``self._assistant``.
        :param kwargs: passed on to the ``worker`` config class.
        """
        if scheduler is None:
            scheduler = CentralPlannerScheduler()

        self.worker_processes = int(worker_processes)
        self._worker_info = self._generate_worker_info()

        if not worker_id:
            # default id lists every key=value pair of the worker info
            info_text = ', '.join('%s=%s' % (k, v) for k, v in self._worker_info)
            worker_id = 'Worker(%s)' % info_text

        self._config = worker(**kwargs)

        self._id = worker_id
        self._scheduler = scheduler
        self._assistant = assistant

        self.host = socket.gethostname()
        self._scheduled_tasks = {}
        self._suspended_tasks = {}

        self._first_task = None

        self.add_succeeded = True
        self.run_succeeded = True
        self.unfulfilled_counts = collections.defaultdict(int)

        # daemon thread that pings the scheduler at the configured interval
        keep_alive = KeepAliveThread(self._scheduler, self._id, self._config.ping_interval)
        self._keep_alive_thread = keep_alive
        keep_alive.daemon = True
        keep_alive.start()

        # Keep info about what tasks are running (could be in other processes)
        self._task_result_queue = multiprocessing.Queue()
        self._running_tasks = {}
Example #11
0
 def run(self, result=None):
     """Run the test with ``self.worker`` bound to an entered Worker.

     NOTE(review): the parent ``setUp`` is called explicitly before the
     regular run starts; confirm whether running it here is still required.
     """
     super(WorkerEmailTest, self).setUp()
     scheduler_options = dict(retry_delay=100,
                              remove_delay=1000,
                              worker_disconnect_delay=10)
     sch = CentralPlannerScheduler(**scheduler_options)
     with Worker(scheduler=sch, worker_id="foo") as entered_worker:
         self.worker = entered_worker
         super(WorkerEmailTest, self).run(result)
 def _build(self, tasks):
     """Add ``tasks`` to a new single-process worker and run it.

     A fresh scheduler (``prune_on_get_work=True``) is stored on
     ``self.scheduler``.  The worker is stopped even when adding or running
     a task raises.

     :param tasks: iterable of tasks to add to the worker.
     """
     self.scheduler = CentralPlannerScheduler(prune_on_get_work=True)
     w = luigi.worker.Worker(scheduler=self.scheduler, worker_processes=1)
     try:
         for t in tasks:
             w.add(t)
         w.run()
     finally:
         # always stop the worker, including on failure
         w.stop()
Example #13
0
    def test_requires_exception(self):
        """A task whose ``requires`` raises blocks its dependent but not its siblings.

        B requires C and A; A's ``requires`` raises.  Adding B reports failure,
        yet C and its dependency D still run.
        """
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        broken = A()

        class D(DummyTask):
            pass

        leaf = D()

        class C(DummyTask):
            def requires(self):
                return leaf

        healthy = C()

        class B(DummyTask):
            def requires(self):
                return healthy, broken

        top = B()
        sch = CentralPlannerScheduler(
            retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            added = w.add(top)
            self.assertFalse(added)
            self.assertTrue(w.run())

            self.assertFalse(top.has_run)
            self.assertTrue(healthy.has_run)
            self.assertTrue(leaf.has_run)
            self.assertFalse(broken.has_run)
Example #14
0
 def setUp(self):
     """Create one scheduler, three workers bound to it, and remember ``time.time``."""
     # InstanceCache.disable()
     self.sch = CentralPlannerScheduler(
         retry_delay=100,
         remove_delay=1000,
         worker_disconnect_delay=10,
     )
     shared = self.sch
     self.w = Worker(scheduler=shared, worker_id='X')
     self.w_raise = Worker(scheduler=shared, worker_id='X_raise', raise_on_error=True)
     self.w2 = Worker(scheduler=shared, worker_id='Y')
     # keep a reference to the current time.time function
     self.time = time.time
Example #15
0
    def test_die_for_non_unique_pending(self):
        """Worker Y's run returns although the task both workers share never completes.

        Both workers add B (which requires A).  X takes A via ``_get_work``
        but never runs it; Y's ``run`` still returns truthy, with A and B
        incomplete.
        """
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        task_a = A()

        class B(DummyTask):

            def requires(self):
                return task_a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        task_b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        # options shared by both workers
        common = dict(scheduler=sch, keep_alive=True, count_uniques=True)

        with Worker(worker_id='X', **common) as w, \
                Worker(worker_id='Y', wait_interval=0.1, **common) as w2:
            for each_worker in (w, w2):
                self.assertTrue(each_worker.add(task_b))

            # X is handed A but does not execute it
            claimed = w._get_work()[0]
            self.assertEqual(claimed, task_a.task_id)
            self.assertTrue(w2.run())

            self.assertFalse(task_a.complete())
            self.assertFalse(task_b.complete())
Example #16
0
    def test_no_automatic_re_enable_after_auto_then_manual_disable(self):
        """A task disabled by failures and then set to DISABLED stays disabled."""
        self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100)
        self.setTime(0)
        for _ in range(2):
            self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        def status_of_a():
            return self.sch.task_list('', '')['A']['status']

        # should be disabled now
        self.assertEqual(DISABLED, status_of_a())

        # should remain disabled once set
        self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED)
        self.assertEqual(DISABLED, status_of_a())

        # should not re-enable after 100 seconds
        self.setTime(101)
        self.assertEqual(DISABLED, status_of_a())
Example #17
0
    def test_interleaved_workers2(self):
        """Worker Y, holding only the external B, leaves B incomplete; worker X completes it."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            # same family as B, so both stringify to "B()"
            task_family = "B"

            def complete(self):
                return False

        real_b = B()
        external_b = ExternalB()

        self.assertEqual(str(external_b), "B()")

        sch = CentralPlannerScheduler(
            retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w:
            with Worker(scheduler=sch, worker_id='Y') as w2:
                self.assertTrue(w2.add(external_b))
                self.assertTrue(w.add(real_b))

                self.assertTrue(w2.run())
                self.assertFalse(real_b.complete())

                self.assertTrue(w.run())
                self.assertTrue(real_b.complete())
Example #18
0
    def test_complete_exception(self):
        """A task still runs when a sibling dependency's ``complete()`` raises.

        B requires A and C; A's ``complete`` raises.  Adding B reports
        failure, C runs, and neither A nor B does.
        """
        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        broken = A()

        class C(DummyTask):
            pass

        sibling = C()

        class B(DummyTask):
            def requires(self):
                return broken, sibling

        top = B()
        sch = CentralPlannerScheduler(
            retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            added = w.add(top)
            self.assertFalse(added)
            self.assertTrue(w.run())

            self.assertFalse(top.has_run)
            self.assertTrue(sibling.has_run)
            self.assertFalse(broken.has_run)
 def setUp(self):
     """Create the scheduler under test with short delays and ``prune_on_get_work`` on."""
     scheduler_options = dict(
         retry_delay=0.01,
         remove_delay=3,
         worker_disconnect_delay=3,
         disable_persist=3,
         disable_window=5,
         disable_failures=2,
         prune_on_get_work=True,
     )
     self.scheduler = CentralPlannerScheduler(**scheduler_options)
Example #20
0
def run(api_port=8082,
        address=None,
        unix_socket=None,
        scheduler=None,
        responder=None):
    """
    Runs one instance of the API server.

    Loads the scheduler state, initialises the API, schedules periodic
    pruning, installs shutdown handlers and then starts the tornado IO loop.

    :param api_port: forwarded to ``_init_api``.
    :param address: forwarded to ``_init_api``.
    :param unix_socket: forwarded to ``_init_api``.
    :param scheduler: scheduler to serve; a ``CentralPlannerScheduler`` is created when ``None``.
    :param responder: forwarded to ``_init_api``.
    """
    if scheduler is None:
        scheduler = CentralPlannerScheduler()

    # load scheduler state
    scheduler.load()

    _init_api(
        scheduler=scheduler,
        responder=responder,
        api_port=api_port,
        address=address,
        unix_socket=unix_socket,
    )

    # prune work DAG every 60 seconds
    pruner = tornado.ioloop.PeriodicCallback(scheduler.prune, 60000)
    pruner.start()

    # Non-empty once shutdown has run.  A list is used so the nested function
    # can record the state without rebinding an outer name.
    shutdown_done = []

    def shutdown_handler(signum, frame):
        exit_handler()
        sys.exit(0)

    @atexit.register
    def exit_handler():
        # Called directly from shutdown_handler and again by atexit after
        # sys.exit(); only the first call may dump state and stop the server.
        if shutdown_done:
            return
        shutdown_done.append(True)
        logger.info("Scheduler instance shutting down")
        scheduler.dump()
        stop()

    signal.signal(signal.SIGINT, shutdown_handler)
    signal.signal(signal.SIGTERM, shutdown_handler)
    if os.name == 'nt':
        signal.signal(signal.SIGBREAK, shutdown_handler)
    else:
        signal.signal(signal.SIGQUIT, shutdown_handler)

    logger.info("Scheduler starting up")

    tornado.ioloop.IOLoop.instance().start()
Example #21
0
class AssistantTest(unittest.TestCase):
    """Tests for a worker in assistant mode sharing a scheduler with a regular worker."""

    def setUp(self):
        """Create the scheduler, enter regular worker "X" and build assistant "Y"."""
        self.sch = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        regular_worker = Worker(scheduler=self.sch, worker_id='X')
        # entered manually; the matching exit happens in tearDown
        self.w = regular_worker.__enter__()
        self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)

    def tearDown(self):
        """Leave the context of the regular worker entered in setUp."""
        self.w.__exit__(None, None, None)

    def _task_ids(self, status):
        """Return the keys of the scheduler's task list for ``status``."""
        listing = self.sch.task_list(status, '')
        return list(listing.keys())

    def test_get_work(self):
        """A task added by the regular worker is complete after the assistant runs."""
        dummy = Dummy2Task('123')
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        self.assistant.run()
        self.assertTrue(dummy.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family ends up listed as FAILED."""
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        dummy = Dummy3Task('123')
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        assistant_result = self.assistant.run()
        self.assertFalse(assistant_result)
        self.assertFalse(dummy.complete())
        self.assertEqual(self._task_ids('FAILED'), [str(dummy)])

    def test_unimported_job_type(self):
        """The assistant fails without module info and succeeds once it is set."""
        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(self._task_ids('FAILED'), ['UnimportedTask()'])

        # check that it can import with the right module
        task.task_module = 'dummy_test_module.not_imported'
        self.w.add(task)
        self.assertTrue(self.assistant.run())
        self.assertEqual(self._task_ids('DONE'), ['UnimportedTask()'])
Example #22
0
 def setUp(self):
     """Create a scheduler with explicit disable settings and remember ``time.time``."""
     scheduler_options = dict(
         retry_delay=100,
         remove_delay=1000,
         worker_disconnect_delay=10,
         disable_persist=10,
         disable_window=10,
         disable_failures=3,
     )
     self.sch = CentralPlannerScheduler(**scheduler_options)
     # keep a reference to the current time.time function
     self.time = time.time
Example #23
0
    def __init__(self,
                 scheduler=None,
                 worker_id=None,
                 worker_processes=1,
                 assistant=False,
                 **kwargs):
        """Initialise worker state, install the SIGUSR1 handler and start the keep-alive thread.

        :param scheduler: scheduler to use; a ``CentralPlannerScheduler`` is created when ``None``.
        :param worker_id: id of this worker; when falsy, one is built from the generated worker info.
        :param worker_processes: converted with ``int`` and stored as ``self.worker_processes``.
        :param assistant: stored as ``self._assistant``.
        :param kwargs: passed on to the ``worker`` config class.
        """
        if scheduler is None:
            scheduler = CentralPlannerScheduler()

        self.worker_processes = int(worker_processes)
        self._worker_info = self._generate_worker_info()

        if not worker_id:
            worker_id = 'Worker(%s)' % ', '.join(
                ['%s=%s' % (k, v) for k, v in self._worker_info])

        self._config = worker(**kwargs)

        assert self._config.wait_interval >= _WAIT_INTERVAL_EPS, "[worker] wait_interval must be positive"
        assert self._config.wait_jitter >= 0.0, "[worker] wait_jitter must be equal or greater than zero"

        self._id = worker_id
        self._scheduler = scheduler
        self._assistant = assistant
        self._stop_requesting_work = False

        self.host = socket.gethostname()
        self._scheduled_tasks = {}
        self._suspended_tasks = {}

        self._first_task = None

        self.add_succeeded = True
        self.run_succeeded = True
        self.unfulfilled_counts = collections.defaultdict(int)

        try:
            signal.signal(signal.SIGUSR1, self.handle_interrupt)
        except AttributeError:
            # the signal module has no SIGUSR1 on this platform
            pass

        self._keep_alive_thread = KeepAliveThread(self._scheduler, self._id,
                                                  self._config.ping_interval)
        self._keep_alive_thread.daemon = True
        self._keep_alive_thread.start()

        # Keep info about what tasks are running (could be in other processes).
        # Compare the normalised int so that a value such as "1" selects the
        # same queue type as the integer 1.
        if self.worker_processes == 1:
            self._task_result_queue = DequeQueue()
        else:
            self._task_result_queue = multiprocessing.Queue()

        self._running_tasks = {}

        # Stuff for execution_summary
        self._add_task_history = []
        self._get_work_response_history = []
Example #24
0
 def test_search_results_beyond_limit(self):
     """Search results above ``max_shown_tasks`` are reported as a count only."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     for task_id in ("task_a", "task_b", "task_c", "task_d"):
         sch.add_task(worker=WORKER, task_id=task_id)

     # four hits expected for "a", more than the limit of three
     many_hits = sch.task_list("PENDING", "", search="a")
     self.assertEqual({"num_tasks": 4}, many_hits)

     # a single hit expected for "_a", so the task itself is listed
     single_hit = sch.task_list("PENDING", "", search="_a")
     self.assertEqual(["task_a"], list(single_hit.keys()))
Example #25
0
 def test_task_lists_some_beyond_limit(self):
     """Only the status list that exceeds ``max_shown_tasks`` collapses to a count."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     finished, waiting = "ABCD", "EFG"
     for name in finished:
         sch.add_task(worker=WORKER, task_id=name, status=DONE)
     for name in waiting:
         sch.add_task(worker=WORKER, task_id=name)

     # three pending tasks: listed individually
     listed = sch.task_list("PENDING", "")
     self.assertEqual(set(waiting), set(listed.keys()))
     # four done tasks: only the count is returned
     self.assertEqual({"num_tasks": 4}, sch.task_list("DONE", ""))
 def test_search_results_beyond_limit(self):
     """Search results above ``max_shown_tasks`` are reported as a count only."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     for letter in 'abcd':
         sch.add_task(worker=WORKER, task_id='task_%s' % letter)

     def search(term):
         return sch.task_list('PENDING', '', search=term)

     # four hits expected for 'a', more than the limit of three
     self.assertEqual({'num_tasks': 4}, search('a'))
     # a single hit expected for '_a', so the task itself is listed
     self.assertEqual(['task_a'], list(search('_a').keys()))
 def test_task_lists_some_beyond_limit(self):
     """Only the status list that exceeds ``max_shown_tasks`` collapses to a count."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     for letter in 'ABCD':
         sch.add_task(worker=WORKER, task_id=letter, status=DONE)
     for letter in 'EFG':
         sch.add_task(worker=WORKER, task_id=letter)

     pending_ids = set(sch.task_list('PENDING', '').keys())
     done_listing = sch.task_list('DONE', '')

     # three pending tasks are listed; four done tasks collapse to a count
     self.assertEqual(set('EFG'), pending_ids)
     self.assertEqual({'num_tasks': 4}, done_listing)
Example #28
0
    def run(self, result=None):
        """Run the test with a fresh scheduler and two entered workers.

        ``time.time`` is remembered beforehand and restored afterwards if a
        test replaced it.  The restore sits in ``finally`` so that a patched
        clock cannot leak out when the run or a worker's exit raises.
        """
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.time = time.time
        try:
            with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2:
                self.w = w
                self.w2 = w2
                super(WorkerTest, self).run(result)
        finally:
            if time.time != self.time:
                time.time = self.time
Example #29
0
File: server.py Project: 01-/luigi
def run(api_port=8082, address=None, unix_socket=None, scheduler=None, responder=None):
    """
    Runs one instance of the API server.

    Loads the scheduler state, initialises the API, schedules periodic
    pruning, installs shutdown handlers and then starts the tornado IO loop.

    :param api_port: forwarded to ``_init_api``.
    :param address: forwarded to ``_init_api``.
    :param unix_socket: forwarded to ``_init_api``.
    :param scheduler: scheduler to serve; a ``CentralPlannerScheduler`` is created when ``None``.
    :param responder: forwarded to ``_init_api``.
    """
    if scheduler is None:
        scheduler = CentralPlannerScheduler()

    # load scheduler state
    scheduler.load()

    _init_api(
        scheduler=scheduler,
        responder=responder,
        api_port=api_port,
        address=address,
        unix_socket=unix_socket,
    )

    # prune work DAG every 60 seconds
    pruner = tornado.ioloop.PeriodicCallback(scheduler.prune, 60000)
    pruner.start()

    # Non-empty once shutdown has run.  A list is used so the nested function
    # can record the state without rebinding an outer name.
    shutdown_done = []

    def shutdown_handler(signum, frame):
        exit_handler()
        sys.exit(0)

    @atexit.register
    def exit_handler():
        # Called directly from shutdown_handler and again by atexit after
        # sys.exit(); only the first call may dump state and stop the server.
        if shutdown_done:
            return
        shutdown_done.append(True)
        logger.info("Scheduler instance shutting down")
        scheduler.dump()
        stop()

    signal.signal(signal.SIGINT, shutdown_handler)
    signal.signal(signal.SIGTERM, shutdown_handler)
    if os.name == 'nt':
        signal.signal(signal.SIGBREAK, shutdown_handler)
    else:
        signal.signal(signal.SIGQUIT, shutdown_handler)

    logger.info("Scheduler starting up")

    tornado.ioloop.IOLoop.instance().start()
Example #30
0
    def test_automatic_re_enable_with_one_failure_allowed(self):
        """With ``disable_failures=1`` one failure disables the task until the persist time passes."""
        self.sch = CentralPlannerScheduler(disable_failures=1, disable_persist=100)
        self.setTime(0)
        self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        def status_of_a():
            return self.sch.task_list('', '')['A']['status']

        # should be disabled now
        self.assertEqual(DISABLED, status_of_a())

        # re-enables after 100 seconds
        self.setTime(101)
        self.assertEqual(FAILED, status_of_a())
Example #31
0
    def __init__(self,
                 scheduler=None,
                 worker_id=None,
                 worker_processes=1,
                 assistant=False,
                 raise_on_error=False,
                 **kwargs):
        """Initialise worker state, install the SIGUSR1 handler and start the keep-alive thread.

        :param scheduler: scheduler to use; a ``CentralPlannerScheduler`` is created when ``None``.
        :param worker_id: id of this worker; when falsy, one is built from the generated worker info.
        :param worker_processes: converted with ``int`` and stored as ``self.worker_processes``.
        :param assistant: stored as ``self._assistant``.
        :param raise_on_error: stored as ``self._raise_on_error``.
        :param kwargs: passed on to the ``worker`` config class.
        """
        if scheduler is None:
            scheduler = CentralPlannerScheduler()

        self.worker_processes = int(worker_processes)
        self._worker_info = self._generate_worker_info()

        if not worker_id:
            worker_id = 'Worker(%s)' % ', '.join(
                ['%s=%s' % (k, v) for k, v in self._worker_info])

        self._config = worker(**kwargs)

        # multiprocessing.Queue.get() is undefined for timeout=0
        assert self._config.wait_interval >= 0.00001, "[worker] wait_interval must be positive"

        self._id = worker_id
        self._scheduler = scheduler
        self._assistant = assistant
        self._stop_requesting_work = False

        self.host = socket.gethostname()
        self._scheduled_tasks = {}
        self._suspended_tasks = {}

        self._first_task = None

        self.add_succeeded = True
        self.run_succeeded = True
        self.unfulfilled_counts = collections.defaultdict(int)

        try:
            signal.signal(signal.SIGUSR1, self.handle_interrupt)
        except AttributeError:
            # the signal module has no SIGUSR1 on this platform; run
            # without the interrupt handler instead of failing
            pass

        self._keep_alive_thread = KeepAliveThread(self._scheduler, self._id,
                                                  self._config.ping_interval)
        self._keep_alive_thread.daemon = True
        self._keep_alive_thread.start()

        # Keep info about what tasks are running (could be in other processes)
        self._task_result_queue = multiprocessing.Queue()
        self._running_tasks = {}

        # Stuff for execution_summary
        self._add_task_history = []
        self._get_work_response_history = []

        # whether to raise when a task throws an exception
        self._raise_on_error = raise_on_error
Example #32
0
    def test_no_automatic_re_enable_after_manual_disable(self):
        """A task added with status DISABLED stays disabled past the persist time."""
        self.sch = CentralPlannerScheduler(disable_persist=100)
        self.setTime(0)
        self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED)

        def status_of_a():
            return self.sch.task_list('', '')['A']['status']

        # should be disabled now
        self.assertEqual(DISABLED, status_of_a())

        # should not re-enable after 100 seconds
        self.setTime(101)
        self.assertEqual(DISABLED, status_of_a())
Example #33
0
    def test_automatic_re_enable(self):
        """Two failures disable the task; it is FAILED again once the persist time passes."""
        self.sch = CentralPlannerScheduler(disable_failures=2, disable_persist=100)
        self.setTime(0)
        for _ in range(2):
            self.sch.add_task(worker=WORKER, task_id="A", status=FAILED)

        def status_of_a():
            return self.sch.task_list("", "")["A"]["status"]

        # should be disabled now
        self.assertEqual(DISABLED, status_of_a())

        # re-enables after 100 seconds
        self.setTime(101)
        self.assertEqual(FAILED, status_of_a())
Example #34
0
 def test_task_list_beyond_limit(self):
     """Four pending tasks exceed ``max_shown_tasks=3`` unless ``False`` is passed third."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     task_ids = 'ABCD'
     for task_id in task_ids:
         sch.add_task(worker=WORKER, task_id=task_id)

     # with False as third positional argument every task is listed
     full_listing = sch.task_list('PENDING', '', False)
     self.assertEqual(set(task_ids), set(full_listing.keys()))

     # the default call only reports the count
     self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', ''))
 def test_search_results_beyond_limit(self):
     """Search results above ``max_shown_tasks`` are reported as a count only."""
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     for i in range(4):
         task_kwargs = dict(family='Test', params={'p': str(i)}, task_id='Test_%i' % i)
         sch.add_task(worker=WORKER, **task_kwargs)

     def search(term):
         return sch.task_list('PENDING', '', search=term)

     # four hits expected for 'Test', more than the limit of three
     self.assertEqual({'num_tasks': 4}, search('Test'))
     # a single hit expected for '0', so the task itself is listed
     self.assertEqual(['Test_0'], list(search('0').keys()))
Example #36
0
    def __init__(self, scheduler=None, worker_id=None, worker_processes=1, assistant=False, **kwargs):
        """Initialise worker state, pick a worker-history backend and start the keep-alive thread.

        :param scheduler: scheduler to use; a ``CentralPlannerScheduler`` is created when ``None``.
        :param worker_id: id of this worker; when falsy, it is read from the
            ``worker_metadata``/``worker_id`` config entry, with an id built
            from the generated worker info as the default.
        :param worker_processes: converted with ``int`` and stored as ``self.worker_processes``.
        :param assistant: stored as ``self._assistant``.
        :param kwargs: passed on to the ``worker`` config class.
        """
        if scheduler is None:
            scheduler = CentralPlannerScheduler()

        self.worker_processes = int(worker_processes)

        self._worker_info = self._generate_worker_info()
        config = configuration.get_config()

        if not worker_id:
            # the default lists every key=value pair of the worker info
            default_worker_id = 'Worker(%s)' % ', '.join(['%s=%s' % (k, v) for k, v in self._worker_info])
            worker_id = config.get('worker_metadata', 'worker_id', default_worker_id)

        # SQS-backed history only when enabled in the config; otherwise a no-op implementation
        if config.getboolean('worker_history', 'record_worker_history_sqs', False):
            import sqs_history  # Needs boto, thus imported here
            self._worker_history_impl = sqs_history.SqsWorkerHistory()
        else:
            self._worker_history_impl = NopWorkerHistory()

        self._config = worker(**kwargs)

        self._id = worker_id
        self._scheduler = scheduler
        self._assistant = assistant

        self.host = socket.gethostname()
        self._scheduled_tasks = {}
        self._suspended_tasks = {}

        self._first_task = None

        self.add_succeeded = True
        self.run_succeeded = True
        self.unfulfilled_counts = collections.defaultdict(int)

        # notify the history backend before the keep-alive thread is started
        self._worker_history_impl.worker_started(worker_id)

        self._keep_alive_thread = KeepAliveThread(self._scheduler, self._id, self._config.ping_interval)
        self._keep_alive_thread.daemon = True
        self._keep_alive_thread.start()

        # Keep info about what tasks are running (could be in other processes)
        self._task_result_queue = multiprocessing.Queue()
        self._running_tasks = {}
Example #37
0
    def setUp(self):
        """Skip without ``luigi.sqs_history``; otherwise stub its config and build the fixture.

        ``SqsHistory._config`` is replaced on the class so that instances get
        a ``DummyQueue`` as ``_queue``.  A scheduler and two workers are then
        created, and ``time.time`` is remembered.
        """
        try:
            from luigi.sqs_history import SqsHistory, SqsTaskHistory, SqsWorkerHistory
        except ImportError as e:
            reason = 'Could not test WorkerTaskGlobalEventHandlerTests: %s' % e
            raise unittest.SkipTest(reason)

        # Replace _config method with one that uses our dummy queue.
        def use_dummy_queue(history, *args):
            history._queue = DummyQueue()

        SqsHistory._config = use_dummy_queue

        # InstanceCache.disable()
        scheduler_options = dict(retry_delay=100,
                                 remove_delay=1000,
                                 worker_disconnect_delay=10)
        self.sch = CentralPlannerScheduler(**scheduler_options)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        # keep a reference to the current time.time function
        self.time = time.time
Example #38
0
 def setUp(self):
     """Give each test its own scheduler built with default arguments."""
     scheduler = CentralPlannerScheduler()
     self.sch = scheduler
Example #39
0
class CentralPlannerTest(unittest.TestCase):
    """Tests for CentralPlannerScheduler dependency, retry and worker-timeout handling.

    Several tests fake the clock by replacing ``time.time`` (see ``setTime``);
    ``tearDown`` puts the real function back.  ``get_work`` results are indexed
    with ``[1]`` to read the id of the task handed out, which is ``None`` when
    the worker has nothing to run.
    """

    def setUp(self):
        """Build a fresh scheduler and remember the real ``time.time``."""
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.time = time.time

    def tearDown(self):
        """Restore ``time.time`` if a test replaced it."""
        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns ``t``."""
        time.time = lambda: t

    def test_dep(self):
        """B depends on A: A is handed out first, then B, then nothing."""
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)[1], 'A')
        self.sch.add_task(WORKER, 'A', status=DONE)
        self.assertEqual(self.sch.get_work(WORKER)[1], 'B')
        self.sch.add_task(WORKER, 'B', status=DONE)
        self.assertEqual(self.sch.get_work(WORKER), (0, None))

    def test_failed_dep(self):
        """B is not handed out while its dependency A is FAILED."""
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A')

        self.assertEqual(self.sch.get_work(WORKER)[1], 'A')
        self.sch.add_task(WORKER, 'A', status=FAILED)

        self.assertEqual(self.sch.get_work(WORKER)[1], None)  # can still wait and retry: TODO: do we want this?
        self.sch.add_task(WORKER, 'A', DONE)
        self.assertEqual(self.sch.get_work(WORKER)[1], 'B')
        self.sch.add_task(WORKER, 'B', DONE)
        self.assertEqual(self.sch.get_work(WORKER), (0, None))

    def test_broken_dep(self):
        """B is not handed out while its dependency A is registered as not runnable."""
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A', runnable=False)

        self.assertEqual(self.sch.get_work(WORKER)[1], None)  # can still wait and retry: TODO: do we want this?
        self.sch.add_task(WORKER, 'A', DONE)
        self.assertEqual(self.sch.get_work(WORKER)[1], 'B')
        self.sch.add_task(WORKER, 'B', DONE)
        self.assertEqual(self.sch.get_work(WORKER), (0, None))

    def test_two_workers(self):
        # Worker X wants to build A -> B
        # Worker Y wants to build A -> C
        self.sch.add_task(worker='X', task_id='A')
        self.sch.add_task(worker='Y', task_id='A')
        self.sch.add_task(task_id='B', deps=('A',), worker='X')
        self.sch.add_task(task_id='C', deps=('A',), worker='Y')

        self.assertEqual(self.sch.get_work(worker='X')[1], 'A')
        self.assertEqual(self.sch.get_work(worker='Y')[1], None)  # Worker Y is pending on A to be done
        self.sch.add_task(worker='X', task_id='A', status=DONE)
        self.assertEqual(self.sch.get_work(worker='Y')[1], 'C')
        self.assertEqual(self.sch.get_work(worker='X')[1], 'B')

    def test_retry(self):
        # Try to build A but fails, will retry after 100s
        self.setTime(0)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)[1], 'A')
        self.sch.add_task(WORKER, 'A', FAILED)
        # range (not the Python-2-only xrange) keeps this runnable on Python 3
        for t in range(100):
            self.setTime(t)
            self.assertEqual(self.sch.get_work(WORKER)[1], None)
            self.sch.ping(WORKER)

        self.setTime(101)
        self.assertEqual(self.sch.get_work(WORKER)[1], 'A')

    def test_disconnect_running(self):
        # X and Y wants to run A.
        # X starts but does not report back. Y does.
        # After some timeout, Y will build it instead
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.sch.add_task(task_id='A', worker='Y')
        self.assertEqual(self.sch.get_work(worker='X')[1], 'A')
        for t in range(200):
            self.setTime(t)
            self.sch.ping(worker='Y')

        self.assertEqual(self.sch.get_work(worker='Y')[1], 'A')

    def test_remove_dep(self):
        # X schedules A -> B, A is broken
        # Y schedules C -> B: this should remove A as a dep of B
        self.sch.add_task(task_id='A', worker='X', runnable=False)
        self.sch.add_task(task_id='B', deps=('A',), worker='X')

        # X can't build anything
        self.assertEqual(self.sch.get_work(worker='X')[1], None)

        self.sch.add_task(task_id='B', deps=('C',), worker='Y')  # should reset B's dependencies (dropping A)
        self.sch.add_task(task_id='C', worker='Y', status=DONE)

        self.assertEqual(self.sch.get_work(worker='Y')[1], 'B')

    def test_timeout(self):
        # A bug that was earlier present when restarting the same flow
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.assertEqual(self.sch.get_work(worker='X')[1], 'A')
        self.setTime(10000)
        self.sch.add_task(task_id='A', worker='Y')  # Will timeout X but not schedule A for removal
        for i in range(2000):
            self.setTime(10000 + i)
            self.sch.ping(worker='Y')
        self.sch.add_task(task_id='A', status=DONE, worker='Y')  # This used to raise an exception since A was removed

    def test_disallowed_state_changes(self):
        # Test that we can not schedule an already running task
        t = 'A'
        self.sch.add_task(task_id=t, worker='X')
        self.assertEqual(self.sch.get_work(worker='X')[1], t)
        self.sch.add_task(task_id=t, worker='Y')
        self.assertEqual(self.sch.get_work(worker='Y')[1], None)
class CentralPlannerTest(unittest.TestCase):
    """Tests for CentralPlannerScheduler: dependencies, assistants, pruning,
    resources, priorities, task disabling and task listing.

    Several tests fake the clock by replacing ``time.time`` (see ``setTime``);
    ``tearDown`` puts the real function back.  ``get_work`` responses are read
    as dicts; ``response['task_id']`` is the id of the task handed out and is
    ``None`` when the worker has nothing to run.
    """

    def setUp(self):
        """Build a scheduler from ``get_scheduler_config`` and remember the real ``time.time``."""
        super(CentralPlannerTest, self).setUp()
        conf = self.get_scheduler_config()
        self.sch = CentralPlannerScheduler(**conf)
        self.time = time.time

    def get_scheduler_config(self):
        """Return the keyword arguments ``setUp`` passes to CentralPlannerScheduler."""
        return {
            'retry_delay': 100,
            'remove_delay': 1000,
            'worker_disconnect_delay': 10,
            'disable_persist': 10,
            'disable_window': 10,
            'disable_failures': 3,
        }

    def tearDown(self):
        """Restore ``time.time`` if a test replaced it."""
        super(CentralPlannerTest, self).tearDown()
        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns ``t``."""
        time.time = lambda: t

    def test_dep(self):
        """B depends on A: A is handed out first, then B, then nothing."""
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')
        self.sch.add_task(WORKER, 'A', status=DONE)
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B')
        self.sch.add_task(WORKER, 'B', status=DONE)
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)

    def test_failed_dep(self):
        """B is not handed out while its dependency A is FAILED."""
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A')

        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')
        self.sch.add_task(WORKER, 'A', status=FAILED)

        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)  # can still wait and retry: TODO: do we want this?
        self.sch.add_task(WORKER, 'A', DONE)
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B')
        self.sch.add_task(WORKER, 'B', DONE)
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)

    def test_broken_dep(self):
        """B is not handed out while its dependency A is registered as not runnable."""
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A', runnable=False)

        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)  # can still wait and retry: TODO: do we want this?
        self.sch.add_task(WORKER, 'A', DONE)
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'B')
        self.sch.add_task(WORKER, 'B', DONE)
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)

    def test_two_workers(self):
        # Worker X wants to build A -> B
        # Worker Y wants to build A -> C
        self.sch.add_task(worker='X', task_id='A')
        self.sch.add_task(worker='Y', task_id='A')
        self.sch.add_task(task_id='B', deps=('A',), worker='X')
        self.sch.add_task(task_id='C', deps=('A',), worker='Y')

        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None)  # Worker Y is pending on A to be done
        self.sch.add_task(worker='X', task_id='A', status=DONE)
        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'C')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'B')

    def test_retry(self):
        # Try to build A but fails, will retry after 100s
        self.setTime(0)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')
        self.sch.add_task(WORKER, 'A', FAILED)
        for t in range(100):
            self.setTime(t)
            self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)
            self.sch.ping(WORKER)
            if t % 10 == 0:
                self.sch.prune()

        self.setTime(101)
        self.sch.prune()
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')

    def test_disconnect_running(self):
        # X and Y wants to run A.
        # X starts but does not report back. Y does.
        # After some timeout, Y will build it instead
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.sch.add_task(task_id='A', worker='Y')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        for t in range(200):
            self.setTime(t)
            self.sch.ping(worker='Y')
            if t % 10 == 0:
                self.sch.prune()

        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A')

    def test_remove_dep(self):
        # X schedules A -> B, A is broken
        # Y schedules C -> B: this should remove A as a dep of B
        self.sch.add_task(task_id='A', worker='X', runnable=False)
        self.sch.add_task(task_id='B', deps=('A',), worker='X')

        # X can't build anything
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], None)

        self.sch.add_task(task_id='B', deps=('C',), worker='Y')  # should reset B's dependencies (dropping A)
        self.sch.add_task(task_id='C', worker='Y', status=DONE)

        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B')

    def test_timeout(self):
        # A bug that was earlier present when restarting the same flow
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        self.setTime(10000)
        self.sch.add_task(task_id='A', worker='Y')  # Will timeout X but not schedule A for removal
        for i in range(2000):
            self.setTime(10000 + i)
            self.sch.ping(worker='Y')
        self.sch.add_task(task_id='A', status=DONE, worker='Y')  # This used to raise an exception since A was removed

    def test_disallowed_state_changes(self):
        # Test that we can not schedule an already running task
        t = 'A'
        self.sch.add_task(task_id=t, worker='X')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], t)
        self.sch.add_task(task_id=t, worker='Y')
        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], None)

    def test_two_worker_info(self):
        # Make sure the scheduler returns info that some other worker is running task A
        self.sch.add_task(worker='X', task_id='A')
        self.sch.add_task(worker='Y', task_id='A')

        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        r = self.sch.get_work(worker='Y')
        self.assertEqual(r['task_id'], None)  # Worker Y is pending on A to be done
        s = r['running_tasks'][0]
        self.assertEqual(s['task_id'], 'A')
        self.assertEqual(s['worker'], 'X')

    def test_assistant_get_work(self):
        """An assistant can pick up a task that was scheduled by another worker."""
        self.sch.add_task(worker='X', task_id='A')
        self.sch.add_worker('Y', [])

        self.assertEqual(self.sch.get_work('Y', assistant=True)['task_id'], 'A')

        # check that the scheduler recognizes tasks as running
        running_tasks = self.sch.task_list('RUNNING', '')
        self.assertEqual(len(running_tasks), 1)
        self.assertEqual(list(running_tasks.keys()), ['A'])
        self.assertEqual(running_tasks['A']['worker_running'], 'Y')

    def test_assistant_get_work_external_task(self):
        """An assistant is not handed a task that was registered as not runnable."""
        self.sch.add_task('X', task_id='A', runnable=False)
        self.assertIsNone(self.sch.get_work('Y', assistant=True)['task_id'])

    def test_task_fails_when_assistant_dies(self):
        self.setTime(0)
        self.sch.add_task(worker='X', task_id='A')
        self.sch.add_worker('Y', [])

        self.assertEqual(self.sch.get_work('Y', assistant=True)['task_id'], 'A')
        self.assertEqual(list(self.sch.task_list('RUNNING', '').keys()), ['A'])

        # Y dies for 50 seconds, X stays alive
        self.setTime(50)
        self.sch.ping('X')
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()), ['A'])

    def test_prune_with_live_assistant(self):
        self.setTime(0)
        self.sch.add_task(worker='X', task_id='A')
        self.sch.get_work('Y', assistant=True)
        self.sch.add_task(worker='Y', task_id='A', status=DONE, assistant=True)

        # worker X stops communicating, A should be marked for removal
        self.setTime(600)
        self.sch.ping('Y')
        self.sch.prune()

        # A will now be pruned
        self.setTime(2000)
        self.sch.prune()
        self.assertFalse(list(self.sch.task_list('', '')))

    def test_prune_done_tasks(self, expected=None):
        """Check which tasks are still listed after two prune rounds.

        Run directly as a test, ``expected`` is ``None`` and no task may be
        left.  The ``test_keep_*`` tests below also call this method as a
        helper, passing the ids they expect to remain.
        """
        self.setTime(0)
        self.sch.add_task(WORKER, task_id='A', status=DONE)
        self.sch.add_task(WORKER, task_id='B', deps=['A'], status=DONE)
        self.sch.add_task(WORKER, task_id='C', deps=['B'])

        self.setTime(600)
        self.sch.ping('ASSISTANT')
        self.sch.prune()
        self.setTime(2000)
        self.sch.ping('ASSISTANT')
        self.sch.prune()

        self.assertEqual(set(expected or ()), set(self.sch.task_list('', '').keys()))

    def test_keep_tasks_for_assistant(self):
        self.sch.get_work('ASSISTANT', assistant=True)  # tell the scheduler this is an assistant
        self.test_prune_done_tasks(['B', 'C'])

    def test_keep_scheduler_disabled_tasks_for_assistant(self):
        self.sch.get_work('ASSISTANT', assistant=True)  # tell the scheduler this is an assistant

        # create a scheduler disabled task and a worker disabled task
        for _ in range(10):
            self.sch.add_task(WORKER, 'D', status=FAILED)
        self.sch.add_task(WORKER, 'E', status=DISABLED)

        # scheduler prunes the worker disabled task
        self.assertEqual(set(['D', 'E']), set(self.sch.task_list(DISABLED, '')))
        self.test_prune_done_tasks(['B', 'C', 'D'])

    def test_keep_failed_tasks_for_assistant(self):
        self.sch.get_work('ASSISTANT', assistant=True)  # tell the scheduler this is an assistant
        # deps is passed as a list, like every other call in this class
        self.sch.add_task(WORKER, 'D', status=FAILED, deps=['A'])
        self.test_prune_done_tasks(['A', 'B', 'C', 'D'])

    def test_scheduler_resources_none_allow_one(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 1})
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

    def test_scheduler_resources_none_disallow_two(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        # No second argument: it would only be used as the failure message
        self.assertFalse(self.sch.get_work(worker='X')['task_id'])

    def test_scheduler_with_insufficient_resources(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        self.sch.update_resources(R1=2)
        self.assertFalse(self.sch.get_work(worker='X')['task_id'])

    def test_scheduler_with_sufficient_resources(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        self.sch.update_resources(R1=3)
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

    def test_scheduler_with_resources_used(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 1})
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1})
        self.sch.update_resources(R1=1)
        self.assertFalse(self.sch.get_work(worker='Y')['task_id'])

    def test_scheduler_overprovisioned_on_other_resource(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        self.sch.update_resources(R1=2)
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2})
        self.sch.update_resources(R1=1, R2=2)
        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B')

    def test_scheduler_with_priority_and_competing_resources(self):
        self.sch.add_task(worker='X', task_id='A')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10)
        self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)
        self.sch.update_resources(R=1)
        self.assertFalse(self.sch.get_work(worker='Y')['task_id'])

        self.sch.add_task(worker='Y', task_id='D', priority=0)
        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D')

    def test_do_not_lock_resources_when_not_ready(self):
        """ Test to make sure that resources won't go unused waiting on workers """
        self.sch.add_task(worker='X', task_id='A', priority=10)
        self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5)
        self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 1)])
        self.assertEqual('C', self.sch.get_work(worker='Y')['task_id'])

    def test_lock_resources_when_one_of_multiple_workers_is_ready(self):
        self.sch.add_task(worker='X', task_id='A', priority=10)
        self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5)
        self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 2)])
        self.sch.add_worker('Y', [])
        self.assertFalse(self.sch.get_work('Y')['task_id'])

    def test_do_not_lock_resources_while_running_higher_priority(self):
        """ Test to make sure that resources won't go unused waiting on workers """
        self.sch.add_task(worker='X', task_id='A', priority=10)
        self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5)
        self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 1)])
        self.assertEqual('A', self.sch.get_work('X')['task_id'])
        self.assertEqual('C', self.sch.get_work('Y')['task_id'])

    def test_lock_resources_while_running_lower_priority(self):
        """ While X runs lower-priority A, Y is not handed C: both B (priority 5, for X) and C need the single R """
        self.sch.add_task(worker='X', task_id='A', priority=4)
        self.assertEqual('A', self.sch.get_work('X')['task_id'])
        self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5)
        self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 1)])
        self.assertFalse(self.sch.get_work('Y')['task_id'])

    def test_lock_resources_for_second_worker(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R': 1})
        self.sch.add_task(worker='X', task_id='B', resources={'R': 1})
        self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=10)

        self.sch.add_worker('X', {'workers': 2})
        self.sch.add_worker('Y', {'workers': 1})
        self.sch.update_resources(R=2)

        self.assertEqual('A', self.sch.get_work('X')['task_id'])
        self.assertFalse(self.sch.get_work('X')['task_id'])

    def test_can_work_on_lower_priority_while_waiting_for_resources(self):
        self.sch.add_task(worker='X', task_id='A', resources={'R': 1}, priority=0)
        self.assertEqual('A', self.sch.get_work('X')['task_id'])

        self.sch.add_task(worker='Y', task_id='B', resources={'R': 1}, priority=10)
        self.sch.add_task(worker='Y', task_id='C', priority=0)
        self.sch.update_resources(R=1)

        self.assertEqual('C', self.sch.get_work('Y')['task_id'])

    def test_priority_update_with_pruning(self):
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')

        self.setTime(50)  # after worker disconnects
        self.sch.prune()
        self.sch.add_task(task_id='B', deps=['A'], worker='X')

        self.setTime(2000)  # after remove for task A
        self.sch.prune()

        # Here task A that B depends on is missing
        self.sch.add_task(WORKER, task_id='C', deps=['B'], priority=100)
        self.sch.add_task(WORKER, task_id='B', deps=['A'])
        self.sch.add_task(WORKER, task_id='A')
        self.sch.add_task(WORKER, task_id='D', priority=10)

        self.check_task_order('ABCD')

    def test_update_resources(self):
        self.sch.add_task(WORKER, task_id='A', deps=['B'])
        self.sch.add_task(WORKER, task_id='B', resources={'r': 2})
        self.sch.update_resources(r=1)

        # B requires too many resources, we can't schedule
        self.check_task_order([])

        self.sch.add_task(WORKER, task_id='B', resources={'r': 1})

        # now we have enough resources
        self.check_task_order(['B', 'A'])

    def test_hendle_multiple_resources(self):
        self.sch.add_task(WORKER, task_id='A', resources={'r1': 1, 'r2': 1})
        self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1})
        self.sch.add_task(WORKER, task_id='C', resources={'r1': 1})
        self.sch.update_resources(r1=2, r2=1)

        self.assertEqual('A', self.sch.get_work(WORKER)['task_id'])
        self.check_task_order('C')

    def test_single_resource_lock(self):
        self.sch.add_task('X', task_id='A', resources={'r': 1})
        self.assertEqual('A', self.sch.get_work('X')['task_id'])

        self.sch.add_task(WORKER, task_id='B', resources={'r': 2}, priority=10)
        self.sch.add_task(WORKER, task_id='C', resources={'r': 1})
        self.sch.update_resources(r=2)

        # Should wait for 2 units of r to be available for B before scheduling C
        self.check_task_order([])

    def test_no_lock_if_too_many_resources_required(self):
        self.sch.add_task(WORKER, task_id='A', resources={'r': 2}, priority=10)
        self.sch.add_task(WORKER, task_id='B', resources={'r': 1})
        self.sch.update_resources(r=1)
        self.check_task_order('B')

    def test_multiple_resources_lock(self):
        self.sch.add_task('X', task_id='A', resources={'r1': 1, 'r2': 1}, priority=10)
        self.sch.add_task(WORKER, task_id='B', resources={'r2': 1})
        self.sch.add_task(WORKER, task_id='C', resources={'r1': 1})
        self.sch.update_resources(r1=1, r2=1)

        # should preserve both resources for worker 'X'
        self.check_task_order([])

    def test_multiple_resources_no_lock(self):
        self.sch.add_task(WORKER, task_id='A', resources={'r1': 1}, priority=10)
        self.sch.add_task(WORKER, task_id='B', resources={'r1': 1, 'r2': 1}, priority=10)
        self.sch.add_task(WORKER, task_id='C', resources={'r2': 1})
        self.sch.update_resources(r1=1, r2=2)

        self.assertEqual('A', self.sch.get_work(WORKER)['task_id'])
        # C doesn't block B, so it can go first
        self.check_task_order('C')

    def check_task_order(self, order):
        """Assert that WORKER is handed exactly the task ids in ``order``, in sequence.

        Each task is reported DONE as soon as it has been handed out, and after
        the last one the scheduler must have no more work for WORKER.  ``order``
        is any iterable of task ids; a string is read one character per id.
        """
        for expected_id in order:
            self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id)
            self.sch.add_task(WORKER, expected_id, status=DONE)
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)

    def test_priorities(self):
        self.sch.add_task(WORKER, 'A', priority=10)
        self.sch.add_task(WORKER, 'B', priority=5)
        self.sch.add_task(WORKER, 'C', priority=15)
        self.sch.add_task(WORKER, 'D', priority=9)
        self.check_task_order(['C', 'A', 'D', 'B'])

    def test_priorities_default_and_negative(self):
        self.sch.add_task(WORKER, 'A', priority=10)
        self.sch.add_task(WORKER, 'B')
        self.sch.add_task(WORKER, 'C', priority=15)
        self.sch.add_task(WORKER, 'D', priority=-20)
        self.sch.add_task(WORKER, 'E', priority=1)
        self.check_task_order(['C', 'A', 'E', 'B', 'D'])

    def test_priorities_and_dependencies(self):
        self.sch.add_task(WORKER, 'A', deps=['Z'], priority=10)
        self.sch.add_task(WORKER, 'B', priority=5)
        self.sch.add_task(WORKER, 'C', deps=['Z'], priority=3)
        self.sch.add_task(WORKER, 'D', priority=2)
        self.sch.add_task(WORKER, 'Z', priority=1)
        self.check_task_order(['Z', 'A', 'B', 'C', 'D'])

    def test_priority_update_dependency_after_scheduling(self):
        self.sch.add_task(WORKER, 'A', priority=1)
        self.sch.add_task(WORKER, 'B', priority=5, deps=['A'])
        self.sch.add_task(WORKER, 'C', priority=10, deps=['B'])
        self.sch.add_task(WORKER, 'D', priority=6)
        self.check_task_order(['A', 'B', 'C', 'D'])

    def test_disable(self):
        self.sch.add_task(WORKER, 'A')
        # three failures, matching disable_failures=3 in get_scheduler_config
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 0)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)

    def test_disable_and_reenable(self):
        self.sch.add_task(WORKER, 'A')
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 0)

        self.sch.re_enable_task('A')

        # should be enabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 1)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')

    def test_disable_and_reenable_and_disable_again(self):
        self.sch.add_task(WORKER, 'A')
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 0)

        self.sch.re_enable_task('A')

        # should be enabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 1)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')

        self.sch.add_task(WORKER, 'A', status=FAILED)

        # should be still enabled
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 1)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')

        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)

        # should be disabled now
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 0)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)

    def test_disable_and_done(self):
        self.sch.add_task(WORKER, 'A')
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1)
        self.assertEqual(len(self.sch.task_list('FAILED', '')), 0)

        self.sch.add_task(WORKER, 'A', status=DONE)

        # should be enabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0)
        self.assertEqual(len(self.sch.task_list('DONE', '')), 1)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')

    def test_disable_by_worker(self):
        self.sch.add_task(WORKER, 'A', status=DISABLED)
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 1)

        self.sch.add_task(WORKER, 'A')

        # should be enabled at this point
        self.assertEqual(len(self.sch.task_list('DISABLED', '')), 0)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')

    def test_task_list_beyond_limit(self):
        """With more tasks than max_shown_tasks the default listing is only a count."""
        sch = CentralPlannerScheduler(max_shown_tasks=3)
        for c in 'ABCD':
            sch.add_task(WORKER, c)
        self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '', False).keys()))
        self.assertEqual({'num_tasks': 4}, sch.task_list('PENDING', ''))

    def test_task_list_within_limit(self):
        sch = CentralPlannerScheduler(max_shown_tasks=4)
        for c in 'ABCD':
            sch.add_task(WORKER, c)
        self.assertEqual(set('ABCD'), set(sch.task_list('PENDING', '').keys()))

    def test_task_lists_some_beyond_limit(self):
        sch = CentralPlannerScheduler(max_shown_tasks=3)
        for c in 'ABCD':
            sch.add_task(WORKER, c, 'DONE')
        for c in 'EFG':
            sch.add_task(WORKER, c)
        self.assertEqual(set('EFG'), set(sch.task_list('PENDING', '').keys()))
        self.assertEqual({'num_tasks': 4}, sch.task_list('DONE', ''))

    def test_priority_update_dependency_chain(self):
        self.sch.add_task(WORKER, 'A', priority=10, deps=['B'])
        self.sch.add_task(WORKER, 'B', priority=5, deps=['C'])
        self.sch.add_task(WORKER, 'C', priority=1)
        self.sch.add_task(WORKER, 'D', priority=6)
        self.check_task_order(['C', 'B', 'A', 'D'])

    def test_priority_no_decrease_with_multiple_updates(self):
        self.sch.add_task(WORKER, 'A', priority=1)
        self.sch.add_task(WORKER, 'B', priority=10, deps=['A'])
        self.sch.add_task(WORKER, 'C', priority=5, deps=['A'])
        self.sch.add_task(WORKER, 'D', priority=6)
        self.check_task_order(['A', 'B', 'D', 'C'])

    def test_unique_tasks(self):
        self.sch.add_task(WORKER, 'A')
        self.sch.add_task(WORKER, 'B')
        self.sch.add_task(WORKER, 'C')
        self.sch.add_task(WORKER + "_2", 'B')

        response = self.sch.get_work(WORKER)
        self.assertEqual(3, response['n_pending_tasks'])
        self.assertEqual(2, response['n_unique_pending'])

    def test_pending_downstream_disable(self):
        self.sch.add_task(WORKER, 'A', status=DISABLED)
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'C', deps=('B',))

        response = self.sch.get_work(WORKER)
        self.assertIsNone(response['task_id'])
        self.assertEqual(0, response['n_pending_tasks'])
        self.assertEqual(0, response['n_unique_pending'])

    def test_pending_downstream_failure(self):
        self.sch.add_task(WORKER, 'A', status=FAILED)
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'C', deps=('B',))

        response = self.sch.get_work(WORKER)
        self.assertIsNone(response['task_id'])
        self.assertEqual(2, response['n_pending_tasks'])
        self.assertEqual(2, response['n_unique_pending'])

    def test_prefer_more_dependents(self):
        self.sch.add_task(WORKER, 'A')
        self.sch.add_task(WORKER, 'B')
        self.sch.add_task(WORKER, 'C', deps=['B'])
        self.sch.add_task(WORKER, 'D', deps=['B'])
        self.sch.add_task(WORKER, 'E', deps=['A'])
        self.check_task_order('BACDE')

    def test_prefer_readier_dependents(self):
        self.sch.add_task(WORKER, 'A')
        self.sch.add_task(WORKER, 'B')
        self.sch.add_task(WORKER, 'C')
        self.sch.add_task(WORKER, 'D')
        self.sch.add_task(WORKER, 'F', deps=['A', 'B', 'C'])
        self.sch.add_task(WORKER, 'G', deps=['A', 'B', 'C'])
        self.sch.add_task(WORKER, 'E', deps=['D'])
        self.check_task_order('DABCFGE')

    def test_ignore_done_dependents(self):
        self.sch.add_task(WORKER, 'A')
        self.sch.add_task(WORKER, 'B')
        self.sch.add_task(WORKER, 'C')
        self.sch.add_task(WORKER, 'D', priority=1)
        self.sch.add_task(WORKER, 'E', deps=['C', 'D'])
        self.sch.add_task(WORKER, 'F', deps=['A', 'B'])
        self.check_task_order('DCABEF')
Example #41
0
 def test_task_list_within_limit(self):
     """Four pending tasks with max_shown_tasks=4 are all listed by id."""
     task_ids = 'ABCD'
     scheduler = CentralPlannerScheduler(max_shown_tasks=4)
     for task_id in task_ids:
         scheduler.add_task(worker=WORKER, task_id=task_id)

     listed = scheduler.task_list('PENDING', '')
     self.assertEqual(set(task_ids), set(listed.keys()))
Example #42
0
class CentralPlannerTest(unittest.TestCase):
    def setUp(self):
        """Build a fresh scheduler and remember the current ``time.time``."""
        super(CentralPlannerTest, self).setUp()
        self.sch = CentralPlannerScheduler(**self.get_scheduler_config())
        # Kept so tearDown can undo any patching done through setTime().
        self.time = time.time

    def get_scheduler_config(self):
        """Return the keyword arguments setUp passes to the scheduler."""
        return dict(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
            disable_persist=10,
            disable_window=10,
            disable_failures=3,
            disable_hard_timeout=60 * 60,
        )

    def tearDown(self):
        super(CentralPlannerTest, self).tearDown()
        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns ``t``."""
        def frozen_clock():
            return t

        time.time = frozen_clock

    def test_dep(self):
        """B depends on A, so A is handed out and finished before B."""
        self.sch.add_task(worker=WORKER, task_id='B', deps=('A', ))
        self.sch.add_task(worker=WORKER, task_id='A')

        for expected_id in ('A', 'B'):
            offered = self.sch.get_work(worker=WORKER)['task_id']
            self.assertEqual(offered, expected_id)
            self.sch.add_task(worker=WORKER, task_id=expected_id, status=DONE)

        # Both tasks are done, nothing is left to hand out.
        self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None)

    def test_failed_dep(self):
        """While A is FAILED nothing is offered; once A is DONE, B is."""
        def next_task_id():
            return self.sch.get_work(worker=WORKER)['task_id']

        self.sch.add_task(worker=WORKER, task_id='B', deps=('A', ))
        self.sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(next_task_id(), 'A')

        self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)
        # can still wait and retry: TODO: do we want this?
        self.assertEqual(next_task_id(), None)

        self.sch.add_task(worker=WORKER, task_id='A', status=DONE)
        self.assertEqual(next_task_id(), 'B')
        self.sch.add_task(worker=WORKER, task_id='B', status=DONE)
        self.assertEqual(next_task_id(), None)

    def test_broken_dep(self):
        """A non-runnable dependency blocks B until A is reported DONE."""
        def next_task_id():
            return self.sch.get_work(worker=WORKER)['task_id']

        self.sch.add_task(worker=WORKER, task_id='B', deps=('A', ))
        self.sch.add_task(worker=WORKER, task_id='A', runnable=False)

        # can still wait and retry: TODO: do we want this?
        self.assertEqual(next_task_id(), None)

        self.sch.add_task(worker=WORKER, task_id='A', status=DONE)
        self.assertEqual(next_task_id(), 'B')
        self.sch.add_task(worker=WORKER, task_id='B', status=DONE)
        self.assertEqual(next_task_id(), None)

    def test_two_workers(self):
        """Two workers share dependency A; only one of them gets to run it.

        Worker X wants to build A -> B, worker Y wants to build A -> C.
        """
        def next_task_id(worker_id):
            return self.sch.get_work(worker=worker_id)['task_id']

        for worker_id in ('X', 'Y'):
            self.sch.add_task(worker=worker_id, task_id='A')
        self.sch.add_task(task_id='B', deps=('A', ), worker='X')
        self.sch.add_task(task_id='C', deps=('A', ), worker='Y')

        self.assertEqual(next_task_id('X'), 'A')
        # Worker Y is pending on A to be done
        self.assertEqual(next_task_id('Y'), None)

        self.sch.add_task(worker='X', task_id='A', status=DONE)
        self.assertEqual(next_task_id('Y'), 'C')
        self.assertEqual(next_task_id('X'), 'B')

    def test_retry(self):
        """A failed task is only offered again after the retry delay (100s)."""
        def next_task_id():
            return self.sch.get_work(worker=WORKER)['task_id']

        self.setTime(0)
        self.sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(next_task_id(), 'A')
        self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # For the first 100 seconds A must not come back, pruning or not.
        for now in range(100):
            self.setTime(now)
            self.assertEqual(next_task_id(), None)
            self.sch.ping(worker=WORKER)
            if not now % 10:
                self.sch.prune()

        self.setTime(101)
        self.sch.prune()
        self.assertEqual(next_task_id(), 'A')

    def test_disconnect_running(self):
        """A task held by a silent worker is eventually offered to another.

        X and Y both want A. X takes it and never reports back, while Y
        keeps pinging; after some timeout Y gets to build A instead.
        """
        self.setTime(0)
        for worker_id in ('X', 'Y'):
            self.sch.add_task(task_id='A', worker=worker_id)
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        for now in range(200):
            self.setTime(now)
            self.sch.ping(worker='Y')
            if not now % 10:
                self.sch.prune()

        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A')

    def test_remove_dep(self):
        """Re-adding B with new deps replaces its broken dependency A.

        X schedules A -> B where A is broken; Y then schedules C -> B,
        which should remove A as a dependency of B.
        """
        self.sch.add_task(task_id='A', worker='X', runnable=False)
        self.sch.add_task(task_id='B', deps=('A', ), worker='X')

        # X can't build anything
        nothing_for_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual(nothing_for_x, None)

        # B's dependencies are now declared as just C.
        self.sch.add_task(task_id='B', deps=('C', ), worker='Y')
        self.sch.add_task(task_id='C', worker='Y', status=DONE)

        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B')

    def test_timeout(self):
        """Regression test: restarting the same flow after a long pause.

        Covers a bug that was earlier present when restarting the same flow.
        """
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        self.setTime(10000)
        # Will timeout X but not schedule A for removal
        self.sch.add_task(task_id='A', worker='Y')
        for now in range(10000, 12000):
            self.setTime(now)
            self.sch.ping(worker='Y')

        # This used to raise an exception since A was removed
        self.sch.add_task(task_id='A', status=DONE, worker='Y')

    def test_disallowed_state_changes(self):
        """A task that is already running cannot be scheduled again."""
        task_id = 'A'
        self.sch.add_task(task_id=task_id, worker='X')
        given_to_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual(given_to_x, task_id)

        # Y re-adds the running task but must not receive it.
        self.sch.add_task(task_id=task_id, worker='Y')
        given_to_y = self.sch.get_work(worker='Y')['task_id']
        self.assertEqual(given_to_y, None)

    def test_two_worker_info(self):
        """get_work tells a waiting worker which worker is running task A."""
        for worker_id in ('X', 'Y'):
            self.sch.add_task(worker=worker_id, task_id='A')

        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        response = self.sch.get_work(worker='Y')
        # Worker Y is pending on A to be done
        self.assertEqual(response['task_id'], None)

        first_running = response['running_tasks'][0]
        self.assertEqual(first_running['task_id'], 'A')
        self.assertEqual(first_running['worker'], 'X')

    def test_assistant_get_work(self):
        """An assistant can pick up a task that another worker scheduled."""
        self.sch.add_task(worker='X', task_id='A')
        self.sch.add_worker('Y', [])

        assistant_work = self.sch.get_work(worker='Y', assistant=True)
        self.assertEqual(assistant_work['task_id'], 'A')

        # check that the scheduler recognizes tasks as running
        running = self.sch.task_list('RUNNING', '')
        self.assertEqual(len(running), 1)
        self.assertEqual(list(running.keys()), ['A'])
        self.assertEqual(running['A']['worker_running'], 'Y')

    def test_assistant_get_work_external_task(self):
        """An assistant is not offered a task registered as non-runnable."""
        self.sch.add_task(worker='X', task_id='A', runnable=False)
        assistant_work = self.sch.get_work(worker='Y', assistant=True)
        self.assertIsNone(assistant_work['task_id'])

    def test_task_fails_when_assistant_dies(self):
        """A task run by an assistant that goes silent ends up FAILED."""
        def ids_with_status(status):
            return list(self.sch.task_list(status, '').keys())

        self.setTime(0)
        self.sch.add_task(worker='X', task_id='A')
        self.sch.add_worker('Y', [])

        assistant_work = self.sch.get_work(worker='Y', assistant=True)
        self.assertEqual(assistant_work['task_id'], 'A')
        self.assertEqual(ids_with_status('RUNNING'), ['A'])

        # Y dies for 50 seconds, X stays alive
        self.setTime(50)
        self.sch.ping(worker='X')
        self.assertEqual(ids_with_status('FAILED'), ['A'])

    def test_prune_with_live_assistant(self):
        """A finished task is pruned although the assistant is still alive."""
        self.setTime(0)
        self.sch.add_task(worker='X', task_id='A')
        self.sch.get_work(worker='Y', assistant=True)
        self.sch.add_task(worker='Y', task_id='A', status=DONE, assistant=True)

        # worker X stops communicating, A should be marked for removal
        self.setTime(600)
        self.sch.ping(worker='Y')
        self.sch.prune()

        # A will now be pruned
        self.setTime(2000)
        self.sch.prune()
        remaining = list(self.sch.task_list('', ''))
        self.assertFalse(remaining)

    def test_fail_job_from_dead_worker_with_live_assistant(self):
        """A task held by a silent worker fails even if an assistant lives."""
        self.setTime(0)
        self.sch.add_task(worker='X', task_id='A')
        taken_by_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual('A', taken_by_x)
        self.sch.add_worker('Y', [('assistant', True)])

        # Only Y pings at t=600; X has been silent since t=0.
        self.setTime(600)
        self.sch.ping(worker='Y')
        self.sch.prune()

        failed_ids = list(self.sch.task_list('FAILED', '').keys())
        self.assertEqual(['A'], failed_ids)

    def test_prune_done_tasks(self, expected=None):
        """Prune twice while only 'ASSISTANT' pings, then check what is left.

        Also used as a helper by other tests, which pass ``expected``: an
        iterable of task ids that should survive (default: none).
        """
        self.setTime(0)
        self.sch.add_task(worker=WORKER, task_id='A', status=DONE)
        self.sch.add_task(worker=WORKER, task_id='B', deps=['A'], status=DONE)
        self.sch.add_task(worker=WORKER, task_id='C', deps=['B'])

        for now in (600, 2000):
            self.setTime(now)
            self.sch.ping(worker='ASSISTANT')
            self.sch.prune()

        surviving_ids = set(self.sch.task_list('', '').keys())
        self.assertEqual(set(expected or ()), surviving_ids)

    def test_keep_tasks_for_assistant(self):
        """With an assistant registered, B and C survive pruning."""
        # tell the scheduler this is an assistant
        self.sch.get_work(worker='ASSISTANT', assistant=True)
        surviving = ['B', 'C']
        self.test_prune_done_tasks(surviving)

    def test_keep_scheduler_disabled_tasks_for_assistant(self):
        """Scheduler-disabled D survives pruning; worker-disabled E does not."""
        # tell the scheduler this is an assistant
        self.sch.get_work(worker='ASSISTANT', assistant=True)

        # D is reported FAILED ten times; E is reported DISABLED directly.
        for _ in range(10):
            self.sch.add_task(worker=WORKER, task_id='D', status=FAILED)
        self.sch.add_task(worker=WORKER, task_id='E', status=DISABLED)

        # Both are listed as disabled before pruning ...
        disabled_ids = set(self.sch.task_list(DISABLED, ''))
        self.assertEqual({'D', 'E'}, disabled_ids)
        # ... but only D is expected to remain afterwards.
        self.test_prune_done_tasks(['B', 'C', 'D'])

    def test_keep_failed_tasks_for_assistant(self):
        """With an assistant registered, failed D and its upstream A survive.

        D is reported FAILED with A as its dependency; after the pruning
        performed by test_prune_done_tasks all of A, B, C and D must remain.
        """
        # tell the scheduler this is an assistant
        self.sch.get_work(worker='ASSISTANT', assistant=True)
        # deps is a collection of task ids everywhere else in this class; the
        # bare string 'A' only worked because the id is a single character.
        self.sch.add_task(worker=WORKER, task_id='D', status=FAILED, deps=['A'])
        self.test_prune_done_tasks(['A', 'B', 'C', 'D'])

    def test_scheduler_resources_none_allow_one(self):
        """Without any update_resources call, one unit of R1 is allowed."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 1})
        offered = self.sch.get_work(worker='X')['task_id']
        self.assertEqual(offered, 'A')

    def test_scheduler_resources_none_disallow_two(self):
        """Without any update_resources call, two units of R1 are refused."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        # assertFalse's second positional argument is the failure message, so
        # the 'A' that used to be passed here was never compared to anything.
        self.assertFalse(self.sch.get_work(worker='X')['task_id'])

    def test_scheduler_with_insufficient_resources(self):
        """A task needing 3 units of R1 is not offered when only 2 exist."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        self.sch.update_resources(R1=2)
        offered = self.sch.get_work(worker='X')['task_id']
        self.assertFalse(offered)

    def test_scheduler_with_sufficient_resources(self):
        """A task needing 3 units of R1 is offered when exactly 3 exist."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        self.sch.update_resources(R1=3)
        offered = self.sch.get_work(worker='X')['task_id']
        self.assertEqual(offered, 'A')

    def test_scheduler_with_resources_used(self):
        """B is refused while running task A holds the only unit of R1."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 1})
        given_to_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual(given_to_x, 'A')

        self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1})
        self.sch.update_resources(R1=1)
        given_to_y = self.sch.get_work(worker='Y')['task_id']
        self.assertFalse(given_to_y)

    def test_scheduler_overprovisioned_on_other_resource(self):
        """Shrinking R1 below A's usage does not block B, which needs R2."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        self.sch.update_resources(R1=2)
        given_to_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual(given_to_x, 'A')

        self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2})
        # R1 is now smaller than what the running task A already holds.
        self.sch.update_resources(R1=1, R2=2)
        given_to_y = self.sch.get_work(worker='Y')['task_id']
        self.assertEqual(given_to_y, 'B')

    def test_scheduler_with_priority_and_competing_resources(self):
        """Y's C loses R to X's higher-priority B, but resource-free D runs."""
        self.sch.add_task(worker='X', task_id='A')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        competing = (
            dict(worker='X', task_id='B', resources={'R': 1}, priority=10),
            dict(worker='Y', task_id='C', resources={'R': 1}, priority=1),
        )
        for spec in competing:
            self.sch.add_task(**spec)
        self.sch.update_resources(R=1)
        self.assertFalse(self.sch.get_work(worker='Y')['task_id'])

        self.sch.add_task(worker='Y', task_id='D', priority=0)
        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'D')

    def test_do_not_lock_resources_when_not_ready(self):
        """ Test to make sure that resources won't go unused waiting on workers """
        task_specs = (
            dict(worker='X', task_id='A', priority=10),
            dict(worker='X', task_id='B', resources={'R': 1}, priority=5),
            dict(worker='Y', task_id='C', resources={'R': 1}, priority=1),
        )
        for spec in task_specs:
            self.sch.add_task(**spec)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 1)])
        # X reports a single worker and has A queued ahead of B, so Y gets R.
        given_to_y = self.sch.get_work(worker='Y')['task_id']
        self.assertEqual('C', given_to_y)

    def test_lock_resources_when_one_of_multiple_workers_is_ready(self):
        """With two workers on X, R stays reserved for B and Y gets nothing."""
        task_specs = (
            dict(worker='X', task_id='A', priority=10),
            dict(worker='X', task_id='B', resources={'R': 1}, priority=5),
            dict(worker='Y', task_id='C', resources={'R': 1}, priority=1),
        )
        for spec in task_specs:
            self.sch.add_task(**spec)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 2)])
        self.sch.add_worker('Y', [])
        given_to_y = self.sch.get_work(worker='Y')['task_id']
        self.assertFalse(given_to_y)

    def test_do_not_lock_resources_while_running_higher_priority(self):
        """ Test to make sure that resources won't go unused waiting on workers """
        task_specs = (
            dict(worker='X', task_id='A', priority=10),
            dict(worker='X', task_id='B', resources={'R': 1}, priority=5),
            dict(worker='Y', task_id='C', resources={'R': 1}, priority=1),
        )
        for spec in task_specs:
            self.sch.add_task(**spec)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 1)])
        # X's single worker takes the higher-priority A, so C may use R.
        for expected_id, worker_id in (('A', 'X'), ('C', 'Y')):
            offered = self.sch.get_work(worker=worker_id)['task_id']
            self.assertEqual(expected_id, offered)

    def test_lock_resources_while_running_lower_priority(self):
        """ Make sure resources will be made available while working on lower priority tasks """
        self.sch.add_task(worker='X', task_id='A', priority=4)
        running_on_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual('A', running_on_x)

        # B (priority 5) outranks the already running A (priority 4).
        competing = (
            dict(worker='X', task_id='B', resources={'R': 1}, priority=5),
            dict(worker='Y', task_id='C', resources={'R': 1}, priority=1),
        )
        for spec in competing:
            self.sch.add_task(**spec)

        self.sch.update_resources(R=1)
        self.sch.add_worker('X', [('workers', 1)])
        given_to_y = self.sch.get_work(worker='Y')['task_id']
        self.assertFalse(given_to_y)

    def test_lock_resources_for_second_worker(self):
        """X gets A only; the second unit of R is held for Y's priority-10 C."""
        task_specs = (
            dict(worker='X', task_id='A', resources={'R': 1}),
            dict(worker='X', task_id='B', resources={'R': 1}),
            dict(worker='Y', task_id='C', resources={'R': 1}, priority=10),
        )
        for spec in task_specs:
            self.sch.add_task(**spec)

        self.sch.add_worker('X', {'workers': 2})
        self.sch.add_worker('Y', {'workers': 1})
        self.sch.update_resources(R=2)

        first_for_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual('A', first_for_x)
        second_for_x = self.sch.get_work(worker='X')['task_id']
        self.assertFalse(second_for_x)

    def test_can_work_on_lower_priority_while_waiting_for_resources(self):
        """While B waits for R (held by running A), Y is offered C instead."""
        self.sch.add_task(
            worker='X', task_id='A', resources={'R': 1}, priority=0)
        running_on_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual('A', running_on_x)

        self.sch.add_task(
            worker='Y', task_id='B', resources={'R': 1}, priority=10)
        self.sch.add_task(worker='Y', task_id='C', priority=0)
        self.sch.update_resources(R=1)

        given_to_y = self.sch.get_work(worker='Y')['task_id']
        self.assertEqual('C', given_to_y)

    def test_priority_update_with_pruning(self):
        """Priorities still propagate after a dependency has been pruned."""
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')

        self.setTime(50)  # after worker disconnects
        self.sch.prune()
        self.sch.add_task(task_id='B', deps=['A'], worker='X')

        self.setTime(2000)  # after remove for task A
        self.sch.prune()

        # Here task A that B depends on is missing
        task_specs = (
            dict(task_id='C', deps=['B'], priority=100),
            dict(task_id='B', deps=['A']),
            dict(task_id='A'),
            dict(task_id='D', priority=10),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)

        self.check_task_order('ABCD')

    def test_update_resources(self):
        """Re-adding a task with a smaller resource need makes it runnable."""
        self.sch.add_task(worker=WORKER, task_id='A', deps=['B'])
        self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 2})
        self.sch.update_resources(r=1)

        # B requires too many resources, we can't schedule
        nothing_runnable = []
        self.check_task_order(nothing_runnable)

        self.sch.add_task(worker=WORKER, task_id='B', resources={'r': 1})

        # now we have enough resources
        dependency_first = ['B', 'A']
        self.check_task_order(dependency_first)

    def test_hendle_multiple_resources(self):
        """Once A holds r1 and r2, only C (r1 alone) can still be scheduled."""
        # A and B each need one unit of both r1 and r2; C needs r1 only.
        for task_id in ('A', 'B'):
            self.sch.add_task(
                worker=WORKER, task_id=task_id, resources={'r1': 1, 'r2': 1})
        self.sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1})
        self.sch.update_resources(r1=2, r2=1)

        first_offered = self.sch.get_work(worker=WORKER)['task_id']
        self.assertEqual('A', first_offered)
        self.check_task_order('C')

    def test_single_resource_lock(self):
        """C is held back so that higher-priority B can get both units of r."""
        self.sch.add_task(worker='X', task_id='A', resources={'r': 1})
        running_on_x = self.sch.get_work(worker='X')['task_id']
        self.assertEqual('A', running_on_x)

        self.sch.add_task(
            worker=WORKER, task_id='B', resources={'r': 2}, priority=10)
        self.sch.add_task(worker=WORKER, task_id='C', resources={'r': 1})
        self.sch.update_resources(r=2)

        # Should wait for 2 units of r to be available for B before scheduling C
        nothing_runnable = []
        self.check_task_order(nothing_runnable)

    def test_no_lock_if_too_many_resources_required(self):
        """A needs more of r than exists at all, so it must not block B."""
        task_specs = (
            dict(task_id='A', resources={'r': 2}, priority=10),
            dict(task_id='B', resources={'r': 1}),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.sch.update_resources(r=1)
        self.check_task_order('B')

    def test_multiple_resources_lock(self):
        """Both r1 and r2 stay reserved for X's priority-10 task A."""
        self.sch.add_task(
            worker='X',
            task_id='A',
            resources={'r1': 1, 'r2': 1},
            priority=10)
        # B and C each want one of the two resources A needs.
        for task_id, resource in (('B', 'r2'), ('C', 'r1')):
            self.sch.add_task(
                worker=WORKER, task_id=task_id, resources={resource: 1})
        self.sch.update_resources(r1=1, r2=1)

        # should preserve both resources for worker 'X'
        nothing_runnable = []
        self.check_task_order(nothing_runnable)

    def test_multiple_resources_no_lock(self):
        """C may run while B waits on r1, because r2 has a unit to spare."""
        task_specs = (
            dict(task_id='A', resources={'r1': 1}, priority=10),
            dict(task_id='B', resources={'r1': 1, 'r2': 1}, priority=10),
            dict(task_id='C', resources={'r2': 1}),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.sch.update_resources(r1=1, r2=2)

        first_offered = self.sch.get_work(worker=WORKER)['task_id']
        self.assertEqual('A', first_offered)
        # C doesn't block B, so it can go first
        self.check_task_order('C')

    def check_task_order(self, order):
        """Assert that WORKER is offered exactly the ids in ``order``, in turn.

        Each offered task is immediately reported DONE; once ``order`` is
        exhausted the scheduler must have nothing more to hand out.
        """
        def next_task_id():
            return self.sch.get_work(worker=WORKER)['task_id']

        for expected_id in order:
            self.assertEqual(next_task_id(), expected_id)
            self.sch.add_task(worker=WORKER, task_id=expected_id, status=DONE)
        self.assertEqual(next_task_id(), None)

    def test_priorities(self):
        """Independent tasks are offered in descending priority order."""
        priorities = (('A', 10), ('B', 5), ('C', 15), ('D', 9))
        for task_id, priority in priorities:
            self.sch.add_task(worker=WORKER, task_id=task_id, priority=priority)
        self.check_task_order(['C', 'A', 'D', 'B'])

    def test_priorities_default_and_negative(self):
        """A task without a priority ranks between priority 1 and -20."""
        task_specs = (
            dict(task_id='A', priority=10),
            dict(task_id='B'),
            dict(task_id='C', priority=15),
            dict(task_id='D', priority=-20),
            dict(task_id='E', priority=1),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.check_task_order(['C', 'A', 'E', 'B', 'D'])

    def test_priorities_and_dependencies(self):
        """Low-priority Z runs first because priority-10 A depends on it."""
        task_specs = (
            dict(task_id='A', deps=['Z'], priority=10),
            dict(task_id='B', priority=5),
            dict(task_id='C', deps=['Z'], priority=3),
            dict(task_id='D', priority=2),
            dict(task_id='Z', priority=1),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.check_task_order(['Z', 'A', 'B', 'C', 'D'])

    def test_priority_update_dependency_after_scheduling(self):
        """The chain A -> B -> C (C has priority 10) runs before D (6)."""
        task_specs = (
            dict(task_id='A', priority=1),
            dict(task_id='B', priority=5, deps=['A']),
            dict(task_id='C', priority=10, deps=['B']),
            dict(task_id='D', priority=6),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.check_task_order(['A', 'B', 'C', 'D'])

    def test_disable(self):
        """Three failures disable a task; re-adding it does not revive it."""
        def count(status):
            return len(self.sch.task_list(status, ''))

        self.sch.add_task(worker=WORKER, task_id='A')
        for _ in range(3):
            self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        self.sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None)

    def test_disable_and_reenable(self):
        """re_enable_task turns a disabled task into a schedulable one."""
        def count(status):
            return len(self.sch.task_list(status, ''))

        self.sch.add_task(worker=WORKER, task_id='A')
        for _ in range(3):
            self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        self.sch.re_enable_task('A')

        # should be enabled at this point
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('FAILED'), 1)

        self.sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A')

    def test_disable_and_reenable_and_disable_again(self):
        """A re-enabled task is disabled again after three further failures."""
        def fail_task(times):
            for _ in range(times):
                self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        def assert_counts(disabled, failed):
            self.assertEqual(len(self.sch.task_list('DISABLED', '')), disabled)
            self.assertEqual(len(self.sch.task_list('FAILED', '')), failed)

        def reschedule_and_expect(expected_id):
            self.sch.add_task(worker=WORKER, task_id='A')
            offered = self.sch.get_work(worker=WORKER)['task_id']
            self.assertEqual(offered, expected_id)

        self.sch.add_task(worker=WORKER, task_id='A')
        fail_task(3)

        # should be disabled at this point
        assert_counts(disabled=1, failed=0)

        self.sch.re_enable_task('A')

        # should be enabled at this point
        assert_counts(disabled=0, failed=1)
        reschedule_and_expect('A')

        fail_task(1)

        # should be still enabled
        assert_counts(disabled=0, failed=1)
        reschedule_and_expect('A')

        fail_task(2)

        # should be disabled now
        assert_counts(disabled=1, failed=0)
        reschedule_and_expect(None)

    def test_disable_and_done(self):
        """Reporting a disabled task DONE takes it out of the disabled set."""
        def count(status):
            return len(self.sch.task_list(status, ''))

        self.sch.add_task(worker=WORKER, task_id='A')
        for _ in range(3):
            self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        self.sch.add_task(worker=WORKER, task_id='A', status=DONE)

        # should be enabled at this point
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('DONE'), 1)

        self.sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A')

    def test_disable_by_worker(self):
        """A task a worker reported DISABLED is re-enabled by re-adding it."""
        def disabled_count():
            return len(self.sch.task_list('DISABLED', ''))

        self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED)
        self.assertEqual(disabled_count(), 1)

        self.sch.add_task(worker=WORKER, task_id='A')

        # should be enabled at this point
        self.assertEqual(disabled_count(), 0)
        self.sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A')

    def test_task_list_beyond_limit(self):
        """Over max_shown_tasks, task_list returns only a count by default."""
        task_ids = 'ABCD'
        scheduler = CentralPlannerScheduler(max_shown_tasks=3)
        for task_id in task_ids:
            scheduler.add_task(worker=WORKER, task_id=task_id)

        # Passing False as the third argument yields every task.
        unlimited = scheduler.task_list('PENDING', '', False)
        self.assertEqual(set(task_ids), set(unlimited.keys()))

        limited = scheduler.task_list('PENDING', '')
        self.assertEqual({'num_tasks': 4}, limited)

    def test_task_list_within_limit(self):
        """Four tasks with max_shown_tasks=4 are all listed individually."""
        task_ids = 'ABCD'
        scheduler = CentralPlannerScheduler(max_shown_tasks=4)
        for task_id in task_ids:
            scheduler.add_task(worker=WORKER, task_id=task_id)
        listed = scheduler.task_list('PENDING', '')
        self.assertEqual(set(task_ids), set(listed.keys()))

    def test_task_lists_some_beyond_limit(self):
        """The display limit is applied to each status list separately."""
        done_ids, pending_ids = 'ABCD', 'EFG'
        scheduler = CentralPlannerScheduler(max_shown_tasks=3)
        for task_id in done_ids:
            scheduler.add_task(worker=WORKER, task_id=task_id, status=DONE)
        for task_id in pending_ids:
            scheduler.add_task(worker=WORKER, task_id=task_id)

        # Three pending tasks fit within the limit; four done ones do not.
        pending = scheduler.task_list('PENDING', '')
        self.assertEqual(set(pending_ids), set(pending.keys()))
        done = scheduler.task_list('DONE', '')
        self.assertEqual({'num_tasks': 4}, done)

    def test_task_list_filter_by_search(self):
        """task_list(search=...) keeps only ids containing the search term."""
        for task_id in ('test_match_task', 'test_filter_task'):
            self.sch.add_task(worker=WORKER, task_id=task_id)
        found = self.sch.task_list('PENDING', '', search='match')
        self.assertEqual(['test_match_task'], list(found.keys()))

    def test_task_list_filter_by_multiple_search_terms(self):
        """With search='b c' only ids containing both 'b' and 'c' match."""
        for task_id in ('abcd', 'abd', 'acd', 'ad', 'bc'):
            self.sch.add_task(worker=WORKER, task_id=task_id)
        found = self.sch.task_list('PENDING', '', search='b c')
        self.assertEqual({'abcd', 'bc'}, set(found.keys()))

    def test_search_results_beyond_limit(self):
        """The display limit also applies to search results."""
        scheduler = CentralPlannerScheduler(max_shown_tasks=3)
        for task_id in ('task_a', 'task_b', 'task_c', 'task_d'):
            scheduler.add_task(worker=WORKER, task_id=task_id)

        # 'a' occurs in all four ids, which is over the limit of three.
        too_many = scheduler.task_list('PENDING', '', search='a')
        self.assertEqual({'num_tasks': 4}, too_many)

        # '_a' narrows the result down to a single task.
        single = scheduler.task_list('PENDING', '', search='_a')
        self.assertEqual(['task_a'], list(single.keys()))

    def test_priority_update_dependency_chain(self):
        """The chain C -> B -> A (A has priority 10) runs before D (6)."""
        task_specs = (
            dict(task_id='A', priority=10, deps=['B']),
            dict(task_id='B', priority=5, deps=['C']),
            dict(task_id='C', priority=1),
            dict(task_id='D', priority=6),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.check_task_order(['C', 'B', 'A', 'D'])

    def test_priority_no_decrease_with_multiple_updates(self):
        """A keeps running first although lower-priority C is added after B."""
        task_specs = (
            dict(task_id='A', priority=1),
            dict(task_id='B', priority=10, deps=['A']),
            dict(task_id='C', priority=5, deps=['A']),
            dict(task_id='D', priority=6),
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.check_task_order(['A', 'B', 'D', 'C'])

    def test_unique_tasks(self):
        """n_unique_pending leaves out B, which a second worker also added."""
        for task_id in 'ABC':
            self.sch.add_task(worker=WORKER, task_id=task_id)
        self.sch.add_task(worker=WORKER + "_2", task_id='B')

        work = self.sch.get_work(worker=WORKER)
        self.assertEqual(3, work['n_pending_tasks'])
        self.assertEqual(2, work['n_unique_pending'])

    def test_pending_downstream_disable(self):
        """Tasks downstream of a DISABLED task are not counted as pending."""
        self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED)
        # Chain C -> B -> A hanging off the disabled task.
        for task_id, upstream in (('B', 'A'), ('C', 'B')):
            self.sch.add_task(worker=WORKER, task_id=task_id, deps=(upstream, ))

        work = self.sch.get_work(worker=WORKER)
        self.assertIsNone(work['task_id'])
        for count_key in ('n_pending_tasks', 'n_unique_pending'):
            self.assertEqual(0, work[count_key])

    def test_pending_downstream_failure(self):
        """Tasks downstream of a FAILED task still count as pending, but none is handed out."""
        self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)
        for task_id, upstream in (('B', 'A'), ('C', 'B')):
            self.sch.add_task(worker=WORKER, task_id=task_id, deps=(upstream, ))

        work = self.sch.get_work(worker=WORKER)
        self.assertIsNone(work['task_id'])
        self.assertEqual(2, work['n_pending_tasks'])
        self.assertEqual(2, work['n_unique_pending'])

    def test_task_list_no_deps(self):
        """Entries returned by task_list carry no 'deps' key."""
        self.sch.add_task(worker=WORKER, task_id='B', deps=('A', ))
        self.sch.add_task(worker=WORKER, task_id='A')

        pending = self.sch.task_list('PENDING', '')
        entry_for_a = pending['A']
        self.assertNotIn('deps', entry_for_a)

    def test_task_first_failure_time(self):
        """first_failure_time starts as None and is stamped when a failure is added."""
        self.sch.add_task(worker=WORKER, task_id='A')
        task = self.sch._state.get_task('A')
        self.assertIsNone(task.failures.first_failure_time)

        # Bracket the failure with two clock readings; the stamp must lie between.
        lower_bound = time.time()
        task.add_failure()
        upper_bound = time.time()

        self.assertLessEqual(lower_bound, task.failures.first_failure_time)
        self.assertGreaterEqual(upper_bound, task.failures.first_failure_time)

    def test_task_first_failure_time_remains_constant(self):
        """A second failure must not overwrite the recorded first_failure_time."""
        self.sch.add_task(worker=WORKER, task_id='A')
        task = self.sch._state.get_task('A')
        self.assertIsNone(task.failures.first_failure_time)

        task.add_failure()
        stamp_after_first = task.failures.first_failure_time

        task.add_failure()
        stamp_after_second = task.failures.first_failure_time
        self.assertEqual(stamp_after_first, stamp_after_second)

    def test_task_has_excessive_failures(self):
        """has_excessive_failures flips to True once the first failure is two hours old."""
        self.sch.add_task(worker=WORKER, task_id='A')
        task = self.sch._state.get_task('A')
        self.assertIsNone(task.failures.first_failure_time)

        # No failures yet, and then a single fresh failure: neither is excessive.
        self.assertFalse(task.has_excessive_failures())
        task.add_failure()
        self.assertFalse(task.has_excessive_failures())

        # Back-date the first failure by two hours instead of waiting.
        two_hours = 2 * 60 * 60
        backdated = task.failures.first_failure_time - two_hours
        task.failures.first_failure_time = backdated
        self.assertTrue(task.has_excessive_failures())

    def test_quadratic_behavior(self):
        """ Smoke test that get_work stays fast with many tasks and workers.

        Nothing is timed or asserted about duration here; the test merely has
        to finish in a reasonable time on typical hardware. Each of the
        NUM_TASKS workers owns exactly one task and must be handed that task.
        """
        # For 10000 it takes almost 1 second on my laptop.  Prior to these
        # changes it was being slow already at NUM_TASKS=300
        NUM_TASKS = 10000
        names = [str(index) for index in range(NUM_TASKS)]

        for name in names:
            self.sch.add_task(worker=name, task_id=name, resources={})

        for name in names:
            handed_out = self.sch.get_work(worker=name)['task_id']
            self.assertEqual(handed_out, name)
            self.sch.add_task(worker=name, task_id=name, status=DONE)

    def test_get_work_speed(self):
        """ Smoke test that get_work stays fast with one worker and many DONE tasks.

        In #986, @daveFNbuck reported that he got a slowdown.
        """
        # This took almost 4 minutes without optimization.
        # Now it takes 10 seconds on my machine.
        NUM_PENDING = 1000
        NUM_DONE = 200000
        assert NUM_DONE >= NUM_PENDING

        # Ids [0, NUM_PENDING) are pending; ids [NUM_PENDING, NUM_DONE) are DONE.
        for pending_id in range(NUM_PENDING):
            self.sch.add_task(worker=WORKER, task_id=str(pending_id), resources={})
        for done_id in range(NUM_PENDING, NUM_DONE):
            self.sch.add_task(worker=WORKER, task_id=str(done_id), status=DONE)

        for _ in range(NUM_PENDING):
            work = self.sch.get_work(worker=WORKER)
            picked = int(work['task_id'])
            self.assertTrue(0 <= picked < NUM_PENDING)
            self.sch.add_task(worker=WORKER, task_id=str(picked), status=DONE)
Example #43
0
 def setUp(self):
     """Build the scheduler from get_scheduler_config() and remember time.time."""
     super(CentralPlannerTest, self).setUp()
     self.sch = CentralPlannerScheduler(**self.get_scheduler_config())
     # Stored so a test that replaces time.time can have it put back later.
     self.time = time.time
Example #44
0
 def run(self, result=None):
     """Run the test inside a live worker 'X', with an assistant worker 'Y' available.

     Overrides TestCase.run so that worker 'X' is used as a context manager
     around the whole test. The value returned by the base implementation is
     passed through to the caller instead of being dropped.

     :param result: optional result object, forwarded to TestCase.run.
     :return: whatever TestCase.run returns.
     """
     self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
     self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)
     with Worker(scheduler=self.sch, worker_id='X') as w:
         self.w = w
         return super(AssistantTest, self).run(result)
Example #45
0
class WorkerTest(unittest.TestCase):
    def setUp(self):
        """Create a scheduler shared by two workers, 'X' and 'Y', and remember time.time."""
        # InstanceCache.disable()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        self.sch = scheduler
        self.w = Worker(scheduler=scheduler, worker_id='X')
        self.w2 = Worker(scheduler=scheduler, worker_id='Y')
        # Stored so tearDown can undo setTime().
        self.time = time.time

    def tearDown(self):
        """Restore time.time if a test replaced it, then stop both workers."""
        real_clock = self.time
        if time.time != real_clock:
            time.time = real_clock
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Replace time.time with a function that always returns ``t``."""
        def frozen_clock():
            return t

        time.time = frozen_clock

    def test_dep(self):
        """Running the worker executes both a task and the task it requires."""
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        upstream = A()
        upstream.has_run = False

        class B(Task):
            def requires(self):
                return upstream

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        downstream = B()
        downstream.has_run = False

        self.assertTrue(self.w.add(downstream))
        self.assertTrue(self.w.run())
        self.assertTrue(upstream.has_run)
        self.assertTrue(downstream.has_run)

    def test_stop_getting_new_work(self):
        """After handle_interrupt(SIGUSR1), run() leaves the queued task incomplete."""
        task = DummyTask()
        self.w.add(task)
        self.assertFalse(task.complete())

        # Call the handler directly instead of delivering a real signal.
        self.w.handle_interrupt(signal.SIGUSR1, None)
        self.w.run()

        self.assertFalse(task.complete())

    def test_external_dep(self):
        """A task whose external dependency is incomplete is not run."""
        class A(ExternalTask):
            def complete(self):
                return False

        external = A()
        external.has_run = False

        class B(Task):
            def requires(self):
                return external

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        dependent = B()
        dependent.has_run = False

        self.assertTrue(self.w.add(dependent))
        self.assertTrue(self.w.run())

        # Neither flag may have been flipped by the worker.
        self.assertFalse(external.has_run)
        self.assertFalse(dependent.has_run)

    def test_tracking_url(self):
        """A URL passed to tracking_url_callback shows up in the scheduler's task list."""
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self, tracking_url_callback=None):
                if tracking_url_callback is not None:
                    tracking_url_callback(tracking_url)
                self.has_run = True

        task = A()
        self.assertTrue(self.w.add(task))
        self.assertTrue(self.w.run())

        done_tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(done_tasks))
        reported_url = done_tasks['A()']['tracking_url']
        self.assertEqual(tracking_url, reported_url)

    def test_type_error_in_tracking_run(self):
        """A TypeError raised inside run() fails the task without a second run() call."""
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        failing = A()
        self.assertTrue(self.w.add(failing))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, failing.num_runs)

    def test_fail(self):
        """When a dependency raises in run(), the dependent task is not run."""
        # Derives from BaseException, so it is not caught by `except Exception`.
        class CustomException(BaseException):
            def __init__(self, msg):
                self.msg = msg

        class A(Task):
            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        failing = A()
        failing.has_run = False

        class B(Task):
            def requires(self):
                return failing

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        dependent = B()
        dependent.has_run = False

        self.assertTrue(self.w.add(dependent))
        self.assertFalse(self.w.run())

        self.assertTrue(failing.has_run)
        self.assertFalse(dependent.has_run)

    def test_unknown_dep(self):
        """Worker X must not run B after worker Y re-registers B with another dependency."""
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            # Each call defines a fresh class B that depends on ``dep``.
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            instance = B()
            instance.has_run = False
            return instance

        b_on_a = get_b(A())
        b_on_c = get_b(C())

        self.assertTrue(self.w.add(b_on_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_on_c))

        # should not run anything - the worker should detect that A is broken
        first_worker_ok = self.w.run()
        self.assertFalse(first_worker_ok)
        self.assertFalse(b_on_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        """A dependency that turns incomplete after add() is still run by the worker."""
        class A(Task):
            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(dep):
            class B(A):
                def requires(self):
                    return dep

            dependent = B()
            dependent.done = False
            dep.done = True
            return dependent

        upstream = A()
        downstream = get_b(upstream)

        self.assertTrue(self.w.add(downstream))
        # Flip the dependency back to incomplete only after it was scheduled.
        upstream.done = False
        self.w.run()

        self.assertTrue(upstream.complete())
        self.assertTrue(downstream.complete())

    def test_avoid_infinite_reschedule(self):
        """run() terminates and reports failure when tasks never become complete."""
        class A(Task):
            def complete(self):
                return False

        class B(Task):
            def complete(self):
                return False

            def requires(self):
                return A()

        never_complete = B()
        self.assertTrue(self.w.add(never_complete))
        self.assertFalse(self.w.run())

    def test_allow_reschedule_with_many_missing_deps(self):
        """With max_reschedules=1, twenty dependencies that each need two runs still complete.

        The worker created here is stopped in a ``finally`` block. Other tests
        in this class stop their locally created workers; this one used to
        leave its worker running after the test.
        """
        class A(Task):
            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        w = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        try:
            self.assertTrue(w.add(b))
            self.assertFalse(w.run())

            # For b to be done, we must have rescheduled its dependencies to run them twice
            self.assertTrue(b.complete())
            self.assertTrue(all(a.complete() for a in b.deps()))
        finally:
            # Always release the worker, even when an assertion above fails.
            w.stop()

    def test_interleaved_workers(self):
        """A worker that sees B as an incomplete external task does not block the real B."""
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            # Same family as B, so both map to the same task id.
            task_family = "B"

            def complete(self):
                return False

        real_b = B()
        external_b = ExternalB()
        self.assertEqual(external_b.task_id, "B()")

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        functional = Worker(scheduler=scheduler, worker_id='X')
        broken = Worker(scheduler=scheduler, worker_id='Y')

        self.assertTrue(functional.add(real_b))
        self.assertTrue(broken.add(external_b))

        logging.debug("RUNNING BROKEN WORKER")
        self.assertTrue(broken.run())
        self.assertFalse(a.complete())
        self.assertFalse(real_b.complete())

        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(functional.run())
        self.assertTrue(a.complete())
        self.assertTrue(real_b.complete())

        functional.stop()
        broken.stop()

    def test_interleaved_workers2(self):
        """Like test_interleaved_workers, but B has no dependency and the external worker adds first."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        real_b = B()
        external_b = ExternalB()
        self.assertEqual(external_b.task_id, "B()")

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        functional = Worker(scheduler=scheduler, worker_id='X')
        broken = Worker(scheduler=scheduler, worker_id='Y')

        self.assertTrue(broken.add(external_b))
        self.assertTrue(functional.add(real_b))

        self.assertTrue(broken.run())
        self.assertFalse(real_b.complete())
        self.assertTrue(functional.run())
        self.assertTrue(real_b.complete())

        functional.stop()
        broken.stop()

    def test_interleaved_workers3(self):
        """Worker Y, run with keep_alive, completes B while A runs on worker X in a thread."""
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        shared_options = dict(scheduler=scheduler, keep_alive=True, count_uniques=True)
        worker_x = Worker(worker_id='X', **shared_options)
        worker_y = Worker(worker_id='Y', wait_interval=0.1, **shared_options)

        self.assertTrue(worker_x.add(a))
        self.assertTrue(worker_y.add(b))

        # X runs in the background while Y runs in this thread.
        background = threading.Thread(target=worker_x.run)
        background.start()
        self.assertTrue(worker_y.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        worker_x.stop()
        worker_y.stop()

    def test_die_for_non_unique_pending(self):
        """Worker Y's run() returns without completing tasks that worker X also registered.

        Both workers add B (which requires A). Worker X fetches A through
        ``_get_work`` but never runs it; worker Y's ``run()`` is then expected
        to return True while A and B stay incomplete.

        Both workers are stopped in a ``finally`` block. Previously only
        ``w2`` was stopped, so ``w`` was left running after the test.
        """
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)

        w = Worker(scheduler=sch,
                   worker_id='X',
                   keep_alive=True,
                   count_uniques=True)
        w2 = Worker(scheduler=sch,
                    worker_id='Y',
                    keep_alive=True,
                    count_uniques=True,
                    wait_interval=0.1)
        try:
            self.assertTrue(w.add(b))
            self.assertTrue(w2.add(b))

            # X takes A off the queue without ever running it.
            self.assertEqual(w._get_work()[0], 'A()')
            self.assertTrue(w2.run())

            self.assertFalse(a.complete())
            self.assertFalse(b.complete())
        finally:
            # Release both workers, even when an assertion above fails.
            w.stop()
            w2.stop()

    def test_complete_exception(self):
        """Tests that a task is still scheduled if its sister task crashes in the complete() method"""

        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        crashing = A()

        class C(DummyTask):
            pass

        healthy = C()

        class B(DummyTask):
            def requires(self):
                return crashing, healthy

        parent = B()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        worker = Worker(scheduler=scheduler, worker_id="foo")

        # add() reports the scheduling problem, yet run() still succeeds.
        self.assertFalse(worker.add(parent))
        self.assertTrue(worker.run())

        self.assertFalse(parent.has_run)
        self.assertTrue(healthy.has_run)
        self.assertFalse(crashing.has_run)
        worker.stop()

    def test_requires_exception(self):
        """A sibling task still runs when another dependency raises in requires()."""
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        crashing = A()

        class C(DummyTask):
            pass

        healthy = C()

        class B(DummyTask):
            def requires(self):
                return crashing, healthy

        parent = B()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        worker = Worker(scheduler=scheduler, worker_id="foo")

        self.assertFalse(worker.add(parent))
        self.assertTrue(worker.run())

        self.assertFalse(parent.has_run)
        self.assertTrue(healthy.has_run)
        self.assertFalse(crashing.has_run)
        worker.stop()
Example #46
0
 def setUp(self):
     """Create the worker 'foo' on a fresh scheduler for each test."""
     super(WorkerEmailTest, self).setUp()
     scheduler = CentralPlannerScheduler(
         retry_delay=100,
         remove_delay=1000,
         worker_disconnect_delay=10,
     )
     self.worker = Worker(scheduler=scheduler, worker_id="foo")
Example #47
0
class WorkerTest(unittest.TestCase):
    def run(self, result=None):
        """Run the test with workers 'X' and 'Y' active as context managers.

        Overrides TestCase.run so both workers wrap the whole test. Two things
        differ from a plain override that just calls the base method:

        * the value returned by TestCase.run is passed back to the caller;
        * time.time is restored in a ``finally`` block, so a clock replaced
          via setTime() is put back even if the run is aborted by an exception.

        :param result: optional result object, forwarded to TestCase.run.
        :return: whatever TestCase.run returns.
        """
        self.sch = CentralPlannerScheduler(retry_delay=100,
                                           remove_delay=1000,
                                           worker_disconnect_delay=10)
        self.time = time.time
        try:
            with Worker(scheduler=self.sch,
                        worker_id='X') as w, Worker(scheduler=self.sch,
                                                    worker_id='Y') as w2:
                self.w = w
                self.w2 = w2
                return super(WorkerTest, self).run(result)
        finally:
            # Runs after both workers have exited, as in the original ordering.
            if time.time != self.time:
                time.time = self.time

    def setTime(self, t):
        """Replace time.time with a function that always returns ``t``."""
        def frozen_clock():
            return t

        time.time = frozen_clock

    def test_dep(self):
        """Running the worker executes both a task and the task it requires."""
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        upstream = A()
        upstream.has_run = False

        class B(Task):
            def requires(self):
                return upstream

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        downstream = B()
        downstream.has_run = False

        self.assertTrue(self.w.add(downstream))
        self.assertTrue(self.w.run())
        self.assertTrue(upstream.has_run)
        self.assertTrue(downstream.has_run)

    def test_stop_getting_new_work(self):
        """After handle_interrupt(SIGUSR1), run() leaves the queued task incomplete.

        The test is skipped on platforms that have no ``signal.SIGUSR1``. The
        signal is looked up before anything else so that only its absence
        causes a skip; an AttributeError raised inside ``handle_interrupt``
        itself now surfaces as a test error instead of being reported as a skip.
        """
        sigusr1 = getattr(signal, 'SIGUSR1', None)
        if sigusr1 is None:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')

        d = DummyTask()
        self.w.add(d)

        self.assertFalse(d.complete())
        # Call the handler directly instead of delivering a real signal.
        self.w.handle_interrupt(sigusr1, None)
        self.w.run()
        self.assertFalse(d.complete())

    def test_disabled_shutdown_hook(self):
        """A worker created with no_install_shutdown_handler=True keeps working despite SIGUSR1.

        The test is skipped on platforms that have no ``signal.SIGUSR1``. The
        signal is looked up first so that only its absence causes a skip; an
        AttributeError raised while the signal is being delivered now surfaces
        as a test error instead of being reported as a skip.
        """
        sigusr1 = getattr(signal, 'SIGUSR1', None)
        if sigusr1 is None:
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')

        w = Worker(scheduler=self.sch,
                   keep_alive=True,
                   no_install_shutdown_handler=True)
        with w:
            # try to kill the worker!
            os.kill(os.getpid(), sigusr1)
            # try to kill the worker... AGAIN!
            t = SuicidalWorker(sigusr1)
            w.add(t)
            w.run()
            # task should have stepped away from the ledge, and completed successfully despite all the SIGUSR1 signals
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()),
                             [t.task_id])

    @with_config({"worker": {"no_install_shutdown_handler": "True"}})
    def test_can_run_luigi_in_thread(self):
        """luigi.build works from a non-main thread when the shutdown handler is disabled."""
        class A(DummyTask):
            pass

        task = A()

        def build_in_thread():
            luigi.build([task], local_scheduler=True)

        # Note that ``signal.signal(signal.SIGUSR1, fn)`` can only be called in the main thread.
        # So if we do not disable the shutdown handler, this would fail.
        runner = threading.Thread(target=build_in_thread)
        runner.start()
        runner.join()
        self.assertTrue(task.complete())

    def test_external_dep(self):
        """A task whose external dependency is incomplete is not run."""
        class A(ExternalTask):
            def complete(self):
                return False

        external = A()
        external.has_run = False

        class B(Task):
            def requires(self):
                return external

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        dependent = B()
        dependent.has_run = False

        self.assertTrue(self.w.add(dependent))
        self.assertTrue(self.w.run())

        # Neither flag may have been flipped by the worker.
        self.assertFalse(external.has_run)
        self.assertFalse(dependent.has_run)

    def test_externalized_dep(self):
        """A dependency wrapped with luigi.task.externalize is not run by the worker."""
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        original = A()

        class B(A):
            def requires(self):
                return luigi.task.externalize(original)

        dependent = B()

        self.assertTrue(self.w.add(dependent))
        self.assertTrue(self.w.run())

        self.assertFalse(original.has_run)
        self.assertFalse(dependent.has_run)

    def test_legacy_externalized_dep(self):
        """A dependency whose ``run`` attribute is set to NotImplemented is not run."""
        class A(Task):
            has_run = False

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        legacy_external = A()
        # Overwrite run on the instance only; class A itself keeps its run().
        legacy_external.run = NotImplemented

        class B(A):
            def requires(self):
                return legacy_external

        dependent = B()

        self.assertTrue(self.w.add(dependent))
        self.assertTrue(self.w.run())

        self.assertFalse(legacy_external.has_run)
        self.assertFalse(dependent.has_run)

    def test_type_error_in_tracking_run_deprecated(self):
        """A TypeError raised inside run() fails the task without a second run() call."""
        class A(Task):
            num_runs = 0

            def complete(self):
                return False

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

        failing = A()
        self.assertTrue(self.w.add(failing))
        self.assertFalse(self.w.run())

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, failing.num_runs)

    def test_tracking_url(self):
        """A URL set through set_tracking_url shows up in the scheduler's task list."""
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def complete(self):
                return self.has_run

            def run(self):
                self.set_tracking_url(tracking_url)
                self.has_run = True

        task = A()
        self.assertTrue(self.w.add(task))
        self.assertTrue(self.w.run())

        done_tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(done_tasks))
        reported_url = done_tasks[task.task_id]['tracking_url']
        self.assertEqual(tracking_url, reported_url)

    def test_fail(self):
        """When a dependency raises in run(), the dependent task is not run."""
        # Derives from BaseException, so it is not caught by `except Exception`.
        class CustomException(BaseException):
            def __init__(self, msg):
                self.msg = msg

        class A(Task):
            def run(self):
                self.has_run = True
                raise CustomException('bad things')

            def complete(self):
                return self.has_run

        failing = A()
        failing.has_run = False

        class B(Task):
            def requires(self):
                return failing

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        dependent = B()
        dependent.has_run = False

        self.assertTrue(self.w.add(dependent))
        self.assertFalse(self.w.run())

        self.assertTrue(failing.has_run)
        self.assertFalse(dependent.has_run)

    def test_unknown_dep(self):
        """Worker X must not run B after worker Y re-registers B with another dependency."""
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            # Each call defines a fresh class B that depends on ``dep``.
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            instance = B()
            instance.has_run = False
            return instance

        b_on_a = get_b(A())
        b_on_c = get_b(C())

        self.assertTrue(self.w.add(b_on_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_on_c))

        # should not run anything - the worker should detect that A is broken
        first_worker_ok = self.w.run()
        self.assertFalse(first_worker_ok)
        self.assertFalse(b_on_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        """A dependency that turns incomplete after add() is still run by the worker."""
        class A(Task):
            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(dep):
            class B(A):
                def requires(self):
                    return dep

            dependent = B()
            dependent.done = False
            dep.done = True
            return dependent

        upstream = A()
        downstream = get_b(upstream)

        self.assertTrue(self.w.add(downstream))
        # Flip the dependency back to incomplete only after it was scheduled.
        upstream.done = False
        self.w.run()

        self.assertTrue(upstream.complete())
        self.assertTrue(downstream.complete())

    def test_gets_missed_work(self):
        """The worker still runs a task that the scheduler already handed out to it once."""
        class A(Task):
            done = False

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        task = A()
        self.assertTrue(self.w.add(task))

        # simulate a missed get_work response
        lost_response = self.sch.get_work(worker='X')
        self.assertEqual(task.task_id, lost_response['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(task.complete())

    def test_avoid_infinite_reschedule(self):
        """run() terminates and reports failure when tasks never become complete."""
        class A(Task):
            def complete(self):
                return False

        class B(Task):
            def complete(self):
                return False

            def requires(self):
                return A()

        never_complete = B()
        self.assertTrue(self.w.add(never_complete))
        self.assertFalse(self.w.run())

    def test_fails_registering_signal(self):
        """Constructing a Worker must not raise when signal.SIGUSR1 is unavailable."""
        # mock will raise an attribute error getting signal.SIGUSR1
        signal_without_sigusr1 = mock.patch('luigi.worker.signal', spec=['signal'])
        with signal_without_sigusr1:
            Worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        """With max_reschedules=1, twenty dependencies that each need two runs still complete."""
        class A(Task):
            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        parent = B()
        worker = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(worker.add(parent))
        self.assertFalse(worker.run())

        # For b to be done, we must have rescheduled its dependencies to run them twice
        self.assertTrue(parent.complete())
        self.assertTrue(all(dep.complete() for dep in parent.deps()))

    def test_interleaved_workers(self):
        """A worker that sees B as an incomplete external task does not block the real B."""
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            # Same family as B, so str() of both renders as "B()".
            task_family = "B"

            def complete(self):
                return False

        real_b = B()
        external_b = ExternalB()
        self.assertEqual(str(external_b), "B()")

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        with Worker(scheduler=scheduler, worker_id='X') as functional, \
                Worker(scheduler=scheduler, worker_id='Y') as broken:
            self.assertTrue(functional.add(real_b))
            self.assertTrue(broken.add(external_b))

            logging.debug("RUNNING BROKEN WORKER")
            self.assertTrue(broken.run())
            self.assertFalse(a.complete())
            self.assertFalse(real_b.complete())

            logging.debug("RUNNING FUNCTIONAL WORKER")
            self.assertTrue(functional.run())
            self.assertTrue(a.complete())
            self.assertTrue(real_b.complete())

    def test_interleaved_workers2(self):
        """Like test_interleaved_workers, but B has no dependency and the external worker adds first."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        real_b = B()
        external_b = ExternalB()
        self.assertEqual(str(external_b), "B()")

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        with Worker(scheduler=scheduler, worker_id='X') as functional, \
                Worker(scheduler=scheduler, worker_id='Y') as broken:
            self.assertTrue(broken.add(external_b))
            self.assertTrue(functional.add(real_b))

            self.assertTrue(broken.run())
            self.assertFalse(real_b.complete())
            self.assertTrue(functional.run())
            self.assertTrue(real_b.complete())

    def test_interleaved_workers3(self):
        """Worker Y, run with keep_alive, completes B while A runs on worker X in a thread."""
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        shared_options = dict(scheduler=scheduler, keep_alive=True, count_uniques=True)

        with Worker(worker_id='X', **shared_options) as worker_x:
            with Worker(worker_id='Y', wait_interval=0.1, **shared_options) as worker_y:
                self.assertTrue(worker_x.add(a))
                self.assertTrue(worker_y.add(b))

                # X runs in the background while Y runs in this thread.
                background = threading.Thread(target=worker_x.run)
                background.start()
                self.assertTrue(worker_y.run())

                self.assertTrue(a.complete())
                self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        """Worker Y's run() returns without completing tasks that worker X also registered."""
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        shared_options = dict(scheduler=scheduler, keep_alive=True, count_uniques=True)

        with Worker(worker_id='X', **shared_options) as worker_x:
            with Worker(worker_id='Y', wait_interval=0.1, **shared_options) as worker_y:
                self.assertTrue(worker_x.add(b))
                self.assertTrue(worker_y.add(b))

                # X takes A off the queue without ever running it.
                fetched = worker_x._get_work()
                self.assertEqual(fetched[0], a.task_id)
                self.assertTrue(worker_y.run())

                self.assertFalse(a.complete())
                self.assertFalse(b.complete())

    def test_complete_exception(self):
        """Tests that a task is still scheduled if its sister task crashes in the complete() method"""

        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        crashing = A()

        class C(DummyTask):
            pass

        healthy = C()

        class B(DummyTask):
            def requires(self):
                return crashing, healthy

        parent = B()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        with Worker(scheduler=scheduler, worker_id="foo") as worker:
            # add() reports the scheduling problem, yet run() still succeeds.
            self.assertFalse(worker.add(parent))
            self.assertTrue(worker.run())

            self.assertFalse(parent.has_run)
            self.assertTrue(healthy.has_run)
            self.assertFalse(crashing.has_run)

    def test_requires_exception(self):
        """A sibling subtree (C and its dependency D) still runs when A raises in requires()."""
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        crashing = A()

        class D(DummyTask):
            pass

        leaf = D()

        class C(DummyTask):
            def requires(self):
                return leaf

        healthy = C()

        class B(DummyTask):
            def requires(self):
                return healthy, crashing

        parent = B()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        with Worker(scheduler=scheduler, worker_id="foo") as worker:
            self.assertFalse(worker.add(parent))
            self.assertTrue(worker.run())

            self.assertFalse(parent.has_run)
            self.assertTrue(healthy.has_run)
            self.assertTrue(leaf.has_run)
            self.assertFalse(crashing.has_run)
 def test_task_list_beyond_limit(self):
     """With more tasks than max_shown_tasks, task_list returns only a count by default."""
     scheduler = CentralPlannerScheduler(max_shown_tasks=3)
     for task_id in 'ABCD':
         scheduler.add_task(worker=WORKER, task_id=task_id)

     # Passing False as the third positional argument returns all four entries
     # (presumably it switches the limit off - confirm against task_list).
     unlimited = scheduler.task_list('PENDING', '', False)
     self.assertEqual(set('ABCD'), set(unlimited.keys()))

     limited = scheduler.task_list('PENDING', '')
     self.assertEqual({'num_tasks': 4}, limited)
class CentralPlannerTest(unittest.TestCase):

    def setUp(self):
        """Build a fresh scheduler and remember the current ``time.time``.

        The scheduler is configured from ``get_scheduler_config()``; the saved
        clock is stored on ``self.time``.
        """
        super(CentralPlannerTest, self).setUp()
        self.sch = CentralPlannerScheduler(**self.get_scheduler_config())
        self.time = time.time

    def get_scheduler_config(self):
        """Return the keyword arguments that ``setUp`` passes to the scheduler."""
        timing = {
            'retry_delay': 100,
            'remove_delay': 1000,
            'worker_disconnect_delay': 10,
        }
        disabling = {
            'disable_persist': 10,
            'disable_window': 10,
            'disable_failures': 3,
            'disable_hard_timeout': 60 * 60,
        }
        config = dict(timing)
        config.update(disabling)
        return config

    def tearDown(self):
        super(CentralPlannerTest, self).tearDown()
        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns ``t``."""
        def frozen_clock():
            return t

        time.time = frozen_clock

    def test_dep(self):
        """B depends on A: expect A, then B once A is DONE, then no work."""
        sch = self.sch
        sch.add_task(worker=WORKER, task_id='B', deps=('A',))
        sch.add_task(worker=WORKER, task_id='A')
        for task_id in ('A', 'B'):
            self.assertEqual(sch.get_work(worker=WORKER)['task_id'], task_id)
            sch.add_task(worker=WORKER, task_id=task_id, status=DONE)
        self.assertEqual(sch.get_work(worker=WORKER)['task_id'], None)

    def test_failed_dep(self):
        """B is withheld while its dependency A is FAILED, released once A is DONE."""
        sch = self.sch

        def next_task_id():
            return sch.get_work(worker=WORKER)['task_id']

        sch.add_task(worker=WORKER, task_id='B', deps=('A',))
        sch.add_task(worker=WORKER, task_id='A')

        self.assertEqual(next_task_id(), 'A')
        sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # can still wait and retry: TODO: do we want this?
        self.assertEqual(next_task_id(), None)
        sch.add_task(worker=WORKER, task_id='A', status=DONE)
        self.assertEqual(next_task_id(), 'B')
        sch.add_task(worker=WORKER, task_id='B', status=DONE)
        self.assertEqual(next_task_id(), None)

    def test_broken_dep(self):
        """B is withheld while A is registered with runnable=False, released once A is DONE."""
        sch = self.sch

        def next_task_id():
            return sch.get_work(worker=WORKER)['task_id']

        sch.add_task(worker=WORKER, task_id='B', deps=('A',))
        sch.add_task(worker=WORKER, task_id='A', runnable=False)

        # can still wait and retry: TODO: do we want this?
        self.assertEqual(next_task_id(), None)
        sch.add_task(worker=WORKER, task_id='A', status=DONE)
        self.assertEqual(next_task_id(), 'B')
        sch.add_task(worker=WORKER, task_id='B', status=DONE)
        self.assertEqual(next_task_id(), None)

    def test_two_workers(self):
        """Two workers share dependency A; only one of them is handed A.

        Worker X wants to build A -> B and worker Y wants to build A -> C.
        """
        sch = self.sch

        def next_task_id(worker_id):
            return sch.get_work(worker=worker_id)['task_id']

        for worker_id in ('X', 'Y'):
            sch.add_task(worker=worker_id, task_id='A')
        sch.add_task(task_id='B', deps=('A',), worker='X')
        sch.add_task(task_id='C', deps=('A',), worker='Y')

        self.assertEqual(next_task_id('X'), 'A')
        # Worker Y is pending on A to be done
        self.assertEqual(next_task_id('Y'), None)
        sch.add_task(worker='X', task_id='A', status=DONE)
        self.assertEqual(next_task_id('Y'), 'C')
        self.assertEqual(next_task_id('X'), 'B')

    def test_retry(self):
        """A FAILED task is handed out again only after the patched clock passes 100.

        ``get_scheduler_config`` sets ``retry_delay`` to 100. The loop polls at
        every patched second from 0 through 99, pinging each time and pruning
        on every tenth tick, and expects no work on each poll.
        """
        # Try to build A but fails, will retry after 100s
        self.setTime(0)
        self.sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A')
        self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)
        for t in range(100):
            self.setTime(t)
            self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], None)
            self.sch.ping(worker=WORKER)
            if t % 10 == 0:
                self.sch.prune()

        # Past the retry delay: prune, then A is expected to be offered again
        self.setTime(101)
        self.sch.prune()
        self.assertEqual(self.sch.get_work(worker=WORKER)['task_id'], 'A')

    def test_disconnect_running(self):
        """A task taken by a silent worker is eventually handed to a live one.

        Only Y pings while the patched clock advances from 0 through 199, with
        a prune on every tenth tick; afterwards Y is expected to receive A.
        """
        # X and Y wants to run A.
        # X starts but does not report back. Y does.
        # After some timeout, Y will build it instead
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.sch.add_task(task_id='A', worker='Y')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        for t in range(200):
            self.setTime(t)
            self.sch.ping(worker='Y')
            if t % 10 == 0:
                self.sch.prune()

        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A')

    def test_remove_dep(self):
        """Re-adding B with a different dependency lets B be scheduled.

        X schedules A -> B where A is registered with runnable=False; Y then
        schedules C -> B with C already DONE and is expected to receive B.
        """
        sch = self.sch
        sch.add_task(task_id='A', worker='X', runnable=False)
        sch.add_task(task_id='B', deps=('A',), worker='X')

        # X can't build anything
        offered_to_x = sch.get_work(worker='X')['task_id']
        self.assertEqual(offered_to_x, None)

        # Y re-declares B with C as its only listed dependency
        sch.add_task(task_id='B', deps=('C',), worker='Y')
        sch.add_task(task_id='C', worker='Y', status=DONE)

        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertEqual(offered_to_y, 'B')

    def test_timeout(self):
        """Regression test: marking A DONE after a long gap must not raise.

        X takes A at patched time 0; at time 10000 Y re-adds A and then pings
        for 2000 patched seconds before reporting A as DONE. The test has no
        assertion after that point; it passes if the final call does not raise.
        """
        # A bug that was earlier present when restarting the same flow
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        self.setTime(10000)
        self.sch.add_task(task_id='A', worker='Y')  # Will timeout X but not schedule A for removal
        for i in range(2000):
            self.setTime(10000 + i)
            self.sch.ping(worker='Y')
        self.sch.add_task(task_id='A', status=DONE, worker='Y')  # This used to raise an exception since A was removed

    def test_disallowed_state_changes(self):
        """A task already handed to X is not handed to Y when Y re-adds it."""
        # Test that we can not schedule an already running task
        sch = self.sch
        task_id = 'A'
        sch.add_task(task_id=task_id, worker='X')
        offered_to_x = sch.get_work(worker='X')['task_id']
        self.assertEqual(offered_to_x, task_id)
        sch.add_task(task_id=task_id, worker='Y')
        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertEqual(offered_to_y, None)

    def test_two_worker_info(self):
        """Y's empty get_work response reports that X is running task A."""
        # Make sure the scheduler returns info that some other worker is running task A
        sch = self.sch
        for worker_id in ('X', 'Y'):
            sch.add_task(worker=worker_id, task_id='A')

        self.assertEqual(sch.get_work(worker='X')['task_id'], 'A')
        response = sch.get_work(worker='Y')
        # Worker Y is pending on A to be done
        self.assertEqual(response['task_id'], None)
        first_running = response['running_tasks'][0]
        self.assertEqual(first_running['task_id'], 'A')
        self.assertEqual(first_running['worker'], 'X')

    def test_assistant_get_work(self):
        """An assistant Y is handed X's task A, and A is then listed as RUNNING on Y."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A')
        sch.add_worker('Y', [])

        response = sch.get_work(worker='Y', assistant=True)
        self.assertEqual(response['task_id'], 'A')

        # check that the scheduler recognizes tasks as running
        running = sch.task_list('RUNNING', '')
        self.assertEqual(len(running), 1)
        self.assertEqual(list(running.keys()), ['A'])
        self.assertEqual(running['A']['worker_running'], 'Y')

    def test_assistant_get_work_external_task(self):
        """An assistant is not handed a task that was added with runnable=False."""
        self.sch.add_task(worker='X', task_id='A', runnable=False)
        response = self.sch.get_work(worker='Y', assistant=True)
        # assertIsNone reports the offending value on failure, unlike
        # assertTrue(... is None) which only says "False is not true".
        self.assertIsNone(response['task_id'])

    def test_task_fails_when_assistant_dies(self):
        """A task running on a silent assistant is listed as FAILED after 50 patched seconds."""
        sch = self.sch
        self.setTime(0)
        sch.add_task(worker='X', task_id='A')
        sch.add_worker('Y', [])

        response = sch.get_work(worker='Y', assistant=True)
        self.assertEqual(response['task_id'], 'A')
        running_ids = list(sch.task_list('RUNNING', '').keys())
        self.assertEqual(running_ids, ['A'])

        # Y dies for 50 seconds, X stays alive
        self.setTime(50)
        sch.ping(worker='X')
        failed_ids = list(sch.task_list('FAILED', '').keys())
        self.assertEqual(failed_ids, ['A'])

    def test_prune_with_live_assistant(self):
        """A DONE task is pruned after its owner X goes silent, even though assistant Y pings.

        X adds A, assistant Y takes it and reports it DONE. Y alone pings at
        patched time 600; after a second prune at time 2000 the task list is
        expected to be empty.
        """
        self.setTime(0)
        self.sch.add_task(worker='X', task_id='A')
        self.sch.get_work(worker='Y', assistant=True)
        self.sch.add_task(worker='Y', task_id='A', status=DONE, assistant=True)

        # worker X stops communicating, A should be marked for removal
        self.setTime(600)
        self.sch.ping(worker='Y')
        self.sch.prune()

        # A will now be pruned
        self.setTime(2000)
        self.sch.prune()
        self.assertFalse(list(self.sch.task_list('', '')))

    def test_fail_job_from_dead_worker_with_live_assistant(self):
        """A task running on silent worker X is listed as FAILED while assistant Y stays alive."""
        sch = self.sch
        self.setTime(0)
        sch.add_task(worker='X', task_id='A')
        offered_to_x = sch.get_work(worker='X')['task_id']
        self.assertEqual('A', offered_to_x)
        sch.add_worker('Y', [('assistant', True)])

        # Only Y pings at the later patched time
        self.setTime(600)
        sch.ping(worker='Y')
        sch.prune()

        failed_ids = list(sch.task_list('FAILED', '').keys())
        self.assertEqual(['A'], failed_ids)

    def test_assistant_request_runnable_task(self):
        """An assistant is still handed runnable task A after a prune at patched time 600."""
        sch = self.sch
        self.setTime(0)
        sch.add_task(worker='X', task_id='A', runnable=True)
        self.setTime(600)
        sch.prune()

        response = sch.get_work(worker='Y', assistant=True)
        self.assertEqual('A', response['task_id'])

    def test_assistant_request_external_task(self):
        """An assistant gets no task when the only task was added with runnable=False."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A', runnable=False)
        response = sch.get_work(worker='Y', assistant=True)
        self.assertIsNone(response['task_id'])

    def test_prune_done_tasks(self, expected=None):
        """Add A (DONE), B (DONE, deps A) and C (deps B), prune twice, check survivors.

        Also called by other tests in this class as a helper: ``expected`` is
        an iterable of the task ids that should remain after pruning, and
        defaults to none remaining.
        """
        sch = self.sch
        self.setTime(0)
        sch.add_task(worker=WORKER, task_id='A', status=DONE)
        sch.add_task(worker=WORKER, task_id='B', deps=['A'], status=DONE)
        sch.add_task(worker=WORKER, task_id='C', deps=['B'])

        # Only 'ASSISTANT' pings at each of the two later patched times
        for now in (600, 2000):
            self.setTime(now)
            sch.ping(worker='ASSISTANT')
            sch.prune()

        wanted = set(expected or ())
        self.assertEqual(wanted, set(sch.task_list('', '').keys()))

    def test_keep_tasks_for_assistant(self):
        """With an assistant registered, B and C are expected to survive the prune scenario."""
        # tell the scheduler this is an assistant
        assistant_poll = {'worker': 'ASSISTANT', 'assistant': True}
        self.sch.get_work(**assistant_poll)
        self.test_prune_done_tasks(expected=['B', 'C'])

    def test_keep_scheduler_disabled_tasks_for_assistant(self):
        """D (failed ten times) survives pruning with an assistant present; E (added DISABLED) does not."""
        sch = self.sch
        # tell the scheduler this is an assistant
        sch.get_work(worker='ASSISTANT', assistant=True)

        # create a scheduler disabled task and a worker disabled task
        for _ in range(10):
            sch.add_task(worker=WORKER, task_id='D', status=FAILED)
        sch.add_task(worker=WORKER, task_id='E', status=DISABLED)

        # scheduler prunes the worker disabled task
        disabled_ids = set(sch.task_list(DISABLED, ''))
        self.assertEqual({'D', 'E'}, disabled_ids)
        self.test_prune_done_tasks(expected=['B', 'C', 'D'])

    def test_keep_failed_tasks_for_assistant(self):
        """With an assistant present, FAILED task D and its dependency A survive the prune scenario."""
        self.sch.get_work(worker='ASSISTANT', assistant=True)  # tell the scheduler this is an assistant
        # deps is given as a list like every other add_task call in this class;
        # a bare string only behaves as a one-element collection because the id
        # happens to be a single character.
        self.sch.add_task(worker=WORKER, task_id='D', status=FAILED, deps=['A'])
        self.test_prune_done_tasks(['A', 'B', 'C', 'D'])

    def test_scheduler_resources_none_allow_one(self):
        """A task needing 1 unit of R1 is handed out when no resource limits were set."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A', resources={'R1': 1})
        response = sch.get_work(worker='X')
        self.assertEqual(response['task_id'], 'A')

    def test_scheduler_resources_none_disallow_two(self):
        """A task needing 2 units of R1 is not handed out when no resource limits were set."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        # The original passed a stray 'A' as assertFalse's second argument,
        # which unittest treats as the failure message, not an expected value.
        self.assertFalse(self.sch.get_work(worker='X')['task_id'])

    def test_scheduler_with_insufficient_resources(self):
        """A task needing 3 units of R1 is not handed out when only 2 are configured."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        sch.update_resources(R1=2)
        response = sch.get_work(worker='X')
        self.assertFalse(response['task_id'])

    def test_scheduler_with_sufficient_resources(self):
        """A task needing 3 units of R1 is handed out when exactly 3 are configured."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        sch.update_resources(R1=3)
        response = sch.get_work(worker='X')
        self.assertEqual(response['task_id'], 'A')

    def test_scheduler_with_resources_used(self):
        """B is withheld from Y while A, already handed to X, uses the single unit of R1."""
        sch = self.sch
        one_r1 = {'R1': 1}
        sch.add_task(worker='X', task_id='A', resources=one_r1)
        self.assertEqual(sch.get_work(worker='X')['task_id'], 'A')

        sch.add_task(worker='Y', task_id='B', resources={'R1': 1})
        sch.update_resources(R1=1)
        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertFalse(offered_to_y)

    def test_scheduler_overprovisioned_on_other_resource(self):
        """B (needs R2) is handed out even after R1 is lowered below what running A uses."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        sch.update_resources(R1=2)
        offered_to_x = sch.get_work(worker='X')['task_id']
        self.assertEqual(offered_to_x, 'A')

        sch.add_task(worker='Y', task_id='B', resources={'R2': 2})
        sch.update_resources(R1=1, R2=2)
        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertEqual(offered_to_y, 'B')

    def test_scheduler_with_priority_and_competing_resources(self):
        """Y's low-priority C is withheld while X's higher-priority B wants the same R; D is still offered."""
        sch = self.sch

        def next_task_id(worker_id):
            return sch.get_work(worker=worker_id)['task_id']

        sch.add_task(worker='X', task_id='A')
        self.assertEqual(next_task_id('X'), 'A')

        sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10)
        sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)
        sch.update_resources(R=1)
        self.assertFalse(next_task_id('Y'))

        # D needs no resources
        sch.add_task(worker='Y', task_id='D', priority=0)
        self.assertEqual(next_task_id('Y'), 'D')

    def test_do_not_lock_resources_when_not_ready(self):
        """ Test to make sure that resources won't go unused waiting on workers """
        sch = self.sch
        needs_r = {'R': 1}
        sch.add_task(worker='X', task_id='A', priority=10)
        sch.add_task(worker='X', task_id='B', resources=needs_r, priority=5)
        sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        sch.update_resources(R=1)
        sch.add_worker('X', [('workers', 1)])
        # X has not asked for work, so Y is expected to get C
        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertEqual('C', offered_to_y)

    def test_lock_resources_when_one_of_multiple_workers_is_ready(self):
        """With X active and declaring 2 workers, C is withheld from Y while X's B wants R."""
        sch = self.sch
        # indicate to the scheduler that X is active
        sch.get_work(worker='X')
        sch.add_task(worker='X', task_id='A', priority=10)
        sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5)
        sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        sch.update_resources(R=1)
        sch.add_worker('X', [('workers', 2)])
        sch.add_worker('Y', [])
        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertFalse(offered_to_y)

    def test_do_not_lock_resources_while_running_higher_priority(self):
        """ Test to make sure that resources won't go unused waiting on workers """
        sch = self.sch
        sch.add_task(worker='X', task_id='A', priority=10)
        sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5)
        sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        sch.update_resources(R=1)
        sch.add_worker('X', [('workers', 1)])
        # X (declared with 1 worker) takes A first, then Y is expected to get C
        for worker_id, expected_id in (('X', 'A'), ('Y', 'C')):
            self.assertEqual(expected_id, sch.get_work(worker=worker_id)['task_id'])

    def test_lock_resources_while_running_lower_priority(self):
        """ Make sure resources will be made available while working on lower priority tasks """
        sch = self.sch
        sch.add_task(worker='X', task_id='A', priority=4)
        offered_to_x = sch.get_work(worker='X')['task_id']
        self.assertEqual('A', offered_to_x)
        sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=5)
        sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)

        sch.update_resources(R=1)
        sch.add_worker('X', [('workers', 1)])
        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertFalse(offered_to_y)

    def test_lock_resources_for_second_worker(self):
        """X gets A but not B: with R=2, the second unit is expected to be held for Y's priority-10 C."""
        sch = self.sch
        # indicate to the scheduler that Y is active
        sch.get_work(worker='Y')
        for task_id in ('A', 'B'):
            sch.add_task(worker='X', task_id=task_id, resources={'R': 1})
        sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=10)

        sch.add_worker('X', {'workers': 2})
        sch.add_worker('Y', {'workers': 1})
        sch.update_resources(R=2)

        first_offer = sch.get_work(worker='X')['task_id']
        self.assertEqual('A', first_offer)
        second_offer = sch.get_work(worker='X')['task_id']
        self.assertFalse(second_offer)

    def test_can_work_on_lower_priority_while_waiting_for_resources(self):
        """Y is offered resource-free C while its higher-priority B waits for R held by X's A."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A', resources={'R': 1}, priority=0)
        offered_to_x = sch.get_work(worker='X')['task_id']
        self.assertEqual('A', offered_to_x)

        sch.add_task(worker='Y', task_id='B', resources={'R': 1}, priority=10)
        sch.add_task(worker='Y', task_id='C', priority=0)
        sch.update_resources(R=1)

        offered_to_y = sch.get_work(worker='Y')['task_id']
        self.assertEqual('C', offered_to_y)

    def test_priority_update_with_pruning(self):
        """Priority propagation still yields order A, B, C, D after A was pruned and re-added.

        Worker X adds A and later B (deps A) between prunes at patched times
        50 and 2000. WORKER then adds C (priority 100, deps B), B, A and
        D (priority 10), and the tasks are expected in the order 'ABCD'.
        """
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')

        self.setTime(50)  # after worker disconnects
        self.sch.prune()
        self.sch.add_task(task_id='B', deps=['A'], worker='X')

        self.setTime(2000)  # after remove for task A
        self.sch.prune()

        # Here task A that B depends on is missing
        self.sch.add_task(worker=WORKER, task_id='C', deps=['B'], priority=100)
        self.sch.add_task(worker=WORKER, task_id='B', deps=['A'])
        self.sch.add_task(worker=WORKER, task_id='A')
        self.sch.add_task(worker=WORKER, task_id='D', priority=10)

        self.check_task_order('ABCD')

    def test_update_resources(self):
        """Re-adding B with a smaller resource request makes B, then A, schedulable."""
        sch = self.sch
        sch.add_task(worker=WORKER, task_id='A', deps=['B'])
        sch.add_task(worker=WORKER, task_id='B', resources={'r': 2})
        sch.update_resources(r=1)

        # B requires too many resources, we can't schedule
        nothing_expected = []
        self.check_task_order(nothing_expected)

        sch.add_task(worker=WORKER, task_id='B', resources={'r': 1})

        # now we have enough resources
        self.check_task_order(['B', 'A'])

    def test_hendle_multiple_resources(self):
        """With r1=2 and r2=1, A is handed out, then only C; B (needs r1 and r2) is not."""
        sch = self.sch
        for task_id in ('A', 'B'):
            sch.add_task(worker=WORKER, task_id=task_id, resources={'r1': 1, 'r2': 1})
        sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1})
        sch.update_resources(r1=2, r2=1)

        first_offer = sch.get_work(worker=WORKER)['task_id']
        self.assertEqual('A', first_offer)
        self.check_task_order('C')

    def test_single_resource_lock(self):
        """C is withheld while higher-priority B waits for both units of r, one of which A holds."""
        sch = self.sch
        sch.add_task(worker='X', task_id='A', resources={'r': 1})
        offered_to_x = sch.get_work(worker='X')['task_id']
        self.assertEqual('A', offered_to_x)

        sch.add_task(worker=WORKER, task_id='B', resources={'r': 2}, priority=10)
        sch.add_task(worker=WORKER, task_id='C', resources={'r': 1})
        sch.update_resources(r=2)

        # Should wait for 2 units of r to be available for B before scheduling C
        nothing_expected = []
        self.check_task_order(nothing_expected)

    def test_no_lock_if_too_many_resources_required(self):
        """B is handed out although higher-priority A asks for more of r than is configured."""
        sch = self.sch
        sch.add_task(worker=WORKER, task_id='A', resources={'r': 2}, priority=10)
        sch.add_task(worker=WORKER, task_id='B', resources={'r': 1})
        sch.update_resources(r=1)
        expected_order = 'B'
        self.check_task_order(expected_order)

    def test_multiple_resources_lock(self):
        """Neither B nor C is handed to WORKER while active X's priority-10 A wants both r1 and r2."""
        sch = self.sch
        # indicate to the scheduler that X is active
        sch.get_work(worker='X')
        sch.add_task(worker='X', task_id='A', resources={'r1': 1, 'r2': 1}, priority=10)
        sch.add_task(worker=WORKER, task_id='B', resources={'r2': 1})
        sch.add_task(worker=WORKER, task_id='C', resources={'r1': 1})
        sch.update_resources(r1=1, r2=1)

        # should preserve both resources for worker 'X'
        nothing_expected = []
        self.check_task_order(nothing_expected)

    def test_multiple_resources_no_lock(self):
        """After A takes r1, C (needs only r2) is handed out ahead of B (needs r1 and r2)."""
        sch = self.sch
        sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10)
        sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1, 'r2': 1}, priority=10)
        sch.add_task(worker=WORKER, task_id='C', resources={'r2': 1})
        sch.update_resources(r1=1, r2=2)

        first_offer = sch.get_work(worker=WORKER)['task_id']
        self.assertEqual('A', first_offer)
        # C doesn't block B, so it can go first
        self.check_task_order('C')

    def test_allow_resource_use_while_scheduling(self):
        """WORKER gets B even though worker 'SCHEDULING' added higher-priority A for the same r1."""
        sch = self.sch
        sch.update_resources(r1=1)
        sch.add_task(worker='SCHEDULING', task_id='A', resources={'r1': 1}, priority=10)
        sch.add_task(worker=WORKER, task_id='B', resources={'r1': 1}, priority=1)
        offered = sch.get_work(worker=WORKER)['task_id']
        self.assertEqual('B', offered)

    def test_stop_locking_resource_for_uninterested_worker(self):
        """r1 stops being held for WORKER's task once WORKER has not asked for work for a while.

        WORKER polls once at patched time 0 and then adds priority-10 A.
        LOW_PRIO's B is withheld at time 0 but expected to be handed out at
        patched time 120, with WORKER not having polled in between.
        """
        self.setTime(0)
        self.sch.update_resources(r1=1)
        self.assertIsNone(self.sch.get_work(worker=WORKER)['task_id'])
        self.sch.add_task(worker=WORKER, task_id='A', resources={'r1': 1}, priority=10)
        self.sch.add_task(worker='LOW_PRIO', task_id='B', resources={'r1': 1}, priority=1)
        self.assertIsNone(self.sch.get_work(worker='LOW_PRIO')['task_id'])

        self.setTime(120)
        self.assertEqual('B', self.sch.get_work(worker='LOW_PRIO')['task_id'])

    def check_task_order(self, order):
        """Assert that WORKER is handed the task ids in ``order``, then nothing.

        Each expected task is requested with ``get_work`` and reported DONE
        before the next one is requested. ``order`` may be any iterable of
        ids, including a string of single-character ids.
        """
        sch = self.sch
        for task_id in order:
            handed_out = sch.get_work(worker=WORKER)['task_id']
            self.assertEqual(handed_out, task_id)
            sch.add_task(worker=WORKER, task_id=task_id, status=DONE)
        leftover = sch.get_work(worker=WORKER)['task_id']
        self.assertEqual(leftover, None)

    def test_priorities(self):
        """Independent tasks are handed out in descending priority order."""
        priorities = (('A', 10), ('B', 5), ('C', 15), ('D', 9))
        for task_id, priority in priorities:
            self.sch.add_task(worker=WORKER, task_id=task_id, priority=priority)
        self.check_task_order(['C', 'A', 'D', 'B'])

    def test_priorities_default_and_negative(self):
        """A task added without a priority sorts between priority 1 and priority -20."""
        task_specs = (
            {'task_id': 'A', 'priority': 10},
            {'task_id': 'B'},
            {'task_id': 'C', 'priority': 15},
            {'task_id': 'D', 'priority': -20},
            {'task_id': 'E', 'priority': 1},
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.check_task_order(['C', 'A', 'E', 'B', 'D'])

    def test_priorities_and_dependencies(self):
        """Low-priority Z is handed out first because higher-priority A depends on it."""
        task_specs = (
            {'task_id': 'A', 'deps': ['Z'], 'priority': 10},
            {'task_id': 'B', 'priority': 5},
            {'task_id': 'C', 'deps': ['Z'], 'priority': 3},
            {'task_id': 'D', 'priority': 2},
            {'task_id': 'Z', 'priority': 1},
        )
        for spec in task_specs:
            self.sch.add_task(worker=WORKER, **spec)
        self.check_task_order(['Z', 'A', 'B', 'C', 'D'])

    def test_priority_update_dependency_after_scheduling(self):
        """The chain A -> B -> C is handed out ahead of D although D outranks A and B."""
        sch = self.sch
        sch.add_task(worker=WORKER, task_id='A', priority=1)
        chained = (('B', 5, ['A']), ('C', 10, ['B']))
        for task_id, priority, deps in chained:
            sch.add_task(worker=WORKER, task_id=task_id, priority=priority, deps=deps)
        sch.add_task(worker=WORKER, task_id='D', priority=6)
        self.check_task_order(['A', 'B', 'C', 'D'])

    def test_disable(self):
        """After three FAILED reports task A is listed as DISABLED and is not handed out.

        ``get_scheduler_config`` sets ``disable_failures`` to 3.
        """
        sch = self.sch
        sch.add_task(worker=WORKER, task_id='A')
        for _ in range(3):
            sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # should be disabled at this point
        disabled = sch.task_list('DISABLED', '')
        self.assertEqual(len(disabled), 1)
        failed = sch.task_list('FAILED', '')
        self.assertEqual(len(failed), 0)
        sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(sch.get_work(worker=WORKER)['task_id'], None)

    def test_disable_and_reenable(self):
        """re_enable_task moves a disabled task back to FAILED and lets it be handed out again."""
        sch = self.sch

        def count(status):
            return len(sch.task_list(status, ''))

        sch.add_task(worker=WORKER, task_id='A')
        for _ in range(3):
            sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        sch.re_enable_task('A')

        # should be enabled at this point
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('FAILED'), 1)
        sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(sch.get_work(worker=WORKER)['task_id'], 'A')

    def test_disable_and_reenable_and_disable_again(self):
        """A re-enabled task is disabled again only after three further FAILED reports."""
        sch = self.sch

        def report_failure():
            sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        def assert_counts(disabled, failed):
            self.assertEqual(len(sch.task_list('DISABLED', '')), disabled)
            self.assertEqual(len(sch.task_list('FAILED', '')), failed)

        def assert_next_after_readd(expected_id):
            sch.add_task(worker=WORKER, task_id='A')
            self.assertEqual(sch.get_work(worker=WORKER)['task_id'], expected_id)

        sch.add_task(worker=WORKER, task_id='A')
        for _ in range(3):
            report_failure()

        # should be disabled at this point
        assert_counts(disabled=1, failed=0)

        sch.re_enable_task('A')

        # should be enabled at this point
        assert_counts(disabled=0, failed=1)
        assert_next_after_readd('A')

        report_failure()

        # should be still enabled
        assert_counts(disabled=0, failed=1)
        assert_next_after_readd('A')

        for _ in range(2):
            report_failure()

        # should be disabled now
        assert_counts(disabled=1, failed=0)
        assert_next_after_readd(None)

    def test_disable_and_done(self):
        """Reporting a disabled task as DONE clears the disabled state; re-adding it makes it available."""
        sch = self.sch

        def count(status):
            return len(sch.task_list(status, ''))

        sch.add_task(worker=WORKER, task_id='A')
        for _ in range(3):
            sch.add_task(worker=WORKER, task_id='A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        sch.add_task(worker=WORKER, task_id='A', status=DONE)

        # should be enabled at this point
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('DONE'), 1)
        sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(sch.get_work(worker=WORKER)['task_id'], 'A')

    def test_disable_by_worker(self):
        """A task added with status DISABLED is re-enabled when it is added again without a status."""
        sch = self.sch

        def disabled_count():
            return len(sch.task_list('DISABLED', ''))

        sch.add_task(worker=WORKER, task_id='A', status=DISABLED)
        self.assertEqual(disabled_count(), 1)

        sch.add_task(worker=WORKER, task_id='A')

        # should be enabled at this point
        self.assertEqual(disabled_count(), 0)
        sch.add_task(worker=WORKER, task_id='A')
        self.assertEqual(sch.get_work(worker=WORKER)['task_id'], 'A')

    def test_task_list_beyond_limit(self):
        """Listing four pending tasks with max_shown_tasks=3 yields only a count.

        With ``False`` as the third positional argument all four ids are
        returned (presumably this disables the limit -- confirm against
        ``task_list``'s signature).
        """
        limited = CentralPlannerScheduler(max_shown_tasks=3)
        for task_id in 'ABCD':
            limited.add_task(worker=WORKER, task_id=task_id)
        unlimited_listing = limited.task_list('PENDING', '', False)
        self.assertEqual(set('ABCD'), set(unlimited_listing.keys()))
        limited_listing = limited.task_list('PENDING', '')
        self.assertEqual({'num_tasks': 4}, limited_listing)

    def test_task_list_within_limit(self):
        """Four pending tasks are all listed when max_shown_tasks is 4."""
        roomy = CentralPlannerScheduler(max_shown_tasks=4)
        for task_id in 'ABCD':
            roomy.add_task(worker=WORKER, task_id=task_id)
        listing = roomy.task_list('PENDING', '')
        self.assertEqual(set('ABCD'), set(listing.keys()))

    def test_task_lists_some_beyond_limit(self):
        """With max_shown_tasks=3, three PENDING tasks are listed but four DONE tasks give only a count."""
        limited = CentralPlannerScheduler(max_shown_tasks=3)
        for task_id in 'ABCD':
            limited.add_task(worker=WORKER, task_id=task_id, status=DONE)
        for task_id in 'EFG':
            limited.add_task(worker=WORKER, task_id=task_id)
        pending = limited.task_list('PENDING', '')
        self.assertEqual(set('EFG'), set(pending.keys()))
        done = limited.task_list('DONE', '')
        self.assertEqual({'num_tasks': 4}, done)

    def test_task_list_filter_by_search(self):
        """task_list with search='match' returns only the task whose id contains 'match'."""
        sch = self.sch
        for task_id in ('test_match_task', 'test_filter_task'):
            sch.add_task(worker=WORKER, task_id=task_id)
        found = sch.task_list('PENDING', '', search='match')
        self.assertEqual(['test_match_task'], list(found.keys()))

    def test_task_list_filter_by_multiple_search_terms(self):
        """search='b c' returns only the tasks whose ids contain both 'b' and 'c'."""
        sch = self.sch
        for task_id in ('abcd', 'abd', 'acd', 'ad', 'bc'):
            sch.add_task(worker=WORKER, task_id=task_id)
        found = sch.task_list('PENDING', '', search='b c')
        self.assertEqual({'abcd', 'bc'}, set(found.keys()))

    def test_search_results_beyond_limit(self):
        """A search matching four tasks with max_shown_tasks=3 gives a count; a narrower one lists its match."""
        limited = CentralPlannerScheduler(max_shown_tasks=3)
        for task_id in ('task_a', 'task_b', 'task_c', 'task_d'):
            limited.add_task(worker=WORKER, task_id=task_id)
        broad = limited.task_list('PENDING', '', search='a')
        self.assertEqual({'num_tasks': 4}, broad)
        narrow = limited.task_list('PENDING', '', search='_a')
        self.assertEqual(['task_a'], list(narrow.keys()))

    def test_priority_update_dependency_chain(self):
        """The chain C -> B -> A is handed out ahead of D although D outranks B and C."""
        sch = self.sch
        chained = (('A', 10, ['B']), ('B', 5, ['C']))
        for task_id, priority, deps in chained:
            sch.add_task(worker=WORKER, task_id=task_id, priority=priority, deps=deps)
        for task_id, priority in (('C', 1), ('D', 6)):
            sch.add_task(worker=WORKER, task_id=task_id, priority=priority)
        self.check_task_order(['C', 'B', 'A', 'D'])

    def test_priority_no_decrease_with_multiple_updates(self):
        """A stays ahead of D after a second, lower-priority dependent (C) is added."""
        sch = self.sch
        sch.add_task(worker=WORKER, task_id='A', priority=1)
        dependents = (('B', 10), ('C', 5))
        for task_id, priority in dependents:
            sch.add_task(worker=WORKER, task_id=task_id, priority=priority, deps=['A'])
        sch.add_task(worker=WORKER, task_id='D', priority=6)
        self.check_task_order(['A', 'B', 'D', 'C'])

    def test_unique_tasks(self):
        """get_work reports 3 pending tasks, 2 of them unique, when B is also wanted by a second worker."""
        sch = self.sch
        for task_id in ('A', 'B', 'C'):
            sch.add_task(worker=WORKER, task_id=task_id)
        other_worker = WORKER + "_2"
        sch.add_task(worker=other_worker, task_id='B')

        response = sch.get_work(worker=WORKER)
        expected_counts = (('n_pending_tasks', 3), ('n_unique_pending', 2))
        for key, expected in expected_counts:
            self.assertEqual(expected, response[key])

    def test_pending_downstream_disable(self):
        """Tasks downstream of a DISABLED task are not offered and not counted as pending."""
        self.sch.add_task(worker=WORKER, task_id='A', status=DISABLED)
        self.sch.add_task(worker=WORKER, task_id='B', deps=('A',))
        self.sch.add_task(worker=WORKER, task_id='C', deps=('B',))

        response = self.sch.get_work(worker=WORKER)
        # assertIsNone shows the unexpected task id on failure
        self.assertIsNone(response['task_id'])
        self.assertEqual(0, response['n_pending_tasks'])
        self.assertEqual(0, response['n_unique_pending'])

    def test_pending_downstream_failure(self):
        """Tasks downstream of a FAILED task are not offered but are still counted as pending."""
        self.sch.add_task(worker=WORKER, task_id='A', status=FAILED)
        self.sch.add_task(worker=WORKER, task_id='B', deps=('A',))
        self.sch.add_task(worker=WORKER, task_id='C', deps=('B',))

        response = self.sch.get_work(worker=WORKER)
        # assertIsNone shows the unexpected task id on failure
        self.assertIsNone(response['task_id'])
        self.assertEqual(2, response['n_pending_tasks'])
        self.assertEqual(2, response['n_unique_pending'])

    def test_task_list_no_deps(self):
        """The entry task_list() returns for a task carries no 'deps' key."""
        self.sch.add_task(worker=WORKER, task_id='B', deps=('A',))
        self.sch.add_task(worker=WORKER, task_id='A')
        pending = self.sch.task_list('PENDING', '')
        self.assertNotIn('deps', pending['A'])

    def test_task_first_failure_time(self):
        """first_failure_time starts as None and is stamped when add_failure() is called."""
        self.sch.add_task(worker=WORKER, task_id='A')
        task = self.sch._state.get_task('A')
        self.assertIsNone(task.failures.first_failure_time)

        # Bracket the failure with two clock readings; the stamp must fall between them.
        lower_bound = time.time()
        task.add_failure()
        upper_bound = time.time()

        recorded = task.failures.first_failure_time
        self.assertLessEqual(lower_bound, recorded)
        self.assertGreaterEqual(upper_bound, recorded)

    def test_task_first_failure_time_remains_constant(self):
        """A second add_failure() leaves first_failure_time unchanged."""
        self.sch.add_task(worker=WORKER, task_id='A')
        task = self.sch._state.get_task('A')
        failures = task.failures
        self.assertIsNone(failures.first_failure_time)

        task.add_failure()
        stamp_after_first = task.failures.first_failure_time

        task.add_failure()
        stamp_after_second = task.failures.first_failure_time
        self.assertEqual(stamp_after_first, stamp_after_second)

    def test_task_has_excessive_failures(self):
        """has_excessive_failures() turns true once the first failure is two hours old.

        It is false before any failure and right after the first one; the
        test then backdates first_failure_time by two hours and expects true.
        """
        self.sch.add_task(worker=WORKER, task_id='A')
        task = self.sch._state.get_task('A')
        self.assertIsNone(task.failures.first_failure_time)
        self.assertFalse(task.has_excessive_failures())

        task.add_failure()
        self.assertFalse(task.has_excessive_failures())

        two_hours = 2 * 60 * 60
        backdated = task.failures.first_failure_time - two_hours
        task.failures.first_failure_time = backdated
        self.assertTrue(task.has_excessive_failures())

    def test_quadratic_behavior(self):
        """Guard against get_work slowing down as the number of tasks grows.

        Timing cannot be asserted directly; under reasonable assumptions
        about the hardware the test should simply finish in a timely manner.
        """
        # For 10000 it takes almost 1 second on my laptop.  Prior to these
        # changes it was being slow already at NUM_TASKS=300
        NUM_TASKS = 10000
        names = [str(i) for i in range(NUM_TASKS)]

        # Every task gets its own worker, named after the task id.
        for name in names:
            self.sch.add_task(worker=name, task_id=name, resources={})

        for name in names:
            handed_out = self.sch.get_work(worker=name)['task_id']
            self.assertEqual(handed_out, name)
            self.sch.add_task(worker=name, task_id=name, status=DONE)

    def test_get_work_speed(self):
        """get_work should stay fast with a single worker and a large number of DONE tasks.

        In #986, @daveFNbuck reported that he got a slowdown.
        """
        # This took almost 4 minutes without optimization.
        # Now it takes 10 seconds on my machine.
        NUM_PENDING = 1000
        NUM_DONE = 200000
        assert NUM_DONE >= NUM_PENDING

        # Ids [0, NUM_PENDING) are pending; ids [NUM_PENDING, NUM_DONE) are already DONE.
        for i in range(NUM_DONE):
            if i < NUM_PENDING:
                self.sch.add_task(worker=WORKER, task_id=str(i), resources={})
            else:
                self.sch.add_task(worker=WORKER, task_id=str(i), status=DONE)

        for _ in range(NUM_PENDING):
            handed_out = self.sch.get_work(worker=WORKER)['task_id']
            res = int(handed_out)
            self.assertTrue(0 <= res < NUM_PENDING)
            self.sch.add_task(worker=WORKER, task_id=str(res), status=DONE)
Example #50
0
 def test_search_results_beyond_limit(self):
     """A search matching more than max_shown_tasks tasks returns only a count.

     Four 'Test' tasks exceed the limit of 3, so only {'num_tasks': 4} is
     expected; a search for '0' matches a single task and lists it.
     """
     sch = CentralPlannerScheduler(max_shown_tasks=3)
     for i in range(4):
         task_id = 'Test_%i' % i
         sch.add_task(worker=WORKER, family='Test', params={'p': str(i)}, task_id=task_id)

     too_many = sch.task_list('PENDING', '', search='Test')
     self.assertEqual({'num_tasks': 4}, too_many)

     single_match = sch.task_list('PENDING', '', search='0')
     self.assertEqual(['Test_0'], list(single_match.keys()))
 def setUp(self):
     """Build the scheduler from get_scheduler_config() and remember the current time.time."""
     super(CentralPlannerTest, self).setUp()
     scheduler_kwargs = self.get_scheduler_config()
     self.sch = CentralPlannerScheduler(**scheduler_kwargs)
     # Saved reference to time.time as it is before the test runs.
     self.time = time.time
Example #52
0
 def setUp(self):
     """Create a fresh scheduler and remember the current time.time."""
     scheduler_options = dict(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
     self.sch = CentralPlannerScheduler(**scheduler_options)
     self.time = time.time
 def test_task_list_within_limit(self):
     """With exactly max_shown_tasks tasks, task_list() returns every one of them."""
     task_ids = 'ABCD'
     sch = CentralPlannerScheduler(max_shown_tasks=4)
     for task_id in task_ids:
         sch.add_task(worker=WORKER, task_id=task_id)

     listed = sch.task_list('PENDING', '')
     self.assertEqual(set(task_ids), set(listed.keys()))
Example #54
0
class WorkerTest(unittest.TestCase):

    def run(self, result=None):
        """Run one test with a fresh scheduler and two entered workers.

        The scheduler is stored as ``self.sch`` and the workers 'X' and 'Y'
        as ``self.w`` and ``self.w2``; both workers are used as context
        managers for the duration of the test.

        :param result: optional test result object, forwarded to the base class.
        :return: whatever ``unittest.TestCase.run`` returns.
        """
        self.sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.time = time.time
        try:
            with Worker(scheduler=self.sch, worker_id='X') as w, Worker(scheduler=self.sch, worker_id='Y') as w2:
                self.w = w
                self.w2 = w2
                return super(WorkerTest, self).run(result)
        finally:
            # Undo a setTime() patch even when the run or the worker teardown
            # raised, so a fake clock never leaks into later tests.
            if time.time != self.time:
                time.time = self.time

    def setTime(self, t):
        """Replace time.time with a stub that always returns ``t``."""
        def frozen_clock():
            return t

        time.time = frozen_clock

    def test_dep(self):
        """Running B through the worker also runs its requirement A."""
        class A(Task):

            def complete(self):
                return self.has_run

            def run(self):
                self.has_run = True

        a = A()
        a.has_run = False

        class B(Task):

            def requires(self):
                return a

            def complete(self):
                return self.has_run

            def run(self):
                self.has_run = True

        b = B()
        b.has_run = False

        scheduled = self.w.add(b)
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertTrue(succeeded)

        for task in (a, b):
            self.assertTrue(task.has_run)

    def test_stop_getting_new_work(self):
        """After handle_interrupt(SIGUSR1) the worker's run() leaves the queued task undone."""
        task = DummyTask()
        self.w.add(task)
        self.assertFalse(task.complete())

        try:
            self.w.handle_interrupt(signal.SIGUSR1, None)
        except AttributeError:
            # Platforms without SIGUSR1 cannot exercise this code path.
            raise unittest.SkipTest('signal.SIGUSR1 not found on this system')

        self.w.run()
        self.assertFalse(task.complete())

    def test_external_dep(self):
        """B is not run while its external dependency A reports itself incomplete."""
        class A(ExternalTask):

            def complete(self):
                return False

        a = A()
        a.has_run = False

        class B(Task):

            def requires(self):
                return a

            def complete(self):
                return self.has_run

            def run(self):
                self.has_run = True

        b = B()
        b.has_run = False

        scheduled = self.w.add(b)
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertTrue(succeeded)

        for task in (a, b):
            self.assertFalse(task.has_run)

    def test_tracking_url(self):
        """A URL passed to tracking_url_callback shows up in the scheduler's DONE task list."""
        tracking_url = 'http://test_url.com/'

        class A(Task):
            has_run = False

            def run(self, tracking_url_callback=None):
                if tracking_url_callback is not None:
                    tracking_url_callback(tracking_url)
                self.has_run = True

            def complete(self):
                return self.has_run

        task = A()
        self.assertTrue(self.w.add(task))
        self.assertTrue(self.w.run())

        done_tasks = self.sch.task_list('DONE', '')
        self.assertEqual(1, len(done_tasks))
        reported_url = done_tasks['A()']['tracking_url']
        self.assertEqual(tracking_url, reported_url)

    def test_type_error_in_tracking_run(self):
        """A TypeError raised inside run(tracking_url_callback=...) fails the task after a single call."""
        class A(Task):
            num_runs = 0

            def run(self, tracking_url_callback=None):
                self.num_runs += 1
                raise TypeError('bad type')

            def complete(self):
                return False

        task = A()
        scheduled = self.w.add(task)
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertFalse(succeeded)

        # Should only run and fail once, not retry because of the type error
        self.assertEqual(1, task.num_runs)

    def test_fail(self):
        """When A raises from run(), the worker run fails and the dependent B never runs."""
        class CustomException(BaseException):
            def __init__(self, msg):
                self.msg = msg

        class A(Task):

            def complete(self):
                return self.has_run

            def run(self):
                # Flag the attempt first, then blow up.
                self.has_run = True
                raise CustomException('bad things')

        a = A()
        a.has_run = False

        class B(Task):

            def requires(self):
                return a

            def complete(self):
                return self.has_run

            def run(self):
                self.has_run = True

        b = B()
        b.has_run = False

        scheduled = self.w.add(b)
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertFalse(succeeded)

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        """Two workers schedule the same task B with different dependencies.

        Worker X schedules B depending on the never-complete external task A;
        worker Y then schedules B depending on the always-complete task C.
        The test only pins down that worker X's run() returns False and does
        not run its B instance.
        """
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):

            def complete(self):
                return False

        class C(Task):

            def complete(self):
                return True

        def get_b(dep):
            # Build a fresh B class/instance whose single requirement is `dep`.
            class B(Task):

                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.assertTrue(self.w2.add(b_c))

        self.assertFalse(self.w.run())  # should not run anything - the worker should detect that A is broken
        self.assertFalse(b_a.has_run)
        # The expected behaviour of the second worker is undecided, so it is
        # intentionally left unasserted:
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        """A dependency that looked complete when scheduled but is incomplete at run time still gets run.

        `a` reports complete while `b` is added, is flipped to incomplete
        before the worker runs, and both tasks are expected to be complete
        afterwards.
        """
        class A(Task):

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        def get_b(a):
            class B(A):

                def requires(self):
                    return a
            b = B()
            b.done = False
            # Make the dependency look complete at scheduling time.
            a.done = True
            return b

        a = A()
        b = get_b(a)

        self.assertTrue(self.w.add(b))
        # Un-complete the dependency after it has been scheduled.
        a.done = False
        self.w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_gets_missed_work(self):
        """The worker still runs a task the scheduler already handed out in a response it never saw."""
        class A(Task):
            done = False

            def run(self):
                self.done = True

            def complete(self):
                return self.done

        task = A()
        self.assertTrue(self.w.add(task))

        # simulate a missed get_work response
        lost_response = self.sch.get_work(worker='X')
        self.assertEqual('A()', lost_response['task_id'])

        self.assertTrue(self.w.run())
        self.assertTrue(task.complete())

    def test_avoid_infinite_reschedule(self):
        """run() returns (with a falsy result) when neither B nor its dependency A ever completes."""
        class A(Task):

            def complete(self):
                return False

        class B(Task):

            def requires(self):
                return A()

            def complete(self):
                return False

        scheduled = self.w.add(B())
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertFalse(succeeded)

    def test_fails_registering_signal(self):
        """Constructing a Worker must not raise when the signal module lacks SIGUSR1."""
        signal_without_sigusr1 = mock.patch('luigi.worker.signal', spec=['signal'])
        with signal_without_sigusr1:
            # mock will raise an attribute error getting signal.SIGUSR1
            Worker()

    def test_allow_reschedule_with_many_missing_deps(self):
        """B completes even though each of its 20 dependencies must run twice.

        The worker is created with ``max_reschedules=1``; its run() is
        expected to return a falsy value while B and every A end up complete.
        """
        class A(Task):

            """ Task that must run twice to succeed """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        b = B()
        # Enter the worker as a context manager, like every other worker in
        # this class, so it is shut down when the test finishes instead of
        # being left dangling.
        with Worker(scheduler=self.sch, worker_id='X', max_reschedules=1) as w:
            self.assertTrue(w.add(b))
            self.assertFalse(w.run())

            # For b to be done, we must have rescheduled its dependencies to run them twice
            self.assertTrue(b.complete())
            self.assertTrue(all(a.complete() for a in b.deps()))

    def test_interleaved_workers(self):
        """Worker X can still build A -> B after worker Y registered an external task with B's id.

        Y's ExternalB shares the task id "B()" and is never complete, so Y's
        run leaves A and B untouched; X's run afterwards completes both.
        """
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w:
            with Worker(scheduler=sch, worker_id='Y') as w2:
                self.assertTrue(w.add(b))
                self.assertTrue(w2.add(eb))

                logging.debug("RUNNING BROKEN WORKER")
                self.assertTrue(w2.run())
                for task in (a, b):
                    self.assertFalse(task.complete())

                logging.debug("RUNNING FUNCTIONAL WORKER")
                self.assertTrue(w.run())
                for task in (a, b):
                    self.assertTrue(task.complete())

    def test_interleaved_workers2(self):
        """Same id registered as external by Y and as runnable by X: only X's run completes B."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id='X') as w:
            with Worker(scheduler=sch, worker_id='Y') as w2:
                # The external variant is registered first.
                self.assertTrue(w2.add(eb))
                self.assertTrue(w.add(b))

                external_run_ok = w2.run()
                self.assertTrue(external_run_ok)
                self.assertFalse(b.complete())

                real_run_ok = w.run()
                self.assertTrue(real_run_ok)
                self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        """Two keep-alive workers cooperate: X runs A in a thread while Y waits for it and then runs B."""
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                # Take a little while so that worker Y has to wait for A.
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                self.assertTrue(w.add(a))
                self.assertTrue(w2.add(b))

                # Worker X runs in a background thread while Y runs in this one.
                # NOTE(review): the thread is never joined - confirm that is intended.
                threading.Thread(target=w.run).start()
                self.assertTrue(w2.run())

                self.assertTrue(a.complete())
                self.assertTrue(b.complete())

    def test_die_for_non_unique_pending(self):
        """A keep-alive worker with count_uniques stops when its pending work is shared with another worker.

        X takes A from the scheduler without running it; Y's run() is still
        expected to return truthy and to leave both A and B incomplete.
        """
        class A(DummyTask):

            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):

            def run(self):
                logging.debug('running B')
                super(B, self).run()

            def requires(self):
                return a

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        with Worker(scheduler=sch, worker_id='X', keep_alive=True, count_uniques=True) as w:
            with Worker(scheduler=sch, worker_id='Y', keep_alive=True, count_uniques=True, wait_interval=0.1) as w2:
                # Both workers register the same task graph.
                for worker in (w, w2):
                    self.assertTrue(worker.add(b))

                # X claims A from the scheduler but never actually runs it.
                claimed = w._get_work()
                self.assertEqual(claimed[0], 'A()')
                self.assertTrue(w2.run())

                for task in (a, b):
                    self.assertFalse(task.complete())

    def test_complete_exception(self):
        """Tests that a task is still scheduled if its sister task crashes in the complete() method"""
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        class C(DummyTask):
            pass

        a = A()
        c = C()

        class B(DummyTask):

            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            scheduled = w.add(b)
            self.assertFalse(scheduled)
            succeeded = w.run()
            self.assertTrue(succeeded)

            # Only the healthy sibling C is expected to have run.
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertFalse(a.has_run)

    def test_requires_exception(self):
        """A sibling chain (C -> D) still runs when another dependency crashes in requires()."""
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        class D(DummyTask):
            pass

        a = A()
        d = D()

        class C(DummyTask):
            def requires(self):
                return d

        c = C()

        class B(DummyTask):

            def requires(self):
                return c, a

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        with Worker(scheduler=sch, worker_id="foo") as w:
            scheduled = w.add(b)
            self.assertFalse(scheduled)
            succeeded = w.run()
            self.assertTrue(succeeded)

            # The healthy branch ran; B and the broken A did not.
            self.assertFalse(b.has_run)
            self.assertTrue(c.has_run)
            self.assertTrue(d.has_run)
            self.assertFalse(a.has_run)
Example #55
0
 def setUp(self):
     """Create a scheduler shared by two workers ('X' and 'Y') and remember the current time.time."""
     # InstanceCache.disable()
     scheduler_options = dict(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
     self.sch = CentralPlannerScheduler(**scheduler_options)
     self.w = Worker(scheduler=self.sch, worker_id='X')
     self.w2 = Worker(scheduler=self.sch, worker_id='Y')
     self.time = time.time
Example #56
0
class WorkerExternalTaskTest(unittest.TestCase):
    """Tests how a worker handles a task whose external dependency may complete late."""

    def setUp(self):
        """Create a scheduler with short retry, removal and disable settings."""
        self.scheduler = CentralPlannerScheduler(retry_delay=0.01,
                                                 remove_delay=3,
                                                 worker_disconnect_delay=3,
                                                 disable_persist=3,
                                                 disable_window=5,
                                                 disable_failures=2)

    def _assert_complete(self, tasks):
        """Assert that every task in ``tasks`` reports itself complete."""
        for t in tasks:
            # assertTrue replaces the deprecated ``assert_`` alias, which was
            # removed from unittest in Python 3.12.
            self.assertTrue(t.complete())

    def _build(self, tasks):
        """Add ``tasks`` to a single-process worker on self.scheduler, run it, then stop it."""
        w = luigi.worker.Worker(scheduler=self.scheduler, worker_processes=1)
        for t in tasks:
            w.add(t)
        w.run()
        w.stop()

    def test_external_dependency_already_complete(self):
        """
        Test that the test task completes when its dependency exists at the
        start of the execution.
        """
        tempdir = tempfile.mkdtemp(prefix='luigi-test-')
        test_task = TestTask(tempdir=tempdir, complete_after=1)
        luigi.build([test_task], local_scheduler=True)

        assert os.path.exists(test_task.dep_path)
        assert os.path.exists(test_task.output_path)

        # Clean up the files the task produced, then the directory itself.
        os.unlink(test_task.dep_path)
        os.unlink(test_task.output_path)
        os.rmdir(tempdir)

        # complete() is called once per failure, twice per success
        assert test_task.dependency.times_called == 2

    @with_config({'core': {'retry-external-tasks': 'true',
                           'disable-num-failures': '4',
                           'max-reschedules': '4',
                           'worker-keep-alive': 'true',
                           'retry-delay': '0.01'}})
    def test_external_dependency_completes_later(self):
        """
        Test that an external dependency that is not `complete` when luigi is
        invoked, but becomes `complete` while the workflow is executing, is
        re-evaluated and allows dependencies to run.
        """
        assert luigi.configuration.get_config().getboolean('core',
                                                           'retry-external-tasks',
                                                           False) is True

        original_get_work = self.scheduler.get_work

        def decorated_get_work(*args, **kwargs):
            # need to call `prune()` to make the scheduler run the retry logic
            self.scheduler.prune()
            return original_get_work(*args, **kwargs)

        # Patch only this test's scheduler instance.
        self.scheduler.get_work = decorated_get_work

        tempdir = tempfile.mkdtemp(prefix='luigi-test-')

        with patch('random.randint', return_value=0.1):
            test_task = TestTask(tempdir=tempdir, complete_after=3)
            self._build([test_task])

        assert os.path.exists(test_task.dep_path)
        assert os.path.exists(test_task.output_path)

        os.unlink(test_task.dep_path)
        os.unlink(test_task.output_path)
        os.rmdir(tempdir)

        # complete() is called once per failure, twice per success
        assert test_task.dependency.times_called == 4
Example #57
0
 def setUp(self):
     """Create a scheduler with a regular worker 'X' and an assistant worker 'Y'."""
     scheduler_options = dict(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
     self.sch = CentralPlannerScheduler(**scheduler_options)
     self.w = Worker(scheduler=self.sch, worker_id='X')
     self.assistant = Worker(scheduler=self.sch, worker_id='Y', assistant=True)
Example #58
0
class CentralPlannerTest(unittest.TestCase):
    def setUp(self):
        """Create a fresh scheduler and remember the current time.time for tearDown()."""
        scheduler_options = dict(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
            disable_persist=10,
            disable_window=10,
            disable_failures=3,
        )
        self.sch = CentralPlannerScheduler(**scheduler_options)
        self.time = time.time

    def tearDown(self):
        """Put back the time.time saved in setUp() if a test replaced it."""
        saved_clock = self.time
        if time.time != saved_clock:
            time.time = saved_clock

    def setTime(self, t):
        """Replace time.time with a stub that always returns ``t``."""
        def frozen_clock():
            return t

        time.time = frozen_clock

    def test_dep(self):
        """B is handed out only after its dependency A has been marked DONE."""
        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A')

        for expected_id in ('A', 'B'):
            self.assertEqual(self.sch.get_work(WORKER)['task_id'], expected_id)
            self.sch.add_task(WORKER, expected_id, status=DONE)

        self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)

    def test_failed_dep(self):
        """While A is FAILED nothing is handed out; once A is DONE, B becomes available."""
        def next_task_id():
            return self.sch.get_work(WORKER)['task_id']

        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A')

        self.assertEqual(next_task_id(), 'A')
        self.sch.add_task(WORKER, 'A', status=FAILED)

        # can still wait and retry: TODO: do we want this?
        self.assertEqual(next_task_id(), None)
        self.sch.add_task(WORKER, 'A', DONE)
        self.assertEqual(next_task_id(), 'B')
        self.sch.add_task(WORKER, 'B', DONE)
        self.assertEqual(next_task_id(), None)

    def test_broken_dep(self):
        """With A added as runnable=False nothing is handed out; once A is DONE, B becomes available."""
        def next_task_id():
            return self.sch.get_work(WORKER)['task_id']

        self.sch.add_task(WORKER, 'B', deps=('A',))
        self.sch.add_task(WORKER, 'A', runnable=False)

        # can still wait and retry: TODO: do we want this?
        self.assertEqual(next_task_id(), None)
        self.sch.add_task(WORKER, 'A', DONE)
        self.assertEqual(next_task_id(), 'B')
        self.sch.add_task(WORKER, 'B', DONE)
        self.assertEqual(next_task_id(), None)

    def test_two_workers(self):
        """Two workers sharing dependency A: one runs it, the other waits, then each gets its own task."""
        def next_task_id(worker):
            return self.sch.get_work(worker=worker)['task_id']

        # Worker X wants to build A -> B
        # Worker Y wants to build A -> C
        for worker in ('X', 'Y'):
            self.sch.add_task(worker=worker, task_id='A')
        self.sch.add_task(task_id='B', deps=('A',), worker='X')
        self.sch.add_task(task_id='C', deps=('A',), worker='Y')

        self.assertEqual(next_task_id('X'), 'A')
        # Worker Y is pending on A to be done
        self.assertEqual(next_task_id('Y'), None)
        self.sch.add_task(worker='X', task_id='A', status=DONE)
        self.assertEqual(next_task_id('Y'), 'C')
        self.assertEqual(next_task_id('X'), 'B')

    def test_retry(self):
        """A FAILED task is not handed out again until the retry delay has elapsed.

        The clock is faked with setTime(); for t in [0, 100) no work is
        returned, and at t=101 (after a prune) task A is handed out again.
        """
        # Try to build A but fails, will retry after 100s
        self.setTime(0)
        self.sch.add_task(WORKER, 'A')
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')
        self.sch.add_task(WORKER, 'A', FAILED)
        # `range` rather than the Python-2-only `xrange`, matching the rest of the file.
        for t in range(100):
            self.setTime(t)
            self.assertEqual(self.sch.get_work(WORKER)['task_id'], None)
            self.sch.ping(WORKER)
            if t % 10 == 0:
                self.sch.prune()

        self.setTime(101)
        self.sch.prune()
        self.assertEqual(self.sch.get_work(WORKER)['task_id'], 'A')

    def test_disconnect_running(self):
        """A task taken by a worker that stops reporting is eventually handed to another worker."""
        # X and Y wants to run A.
        # X starts but does not report back. Y does.
        # After some timeout, Y will build it instead
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.sch.add_task(task_id='A', worker='Y')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        # `range` rather than the Python-2-only `xrange`, matching the rest of the file.
        for t in range(200):
            self.setTime(t)
            # Only Y keeps pinging; X stays silent.
            self.sch.ping(worker='Y')
            if t % 10 == 0:
                self.sch.prune()

        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'A')

    def test_remove_dep(self):
        """Re-adding B with a different dependency replaces its earlier, broken dependency."""
        # X schedules A -> B, A is broken
        # Y schedules C -> B: this should remove A as a dep of B
        self.sch.add_task(task_id='A', worker='X', runnable=False)
        self.sch.add_task(task_id='B', deps=('A',), worker='X')

        # X can't build anything
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], None)

        self.sch.add_task(task_id='B', deps=('C',), worker='Y')  # should replace B's dependencies: A is dropped in favour of C
        self.sch.add_task(task_id='C', worker='Y', status=DONE)

        # With C already DONE, B is expected to be available to Y.
        self.assertEqual(self.sch.get_work(worker='Y')['task_id'], 'B')

    def test_timeout(self):
        """Regression test: marking A DONE after its original worker timed out must not raise."""
        # A bug that was earlier present when restarting the same flow
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')
        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')
        self.setTime(10000)
        self.sch.add_task(task_id='A', worker='Y')  # Will timeout X but not schedule A for removal
        # `range` rather than the Python-2-only `xrange`, matching the rest of the file.
        for i in range(2000):
            self.setTime(10000 + i)
            self.sch.ping(worker='Y')
        self.sch.add_task(task_id='A', status=DONE, worker='Y')  # This used to raise an exception since A was removed

    def test_disallowed_state_changes(self):
        """Re-adding a task that is already handed out does not make it available to another worker."""
        # Test that we can not schedule an already running task
        task_id = 'A'
        self.sch.add_task(task_id=task_id, worker='X')
        first = self.sch.get_work(worker='X')
        self.assertEqual(first['task_id'], task_id)

        self.sch.add_task(task_id=task_id, worker='Y')
        second = self.sch.get_work(worker='Y')
        self.assertEqual(second['task_id'], None)

    def test_two_worker_info(self):
        """get_work tells a waiting worker which task is running and on which worker."""
        # Make sure the scheduler returns info that some other worker is running task A
        for worker in ('X', 'Y'):
            self.sch.add_task(worker=worker, task_id='A')

        self.assertEqual(self.sch.get_work(worker='X')['task_id'], 'A')

        response = self.sch.get_work(worker='Y')
        # Worker Y is pending on A to be done
        self.assertEqual(response['task_id'], None)

        running = response['running_tasks'][0]
        self.assertEqual(running['task_id'], 'A')
        self.assertEqual(running['worker'], 'X')

    def test_scheduler_resources_none_allow_one(self):
        """With no resource limits configured, a task needing one unit of R1 is handed out."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 1})
        response = self.sch.get_work(worker='X')
        self.assertEqual(response['task_id'], 'A')

    def test_scheduler_resources_none_disallow_two(self):
        """With no resource limits configured, a task needing two units of R1 is not handed out."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        # The stray second argument ('A') was only being used as the failure
        # message of assertFalse; it is dropped to match the sibling tests.
        self.assertFalse(self.sch.get_work(worker='X')['task_id'])

    def test_scheduler_with_insufficient_resources(self):
        """A task needing 3 units of R1 is not handed out when only 2 are configured."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        self.sch.update_resources(R1=2)
        response = self.sch.get_work(worker='X')
        self.assertFalse(response['task_id'])

    def test_scheduler_with_sufficient_resources(self):
        """A task needing 3 units of R1 is handed out when exactly 3 are configured."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 3})
        self.sch.update_resources(R1=3)
        response = self.sch.get_work(worker='X')
        self.assertEqual(response['task_id'], 'A')

    def test_scheduler_with_resources_used(self):
        """B is not handed out while the single unit of R1 is held by the running task A."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 1})
        running = self.sch.get_work(worker='X')
        self.assertEqual(running['task_id'], 'A')

        self.sch.add_task(worker='Y', task_id='B', resources={'R1': 1})
        self.sch.update_resources(R1=1)
        blocked = self.sch.get_work(worker='Y')
        self.assertFalse(blocked['task_id'])

    def test_scheduler_overprovisioned_on_other_resource(self):
        """Lowering R1 below what the running task A holds does not block B, which only needs R2."""
        self.sch.add_task(worker='X', task_id='A', resources={'R1': 2})
        self.sch.update_resources(R1=2)
        first = self.sch.get_work(worker='X')
        self.assertEqual(first['task_id'], 'A')

        self.sch.add_task(worker='Y', task_id='B', resources={'R2': 2})
        self.sch.update_resources(R1=1, R2=2)
        second = self.sch.get_work(worker='Y')
        self.assertEqual(second['task_id'], 'B')

    def test_scheduler_with_priority_and_competing_resources(self):
        """Y's low-priority C must not take resource R ahead of X's high-priority B.

        Y gets nothing while it only has C; once it also has the
        resource-free task D, D is handed out.
        """
        def next_task_id(worker):
            return self.sch.get_work(worker=worker)['task_id']

        self.sch.add_task(worker='X', task_id='A')
        self.assertEqual(next_task_id('X'), 'A')

        self.sch.add_task(worker='X', task_id='B', resources={'R': 1}, priority=10)
        self.sch.add_task(worker='Y', task_id='C', resources={'R': 1}, priority=1)
        self.sch.update_resources(R=1)
        self.assertFalse(next_task_id('Y'))

        self.sch.add_task(worker='Y', task_id='D', priority=0)
        self.assertEqual(next_task_id('Y'), 'D')

    def test_priority_update_with_pruning(self):
        """Priority propagation still works when a dependency was pruned and later re-added."""
        self.setTime(0)
        self.sch.add_task(task_id='A', worker='X')

        # after worker disconnects
        self.setTime(50)
        self.sch.prune()
        self.sch.add_task(task_id='B', deps=['A'], worker='X')

        # after remove for task A
        self.setTime(2000)
        self.sch.prune()

        # Here task A that B depends on is missing
        resubmissions = (
            dict(task_id='C', deps=['B'], priority=100),
            dict(task_id='B', deps=['A']),
            dict(task_id='A'),
            dict(task_id='D', priority=10),
        )
        for kwargs in resubmissions:
            self.sch.add_task(WORKER, **kwargs)

        self.check_task_order('ABCD')

    def test_update_resources(self):
        """Re-adding a task with a smaller resource requirement makes it schedulable."""
        resource_limit = 1
        self.sch.add_task(WORKER, task_id='A', deps=['B'])
        self.sch.add_task(WORKER, task_id='B', resources={'r': 2})
        self.sch.update_resources(r=resource_limit)

        # B requires too many resources, we can't schedule
        nothing_runnable = []
        self.check_task_order(nothing_runnable)

        self.sch.add_task(WORKER, task_id='B', resources={'r': 1})

        # now we have enough resources
        self.check_task_order(['B', 'A'])

    def test_hendle_multiple_resources(self):
        """While A holds r1 and r2, only C (which needs just r1) can still be handed out."""
        requirements = (
            ('A', {'r1': 1, 'r2': 1}),
            ('B', {'r1': 1, 'r2': 1}),
            ('C', {'r1': 1}),
        )
        for task_id, resources in requirements:
            self.sch.add_task(WORKER, task_id=task_id, resources=resources)
        self.sch.update_resources(r1=2, r2=1)

        first = self.sch.get_work(WORKER)
        self.assertEqual('A', first['task_id'])
        self.check_task_order('C')

    def test_single_resource_lock(self):
        """A lower-priority task must not grab a resource that a higher-priority task is waiting for."""
        submissions = (
            dict(task_id='A', resources={'r': 1}, priority=10),
            dict(task_id='B', resources={'r': 2}, priority=10),
            dict(task_id='C', resources={'r': 1}),
        )
        for kwargs in submissions:
            self.sch.add_task(WORKER, **kwargs)
        self.sch.update_resources(r=2)

        first = self.sch.get_work(WORKER)
        self.assertEqual('A', first['task_id'])
        # Should wait for 2 units of r to be available for B before scheduling C
        self.check_task_order([])

    def test_no_lock_if_too_many_resources_required(self):
        """A task needing more than the configured total does not block others.

        A (priority 10) asks for 2 units of 'r' but only 1 is configured;
        B, which needs 1 unit, is still expected to be scheduled.
        """
        scheduler = self.sch
        scheduler.add_task(WORKER, task_id='A', resources={'r': 2}, priority=10)
        scheduler.add_task(WORKER, task_id='B', resources={'r': 1})
        scheduler.update_resources(r=1)

        expected_order = 'B'
        self.check_task_order(expected_order)

    def test_multiple_resources_lock(self):
        """C must not consume r2 while higher-priority B is waiting for r1 and r2.

        With r1=1 and r2=1: A (r1, priority 10) is handed out, B (r1 and r2,
        priority 10) has to wait, and C (r2 only) is expected to stay pending.
        """
        submissions = [
            dict(task_id='A', resources={'r1': 1}, priority=10),
            dict(task_id='B', resources={'r1': 1, 'r2': 1}, priority=10),
            dict(task_id='C', resources={'r2': 1}),
        ]
        for submission in submissions:
            self.sch.add_task(WORKER, **submission)
        self.sch.update_resources(r1=1, r2=1)

        started = self.sch.get_work(WORKER)['task_id']
        self.assertEqual('A', started)
        # Should wait for r1 to be available for B before using up r2
        self.check_task_order([])

    def test_multiple_resources_no_lock(self):
        """C may run ahead of B when enough r2 remains for both.

        Same tasks as the lock scenario, but with r2=2: after A is handed
        out, C (r2 only) is expected to be scheduled.
        """
        for task_id, resources, extra in (
            ('A', {'r1': 1}, {'priority': 10}),
            ('B', {'r1': 1, 'r2': 1}, {'priority': 10}),
            ('C', {'r2': 1}, {}),
        ):
            self.sch.add_task(WORKER, task_id=task_id, resources=resources, **extra)
        self.sch.update_resources(r1=1, r2=2)

        first_work = self.sch.get_work(WORKER)
        self.assertEqual('A', first_work['task_id'])
        # C doesn't block B, so it can go first
        self.check_task_order('C')

    def check_task_order(self, order):
        """Assert that WORKER is handed exactly the tasks in ``order``, in sequence.

        For each expected id, one ``get_work`` call is made, its ``task_id`` is
        compared against the expectation, and the task is then reported with
        status DONE. After the sequence, one more ``get_work`` call must
        return a ``task_id`` of None.

        :param order: iterable of expected task ids; a string is iterated
                      character by character, so 'AB' means 'A' then 'B'.
        """
        for wanted_id in order:
            received_id = self.sch.get_work(WORKER)['task_id']
            self.assertEqual(received_id, wanted_id)
            self.sch.add_task(WORKER, wanted_id, status=DONE)

        # Nothing further may be handed out once the expected sequence is done
        leftover_id = self.sch.get_work(WORKER)['task_id']
        self.assertEqual(leftover_id, None)

    def test_priorities(self):
        """Independent tasks are handed out from highest to lowest priority."""
        for task_id, priority in (('A', 10), ('B', 5), ('C', 15), ('D', 9)):
            self.sch.add_task(WORKER, task_id, priority=priority)

        by_descending_priority = ['C', 'A', 'D', 'B']
        self.check_task_order(by_descending_priority)

    def test_priorities_default_and_negative(self):
        """A task without explicit priority sorts below priority 1 and above -20."""
        tasks = [
            ('A', {'priority': 10}),
            ('B', {}),  # no explicit priority
            ('C', {'priority': 15}),
            ('D', {'priority': -20}),
            ('E', {'priority': 1}),
        ]
        for task_id, options in tasks:
            self.sch.add_task(WORKER, task_id, **options)

        self.check_task_order(['C', 'A', 'E', 'B', 'D'])

    def test_priorities_and_dependencies(self):
        """A shared dependency runs first, then the rest by descending priority.

        Z has the lowest own priority (1) but A and C depend on it, so the
        expected order is Z, A, B, C, D.
        """
        tasks = [
            ('A', dict(deps=['Z'], priority=10)),
            ('B', dict(priority=5)),
            ('C', dict(deps=['Z'], priority=3)),
            ('D', dict(priority=2)),
            ('Z', dict(priority=1)),
        ]
        for task_id, options in tasks:
            self.sch.add_task(WORKER, task_id, **options)

        self.check_task_order(['Z', 'A', 'B', 'C', 'D'])

    def test_priority_update_dependency_after_scheduling(self):
        """Dependencies added before their high-priority dependent still run first.

        A (1) and B (5, needs A) are registered before C (10, needs B); the
        whole chain A, B, C is expected ahead of the independent D (6).
        """
        scheduler = self.sch
        scheduler.add_task(WORKER, 'A', priority=1)
        scheduler.add_task(WORKER, 'B', priority=5, deps=['A'])
        scheduler.add_task(WORKER, 'C', priority=10, deps=['B'])
        scheduler.add_task(WORKER, 'D', priority=6)

        expected_order = ['A', 'B', 'C', 'D']
        self.check_task_order(expected_order)

    def test_disable(self):
        """After three FAILED reports the task is listed as DISABLED and is not handed out."""
        scheduler = self.sch

        def count(status):
            # Number of tasks the scheduler lists under the given status
            return len(scheduler.task_list(status, ''))

        scheduler.add_task(WORKER, 'A')
        for _ in range(3):
            scheduler.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        # Re-adding the task must not make it available for work
        scheduler.add_task(WORKER, 'A')
        offered = scheduler.get_work(WORKER)['task_id']
        self.assertEqual(offered, None)

    def test_disable_and_reenable(self):
        """A disabled task becomes FAILED and schedulable again after re_enable_task."""
        scheduler = self.sch

        def count(status):
            # Number of tasks the scheduler lists under the given status
            return len(scheduler.task_list(status, ''))

        scheduler.add_task(WORKER, 'A')
        for _ in range(3):
            scheduler.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        scheduler.re_enable_task('A')

        # should be enabled at this point
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('FAILED'), 1)
        scheduler.add_task(WORKER, 'A')
        offered = scheduler.get_work(WORKER)['task_id']
        self.assertEqual(offered, 'A')

    def test_disable_and_reenable_and_disable_again(self):
        """A re-enabled task is disabled again only after three further FAILED reports.

        Sequence: three failures disable A; re_enable_task turns it back into
        a FAILED, schedulable task; one more failure keeps it enabled; two
        additional failures disable it again and it is no longer handed out.
        """
        scheduler = self.sch

        def count(status):
            # Number of tasks the scheduler lists under the given status
            return len(scheduler.task_list(status, ''))

        def next_task_id():
            # Id of the task the scheduler currently offers to WORKER
            return scheduler.get_work(WORKER)['task_id']

        scheduler.add_task(WORKER, 'A')
        for _ in range(3):
            scheduler.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        scheduler.re_enable_task('A')

        # should be enabled at this point
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('FAILED'), 1)
        scheduler.add_task(WORKER, 'A')
        self.assertEqual(next_task_id(), 'A')

        scheduler.add_task(WORKER, 'A', status=FAILED)

        # should be still enabled
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('FAILED'), 1)
        scheduler.add_task(WORKER, 'A')
        self.assertEqual(next_task_id(), 'A')

        for _ in range(2):
            scheduler.add_task(WORKER, 'A', status=FAILED)

        # should be disabled now
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)
        scheduler.add_task(WORKER, 'A')
        self.assertEqual(next_task_id(), None)

    def test_disable_and_done(self):
        """Reporting a disabled task as DONE removes it from the DISABLED list.

        After that, re-adding the task is expected to make it available
        to WORKER again.
        """
        scheduler = self.sch

        def count(status):
            # Number of tasks the scheduler lists under the given status
            return len(scheduler.task_list(status, ''))

        scheduler.add_task(WORKER, 'A')
        for _ in range(3):
            scheduler.add_task(WORKER, 'A', status=FAILED)

        # should be disabled at this point
        self.assertEqual(count('DISABLED'), 1)
        self.assertEqual(count('FAILED'), 0)

        scheduler.add_task(WORKER, 'A', status=DONE)

        # should be enabled at this point
        self.assertEqual(count('DISABLED'), 0)
        self.assertEqual(count('DONE'), 1)
        scheduler.add_task(WORKER, 'A')
        offered = scheduler.get_work(WORKER)['task_id']
        self.assertEqual(offered, 'A')

    def test_disable_by_worker(self):
        """A task added with status DISABLED is re-enabled by adding it again."""
        scheduler = self.sch

        scheduler.add_task(WORKER, 'A', status=DISABLED)
        disabled_before = scheduler.task_list('DISABLED', '')
        self.assertEqual(len(disabled_before), 1)

        scheduler.add_task(WORKER, 'A')

        # should be enabled at this point
        disabled_after = scheduler.task_list('DISABLED', '')
        self.assertEqual(len(disabled_after), 0)
        scheduler.add_task(WORKER, 'A')
        offered = scheduler.get_work(WORKER)['task_id']
        self.assertEqual(offered, 'A')

    def test_priority_update_dependency_chain(self):
        """The whole chain under a high-priority task runs before a mid-priority one.

        A (10) needs B (5), which needs C (1); the expected order is C, B, A
        and only then the independent D (6).
        """
        tasks = [
            ('A', dict(priority=10, deps=['B'])),
            ('B', dict(priority=5, deps=['C'])),
            ('C', dict(priority=1)),
            ('D', dict(priority=6)),
        ]
        for task_id, options in tasks:
            self.sch.add_task(WORKER, task_id, **options)

        self.check_task_order(['C', 'B', 'A', 'D'])

    def test_priority_no_decrease_with_multiple_updates(self):
        """A later, lower-priority dependent does not push a shared dependency back.

        A (1) is needed by B (10) and afterwards by C (5); A is still expected
        first, followed by B, the independent D (6), and finally C.
        """
        scheduler = self.sch
        scheduler.add_task(WORKER, 'A', priority=1)
        scheduler.add_task(WORKER, 'B', priority=10, deps=['A'])
        scheduler.add_task(WORKER, 'C', priority=5, deps=['A'])
        scheduler.add_task(WORKER, 'D', priority=6)

        expected_order = ['A', 'B', 'D', 'C']
        self.check_task_order(expected_order)

    def test_unique_tasks(self):
        """get_work reports total pending tasks and those not shared with another worker.

        WORKER adds A, B and C; a second worker also adds B. The response for
        WORKER must show 3 pending tasks, 2 of them unique.
        """
        for task_id in ('A', 'B', 'C'):
            self.sch.add_task(WORKER, task_id)
        other_worker = WORKER + "_2"
        self.sch.add_task(other_worker, 'B')

        work = self.sch.get_work(WORKER)
        self.assertEqual(3, work['n_pending_tasks'])
        self.assertEqual(2, work['n_unique_pending'])

    def test_prefer_more_dependents(self):
        """Among equal-priority tasks, the one with more dependents goes first.

        B has two dependents (C, D) while A has one (E), so B is expected
        before A; the full expected order is B, A, C, D, E.
        """
        for task_id in ('A', 'B'):
            self.sch.add_task(WORKER, task_id)
        for task_id, needs in (('C', 'B'), ('D', 'B'), ('E', 'A')):
            self.sch.add_task(WORKER, task_id, deps=[needs])

        self.check_task_order('BACDE')

    def test_prefer_readier_dependents(self):
        """A task that alone unblocks a dependent is preferred.

        D is the only dependency of E, whereas F and G each need A, B and C;
        D is expected first, giving the order D, A, B, C, F, G, E.
        """
        scheduler = self.sch
        for task_id in ('A', 'B', 'C', 'D'):
            scheduler.add_task(WORKER, task_id)
        for task_id in ('F', 'G'):
            scheduler.add_task(WORKER, task_id, deps=['A', 'B', 'C'])
        scheduler.add_task(WORKER, 'E', deps=['D'])

        self.check_task_order('DABCFGE')

    def test_ignore_done_dependents(self):
        """Check the hand-out order D, C, A, B, E, F for the graph built below.

        D has priority 1 and is expected first. C is then expected ahead of
        A and B -- presumably because E's other dependency (D) is already
        done at that point, while F still needs both A and B.
        """
        scheduler = self.sch
        for task_id in ('A', 'B', 'C'):
            scheduler.add_task(WORKER, task_id)
        scheduler.add_task(WORKER, 'D', priority=1)
        for task_id, needs in (('E', ['C', 'D']), ('F', ['A', 'B'])):
            scheduler.add_task(WORKER, task_id, deps=needs)

        self.check_task_order('DCABEF')