Ejemplo n.º 1
0
    def test_complete_exception(self):
        """Check that a task still runs when its sibling raises in complete().

        B requires both A (whose complete() raises) and C (a plain dummy
        task): C is expected to run, while A and B are expected not to.
        """
        class A(DummyTask):

            def complete(self):
                raise Exception("doh")

        broken_dep = A()

        class C(DummyTask):
            pass

        healthy_dep = C()

        class B(DummyTask):

            def requires(self):
                return broken_dep, healthy_dep

        parent = B()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        worker = Worker(scheduler=scheduler, worker_id="foo")
        # add() is expected to report failure, run() to report success.
        self.assertFalse(worker.add(parent))
        self.assertTrue(worker.run())
        self.assertFalse(parent.has_run)
        self.assertTrue(healthy_dep.has_run)
        self.assertFalse(broken_dep.has_run)
        worker.stop()
Ejemplo n.º 2
0
    def test_ping_retry(self):
        """ Verify that the worker keeps pinging the scheduler after a ping fails.

        Relies on real sleeping so the keep-alive thread gets time to run,
        which is not pretty but is what is being exercised here.
        """
        scheduler = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)

        # Kept on the test instance so the nested fail_ping can update it.
        self._total_pings = 0

        def fail_ping(worker):
            # Called from within the keep-alive thread: count the ping, then fail.
            self._total_pings += 1
            raise Exception("Some random exception")

        scheduler.ping = fail_ping

        # A very short ping interval keeps the test fast.
        pinging_worker = Worker(scheduler=scheduler, worker_id="foo", ping_interval=0.01)

        # Give the keep-alive thread some time to ping repeatedly.
        time.sleep(0.1)
        pinging_worker.stop()
        self.assertTrue(
            self._total_pings > 1,
            msg="Didn't retry pings (%d pings performed)" % (self._total_pings,)
        )
Ejemplo n.º 3
0
    def test_requires_exception(self):
        """Check that a task still runs when its sibling raises in requires().

        B requires both A (whose requires() raises) and C (a plain dummy
        task): C is expected to run, while A and B are expected not to.
        """
        class A(DummyTask):

            def requires(self):
                raise Exception("doh")

        broken_dep = A()

        class C(DummyTask):
            pass

        healthy_dep = C()

        class B(DummyTask):

            def requires(self):
                return broken_dep, healthy_dep

        parent = B()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        worker = Worker(scheduler=scheduler, worker_id="foo")
        # add() is expected to report failure, run() to report success.
        self.assertFalse(worker.add(parent))
        self.assertTrue(worker.run())
        self.assertFalse(parent.has_run)
        self.assertTrue(healthy_dep.has_run)
        self.assertFalse(broken_dep.has_run)
        worker.stop()
Ejemplo n.º 4
0
    def test_interleaved_workers(self):
        """Two workers share a scheduler and disagree about how task "B()" is produced.

        Worker X knows B as a runnable task that requires A; worker Y only
        knows it as an external task that never reports itself complete.
        Running Y first must leave A and B incomplete; running X afterwards
        must complete both.
        """
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            # Same task family as B, so both share the task id "B()".
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w.add(b)
        w2.add(eb)
        logging.debug("RUNNING BROKEN WORKER")
        w2.run()
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())
        # Stop both workers, as the sibling interleaved-worker tests do.
        w.stop()
        w2.stop()
Ejemplo n.º 5
0
 def setUp(self):
     """Create one scheduler and the three workers attached to it."""
     # InstanceCache.disable()
     scheduler = CentralPlannerScheduler(
         retry_delay=100,
         remove_delay=1000,
         worker_disconnect_delay=10,
     )
     self.sch = scheduler
     self.w = Worker(scheduler=scheduler, worker_id='X')
     self.w_raise = Worker(scheduler=scheduler, worker_id='X_raise', raise_on_error=True)
     self.w2 = Worker(scheduler=scheduler, worker_id='Y')
     # Keep a reference to time.time as it is at set-up time.
     self.time = time.time
Ejemplo n.º 6
0
    def test_worker_executes_fork_handler(self):
        """Run a stub task with two worker processes and inspect both result queues."""
        class Stub(luigi.Task):
            def complete(self):
                return False

            def run(self):
                pass

        scheduler = CentralPlannerScheduler()
        worker = Worker(scheduler=scheduler, worker_processes=2)
        worker.add(Stub())
        worker.run()

        time.sleep(0.1)

        # Three entries are read from each queue; afterwards both must be empty.
        worker_entries = [self.worker_queue.get() for _ in range(3)]
        for expected in ("both", "master"):
            self.assertIn(expected, worker_entries)

        child_entries = [self.child_queue.get() for _ in range(3)]
        for expected in ("both", "child"):
            self.assertIn(expected, child_entries)

        self.assertTrue(self.worker_queue.empty())
        self.assertTrue(self.child_queue.empty())
Ejemplo n.º 7
0
    def test_allow_reschedule_with_many_missing_deps(self):
        """B depends on twenty A tasks that each need two runs before they are complete."""
        class A(Task):

            """ Task that only reports complete after it has run twice """
            i = luigi.IntParameter()

            runs = 0

            def complete(self):
                return self.runs >= 2

            def run(self):
                self.runs += 1

        class B(Task):
            done = False

            def requires(self):
                return map(A, range(20))

            def complete(self):
                return self.done

            def run(self):
                self.done = True

        root = B()
        worker = Worker(scheduler=self.sch, worker_id='X', max_reschedules=1)
        self.assertTrue(worker.add(root))
        self.assertFalse(worker.run())

        # B can only be done if its dependencies were rescheduled and run twice.
        self.assertTrue(root.complete())
        self.assertTrue(all(dep.complete() for dep in root.deps()))
Ejemplo n.º 8
0
    def test_interleaved_workers2(self):
        """Two dependency-free tasks share the id "B()": one runnable, one external."""
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        runnable = B()
        external = ExternalB()

        self.assertEqual(external.task_id, "B()")

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        runnable_worker = Worker(scheduler=scheduler, worker_id='X')
        external_worker = Worker(scheduler=scheduler, worker_id='Y')

        self.assertTrue(external_worker.add(external))
        self.assertTrue(runnable_worker.add(runnable))

        # The worker that only knows the external variant must not complete B.
        self.assertTrue(external_worker.run())
        self.assertFalse(runnable.complete())
        self.assertTrue(runnable_worker.run())
        self.assertTrue(runnable.complete())
        runnable_worker.stop()
        external_worker.stop()
Ejemplo n.º 9
0
 def test_task_limit_exceeded(self):
     """Run ForkBombTask(3, 2) and expect exactly three of the four listed leaves to complete."""
     w = Worker()
     t = ForkBombTask(3, 2)
     w.add(t)
     w.run()
     self.assertFalse(t.complete())
     leaf_branches = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 1, 1)]
     leaf_tasks = [ForkBombTask(3, 2, branch) for branch in leaf_branches]
     # assertEqual: the assertEquals alias is deprecated and was removed in
     # Python 3.12.
     self.assertEqual(
         3,
         sum(leaf.complete() for leaf in leaf_tasks),
         "should have gracefully completed as much as possible even though the single last leaf didn't get scheduled")
Ejemplo n.º 10
0
    def _run(self, dt, interval):
        """Add the generated tasks plus their follow-up tasks to a worker and run it.

        Every task returned by ``self.gen_tasks(dt, interval)`` is added to a
        new worker.  For each ``RunMetricRules`` task among them one follow-up
        task is added as well:

        * handler ``'save_to_mongodb'``: a ``SaveMetrics`` task, which is fed
          through a ``FilterMetrics`` task first when ``filter_rules`` is set;
        * any other handler: a ``MetricsHandler`` task given that handler.
        """
        w = Worker(scheduler=self.sch)
        # Materialise once, because the tasks are iterated twice below.
        tasks = list(self.gen_tasks(dt, interval))
        # Explicit loop instead of map(): on Python 3 map() is lazy, so the
        # tasks would never actually be added.
        for task in tasks:
            w.add(task)

        for t in tasks:
            if not isinstance(t, RunMetricRules):
                continue

            if t.handler == 'save_to_mongodb':
                require_task = t
                if t.filter_rules:
                    # Filter the metrics before saving them.
                    require_task = FilterMetrics(require=t, filter_rules=t.filter_rules)
                    w.add(require_task)

                # Save the (possibly filtered) metrics.
                kwargs = dict(**self.mongo_conf)
                kwargs.update({"require": require_task, "date": dt, 'app_id': self.app_id, 'interval': interval})
                w.add(SaveMetrics(**kwargs))
            else:
                kwargs = dict(**self.mongo_conf)
                kwargs.update({"require": t,
                               "date": dt,
                               'app_id': self.app_id,
                               'interval': interval,
                               'handler_func': t.handler})
                w.add(MetricsHandler(**kwargs))

        w.run()
Ejemplo n.º 11
0
    def test_interleaved_workers3(self):
        """Worker X runs A in a background thread while worker Y, which owns B, runs in the foreground."""
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        dependency = A()

        class B(DummyTask):
            def requires(self):
                return dependency

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        dependent = B()

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )

        first_worker = Worker(scheduler=scheduler, worker_id='X', keep_alive=True, count_uniques=True)
        second_worker = Worker(scheduler=scheduler, worker_id='Y', keep_alive=True, count_uniques=True,
                               wait_interval=0.1)

        self.assertTrue(first_worker.add(dependency))
        self.assertTrue(second_worker.add(dependent))

        # X runs concurrently; Y's run() is the call the test waits on.
        background_run = threading.Thread(target=first_worker.run)
        background_run.start()
        self.assertTrue(second_worker.run())

        self.assertTrue(dependency.complete())
        self.assertTrue(dependent.complete())

        first_worker.stop()
        second_worker.stop()
Ejemplo n.º 12
0
class AssistantTest(unittest.TestCase):
    """Tests where an assistant worker runs tasks that another worker added."""

    def run(self, result=None):
        """Run the test with a fresh scheduler, an assistant and a regular worker.

        TestCase.run is overridden so that the regular worker stays inside its
        ``with`` block for the whole duration of the test.
        """
        scheduler = Scheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.sch = scheduler
        self.assistant = Worker(scheduler=scheduler, worker_id='Y', assistant=True)
        with Worker(scheduler=scheduler, worker_id='X') as regular_worker:
            self.w = regular_worker
            super(AssistantTest, self).run(result)

    def test_get_work(self):
        """A task added by the regular worker is completed by running the assistant."""
        task = Dummy2Task('123')
        self.w.add(task)

        self.assertFalse(task.complete())
        self.assistant.run()
        self.assertTrue(task.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family is not run and ends up FAILED."""
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        task = Dummy3Task('123')
        self.w.add(task)

        self.assertFalse(task.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(task.complete())
        failed_ids = list(self.sch.task_list('FAILED', '').keys())
        self.assertEqual(failed_ids, [task.task_id])

    def test_unimported_job_type(self):
        """The assistant fails a task without module info and runs it once the module is set."""
        MODULE_CONTENTS = b'''
import luigi


class UnimportedTask(luigi.Task):
    def complete(self):
        return False
'''

        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # Without the module information needed for importing, the run must fail.
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        failed_ids = list(self.sch.task_list('FAILED', '').keys())
        self.assertEqual(failed_ids, [task.task_id])

        # With the module name bound to task.task_module, the run must succeed.
        with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module:
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            done_ids = list(self.sch.task_list('DONE', '').keys())
            self.assertEqual(done_ids, [task.task_id])
Ejemplo n.º 13
0
    def test_wait_jitter_default(self, mock_sleep, mock_random):
        """ check the jitter used by a default-configured worker """
        mock_random.return_value = 1.0
        worker = Worker()
        sleeper = worker._sleeper()

        # First step: mocked random value 1.0, expected sleep 2.0.
        six.next(sleeper)
        mock_random.assert_called_with(0, 5.0)
        mock_sleep.assert_called_with(2.0)

        # Second step: mocked random value 3.3, expected sleep 4.3.
        mock_random.return_value = 3.3
        six.next(sleeper)
        mock_random.assert_called_with(0, 5.0)
        mock_sleep.assert_called_with(4.3)
Ejemplo n.º 14
0
 def test_disabled_shutdown_hook(self):
     """Send SIGUSR1 to a worker built with no_install_shutdown_handler=True; its task must still end up DONE."""
     worker = Worker(scheduler=self.sch, keep_alive=True, no_install_shutdown_handler=True)
     with worker:
         try:
             # First attempt at killing the worker.
             os.kill(os.getpid(), signal.SIGUSR1)
         except AttributeError:
             raise unittest.SkipTest('signal.SIGUSR1 not found on this system')
         # Second attempt, this time via a task constructed with the same signal.
         task = SuicidalWorker(signal.SIGUSR1)
         worker.add(task)
         worker.run()
         # Despite all the SIGUSR1 signals the task must be listed as DONE.
         done_ids = list(self.sch.task_list('DONE', '').keys())
         self.assertEqual(done_ids, [task.task_id])
Ejemplo n.º 15
0
    def test_die_for_non_unique_pending(self):
        """Worker Y's run() returns without completing anything while worker X holds task A."""
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        dependency = A()

        class B(DummyTask):
            def requires(self):
                return dependency

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        dependent = B()

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )

        first_worker = Worker(scheduler=scheduler, worker_id='X', keep_alive=True, count_uniques=True)
        second_worker = Worker(scheduler=scheduler, worker_id='Y', keep_alive=True, count_uniques=True,
                               wait_interval=0.1)

        self.assertTrue(first_worker.add(dependent))
        self.assertTrue(second_worker.add(dependent))

        # X fetches A from the scheduler but never runs it.
        self.assertEqual(first_worker._get_work()[0], 'A()')
        self.assertTrue(second_worker.run())

        self.assertFalse(dependency.complete())
        self.assertFalse(dependent.complete())

        second_worker.stop()
Ejemplo n.º 16
0
    def test_wait_jitter(self, mock_sleep, mock_random):
        """ check that the configured jitter amount is used """
        mock_random.return_value = 1.0

        worker = Worker()
        sleeper = worker._sleeper()

        # First step: mocked random value 1.0, expected sleep 2.0.
        six.next(sleeper)
        mock_random.assert_called_with(0, 10.0)
        mock_sleep.assert_called_with(2.0)

        # Second step: mocked random value 2.0, expected sleep 3.0.
        mock_random.return_value = 2.0
        six.next(sleeper)
        mock_random.assert_called_with(0, 10.0)
        mock_sleep.assert_called_with(3.0)
Ejemplo n.º 17
0
class ParallelSchedulingTest(unittest.TestCase):
    """Tests for Worker.add() with multiprocess scheduling against a mocked scheduler."""

    def setUp(self):
        # The scheduler is a plain Mock, so every add_task call is recorded.
        self.sch = mock.Mock()
        self.w = Worker(scheduler=self.sch, worker_id='x')

    def added_tasks(self, status):
        """Return the task ids passed to add_task with the given status keyword."""
        return [kw['task_id'] for args, kw in self.sch.add_task.call_args_list if kw['status'] == status]

    def test_multiprocess_scheduling_with_overlapping_dependencies(self):
        """All fifteen tasks are added once, split between PENDING and DONE."""
        self.w.add(OverlappingSelfDependenciesTask(5, 2), True)
        self.assertEqual(15, self.sch.add_task.call_count)
        self.assertEqual(set((
            'OverlappingSelfDependenciesTask(n=1, k=1)',
            'OverlappingSelfDependenciesTask(n=2, k=1)',
            'OverlappingSelfDependenciesTask(n=2, k=2)',
            'OverlappingSelfDependenciesTask(n=3, k=1)',
            'OverlappingSelfDependenciesTask(n=3, k=2)',
            'OverlappingSelfDependenciesTask(n=4, k=1)',
            'OverlappingSelfDependenciesTask(n=4, k=2)',
            'OverlappingSelfDependenciesTask(n=5, k=2)',
        )), set(self.added_tasks('PENDING')))
        self.assertEqual(set((
            'OverlappingSelfDependenciesTask(n=0, k=0)',
            'OverlappingSelfDependenciesTask(n=0, k=1)',
            'OverlappingSelfDependenciesTask(n=1, k=0)',
            'OverlappingSelfDependenciesTask(n=1, k=2)',
            'OverlappingSelfDependenciesTask(n=2, k=0)',
            'OverlappingSelfDependenciesTask(n=3, k=0)',
            'OverlappingSelfDependenciesTask(n=4, k=0)',
        )), set(self.added_tasks('DONE')))

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_exception_in_complete(self, send):
        """An exception in complete() sends one error email and adds no task."""
        self.w.add(ExceptionCompleteTask(), multiprocess=True)
        # A real assertion: Mock has no check_called_once(), so calling it
        # only created a child mock and verified nothing.
        self.assertEqual(1, send.call_count)
        self.assertEqual(0, self.sch.add_task.call_count)
        self.assertIn('assert False', send.call_args[0][1])

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_unpicklable_exception_in_complete(self, send):
        """An unpicklable exception in complete() is still reported by email."""
        # verify exception can't be pickled
        self.assertRaises(Exception, UnpicklableExceptionTask().complete)
        try:
            UnpicklableExceptionTask().complete()
        except Exception as e:
            ex = e
        self.assertRaises(pickle.PicklingError, pickle.dumps, ex)

        # verify this can run async
        self.w.add(UnpicklableExceptionTask(), multiprocess=True)
        # See test_raise_exception_in_complete: check_called_once() was a no-op.
        self.assertEqual(1, send.call_count)
        self.assertEqual(0, self.sch.add_task.call_count)
        self.assertIn('raise UnpicklableException()', send.call_args[0][1])

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_exception_in_requires(self, send):
        """An exception in requires() sends one error email and adds no task."""
        self.w.add(ExceptionRequiresTask(), multiprocess=True)
        # Mock has no check_called_once(); assert on the call count instead.
        self.assertEqual(1, send.call_count)
        self.assertEqual(0, self.sch.add_task.call_count)
Ejemplo n.º 18
0
    def setUp(self):
        """Skip without luigi.sqs_history; otherwise stub its config and create scheduler and workers."""
        try:
            from luigi.sqs_history import SqsHistory, SqsTaskHistory, SqsWorkerHistory
        except ImportError as import_error:
            raise unittest.SkipTest('Could not test WorkerTaskGlobalEventHandlerTests: %s' % import_error)

        # Swap SqsHistory._config for a version that installs our dummy queue.
        def fake_config(history, *args):
            history._queue = DummyQueue()
        SqsHistory._config = fake_config

        # InstanceCache.disable()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        self.sch = scheduler
        self.w = Worker(scheduler=scheduler, worker_id='X')
        self.w2 = Worker(scheduler=scheduler, worker_id='Y')
        # Keep a reference to time.time as it is at set-up time.
        self.time = time.time
Ejemplo n.º 19
0
    def test_purge_hung_worker_override_timeout_time(self, mock_time):
        """With a task worker_timeout of 10, the task is still running at t=10 and gone at t=11."""
        worker = Worker(worker_processes=2, wait_interval=0.01, timeout=5)
        mock_time.time.return_value = 0
        worker.add(HungWorker(10))
        worker._run_task('HungWorker(worker_timeout=10)')

        # (mocked clock value, number of tasks expected to be running afterwards)
        for now, still_running in ((10, 1), (11, 0)):
            mock_time.time.return_value = now
            worker._handle_next_task()
            self.assertEqual(still_running, len(worker._running_tasks))
Ejemplo n.º 20
0
    def test_purge_hung_worker_default_timeout_time(self, mock_time):
        """With the worker's worker_timeout of 5, the task is still running at t=5 and gone at t=6."""
        worker = Worker(worker_processes=2, wait_interval=0.01, worker_timeout=5)
        mock_time.time.return_value = 0
        worker.add(HungWorker())
        worker._run_task('HungWorker(worker_timeout=None)')

        # (mocked clock value, number of tasks expected to be running afterwards)
        for now, still_running in ((5, 1), (6, 0)):
            mock_time.time.return_value = now
            worker._handle_next_task()
            self.assertEqual(still_running, len(worker._running_tasks))
Ejemplo n.º 21
0
    def test_single_threaded_worker_doesnot_execute_fork_handler(self):
        """Run a stub task with a single worker process and expect both queues to stay empty."""
        class Stub(luigi.Task):
            def complete(self):
                return False

            def run(self):
                pass

        scheduler = CentralPlannerScheduler()
        worker = Worker(scheduler=scheduler, worker_processes=1)
        worker.add(Stub())
        worker.run()

        time.sleep(0.1)

        for queue in (self.worker_queue, self.child_queue):
            self.assertTrue(queue.empty())
Ejemplo n.º 22
0
    def test_purge_multiple_workers(self):
        """Start a SIGTERM task and a SIGKILL task, then handle the next task three times."""
        worker = Worker(worker_processes=2, wait_interval=0.01)
        term_task = SuicidalWorker(signal.SIGTERM)
        kill_task = SuicidalWorker(signal.SIGKILL)
        worker.add(term_task)
        worker.add(kill_task)

        worker._run_task(term_task.task_id)
        worker._run_task(kill_task.task_id)
        time.sleep(1.0)

        # The test passes as long as none of these calls raises.
        for _ in range(3):
            worker._handle_next_task()
Ejemplo n.º 23
0
class AssistantTest(unittest.TestCase):
    """Tests where an assistant worker runs tasks that another worker added."""

    def run(self, result=None):
        """Run the test with a fresh scheduler, an assistant and a regular worker.

        TestCase.run is overridden so that the regular worker stays inside its
        ``with`` block for the whole duration of the test.
        """
        scheduler = CentralPlannerScheduler(retry_delay=100, remove_delay=1000, worker_disconnect_delay=10)
        self.sch = scheduler
        self.assistant = Worker(scheduler=scheduler, worker_id='Y', assistant=True)
        with Worker(scheduler=scheduler, worker_id='X') as regular_worker:
            self.w = regular_worker
            super(AssistantTest, self).run(result)

    def test_get_work(self):
        """A task added by the regular worker is completed by running the assistant."""
        task = Dummy2Task('123')
        self.w.add(task)

        self.assertFalse(task.complete())
        self.assistant.run()
        self.assertTrue(task.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family is not run and ends up FAILED."""
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        task = Dummy3Task('123')
        self.w.add(task)

        self.assertFalse(task.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(task.complete())
        failed_ids = list(self.sch.task_list('FAILED', '').keys())
        self.assertEqual(failed_ids, [str(task)])

    def test_unimported_job_type(self):
        """The assistant fails a task without module info and runs it once the module is set."""
        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # Without the module information needed for importing, the run must fail.
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        failed_ids = list(self.sch.task_list('FAILED', '').keys())
        self.assertEqual(failed_ids, ['UnimportedTask()'])

        # With the module name set on the task, the run must succeed.
        task.task_module = 'dummy_test_module.not_imported'
        self.w.add(task)
        self.assertTrue(self.assistant.run())
        done_ids = list(self.sch.task_list('DONE', '').keys())
        self.assertEqual(done_ids, ['UnimportedTask()'])
Ejemplo n.º 24
0
    def test_purge_hung_worker_default_timeout_time(self, mock_time):
        """With the worker's timeout of 5, the task is still running at t=5 and gone at t=6."""
        worker = Worker(worker_processes=2, wait_interval=0.01, timeout=5)
        mock_time.time.return_value = 0
        hanging_task = HangTheWorkerTask()
        worker.add(hanging_task)
        worker._run_task(hanging_task.task_id)

        # (mocked clock value, number of tasks expected to be running afterwards)
        for now, still_running in ((5, 1), (6, 0)):
            mock_time.time.return_value = now
            worker._handle_next_task()
            self.assertEqual(still_running, len(worker._running_tasks))
Ejemplo n.º 25
0
    def _test_context_manager(self, force_multiprocessing):
        CONTEXT_MANAGER_MODULE = b'''
class MyContextManager(object):
    def __init__(self, task_process):
        self.task = task_process.task
    def __enter__(self):
        assert not self.task.run_event.is_set(), "the task should not have run yet"
        self.task.enter_event.set()
        return self
    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        assert self.task.run_event.is_set(), "the task should have run"
        self.task.exit_event.set()
'''

        class DummyEventRecordingTask(luigi.Task):
            def __init__(self, *args, **kwargs):
                self.enter_event = multiprocessing.Event()
                self.exit_event = multiprocessing.Event()
                self.run_event = multiprocessing.Event()
                super(DummyEventRecordingTask, self).__init__(*args, **kwargs)

            def run(self):
                assert self.enter_event.is_set(), "the context manager should have been entered"
                assert not self.exit_event.is_set(), "the context manager should not have been exited yet"
                assert not self.run_event.is_set(), "the task should not have run yet"
                self.run_event.set()

            def complete(self):
                return self.run_event.is_set()

        with temporary_unloaded_module(CONTEXT_MANAGER_MODULE) as module_name:
            t = DummyEventRecordingTask()
            w = Worker(task_process_context=module_name + '.MyContextManager',
                       force_multiprocessing=force_multiprocessing)
            w.add(t)
            self.assertTrue(w.run())
            self.assertTrue(t.complete())
            self.assertTrue(t.enter_event.is_set())
            self.assertTrue(t.exit_event.is_set())
Ejemplo n.º 26
0
    def test_stop_worker_kills_subprocesses(self):
        """After Worker.stop(), the started task process is no longer a child of this process."""
        worker = Worker(worker_processes=2)
        hung_task = HungWorker()
        worker.add(hung_task)

        worker._run_task(hung_task.task_id)
        running_pids = [proc.pid for proc in worker._running_tasks.values()]
        self.assertEqual(1, len(running_pids))
        task_pid = running_pids[0]

        def is_running():
            # True while the task's pid is among this process's children.
            children = psutil.Process().children()
            return any(child.pid == task_pid for child in children)

        self.assertTrue(is_running())
        worker.stop()
        self.assertFalse(is_running())
Ejemplo n.º 27
0
def invoke_task(godzilla_task, task_id=None, **kwargs):
    """Run the named task through a luigi worker and return its output path.

    ``godzilla_task`` is looked up in ``tasks`` and converted with
    ``as_luigi(**kwargs)``, with ``task_id`` passed along as a keyword
    argument.  The worker is always stopped, whether or not the run succeeds.

    Raises:
        Exception: if the worker reports that the run did not succeed.
    """
    # One pre-formatted string prints the same text under both the Python 2
    # print statement and the Python 3 print function; the bare print
    # statement used before is a SyntaxError on Python 3.
    print("%s %s %s" % (godzilla_task, task_id, kwargs))
    task = tasks[godzilla_task]

    kwargs['task_id'] = task_id
    task = task.as_luigi(**kwargs)

    w = Worker(scheduler=sch)
    w.add(task)

    try:
        w.run()
        if not w.run_succeeded:
            raise Exception("job failed")
        return task.output().path
    finally:
        w.stop()
Ejemplo n.º 28
0
class MultiprocessWorkerTest(unittest.TestCase):
    """Tests for a two-process worker driven by a RemoteScheduler with mocked methods."""

    def setUp(self):
        """Build a RemoteScheduler with add_worker/add_task mocked and a two-process worker."""
        scheduler = RemoteScheduler()
        scheduler.add_worker = Mock()
        scheduler.add_task = Mock()
        self.scheduler = scheduler
        self.worker = Worker(scheduler=scheduler, worker_id='X', worker_processes=2)

    def tearDown(self):
        self.worker.stop()

    def test_positive_path(self):
        """The worker run succeeds and the top-level task has run."""
        first = DummyTask("a")
        second = DummyTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [first, second]

        top = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(top))

        # Work handed out by the mocked get_work: a, b, C, then nothing twice.
        work_sequence = [(3, str(first)), (2, str(second)), (1, str(top)), (0, None), (0, None)]
        self.scheduler.get_work = Mock(side_effect=work_sequence)

        self.assertTrue(self.worker.run())
        self.assertTrue(top.has_run)

    def test_path_with_task_failures(self):
        """The worker run reports failure when both requirements raise in run()."""
        class FailingTask(DummyTask):

            def run(self):
                raise Exception("I am failing")

        first = FailingTask("a")
        second = FailingTask("b")

        class MultipleRequirementTask(DummyTask):

            def requires(self):
                return [first, second]

        top = MultipleRequirementTask("C")

        self.assertTrue(self.worker.add(top))

        # Work handed out by the mocked get_work: a, b, C, then nothing twice.
        work_sequence = [(3, str(first)), (2, str(second)), (1, str(top)), (0, None), (0, None)]
        self.scheduler.get_work = Mock(side_effect=work_sequence)

        self.assertFalse(self.worker.run())
Ejemplo n.º 29
0
    def test_process_killed_handler(self, task_proc):
        """The PROCESS_FAILURE handler receives the task whose mocked process is not alive."""
        handled = []

        @HangTheWorkerTask.event_handler(Event.PROCESS_FAILURE)
        def store_task(t, error_msg):
            self.assertTrue(error_msg)
            handled.append(t)

        worker = Worker()
        hanging_task = HangTheWorkerTask()
        # Stand-in task process that reports itself dead with exit code -14.
        dead_process = mock.MagicMock(is_alive=lambda: False, exitcode=-14, task=hanging_task)
        task_proc.return_value = dead_process

        worker.add(hanging_task)
        worker._run_task(hanging_task.task_id)
        worker._handle_next_task()

        self.assertEqual(handled, [hanging_task])
Ejemplo n.º 30
0
    def test_timeout_handler(self, mock_time):
        """The TIMEOUT handler receives a task with worker_timeout=1 once the mocked clock reads 3."""
        handled = []

        @HangTheWorkerTask.event_handler(Event.TIMEOUT)
        def store_task(t, error_msg):
            self.assertTrue(error_msg)
            handled.append(t)

        worker = Worker(worker_processes=2, wait_interval=0.01, timeout=5)
        mock_time.time.return_value = 0
        hanging_task = HangTheWorkerTask(worker_timeout=1)
        worker.add(hanging_task)
        worker._run_task(hanging_task.task_id)

        # Move the mocked clock from 0 to 3 before handling the next task.
        mock_time.time.return_value = 3
        worker._handle_next_task()

        self.assertEqual(handled, [hanging_task])
Ejemplo n.º 31
0
 def test_fails_registering_signal(self):
     """Worker() must be constructible even when signal.SIGUSR1 cannot be looked up."""
     # The spec'd mock only has a 'signal' attribute, so getting
     # signal.SIGUSR1 from it raises an AttributeError.
     restricted_signal = mock.patch('luigi.worker.signal', spec=['signal'])
     with restricted_signal:
         Worker()
Ejemplo n.º 32
0
 def test_asserts_for_worker(self):
     """
     Check that Worker() asserts on its configuration being sane
     """
     Worker(wait_interval=1)  # A wait interval of 1 must be accepted
     with self.assertRaises(AssertionError):
         Worker(wait_interval=0)
Ejemplo n.º 33
0
    def test_interleaved_workers2(self):
        """Two dependency-free tasks share the id "B()": one runnable, one external.

        Running the worker that only knows the external variant must not
        complete B; running the other worker afterwards must.
        """
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            # Same task family as B, so both share the task id "B()".
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        # assertEqual: the assertEquals alias is deprecated and was removed
        # in Python 3.12.
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())
        w.stop()
        w2.stop()
Ejemplo n.º 34
0
class WorkerEmailTest(EmailTest):
    """Tests for the error emails observed through ``self.last_email``.

    The deprecated ``assertEquals``/``assertNotEquals`` aliases (removed in
    Python 3.12) are replaced with their supported equivalents throughout.
    """

    def setUp(self):
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        self.worker.stop()

    @with_config(EMAIL_CONFIG)
    def test_connection_error(self):
        """Adding a task through an unreachable scheduler sends a framework-error email."""
        sch = RemoteScheduler(host="this_host_doesnt_exist",
                              port=1337,
                              connect_timeout=1)
        worker = Worker(scheduler=sch)
        # Registered as a cleanup so the worker is stopped even when one of
        # the assertions below fails.
        self.addCleanup(worker.stop)

        self.waits = 0

        def dummy_wait():
            # Replaces the scheduler's wait between attempts; only counts calls.
            self.waits += 1

        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertIsNone(self.last_email)
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertIsNotNone(self.last_email)
        self.assertEqual(self.last_email[0],
                         "Luigi: Framework error while scheduling %s" % (a, ))

    @with_config(EMAIL_CONFIG)
    def test_complete_error(self):
        """An exception in complete() sends a 'failed scheduling' email and the task never runs."""
        class A(DummyTask):
            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertIsNone(self.last_email)
        self.worker.add(a)
        self.assertEqual(("Luigi: %s failed scheduling" % (a, )),
                         self.last_email[0])
        self.worker.run()
        self.assertEqual(("Luigi: %s failed scheduling" % (a, )),
                         self.last_email[0])
        self.assertFalse(a.has_run)

    @with_config(EMAIL_CONFIG)
    def test_complete_return_value(self):
        """A complete() without a return value is reported like a scheduling failure."""
        class A(DummyTask):
            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertIsNone(self.last_email)
        self.worker.add(a)
        self.assertEqual(("Luigi: %s failed scheduling" % (a, )),
                         self.last_email[0])
        self.worker.run()
        self.assertEqual(("Luigi: %s failed scheduling" % (a, )),
                         self.last_email[0])
        self.assertFalse(a.has_run)

    @with_config(EMAIL_CONFIG)
    def test_run_error(self):
        """An exception in run() sends a 'FAILED' email, but only once the worker runs."""
        class A(luigi.Task):
            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertIsNone(self.last_email)
        self.worker.run()
        self.assertEqual(("Luigi: %s FAILED" % (a, )), self.last_email[0])

    def test_no_error(self):
        """A task that schedules and runs cleanly sends no email at all."""
        class A(DummyTask):
            pass

        a = A()
        self.assertIsNone(self.last_email)
        self.worker.add(a)
        self.assertIsNone(self.last_email)
        self.worker.run()
        self.assertIsNone(self.last_email)
        self.assertTrue(a.complete())
Ejemplo n.º 35
0
class WorkerTaskGlobalEventHandlerTests(unittest.TestCase):
    """Verify the task events that workers record in their history queue.

    ``SqsHistory._config`` is replaced in ``setUp`` so that history objects
    use a ``DummyQueue``; each test decodes the queued messages and checks
    which events were recorded.
    """

    @with_config(
        dict(worker_history=dict(record_worker_history_sqs='true',
                                 sqs_queue_name='name',
                                 aws_access_key_id='key',
                                 aws_secret_access_key='secret_key'),
             worker_metadata=dict(meta1='data1')))
    def setUp(self):
        """Create two workers sharing one scheduler, with a dummy history queue.

        Raises:
            unittest.SkipTest: if ``luigi.sqs_history`` cannot be imported.
        """
        try:
            # SqsTaskHistory/SqsWorkerHistory are not used below; they stay in
            # the import so that a missing name still skips the test.
            from luigi.sqs_history import SqsHistory, SqsTaskHistory, SqsWorkerHistory
        except ImportError as e:
            raise unittest.SkipTest(
                'Could not test WorkerTaskGlobalEventHandlerTests: %s' % e)

        # Replace _config method with one that uses our dummy queue.
        def fake_config(s, *args):
            s._queue = DummyQueue()

        SqsHistory._config = fake_config

        self.sch = CentralPlannerScheduler(retry_delay=100,
                                           remove_delay=1000,
                                           worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        """Restore ``time.time`` if a test replaced it and stop both workers."""
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns *t*."""
        time.time = lambda: t

    def _parse_task_events(self, messages):
        """Group decoded messages by their ``'event'`` value.

        Messages without an ``'event'`` entry (or with a falsy one) are
        dropped.  Returns a dict mapping each event to the list of its
        messages, in the order they were given.
        """
        results = {}
        for m in messages:
            event = m.get('event')
            if not event:
                continue
            # setdefault avoids rebinding ``messages`` while iterating over it.
            results.setdefault(event, []).append(m)
        return results

    def _recorded_events(self, worker):
        """Decode the messages queued by *worker* and group them by event."""
        decoded = [
            json.loads(m.get_body())
            for m in worker._worker_history_impl._queue.messages
        ]
        return self._parse_task_events(decoded)

    def test_dep(self):
        """Running B -> A records start, success, timing and dependency events."""
        class A(Task):
            param_a = luigi.Parameter(default="a")

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

        event_messages = self._recorded_events(self.w)

        self.assertEqual(4, len(event_messages))

        # Check started events:
        started_events = event_messages.get(Event.START)
        self.assertEqual(2, len(started_events))
        self.assertEqual('A(param_a=a)', started_events[0]['task']['id'])
        self.assertEqual('B()', started_events[1]['task']['id'])

        # Check success events
        success_events = event_messages.get(Event.SUCCESS)
        self.assertEqual(2, len(success_events))
        self.assertEqual('A(param_a=a)', success_events[0]['task']['id'])
        self.assertEqual('B()', success_events[1]['task']['id'])

        # Check processing time events
        processing_events = event_messages.get(Event.PROCESSING_TIME)
        self.assertEqual(2, len(processing_events))
        self.assertEqual('A(param_a=a)', processing_events[0]['task']['id'])
        self.assertIn('processing_time', processing_events[0])
        self.assertEqual('B()', processing_events[1]['task']['id'])
        self.assertIn('processing_time', processing_events[1])

        # Check dependency event
        dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED)
        self.assertEqual(1, len(dependency_event))
        self.assertEqual('B()', dependency_event[0]['task']['id'])
        self.assertEqual('A(param_a=a)',
                         dependency_event[0]['dependency_task']['id'])

    def test_external_dep(self):
        """An incomplete external dependency records discovered and missing events."""
        class A(ExternalTask):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.assertTrue(self.w.add(b))
        self.assertTrue(self.w.run())

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

        event_messages = self._recorded_events(self.w)

        self.assertEqual(2, len(event_messages))

        # Check dependency event
        dependency_event = event_messages.get(Event.DEPENDENCY_DISCOVERED)
        self.assertEqual(1, len(dependency_event))
        self.assertEqual('B()', dependency_event[0]['task']['id'])
        self.assertEqual('A()', dependency_event[0]['dependency_task']['id'])

        # Check dependency missing event
        dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING)
        self.assertEqual(1, len(dependency_missing_event))
        self.assertEqual('A()', dependency_missing_event[0]['task']['id'])

    def test_fail(self):
        """A task whose run() raises records a failure event with the exception."""
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        a.has_run = False

        self.assertTrue(self.w.add(a))
        self.assertFalse(self.w.run())

        self.assertTrue(a.has_run)

        event_messages = self._recorded_events(self.w)

        self.assertEqual(3, len(event_messages))

        # Check failure event
        failure_event = event_messages.get(Event.FAILURE)
        self.assertEqual(1, len(failure_event))
        self.assertEqual('A()', failure_event[0]['task']['id'])
        self.assertEqual('Exception()', failure_event[0]['exception'])

    def test_unknown_dep(self):
        """Each worker records the missing/present event for its own dependency."""
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.assertTrue(self.w.add(b_a))
        self.assertTrue(self.w2.add(b_c))

        event_messages = self._recorded_events(self.w)

        # Verify missing event
        dependency_missing_event = event_messages.get(Event.DEPENDENCY_MISSING)
        self.assertEqual(1, len(dependency_missing_event))
        self.assertEqual('A()', dependency_missing_event[0]['task']['id'])

        event_messages2 = self._recorded_events(self.w2)

        # Verify present event
        dependency_present_event = event_messages2.get(
            Event.DEPENDENCY_PRESENT)
        self.assertEqual(1, len(dependency_present_event))
        self.assertEqual('C()', dependency_present_event[0]['task']['id'])

    def test_broken_task(self):
        """A requires() returning a non-Task object records a broken-task event."""
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(object):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = False

            def complete(self):
                return self.has_run

        b = B()
        b.has_run = False

        # The outcome of add() is deliberately ignored: only the recorded event
        # is checked below.  Catching Exception (rather than a bare except)
        # keeps KeyboardInterrupt/SystemExit from being swallowed.
        try:
            self.w.add(b)
        except Exception:
            pass

        event_messages = self._recorded_events(self.w)

        # Verify broken event
        broken_event = event_messages.get(Event.BROKEN_TASK)
        self.assertEqual(1, len(broken_event))
        self.assertEqual('B()', broken_event[0]['task']['id'])
        self.assertEqual("Exception('requires() must return Task objects',)",
                         broken_event[0]['exception'])
Ejemplo n.º 36
0
class AssistantTest(unittest.TestCase):
    """Tests for a worker created with ``assistant=True``."""

    def run(self, result=None):
        """Run the test with a scheduler, an assistant and a regular worker.

        Overrides ``TestCase.run`` so that worker 'X' is used as a context
        manager around the whole test.  The value returned by the base
        implementation is passed through, so callers that rely on ``run()``
        returning the result object (Python 3.3+) keep working.
        """
        self.sch = Scheduler(retry_delay=100,
                             remove_delay=1000,
                             worker_disconnect_delay=10)
        self.assistant = Worker(scheduler=self.sch,
                                worker_id='Y',
                                assistant=True)
        with Worker(scheduler=self.sch, worker_id='X') as w:
            self.w = w
            return super(AssistantTest, self).run(result)

    def test_get_work(self):
        """The assistant runs a task that the other worker scheduled."""
        d = Dummy2Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assistant.run()
        self.assertTrue(d.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family fails when the assistant runs."""
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        d = Dummy3Task('123')
        self.w.add(d)

        self.assertFalse(d.complete())
        self.assertFalse(self.assistant.run())
        self.assertFalse(d.complete())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()),
                         [d.task_id])

    def test_unimported_job_type(self):
        """The assistant needs ``task_module`` to run a task it has not imported."""
        MODULE_CONTENTS = b'''
import luigi


class UnimportedTask(luigi.Task):
    def complete(self):
        return False
'''

        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        task = NotImportedTask()

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(task)
        self.assertFalse(self.assistant.run())
        self.assertEqual(list(self.sch.task_list('FAILED', '').keys()),
                         [task.task_id])

        # check that it can import with the right module
        with temporary_unloaded_module(MODULE_CONTENTS) as task.task_module:
            self.w.add(task)
            self.assertTrue(self.assistant.run())
            self.assertEqual(list(self.sch.task_list('DONE', '').keys()),
                             [task.task_id])
Ejemplo n.º 37
0
 def setUp(self):
     """Create a default scheduler and a worker attached to it."""
     scheduler = CentralPlannerScheduler()
     self.sch = scheduler
     self.w = Worker(scheduler=scheduler)
Ejemplo n.º 38
0
 def setUp(self):
     """Run the parent setUp, then create worker 'foo' on a fresh scheduler."""
     super(WorkerEmailTest, self).setUp()
     scheduler_options = dict(
         retry_delay=100,
         remove_delay=1000,
         worker_disconnect_delay=10,
     )
     scheduler = CentralPlannerScheduler(**scheduler_options)
     self.worker = Worker(scheduler=scheduler, worker_id="foo")
Ejemplo n.º 39
0
 def setUp(self):
     """Create worker 'x' backed by a mock scheduler."""
     mock_scheduler = mock.Mock()
     self.sch = mock_scheduler
     self.w = Worker(scheduler=mock_scheduler, worker_id='x')
Ejemplo n.º 40
0
class ParallelSchedulingTest(unittest.TestCase):
    """Tests for ``Worker.add(..., multiprocess=True)`` against a mock scheduler."""

    def setUp(self):
        """Create worker 'x' backed by a mock scheduler."""
        self.sch = mock.Mock()
        self.w = Worker(scheduler=self.sch, worker_id='x')

    def added_tasks(self, status):
        """Return the task ids passed to ``add_task`` with the given *status*."""
        return [
            kw['task_id'] for args, kw in self.sch.add_task.call_args_list
            if kw['status'] == status
        ]

    def test_number_of_processes(self):
        """An explicit ``processes`` value is forwarded to ``multiprocessing.Pool``."""
        import multiprocessing
        real_pool = multiprocessing.Pool(1)
        with mock.patch('multiprocessing.Pool') as mocked_pool:
            mocked_pool.return_value = real_pool
            self.w.add(OverlappingSelfDependenciesTask(n=1, k=1),
                       multiprocess=True,
                       processes=1234)
            mocked_pool.assert_called_once_with(processes=1234)

    def test_zero_processes(self):
        """``processes=0`` is forwarded to ``multiprocessing.Pool`` as ``None``."""
        import multiprocessing
        real_pool = multiprocessing.Pool(1)
        with mock.patch('multiprocessing.Pool') as mocked_pool:
            mocked_pool.return_value = real_pool
            self.w.add(OverlappingSelfDependenciesTask(n=1, k=1),
                       multiprocess=True,
                       processes=0)
            mocked_pool.assert_called_once_with(processes=None)

    def test_children_terminated(self):
        """Multiprocess scheduling leaves no additional running children behind."""
        before_children = running_children()
        with pause_gc():
            self.w.add(
                OverlappingSelfDependenciesTask(5, 2),
                multiprocess=True,
            )
            self.assertLessEqual(running_children(), before_children)

    def test_multiprocess_scheduling_with_overlapping_dependencies(self):
        """Every task in the overlapping graph is added once with the right status."""
        self.w.add(OverlappingSelfDependenciesTask(5, 2), True)
        self.assertEqual(15, self.sch.add_task.call_count)
        self.assertEqual(
            {
                OverlappingSelfDependenciesTask(n=1, k=1).task_id,
                OverlappingSelfDependenciesTask(n=2, k=1).task_id,
                OverlappingSelfDependenciesTask(n=2, k=2).task_id,
                OverlappingSelfDependenciesTask(n=3, k=1).task_id,
                OverlappingSelfDependenciesTask(n=3, k=2).task_id,
                OverlappingSelfDependenciesTask(n=4, k=1).task_id,
                OverlappingSelfDependenciesTask(n=4, k=2).task_id,
                OverlappingSelfDependenciesTask(n=5, k=2).task_id,
            }, set(self.added_tasks('PENDING')))
        self.assertEqual(
            {
                OverlappingSelfDependenciesTask(n=0, k=0).task_id,
                OverlappingSelfDependenciesTask(n=0, k=1).task_id,
                OverlappingSelfDependenciesTask(n=1, k=0).task_id,
                OverlappingSelfDependenciesTask(n=1, k=2).task_id,
                OverlappingSelfDependenciesTask(n=2, k=0).task_id,
                OverlappingSelfDependenciesTask(n=3, k=0).task_id,
                OverlappingSelfDependenciesTask(n=4, k=0).task_id,
            }, set(self.added_tasks('DONE')))

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_exception_in_complete(self, send):
        """A failing complete() sends one email and adds the task as UNKNOWN."""
        self.w.add(ExceptionCompleteTask(), multiprocess=True)
        # ``send.check_called_once()`` was a silent no-op: Mock auto-creates
        # unknown attributes, so nothing was verified.  Check the count instead.
        self.assertEqual(1, send.call_count)
        self.assertEqual(UNKNOWN, self.sch.add_task.call_args[1]['status'])
        self.assertFalse(self.sch.add_task.call_args[1]['runnable'])
        self.assertIn('assert False', send.call_args[0][1])

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_unpicklable_exception_in_complete(self, send):
        """An unpicklable exception from complete() is still reported by email."""
        # verify exception can't be pickled
        self.assertRaises(Exception, UnpicklableExceptionTask().complete)
        try:
            UnpicklableExceptionTask().complete()
        except Exception as e:
            ex = e
        self.assertRaises((pickle.PicklingError, AttributeError), pickle.dumps,
                          ex)

        # verify this can run async
        self.w.add(UnpicklableExceptionTask(), multiprocess=True)
        # See test_raise_exception_in_complete: check_called_once() verified nothing.
        self.assertEqual(1, send.call_count)
        self.assertEqual(UNKNOWN, self.sch.add_task.call_args[1]['status'])
        self.assertFalse(self.sch.add_task.call_args[1]['runnable'])
        self.assertIn('raise UnpicklableException()', send.call_args[0][1])

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_exception_in_requires(self, send):
        """A failing requires() sends one email and adds the task as UNKNOWN."""
        self.w.add(ExceptionRequiresTask(), multiprocess=True)
        # See test_raise_exception_in_complete: check_called_once() verified nothing.
        self.assertEqual(1, send.call_count)
        self.assertEqual(UNKNOWN, self.sch.add_task.call_args[1]['status'])
        self.assertFalse(self.sch.add_task.call_args[1]['runnable'])
 def test_default_multiprocessing_behavior(self):
     """With one worker process, a task process does not use multiprocessing."""
     with Worker(worker_processes=1) as single_worker:
         dummy = DummyTask("a")
         process = single_worker._create_task_process(dummy)
         self.assertFalse(process.use_multiprocessing)
 def test_force_multiprocessing(self):
     """``force_multiprocessing=True`` enables multiprocessing with one process."""
     with Worker(worker_processes=1, force_multiprocessing=True) as forced_worker:
         dummy = DummyTask("a")
         process = forced_worker._create_task_process(dummy)
         self.assertTrue(process.use_multiprocessing)
Ejemplo n.º 43
0
    def test_interleaved_workers(self):
        """Two workers register the same task id 'B()': one real, one external.

        Worker Y only knows B as an incomplete external task and must not
        complete anything; worker X knows the real B -> A chain and completes
        both tasks.
        """
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        # Stop both workers even if one of the assertions below fails.
        try:
            self.assertTrue(w.add(b))
            self.assertTrue(w2.add(eb))
            logging.debug("RUNNING BROKEN WORKER")
            self.assertTrue(w2.run())
            self.assertFalse(a.complete())
            self.assertFalse(b.complete())
            logging.debug("RUNNING FUNCTIONAL WORKER")
            self.assertTrue(w.run())
            self.assertTrue(a.complete())
            self.assertTrue(b.complete())
        finally:
            w.stop()
            w2.stop()
Ejemplo n.º 44
0
 def test_no_task_limit(self):
     """ForkBombTask(4, 2) is complete after a default worker adds and runs it."""
     worker = Worker()
     fork_bomb = ForkBombTask(4, 2)
     worker.add(fork_bomb)
     worker.run()
     is_done = fork_bomb.complete()
     self.assertTrue(is_done)
Ejemplo n.º 45
0
class ExceptionFormatTest(unittest.TestCase):
    """Checks the subject and body of error emails, in plain text and HTML."""

    def setUp(self):
        """Create a default scheduler and a worker attached to it."""
        self.sch = CentralPlannerScheduler()
        self.w = Worker(scheduler=self.sch)

    def tearDown(self):
        """Stop the worker created in ``setUp``.

        This hook was previously named ``tear_down``, which unittest never
        calls, so the worker was never stopped.
        """
        self.w.stop()

    # Backward-compatible alias for anything that called the old name directly.
    tear_down = tearDown

    def test_fail_run(self):
        """A task failing in run() produces a well-formed plain-text email."""
        task = FailRunTask(foo='foo', bar='bar')
        self._run_task(task)

    def test_fail_run_html(self):
        """A task failing in run() produces a well-formed HTML email."""
        task = FailRunTask(foo='foo', bar='bar')
        self._run_task_html(task)

    def test_fail_schedule(self):
        """A task failing while scheduling produces a well-formed plain-text email."""
        task = FailSchedulingTask(foo='foo', bar='bar')
        self._run_task(task)

    def test_fail_schedule_html(self):
        """A task failing while scheduling produces a well-formed HTML email."""
        task = FailSchedulingTask(foo='foo', bar='bar')
        self._run_task_html(task)

    @with_config({
        'core': {
            'error-email': '*****@*****.**',
            'email-prefix': '[TEST] '
        }
    })
    @mock.patch('luigi.notifications.send_error_email')
    def _run_task(self, task, mock_send):
        """Add and run *task*, then check the single plain-text email sent."""
        self.w.add(task)
        self.w.run()

        self.assertEqual(mock_send.call_count, 1)
        args, kwargs = mock_send.call_args
        self._check_subject(args[0], task)
        self._check_body(args[1], task, html=False)

    @with_config({
        'core': {
            'error-email': '*****@*****.**',
            'email-prefix': '[TEST] ',
            'email-type': 'html'
        }
    })
    @mock.patch('luigi.notifications.send_error_email')
    def _run_task_html(self, task, mock_send):
        """Add and run *task*, then check the single HTML email sent."""
        self.w.add(task)
        self.w.run()

        self.assertEqual(mock_send.call_count, 1)
        args, kwargs = mock_send.call_args
        self._check_subject(args[0], task)
        self._check_body(args[1], task, html=True)

    def _check_subject(self, subject, task):
        """Assert that the email subject mentions the task id."""
        self.assertIn(task.task_id, subject)

    def _check_body(self, body, task, html=False):
        """Assert that *body* contains the task name, parameters and error text."""
        if html:
            self.assertIn('<th>name</th><td>{}</td>'.format(task.task_family),
                          body)
            self.assertIn('<div class="highlight"', body)
            self.assertIn('Oops!', body)

            for param, value in task.param_kwargs.items():
                self.assertIn('<th>{}</th><td>{}</td>'.format(param, value),
                              body)
        else:
            self.assertIn('Name: {}\n'.format(task.task_family), body)
            self.assertIn('Parameters:\n', body)
            self.assertIn('TestException: Oops!', body)

            for param, value in task.param_kwargs.items():
                self.assertIn('{}: {}\n'.format(param, value), body)

    # NOTE: the addresses in the two tests below had all been collapsed to the
    # same "[email protected]" placeholder, which made the configured and the
    # additional recipients indistinguishable.  Distinct addresses are used so
    # the assertions can tell them apart.
    @with_config({"core": {"error-email": "a@a.a"}})
    def testEmailRecipients(self):
        """The configured address is combined with any additional recipients."""
        six.assertCountEqual(self, notifications._email_recipients(),
                             ["a@a.a"])
        six.assertCountEqual(self, notifications._email_recipients("b@b.b"),
                             ["a@a.a", "b@b.b"])
        six.assertCountEqual(
            self, notifications._email_recipients(["b@b.b", "c@c.c"]),
            ["a@a.a", "b@b.b", "c@c.c"])

    @with_config({"core": {}}, replace_sections=True)
    def testEmailRecipientsNoConfig(self):
        """Without a configured address only the additional recipients remain."""
        six.assertCountEqual(self, notifications._email_recipients(), [])
        six.assertCountEqual(self, notifications._email_recipients("a@a.a"),
                             ["a@a.a"])
        six.assertCountEqual(
            self, notifications._email_recipients(["a@a.a", "b@b.b"]),
            ["a@a.a", "b@b.b"])
Ejemplo n.º 46
0
 def test_task_limit_not_exceeded(self):
     """ForkBombTask(3, 2) is complete after a default worker adds and runs it."""
     worker = Worker()
     fork_bomb = ForkBombTask(3, 2)
     worker.add(fork_bomb)
     worker.run()
     is_done = fork_bomb.complete()
     self.assertTrue(is_done)
Ejemplo n.º 47
0
class ParallelSchedulingTest(unittest.TestCase):
    """Tests for ``Worker.add(..., multiprocess=True)`` against a mock scheduler."""

    def setUp(self):
        """Create worker 'x' backed by a mock scheduler."""
        self.sch = mock.Mock()
        self.w = Worker(scheduler=self.sch, worker_id='x')

    def added_tasks(self, status):
        """Return the task ids passed to ``add_task`` with the given *status*."""
        return [
            kw['task_id'] for args, kw in self.sch.add_task.call_args_list
            if kw['status'] == status
        ]

    def test_multiprocess_scheduling_with_overlapping_dependencies(self):
        """Every task in the overlapping graph is added once with the right status."""
        self.w.add(OverlappingSelfDependenciesTask(5, 2), True)
        self.assertEqual(15, self.sch.add_task.call_count)
        self.assertEqual(
            {
                'OverlappingSelfDependenciesTask(n=1, k=1)',
                'OverlappingSelfDependenciesTask(n=2, k=1)',
                'OverlappingSelfDependenciesTask(n=2, k=2)',
                'OverlappingSelfDependenciesTask(n=3, k=1)',
                'OverlappingSelfDependenciesTask(n=3, k=2)',
                'OverlappingSelfDependenciesTask(n=4, k=1)',
                'OverlappingSelfDependenciesTask(n=4, k=2)',
                'OverlappingSelfDependenciesTask(n=5, k=2)',
            }, set(self.added_tasks('PENDING')))
        self.assertEqual(
            {
                'OverlappingSelfDependenciesTask(n=0, k=0)',
                'OverlappingSelfDependenciesTask(n=0, k=1)',
                'OverlappingSelfDependenciesTask(n=1, k=0)',
                'OverlappingSelfDependenciesTask(n=1, k=2)',
                'OverlappingSelfDependenciesTask(n=2, k=0)',
                'OverlappingSelfDependenciesTask(n=3, k=0)',
                'OverlappingSelfDependenciesTask(n=4, k=0)',
            }, set(self.added_tasks('DONE')))

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_exception_in_complete(self, send):
        """A failing complete() sends one email and adds nothing to the scheduler."""
        self.w.add(ExceptionCompleteTask(), multiprocess=True)
        # ``send.check_called_once()`` was a silent no-op: Mock auto-creates
        # unknown attributes, so nothing was verified.  Check the count instead.
        self.assertEqual(1, send.call_count)
        self.assertEqual(0, self.sch.add_task.call_count)
        self.assertIn('assert False', send.call_args[0][1])

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_unpicklable_exception_in_complete(self, send):
        """An unpicklable exception from complete() is still reported by email."""
        # verify exception can't be pickled
        self.assertRaises(Exception, UnpicklableExceptionTask().complete)
        try:
            UnpicklableExceptionTask().complete()
        except Exception as e:
            ex = e
        self.assertRaises(pickle.PicklingError, pickle.dumps, ex)

        # verify this can run async
        self.w.add(UnpicklableExceptionTask(), multiprocess=True)
        # See test_raise_exception_in_complete: check_called_once() verified nothing.
        self.assertEqual(1, send.call_count)
        self.assertEqual(0, self.sch.add_task.call_count)
        self.assertIn('raise UnpicklableException()', send.call_args[0][1])

    @mock.patch('luigi.notifications.send_error_email')
    def test_raise_exception_in_requires(self, send):
        """A failing requires() sends one email and adds nothing to the scheduler."""
        self.w.add(ExceptionRequiresTask(), multiprocess=True)
        # See test_raise_exception_in_complete: check_called_once() verified nothing.
        self.assertEqual(1, send.call_count)
        self.assertEqual(0, self.sch.add_task.call_count)
Ejemplo n.º 48
0
class MultiprocessWorkerTest(unittest.TestCase):
    """Runs a worker with two processes against a scheduler with mocked calls."""

    def setUp(self):
        """Create a RemoteScheduler with mocked add_worker/add_task and its worker."""
        scheduler = RemoteScheduler()
        scheduler.add_worker = Mock()
        scheduler.add_task = Mock()
        self.scheduler = scheduler
        self.worker = Worker(scheduler=scheduler,
                             worker_id='X',
                             worker_processes=2)

    def tearDown(self):
        """Stop the worker created in ``setUp``."""
        self.worker.stop()

    def gw_res(self, pending, task_id):
        """Build a ``get_work`` response for *task_id* with *pending* tasks left."""
        return {
            'n_pending_tasks': pending,
            'task_id': task_id,
            'running_tasks': 0,
            'n_unique_pending': 0,
        }

    def _script_get_work(self, tasks):
        """Mock ``get_work`` to hand out *tasks* in order, then two empty responses."""
        responses = []
        remaining = len(tasks)
        for task in tasks:
            responses.append(self.gw_res(remaining, str(task)))
            remaining -= 1
        responses.append(self.gw_res(0, None))
        responses.append(self.gw_res(0, None))
        self.scheduler.get_work = Mock(side_effect=responses)

    def test_positive_path(self):
        """A task with two healthy requirements runs and the worker succeeds."""
        first = DummyTask("a")
        second = DummyTask("b")

        class MultipleRequirementTask(DummyTask):
            def requires(self):
                return [first, second]

        parent = MultipleRequirementTask("C")

        added = self.worker.add(parent)
        self.assertTrue(added)

        self._script_get_work([first, second, parent])

        succeeded = self.worker.run()
        self.assertTrue(succeeded)
        self.assertTrue(parent.has_run)

    def test_path_with_task_failures(self):
        """The worker reports failure when both requirements raise in run()."""
        class FailingTask(DummyTask):
            def run(self):
                raise Exception("I am failing")

        first = FailingTask("a")
        second = FailingTask("b")

        class MultipleRequirementTask(DummyTask):
            def requires(self):
                return [first, second]

        parent = MultipleRequirementTask("C")

        added = self.worker.add(parent)
        self.assertTrue(added)

        self._script_get_work([first, second, parent])

        succeeded = self.worker.run()
        self.assertFalse(succeeded)
Ejemplo n.º 49
0
 def setUp(self):
     """Create workers 'X' and 'Y' on one scheduler and remember ``time.time``."""
     # InstanceCache.disable()
     scheduler = CentralPlannerScheduler(
         retry_delay=100,
         remove_delay=1000,
         worker_disconnect_delay=10,
     )
     self.sch = scheduler
     self.w = Worker(scheduler=scheduler, worker_id='X')
     self.w2 = Worker(scheduler=scheduler, worker_id='Y')
     # Kept so the original function can be restored after a test replaces it.
     self.time = time.time
Ejemplo n.º 50
0
class AssistantTest(unittest.TestCase):
    """Tests for a worker created with ``assistant=True``."""

    def setUp(self):
        """Create a regular worker 'X' and an assistant 'Y' on one scheduler."""
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        self.sch = scheduler
        self.w = Worker(scheduler=scheduler, worker_id='X')
        self.assistant = Worker(scheduler=scheduler,
                                worker_id='Y',
                                assistant=True)

    def _task_ids(self, status):
        """Return the scheduler's task ids in *status* as a list."""
        return list(self.sch.task_list(status, '').keys())

    def test_get_work(self):
        """The assistant runs a task that the other worker scheduled."""
        dummy = Dummy2Task('123')
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        self.assistant.run()
        self.assertTrue(dummy.complete())

    def test_bad_job_type(self):
        """A task with an unknown task family fails when the assistant runs."""
        class Dummy3Task(Dummy2Task):
            task_family = 'UnknownTaskFamily'

        dummy = Dummy3Task('123')
        self.w.add(dummy)

        self.assertFalse(dummy.complete())
        assistant_ok = self.assistant.run()
        self.assertFalse(assistant_ok)
        self.assertFalse(dummy.complete())
        self.assertEqual(self._task_ids('FAILED'), [str(dummy)])

    def test_unimported_job_type(self):
        """The assistant needs ``task_module`` to run a task it has not imported."""
        class NotImportedTask(luigi.Task):
            task_family = 'UnimportedTask'
            task_module = None

        unimported = NotImportedTask()
        expected_ids = ['UnimportedTask()']

        # verify that it can't run the task without the module info necessary to import it
        self.w.add(unimported)
        self.assertFalse(self.assistant.run())
        self.assertEqual(self._task_ids('FAILED'), expected_ids)

        # check that it can import with the right module
        unimported.task_module = 'dummy_test_module.not_imported'
        self.w.add(unimported)
        self.assertTrue(self.assistant.run())
        self.assertEqual(self._task_ids('DONE'), expected_ids)
Ejemplo n.º 51
0
class WorkerTest(unittest.TestCase):
    """Basic scheduling/running tests with two workers on one scheduler."""

    def setUp(self):
        """Create workers 'X' and 'Y' on one scheduler and remember ``time.time``."""
        self.sch = CentralPlannerScheduler(retry_delay=100,
                                           remove_delay=1000,
                                           worker_disconnect_delay=10)
        self.w = Worker(scheduler=self.sch, worker_id='X')
        self.w2 = Worker(scheduler=self.sch, worker_id='Y')
        self.time = time.time

    def tearDown(self):
        """Restore ``time.time`` if a test replaced it."""
        if time.time != self.time:
            time.time = self.time

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns *t*."""
        time.time = lambda: t

    def test_dep(self):
        """Scheduling B, which requires A, runs both tasks."""
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()
        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()
        self.assertTrue(a.has_run)
        self.assertTrue(b.has_run)

    def test_external_dep(self):
        """B is not run while its external dependency A is incomplete."""
        class A(ExternalTask):
            def complete(self):
                return False

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertFalse(a.has_run)
        self.assertFalse(b.has_run)

    def test_fail(self):
        """B is not run when its dependency A raises in run()."""
        class A(Task):
            def run(self):
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        a = A()

        class B(Task):
            def requires(self):
                return a

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        b = B()

        a.has_run = False
        b.has_run = False

        self.w.add(b)
        self.w.run()

        self.assertTrue(a.has_run)
        self.assertFalse(b.has_run)

    def test_unknown_dep(self):
        """The first worker does not run B after a second worker re-schedules it."""
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def get_b(dep):
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            b = B()
            b.has_run = False
            return b

        b_a = get_b(A())
        b_c = get_b(C())

        self.w.add(b_a)
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        self.w2.add(b_c)

        # should not run anything - the worker should detect that A is broken
        self.w.run()
        self.assertFalse(b_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_interleaved_workers(self):
        """Two workers register the same task id 'B()': one real, one external.

        Running the worker that only knows the external B completes nothing;
        running the worker that knows the real B -> A chain completes both.
        """
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w.add(b)
        w2.add(eb)
        logging.debug("RUNNING BROKEN WORKER")
        w2.run()
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        w.run()
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        """Same id clash as above, but B has no dependencies."""
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        w2 = Worker(scheduler=sch, worker_id='Y')

        w2.add(eb)
        w.add(b)

        w2.run()
        self.assertFalse(b.complete())
        w.run()
        self.assertTrue(b.complete())
Ejemplo n.º 52
0
 def setUp(self):
     """Create a regular worker 'X' and an assistant 'Y' on one scheduler."""
     scheduler = CentralPlannerScheduler(
         retry_delay=100,
         remove_delay=1000,
         worker_disconnect_delay=10,
     )
     self.sch = scheduler
     self.w = Worker(scheduler=scheduler, worker_id='X')
     self.assistant = Worker(scheduler=scheduler, worker_id='Y', assistant=True)
Ejemplo n.º 53
0
    def test_interleaved_workers3(self):
        """Worker Y finishes B while worker X runs its dependency A in a thread."""
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )

        worker_x = Worker(scheduler=scheduler, worker_id='X', keep_alive=True)
        worker_y = Worker(
            scheduler=scheduler,
            worker_id='Y',
            keep_alive=True,
            wait_interval=0.1,
        )

        self.assertTrue(worker_x.add(a))
        self.assertTrue(worker_y.add(b))

        # Worker X runs in the background while worker Y runs in this thread.
        background = threading.Thread(target=worker_x.run)
        background.start()
        self.assertTrue(worker_y.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

        worker_x.stop()
        worker_y.stop()
Ejemplo n.º 54
0
class WorkerEmailTest(unittest.TestCase):
    """Tests for the error e-mails produced while a Worker schedules and runs tasks.

    Each test receives an ``emails`` list through the ``email_patch``
    decorator; presumably it collects the messages that would have been sent
    (the decorator is defined outside this class -- confirm there).
    """

    def setUp(self):
        """Create worker "foo" backed by a fresh CentralPlannerScheduler."""
        super(WorkerEmailTest, self).setUp()
        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        self.worker = Worker(scheduler=sch, worker_id="foo")

    def tearDown(self):
        """Stop the worker created in setUp."""
        self.worker.stop()

    @email_patch
    def test_connection_error(self, emails):
        """Adding a task through an unreachable RemoteScheduler sends a framework-error e-mail."""
        sch = RemoteScheduler('http://tld.invalid:1337', connect_timeout=1)
        worker = Worker(scheduler=sch)
        # Stop this extra worker even if an assertion below fails.
        self.addCleanup(worker.stop)

        self.waits = 0

        def dummy_wait():
            self.waits += 1

        # Replace the scheduler's wait hook with a counter (presumably so the
        # retries do not actually pause -- confirm against RemoteScheduler).
        sch._wait = dummy_wait

        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        worker.add(a)
        self.assertEqual(self.waits, 2)  # should attempt to add it 3 times
        self.assertNotEqual(emails, [])
        self.assertIn("Luigi: Framework error while scheduling %s" % (a, ),
                      emails[0])

    @email_patch
    def test_complete_error(self, emails):
        """An exception raised by complete() yields a "failed scheduling" e-mail and no run."""
        class A(DummyTask):
            def complete(self):
                raise Exception("b0rk")

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.worker.run()
        # The first e-mail is checked again after run().
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_complete_return_value(self, emails):
        """A complete() that returns nothing is reported like a scheduling failure."""
        class A(DummyTask):
            def complete(self):
                pass  # no return value should be an error

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.worker.run()
        self.assertIn("Luigi: %s failed scheduling" % (a, ), emails[0])
        self.assertFalse(a.has_run)

    @email_patch
    def test_run_error(self, emails):
        """An exception raised by run() yields a "FAILED" e-mail, but only once the worker runs."""
        class A(luigi.Task):
            def complete(self):
                return False

            def run(self):
                raise Exception("b0rk")

        a = A()
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertIn("Luigi: %s FAILED" % (a, ), emails[0])

    @email_patch
    def test_no_error(self, emails):
        """A task that schedules and runs cleanly produces no e-mail at all."""
        class A(DummyTask):
            pass

        a = A()
        self.assertEqual(emails, [])
        self.worker.add(a)
        self.assertEqual(emails, [])
        self.worker.run()
        self.assertEqual(emails, [])
        self.assertTrue(a.complete())
Ejemplo n.º 55
0
 def test_ping_thread_shutdown(self):
     """The keep-alive thread is alive inside the ``with`` block and not alive after it."""
     with Worker(ping_interval=0.01) as worker:
         alive_inside = worker._keep_alive_thread.is_alive()
         self.assertTrue(alive_inside)
     # Leaving the context manager is expected to have shut the thread down.
     alive_after = worker._keep_alive_thread.is_alive()
     self.assertFalse(alive_after)
Ejemplo n.º 56
0
 def test_ping_thread_shutdown(self):
     """The keep-alive thread runs after construction and is no longer alive after stop()."""
     worker = Worker(ping_interval=0.01)
     running_before = worker._keep_alive_thread.is_alive()
     self.assertTrue(running_before)
     # should stop within 0.01 s
     worker.stop()
     running_after = worker._keep_alive_thread.is_alive()
     self.assertFalse(running_after)
Ejemplo n.º 57
0
class WorkerTest(unittest.TestCase):
    def setUp(self):
        """Create a scheduler with two workers ('X' and 'Y') and remember the real ``time.time``."""
        # InstanceCache.disable()
        scheduler = CentralPlannerScheduler(
            retry_delay=100,
            remove_delay=1000,
            worker_disconnect_delay=10,
        )
        self.sch = scheduler
        self.w = Worker(scheduler=scheduler, worker_id='X')
        self.w2 = Worker(scheduler=scheduler, worker_id='Y')
        # Kept so tearDown can undo a setTime() override.
        self.time = time.time

    def tearDown(self):
        if time.time != self.time:
            time.time = self.time
        self.w.stop()
        self.w2.stop()

    def setTime(self, t):
        """Replace ``time.time`` with a function that always returns ``t``."""
        def frozen_clock():
            return t

        time.time = frozen_clock

    def test_dep(self):
        """Scheduling B, which requires A, and running the worker runs both tasks."""
        class A(Task):
            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        upstream = A()

        class B(Task):
            def requires(self):
                return upstream

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        downstream = B()
        # Neither task counts as complete until its run() flips the flag.
        for task in (upstream, downstream):
            task.has_run = False

        scheduled = self.w.add(downstream)
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertTrue(succeeded)
        self.assertTrue(upstream.has_run)
        self.assertTrue(downstream.has_run)

    def test_external_dep(self):
        """B depends on an external task that is never complete, so nothing gets run."""
        class A(ExternalTask):
            def complete(self):
                return False

        external = A()

        class B(Task):
            def requires(self):
                return external

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        dependent = B()

        for task in (external, dependent):
            task.has_run = False

        scheduled = self.w.add(dependent)
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertTrue(succeeded)

        # The flags set above must be untouched: neither task was executed.
        self.assertFalse(external.has_run)
        self.assertFalse(dependent.has_run)

    def test_fail(self):
        """When A raises in run(), the worker run reports failure and dependent B is not run."""
        class A(Task):
            def run(self):
                # Record the attempt before failing so the test can see it.
                self.has_run = True
                raise Exception()

            def complete(self):
                return self.has_run

        failing = A()

        class B(Task):
            def requires(self):
                return failing

            def run(self):
                self.has_run = True

            def complete(self):
                return self.has_run

        dependent = B()

        for task in (failing, dependent):
            task.has_run = False

        scheduled = self.w.add(dependent)
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertFalse(succeeded)

        self.assertTrue(failing.has_run)
        self.assertFalse(dependent.has_run)

    def test_unknown_dep(self):
        """A second worker re-schedules B with a different dependency; the first must not run B."""
        # see central_planner_test.CentralPlannerTest.test_remove_dep
        class A(ExternalTask):
            def complete(self):
                return False

        class C(Task):
            def complete(self):
                return True

        def make_b(dep):
            """Return a never-complete B instance that requires ``dep``."""
            class B(Task):
                def requires(self):
                    return dep

                def run(self):
                    self.has_run = True

                def complete(self):
                    return False

            task = B()
            task.has_run = False
            return task

        b_on_a = make_b(A())
        b_on_c = make_b(C())

        first_scheduled = self.w.add(b_on_a)
        self.assertTrue(first_scheduled)
        # So now another worker goes in and schedules C -> B
        # This should remove the dep A -> B but will screw up the first worker
        second_scheduled = self.w2.add(b_on_c)
        self.assertTrue(second_scheduled)

        # should not run anything - the worker should detect that A is broken
        first_result = self.w.run()
        self.assertFalse(first_result)
        self.assertFalse(b_on_a.has_run)
        # not sure what should happen??
        # self.w2.run() # should run B since C is fulfilled
        # self.assertTrue(b_c.has_run)

    def test_unfulfilled_dep(self):
        """A dependency that looked complete at scheduling time but is not at run time still gets run."""
        class A(Task):
            def complete(self):
                return self.done

            def run(self):
                self.done = True

        a = A()

        class B(A):
            def requires(self):
                return a

        b = B()
        b.done = False
        # A reports complete while B is being scheduled...
        a.done = True

        scheduled = self.w.add(b)
        self.assertTrue(scheduled)
        # ...and is flipped back to incomplete before the worker runs.
        a.done = False
        self.w.run()
        for task in (a, b):
            self.assertTrue(task.complete())

    def test_avoid_infinite_reschedule(self):
        """Running tasks that never report complete must return (with failure) rather than loop."""
        class A(Task):
            def complete(self):
                return False

        class B(Task):
            def complete(self):
                return False

            def requires(self):
                return A()

        scheduled = self.w.add(B())
        self.assertTrue(scheduled)
        succeeded = self.w.run()
        self.assertFalse(succeeded)

    def test_interleaved_workers(self):
        """A worker that only knows B as an external task must not stop another worker running it.

        ``ExternalB`` shares task family "B" (and therefore task id "B()") with
        the real ``B``. Worker Y schedules the external, never-complete
        variant; worker X schedules the real B, which requires A.
        """
        class A(DummyTask):
            pass

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()
        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        # Register the stops up front so both workers are shut down even if
        # an assertion below fails.
        self.addCleanup(w.stop)
        w2 = Worker(scheduler=sch, worker_id='Y')
        self.addCleanup(w2.stop)

        self.assertTrue(w.add(b))
        self.assertTrue(w2.add(eb))
        logging.debug("RUNNING BROKEN WORKER")
        # Worker Y reports success but leaves both tasks incomplete.
        self.assertTrue(w2.run())
        self.assertFalse(a.complete())
        self.assertFalse(b.complete())
        logging.debug("RUNNING FUNCTIONAL WORKER")
        self.assertTrue(w.run())
        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_interleaved_workers2(self):
        """Same task id scheduled as external by worker Y and as runnable by worker X.

        Worker Y's run leaves B incomplete; worker X's run completes it.
        """
        # two tasks without dependencies, one external, one not
        class B(DummyTask):
            pass

        class ExternalB(ExternalTask):
            task_family = "B"

            def complete(self):
                return False

        b = B()
        eb = ExternalB()

        self.assertEqual(eb.task_id, "B()")

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id='X')
        # Register the stops up front so both workers are shut down even if
        # an assertion below fails.
        self.addCleanup(w.stop)
        w2 = Worker(scheduler=sch, worker_id='Y')
        self.addCleanup(w2.stop)

        self.assertTrue(w2.add(eb))
        self.assertTrue(w.add(b))

        self.assertTrue(w2.run())
        self.assertFalse(b.complete())
        self.assertTrue(w.run())
        self.assertTrue(b.complete())

    def test_interleaved_workers3(self):
        """Two keep-alive workers share a scheduler: X runs A in a thread while Y runs B.

        B requires A, and A sleeps for 0.1 s before completing, so worker Y
        (which only scheduled B) has to wait for worker X to finish A.
        """
        class A(DummyTask):
            def run(self):
                logging.debug('running A')
                time.sleep(0.1)
                super(A, self).run()

        a = A()

        class B(DummyTask):
            def requires(self):
                return a

            def run(self):
                logging.debug('running B')
                super(B, self).run()

        b = B()

        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)

        w = Worker(scheduler=sch, worker_id='X', keep_alive=True)
        # Register the stop immediately so the worker is shut down even when
        # one of the assertions below fails.
        self.addCleanup(w.stop)
        w2 = Worker(scheduler=sch,
                    worker_id='Y',
                    keep_alive=True,
                    wait_interval=0.1)
        self.addCleanup(w2.stop)

        self.assertTrue(w.add(a))
        self.assertTrue(w2.add(b))

        # Worker X runs in the background; worker Y runs in this thread.
        threading.Thread(target=w.run).start()
        self.assertTrue(w2.run())

        self.assertTrue(a.complete())
        self.assertTrue(b.complete())

    def test_complete_exception(self):
        """Tests that a task is still scheduled if its sister task crashes in the complete() method"""

        class A(DummyTask):
            def complete(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        # Stop the worker even if one of the assertions below fails.
        self.addCleanup(w.stop)
        # add() reports failure because A.complete() raises, yet C is still
        # scheduled and gets run.
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)

    def test_requires_exception(self):
        """A sister task is still scheduled and run when another dependency crashes in requires()."""
        class A(DummyTask):
            def requires(self):
                raise Exception("doh")

        a = A()

        class C(DummyTask):
            pass

        c = C()

        class B(DummyTask):
            def requires(self):
                return a, c

        b = B()
        sch = CentralPlannerScheduler(retry_delay=100,
                                      remove_delay=1000,
                                      worker_disconnect_delay=10)
        w = Worker(scheduler=sch, worker_id="foo")
        # Stop the worker even if one of the assertions below fails.
        self.addCleanup(w.stop)
        # add() reports failure because A.requires() raises, yet C is still
        # scheduled and gets run.
        self.assertFalse(w.add(b))
        self.assertTrue(w.run())
        self.assertFalse(b.has_run)
        self.assertTrue(c.has_run)
        self.assertFalse(a.has_run)