Beispiel #1
0
    def test_job_order(self):
        """Check execution order on a clean run, then failure bookkeeping on a faulty run.

        Phase 1 runs ``self.jobs`` through a ``JobRunner`` and validates the recorded
        execution order with ``self.order_checks``.

        Phase 2 resets the runner, adds ``self.faulty_jobs``, runs again and checks the
        reported status, the failed jobs, the failure roots and the paths leading to
        the root failure.
        """
        from treetl import JobRunner, JOB_STATUS

        job_tree = JobRunner(self.jobs).run()

        self.order_checks(self.actual_execution_order)
        self.assertEqual(
            len(self.actual_execution_order), len(self.jobs),
            msg='Some job transformed twice.'
        )

        # add jobs that will fail and dependents that won't run as a result
        # clear execution order to start over again
        self.actual_execution_order = []

        job_tree.reset_jobs()
        job_tree.add_jobs(self.faulty_jobs)

        # The runner has to execute BEFORE the execution order is inspected: the list
        # was just emptied above, so checking it ahead of run() could never fail.
        run_status = job_tree.run().status
        self.assertEqual(run_status, JOB_STATUS.FAILED, msg='Job failure not recorded in status')

        # NOTE(review): assumes actual_execution_order holds job class names - confirm.
        for failure_child in [ 'VictimJob', 'OtherVictimJob' ]:
            self.assertNotIn(
                member=failure_child,
                container=self.actual_execution_order,
                msg='Child of faulty, failed job was executed.'
            )

        # NOTE(review): assertItemsEqual is the Python 2.7 unittest name; Python 3 calls
        # it assertCountEqual and uses different keyword names.
        self.assertItemsEqual(
            expected_seq=self.faulty_jobs,
            actual_seq=job_tree.failed_jobs(),
            msg='Not all faulty jobs were labeled as failed.'
        )

        self.assertItemsEqual(
            expected_seq=[ self.faulty_jobs[0] ],
            actual_seq=job_tree.failed_job_roots(),
            msg='Root failure not correctly identified.'
        )

        # Mapping of { failed_root: paths_to_failed_root }
        failed_root_paths_dict = job_tree.failed_job_root_paths()
        self.assertEqual(len(failed_root_paths_dict), 1, msg='Too many failure roots.')
        self.assertItemsEqual(
            expected_seq=[ self.faulty_jobs[0] ],
            actual_seq=failed_root_paths_dict.keys(),
            msg='Incorrect failed root in { failed_root: paths_to_failed_root }'
        )

        # Compare paths by class name so the expectation can be written as plain strings.
        self.assertItemsEqual(
            expected_seq=[
                [ 'JobC', 'JobF', 'FaultyJob' ],
                [ 'JobB', 'JobF', 'FaultyJob' ]
            ],
            actual_seq=[
                [ job.__class__.__name__ for job in path ]
                for path in failed_root_paths_dict[self.faulty_jobs[0]]
            ],
            msg='Incorrect paths to root failure.'
        )
Beispiel #2
0
class TestJobException(unittest.TestCase):
    """Checks the status and error recorded for each job when a parent job raises."""

    def setUp(self):
        """Build a runner over JobA (raises in extract), JobB, and JobC (depends on both)."""
        from treetl import Job, JobRunner

        class JobA(Job):
            def extract(self, **kwarg):
                # Deliberate failure: this is the error the test tracks.
                raise ValueError()

        class JobB(Job):
            pass

        @Job.dependency(job_a=JobA, job_b=JobB)
        class JobC(Job):
            pass

        self.runner = JobRunner([ JobA(), JobB(), JobC() ])

    def test_exceptions(self):
        """Run the jobs and verify per-job status codes and error types."""
        from treetl.job import JobException, ParentJobException

        # NOTE(review): run() is invoked here and again in the assertion below;
        # confirm whether the second run is intentional.
        self.runner.run()

        # run failed
        self.assertEqual(self.runner.run().status, 3)
        job_res = self.runner.job_results(submitted_only=True)

        # JobA is status 3 (Failed) and has JobException
        self.assertEqual(job_res[0].status, 3)
        self.assertIsInstance(job_res[0].error, JobException)

        # JobB may have been called first.
        # So it's either in the queue or done, but def no error
        self.assertIn(job_res[1].status, [0, 2])
        self.assertIsNone(job_res[1].error)

        # JobC has an error, but to no fault of its own
        # this one errored out bc parent job JobA had an error
        self.assertEqual(job_res[2].status, 3)
        self.assertIsInstance(job_res[2].error, ParentJobException)
Beispiel #3
0
class TestJobException(unittest.TestCase):
    """Checks the status and error recorded for each job when a parent job raises."""

    def setUp(self):
        """Build a runner over JobA (raises in extract), JobB, and JobC (depends on both)."""
        from treetl import Job, JobRunner

        class JobA(Job):
            def extract(self, **kwarg):
                # Deliberate failure: this is the error the test tracks.
                raise ValueError()

        class JobB(Job):
            pass

        @Job.dependency(job_a=JobA, job_b=JobB)
        class JobC(Job):
            pass

        self.runner = JobRunner([JobA(), JobB(), JobC()])

    def test_exceptions(self):
        """Run the jobs and verify per-job status codes and error types."""
        from treetl.job import JobException, ParentJobException

        # NOTE(review): run() is invoked here and again in the assertion below;
        # confirm whether the second run is intentional.
        self.runner.run()

        # run failed
        self.assertEqual(self.runner.run().status, 3)
        job_res = self.runner.job_results(submitted_only=True)

        # JobA is status 3 (Failed) and has JobException
        self.assertEqual(job_res[0].status, 3)
        self.assertIsInstance(job_res[0].error, JobException)

        # JobB may have been called first.
        # So it's either in the queue or done, but def no error
        self.assertIn(job_res[1].status, [0, 2])
        self.assertIsNone(job_res[1].error)

        # JobC has an error, but to no fault of its own
        # this one errored out bc parent job JobA had an error
        self.assertEqual(job_res[2].status, 3)
        self.assertIsInstance(job_res[2].error, ParentJobException)
Beispiel #4
0
    def test_job_order(self):
        """Check execution order on a clean run, then failure bookkeeping on a faulty run.

        Phase 1 runs ``self.jobs`` through a ``JobRunner`` and validates the recorded
        execution order with ``self.order_checks``.

        Phase 2 resets the runner, adds ``self.faulty_jobs``, runs again and checks the
        reported status, the failed jobs, the failure roots and the paths leading to
        the root failure.
        """
        from treetl import JobRunner, JOB_STATUS

        job_tree = JobRunner(self.jobs).run()

        self.order_checks(self.actual_execution_order)
        self.assertEqual(len(self.actual_execution_order),
                         len(self.jobs),
                         msg='Some job transformed twice.')

        # add jobs that will fail and dependents that won't run as a result
        # clear execution order to start over again
        self.actual_execution_order = []

        job_tree.reset_jobs()
        job_tree.add_jobs(self.faulty_jobs)

        # The runner has to execute BEFORE the execution order is inspected: the list
        # was just emptied above, so checking it ahead of run() could never fail.
        run_status = job_tree.run().status
        self.assertEqual(run_status,
                         JOB_STATUS.FAILED,
                         msg='Job failure not recorded in status')

        # NOTE(review): assumes actual_execution_order holds job class names - confirm.
        for failure_child in ['VictimJob', 'OtherVictimJob']:
            self.assertNotIn(member=failure_child,
                             container=self.actual_execution_order,
                             msg='Child of faulty, failed job was executed.')

        # NOTE(review): assertItemsEqual is the Python 2.7 unittest name; Python 3 calls
        # it assertCountEqual and uses different keyword names.
        self.assertItemsEqual(
            expected_seq=self.faulty_jobs,
            actual_seq=job_tree.failed_jobs(),
            msg='Not all faulty jobs were labeled as failed.')

        self.assertItemsEqual(expected_seq=[self.faulty_jobs[0]],
                              actual_seq=job_tree.failed_job_roots(),
                              msg='Root failure not correctly identified.')

        # Mapping of { failed_root: paths_to_failed_root }
        failed_root_paths_dict = job_tree.failed_job_root_paths()
        self.assertEqual(len(failed_root_paths_dict),
                         1,
                         msg='Too many failure roots.')
        self.assertItemsEqual(
            expected_seq=[self.faulty_jobs[0]],
            actual_seq=failed_root_paths_dict.keys(),
            msg='Incorrect failed root in { failed_root: paths_to_failed_root }'
        )

        # Compare paths by class name so the expectation can be written as plain strings.
        self.assertItemsEqual(
            expected_seq=[['JobC', 'JobF', 'FaultyJob'],
                          ['JobB', 'JobF', 'FaultyJob']],
            actual_seq=[[job.__class__.__name__ for job in path]
                        for path in failed_root_paths_dict[self.faulty_jobs[0]]
                        ],
            msg='Incorrect paths to root failure.')
Beispiel #5
0
class TestLogging(unittest.TestCase):
    """Checks the debug/info/error messages logged while a JobRunner executes a failing job tree.

    Messages are captured by a ``MockLoggingHandler`` attached to the module-level
    ``logger`` in ``setUp`` and detached again in ``tearDown``.
    """

    def set_up_handler(self):
        """Attach a DEBUG-level MockLoggingHandler to ``logger`` and keep it on ``self.handler``."""
        from treetl.tools.testing import MockLoggingHandler

        self.handler = MockLoggingHandler(level='DEBUG')
        logger.addHandler(self.handler)

    def set_up_jobs(self):
        """Define jobs A-D and store a runner over JobC and JobD on ``self.runner``.

        JobC depends on JobA and JobB and raises in ``transform``; JobD depends on JobC.
        """
        from treetl import Job, JobRunner

        class JobA(Job):
            pass

        class JobB(Job):
            pass

        @Job.dependency(a=JobA, b=JobB)
        class JobC(Job):
            def transform(self, **kwargs):
                raise ValueError()

        @Job.dependency(c=JobC)
        class JobD(Job):
            def load(self, **kwargs):
                # will never get here
                raise ValueError()

        # Only JobC and JobD are submitted; JobA and JobB are referenced as dependencies.
        self.runner = JobRunner([ JobC(), JobD() ])

    def setUp(self):
        """Install the log-capturing handler first, then build the jobs and runner."""
        self.set_up_handler()
        self.set_up_jobs()

    def jobs_and_parents_were_added(self, debug_msgs):
        """Assert the 'Adding job' debug messages for JobC and JobD are present."""
        self.assertIn('JobRunner: Adding job JobC with 2 parent(s)', debug_msgs, msg='debug msg err: JobC add')
        self.assertIn('JobRunner: Adding job JobD with 1 parent(s)', debug_msgs, msg='debug msg err: JobD add')

    def successful_steps_were_in_debug(self, debug_msgs):
        """Assert every step expected to succeed shows up in the debug messages."""
        # make sure all successful job steps made it to debug
        # JobA and JobB should run fully, JobC extract only, JobD not at all
        expected_steps = [
            'JobA.extract()', 'JobA.transform()', 'JobA.load()',
            'JobB.extract()', 'JobB.transform()', 'JobB.load()', 'JobC.extract()'
        ]
        for m in expected_steps:
            self.assertIn(m, debug_msgs, msg='debug msg err: ' + m)

    def cached_jobs_in_debug(self, debug_msgs):
        """Assert the cache() debug message is present for JobA and JobB."""
        # jobs A and B should be cached since they'll be called upon by C
        for j in ['A', 'B']:
            self.assertIn(
                'Job{}.cache() | children:1'.format(j),
                debug_msgs,
                msg='debug msg err: Job{}.cache()'.format(j)
            )

    def correct_transform_signature_in_debug(self, debug_msgs):
        """Assert the JobC.transform call signature is present in the debug messages."""
        err_msg = 'debug msg err: JobC transform signature is incorrect or missing from debug'
        self.assertIn('JobC.transform(a=None, b=None)', debug_msgs, msg=err_msg)

    def job_runner_job_status(self, info_msgs):
        """Assert the first info message reports RUNNING and the last reports FAILED."""
        err_msg = 'info msg err: did not start with RUNNING status'
        self.assertEqual('JobRunner: JOB_STATUS.RUNNING', info_msgs[0], msg=err_msg)

        err_msg = 'info msg err: did not end with FAILED status'
        self.assertEqual('JobRunner: JOB_STATUS.FAILED', info_msgs[-1], msg=err_msg)

    def check_info_for_job_start_attempts(self, info_msgs):
        """Assert a 'Running' info message exists for JobA, JobB and JobC."""
        err_msg = 'info msg err: Failed to note start of Job'
        # should start and try to run jobs A, B and C.
        # D should be skipped since C fails
        for j in ['A', 'B', 'C']:
            self.assertIn('JobRunner: Running Job' + j, info_msgs, msg=err_msg + j)

    def check_info_for_completed_jobs(self, info_msgs):
        """Assert a 'Completed' info message exists for JobA and JobB."""
        err_msg = 'info msg err: Failed to note completion of Job'
        # jobs A and B should finish.
        for j in ['A', 'B']:
            self.assertIn('JobRunner: Completed Job' + j, info_msgs, msg=err_msg + j)

    def info_has_skipped_job(self, info_msgs):
        """Assert the info messages note that JobD was skipped because JobC failed."""
        self.assertIn(
            'JobRunner: Skipped JobD due to failure in parent JobC',
            info_msgs,
            msg='info msg err: failed to note skipped JobD due to JobC failure'
        )

    def error_has_job_failure(self, err_msgs):
        """Assert the error messages record the JobC failure."""
        self.assertIn(
            'JobRunner: Error on JobC',
            err_msgs,
            msg='err msg err: failed to note JobC failure'
        )

    def test_job_logging(self):
        """Run the jobs once and check the captured messages at each log level."""
        self.runner.run()

        # NOTE(review): correct_transform_signature_in_debug and info_has_skipped_job are
        # defined on this class but not invoked here - confirm whether that is intentional.
        debug_msgs = self.handler.messages['debug']
        self.jobs_and_parents_were_added(debug_msgs)
        self.successful_steps_were_in_debug(debug_msgs)
        self.cached_jobs_in_debug(debug_msgs)

        info_msgs = self.handler.messages['info']
        self.job_runner_job_status(info_msgs)
        self.check_info_for_job_start_attempts(info_msgs)
        self.check_info_for_completed_jobs(info_msgs)

        self.error_has_job_failure(self.handler.messages['error'])

    def tearDown(self):
        """Detach the handler so captured messages do not carry over to other tests."""
        logger.removeHandler(self.handler)