예제 #1
0
    def setUp(self):
        """Build ``self.runner`` over three jobs, one of which always fails.

        JobA raises ``ValueError`` from ``extract``; JobB overrides nothing;
        JobC declares JobA and JobB as its dependencies under the keyword
        names ``job_a`` and ``job_b``.
        """
        from treetl import Job, JobRunner

        class JobA(Job):
            # Deliberately broken extract step.
            def extract(self, **kwarg):
                raise ValueError

        class JobB(Job):
            # Relies entirely on what Job itself provides.
            pass

        # Build the dependency decorator first, then apply it to JobC.
        needs_a_and_b = Job.dependency(job_a=JobA, job_b=JobB)

        @needs_a_and_b
        class JobC(Job):
            pass

        # Instantiate in the same order the jobs are handed to the runner.
        submitted = [job_cls() for job_cls in (JobA, JobB, JobC)]
        self.runner = JobRunner(submitted)
예제 #2
0
    def test_job_order(self):
        # Two phases:
        #   1. run the healthy jobs and validate the recorded execution order;
        #   2. add the faulty jobs, rerun, and validate how failures are reported.
        from treetl import JobRunner, JOB_STATUS

        # assertItemsEqual only exists on Python 2.7; Python 3 renamed it to
        # assertCountEqual. Pick whichever this interpreter offers and call it
        # positionally, since the keyword names differ between the two.
        assert_same_items = getattr(self, 'assertCountEqual', None) or self.assertItemsEqual

        job_tree = JobRunner(self.jobs).run()

        self.order_checks(self.actual_execution_order)
        # assertEqual (not assertTrue(a == b)) so a failure shows both lengths.
        self.assertEqual(
            len(self.actual_execution_order),
            len(self.jobs),
            msg='Some job transformed twice.'
        )

        # add jobs that will fail and dependents that won't run as a result
        # clear execution order to start over again
        self.actual_execution_order = []

        job_tree.reset_jobs()
        job_tree.add_jobs(self.faulty_jobs)

        # The rerun must happen BEFORE the execution order is inspected: the
        # list was emptied just above, so checking it first could never fail.
        rerun_status = job_tree.run().status
        self.assertEqual(rerun_status, JOB_STATUS.FAILED, msg='Job failure not recorded in status')

        for failure_child in [ 'VictimJob', 'OtherVictimJob' ]:
            self.assertNotIn(
                member=failure_child,
                container=self.actual_execution_order,
                msg='Child of faulty, failed job was executed.'
            )

        assert_same_items(
            self.faulty_jobs,
            job_tree.failed_jobs(),
            msg='Not all faulty jobs were labeled as failed.'
        )

        # Only the first faulty job is expected to be reported as a root failure.
        expected_roots = [ self.faulty_jobs[0] ]
        assert_same_items(
            expected_roots,
            job_tree.failed_job_roots(),
            msg='Root failure not correctly identified.'
        )

        failed_root_paths_dict = job_tree.failed_job_root_paths()
        self.assertEqual(len(failed_root_paths_dict), 1, msg='Too many failure roots.')
        assert_same_items(
            expected_roots,
            failed_root_paths_dict.keys(),
            msg='Incorrect failed root in { failed_root: paths_to_failed_root }'
        )

        # Paths are compared by class name so the expectation stays readable.
        assert_same_items(
            [
                [ 'JobC', 'JobF', 'FaultyJob' ],
                [ 'JobB', 'JobF', 'FaultyJob' ]
            ],
            [
                [ job.__class__.__name__ for job in path ]
                for path in failed_root_paths_dict[self.faulty_jobs[0]]
            ],
            msg='Incorrect paths to root failure.'
        )
예제 #3
0
    def test_parent_data_params(self):
        # Run every job in self.jobs, then compare the results the fixture
        # collected (self.actual_results) against self.expected_results.
        from treetl import JobRunner

        runner = JobRunner(self.jobs)
        runner.run()

        self.assertDictEqual(
            self.expected_results,
            self.actual_results,
            msg='Error in transformed data loaded to dict'
        )
예제 #4
0
    def test_job_order(self):
        # Two phases:
        #   1. run the healthy jobs and validate the recorded execution order;
        #   2. add the faulty jobs, rerun, and validate how failures are reported.
        from treetl import JobRunner, JOB_STATUS

        # assertItemsEqual only exists on Python 2.7; Python 3 renamed it to
        # assertCountEqual. Pick whichever this interpreter offers and call it
        # positionally, since the keyword names differ between the two.
        assert_same_items = (getattr(self, 'assertCountEqual', None)
                             or self.assertItemsEqual)

        job_tree = JobRunner(self.jobs).run()

        self.order_checks(self.actual_execution_order)
        # assertEqual (not assertTrue(a == b)) so a failure shows both lengths.
        self.assertEqual(len(self.actual_execution_order), len(self.jobs),
                         msg='Some job transformed twice.')

        # add jobs that will fail and dependents that won't run as a result
        # clear execution order to start over again
        self.actual_execution_order = []

        job_tree.reset_jobs()
        job_tree.add_jobs(self.faulty_jobs)

        # The rerun must happen BEFORE the execution order is inspected: the
        # list was emptied just above, so checking it first could never fail.
        rerun_status = job_tree.run().status
        self.assertEqual(rerun_status, JOB_STATUS.FAILED,
                         msg='Job failure not recorded in status')

        for failure_child in ['VictimJob', 'OtherVictimJob']:
            self.assertNotIn(member=failure_child,
                             container=self.actual_execution_order,
                             msg='Child of faulty, failed job was executed.')

        assert_same_items(self.faulty_jobs,
                          job_tree.failed_jobs(),
                          msg='Not all faulty jobs were labeled as failed.')

        # Only the first faulty job is expected to be reported as a root failure.
        expected_roots = [self.faulty_jobs[0]]
        assert_same_items(expected_roots,
                          job_tree.failed_job_roots(),
                          msg='Root failure not correctly identified.')

        failed_root_paths_dict = job_tree.failed_job_root_paths()
        self.assertEqual(len(failed_root_paths_dict), 1,
                         msg='Too many failure roots.')
        assert_same_items(
            expected_roots,
            failed_root_paths_dict.keys(),
            msg='Incorrect failed root in { failed_root: paths_to_failed_root }'
        )

        # Paths are compared by class name so the expectation stays readable.
        assert_same_items(
            [['JobC', 'JobF', 'FaultyJob'],
             ['JobB', 'JobF', 'FaultyJob']],
            [[job.__class__.__name__ for job in path]
             for path in failed_root_paths_dict[self.faulty_jobs[0]]],
            msg='Incorrect paths to root failure.')
예제 #5
0
    def test_dyn_jobs(self):
        # Exercise a job class produced by Job.create: first on its own with
        # the steps chained by hand, then with a parent job through JobRunner.
        from treetl import Job, JobRunner

        def build_dyn_job_class(**extra):
            # Each step is wired to the same-named callable on this test case;
            # `extra` carries optional additions such as parent declarations.
            return Job.create(
                'DynJob',
                extract=self.extract,
                transform=self.transform,
                load=self.load,
                cache=self.cache,
                uncache=self.uncache,
                **extra
            )

        def verify(offset, label, job):
            # `offset` is added to every expected answer except the extracted
            # one; `label` prefixes each failure message.
            msg = '{}: Incorrect extracted data'.format(label)
            self.assertEqual(job.extracted_data, self.extract_ans, msg=msg)

            msg = '{}: Incorrect transformed data'.format(label)
            self.assertEqual(job.transformed_data, self.transformed_ans + offset, msg=msg)

            msg = '{}: Incorrect load answer data'.format(label)
            self.assertEqual(self.load_ans_dest, self.load_ans + offset, msg=msg)

            msg = '{}: Incorrect cache answer data'.format(label)
            self.assertEqual(self.cache_ans_dest, self.cache_ans + offset, msg=msg)

            msg = '{}: Incorrect uncache answer data'.format(label)
            self.assertEqual(self.uncache_ans_dest, self.uncache_ans + offset, msg=msg)

        # Standalone: no dependencies, every step invoked manually in sequence.
        standalone = build_dyn_job_class()()
        standalone = standalone.extract().transform().load().cache().uncache()
        verify(0, 'No Parent Job', standalone)

        class Parent(Job):
            # Supplies a fixed transformed value (3) for the dependent job.
            def transform(self, **kwargs):
                self.transformed_data = 3
                return self

        # With a parent: the runner drives the job; Parent is declared under
        # the keyword name `parent_data`.
        child = build_dyn_job_class(parent_data=Parent)()
        JobRunner(jobs=[child]).run()

        # Original note: "only one parent, so won't be called by JobRunner",
        # hence cache/uncache are triggered by hand before verifying.
        child.cache().uncache()
        verify(3, 'With Parent Job', child)
예제 #6
0
    def set_up_jobs(self):
        """Build ``self.runner`` over a job tree with a failing transform step.

        JobC depends on JobA and JobB (keyword names ``a`` and ``b``) and
        raises ``ValueError`` from ``transform``. JobD depends on JobC
        (keyword name ``c``). Only JobC and JobD instances are passed to the
        runner; JobA and JobB appear solely in JobC's dependency declaration.
        """
        from treetl import Job, JobRunner

        class JobA(Job):
            pass

        class JobB(Job):
            pass

        needs_a_and_b = Job.dependency(a=JobA, b=JobB)

        @needs_a_and_b
        class JobC(Job):
            # Deliberately broken transform step.
            def transform(self, **kwargs):
                raise ValueError

        needs_c = Job.dependency(c=JobC)

        @needs_c
        class JobD(Job):
            def load(self, **kwargs):
                # will never get here
                raise ValueError

        submitted = [job_cls() for job_cls in (JobC, JobD)]
        self.runner = JobRunner(submitted)
예제 #7
0
class TestJobException(unittest.TestCase):
    """Checks the status and error recorded per job when one job fails."""

    def setUp(self):
        """Build ``self.runner`` over JobA (failing extract), JobB and JobC.

        JobC declares JobA and JobB as dependencies under the keyword names
        ``job_a`` and ``job_b``.
        """
        from treetl import Job, JobRunner

        class JobA(Job):
            # Deliberately broken extract step.
            def extract(self, **kwarg):
                raise ValueError

        class JobB(Job):
            pass

        needs_a_and_b = Job.dependency(job_a=JobA, job_b=JobB)

        @needs_a_and_b
        class JobC(Job):
            pass

        submitted = [job_cls() for job_cls in (JobA, JobB, JobC)]
        self.runner = JobRunner(submitted)

    def test_exceptions(self):
        # NOTE(review): run() is invoked twice in this test (once bare, once
        # to read the status); confirm the first call is intentional.
        from treetl.job import JobException, ParentJobException

        runner = self.runner
        runner.run()

        # The run as a whole must be reported as failed (status 3).
        outcome = runner.run()
        self.assertEqual(outcome.status, 3)
        results = runner.job_results(submitted_only=True)

        # JobA: status 3 (Failed), carrying a JobException.
        job_a_result = results[0]
        self.assertTrue(job_a_result.status == 3)
        self.assertIsInstance(job_a_result.error, JobException)

        # JobB may have been called first, so it is either still queued or
        # done (status 0 or 2), but it must not carry an error.
        job_b_result = results[1]
        self.assertTrue(job_b_result.status in [0, 2])
        self.assertTrue(job_b_result.error is None)

        # JobC fails through no fault of its own: its parent JobA errored,
        # so the recorded error is a ParentJobException.
        job_c_result = results[2]
        self.assertTrue(job_c_result.status == 3)
        self.assertIsInstance(job_c_result.error, ParentJobException)
예제 #8
0
class TestJobException(unittest.TestCase):
    """Checks the status and error recorded per job when one job fails."""

    def setUp(self):
        """Build ``self.runner`` over JobA (failing extract), JobB and JobC.

        JobC declares JobA and JobB as dependencies under the keyword names
        ``job_a`` and ``job_b``.
        """
        from treetl import Job, JobRunner

        class JobA(Job):
            # Deliberately broken extract step.
            def extract(self, **kwarg):
                raise ValueError

        class JobB(Job):
            pass

        needs_a_and_b = Job.dependency(job_a=JobA, job_b=JobB)

        @needs_a_and_b
        class JobC(Job):
            pass

        submitted = [job_cls() for job_cls in (JobA, JobB, JobC)]
        self.runner = JobRunner(submitted)

    def test_exceptions(self):
        # NOTE(review): run() is invoked twice in this test (once bare, once
        # to read the status); confirm the first call is intentional.
        from treetl.job import JobException, ParentJobException

        runner = self.runner
        runner.run()

        # The run as a whole must be reported as failed (status 3).
        outcome = runner.run()
        self.assertEqual(outcome.status, 3)
        results = runner.job_results(submitted_only=True)

        # JobA: status 3 (Failed), carrying a JobException.
        job_a_result = results[0]
        self.assertTrue(job_a_result.status == 3)
        self.assertIsInstance(job_a_result.error, JobException)

        # JobB may have been called first, so it is either still queued or
        # done (status 0 or 2), but it must not carry an error.
        job_b_result = results[1]
        self.assertTrue(job_b_result.status in [0, 2])
        self.assertTrue(job_b_result.error is None)

        # JobC fails through no fault of its own: its parent JobA errored,
        # so the recorded error is a ParentJobException.
        job_c_result = results[2]
        self.assertTrue(job_c_result.status == 3)
        self.assertIsInstance(job_c_result.error, ParentJobException)
예제 #9
0
    def test_job_caching(self):
        # Run all fixture jobs, then validate the recorded event order and
        # how many times the root job was transformed.
        from treetl import JobRunner

        runner = JobRunner(self.jobs)
        runner.run()

        # Validates the order of calls, cache calls included, and that no
        # cache call was made on a node without children.
        self.check_event_order()

        # Two things are covered by the assertion below:
        #   1. the root node is transformed exactly once;
        #   2. because JobA is added second, the implicitly created parent
        #      JobA must have been replaced by the explicitly created one.
        explicit_job_a = self.jobs[1]
        self.assertTrue(explicit_job_a.transformed_data == 1,
                        msg='JobA transformed more than once')
예제 #10
0
    def setUp(self):
        """Build ``self.runner`` over three jobs, one of which always fails.

        JobA raises ``ValueError`` from ``extract``; JobB overrides nothing;
        JobC declares JobA and JobB as its dependencies under the keyword
        names ``job_a`` and ``job_b``.
        """
        from treetl import Job, JobRunner

        class JobA(Job):
            # Deliberately broken extract step.
            def extract(self, **kwarg):
                raise ValueError

        class JobB(Job):
            # Relies entirely on what Job itself provides.
            pass

        # Build the dependency decorator first, then apply it to JobC.
        needs_a_and_b = Job.dependency(job_a=JobA, job_b=JobB)

        @needs_a_and_b
        class JobC(Job):
            pass

        # Instantiate in the same order the jobs are handed to the runner.
        submitted = [ job_cls() for job_cls in (JobA, JobB, JobC) ]
        self.runner = JobRunner(submitted)
예제 #11
0
class TestLogging(unittest.TestCase):
    """Checks the messages logged while a runner executes a partly failing job tree.

    Each test attaches a ``MockLoggingHandler`` to ``logger`` (a name defined
    outside this class) and detaches it again in ``tearDown``.
    """

    def set_up_handler(self):
        """Create a DEBUG-level mock handler and attach it to ``logger``."""
        from treetl.tools.testing import MockLoggingHandler

        mock_handler = MockLoggingHandler(level='DEBUG')
        self.handler = mock_handler
        logger.addHandler(mock_handler)

    def set_up_jobs(self):
        """Build ``self.runner`` over a job tree whose JobC fails in transform.

        JobC depends on JobA and JobB (keyword names ``a`` and ``b``); JobD
        depends on JobC (keyword name ``c``). Only JobC and JobD instances
        are passed to the runner.
        """
        from treetl import Job, JobRunner

        class JobA(Job):
            pass

        class JobB(Job):
            pass

        needs_a_and_b = Job.dependency(a=JobA, b=JobB)

        @needs_a_and_b
        class JobC(Job):
            # Deliberately broken transform step.
            def transform(self, **kwargs):
                raise ValueError

        needs_c = Job.dependency(c=JobC)

        @needs_c
        class JobD(Job):
            def load(self, **kwargs):
                # will never get here
                raise ValueError

        submitted = [ job_cls() for job_cls in (JobC, JobD) ]
        self.runner = JobRunner(submitted)

    def setUp(self):
        """Attach the log handler first, then build the runner."""
        self.set_up_handler()
        self.set_up_jobs()

    def jobs_and_parents_were_added(self, debug_msgs):
        """Assert that adding JobC and JobD was logged with their parent counts."""
        expectations = (
            ('JobRunner: Adding job JobC with 2 parent(s)', 'debug msg err: JobC add'),
            ('JobRunner: Adding job JobD with 1 parent(s)', 'debug msg err: JobD add'),
        )
        for expected_line, failure_note in expectations:
            self.assertIn(expected_line, debug_msgs, msg=failure_note)

    def successful_steps_were_in_debug(self, debug_msgs):
        """Assert every job step expected to succeed shows up in the debug log.

        JobA and JobB should run fully, JobC only its extract step, and JobD
        not at all.
        """
        expected_steps = [
            'JobA.extract()', 'JobA.transform()', 'JobA.load()',
            'JobB.extract()', 'JobB.transform()', 'JobB.load()', 'JobC.extract()'
        ]
        for step in expected_steps:
            self.assertIn(step, debug_msgs, msg='debug msg err: ' + step)

    def cached_jobs_in_debug(self, debug_msgs):
        """Assert JobA and JobB were cached, each reporting a single child.

        Both are expected to be cached because JobC calls upon them.
        """
        for suffix in ('A', 'B'):
            expected_line = 'Job{}.cache() | children:1'.format(suffix)
            failure_note = 'debug msg err: Job{}.cache()'.format(suffix)
            self.assertIn(expected_line, debug_msgs, msg=failure_note)

    def correct_transform_signature_in_debug(self, debug_msgs):
        """Assert the logged JobC.transform call shows both parent keywords."""
        self.assertIn(
            'JobC.transform(a=None, b=None)',
            debug_msgs,
            msg='debug msg err: JobC transform signature is incorrect or missing from debug'
        )

    def job_runner_job_status(self, info_msgs):
        """Assert the info log opens with RUNNING and closes with FAILED."""
        first_line = info_msgs[0]
        self.assertEqual(
            'JobRunner: JOB_STATUS.RUNNING',
            first_line,
            msg='info msg err: did not start with RUNNING status'
        )

        last_line = info_msgs[-1]
        self.assertEqual(
            'JobRunner: JOB_STATUS.FAILED',
            last_line,
            msg='info msg err: did not end with FAILED status'
        )

    def check_info_for_job_start_attempts(self, info_msgs):
        """Assert that starting JobA, JobB and JobC was logged at info level.

        JobD should be skipped because JobC fails.
        """
        failure_prefix = 'info msg err: Failed to note start of Job'
        for suffix in ('A', 'B', 'C'):
            self.assertIn(
                'JobRunner: Running Job' + suffix,
                info_msgs,
                msg=failure_prefix + suffix
            )

    def check_info_for_completed_jobs(self, info_msgs):
        """Assert that completion of JobA and JobB was logged at info level."""
        failure_prefix = 'info msg err: Failed to note competion of Job'
        for suffix in ('A', 'B'):
            self.assertIn(
                'JobRunner: Completed Job' + suffix,
                info_msgs,
                msg=failure_prefix + suffix
            )

    def info_has_skipped_job(self, info_msgs):
        """Assert the info log notes JobD being skipped due to JobC's failure."""
        expected_line = 'JobRunner: Skipped JobD due to failure in parent JobC'
        failure_note = 'info msg err: failed to note skipped JobD due to JobC failure'
        self.assertIn(expected_line, info_msgs, msg=failure_note)

    def error_has_job_failure(self, err_msgs):
        """Assert the error log records the failure of JobC."""
        expected_line = 'JobRunner: Error on JobC'
        failure_note = 'err msg err: failed to note JobC failure'
        self.assertIn(expected_line, err_msgs, msg=failure_note)

    def test_job_logging(self):
        # Run the job tree once, then inspect the captured messages per level.
        # NOTE(review): correct_transform_signature_in_debug and
        # info_has_skipped_job are defined on this class but not invoked
        # here; confirm whether that is deliberate.
        self.runner.run()

        debug_msgs = self.handler.messages['debug']
        debug_checks = (
            self.jobs_and_parents_were_added,
            self.successful_steps_were_in_debug,
            self.cached_jobs_in_debug,
        )
        for check in debug_checks:
            check(debug_msgs)

        info_msgs = self.handler.messages['info']
        info_checks = (
            self.job_runner_job_status,
            self.check_info_for_job_start_attempts,
            self.check_info_for_completed_jobs,
        )
        for check in info_checks:
            check(info_msgs)

        error_msgs = self.handler.messages['error']
        self.error_has_job_failure(error_msgs)

    def tearDown(self):
        """Detach the mock handler that ``set_up_handler`` attached."""
        logger.removeHandler(self.handler)