def setUp(self):
    """Create a runner over three jobs, one of which always fails.

    ``JobA`` raises ``ValueError`` from ``extract``, ``JobB`` is an
    unmodified ``Job`` and ``JobC`` declares both of them as dependencies.
    The ``JobRunner`` built from one instance of each is stored on
    ``self.runner``.
    """
    from treetl import Job, JobRunner

    class JobA(Job):
        # Parent job whose extract step always raises.
        def extract(self, **kwarg):
            raise ValueError()

    class JobB(Job):
        pass

    @Job.dependency(job_a=JobA, job_b=JobB)
    class JobC(Job):
        pass

    submitted = [JobA(), JobB(), JobC()]
    self.runner = JobRunner(submitted)
def test_job_order(self):
    """Check execution order, then failure bookkeeping once faulty jobs are added.

    First pass: runs ``self.jobs`` through a ``JobRunner``, hands the recorded
    order to ``self.order_checks`` and asserts that as many executions were
    recorded as there are jobs.

    Second pass: resets the runner, adds ``self.faulty_jobs``, runs again and
    asserts that the victim jobs were not recorded as executed, that the run
    status is ``JOB_STATUS.FAILED``, and that ``failed_jobs()``,
    ``failed_job_roots()`` and ``failed_job_root_paths()`` report the expected
    jobs and paths.
    """
    from treetl import JobRunner, JOB_STATUS

    job_tree = JobRunner(self.jobs).run()
    self.order_checks(self.actual_execution_order)
    self.assertTrue(
        len(self.actual_execution_order) == len(self.jobs),
        msg='Some job transformed twice.'
    )

    # add jobs that will fail and dependents that won't run as a result
    # clear execution order to start over again
    self.actual_execution_order = []
    job_tree.reset_jobs()
    job_tree.add_jobs(self.faulty_jobs)

    # Run BEFORE inspecting the execution order. Checking the freshly emptied
    # list (as was done previously) could never fail, so the victim-job
    # assertions were vacuous.
    rerun = job_tree.run()

    for failure_child in ['VictimJob', 'OtherVictimJob']:
        self.assertNotIn(
            member=failure_child,
            container=self.actual_execution_order,
            msg='Child of faulty, failed job was executed.'
        )

    self.assertTrue(rerun.status == JOB_STATUS.FAILED, msg='Job failure not recorded in status')

    self.assertItemsEqual(
        expected_seq=self.faulty_jobs,
        actual_seq=job_tree.failed_jobs(),
        msg='Not all faulty jobs were labeled as failed.'
    )

    self.assertItemsEqual(
        expected_seq=[self.faulty_jobs[0]],
        actual_seq=job_tree.failed_job_roots(),
        msg='Root failure not correctly identified.'
    )

    failed_root_paths_dict = job_tree.failed_job_root_paths()
    self.assertTrue(len(failed_root_paths_dict) == 1, msg='Too many failure roots.')
    self.assertItemsEqual(
        expected_seq=[self.faulty_jobs[0]],
        actual_seq=failed_root_paths_dict.keys(),
        msg='Incorrect failed root in { failed_root: paths_to_failed_root }'
    )

    # Compare paths by class name so the expectation does not depend on
    # the identity of the job instances.
    self.assertItemsEqual(
        expected_seq=[['JobC', 'JobF', 'FaultyJob'], ['JobB', 'JobF', 'FaultyJob']],
        actual_seq=[
            [job.__class__.__name__ for job in path]
            for path in failed_root_paths_dict[self.faulty_jobs[0]]
        ],
        msg='Incorrect paths to root failure.'
    )
def test_parent_data_params(self):
    """Run ``self.jobs`` and compare the collected results with the expected dict.

    Asserts that ``self.actual_results`` equals ``self.expected_results``
    after a ``JobRunner`` built from ``self.jobs`` has been run.
    """
    from treetl import JobRunner

    runner = JobRunner(self.jobs)
    runner.run()

    self.assertDictEqual(
        d1=self.expected_results,
        d2=self.actual_results,
        msg='Error in transformed data loaded to dict'
    )
def test_job_order(self):
    """Check execution order, then failure bookkeeping once faulty jobs are added.

    First pass: runs ``self.jobs`` through a ``JobRunner``, hands the recorded
    order to ``self.order_checks`` and asserts that as many executions were
    recorded as there are jobs.

    Second pass: resets the runner, adds ``self.faulty_jobs``, runs again and
    asserts that the victim jobs were not recorded as executed, that the run
    status is ``JOB_STATUS.FAILED``, and that ``failed_jobs()``,
    ``failed_job_roots()`` and ``failed_job_root_paths()`` report the expected
    jobs and paths.
    """
    from treetl import JobRunner, JOB_STATUS

    job_tree = JobRunner(self.jobs).run()
    self.order_checks(self.actual_execution_order)
    self.assertTrue(
        len(self.actual_execution_order) == len(self.jobs),
        msg='Some job transformed twice.'
    )

    # add jobs that will fail and dependents that won't run as a result
    # clear execution order to start over again
    self.actual_execution_order = []
    job_tree.reset_jobs()
    job_tree.add_jobs(self.faulty_jobs)

    # Run BEFORE inspecting the execution order. Checking the freshly emptied
    # list (as was done previously) could never fail, so the victim-job
    # assertions were vacuous.
    rerun = job_tree.run()

    for failure_child in ['VictimJob', 'OtherVictimJob']:
        self.assertNotIn(
            member=failure_child,
            container=self.actual_execution_order,
            msg='Child of faulty, failed job was executed.'
        )

    self.assertTrue(rerun.status == JOB_STATUS.FAILED, msg='Job failure not recorded in status')

    self.assertItemsEqual(
        expected_seq=self.faulty_jobs,
        actual_seq=job_tree.failed_jobs(),
        msg='Not all faulty jobs were labeled as failed.'
    )

    self.assertItemsEqual(
        expected_seq=[self.faulty_jobs[0]],
        actual_seq=job_tree.failed_job_roots(),
        msg='Root failure not correctly identified.'
    )

    failed_root_paths_dict = job_tree.failed_job_root_paths()
    self.assertTrue(len(failed_root_paths_dict) == 1, msg='Too many failure roots.')
    self.assertItemsEqual(
        expected_seq=[self.faulty_jobs[0]],
        actual_seq=failed_root_paths_dict.keys(),
        msg='Incorrect failed root in { failed_root: paths_to_failed_root }'
    )

    # Compare paths by class name so the expectation does not depend on
    # the identity of the job instances.
    self.assertItemsEqual(
        expected_seq=[['JobC', 'JobF', 'FaultyJob'], ['JobB', 'JobF', 'FaultyJob']],
        actual_seq=[
            [job.__class__.__name__ for job in path]
            for path in failed_root_paths_dict[self.faulty_jobs[0]]
        ],
        msg='Incorrect paths to root failure.'
    )
def test_dyn_jobs(self):
    """Exercise a job class built with ``Job.create``, alone and with a parent.

    The dynamic job is assembled from this test case's ``extract``,
    ``transform``, ``load``, ``cache`` and ``uncache`` callables. It is first
    driven step by step without any dependency, then run through a
    ``JobRunner`` with a ``Parent`` job whose transformed data is ``3``; in
    that second case every checked value is expected to be offset by ``3``.
    """
    from treetl import Job, JobRunner

    def make_job(**kwargs):
        # Build the 'DynJob' class; extra keyword arguments are forwarded
        # to Job.create unchanged.
        return Job.create(
            'DynJob',
            extract=self.extract,
            transform=self.transform,
            load=self.load,
            cache=self.cache,
            uncache=self.uncache,
            **kwargs
        )

    def run_test(inc, test_type, job):
        # Compare the job's data and this test case's destination values
        # against the expected answers, shifted by ``inc`` where applicable.
        def describe(what):
            return '{}: Incorrect {}'.format(test_type, what)

        self.assertEqual(job.extracted_data, self.extract_ans, msg=describe('extracted data'))
        self.assertEqual(job.transformed_data, self.transformed_ans + inc, msg=describe('transformed data'))
        self.assertEqual(self.load_ans_dest, self.load_ans + inc, msg=describe('load answer data'))
        self.assertEqual(self.cache_ans_dest, self.cache_ans + inc, msg=describe('cache answer data'))
        self.assertEqual(self.uncache_ans_dest, self.uncache_ans + inc, msg=describe('uncache answer data'))

    # Stand-alone case: drive every step by hand, no dependencies.
    standalone_cls = make_job()
    standalone = standalone_cls().extract().transform().load().cache().uncache()
    run_test(0, 'No Parent Job', standalone)

    # Dependency case: the parent's transform always yields 3.
    class Parent(Job):
        def transform(self, **kwargs):
            self.transformed_data = 3
            return self

    dyn_job = make_job(parent_data=Parent)()
    JobRunner(jobs=[dyn_job]).run()

    # cache/uncache are invoked by hand; per the original note the JobRunner
    # will not call them for this job.
    dyn_job.cache().uncache()
    run_test(3, 'With Parent Job', dyn_job)
def set_up_jobs(self):
    """Create the job chain used by the test and store its runner.

    ``JobC`` depends on ``JobA`` and ``JobB`` and raises ``ValueError`` from
    ``transform``; ``JobD`` depends on ``JobC``. Only ``JobC`` and ``JobD``
    instances are handed to the ``JobRunner`` kept on ``self.runner``.
    """
    from treetl import Job, JobRunner

    class JobA(Job):
        pass

    class JobB(Job):
        pass

    @Job.dependency(a=JobA, b=JobB)
    class JobC(Job):
        # Fails during its transform step.
        def transform(self, **kwargs):
            raise ValueError()

    @Job.dependency(c=JobC)
    class JobD(Job):
        def load(self, **kwargs):
            # will never get here
            raise ValueError()

    submitted = [JobC(), JobD()]
    self.runner = JobRunner(submitted)
class TestJobException(unittest.TestCase):
    """Check the status and error recorded per job when a parent job fails.

    The fixture submits ``JobA`` (raises ``ValueError`` in ``extract``),
    ``JobB`` (plain ``Job``) and ``JobC`` (depends on both) to a
    ``JobRunner``.
    """

    def setUp(self):
        """Build the three-job runner and store it on ``self.runner``."""
        from treetl import Job, JobRunner

        class JobA(Job):
            def extract(self, **kwarg):
                raise ValueError()

        class JobB(Job):
            pass

        @Job.dependency(job_a=JobA, job_b=JobB)
        class JobC(Job):
            pass

        self.runner = JobRunner([JobA(), JobB(), JobC()])

    def test_exceptions(self):
        """Assert run status plus each submitted job's status and error type.

        Uses the dedicated ``assertEqual`` / ``assertIn`` / ``assertIsNone``
        helpers so that a failure reports the offending value instead of a
        bare "False is not true".
        """
        from treetl.job import JobException, ParentJobException

        self.runner.run()

        # run failed
        # NOTE(review): run() is invoked a second time here, as in the
        # original test - confirm the double run is intentional.
        self.assertEqual(self.runner.run().status, 3)

        job_res = self.runner.job_results(submitted_only=True)

        # JobA is status 3 (Failed) and has JobException
        self.assertEqual(job_res[0].status, 3)
        self.assertIsInstance(job_res[0].error, JobException)

        # JobB may have been called first.
        # So it's either in the queue or done, but definitely has no error
        self.assertIn(job_res[1].status, [0, 2])
        self.assertIsNone(job_res[1].error)

        # JobC has an error, but through no fault of its own:
        # it errored out because parent job JobA had an error
        self.assertEqual(job_res[2].status, 3)
        self.assertIsInstance(job_res[2].error, ParentJobException)
class TestJobException(unittest.TestCase):
    """Check the status and error recorded per job when a parent job fails.

    The fixture submits ``JobA`` (raises ``ValueError`` in ``extract``),
    ``JobB`` (plain ``Job``) and ``JobC`` (depends on both) to a
    ``JobRunner``.
    """

    def setUp(self):
        """Build the three-job runner and store it on ``self.runner``."""
        from treetl import Job, JobRunner

        class JobA(Job):
            def extract(self, **kwarg):
                raise ValueError()

        class JobB(Job):
            pass

        @Job.dependency(job_a=JobA, job_b=JobB)
        class JobC(Job):
            pass

        self.runner = JobRunner([JobA(), JobB(), JobC()])

    def test_exceptions(self):
        """Assert run status plus each submitted job's status and error type.

        Uses the dedicated ``assertEqual`` / ``assertIn`` / ``assertIsNone``
        helpers so that a failure reports the offending value instead of a
        bare "False is not true".
        """
        from treetl.job import JobException, ParentJobException

        self.runner.run()

        # run failed
        # NOTE(review): run() is invoked a second time here, as in the
        # original test - confirm the double run is intentional.
        self.assertEqual(self.runner.run().status, 3)

        job_res = self.runner.job_results(submitted_only=True)

        # JobA is status 3 (Failed) and has JobException
        self.assertEqual(job_res[0].status, 3)
        self.assertIsInstance(job_res[0].error, JobException)

        # JobB may have been called first.
        # So it's either in the queue or done, but definitely has no error
        self.assertIn(job_res[1].status, [0, 2])
        self.assertIsNone(job_res[1].error)

        # JobC has an error, but through no fault of its own:
        # it errored out because parent job JobA had an error
        self.assertEqual(job_res[2].status, 3)
        self.assertIsInstance(job_res[2].error, ParentJobException)
def test_job_caching(self):
    """Run ``self.jobs`` and verify call ordering and single transformation.

    After the run, ``self.check_event_order()`` validates the recorded
    sequence of calls (including cache calls), and the second entry of
    ``self.jobs`` must have ``transformed_data == 1``.
    """
    from treetl import JobRunner

    runner = JobRunner(self.jobs)
    runner.run()

    # check order of calls including cache calls
    # make sure no cache calls were made on nodes without children
    self.check_event_order()

    # This assertion covers two things:
    # 1. the root node is transformed exactly once
    # 2. implicitly (JobA is added second): the parent JobA that was created
    #    implicitly gets replaced with the explicitly created one
    second_job = self.jobs[1]
    self.assertTrue(second_job.transformed_data == 1, msg='JobA transformed more than once')
def setUp(self):
    """Prepare ``self.runner`` with a failing parent, a plain parent and a child.

    ``JobA.extract`` raises ``ValueError``; ``JobB`` adds nothing to ``Job``;
    ``JobC`` is declared dependent on both through ``Job.dependency``.
    """
    from treetl import Job, JobRunner

    class JobA(Job):
        def extract(self, **kwarg):
            # Always fail during extraction.
            raise ValueError()

    class JobB(Job):
        pass

    @Job.dependency(job_a=JobA, job_b=JobB)
    class JobC(Job):
        pass

    # Instantiate in the same order the jobs are submitted: A, B, C.
    self.runner = JobRunner([job_cls() for job_cls in (JobA, JobB, JobC)])
class TestLogging(unittest.TestCase):
    """Check the debug/info/error messages logged while a job tree runs.

    A ``MockLoggingHandler`` is attached to the module-level ``logger`` for
    the duration of each test. The fixture submits ``JobC`` (depends on
    ``JobA`` and ``JobB``; raises ``ValueError`` in ``transform``) and
    ``JobD`` (depends on ``JobC``).
    """

    def set_up_handler(self):
        """Attach a DEBUG-level mock handler to ``logger``."""
        from treetl.tools.testing import MockLoggingHandler

        mock_handler = MockLoggingHandler(level='DEBUG')
        self.handler = mock_handler
        logger.addHandler(self.handler)

    def set_up_jobs(self):
        """Define the job classes and store the runner on ``self.runner``."""
        from treetl import Job, JobRunner

        class JobA(Job):
            pass

        class JobB(Job):
            pass

        @Job.dependency(a=JobA, b=JobB)
        class JobC(Job):
            def transform(self, **kwargs):
                raise ValueError()

        @Job.dependency(c=JobC)
        class JobD(Job):
            def load(self, **kwargs):
                # will never get here
                raise ValueError()

        submitted = [JobC(), JobD()]
        self.runner = JobRunner(submitted)

    def setUp(self):
        """Install the log handler first, then build the jobs."""
        self.set_up_handler()
        self.set_up_jobs()

    def jobs_and_parents_were_added(self, debug_msgs):
        """Assert the 'Adding job' debug lines for JobC and JobD are present."""
        expectations = (
            ('JobRunner: Adding job JobC with 2 parent(s)', 'debug msg err: JobC add'),
            ('JobRunner: Adding job JobD with 1 parent(s)', 'debug msg err: JobD add'),
        )
        for expected_line, failure_text in expectations:
            self.assertIn(expected_line, debug_msgs, msg=failure_text)

    def successful_steps_were_in_debug(self, debug_msgs):
        """Assert every step expected to succeed shows up in the debug log."""
        # JobA and JobB should run fully, JobC extract only, JobD not at all
        expected_steps = (
            'JobA.extract()', 'JobA.transform()', 'JobA.load()',
            'JobB.extract()', 'JobB.transform()', 'JobB.load()',
            'JobC.extract()',
        )
        for step in expected_steps:
            self.assertIn(step, debug_msgs, msg='debug msg err: ' + step)

    def cached_jobs_in_debug(self, debug_msgs):
        """Assert the cache debug lines for JobA and JobB are present."""
        # jobs A and B should be cached since they'll be called upon by C
        for suffix in ('A', 'B'):
            cache_line = 'Job{}.cache() | children:1'.format(suffix)
            failure_text = 'debug msg err: Job{}.cache()'.format(suffix)
            self.assertIn(cache_line, debug_msgs, msg=failure_text)

    # NOTE(review): this helper is not called by test_job_logging in this
    # class - confirm whether that is intentional.
    def correct_transform_signature_in_debug(self, debug_msgs):
        """Assert JobC's transform call signature appears in the debug log."""
        self.assertIn(
            'JobC.transform(a=None, b=None)',
            debug_msgs,
            msg='debug msg err: JobC transform signature is incorrect or missing from debug'
        )

    def job_runner_job_status(self, info_msgs):
        """Assert the first info line is RUNNING and the last one is FAILED."""
        checks = (
            ('JobRunner: JOB_STATUS.RUNNING', 0, 'info msg err: did not start with RUNNING status'),
            ('JobRunner: JOB_STATUS.FAILED', -1, 'info msg err: did not end with FAILED status'),
        )
        for expected_line, position, failure_text in checks:
            self.assertEqual(expected_line, info_msgs[position], msg=failure_text)

    def check_info_for_job_start_attempts(self, info_msgs):
        """Assert a 'Running' info line exists for JobA, JobB and JobC."""
        # should start and try to run jobs A, B and C.
        # D should be skipped since C fails
        failure_prefix = 'info msg err: Failed to note start of Job'
        for suffix in ('A', 'B', 'C'):
            self.assertIn('JobRunner: Running Job' + suffix, info_msgs, msg=failure_prefix + suffix)

    def check_info_for_completed_jobs(self, info_msgs):
        """Assert a 'Completed' info line exists for JobA and JobB."""
        # jobs A and B should finish.
        failure_prefix = 'info msg err: Failed to note competion of Job'
        for suffix in ('A', 'B'):
            self.assertIn('JobRunner: Completed Job' + suffix, info_msgs, msg=failure_prefix + suffix)

    # NOTE(review): this helper is not called by test_job_logging in this
    # class - confirm whether that is intentional.
    def info_has_skipped_job(self, info_msgs):
        """Assert the info log notes that JobD was skipped because JobC failed."""
        skipped_line = 'JobRunner: Skipped JobD due to failure in parent JobC'
        self.assertIn(
            skipped_line,
            info_msgs,
            msg='info msg err: failed to note skipped JobD due to JobC failure'
        )

    def error_has_job_failure(self, err_msgs):
        """Assert the error log contains the JobC failure line."""
        failure_line = 'JobRunner: Error on JobC'
        self.assertIn(failure_line, err_msgs, msg='err msg err: failed to note JobC failure')

    def test_job_logging(self):
        """Run the jobs and check the captured debug, info and error messages."""
        self.runner.run()

        # debug-level expectations
        debug_log = self.handler.messages['debug']
        self.jobs_and_parents_were_added(debug_log)
        self.successful_steps_were_in_debug(debug_log)
        self.cached_jobs_in_debug(debug_log)

        # info-level expectations
        info_log = self.handler.messages['info']
        self.job_runner_job_status(info_log)
        self.check_info_for_job_start_attempts(info_log)
        self.check_info_for_completed_jobs(info_log)

        # error-level expectations
        self.error_has_job_failure(self.handler.messages['error'])

    def tearDown(self):
        """Detach the mock handler from ``logger``."""
        logger.removeHandler(self.handler)