def test_get_children_with_complicated_job_ids(
        func_name, valid1, valid2, valid3, valid3b, valid4):
    """Check the children returned for the valid1 and valid2 apps.

    Each case is (parent app, parent job id, expected children), where a
    child is an (app_name, job_id, dependency_group_name) tuple.  Order of
    the returned children is not significant.
    """
    same_thing = 'another_way_to_state_same_thing'
    cases = [
        (valid1, "20151015_2", [
            (valid4, '20151015_101', 'default'),
            (valid3b, '20151015_101', same_thing),
            (valid3b, '20151015_102', same_thing),
            (valid3, '20151015_101', 'default'),
            (valid3, '20151015_102', 'default'),
        ]),
        # job ids expected to produce no children at all
        (valid1, "20151015_3-%s" % func_name, []),
        (valid2, "20151015_101", [
            (valid4, '20151015_101', 'default'),
            (valid3b, '20151015_101', same_thing),
            (valid3, '20151015_101', 'default'),
        ]),
        (valid2, "20151015_103", []),
    ]
    for parent_app, parent_job_id, expected in cases:
        found = list(dag_tools.get_children(parent_app, parent_job_id))
        nt.assert_items_equal(found, expected)
def _maybe_queue_children(parent_app_name, parent_job_id):
    """
    This is basically a "set_state(completed=True)" pre-commit hook

    Assume the task identified by (parent_app_name, parent_job_id) is
    completed.  For each of that parent's children in the dag of tasks,
    increment the child's counter of completed parents (stored at
    `_path_num_complete_parents(child_app_name, child_job_id)`).  Once the
    counter reaches the number of parents reported by `dt.get_parents`,
    re-add the child to its task queue via `readd_subtask`.

    Raises:
        exceptions.JobAlreadyQueued: re-raised, after logging, if
            `readd_subtask` raises it for a child.
    """
    qbcli = shared.get_qbclient()
    children = dt.get_children(parent_app_name, parent_job_id, True)
    for child_app_name, cjob_id, _dep_grp in children:
        ld = dict(
            child_app_name=child_app_name,
            child_job_id=cjob_id,
            app_name=parent_app_name,
            job_id=parent_job_id)
        # Count parents without materializing them into a list.
        ptotal = sum(1 for _ in dt.get_parents(child_app_name, cjob_id))
        pcomplete = qbcli.increment(
            _path_num_complete_parents(child_app_name, cjob_id))

        if pcomplete < ptotal:
            # Still waiting on at least one other parent.
            log.info(
                "Child job one step closer to being queued!",
                extra=dict(
                    num_complete_dependencies=pcomplete,
                    num_total_dependencies=ptotal,
                    **ld))
            continue

        log.info("Parent is queuing a child task", extra=ld)
        if pcomplete > ptotal:
            # The counter overshot; the child is still queued below.
            log.warning(
                "For some reason, I calculated that more parents"
                " completed than there are parents."
                " If you aren't re-adding tasks, this could be a code bug"
                " that results in tasks unnecessarily sitting in queue.",
                extra=dict(
                    num_complete_dependencies=pcomplete,
                    num_total_dependencies=ptotal,
                    **ld))
        if check_state(child_app_name, cjob_id, completed=True):
            log.warning(
                "Queuing a previously completed child task"
                " presumably because of the following:"
                " 1) you manually queued both a"
                " parent/ancestor and the child,"
                " and 2) the child completed first."
                " You probably shouldn't manually re-queue both parents"
                " and children. Just queue one of them.",
                extra=ld)
        try:
            readd_subtask(
                child_app_name, cjob_id,
                _reset_descendants=False,  # descendants previously handled
                _ignore_if_queued=True)
        except exceptions.JobAlreadyQueued:
            log.info("Child already in queue", extra=dict(ld))
            raise
def test_fan_out_tasks(app1, app2, app4, fanout1, func_name):
    """Test many-to-many relationships between parent and child tasks."""
    fanout1_name = 'test_stolos/test_fan_out_tasks/fanout1'
    app1_name = 'test_stolos/test_fan_out_tasks/app1'

    # --- parents of the fan-out app ---
    no_parents = list(dag_tools.get_parents(fanout1_name, '20140715_8'))
    nt.assert_count_equal(no_parents, [])

    dep2_parents = list(dag_tools.get_parents(
        fanout1_name, '20140715_testID5-%s' % func_name, True))
    nt.assert_count_equal(dep2_parents, [
        (app1, '20140714_555_profile-%s' % func_name, u'dep2'),
        (app1, '20140715_555_profile-%s' % func_name, u'dep2'),
    ])

    # --- children of app1, addressed by its literal name ---
    profile_9 = '20140715_9_profile-%s' % func_name
    children_9 = list(dag_tools.get_children(app1_name, profile_9, True))
    expected_9 = [
        (app2, '20140715_9_profile-%s' % func_name, 'default'),
        (app4, '20140715_9_profile-%s' % func_name, 'default'),
        (fanout1, '20140715_testID1-%s' % func_name, u'dep1'),
        (fanout1, '20140715_testID2-%s' % func_name, u'dep1'),
        (fanout1, '20140715_testID3-%s' % func_name, u'dep1'),
    ]
    nt.assert_count_equal(children_9, expected_9)

    # --- children of app1, addressed through the fixture value ---
    profile_555 = '20140715_555_profile-%s' % func_name
    children_555 = list(dag_tools.get_children(app1, profile_555, True))
    expected_555 = [
        (app2, '20140715_555_profile-%s' % func_name, 'default'),
        (app4, '20140715_555_profile-%s' % func_name, 'default'),
        (fanout1, u'20140714_testID5-%s' % func_name, u'dep2'),
        (fanout1, u'20140714_testID6-%s' % func_name, u'dep2'),
        (fanout1, u'20140715_testID1-%s' % func_name, u'dep1'),
        (fanout1, u'20140715_testID2-%s' % func_name, u'dep1'),
        (fanout1, u'20140715_testID3-%s' % func_name, u'dep1'),
        (fanout1, u'20140715_testID5-%s' % func_name, u'dep2'),
        (fanout1, u'20140715_testID6-%s' % func_name, u'dep2'),
    ]
    nt.assert_count_equal(children_555, expected_555)
def test_get_children(func_name, app1, app2, app4, depends_on1, depends_on2,
                      bash1, bash2):
    """Check dag_tools.get_children for several parent apps and job ids.

    Each case is (parent app, parent job id, expected children); children
    are (app_name, job_id, dependency_group_name) tuples compared without
    regard to order.
    """
    purchase_999 = '20140601_999_purchase-%s' % func_name
    purchase_876 = '20140601_876_purchase-%s' % func_name
    cases = [
        (depends_on2, '20140601_876_profile-%s' % func_name, [
            (depends_on1, u'20140601_testID2-%s' % func_name, u'depgrp2'),
        ]),
        (bash2, '20140601_9899_purchase', []),
        (bash1, '20140601_9899_purchase', [
            (bash2, '20140601_9899_purchase', 'default'),
        ]),
        (app1, purchase_999, [
            (depends_on1, u'20140601_testID1-%s' % func_name, u'depgrp1'),
            (app2, '20140601_999_purchase-%s' % func_name, 'default'),
            (app4, '20140601_999_purchase-%s' % func_name, 'default'),
        ]),
        (app1, purchase_876, [
            (depends_on1, u'20140601_testID1-%s' % func_name, u'depgrp1'),
            (app2, '20140601_876_purchase-%s' % func_name, 'default'),
            (app4, '20140601_876_purchase-%s' % func_name, 'default'),
        ]),
    ]
    for parent_app, parent_job_id, expected in cases:
        found = list(dag_tools.get_children(parent_app, parent_job_id))
        nt.assert_count_equal(found, expected)
def test_autofill_all(func_name, autofill1, autofill2, autofill3,
                      autofill_getparents):
    """Check get_children / get_parents for the autofill apps.

    Covers job ids that are expected to produce relatives ("in bounds") and
    ones that are expected to produce none ("out of bounds").
    """
    children_cases = [
        # autofill1 out of bounds get_children
        (autofill1, '9', []),
        (autofill1, '11', []),
        (autofill1, '20', []),
        # autofill1 in bounds get_children
        (autofill1, '10', [
            (autofill3, '20150101', 'default'),
            (autofill3, '20150102', 'default'),
            (autofill3, '20150103', 'default'),
            (autofill3, '20150104', 'default'),
            (autofill_getparents, '20150101_10_10', 'default'),
        ]),
        # autofill2 out of bounds get_children
        (autofill2, '20150128', []),
        # autofill2 in bounds get_children
        (autofill2, '20150101', [
            (autofill3, '20150101', 'default'),
            (autofill_getparents, '20150101_10_10', 'default'),
        ]),
        # autofill3 is expected to have no children
        (autofill3, '20150101', []),
    ]
    for app_name, job_id, expected in children_cases:
        found = list(dag_tools.get_children(app_name, job_id))
        nt.assert_items_equal(found, expected)

    # autofill3 get parents out and in bounds
    parents_cases = [
        (autofill3, '20150128', []),
        (autofill3, '20150101', [
            (autofill1, '10'),
            (autofill1, '12'),
            (autofill1, '14'),
            (autofill1, '16'),
            (autofill1, '18'),
            (autofill2, '20150101'),
        ]),
    ]
    for app_name, job_id, expected in parents_cases:
        found = list(dag_tools.get_parents(app_name, job_id))
        nt.assert_items_equal(found, expected)
def _recursively_reset_child_task_state(parent_app_name, job_id, so_far=None):
    """Set every existing descendant of the given task to pending.

    Walks the children yielded by `dt.get_children` depth-first.  A child is
    only reset (and descended into) when its job path exists in the queue
    backend.

    `so_far` is the set of (app_name, job_id) pairs already visited during
    this walk; it is shared across the recursion so that a node reachable
    through 2+ parents is handled once.  Callers normally leave it as None.
    """
    visited = set() if so_far is None else so_far
    qbcli = shared.get_qbclient()
    log.debug(
        "recursively setting all descendant tasks to 'pending' and "
        " marking that the parent is not completed",
        extra={'app_name': parent_app_name, 'job_id': job_id})

    descendants = dt.get_children(parent_app_name, job_id, True)
    for child_app_name, cjob_id, _dep_grp in descendants:
        node = (child_app_name, cjob_id)
        if node in visited:
            continue
        visited.add(node)

        if not qbcli.exists(shared.get_job_path(child_app_name, cjob_id)):
            # no need to recurse further down the tree
            continue
        set_state(child_app_name, cjob_id, pending=True)
        _recursively_reset_child_task_state(child_app_name, cjob_id, visited)
def _recursively_reset_child_task_state(parent_app_name, job_id, so_far=None):
    """Recursively mark the descendants of a task as pending.

    For each child reported by `dt.get_children`, if the child's job path
    exists in the queue backend, the child is set to pending and its own
    children are processed the same way.  Children whose job path does not
    exist end that branch of the walk.

    `so_far` collects the (app_name, job_id) pairs seen so far, so nodes with
    2+ parents are not reset more than once.  It is created on the outermost
    call and passed down through the recursion.
    """
    if so_far is None:
        so_far = set()
    already_seen = so_far

    qbcli = shared.get_qbclient()
    log_fields = dict(app_name=parent_app_name, job_id=job_id)
    log.debug(
        "recursively setting all descendant tasks to 'pending' and "
        " marking that the parent is not completed",
        extra=log_fields)

    for child_app_name, cjob_id, _ in dt.get_children(
            parent_app_name, job_id, True):
        child_key = (child_app_name, cjob_id)
        if child_key not in already_seen:
            already_seen.add(child_key)
            child_path = shared.get_job_path(child_app_name, cjob_id)
            if qbcli.exists(child_path):
                set_state(child_app_name, cjob_id, pending=True)
                _recursively_reset_child_task_state(
                    child_app_name, cjob_id, already_seen)
            # otherwise: no need to recurse further down the tree
def test_depends_on_all(func_name, all_test1, all_test2, all_test3,
                        all_test4, all_test4b, all_test5):
    """Check children and parents of the all_test* apps.

    Every lookup passes True as the third argument, and every result tuple
    is (app_name, job_id, dependency_group_name).
    """
    def children(app_name, job_id):
        return list(dag_tools.get_children(app_name, job_id, True,))

    def parents(app_name, job_id):
        return list(dag_tools.get_parents(app_name, job_id, True))

    # all_test1 children
    nt.assert_items_equal(
        children(all_test1, '20140601_1'),
        [
            (all_test3, '20140601_1', 'default'),
            (all_test3, '20140601_2', 'default'),
            (all_test4, '20140601', 'default'),
            (all_test4b, '20140601', 'both'),
            (all_test5, '20140601', 'both_apps'),
        ])
    nt.assert_items_equal(
        children(all_test1, '20140601_1'),
        children(all_test1, '20140601_2'))
    # NOTE(review): this check appears twice in a row in the original test.
    nt.assert_items_equal(children(all_test1, '20140601_0'), [])
    nt.assert_items_equal(children(all_test1, '20140601_0'), [])

    # all_test2 children
    both_test4 = [
        (all_test4, '20140601', 'default'),
        (all_test4b, '20140601', 'both'),
    ]
    nt.assert_items_equal(children(all_test2, '20140601_0'), [])
    nt.assert_items_equal(children(all_test2, '20140601_1'), [])
    nt.assert_items_equal(children(all_test2, '20140601_2'), both_test4)
    nt.assert_items_equal(children(all_test2, '20140601_3'), both_test4)
    nt.assert_items_equal(
        children(all_test2, '20140601_4'),
        [(all_test5, '20140601', 'both_apps')])

    # all_test3 parents
    nt.assert_items_equal(parents(all_test3, "20140601_0"), [])
    nt.assert_items_equal(
        parents(all_test3, "20140601_1"),
        [
            (all_test1, '20140601_1', 'default'),
            (all_test1, '20140601_2', 'default'),
        ])
    nt.assert_items_equal(
        parents(all_test3, "20140601_2"),
        parents(all_test3, "20140601_1"))
    nt.assert_items_equal(parents(all_test3, "20140601_3"), [])

    # all_test4 parents
    nt.assert_items_equal(
        parents(all_test4, "20140601"),
        [
            (all_test1, '20140601_1', 'default'),
            (all_test1, '20140601_2', 'default'),
            (all_test2, '20140601_2', 'default'),
            (all_test2, '20140601_3', 'default'),
        ])
    # NOTE(review): both sides are the same call, so this only checks that
    # two consecutive lookups agree.
    nt.assert_items_equal(
        parents(all_test4, "20140601"),
        parents(all_test4, "20140601"))

    # all_test5 parents
    nt.assert_items_equal(
        parents(all_test5, "20140601"),
        [
            (all_test1, '20140601_1', 'both_apps'),
            (all_test1, '20140601_2', 'both_apps'),
            (all_test2, '20140601_4', 'both_apps'),
        ])
def _maybe_queue_children(parent_app_name, parent_job_id):
    """
    This is basically a "set_state(completed=True)" pre-commit hook

    Given that the task (parent_app_name, parent_job_id) has completed, walk
    its children in the dag of tasks.  Each child has a counter of completed
    parents, located by `_path_num_complete_parents`; this function
    increments it once per child.  When the counter is at least the number
    of parents returned by `dt.get_parents`, the child is handed to
    `readd_subtask` so it lands in its task queue.  Otherwise the child just
    moves one step closer to being queued.

    Raises:
        exceptions.JobAlreadyQueued: logged and then re-raised when
            `readd_subtask` raises it.
    """
    qbcli = shared.get_qbclient()
    for child_app_name, cjob_id, _dep_grp in dt.get_children(
            parent_app_name, parent_job_id, True):
        ld = {
            'child_app_name': child_app_name,
            'child_job_id': cjob_id,
            'app_name': parent_app_name,
            'job_id': parent_job_id,
        }
        # Only the number of parents matters, so don't build a list of them.
        ptotal = sum(1 for _ in dt.get_parents(child_app_name, cjob_id))
        pcomplete = qbcli.increment(
            _path_num_complete_parents(child_app_name, cjob_id))
        progress = dict(
            num_complete_dependencies=pcomplete,
            num_total_dependencies=ptotal,
            **ld)

        if pcomplete < ptotal:
            # Some parents have not completed yet; nothing to queue.
            log.info(
                "Child job one step closer to being queued!", extra=progress)
            continue

        log.info("Parent is queuing a child task", extra=ld)
        if pcomplete > ptotal:
            # More completions recorded than parents exist; warn but still
            # queue the child.
            log.warning(
                "For some reason, I calculated that more parents"
                " completed than there are parents."
                " If you aren't re-adding tasks, this could be a code bug"
                " that results in tasks unnecessarily sitting in queue.",
                extra=progress)
        if check_state(child_app_name, cjob_id, completed=True):
            log.warning(
                "Queuing a previously completed child task"
                " presumably because of the following:"
                " 1) you manually queued both a"
                " parent/ancestor and the child,"
                " and 2) the child completed first."
                " You probably shouldn't manually re-queue both parents"
                " and children. Just queue one of them.",
                extra=ld)
        try:
            readd_subtask(
                child_app_name, cjob_id,
                _reset_descendants=False,  # descendants previously handled
                _ignore_if_queued=True)
        except exceptions.JobAlreadyQueued:
            log.info("Child already in queue", extra=dict(ld))
            raise