Exemple #1
0
def test_get_children_with_complicated_job_ids(
        func_name, valid1, valid2, valid3, valid3b, valid4):
    """Check dag_tools.get_children against multi-component job ids.

    Each check lists the children of one (app_name, job_id) pair and
    compares them, ignoring order, to the expected
    (child_app_name, child_job_id, dependency_group) triples.
    """
    same_items = nt.assert_items_equal
    alt_group = 'another_way_to_state_same_thing'

    def children_of(app_name, job_id):
        # Materialize the result so it can be compared as a collection.
        return list(dag_tools.get_children(app_name, job_id))

    # A valid1 job id that fans out to several children.
    found = children_of(valid1, "20151015_2")
    same_items(found, [
        (valid4, '20151015_101', 'default'),
        (valid3b, '20151015_101', alt_group),
        (valid3b, '20151015_102', alt_group),
        (valid3, '20151015_101', 'default'),
        (valid3, '20151015_102', 'default'),
    ])

    # A valid1 job id for which no children are expected.
    found = children_of(valid1, "20151015_3-%s" % func_name)
    same_items(found, [])

    # A valid2 job id that yields one job per child app.
    found = children_of(valid2, "20151015_101")
    same_items(found, [
        (valid4, '20151015_101', 'default'),
        (valid3b, '20151015_101', alt_group),
        (valid3, '20151015_101', 'default'),
    ])

    # A valid2 job id for which no children are expected.
    found = children_of(valid2, "20151015_103")
    same_items(found, [])
Exemple #2
0
def _maybe_queue_children(parent_app_name, parent_job_id):
    """
    This is basically a "set_state(completed=True)" pre-commit hook

    Assume the task identified by (parent_app_name, parent_job_id) is
    completed.  For each of that parent's children in the dag graph of
    tasks, increment the child's "number of completed parents" counter and
    compare it with the child's total number of parents.

    The counter lives at the path returned by
    `_path_num_complete_parents(child_app_name, child_job_id)`; the value
    returned by `qbcli.increment(...)` is treated as the current count.

    If the count has reached (or exceeded) the number of parents, the child
    is re-added to its queue via `readd_subtask`.  Otherwise we only log
    the child's progress.

    Raises:
        exceptions.JobAlreadyQueued: re-raised (after logging) if
            `readd_subtask` raises it for a child.
    """
    qbcli = shared.get_qbclient()
    gen = dt.get_children(parent_app_name, parent_job_id, True)
    for child_app_name, cjob_id, _dep_grp in gen:
        # Common logging context shared by every message about this child.
        ld = dict(child_app_name=child_app_name,
                  child_job_id=cjob_id,
                  app_name=parent_app_name,
                  job_id=parent_job_id)
        ptotal = len(list(dt.get_parents(child_app_name, cjob_id)))
        pcomplete = qbcli.increment(
            _path_num_complete_parents(child_app_name, cjob_id))

        if pcomplete >= ptotal:
            log.info("Parent is queuing a child task", extra=ld)
            if pcomplete > ptotal:
                # The counter overshot the number of parents; still queue
                # the child, but make the anomaly visible.
                log.warning(
                    "For some reason, I calculated that more parents"
                    " completed than there are parents."
                    " If you aren't re-adding tasks, this could be a code bug"
                    " that results in tasks unnecessarily sitting in queue.",
                    extra=dict(num_complete_dependencies=pcomplete,
                               num_total_dependencies=ptotal,
                               **ld))
            if check_state(child_app_name, cjob_id, completed=True):
                log.warning(
                    "Queuing a previously completed child task"
                    " presumably because of the following:"
                    " 1) you manually queued both a"
                    " parent/ancestor and the child,"
                    " and 2) the child completed first."
                    " You probably shouldn't manually re-queue both parents"
                    " and children. Just queue one of them.",
                    extra=ld)

            try:
                readd_subtask(
                    child_app_name,
                    cjob_id,
                    _reset_descendants=False,  # descendants previously handled
                    _ignore_if_queued=True)
            except exceptions.JobAlreadyQueued:
                log.info("Child already in queue", extra=dict(**ld))
                raise
        else:
            # pcomplete < ptotal: some parents have not completed yet.
            log.info("Child job one step closer to being queued!",
                     extra=dict(num_complete_dependencies=pcomplete,
                                num_total_dependencies=ptotal,
                                **ld))
Exemple #3
0
def test_fan_out_tasks(app1, app2, app4, fanout1, func_name):
    """Exercise many-to-many relationships between parent and child tasks.

    Results of dag_tools.get_parents / get_children are compared, ignoring
    order, with the expected (app_name, job_id, dependency_group) triples.
    """
    same_items = nt.assert_count_equal
    fanout1_name = 'test_stolos/test_fan_out_tasks/fanout1'

    # A fanout1 job id for which no parents are expected.
    parents = list(dag_tools.get_parents(fanout1_name, '20140715_8'))
    same_items(parents, [])

    # One fanout1 job depends on the same app1 profile across two dates.
    parents = list(dag_tools.get_parents(
        fanout1_name, '20140715_testID5-%s' % func_name, True))
    same_items(parents, [
        (app1, '20140714_555_profile-%s' % func_name, u'dep2'),
        (app1, '20140715_555_profile-%s' % func_name, u'dep2'),
    ])

    # An app1 job whose fanout1 children all come from the dep1 group.
    profile_9 = '20140715_9_profile-%s' % func_name
    children = list(dag_tools.get_children(
        'test_stolos/test_fan_out_tasks/app1', profile_9, True))
    same_items(children, [
        (app2, '20140715_9_profile-%s' % func_name, 'default'),
        (app4, '20140715_9_profile-%s' % func_name, 'default'),
        (fanout1, '20140715_testID1-%s' % func_name, u'dep1'),
        (fanout1, '20140715_testID2-%s' % func_name, u'dep1'),
        (fanout1, '20140715_testID3-%s' % func_name, u'dep1'),
    ])

    # An app1 job that feeds fanout1 through both dep1 and dep2.
    profile_555 = '20140715_555_profile-%s' % func_name
    children = list(dag_tools.get_children(app1, profile_555, True))
    same_items(children, [
        (app2, '20140715_555_profile-%s' % func_name, 'default'),
        (app4, '20140715_555_profile-%s' % func_name, 'default'),
        (fanout1, u'20140714_testID5-%s' % func_name, u'dep2'),
        (fanout1, u'20140714_testID6-%s' % func_name, u'dep2'),
        (fanout1, u'20140715_testID1-%s' % func_name, u'dep1'),
        (fanout1, u'20140715_testID2-%s' % func_name, u'dep1'),
        (fanout1, u'20140715_testID3-%s' % func_name, u'dep1'),
        (fanout1, u'20140715_testID5-%s' % func_name, u'dep2'),
        (fanout1, u'20140715_testID6-%s' % func_name, u'dep2'),
    ])
Exemple #4
0
def test_get_children(func_name, app1, app2, app4, depends_on1,
                      depends_on2, bash1, bash2):
    """Check dag_tools.get_children for several parent apps.

    Each result is compared, ignoring order, with the expected
    (child_app_name, child_job_id, dependency_group) triples.
    """
    same_items = nt.assert_count_equal

    def children_of(app_name, job_id):
        # Materialize the result so it can be compared as a collection.
        return list(dag_tools.get_children(app_name, job_id))

    same_items(
        children_of(depends_on2, '20140601_876_profile-%s' % func_name),
        [(depends_on1, u'20140601_testID2-%s' % func_name, u'depgrp2')])

    # bash2 is expected to have no children; bash1 is expected to have
    # bash2 as its only child, with an identical job id.
    same_items(children_of(bash2, '20140601_9899_purchase'), [])
    same_items(
        children_of(bash1, '20140601_9899_purchase'),
        [(bash2, '20140601_9899_purchase', 'default')])

    # Both app1 job ids expect the same depends_on1 child, plus app2 and
    # app4 children that reuse the parent's job id.
    app1_job_ids = (
        '20140601_999_purchase-%s' % func_name,
        '20140601_876_purchase-%s' % func_name,
    )
    for parent_job_id in app1_job_ids:
        same_items(
            children_of(app1, parent_job_id),
            [
                (depends_on1, u'20140601_testID1-%s' % func_name, u'depgrp1'),
                (app2, parent_job_id, 'default'),
                (app4, parent_job_id, 'default'),
            ])
Exemple #5
0
def test_autofill_all(func_name, autofill1, autofill2, autofill3,
                      autofill_getparents):
    """Check get_children / get_parents for the autofill test apps.

    Covers job ids both inside and outside the bounds for which the
    tests expect relatives to exist.
    """
    same_items = nt.assert_items_equal

    def children_of(app_name, job_id):
        return list(dag_tools.get_children(app_name, job_id))

    def parents_of(app_name, job_id):
        return list(dag_tools.get_parents(app_name, job_id))

    # autofill1: out-of-bounds job ids should have no children
    for job_id in ('9', '11', '20'):
        same_items(children_of(autofill1, job_id), [])

    # autofill1: an in-bounds job id fans out to several autofill3 dates
    expected = [
        (autofill3, date, 'default')
        for date in ('20150101', '20150102', '20150103', '20150104')
    ]
    expected.append((autofill_getparents, '20150101_10_10', 'default'))
    same_items(children_of(autofill1, '10'), expected)

    # autofill2: out of bounds, then in bounds
    same_items(children_of(autofill2, '20150128'), [])
    same_items(
        children_of(autofill2, '20150101'),
        [
            (autofill3, '20150101', 'default'),
            (autofill_getparents, '20150101_10_10', 'default'),
        ])

    # autofill3: no children expected; parents out of and in bounds
    same_items(children_of(autofill3, '20150101'), [])
    same_items(parents_of(autofill3, '20150128'), [])
    expected = [
        (autofill1, job_id) for job_id in ('10', '12', '14', '16', '18')
    ]
    expected.append((autofill2, '20150101'))
    same_items(parents_of(autofill3, '20150101'), expected)
Exemple #6
0
def _recursively_reset_child_task_state(parent_app_name, job_id, so_far=None):
    """Set every already-existing descendant of a task to pending.

    Walks the children of (parent_app_name, job_id) depth first.  A child
    is reset, and its own children visited, only if its job path exists
    in the queue backend.

    `so_far` is the set of (app_name, job_id) pairs already visited during
    this walk; it is shared across the recursion so that a node reachable
    through several parents is handled only once.  Callers normally leave
    it as None.
    """
    visited = set() if so_far is None else so_far

    qbcli = shared.get_qbclient()
    log.debug(
        "recursively setting all descendant tasks to 'pending' and "
        " marking that the parent is not completed",
        extra=dict(app_name=parent_app_name, job_id=job_id))

    children = dt.get_children(parent_app_name, job_id, True)
    for child_app_name, cjob_id, _ in children:
        node = (child_app_name, cjob_id)
        if node in visited:
            continue
        visited.add(node)

        if not qbcli.exists(shared.get_job_path(child_app_name, cjob_id)):
            # Unknown child: no need to recurse further down the tree.
            continue

        set_state(child_app_name, cjob_id, pending=True)
        _recursively_reset_child_task_state(child_app_name, cjob_id, visited)
Exemple #7
0
def _recursively_reset_child_task_state(parent_app_name, job_id, so_far=None):
    """Recursively mark existing descendant tasks as pending.

    For each child of (parent_app_name, job_id) that has not been seen yet
    in this traversal, check whether its job path exists in the queue
    backend.  If it does, set the child to pending and repeat the process
    for that child's own children; if not, that branch is not explored.

    `so_far` accumulates the (app_name, job_id) pairs already seen and is
    passed down the recursion; leave it as None on the initial call.
    """
    if so_far is None:
        seen = set()
    else:
        seen = so_far

    qbcli = shared.get_qbclient()
    log_context = dict(app_name=parent_app_name, job_id=job_id)
    log.debug(
        "recursively setting all descendant tasks to 'pending' and "
        " marking that the parent is not completed",
        extra=log_context)

    for child in dt.get_children(parent_app_name, job_id, True):
        child_app_name, cjob_id, _dep_grp = child
        child_key = (child_app_name, cjob_id)
        if child_key not in seen:
            seen.add(child_key)
            child_path = shared.get_job_path(child_app_name, cjob_id)
            if qbcli.exists(child_path):
                set_state(child_app_name, cjob_id, pending=True)
                _recursively_reset_child_task_state(
                    child_app_name, cjob_id, seen)
            # otherwise: no need to recurse further down the tree
Exemple #8
0
def test_depends_on_all(func_name, all_test1, all_test2, all_test3, all_test4,
                        all_test4b, all_test5):
    """Check get_children / get_parents for the all_test* apps.

    Every lookup passes True as the third argument, and results are
    compared, ignoring order, with the expected
    (app_name, job_id, dependency_group) triples.
    """
    same_items = nt.assert_items_equal

    def children_of(app_name, job_id):
        return list(dag_tools.get_children(app_name, job_id, True))

    def parents_of(app_name, job_id):
        return list(dag_tools.get_parents(app_name, job_id, True))

    # all_test1 children
    same_items(
        children_of(all_test1, '20140601_1'),
        [
            (all_test3, '20140601_1', 'default'),
            (all_test3, '20140601_2', 'default'),
            (all_test4, '20140601', 'default'),
            (all_test4b, '20140601', 'both'),
            (all_test5, '20140601', 'both_apps'),
        ])
    same_items(
        children_of(all_test1, '20140601_1'),
        children_of(all_test1, '20140601_2'))
    # This check is performed twice, as in the original test.
    for _ in range(2):
        same_items(children_of(all_test1, '20140601_0'), [])

    # all_test2 children
    for job_id in ('20140601_0', '20140601_1'):
        same_items(children_of(all_test2, job_id), [])
    for job_id in ('20140601_2', '20140601_3'):
        same_items(
            children_of(all_test2, job_id),
            [
                (all_test4, '20140601', 'default'),
                (all_test4b, '20140601', 'both'),
            ])
    same_items(
        children_of(all_test2, '20140601_4'),
        [(all_test5, '20140601', 'both_apps')])

    # all_test3 parents
    same_items(parents_of(all_test3, "20140601_0"), [])
    same_items(
        parents_of(all_test3, "20140601_1"),
        [
            (all_test1, '20140601_1', 'default'),
            (all_test1, '20140601_2', 'default'),
        ])
    same_items(
        parents_of(all_test3, "20140601_2"),
        parents_of(all_test3, "20140601_1"))
    same_items(parents_of(all_test3, "20140601_3"), [])

    # all_test4 parents
    same_items(
        parents_of(all_test4, "20140601"),
        [
            (all_test1, '20140601_1', 'default'),
            (all_test1, '20140601_2', 'default'),
            (all_test2, '20140601_2', 'default'),
            (all_test2, '20140601_3', 'default'),
        ])
    # Two separate lookups of the same job must agree with each other.
    same_items(
        parents_of(all_test4, "20140601"),
        parents_of(all_test4, "20140601"))

    # all_test5 parents
    same_items(
        parents_of(all_test5, "20140601"),
        [
            (all_test1, '20140601_1', 'both_apps'),
            (all_test1, '20140601_2', 'both_apps'),
            (all_test2, '20140601_4', 'both_apps'),
        ])
Exemple #9
0
def _maybe_queue_children(parent_app_name, parent_job_id):
    """
    This is basically a "set_state(completed=True)" pre-commit hook

    Assume the task identified by (parent_app_name, parent_job_id) is
    completed.  For every child of that task in the dag graph:

      1. count the child's parents (`ptotal`),
      2. increment the child's completed-parents counter, stored at the
         path given by `_path_num_complete_parents(...)`, and treat the
         value returned by `qbcli.increment` as the new count
         (`pcomplete`),
      3. if `pcomplete >= ptotal`, re-add the child to its queue with
         `readd_subtask`; otherwise just log the child's progress.

    Warnings are logged when the counter exceeds the number of parents and
    when the child being queued was already marked completed.

    Raises:
        exceptions.JobAlreadyQueued: logged and re-raised if
            `readd_subtask` raises it for a child.
    """
    qbcli = shared.get_qbclient()
    gen = dt.get_children(parent_app_name, parent_job_id, True)
    for child_app_name, cjob_id, _dep_grp in gen:
        # Logging context attached to every message about this child.
        ld = dict(
            child_app_name=child_app_name,
            child_job_id=cjob_id,
            app_name=parent_app_name,
            job_id=parent_job_id)
        ptotal = len(list(dt.get_parents(child_app_name, cjob_id)))
        pcomplete = qbcli.increment(
            _path_num_complete_parents(child_app_name, cjob_id))

        if pcomplete < ptotal:
            # Some parents have not completed yet; nothing to queue.
            log.info(
                "Child job one step closer to being queued!",
                extra=dict(
                    num_complete_dependencies=pcomplete,
                    num_total_dependencies=ptotal, **ld))
            continue

        log.info(
            "Parent is queuing a child task", extra=ld)
        if pcomplete > ptotal:
            # The counter overshot; queue the child anyway but report it.
            log.warning(
                "For some reason, I calculated that more parents"
                " completed than there are parents."
                " If you aren't re-adding tasks, this could be a code bug"
                " that results in tasks unnecessarily sitting in queue.",
                extra=dict(
                    num_complete_dependencies=pcomplete,
                    num_total_dependencies=ptotal, **ld))
        if check_state(child_app_name, cjob_id, completed=True):
            log.warning(
                "Queuing a previously completed child task"
                " presumably because of the following:"
                " 1) you manually queued both a"
                " parent/ancestor and the child,"
                " and 2) the child completed first."
                " You probably shouldn't manually re-queue both parents"
                " and children. Just queue one of them.",
                extra=ld)

        try:
            readd_subtask(
                child_app_name, cjob_id,
                _reset_descendants=False,  # descendants previously handled
                _ignore_if_queued=True)
        except exceptions.JobAlreadyQueued:
            log.info("Child already in queue", extra=dict(**ld))
            raise