Code example #1
File: modify_job_state.py    Project: xyuan/stolos
def _maybe_queue_children(parent_app_name, parent_job_id):
    """
    This is basically a "set_state(completed=True)" pre-commit hook

    Assume the task identified by (parent_app_name, parent_job_id) is
    completed, and for each of that parent's children in the dag graph of
    tasks, set 1/num_parents worth of points towards that child's completion.

    If any one child has earned 1 point, then add it to its task queue

    We track the "score" of a child by counting files in the job path:
        .../parents/dependency_name/parent_app_name/parent_job_id
    """
    qbcli = shared.get_qbclient()
    gen = dt.get_children(parent_app_name, parent_job_id, True)
    for child_app_name, cjob_id, dep_grp in gen:
        ld = dict(child_app_name=child_app_name,
                  child_job_id=cjob_id,
                  app_name=parent_app_name,
                  job_id=parent_job_id)
        ptotal = len(list(dt.get_parents(child_app_name, cjob_id)))
        pcomplete = qbcli.increment(
            _path_num_complete_parents(child_app_name, cjob_id))

        if pcomplete >= ptotal:
            log.info("Parent is queuing a child task", extra=ld)
            if pcomplete > ptotal:
                log.warn(
                    "For some reason, I calculated that more parents"
                    " completed than there are parents."
                    " If you aren't re-adding tasks, this could be a code bug"
                    " that results in tasks unnecessarily sitting in queue.",
                    extra=dict(num_complete_dependencies=pcomplete,
                               num_total_dependencies=ptotal,
                               **ld))
            if check_state(child_app_name, cjob_id, completed=True):
                log.warn(
                    "Queuing a previously completed child task"
                    " presumably because of the following:"
                    " 1) you manually queued both a"
                    " parent/ancestor and the child,"
                    " and 2) the child completed first."
                    " You probably shouldn't manually re-queue both parents"
                    " and children. Just queue one of them.",
                    extra=ld)

            try:
                readd_subtask(
                    child_app_name,
                    cjob_id,
                    _reset_descendants=False,  # descendants previously handled
                    _ignore_if_queued=True)
            except exceptions.JobAlreadyQueued:
                log.info("Child already in queue", extra=dict(**ld))
                raise
        elif pcomplete < ptotal:
            log.info("Child job one step closer to being queued!",
                     extra=dict(num_complete_dependencies=pcomplete,
                                num_total_dependencies=ptotal,
                                **ld))
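
The docstring above describes a simple scoring scheme: each completed parent adds one point under the child's job path, and the child is queued once its score reaches its number of parents. The sketch below is not stolos code; it swaps the queue-backend counter (qbcli.increment) for an in-memory dict and uses made-up names, purely to show that bookkeeping in isolation.

# Standalone sketch of the parent-completion scoring described above.
# Not the stolos implementation: the real code increments a counter in the
# queue backend, whereas this toy version uses an in-memory defaultdict.
from collections import defaultdict

_num_complete_parents = defaultdict(int)  # stand-in for qbcli.increment(...)


def mark_parent_complete(child_path, num_parents):
    """Give the child one point; queue it once every parent has reported in."""
    _num_complete_parents[child_path] += 1
    if _num_complete_parents[child_path] >= num_parents:
        print("queue %s" % child_path)  # stand-in for readd_subtask(...)
    else:
        print("%s: %d of %d parents complete"
              % (child_path, _num_complete_parents[child_path], num_parents))


# A child with two parents is queued only after the second parent completes:
mark_parent_complete("child_app/job_1", num_parents=2)  # 1 of 2 parents complete
mark_parent_complete("child_app/job_1", num_parents=2)  # queue child_app/job_1
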
Code example #2
def test_autofill_get_parents(autofill1, autofill2, autofill_getparents):
    # test the case where the child job_id is a superset of its parents'
    # job_ids and depends_on defines 2+ app_names, each with a different
    # job_id template (a very specific scenario; see the sketch after this
    # test)
    nt.assert_items_equal(
        list(dag_tools.get_parents(autofill_getparents, '20150101_10_10')),
        [(autofill1, '10'), (autofill2, '20150101')]
    )
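
The assertion above implies that the composite child job_id '20150101_10_10' decomposes into components, and that each parent app receives only the components its own job_id template uses. The sketch below is a hypothetical, standalone illustration of that idea; the component names and "-like" app labels are invented, and it is not how stolos actually parses job_id templates.

# Hypothetical illustration of mapping a composite child job_id onto each
# parent's narrower job_id.  Component and app names are made up.
CHILD_TEMPLATE = ("date", "client_id", "collection")
PARENT_TEMPLATES = {
    "autofill1-like app": ("client_id",),  # parent keyed on one component
    "autofill2-like app": ("date",),       # parent keyed on another
}


def parent_job_ids(child_job_id):
    parts = dict(zip(CHILD_TEMPLATE, child_job_id.split("_")))
    return {app: "_".join(parts[c] for c in components)
            for app, components in PARENT_TEMPLATES.items()}


# parent_job_ids("20150101_10_10")
#   == {"autofill1-like app": "10", "autofill2-like app": "20150101"}
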
Code example #3
def test_get_parents_with_complicated_job_ids(
        func_name, valid1, valid2, valid3, valid3b, valid4):
    nt.assert_items_equal(
        list(dag_tools.get_parents(valid3, '20151015_100')),
        []
    )
    nt.assert_items_equal(
        list(dag_tools.get_parents(valid3, '20151015_101')),
        [
            (valid1, '20151015_1'),
            (valid1, '20151015_2'),
            (valid2, '20151015_101')
        ]
    )
    # valid3b should be same as valid3
    nt.assert_items_equal(
        list(dag_tools.get_parents(valid3b, '20151015_100')),
        list(dag_tools.get_parents(valid3, '20151015_100')),
    )

    nt.assert_items_equal(
        list(dag_tools.get_parents(valid4, '20151015_100')),
        [])
    nt.assert_items_equal(
        list(dag_tools.get_parents(valid4, '20151015_102')),
        [])
    nt.assert_items_equal(
        list(dag_tools.get_parents(valid4, '20151015_101')),
        [
            (valid1, '20151015_1'),
            (valid1, '20151015_2'),
            (valid2, '20151015_101')
        ]
    )
Code example #4
def test_autofill_all(func_name, autofill1, autofill2, autofill3,
                      autofill_getparents):
    # autofill1 out of bounds get_children
    nt.assert_items_equal(list(dag_tools.get_children(autofill1, '9')), [])
    nt.assert_items_equal(list(dag_tools.get_children(autofill1, '11')), [])
    nt.assert_items_equal(list(dag_tools.get_children(autofill1, '20')), [])
    # autofill1 in bounds get_children
    nt.assert_items_equal(
        list(dag_tools.get_children(autofill1, '10')),
        [
            (autofill3, '20150101', 'default'),
            (autofill3, '20150102', 'default'),
            (autofill3, '20150103', 'default'),
            (autofill3, '20150104', 'default'),
            (autofill_getparents, '20150101_10_10', 'default'),
        ])

    # autofill2 out of bounds get_children
    nt.assert_items_equal(
        list(dag_tools.get_children(autofill2, '20150128')), [])
    # autofill2 in bounds get_children
    nt.assert_items_equal(
        list(dag_tools.get_children(autofill2, '20150101')),
        [
            (autofill3, '20150101', 'default'),
            (autofill_getparents, '20150101_10_10', 'default')
        ])

    # autofill3: no children; get_parents out of and in bounds
    nt.assert_items_equal(
        list(dag_tools.get_children(autofill3, '20150101')), [])
    nt.assert_items_equal(
        list(dag_tools.get_parents(autofill3, '20150128')), [])
    nt.assert_items_equal(
        list(dag_tools.get_parents(autofill3, '20150101')),
        [
            (autofill1, '10'),
            (autofill1, '12'),
            (autofill1, '14'),
            (autofill1, '16'),
            (autofill1, '18'),
            (autofill2, '20150101'),
        ])
Code example #5
def test_fan_out_tasks(app1, app2, app4, fanout1, func_name):
    # test for Many-to-Many relationships between parent and child tasks
    nt.assert_count_equal(
        list(dag_tools.get_parents(
            'test_stolos/test_fan_out_tasks/fanout1', '20140715_8')),
        [])

    nt.assert_count_equal(
        list(dag_tools.get_parents(
            'test_stolos/test_fan_out_tasks/fanout1',
            '20140715_testID5-%s' % func_name, True)),
        [
            (app1, '20140714_555_profile-%s' % func_name, u'dep2'),
            (app1, '20140715_555_profile-%s' % func_name, u'dep2'),
        ])

    nt.assert_count_equal(
        list(dag_tools.get_children(
            'test_stolos/test_fan_out_tasks/app1',
            '20140715_9_profile-%s' % func_name, True,)),
        [(app2, '20140715_9_profile-%s' % func_name, 'default'),
         (app4, '20140715_9_profile-%s' % func_name, 'default'),
         (fanout1, '20140715_testID1-%s' % func_name, u'dep1'),
         (fanout1, '20140715_testID2-%s' % func_name, u'dep1'),
         (fanout1, '20140715_testID3-%s' % func_name, u'dep1'),
         ])

    nt.assert_count_equal(
        list(dag_tools.get_children(
            app1, '20140715_555_profile-%s' % func_name, True,)),
        [
            (app2, '20140715_555_profile-%s' % func_name, 'default'),
            (app4, '20140715_555_profile-%s' % func_name, 'default'),
            (fanout1, u'20140714_testID5-%s' % func_name, u'dep2'),
            (fanout1, u'20140714_testID6-%s' % func_name, u'dep2'),
            (fanout1, u'20140715_testID1-%s' % func_name, u'dep1'),
            (fanout1, u'20140715_testID2-%s' % func_name, u'dep1'),
            (fanout1, u'20140715_testID3-%s' % func_name, u'dep1'),
            (fanout1, u'20140715_testID5-%s' % func_name, u'dep2'),
            (fanout1, u'20140715_testID6-%s' % func_name, u'dep2'),
        ])
Code example #6
def test_topological_sort(topological_sort1, app1, app2, depends_on1, bash2,
                          depends_on_job_id1, func_name):
    nt.assert_count_equal(
        list(dag_tools.topological_sort(dag_tools.get_parents(
            topological_sort1, depends_on_job_id1, True,))),
        [
            (app1, '20140601_101_profile-%s' % func_name, u'dep1'),
            (app1, '20140601_102_profile-%s' % func_name, u'dep1'),
            (app2, '20140601_101_profile-%s' % func_name, u'dep1'),
            (app2, '20140601_102_profile-%s' % func_name, u'dep1'),
            (depends_on1, u'20140601_testID1-%s' % func_name, u'dep1'),
            (bash2, '20140601_101_profile-%s' % func_name, u'dep1'),
            (bash2, '20140601_102_profile-%s' % func_name, u'dep1')
        ]
    )
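
dag_tools.topological_sort orders the parent tasks so that no task appears before one of its ancestors. For comparison only (this is not the stolos implementation), the same idea expressed with the standard library's graphlib, using invented node names:

# Standard-library analogue of a topological sort (Python 3.9+).  The node
# names and edges are made up and unrelated to the fixtures above.
from graphlib import TopologicalSorter

ts = TopologicalSorter()
ts.add("depends_on1-like", "app1-like", "app2-like")  # runs after both apps
ts.add("bash2-like", "app2-like")                     # runs after app2-like
print(list(ts.static_order()))
# e.g. ['app1-like', 'app2-like', 'depends_on1-like', 'bash2-like']
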
Code example #7
File: modify_job_state.py    Project: xyuan/stolos
def ensure_parents_completed(app_name, job_id):
    """
    Assume that given job_id is pulled from the app_name's queue.

    Check that the parent tasks for this (app_name, job_id) pair have completed
    If they haven't completed and aren't pending, maybe create the
    parent task in its appropriate queue.  Also decide whether the calling
    process should requeue given job_id or remove itself from queue.
    Because this needs to happen as one transaction, also return a list of
    execute locks that the calling code must release after it decides how to
    handle the current job_id.

    Returns a tuple:
        (are_parents_completed, should_job_id_be_consumed_from_queue,
         parent_execute_locks_to_release)
    """
    parents_completed = True
    consume_queue = False
    parent_lock = None
    for parent, pjob_id, dep_grp in dt.get_parents(app_name, job_id, True):
        if check_state(app_name=parent, job_id=pjob_id, completed=True):
            continue
        parents_completed = False
        log.info('My parent has not completed yet.',
                 extra=dict(parent_app_name=parent,
                            parent_job_id=pjob_id,
                            app_name=app_name,
                            job_id=job_id))

        # At this point, I need to be re-run.
        # The question is whether to requeue myself or assume the parent will.

        # Assume the default is that I requeue myself.  Sometimes, this might
        # result in me cycling through the queue a couple of times until the
        # parent finishes.

        # If the parent is running, it will be able to requeue me if I exit in
        # time.  If it doesn't, either I'll requeue myself by default or
        # another parent will.  So, do nothing in this case.

        # If the parent is not running, I should try to maybe_add_subtask it.
        # - If I can't add the parent, then possibly something else is adding
        #   it, or it ran once and is waiting on one of my grandparents.
        # - If I can maybe_add_subtask my parent, then it definitely wasn't
        #   running before.

        # In both cases, I should try to unqueue myself if I can guarantee
        # that the parent won't run by the time I unqueue myself.  Otherwise,
        # I should default to assuming the parent is running and requeue
        # myself.

        added = maybe_add_subtask(parent, pjob_id)

        # If the parent is marked 'skipped' and someone then calls
        # maybe_add_subtask on the child, the child could requeue itself
        # indefinitely.  To prevent this, the child should unqueue itself and
        # complain loudly that it is somehow running even though its parent is
        # "skipped".
        if not added and check_state(parent, pjob_id, skipped=True):
            consume_queue = True
            #  raise some sort of error
            log.warn(
                "My parent_job_id is marked as 'skipped',"
                " so should be impossible for me, the child, to exist!"
                " Requesting to unqueue myself.  This is odd.",
                extra=dict(parent_app_name=parent,
                           parent_job_id=pjob_id,
                           app_name=app_name,
                           job_id=job_id))
            break

        if parent_lock is not None:
            continue  # we already found a parent that promises to requeue me

        elock = obtain_execute_lock(parent,
                                    pjob_id,
                                    raise_on_error=False,
                                    blocking=False)
        if elock:
            if not check_state(parent, pjob_id, pending=True):
                elock.release()  # race condition: parent just did something!
            else:
                consume_queue = True
                parent_lock = elock
                log.info(
                    "I will unqueue myself with the expectation that"
                    " my parent will requeue me",
                    extra=dict(app_name=app_name, job_id=job_id))
    return parents_completed, consume_queue, parent_lock
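
Per the docstring, the caller is expected to act on the returned tuple and release the parent's execute lock once it has decided what to do. The sketch below shows one plausible consumer; it is hypothetical, and run_job, consume and requeue are made-up helpers rather than stolos functions.

# Hypothetical caller (not stolos code) showing how the returned tuple
# might be handled; run_job, consume and requeue are made-up helpers.
def handle_pulled_job(app_name, job_id):
    completed, consume_queue, parent_lock = ensure_parents_completed(
        app_name, job_id)
    try:
        if completed:
            run_job(app_name, job_id)  # all parents done: safe to run
        elif consume_queue:
            consume(app_name, job_id)  # unqueue myself (e.g. a pending parent
                                       # holds the lock and will requeue me)
        else:
            requeue(app_name, job_id)  # cycle through the queue until the
                                       # parents finish
    finally:
        if parent_lock is not None:
            parent_lock.release()  # always release the parent's execute lock
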
Code example #8
def test_depends_on_all(func_name, all_test1, all_test2, all_test3, all_test4,
                        all_test4b, all_test5):
    # all_test1 children
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test1, '20140601_1', True,)),
        [
            (all_test3, '20140601_1', 'default'),
            (all_test3, '20140601_2', 'default'),
            (all_test4, '20140601', 'default'),
            (all_test4b, '20140601', 'both'),
            (all_test5, '20140601', 'both_apps'),
        ])
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test1, '20140601_1', True,)),
        list(dag_tools.get_children(
            all_test1, '20140601_2', True,))
    )
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test1, '20140601_0', True,)),
        [])
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test1, '20140601_0', True,)),
        [])

    # all_test2 children
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test2, '20140601_0', True,)),
        [])
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test2, '20140601_1', True,)),
        [])
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test2, '20140601_2', True,)),
        [
            (all_test4, '20140601', 'default'),
            (all_test4b, '20140601', 'both')
        ])
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test2, '20140601_3', True,)),
        [
            (all_test4, '20140601', 'default'),
            (all_test4b, '20140601', 'both')
        ])
    nt.assert_items_equal(
        list(dag_tools.get_children(
            all_test2, '20140601_4', True,)),
        [(all_test5, '20140601', 'both_apps')])

    # all_test3 parents
    nt.assert_items_equal(
        list(dag_tools.get_parents(all_test3, "20140601_0", True)),
        [])
    nt.assert_items_equal(
        list(dag_tools.get_parents(all_test3, "20140601_1", True)),
        [
            (all_test1, '20140601_1', 'default'),
            (all_test1, '20140601_2', 'default'),
        ])
    nt.assert_items_equal(
        list(dag_tools.get_parents(all_test3, "20140601_2", True)),
        list(dag_tools.get_parents(all_test3, "20140601_1", True)),
    )
    nt.assert_items_equal(
        list(dag_tools.get_parents(all_test3, "20140601_3", True)),
        [])

    # all_test4 parents
    nt.assert_items_equal(
        list(dag_tools.get_parents(all_test4, "20140601", True)),
        [
            (all_test1, '20140601_1', 'default'),
            (all_test1, '20140601_2', 'default'),
            (all_test2, '20140601_2', 'default'),
            (all_test2, '20140601_3', 'default'),
        ])
    nt.assert_items_equal(
        list(dag_tools.get_parents(all_test4, "20140601", True)),
        list(dag_tools.get_parents(all_test4, "20140601", True)),
    )

    # all_test5 parents
    nt.assert_items_equal(
        list(dag_tools.get_parents(all_test5, "20140601", True)),
        [
            (all_test1, '20140601_1', 'both_apps'),
            (all_test1, '20140601_2', 'both_apps'),
            (all_test2, '20140601_4', 'both_apps')
        ])
Code example #9
def test_get_parents(app1, app2, depends_on1, depends_on2, bash1, bash2,
                     depends_on_job_id1, func_name):

    # test case with no parents
    nt.assert_equal(
        list(dag_tools.get_parents(app1, '20140101_876_purchase', True)),
        []
    )

    # test the basic inheritance scenario
    nt.assert_count_equal(
        list(dag_tools.get_parents(bash2, '20140501_876_profile', True)),
        [(bash1, '20140501_876_profile', 'default')]
    )

    # test invalid job_id
    nt.assert_count_equal(
        list(dag_tools.get_parents(depends_on1, '20140101_999999', True)),
        []
    )

    # test invalid metadata in job_id
    nt.assert_count_equal(
        list(dag_tools.get_parents(depends_on1, '20140601_999', True)),
        []
    )

    # test depends_on for one of the dependency groups
    nt.assert_count_equal(
        list(dag_tools.get_parents(
            depends_on1, '20140601_testID2-%s' % func_name, True)),
        [
            (depends_on2, '20140601_1011_profile-%s' % func_name, u'depgrp2'),
            (depends_on2, '20140601_9020_profile-%s' % func_name, u'depgrp2'),
            (depends_on2, '20140601_876_profile-%s' % func_name, u'depgrp2')
        ])

    # test depends_on for one of the dependency groups
    # also tests that get_parents returns a stable ordering
    nt.assert_count_equal(
        list(dag_tools.get_parents(depends_on1, depends_on_job_id1, True)),
        [
            (app1, '20140601_1011_profile-%s' % func_name, u'depgrp1'),
            (app1, '20140601_1011_purchase-%s' % func_name, u'depgrp1'),
            (app1, '20140601_9020_profile-%s' % func_name, u'depgrp1'),
            (app1, '20140601_9020_purchase-%s' % func_name, u'depgrp1'),
            (app1, '20140601_876_profile-%s' % func_name, u'depgrp1'),
            (app1, '20140601_876_purchase-%s' % func_name, u'depgrp1'),
            (app1, '20140601_999_purchase-%s' % func_name, u'depgrp1'),
            (app2, '20140601_1011_profile-%s' % func_name, u'depgrp1'),
            (app2, '20140601_1011_purchase-%s' % func_name, u'depgrp1'),
            (app2, '20140601_9020_profile-%s' % func_name, u'depgrp1'),
            (app2, '20140601_9020_purchase-%s' % func_name, u'depgrp1'),
            (app2, '20140601_876_profile-%s' % func_name, u'depgrp1'),
            (app2, '20140601_876_purchase-%s' % func_name, u'depgrp1')
        ]
    )

    # test depends_on when multiple dependency groups map to the same job_id
    # I guess it's okay if they map to the same id?
    nt.assert_count_equal(
        list(dag_tools.get_parents(
            depends_on1, '20140601_testID3-%s' % func_name, True)),
        [(app1, '20140601_444_profile-%s' % func_name, u'depgrp4'),
         (app1, '20140601_876_profile-%s' % func_name, u'depgrp3'),
         ]
    )

    # test the filter_deps option
    nt.assert_count_equal(
        list(dag_tools.get_parents(
            depends_on1, '20140601_testID3-%s' % func_name, True,
            filter_deps=['depgrp4'])),
        [(app1, '20140601_444_profile-%s' % func_name, u'depgrp4')]
    )

    with nt.assert_raises(exceptions.DAGMisconfigured):
        list(dag_tools.get_parents(
            depends_on1, '20140601_testID3-%s' % func_name, True,
            filter_deps=['depgrp99999']))