Ejemplo n.º 1
0
def test_update_state_supports_recomputing_released_results():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'z': {'alice'}}
    processing = set()
    released = {'x', 'y'}
    in_play = {'z'}

    new_dsk = {'x': 1, 'y': (inc, 'x')}
    new_dependencies = {'y': {'x'}}
    new_keys = {'y'}

    e_dsk = dsk.copy()
    e_waiting = {'x': set(), 'y': {'x'}}
    e_waiting_data = {'x': {'y'}, 'y': set(), 'z': set()}
    e_who_wants = {'z': {'client'}, 'y': {'client'}}
    e_wants_what = {'client': {'y', 'z'}}

    update_state(dsk, dependencies, dependents, who_wants, wants_what,
                 who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys, new_dependencies,
                 'client')

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert who_wants == e_who_wants
    assert wants_what == e_wants_what
    assert in_play == {'x', 'y', 'z'}
Ejemplo n.º 2
0
def test_dependent_keys():
    a, b, c, d, e, f, g = 'abcdefg'
    who_has = {a: [1], b: [1]}
    processing = {'alice': {c}}
    exceptions = {}
    dsk = {
        a: 1,
        b: 2,
        c: (add, a, b),
        d: (inc, a),
        e: (add, c, d),
        f: (inc, e)
    }
    dependencies, dependents = get_deps(dsk)

    assert dependent_keys(f,
                          who_has,
                          processing,
                          dependencies,
                          exceptions,
                          complete=False)[0] == {f, e, c, d}

    assert dependent_keys(f,
                          who_has,
                          processing,
                          dependencies,
                          exceptions,
                          complete=True)[0] == {a, b, c, d, e, f}
Ejemplo n.º 3
0
def test_fill_missing_data():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {}
    waiting_data = {'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'z': {alice}}
    processing = set()
    released = set()
    in_play = {'z'}

    e_waiting = {'x': set(), 'y': {'x'}, 'z': {'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}
    e_in_play = {'x', 'y', 'z'}

    lost = {'z'}
    del who_has['z']
    in_play.remove('z')

    heal_missing_data(dsk, dependencies, dependents, who_has, in_play, waiting,
            waiting_data, lost)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert in_play == e_in_play
Ejemplo n.º 4
0
def test_update_state_respects_WrappedKeys():
    dsk = {'x': 1, 'y': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'y': set()}
    waiting_data = {'x': {'y'}, 'y': set()}

    held_data = {'y'}
    in_memory = {'x'}
    processing = set()
    released = set()
    in_play = {'x', 'y'}

    e_dsk = {'x': 1, 'y': (inc, 'x'), 'a': 1, 'z': (add, 'y', 'a')}
    e_dependencies = {'x': set(), 'a': set(), 'y': {'x'}, 'z': {'a', 'y'}}
    e_dependents = {'z': set(), 'y': {'z'}, 'a': {'z'}, 'x': {'y'}}

    e_waiting = {'y': set(), 'a': set(), 'z': {'a', 'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'a': {'z'}, 'z': set()}

    e_held_data = {'y', 'z'}

    new_dsk = {'z': (add, WrappedKey('y'), 10)}
    a = update_state(*map(deepcopy, [dsk, dependencies, dependents, held_data,
                                     in_memory, in_play,
                                     waiting, waiting_data, new_dsk, {'z'}]))
    new_dsk = {'z': (add, 'y', 10)}
    b = update_state(*map(deepcopy, [dsk, dependencies, dependents, held_data,
                                     in_memory, in_play,
                                     waiting, waiting_data, new_dsk, {'z'}]))
    assert a == b
Ejemplo n.º 5
0
def test_prefer_broker_nodes():
    """

    b0    b1  b2
    |      \  /
    a0      a1

    a1 should be run before a0
    """
    a, b, c = 'abc'
    dsk = {(a, 0): (f,), (a, 1): (f,),
           (b, 0): (f, (a, 0)), (b, 1): (f, (a, 1)), (b, 2): (f, (a, 1))}

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    cm = child_max(dependencies, dependents, nd)
    o = order(dsk)

    assert o[(a, 1)] < o[(a, 0)]

    # Switch name of 0, 1 to ensure that this isn't due to string comparison
    dsk = {(a, 0): (f,), (a, 1): (f,),
           (b, 0): (f, (a, 0)), (b, 1): (f, (a, 1)), (b, 2): (f, (a, 0))}

    o = order(dsk)

    assert o[(a, 1)] > o[(a, 0)]
Ejemplo n.º 6
0
def test_update_state_supports_recomputing_released_results():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'z': set()}

    held_data = {'z'}
    who_has = {'z': {'alice'}}
    processing = set()
    released = {'x', 'y'}
    in_play = {'z'}

    new_dsk = {'x': 1, 'y': (inc, 'x')}
    new_keys = {'y'}

    e_dsk = dsk.copy()
    e_waiting = {'x': set(), 'y': {'x'}}
    e_waiting_data = {'x': {'y'}, 'y': set(), 'z': set()}
    e_held_data = {'y', 'z'}

    update_state(dsk, dependencies, dependents, held_data, who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z'}
Ejemplo n.º 7
0
def test_fill_missing_data():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {}
    waiting_data = {'z': set()}

    held_data = {'z'}
    in_memory = {'z'}
    processing = set()
    released = set()
    in_play = {'z'}

    e_waiting = {'x': set(), 'y': {'x'}, 'z': {'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}
    e_in_play = {'x', 'y', 'z'}

    lost = {'z'}
    in_memory.remove('z')
    in_play.remove('z')

    heal_missing_data(dsk, dependencies, dependents, held_data, in_memory,
                      in_play, waiting, waiting_data, lost)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert in_play == e_in_play
Ejemplo n.º 8
0
def test_get_deps():
    dsk = {
        "a": [1, 2, 3],
        "b": "a",
        "c": [1, (inc, 1)],
        "d": [(sum, "c")],
        "e": ["b", "zzz", "b"],
        "f": [["a", "b"], 2, 3],
    }
    dependencies, dependents = get_deps(dsk)
    assert dependencies == {
        "a": set(),
        "b": {"a"},
        "c": set(),
        "d": {"c"},
        "e": {"b"},
        "f": {"a", "b"},
    }
    assert dependents == {
        "a": {"b", "f"},
        "b": {"e", "f"},
        "c": {"d"},
        "d": set(),
        "e": set(),
        "f": set(),
    }
Ejemplo n.º 9
0
def test_get_deps():
    """
    >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dependencies, dependents = get_deps(dsk)
    >>> dependencies
    {'a': set(), 'b': {'a'}, 'c': {'b'}}
    >>> dependents  # doctest: +SKIP
    {'a': {'b'}, 'b': {'c'}, 'c': set()}
    """
    dsk = {
        "a": [1, 2, 3],
        "b": "a",
        "c": [1, (inc, 1)],
        "d": [(sum, "c")],
        "e": ["b", "zzz", "b"],
        "f": [["a", "b"], 2, 3],
    }
    dependencies, dependents = get_deps(dsk)
    assert dependencies == {
        "a": set(),
        "b": {"a"},
        "c": set(),
        "d": {"c"},
        "e": {"b"},
        "f": {"a", "b"},
    }
    assert dependents == {
        "a": {"b", "f"},
        "b": {"e", "f"},
        "c": {"d"},
        "d": set(),
        "e": set(),
        "f": set(),
    }
Ejemplo n.º 10
0
def test_get_deps():
    """
    >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dependencies, dependents = get_deps(dsk)
    >>> dependencies
    {'a': set([]), 'c': set(['b']), 'b': set(['a'])}
    >>> dependents
    {'a': set(['b']), 'c': set([]), 'b': set(['c'])}
    """
    dsk = {'a': [1, 2, 3],
           'b': 'a',
           'c': [1, (inc, 1)],
           'd': [(sum, 'c')],
           'e': ['b', 'zzz', 'b'],
           'f': [['a', 'b'], 2, 3]}
    dependencies, dependents = get_deps(dsk)
    assert dependencies == {'a': set(),
                            'b': {'a'},
                            'c': set(),
                            'd': {'c'},
                            'e': {'b'},
                            'f': {'a', 'b'},
                            }
    assert dependents == {'a': {'b', 'f'},
                          'b': {'e', 'f'},
                          'c': {'d'},
                          'd': set(),
                          'e': set(),
                          'f': set(),
                          }
Ejemplo n.º 11
0
def test_fill_missing_data():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {}
    waiting_data = {'z': set()}

    held_data = {'z'}
    in_memory = {'z'}
    processing = set()
    released = set()
    in_play = {'z'}

    e_waiting = {'x': set(), 'y': {'x'}, 'z': {'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}
    e_in_play = {'x', 'y', 'z'}

    lost = {'z'}
    in_memory.remove('z')
    in_play.remove('z')

    heal_missing_data(dsk, dependencies, dependents, held_data,
                      in_memory, in_play,
                      waiting, waiting_data, lost)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert in_play == e_in_play
Ejemplo n.º 12
0
def test_update_state_supports_recomputing_released_results():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'z': set()}

    held_data = {'z'}
    who_has = {'z': {'alice'}}
    processing = set()
    released = {'x', 'y'}
    in_play = {'z'}

    new_dsk = {'x': 1, 'y': (inc, 'x')}
    new_keys = {'y'}

    e_dsk = dsk.copy()
    e_waiting = {'x': set(), 'y': {'x'}}
    e_waiting_data = {'x': {'y'}, 'y': set(), 'z': set()}
    e_held_data = {'y', 'z'}

    update_state(dsk, dependencies, dependents, held_data,
                 who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z'}
Ejemplo n.º 13
0
def test_update_state_supports_recomputing_released_results():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'z': {'alice'}}
    processing = set()
    released = {'x', 'y'}
    in_play = {'z'}

    new_dsk = {'x': 1, 'y': (inc, 'x')}
    new_dependencies = {'y': {'x'}}
    new_keys = {'y'}

    e_dsk = dsk.copy()
    e_waiting = {'x': set(), 'y': {'x'}}
    e_waiting_data = {'x': {'y'}, 'y': set(), 'z': set()}
    e_who_wants = {'z': {'client'}, 'y': {'client'}}
    e_wants_what = {'client': {'y', 'z'}}

    update_state(dsk, dependencies, dependents, who_wants, wants_what, who_has,
                 in_play, waiting, waiting_data, new_dsk, new_keys,
                 new_dependencies, 'client')

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert who_wants == e_who_wants
    assert wants_what == e_wants_what
    assert in_play == {'x', 'y', 'z'}
Ejemplo n.º 14
0
def test_update_state_respects_data_in_memory():
    dsk = {'x': 1, 'y': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'y': set()}

    held_data = {'y'}
    in_memory = {'y'}
    processing = set()
    released = {'x'}
    in_play = {'y'}

    new_dsk = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')}
    new_keys = {'z'}

    e_dsk = new_dsk.copy()
    e_waiting = {'z': {'x'}, 'x': set()}
    e_waiting_data = {'x': {'z'}, 'y': {'z'}, 'z': set()}
    e_held_data = {'y', 'z'}

    update_state(dsk, dependencies, dependents, held_data,
                 in_memory, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z'}
Ejemplo n.º 15
0
def test_deep_bases_win_over_dependents():
    """
    d should come before e and probably before one of b and c

            a
          / | \   .
         b  c |
        / \ | /
       e    d
    """
    dsk = {
        'a': (f, 'b', 'c', 'd'),
        'b': (f, 'd', 'e'),
        'c': (f, 'd'),
        'd': 1,
        'e': 2
    }

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    cm = child_max(dependencies, dependents, nd)

    o = order(dsk)
    assert o['d'] < o['e']
    assert o['d'] < o['b'] or o['d'] < o['c']
Ejemplo n.º 16
0
def test_fill_missing_data():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {}
    waiting_data = {'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'z': {alice}}
    processing = set()
    released = set()
    in_play = {'z'}

    e_waiting = {'x': set(), 'y': {'x'}, 'z': {'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}
    e_in_play = {'x', 'y', 'z'}

    lost = {'z'}
    del who_has['z']
    in_play.remove('z')

    heal_missing_data(dsk, dependencies, dependents, who_has, in_play, waiting,
                      waiting_data, lost)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert in_play == e_in_play
Ejemplo n.º 17
0
def test_update_state_with_processing():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'x': {'alice'}}
    processing = {'y'}
    released = set()
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_dependencies = {'a': {'x'}, 'b': {'a', 'y'}, 'c': {'z'}}
    new_keys = {'b', 'c'}

    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {'x': {'y', 'a'}, 'y': {'z', 'b'}, 'z': {'c'},
                      'a': {'b'}, 'b': set(), 'c': set()}

    e_who_wants = {'b': {'client'}, 'c': {'client'}, 'z': {'client'}}
    e_wants_what = {'client': {'b', 'c', 'z'}}

    update_state(dsk, dependencies, dependents, who_wants, wants_what,
                 who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys, new_dependencies,
                 'client')

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert who_wants == e_who_wants
    assert wants_what == e_wants_what
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
Ejemplo n.º 18
0
def test_get_deps():
    """
    >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dependencies, dependents = get_deps(dsk)
    >>> dependencies
    {'a': set([]), 'c': set(['b']), 'b': set(['a'])}
    >>> dependents
    {'a': set(['b']), 'c': set([]), 'b': set(['c'])}
    """
    dsk = {
        'a': [1, 2, 3],
        'b': 'a',
        'c': [1, (inc, 1)],
        'd': [(sum, 'c')],
        'e': ['b', 'zzz', 'b'],
        'f': [['a', 'b'], 2, 3]
    }
    dependencies, dependents = get_deps(dsk)
    assert dependencies == {
        'a': set(),
        'b': {'a'},
        'c': set(),
        'd': {'c'},
        'e': {'b'},
        'f': {'a', 'b'},
    }
    assert dependents == {
        'a': {'b', 'f'},
        'b': {'e', 'f'},
        'c': {'d'},
        'd': set(),
        'e': set(),
        'f': set(),
    }
Ejemplo n.º 19
0
def test_heal_restarts_leaf_tasks():
    dsk = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
        'x': 1,
        'y': (inc, 'x'),
        'z': (inc, 'y')
    }
    dependents, dependencies = get_deps(dsk)

    state = {
        'in_memory': set(),  # missing 'b'
        'stacks': {
            'alice': ['a'],
            'bob': ['x']
        },
        'processing': {
            'alice': set(),
            'bob': set()
        },
        'waiting': {},
        'waiting_data': {}
    }

    del state['stacks']['bob']
    del state['processing']['bob']

    output = heal(dependencies, dependents, **state)
    assert 'x' in output['waiting']
Ejemplo n.º 20
0
def test_update_state_with_processing():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    held_data = {'z'}
    who_has = {'x': {'alice'}}
    processing = {'y'}
    released = set()
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_keys = {'b', 'c'}

    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {'x': {'y', 'a'}, 'y': {'z', 'b'}, 'z': {'c'},
                      'a': {'b'}, 'b': set(), 'c': set()}

    e_held_data = {'b', 'c', 'z'}

    update_state(dsk, dependencies, dependents, held_data,
                 who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
Ejemplo n.º 21
0
def test_update_state_with_processing():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    held_data = {'z'}
    who_has = {'x': {'alice'}}
    processing = {'y'}
    released = set()
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_keys = {'b', 'c'}

    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {
        'x': {'y', 'a'},
        'y': {'z', 'b'},
        'z': {'c'},
        'a': {'b'},
        'b': set(),
        'c': set()
    }

    e_held_data = {'b', 'c', 'z'}

    update_state(dsk, dependencies, dependents, held_data, who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
Ejemplo n.º 22
0
def test_heal_culls():
    dsk = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
        'x': 1,
        'y': (inc, 'x'),
        'z': (inc, 'y')
    }
    dependencies, dependents = get_deps(dsk)

    state = {
        'in_memory': {'c', 'y'},
        'stacks': {
            'alice': ['a'],
            'bob': []
        },
        'processing': {
            'alice': set(),
            'bob': set('y')
        },
        'waiting': {},
        'waiting_data': {}
    }

    output = heal(dependencies, dependents, **state)
    assert 'a' not in output['stacks']['alice']
    assert output['released'] == {'a', 'b', 'x'}
    assert output['finished_results'] == {'c'}
    assert 'y' not in output['processing']['bob']

    assert output['waiting']['z'] == set()
Ejemplo n.º 23
0
def test_update_state_respects_WrappedKeys():
    dsk = {'x': 1, 'y': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'y': set()}
    waiting_data = {'x': {'y'}, 'y': set()}

    held_data = {'y'}
    who_has = {'x': {'alice'}}
    processing = set()
    released = set()
    in_play = {'x', 'y'}

    e_dsk = {'x': 1, 'y': (inc, 'x'), 'a': 1, 'z': (add, 'y', 'a')}
    e_dependencies = {'x': set(), 'a': set(), 'y': {'x'}, 'z': {'a', 'y'}}
    e_dependents = {'z': set(), 'y': {'z'}, 'a': {'z'}, 'x': {'y'}}

    e_waiting = {'y': set(), 'a': set(), 'z': {'a', 'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'a': {'z'}, 'z': set()}

    e_held_data = {'y', 'z'}

    new_dsk = {'z': (add, WrappedKey('y'), 10)}
    a = update_state(*map(deepcopy, [dsk, dependencies, dependents, held_data,
                                     who_has, in_play,
                                     waiting, waiting_data, new_dsk, {'z'}]))
    new_dsk = {'z': (add, 'y', 10)}
    b = update_state(*map(deepcopy, [dsk, dependencies, dependents, held_data,
                                     who_has, in_play,
                                     waiting, waiting_data, new_dsk, {'z'}]))
    assert a == b
Ejemplo n.º 24
0
def test_ndependents():
    a, b, c = 'abc'
    dsk = dict(chain((((a, i), i * 2) for i in range(5)),
                     (((b, i), (add, i, (a, i))) for i in range(5)),
                     (((c, i), (add, i, (b, i))) for i in range(5))))
    result = ndependents(*get_deps(dsk))
    expected = dict(chain((((a, i), 3) for i in range(5)),
                          (((b, i), 2) for i in range(5)),
                          (((c, i), 1) for i in range(5))))
    assert result == expected

    dsk = {a: 1, b: 1}
    deps = get_deps(dsk)
    assert ndependents(*deps) == dsk

    dsk = {a: 1, b: (add, a, 1), c: (add, b, a)}
    assert ndependents(*get_deps(dsk)) == {a: 4, b: 2, c: 1}

    dsk = {a: 1, b: a, c: b}
    deps = get_deps(dsk)
    assert ndependents(*deps) == {a: 3, b: 2, c: 1}
Ejemplo n.º 25
0
def test_ndependents():
    a, b, c = 'abc'
    dsk = dict(chain((((a, i), i * 2) for i in range(5)),
                     (((b, i), (add, i, (a, i))) for i in range(5)),
                     (((c, i), (add, i, (b, i))) for i in range(5))))
    result = ndependents(*get_deps(dsk))
    expected = dict(chain((((a, i), 3) for i in range(5)),
                          (((b, i), 2) for i in range(5)),
                          (((c, i), 1) for i in range(5))))
    assert result == expected

    dsk = {a: 1, b: 1}
    deps = get_deps(dsk)
    assert ndependents(*deps) == dsk

    dsk = {a: 1, b: (add, a, 1), c: (add, b, a)}
    assert ndependents(*get_deps(dsk)) == {a: 4, b: 2, c: 1}

    dsk = {a: 1, b: a, c: b}
    deps = get_deps(dsk)
    assert ndependents(*deps) == {a: 3, b: 2, c: 1}
Ejemplo n.º 26
0
def diagnostics(dsk, o=None, dependencies=None):
    """Simulate runtime metrics as though running tasks one at a time in order.

    These diagnostics can help reveal behaviors of and issues with ``order``.

    Returns a dict of `namedtuple("OrderInfo")` and a list of the number of outputs held over time.

    OrderInfo fields:
    - order : the order in which the node is run.
    - age : how long the output of a node is held.
    - num_data_when_run : the number of outputs held in memory when a node is run.
    - num_data_when_released : the number of outputs held in memory when the output is released.
    - num_dependencies_freed : the number of dependencies freed by running the node.
    """
    if dependencies is None:
        dependencies, dependents = get_deps(dsk)
    else:
        dependents = reverse_dict(dependencies)
    if o is None:
        o = order(dsk, dependencies=dependencies)

    pressure = []
    num_in_memory = 0
    age = {}
    runpressure = {}
    releasepressure = {}
    freed = {}
    num_needed = {key: len(val) for key, val in dependents.items()}
    for i, key in enumerate(sorted(dsk, key=o.__getitem__)):
        pressure.append(num_in_memory)
        runpressure[key] = num_in_memory
        released = 0
        for dep in dependencies[key]:
            num_needed[dep] -= 1
            if num_needed[dep] == 0:
                age[dep] = i - o[dep]
                releasepressure[dep] = num_in_memory
                released += 1
        freed[key] = released
        if dependents[key]:
            num_in_memory -= released - 1
        else:
            age[key] = 0
            releasepressure[key] = num_in_memory
            num_in_memory -= released

    rv = {
        key: OrderInfo(val, age[key], runpressure[key], releasepressure[key],
                       freed[key])
        for key, val in o.items()
    }
    return rv, pressure
Ejemplo n.º 27
0
def test_dependent_keys():
    a, b, c, d, e, f, g = 'abcdefg'
    who_has = {a: [1], b: [1]}
    processing = {'alice': {c}}
    exceptions = {}
    dsk = {a: 1, b: 2, c: (add, a, b), d: (inc, a), e: (add, c, d), f: (inc, e)}
    dependencies, dependents = get_deps(dsk)

    assert dependent_keys(f, who_has, processing, dependencies,
            exceptions, complete=False)[0] == {f, e, c, d}

    assert dependent_keys(f, who_has, processing, dependencies,
            exceptions, complete=True)[0] == {a, b, c, d, e, f}
Ejemplo n.º 28
0
def test_hash_groupby_aggregate(npartitions, split_every, split_out):
    df = pd.DataFrame({'x': np.arange(100) % 10, 'y': np.ones(100)})
    ddf = dd.from_pandas(df, npartitions)

    result = ddf.groupby('x').y.var(split_every=split_every,
                                    split_out=split_out)

    dsk = result._optimize(result.dask, result._keys())
    from dask.core import get_deps
    dependencies, dependents = get_deps(dsk)

    assert result.npartitions == (split_out or 1)
    assert len([k for k, v in dependencies.items() if not v]) == npartitions

    assert_eq(result, df.groupby('x').y.var())
Ejemplo n.º 29
0
def test_heal_restarts_leaf_tasks():
    dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'),
           'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependents, dependencies = get_deps(dsk)

    state = {'who_has': dict(),  # missing 'b'
             'stacks': {'alice': ['a'], 'bob': ['x']},
             'processing': {'alice': set(), 'bob': set()},
             'waiting': {}, 'waiting_data': {}}

    del state['stacks']['bob']
    del state['processing']['bob']

    output = heal(dependencies, dependents, **state)
    assert 'x' in output['waiting']
Ejemplo n.º 30
0
def test_decide_worker_with_many_independent_leaves():
    dsk = merge({('y', i): (inc, ('x', i)) for i in range(100)},
                {('x', i): i for i in range(100)})
    dependencies, dependents = get_deps(dsk)
    stacks = {'alice': [], 'bob': []}
    who_has = merge({('x', i * 2): {'alice'} for i in range(50)},
                    {('x', i * 2 + 1): {'bob'} for i in range(50)})

    for key in dsk:
        worker = decide_worker(dependencies, stacks, who_has, {}, key)
        stacks[worker].append(key)

    nhits = (len([k for k in stacks['alice'] if 'alice' in who_has[('x', k[1])]])
             + len([k for k in stacks['bob'] if 'bob' in who_has[('x', k[1])]]))

    assert nhits > 90
Ejemplo n.º 31
0
def test_decide_worker_with_many_independent_leaves():
    dsk = merge({('y', i): (inc, ('x', i)) for i in range(100)},
                {('x', i): i for i in range(100)})
    dependencies, dependents = get_deps(dsk)
    stacks = {'alice': [], 'bob': []}
    who_has = merge({('x', i * 2): {'alice'} for i in range(50)},
                    {('x', i * 2 + 1): {'bob'} for i in range(50)})

    for key in dsk:
        worker = decide_worker(dependencies, stacks, who_has, {}, key)
        stacks[worker].append(key)

    nhits = (len([k for k in stacks['alice'] if 'alice' in who_has[('x', k[1])]])
             + len([k for k in stacks['bob'] if 'bob' in who_has[('x', k[1])]]))

    assert nhits > 90
Ejemplo n.º 32
0
def test_hash_groupby_aggregate(npartitions, split_every, split_out):
    df = pd.DataFrame({'x': np.arange(100) % 10,
                       'y': np.ones(100)})
    ddf = dd.from_pandas(df, npartitions)

    result = ddf.groupby('x').y.var(split_every=split_every,
                                    split_out=split_out)

    dsk = result._optimize(result.dask, result._keys())
    from dask.core import get_deps
    dependencies, dependents = get_deps(dsk)

    assert result.npartitions == (split_out or 1)
    assert len([k for k, v in dependencies.items() if not v]) == npartitions

    assert_eq(result, df.groupby('x').y.var())
Ejemplo n.º 33
0
def test_decide_worker_with_many_independent_leaves():
    dsk = merge({('y', i): (inc, ('x', i)) for i in range(100)},
                {('x', i): i for i in range(100)})
    dependencies, dependents = get_deps(dsk)
    stacks = {alice: [], bob: []}
    who_has = merge({('x', i * 2): {alice} for i in range(50)},
                    {('x', i * 2 + 1): {bob} for i in range(50)})
    nbytes = {k: 0 for k in who_has}

    for key in dsk:
        worker = decide_worker(dependencies, stacks, who_has, {}, set(), nbytes, key)
        stacks[worker].append(key)

    nhits = (len([k for k in stacks[alice] if alice in who_has[('x', k[1])]])
             + len([k for k in stacks[bob] if bob in who_has[('x', k[1])]]))

    assert nhits > 90
Ejemplo n.º 34
0
def test_decide_worker_with_many_independent_leaves():
    dsk = merge({('y', i): (inc, ('x', i)) for i in range(100)},
                {('x', i): i for i in range(100)})
    dependencies, dependents = get_deps(dsk)
    stacks = {alice: [], bob: []}
    who_has = merge({('x', i * 2): {alice} for i in range(50)},
                    {('x', i * 2 + 1): {bob} for i in range(50)})
    nbytes = {k: 0 for k in who_has}

    for key in dsk:
        worker = decide_worker(dependencies, stacks, who_has, {}, set(), nbytes, key)
        stacks[worker].append(key)

    nhits = (len([k for k in stacks[alice] if alice in who_has[('x', k[1])]])
             + len([k for k in stacks[bob] if bob in who_has[('x', k[1])]]))

    assert nhits > 90
Ejemplo n.º 35
0
def test_heal_culls():
    dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'),
           'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    state = {'who_has': {'c': {alice}, 'y': {alice}},
             'stacks': {'alice': ['a'], 'bob': []},
             'processing': {'alice': set(), 'bob': set('y')},
             'waiting': {}, 'waiting_data': {}}

    output = heal(dependencies, dependents, **state)
    assert 'a' not in output['stacks']['alice']
    assert output['released'] == {'a', 'b', 'x'}
    assert output['finished_results'] == {'c'}
    assert 'y' not in output['processing']['bob']

    assert output['waiting']['z'] == set()
Ejemplo n.º 36
0
def test_deep_bases_win_over_dependents():
    """
    d should come before e and probably before one of b and c

            a
          / | \   .
         b  c |
        / \ | /
       e    d
    """
    dsk = {'a': (f, 'b', 'c', 'd'), 'b': (f, 'd', 'e'), 'c': (f, 'd'), 'd': 1,
            'e': 2}

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    cm = child_max(dependencies, dependents, nd)

    o = order(dsk)
    assert o['d'] < o['e']
    assert o['d'] < o['b'] or o['d'] < o['c']
Ejemplo n.º 37
0
def test_update_state_with_processing():
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'x': {'alice'}}
    processing = {'y'}
    released = set()
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_dependencies = {'a': {'x'}, 'b': {'a', 'y'}, 'c': {'z'}}
    new_keys = {'b', 'c'}

    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {
        'x': {'y', 'a'},
        'y': {'z', 'b'},
        'z': {'c'},
        'a': {'b'},
        'b': set(),
        'c': set()
    }

    e_who_wants = {'b': {'client'}, 'c': {'client'}, 'z': {'client'}}
    e_wants_what = {'client': {'b', 'c', 'z'}}

    update_state(dsk, dependencies, dependents, who_wants, wants_what, who_has,
                 in_play, waiting, waiting_data, new_dsk, new_keys,
                 new_dependencies, 'client')

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert who_wants == e_who_wants
    assert wants_what == e_wants_what
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
Ejemplo n.º 38
0
def test_prefer_broker_nodes():
    """

    b0    b1  b2
    |      \  /
    a0      a1

    a1 should be run before a0
    """
    a, b, c = 'abc'
    dsk = {
        (a, 0): (f, ),
        (a, 1): (f, ),
        (b, 0): (f, (a, 0)),
        (b, 1): (f, (a, 1)),
        (b, 2): (f, (a, 1))
    }

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    cm = child_max(dependencies, dependents, nd)
    o = order(dsk)

    assert o[(a, 1)] < o[(a, 0)]

    # Switch name of 0, 1 to ensure that this isn't due to string comparison
    dsk = {
        (a, 0): (f, ),
        (a, 1): (f, ),
        (b, 0): (f, (a, 0)),
        (b, 1): (f, (a, 1)),
        (b, 2): (f, (a, 0))
    }

    o = order(dsk)

    assert o[(a, 1)] > o[(a, 0)]
Ejemplo n.º 39
0
def extract_compilable_subgraphs(
        dsk: Dict,
        compiler: str,
        output_keys: List[str],
        include_singletons=True) -> List[DaskSubgraph]:
    """Find compilable subgraphs in this Dask task graph.

    Currently only works with one compiler at a time, and only will return
    linear chains of compilable tasks.  If include_singletons is True,
    returned chains may be of length 1.  If False, the chain length must be
    >1.

    If present in a subgraph, tasks corresponding to output_keys can only be
    at the end of a chain.
    """

    if include_singletons:
        chain_threshold = 1
    else:
        chain_threshold = 2
    dependencies, dependents = get_deps(dsk)

    compilable_keys, non_compilable_keys = _get_compilable_dask_keys(
        dsk, compiler)

    if len(compilable_keys) == 0:
        return []

    output_keys_set = set(output_keys)

    subgraphs = []
    ordered_keys = _dfs_sorted_dask_keys(compilable_keys, dependencies,
                                         dependents)
    key = next(ordered_keys)
    current_chain = [key]

    def _note_subgraph(chain):
        output_key = chain[-1]
        chain = set(chain)
        inputs = reduce(set.union, (dependencies[chain_key] - chain
                                    for chain_key in chain))
        tasks = {chain_key: dsk[chain_key] for chain_key in chain}
        subgraphs.append(
            DaskSubgraph(tasks=tasks,
                         input_keys=list(inputs),
                         output_key=output_key))

    for next_key in ordered_keys:
        next_key_dependencies = dependencies[next_key]
        key_dependents = dependents[key]

        if (len(next_key_dependencies) == 1 and len(key_dependents) == 1
                and next_key in key_dependents and key in next_key_dependencies
                and key not in
                output_keys_set  # output keys must be at the end of a chain
            ):
            current_chain.append(next_key)
        elif len(current_chain) >= chain_threshold:
            _note_subgraph(current_chain)
            current_chain = [next_key]
        else:
            current_chain = [next_key]
        key = next_key

    if len(current_chain) >= chain_threshold:
        _note_subgraph(current_chain)

    return subgraphs
Ejemplo n.º 40
0
def test_eager_to_compute_dependent_to_free_parent():
    r"""https://github.com/dask/dask/pull/7929

    This graph begins with many motifs like the following:

    |      |
    c1    c2
      \ /
       b
       |
       a

    We want to compute c2 and c3 pretty close together, because if we choose to
    compute c1, then we should also compute c2 so we can release b.  Being
    greedy here allows us to release memory sooner and be more globally optimal.
    """
    dsk = {
        "a00": (f, "a06", "a08"),
        "a01": (f, "a28", "a26"),
        "a02": (f, "a24", "a21"),
        "a03": (f, "a22", "a25"),
        "a04": (f, "a29", "a20"),
        "a05": (f, "a23", "a27"),
        "a06": (f, "a04", "a02"),
        "a07": (f, "a00", "a01"),
        "a08": (f, "a05", "a03"),
        "a09": (f, "a43"),
        "a10": (f, "a36"),
        "a11": (f, "a33"),
        "a12": (f, "a47"),
        "a13": (f, "a44"),
        "a14": (f, "a42"),
        "a15": (f, "a37"),
        "a16": (f, "a48"),
        "a17": (f, "a49"),
        "a18": (f, "a35"),
        "a19": (f, "a46"),
        "a20": (f, "a55"),
        "a21": (f, "a53"),
        "a22": (f, "a60"),
        "a23": (f, "a54"),
        "a24": (f, "a59"),
        "a25": (f, "a56"),
        "a26": (f, "a61"),
        "a27": (f, "a52"),
        "a28": (f, "a57"),
        "a29": (f, "a58"),
        "a30": (f, "a19"),
        "a31": (f, "a07"),
        "a32": (f, "a30", "a31"),
        "a33": (f, "a58"),
        "a34": (f, "a11", "a09"),
        "a35": (f, "a60"),
        "a36": (f, "a52"),
        "a37": (f, "a61"),
        "a38": (f, "a14", "a10"),
        "a39": (f, "a38", "a40"),
        "a40": (f, "a18", "a17"),
        "a41": (f, "a34", "a50"),
        "a42": (f, "a54"),
        "a43": (f, "a55"),
        "a44": (f, "a53"),
        "a45": (f, "a16", "a15"),
        "a46": (f, "a51", "a45"),
        "a47": (f, "a59"),
        "a48": (f, "a57"),
        "a49": (f, "a56"),
        "a50": (f, "a12", "a13"),
        "a51": (f, "a41", "a39"),
        "a52": (f, "a62"),
        "a53": (f, "a68"),
        "a54": (f, "a70"),
        "a55": (f, "a67"),
        "a56": (f, "a71"),
        "a57": (f, "a64"),
        "a58": (f, "a65"),
        "a59": (f, "a63"),
        "a60": (f, "a69"),
        "a61": (f, "a66"),
        "a62": (f, f),
        "a63": (f, f),
        "a64": (f, f),
        "a65": (f, f),
        "a66": (f, f),
        "a67": (f, f),
        "a68": (f, f),
        "a69": (f, f),
        "a70": (f, f),
        "a71": (f, f),
    }
    dependencies, dependents = get_deps(dsk)
    o = order(dsk)
    parents = {
        deps.pop()
        for key, deps in dependents.items() if not dependencies[key]
    }

    def cost(deps):
        a, b = deps
        return abs(o[a] - o[b])

    cost_of_pairs = {key: cost(dependents[key]) for key in parents}
    # Allow one to be bad, b/c this is hard!
    costs = sorted(cost_of_pairs.values())
    assert sum(costs[:-1]) <= 25 or sum(costs) <= 31
Ejemplo n.º 41
0
def test_stacklimit(abcde):
    dsk = dict(('x%s' % (i + 1), (inc, 'x%s' % i)) for i in range(10000))
    dependencies, dependents = get_deps(dsk)
    ndependencies(dependencies, dependents)
Ejemplo n.º 42
0
def assert_max_deps(x, n, eq=True):
    dependencies, dependents = get_deps(x.dask)
    if eq:
        assert max(map(len, dependencies.values())) == n
    else:
        assert max(map(len, dependencies.values())) <= n
Ejemplo n.º 43
0
def test_stacklimit(abcde):
    dsk = {"x%s" % (i + 1): (inc, "x%s" % i) for i in range(10000)}
    dependencies, dependents = get_deps(dsk)
    ndependencies(dependencies, dependents)
Ejemplo n.º 44
0
def assert_max_deps(x, n, eq=True):
    dependencies, dependents = get_deps(x.dask)
    if eq:
        assert max(map(len, dependencies.values())) == n
    else:
        assert max(map(len, dependencies.values())) <= n
Ejemplo n.º 45
0
def test_stacklimit(abcde):
    dsk = dict(('x%s' % (i + 1), (inc, 'x%s' % i)) for i in range(10000))
    dependencies, dependents = get_deps(dsk)
    ndependencies(dependencies, dependents)
Ejemplo n.º 46
0
def test_stacklimit():
    dsk = dict(('x%s' % (i + 1), (inc, 'x%s' % i)) for i in range(10000))
    dependencies, dependents = get_deps(dsk)
    scores = dict.fromkeys(dsk, 1)
    child_max(dependencies, dependents, scores)
    ndependents(dependencies, dependents)
Ejemplo n.º 47
0
def test_stacklimit():
    dsk = dict(('x%s' % (i + 1), (inc, 'x%s' % i)) for i in range(10000))
    dependencies, dependents = get_deps(dsk)
    scores = dict.fromkeys(dsk, 1)
    child_max(dependencies, dependents, scores)
    ndependents(dependencies, dependents)