def test_update_state_supports_recomputing_released_results():
    """update_state re-plans a key whose result was released earlier.

    Starting point: 'z' is held by worker 'alice' and wanted by 'client',
    while 'x' and 'y' have been released.  The client then requests 'y'.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'z': {'alice'}}
    in_play = {'z'}

    new_dsk = {'x': 1, 'y': (inc, 'x')}
    new_dependencies = {'y': {'x'}}
    new_keys = {'y'}

    # Expected state once 'y' has been requested again.
    e_dsk = dsk.copy()
    e_waiting = {'x': set(), 'y': {'x'}}
    e_waiting_data = {'x': {'y'}, 'y': set(), 'z': set()}
    e_who_wants = {'z': {'client'}, 'y': {'client'}}
    e_wants_what = {'client': {'y', 'z'}}

    update_state(dsk, dependencies, dependents, who_wants, wants_what,
                 who_has, in_play, waiting, waiting_data, new_dsk, new_keys,
                 new_dependencies, 'client')

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert who_wants == e_who_wants
    assert wants_what == e_wants_what
    assert in_play == {'x', 'y', 'z'}
def test_dependent_keys():
    """Compare dependent_keys for 'f' with ``complete`` off and on."""
    a, b, c, d, e, f, g = 'abcdefg'

    graph = {
        a: 1,
        b: 2,
        c: (add, a, b),
        d: (inc, a),
        e: (add, c, d),
        f: (inc, e),
    }
    dependencies, dependents = get_deps(graph)

    # 'a' and 'b' are already held somewhere; 'c' is running on alice.
    who_has = {a: [1], b: [1]}
    processing = {'alice': {c}}
    exceptions = {}

    partial = dependent_keys(f, who_has, processing, dependencies,
                             exceptions, complete=False)
    assert partial[0] == {f, e, c, d}

    everything = dependent_keys(f, who_has, processing, dependencies,
                                exceptions, complete=True)
    assert everything[0] == {a, b, c, d, e, f}
def test_fill_missing_data():
    """heal_missing_data re-plans the whole chain once 'z' is lost.

    'z' starts out held by ``alice`` and in play; it is then dropped from
    both ``who_has`` and ``in_play`` and reported as lost.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {}
    waiting_data = {'z': set()}

    who_has = {'z': {alice}}
    in_play = {'z'}

    # Expected state after healing: every key of the chain is scheduled.
    e_waiting = {'x': set(), 'y': {'x'}, 'z': {'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}
    e_in_play = {'x', 'y', 'z'}

    # Simulate losing the only copy of 'z'.
    lost = {'z'}
    del who_has['z']
    in_play.remove('z')

    heal_missing_data(dsk, dependencies, dependents, who_has, in_play,
                      waiting, waiting_data, lost)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert in_play == e_in_play
def test_update_state_respects_WrappedKeys():
    """update_state returns the same result for WrappedKey('y') and 'y'."""
    dsk = {'x': 1, 'y': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'y': set()}
    waiting_data = {'x': {'y'}, 'y': set()}

    held_data = {'y'}
    in_memory = {'x'}
    in_play = {'x', 'y'}

    def run(new_dsk):
        # Deep-copy every argument so both calls start from the same state.
        args = [dsk, dependencies, dependents, held_data, in_memory,
                in_play, waiting, waiting_data, new_dsk, {'z'}]
        return update_state(*map(deepcopy, args))

    wrapped = run({'z': (add, WrappedKey('y'), 10)})
    plain = run({'z': (add, 'y', 10)})

    assert wrapped == plain
def test_prefer_broker_nodes():
    r"""The leaf with two dependents is ordered before the leaf with one.

    b0    b1  b2
    |      \  /
    a0      a1

    a1 should be run before a0
    """
    a, b = 'ab'
    dsk = {(a, 0): (f,), (a, 1): (f,),
           (b, 0): (f, (a, 0)), (b, 1): (f, (a, 1)), (b, 2): (f, (a, 1))}

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    # Result unused: the call only checks that child_max runs on this graph.
    child_max(dependencies, dependents, nd)

    o = order(dsk)
    assert o[(a, 1)] < o[(a, 0)]

    # Switch name of 0, 1 to ensure that this isn't due to string comparison
    dsk = {(a, 0): (f,), (a, 1): (f,),
           (b, 0): (f, (a, 0)), (b, 1): (f, (a, 1)), (b, 2): (f, (a, 0))}

    o = order(dsk)
    assert o[(a, 1)] > o[(a, 0)]
def test_update_state_supports_recomputing_released_results():
    """update_state re-plans a key whose result was released earlier.

    Starting point: 'z' is held data stored on 'alice', while 'x' and 'y'
    have been released.  'y' is then requested as a new key.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'z': set()}

    held_data = {'z'}
    who_has = {'z': {'alice'}}
    in_play = {'z'}

    new_dsk = {'x': 1, 'y': (inc, 'x')}
    new_keys = {'y'}

    # Expected state once 'y' has been requested again.
    e_dsk = dsk.copy()
    e_waiting = {'x': set(), 'y': {'x'}}
    e_waiting_data = {'x': {'y'}, 'y': set(), 'z': set()}
    e_held_data = {'y', 'z'}

    update_state(dsk, dependencies, dependents, held_data, who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z'}
def test_fill_missing_data():
    """heal_missing_data re-plans the whole chain once 'z' is lost.

    'z' starts out held, in memory and in play; it is then dropped from
    ``in_memory`` and ``in_play`` and reported as lost.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {}
    waiting_data = {'z': set()}

    held_data = {'z'}
    in_memory = {'z'}
    in_play = {'z'}

    # Expected state after healing: every key of the chain is scheduled.
    e_waiting = {'x': set(), 'y': {'x'}, 'z': {'y'}}
    e_waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}
    e_in_play = {'x', 'y', 'z'}

    # Simulate losing the only copy of 'z'.
    lost = {'z'}
    in_memory.remove('z')
    in_play.remove('z')

    heal_missing_data(dsk, dependencies, dependents, held_data, in_memory,
                      in_play, waiting, waiting_data, lost)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert in_play == e_in_play
def test_get_deps():
    """get_deps finds keys nested in lists and ignores unknown strings."""
    graph = {
        "a": [1, 2, 3],
        "b": "a",
        "c": [1, (inc, 1)],
        "d": [(sum, "c")],
        "e": ["b", "zzz", "b"],
        "f": [["a", "b"], 2, 3],
    }

    expected_dependencies = {
        "a": set(),
        "b": {"a"},
        "c": set(),
        "d": {"c"},
        "e": {"b"},
        "f": {"a", "b"},
    }
    expected_dependents = {
        "a": {"b", "f"},
        "b": {"e", "f"},
        "c": {"d"},
        "d": set(),
        "e": set(),
        "f": set(),
    }

    dependencies, dependents = get_deps(graph)

    assert dependencies == expected_dependencies
    assert dependents == expected_dependents
def test_get_deps():
    """get_deps finds keys nested in lists and ignores unknown strings.

    >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dependencies, dependents = get_deps(dsk)
    >>> dependencies
    {'a': set(), 'b': {'a'}, 'c': {'b'}}
    >>> dependents  # doctest: +SKIP
    {'a': {'b'}, 'b': {'c'}, 'c': set()}
    """
    graph = {
        "a": [1, 2, 3],
        "b": "a",
        "c": [1, (inc, 1)],
        "d": [(sum, "c")],
        "e": ["b", "zzz", "b"],
        "f": [["a", "b"], 2, 3],
    }

    expected_dependencies = {
        "a": set(),
        "b": {"a"},
        "c": set(),
        "d": {"c"},
        "e": {"b"},
        "f": {"a", "b"},
    }
    expected_dependents = {
        "a": {"b", "f"},
        "b": {"e", "f"},
        "c": {"d"},
        "d": set(),
        "e": set(),
        "f": set(),
    }

    dependencies, dependents = get_deps(graph)

    assert dependencies == expected_dependencies
    assert dependents == expected_dependents
def test_get_deps():
    """get_deps finds keys nested in lists and ignores unknown strings.

    >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dependencies, dependents = get_deps(dsk)
    >>> dependencies
    {'a': set(), 'b': {'a'}, 'c': {'b'}}
    >>> dependents  # doctest: +SKIP
    {'a': {'b'}, 'b': {'c'}, 'c': set()}
    """
    dsk = {'a': [1, 2, 3],
           'b': 'a',
           'c': [1, (inc, 1)],
           'd': [(sum, 'c')],
           'e': ['b', 'zzz', 'b'],
           'f': [['a', 'b'], 2, 3]}

    dependencies, dependents = get_deps(dsk)

    # 'zzz' is not a key of the graph, so it is not reported as a dependency.
    assert dependencies == {'a': set(),
                            'b': {'a'},
                            'c': set(),
                            'd': {'c'},
                            'e': {'b'},
                            'f': {'a', 'b'},
                            }
    assert dependents == {'a': {'b', 'f'},
                          'b': {'e', 'f'},
                          'c': {'d'},
                          'd': set(),
                          'e': set(),
                          'f': set(),
                          }
def test_update_state_respects_data_in_memory():
    """A new key does not wait on a dependency that is already in memory.

    Starting point: 'y' is held and in memory, and its input 'x' has been
    released.  New key 'z' depends on both 'y' and 'x'.
    """
    dsk = {'x': 1, 'y': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = dict()
    waiting_data = {'y': set()}

    held_data = {'y'}
    in_memory = {'y'}
    in_play = {'y'}

    new_dsk = {'x': 1, 'y': (inc, 'x'), 'z': (add, 'y', 'x')}
    new_keys = {'z'}

    # 'z' waits only on 'x'; 'y' is listed in waiting_data but not waiting.
    e_dsk = new_dsk.copy()
    e_waiting = {'z': {'x'}, 'x': set()}
    e_waiting_data = {'x': {'z'}, 'y': {'z'}, 'z': set()}
    e_held_data = {'y', 'z'}

    update_state(dsk, dependencies, dependents, held_data, in_memory,
                 in_play, waiting, waiting_data, new_dsk, new_keys)

    assert dsk == e_dsk
    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z'}
def test_deep_bases_win_over_dependents():
    r"""
    d should come before e and probably before one of b and c

            a
          / | \   .
         b  c |
        / \ | /
       e    d
    """
    dsk = {
        'a': (f, 'b', 'c', 'd'),
        'b': (f, 'd', 'e'),
        'c': (f, 'd'),
        'd': 1,
        'e': 2,
    }

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    # Result unused: the call only checks that child_max runs on this graph.
    child_max(dependencies, dependents, nd)

    o = order(dsk)
    assert o['d'] < o['e']
    assert o['d'] < o['b'] or o['d'] < o['c']
def test_update_state_with_processing():
    """update_state adds new keys on top of a graph that is partly done.

    Scenario: 'x' is stored on 'alice', 'y' is mid-computation and 'z'
    waits on 'y'.  update_state, as called here, takes no ``processing``
    argument, so that part of the scenario is not passed in.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'x': {'alice'}}
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_dependencies = {'a': {'x'}, 'b': {'a', 'y'}, 'c': {'z'}}
    new_keys = {'b', 'c'}

    # 'a' has nothing to wait for because its only input 'x' is in who_has.
    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {'x': {'y', 'a'}, 'y': {'z', 'b'}, 'z': {'c'},
                      'a': {'b'}, 'b': set(), 'c': set()}
    e_who_wants = {'b': {'client'}, 'c': {'client'}, 'z': {'client'}}
    e_wants_what = {'client': {'b', 'c', 'z'}}

    update_state(dsk, dependencies, dependents, who_wants, wants_what,
                 who_has, in_play, waiting, waiting_data, new_dsk, new_keys,
                 new_dependencies, 'client')

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert who_wants == e_who_wants
    assert wants_what == e_wants_what
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
def test_get_deps():
    """get_deps finds keys nested in lists and ignores unknown strings.

    >>> dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b')}
    >>> dependencies, dependents = get_deps(dsk)
    >>> dependencies
    {'a': set(), 'b': {'a'}, 'c': {'b'}}
    >>> dependents  # doctest: +SKIP
    {'a': {'b'}, 'b': {'c'}, 'c': set()}
    """
    dsk = {
        'a': [1, 2, 3],
        'b': 'a',
        'c': [1, (inc, 1)],
        'd': [(sum, 'c')],
        'e': ['b', 'zzz', 'b'],
        'f': [['a', 'b'], 2, 3],
    }

    dependencies, dependents = get_deps(dsk)

    # 'zzz' is not a key of the graph, so it is not reported as a dependency.
    assert dependencies == {
        'a': set(),
        'b': {'a'},
        'c': set(),
        'd': {'c'},
        'e': {'b'},
        'f': {'a', 'b'},
    }
    assert dependents == {
        'a': {'b', 'f'},
        'b': {'e', 'f'},
        'c': {'d'},
        'd': set(),
        'e': set(),
        'f': set(),
    }
def test_heal_restarts_leaf_tasks():
    """heal lists leaf task 'x' in ``waiting`` after worker 'bob' is gone."""
    dsk = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
        'x': 1,
        'y': (inc, 'x'),
        'z': (inc, 'y'),
    }
    # get_deps returns (dependencies, dependents) in that order; unpacking
    # them the other way round hands heal() the two mappings swapped.
    dependencies, dependents = get_deps(dsk)

    state = {
        'in_memory': set(),  # missing 'b'
        'stacks': {'alice': ['a'], 'bob': ['x']},
        'processing': {'alice': set(), 'bob': set()},
        'waiting': {},
        'waiting_data': {},
    }

    # Drop worker 'bob', whose stack held the leaf task 'x'.
    del state['stacks']['bob']
    del state['processing']['bob']

    output = heal(dependencies, dependents, **state)
    assert 'x' in output['waiting']
def test_update_state_with_processing():
    """update_state adds new keys on top of a graph that is partly done.

    Scenario: 'x' is stored on 'alice', 'y' is mid-computation and 'z'
    waits on 'y'.  update_state, as called here, takes no ``processing``
    argument, so that part of the scenario is not passed in.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    held_data = {'z'}
    who_has = {'x': {'alice'}}
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_keys = {'b', 'c'}

    # 'a' has nothing to wait for because its only input 'x' is in who_has.
    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {'x': {'y', 'a'}, 'y': {'z', 'b'}, 'z': {'c'},
                      'a': {'b'}, 'b': set(), 'c': set()}
    e_held_data = {'b', 'c', 'z'}

    update_state(dsk, dependencies, dependents, held_data, who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
def test_update_state_with_processing():
    """update_state adds new keys on top of a graph that is partly done.

    Scenario: 'x' is stored on 'alice', 'y' is mid-computation and 'z'
    waits on 'y'.  update_state, as called here, takes no ``processing``
    argument, so that part of the scenario is not passed in.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    held_data = {'z'}
    who_has = {'x': {'alice'}}
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_keys = {'b', 'c'}

    # 'a' has nothing to wait for because its only input 'x' is in who_has.
    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {
        'x': {'y', 'a'},
        'y': {'z', 'b'},
        'z': {'c'},
        'a': {'b'},
        'b': set(),
        'c': set(),
    }
    e_held_data = {'b', 'c', 'z'}

    update_state(dsk, dependencies, dependents, held_data, who_has, in_play,
                 waiting, waiting_data, new_dsk, new_keys)

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert held_data == e_held_data
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
def test_heal_culls():
    """heal drops stacked and processing work that is no longer needed."""
    graph = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
        'x': 1,
        'y': (inc, 'x'),
        'z': (inc, 'y'),
    }
    dependencies, dependents = get_deps(graph)

    # 'c' and 'y' are already in memory, yet 'a' is still stacked on alice
    # and 'y' is still listed as processing on bob.
    state = dict(
        in_memory={'c', 'y'},
        stacks={'alice': ['a'], 'bob': []},
        processing={'alice': set(), 'bob': {'y'}},
        waiting={},
        waiting_data={},
    )

    healed = heal(dependencies, dependents, **state)

    assert 'a' not in healed['stacks']['alice']
    assert healed['released'] == {'a', 'b', 'x'}
    assert healed['finished_results'] == {'c'}
    assert 'y' not in healed['processing']['bob']
    assert healed['waiting']['z'] == set()
def test_update_state_respects_WrappedKeys():
    """update_state returns the same result for WrappedKey('y') and 'y'."""
    dsk = {'x': 1, 'y': (inc, 'x')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'y': set()}
    waiting_data = {'x': {'y'}, 'y': set()}

    held_data = {'y'}
    who_has = {'x': {'alice'}}
    in_play = {'x', 'y'}

    def run(new_dsk):
        # Deep-copy every argument so both calls start from the same state.
        args = [dsk, dependencies, dependents, held_data, who_has,
                in_play, waiting, waiting_data, new_dsk, {'z'}]
        return update_state(*map(deepcopy, args))

    wrapped = run({'z': (add, WrappedKey('y'), 10)})
    plain = run({'z': (add, 'y', 10)})

    assert wrapped == plain
def test_ndependents():
    """Check the values ndependents returns for several small graphs."""
    a, b, c = 'abc'
    indices = range(5)

    # Five independent three-step chains: (a, i) -> (b, i) -> (c, i).
    graph = dict(chain(
        (((a, i), i * 2) for i in indices),
        (((b, i), (add, i, (a, i))) for i in indices),
        (((c, i), (add, i, (b, i))) for i in indices),
    ))
    expected = dict(chain(
        (((a, i), 3) for i in indices),
        (((b, i), 2) for i in indices),
        (((c, i), 1) for i in indices),
    ))
    result = ndependents(*get_deps(graph))
    assert result == expected

    # Two unrelated keys; the expected result happens to equal the graph.
    graph = {a: 1, b: 1}
    deps = get_deps(graph)
    assert ndependents(*deps) == graph

    graph = {a: 1, b: (add, a, 1), c: (add, b, a)}
    assert ndependents(*get_deps(graph)) == {a: 4, b: 2, c: 1}

    # Alias chain: each key is just a reference to the previous one.
    graph = {a: 1, b: a, c: b}
    deps = get_deps(graph)
    assert ndependents(*deps) == {a: 3, b: 2, c: 1}
def diagnostics(dsk, o=None, dependencies=None):
    """Simulate runtime metrics as though running tasks one at a time in order.

    These diagnostics can help reveal behaviors of and issues with ``order``.

    Parameters
    ----------
    dsk : mapping
        The task graph.
    o : mapping, optional
        Key -> position in the run order.  Computed with ``order`` when
        omitted.
    dependencies : mapping, optional
        Key -> dependencies.  Computed with ``get_deps`` when omitted;
        otherwise the dependents are derived from it with ``reverse_dict``.

    Returns
    -------
    A dict of `namedtuple("OrderInfo")` and a list of the number of outputs
    held over time.

    OrderInfo fields:
    - order : the order in which the node is run.
    - age : how long the output of a node is held.
    - num_data_when_run : the number of outputs held in memory when a node
      is run.
    - num_data_when_released : the number of outputs held in memory when the
      output is released.
    - num_dependencies_freed : the number of dependencies freed by running
      the node.
    """
    if dependencies is not None:
        dependents = reverse_dict(dependencies)
    else:
        dependencies, dependents = get_deps(dsk)
    if o is None:
        o = order(dsk, dependencies=dependencies)

    pressure = []
    held = 0
    age = {}
    run_pressure = {}
    release_pressure = {}
    freed = {}
    # Number of dependents of each key that have not been run yet.
    remaining = {key: len(val) for key, val in dependents.items()}

    for step, key in enumerate(sorted(dsk, key=o.__getitem__)):
        pressure.append(held)
        run_pressure[key] = held

        n_released = 0
        for dep in dependencies[key]:
            remaining[dep] -= 1
            if remaining[dep] != 0:
                continue
            # ``key`` was the last consumer of ``dep``: it can go now.
            age[dep] = step - o[dep]
            release_pressure[dep] = held
            n_released += 1
        freed[key] = n_released

        if not dependents[key]:
            # Nothing consumes this output, so it is released immediately.
            age[key] = 0
            release_pressure[key] = held
            held -= n_released
        else:
            # One new output is held; ``n_released`` inputs are dropped.
            held -= n_released - 1

    info = {
        key: OrderInfo(
            position,
            age[key],
            run_pressure[key],
            release_pressure[key],
            freed[key],
        )
        for key, position in o.items()
    }
    return info, pressure
def test_dependent_keys():
    """Compare dependent_keys for 'f' with ``complete`` off and on."""
    a, b, c, d, e, f, g = 'abcdefg'

    # 'a' and 'b' are already held somewhere; 'c' is running on alice.
    who_has = {a: [1], b: [1]}
    processing = {'alice': {c}}
    exceptions = {}

    graph = {
        a: 1,
        b: 2,
        c: (add, a, b),
        d: (inc, a),
        e: (add, c, d),
        f: (inc, e),
    }
    dependencies, dependents = get_deps(graph)

    common = (f, who_has, processing, dependencies, exceptions)

    assert dependent_keys(*common, complete=False)[0] == {f, e, c, d}
    assert dependent_keys(*common, complete=True)[0] == {a, b, c, d, e, f}
def test_hash_groupby_aggregate(npartitions, split_every, split_out):
    """Grouped variance with ``split_out`` matches pandas and keeps its shape.

    Checks the number of output partitions, the number of dependency-free
    tasks in the optimized graph, and the computed values.
    """
    pdf = pd.DataFrame({'x': np.arange(100) % 10, 'y': np.ones(100)})
    ddf = dd.from_pandas(pdf, npartitions)

    grouped_var = ddf.groupby('x').y.var(split_every=split_every,
                                         split_out=split_out)
    graph = grouped_var._optimize(grouped_var.dask, grouped_var._keys())

    from dask.core import get_deps
    dependencies, dependents = get_deps(graph)

    assert grouped_var.npartitions == (split_out or 1)

    # Tasks without any dependency: expected to be one per input partition.
    n_roots = sum(1 for deps in dependencies.values() if not deps)
    assert n_roots == npartitions

    assert_eq(grouped_var, pdf.groupby('x').y.var())
def test_heal_restarts_leaf_tasks():
    """heal lists leaf task 'x' in ``waiting`` after worker 'bob' is gone."""
    dsk = {'a': 1, 'b': (inc, 'a'), 'c': (inc, 'b'),
           'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    # get_deps returns (dependencies, dependents) in that order; unpacking
    # them the other way round hands heal() the two mappings swapped.
    dependencies, dependents = get_deps(dsk)

    state = {'who_has': dict(),  # missing 'b'
             'stacks': {'alice': ['a'], 'bob': ['x']},
             'processing': {'alice': set(), 'bob': set()},
             'waiting': {},
             'waiting_data': {}}

    # Drop worker 'bob', whose stack held the leaf task 'x'.
    del state['stacks']['bob']
    del state['processing']['bob']

    output = heal(dependencies, dependents, **state)
    assert 'x' in output['waiting']
def test_decide_worker_with_many_independent_leaves():
    """decide_worker mostly sends a task to the worker holding its input.

    100 independent ``x -> y`` pairs; even-numbered inputs live on alice,
    odd-numbered ones on bob.  More than 90 placements must be local.
    """
    y_tasks = {('y', i): (inc, ('x', i)) for i in range(100)}
    x_tasks = {('x', i): i for i in range(100)}
    dsk = merge(y_tasks, x_tasks)
    dependencies, dependents = get_deps(dsk)

    stacks = {'alice': [], 'bob': []}
    on_alice = {('x', i * 2): {'alice'} for i in range(50)}
    on_bob = {('x', i * 2 + 1): {'bob'} for i in range(50)}
    who_has = merge(on_alice, on_bob)

    for key in dsk:
        chosen = decide_worker(dependencies, stacks, who_has, {}, key)
        stacks[chosen].append(key)

    def local_hits(worker):
        # Keys stacked on ``worker`` whose 'x' input is stored on ``worker``.
        return sum(1 for k in stacks[worker]
                   if worker in who_has[('x', k[1])])

    nhits = local_hits('alice') + local_hits('bob')

    assert nhits > 90
def test_decide_worker_with_many_independent_leaves():
    """decide_worker mostly sends a task to the worker holding its input.

    100 independent ``x -> y`` pairs; even-numbered inputs live on alice,
    odd-numbered ones on bob.  More than 90 placements must be local.
    """
    y_tasks = {('y', i): (inc, ('x', i)) for i in range(100)}
    x_tasks = {('x', i): i for i in range(100)}
    dsk = merge(y_tasks, x_tasks)
    dependencies, dependents = get_deps(dsk)

    stacks = {alice: [], bob: []}
    on_alice = {('x', i * 2): {alice} for i in range(50)}
    on_bob = {('x', i * 2 + 1): {bob} for i in range(50)}
    who_has = merge(on_alice, on_bob)
    # Every stored input is given a size of zero bytes.
    nbytes = dict.fromkeys(who_has, 0)

    for key in dsk:
        chosen = decide_worker(dependencies, stacks, who_has, {}, set(),
                               nbytes, key)
        stacks[chosen].append(key)

    def local_hits(worker):
        # Keys stacked on ``worker`` whose 'x' input is stored on ``worker``.
        return sum(1 for k in stacks[worker]
                   if worker in who_has[('x', k[1])])

    nhits = local_hits(alice) + local_hits(bob)

    assert nhits > 90
def test_heal_culls():
    """heal drops stacked and processing work that is no longer needed."""
    graph = {
        'a': 1,
        'b': (inc, 'a'),
        'c': (inc, 'b'),
        'x': 1,
        'y': (inc, 'x'),
        'z': (inc, 'y'),
    }
    dependencies, dependents = get_deps(graph)

    # 'c' and 'y' are already stored, yet 'a' is still stacked on alice
    # and 'y' is still listed as processing on bob.
    state = dict(
        who_has={'c': {alice}, 'y': {alice}},
        stacks={'alice': ['a'], 'bob': []},
        processing={'alice': set(), 'bob': {'y'}},
        waiting={},
        waiting_data={},
    )

    healed = heal(dependencies, dependents, **state)

    assert 'a' not in healed['stacks']['alice']
    assert healed['released'] == {'a', 'b', 'x'}
    assert healed['finished_results'] == {'c'}
    assert 'y' not in healed['processing']['bob']
    assert healed['waiting']['z'] == set()
def test_deep_bases_win_over_dependents():
    r"""
    d should come before e and probably before one of b and c

            a
          / | \   .
         b  c |
        / \ | /
       e    d
    """
    dsk = {'a': (f, 'b', 'c', 'd'),
           'b': (f, 'd', 'e'),
           'c': (f, 'd'),
           'd': 1,
           'e': 2}

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    # Result unused: the call only checks that child_max runs on this graph.
    child_max(dependencies, dependents, nd)

    o = order(dsk)
    assert o['d'] < o['e']
    assert o['d'] < o['b'] or o['d'] < o['c']
def test_update_state_with_processing():
    """update_state adds new keys on top of a graph that is partly done.

    Scenario: 'x' is stored on 'alice', 'y' is mid-computation and 'z'
    waits on 'y'.  update_state, as called here, takes no ``processing``
    argument, so that part of the scenario is not passed in.
    """
    dsk = {'x': 1, 'y': (inc, 'x'), 'z': (inc, 'y')}
    dependencies, dependents = get_deps(dsk)

    waiting = {'z': {'y'}}
    waiting_data = {'x': {'y'}, 'y': {'z'}, 'z': set()}

    who_wants = defaultdict(set, z={'client'})
    wants_what = defaultdict(set, client={'z'})
    who_has = {'x': {'alice'}}
    in_play = {'z', 'x', 'y'}

    new_dsk = {'a': (inc, 'x'), 'b': (add, 'a', 'y'), 'c': (inc, 'z')}
    new_dependencies = {'a': {'x'}, 'b': {'a', 'y'}, 'c': {'z'}}
    new_keys = {'b', 'c'}

    # 'a' has nothing to wait for because its only input 'x' is in who_has.
    e_waiting = {'z': {'y'}, 'a': set(), 'b': {'a', 'y'}, 'c': {'z'}}
    e_waiting_data = {
        'x': {'y', 'a'},
        'y': {'z', 'b'},
        'z': {'c'},
        'a': {'b'},
        'b': set(),
        'c': set(),
    }
    e_who_wants = {'b': {'client'}, 'c': {'client'}, 'z': {'client'}}
    e_wants_what = {'client': {'b', 'c', 'z'}}

    update_state(dsk, dependencies, dependents, who_wants, wants_what,
                 who_has, in_play, waiting, waiting_data, new_dsk, new_keys,
                 new_dependencies, 'client')

    assert waiting == e_waiting
    assert waiting_data == e_waiting_data
    assert who_wants == e_who_wants
    assert wants_what == e_wants_what
    assert in_play == {'x', 'y', 'z', 'a', 'b', 'c'}
def test_prefer_broker_nodes():
    r"""The leaf with two dependents is ordered before the leaf with one.

    b0    b1  b2
    |      \  /
    a0      a1

    a1 should be run before a0
    """
    a, b = 'ab'
    dsk = {
        (a, 0): (f, ),
        (a, 1): (f, ),
        (b, 0): (f, (a, 0)),
        (b, 1): (f, (a, 1)),
        (b, 2): (f, (a, 1)),
    }

    dependencies, dependents = get_deps(dsk)
    nd = ndependents(dependencies, dependents)
    # Result unused: the call only checks that child_max runs on this graph.
    child_max(dependencies, dependents, nd)

    o = order(dsk)
    assert o[(a, 1)] < o[(a, 0)]

    # Switch name of 0, 1 to ensure that this isn't due to string comparison
    dsk = {
        (a, 0): (f, ),
        (a, 1): (f, ),
        (b, 0): (f, (a, 0)),
        (b, 1): (f, (a, 1)),
        (b, 2): (f, (a, 0)),
    }

    o = order(dsk)
    assert o[(a, 1)] > o[(a, 0)]
def extract_compilable_subgraphs(
        dsk: Dict,
        compiler: str,
        output_keys: List[str],
        include_singletons=True) -> List[DaskSubgraph]:
    """Collect linear chains of compilable tasks from a Dask task graph.

    Only one compiler is handled per call, and only linear chains of
    compilable tasks are returned.

    Parameters
    ----------
    dsk
        The Dask task graph.
    compiler
        Name of the compiler, forwarded to ``_get_compilable_dask_keys``.
    output_keys
        Keys that, when they appear in a subgraph, may only sit at the end
        of a chain.
    include_singletons
        When True, chains of length 1 are returned as well; when False a
        chain must be longer than 1.

    Returns
    -------
    One ``DaskSubgraph`` per chain found; an empty list when the graph has
    no compilable keys.
    """
    min_chain_len = 1 if include_singletons else 2

    dependencies, dependents = get_deps(dsk)

    compilable_keys, non_compilable_keys = _get_compilable_dask_keys(
        dsk, compiler)
    if len(compilable_keys) == 0:
        return []

    terminal_keys = set(output_keys)

    subgraphs = []
    ordered_keys = _dfs_sorted_dask_keys(compilable_keys, dependencies,
                                         dependents)
    key = next(ordered_keys)
    current_chain = [key]

    def _note_subgraph(chain):
        # Record ``chain`` as a subgraph whose output is its last key and
        # whose inputs are the dependencies that lie outside the chain.
        last_key = chain[-1]
        members = set(chain)
        external = reduce(
            set.union,
            (dependencies[member] - members for member in members))
        tasks = {member: dsk[member] for member in members}
        subgraphs.append(
            DaskSubgraph(tasks=tasks,
                         input_keys=list(external),
                         output_key=last_key))

    for next_key in ordered_keys:
        next_key_dependencies = dependencies[next_key]
        key_dependents = dependents[key]

        # ``next_key`` extends the chain only when it is the sole dependent
        # of ``key``, ``key`` is its sole dependency, and ``key`` is not an
        # output key (output keys must be at the end of a chain).
        extends_chain = (
            len(next_key_dependencies) == 1
            and len(key_dependents) == 1
            and next_key in key_dependents
            and key in next_key_dependencies
            and key not in terminal_keys
        )

        if extends_chain:
            current_chain.append(next_key)
        else:
            if len(current_chain) >= min_chain_len:
                _note_subgraph(current_chain)
            current_chain = [next_key]

        key = next_key

    # Flush whatever chain was still open when the keys ran out.
    if len(current_chain) >= min_chain_len:
        _note_subgraph(current_chain)

    return subgraphs
def test_eager_to_compute_dependent_to_free_parent():
    r"""https://github.com/dask/dask/pull/7929

    This graph begins with many motifs like the following:

    |      |
    c1    c2
      \ /
       b
       |
       a

    We want to compute c1 and c2 pretty close together, because if we choose
    to compute c1, then we should also compute c2 so we can release b.  Being
    greedy here allows us to release memory sooner and be more globally
    optimal.
    """
    dsk = {
        "a00": (f, "a06", "a08"),
        "a01": (f, "a28", "a26"),
        "a02": (f, "a24", "a21"),
        "a03": (f, "a22", "a25"),
        "a04": (f, "a29", "a20"),
        "a05": (f, "a23", "a27"),
        "a06": (f, "a04", "a02"),
        "a07": (f, "a00", "a01"),
        "a08": (f, "a05", "a03"),
        "a09": (f, "a43"),
        "a10": (f, "a36"),
        "a11": (f, "a33"),
        "a12": (f, "a47"),
        "a13": (f, "a44"),
        "a14": (f, "a42"),
        "a15": (f, "a37"),
        "a16": (f, "a48"),
        "a17": (f, "a49"),
        "a18": (f, "a35"),
        "a19": (f, "a46"),
        "a20": (f, "a55"),
        "a21": (f, "a53"),
        "a22": (f, "a60"),
        "a23": (f, "a54"),
        "a24": (f, "a59"),
        "a25": (f, "a56"),
        "a26": (f, "a61"),
        "a27": (f, "a52"),
        "a28": (f, "a57"),
        "a29": (f, "a58"),
        "a30": (f, "a19"),
        "a31": (f, "a07"),
        "a32": (f, "a30", "a31"),
        "a33": (f, "a58"),
        "a34": (f, "a11", "a09"),
        "a35": (f, "a60"),
        "a36": (f, "a52"),
        "a37": (f, "a61"),
        "a38": (f, "a14", "a10"),
        "a39": (f, "a38", "a40"),
        "a40": (f, "a18", "a17"),
        "a41": (f, "a34", "a50"),
        "a42": (f, "a54"),
        "a43": (f, "a55"),
        "a44": (f, "a53"),
        "a45": (f, "a16", "a15"),
        "a46": (f, "a51", "a45"),
        "a47": (f, "a59"),
        "a48": (f, "a57"),
        "a49": (f, "a56"),
        "a50": (f, "a12", "a13"),
        "a51": (f, "a41", "a39"),
        "a52": (f, "a62"),
        "a53": (f, "a68"),
        "a54": (f, "a70"),
        "a55": (f, "a67"),
        "a56": (f, "a71"),
        "a57": (f, "a64"),
        "a58": (f, "a65"),
        "a59": (f, "a63"),
        "a60": (f, "a69"),
        "a61": (f, "a66"),
        "a62": (f, f),
        "a63": (f, f),
        "a64": (f, f),
        "a65": (f, f),
        "a66": (f, f),
        "a67": (f, f),
        "a68": (f, f),
        "a69": (f, f),
        "a70": (f, f),
        "a71": (f, f),
    }
    dependencies, dependents = get_deps(dsk)
    o = order(dsk)

    # The single dependent of every dependency-free task (the "b" of each
    # motif).  Read it with next(iter(...)) rather than pop() so that the
    # ``dependents`` mapping is left intact.
    parents = {
        next(iter(deps))
        for key, deps in dependents.items()
        if not dependencies[key]
    }

    def cost(deps):
        # Distance in the ordering between the two dependents of a parent.
        a, b = deps
        return abs(o[a] - o[b])

    cost_of_pairs = {key: cost(dependents[key]) for key in parents}

    # Allow one to be bad, b/c this is hard!
    costs = sorted(cost_of_pairs.values())
    assert sum(costs[:-1]) <= 25 or sum(costs) <= 31
def test_stacklimit(abcde):
    """Run ndependencies on a 10000-task linear chain without error.

    NOTE(review): presumably guards against recursion-depth failures on
    deep graphs -- confirm against the ndependencies implementation.
    """
    chain_length = 10000
    # Each task x<i+1> increments the previous task x<i>.
    dsk = {
        'x' + str(i + 1): (inc, 'x' + str(i))
        for i in range(chain_length)
    }
    dependencies, dependents = get_deps(dsk)
    ndependencies(dependencies, dependents)
def assert_max_deps(x, n, eq=True):
    """Assert a bound on the largest dependency count in ``x.dask``.

    Parameters
    ----------
    x
        Object whose ``dask`` attribute is the graph passed to ``get_deps``.
    n
        The bound to compare against.
    eq
        When True the largest number of dependencies of any key must equal
        ``n``; when False it only has to be at most ``n``.
    """
    dependencies, dependents = get_deps(x.dask)
    widest = max(map(len, dependencies.values()))
    if eq:
        assert widest == n
    else:
        assert widest <= n
def test_stacklimit(abcde):
    """Run ndependencies on a 10000-task linear chain without error.

    NOTE(review): presumably guards against recursion-depth failures on
    deep graphs -- confirm against the ndependencies implementation.
    """
    # Each task x<i+1> increments the previous task x<i>.
    dsk = {}
    for i in range(10000):
        dsk["x{}".format(i + 1)] = (inc, "x{}".format(i))

    dependencies, dependents = get_deps(dsk)
    ndependencies(dependencies, dependents)
def test_stacklimit():
    """Run child_max and ndependents on a 10000-task linear chain.

    NOTE(review): presumably guards against recursion-depth failures on
    deep graphs -- confirm against the helper implementations.
    """
    # Each task x<i+1> increments the previous task x<i>.
    dsk = {
        'x' + str(i + 1): (inc, 'x' + str(i))
        for i in range(10000)
    }
    dependencies, dependents = get_deps(dsk)

    # Every key starts with the same score of 1.
    scores = {key: 1 for key in dsk}
    child_max(dependencies, dependents, scores)
    ndependents(dependencies, dependents)