def test_SubgraphCallable():
    """Check naming, equality, hashing, evaluation and pickling of SubgraphCallable."""
    non_hashable = [1, 2, 3]

    # Keyword dictionaries handed to ``partial_by_order`` through ``apply``.
    kwargs_b = {"function": func_with_kwargs, "other": [(1, 20)], "c": 4}
    kwargs_c = {"function": func_with_kwargs, "other": [(1, 20)]}

    dsk = {
        "a": (apply, add, ["in1", 2]),
        "b": (apply, partial_by_order, ["in2"], kwargs_b),
        "c": (apply, partial_by_order, ["in2", "in1"], kwargs_c),
        "d": (inc, "a"),
        "e": (add, "c", "d"),
        "f": ["a", 2, "b", (add, "b", (sum, non_hashable))],
        "h": (add, (sum, "f"), (sum, ["a", "b"])),
    }

    subgraph = SubgraphCallable(dsk, "h", ["in1", "in2"], name="test")
    assert subgraph.name == "test"
    assert repr(subgraph) == "test"

    # Built from identical arguments, so it must compare equal.
    same = SubgraphCallable(dsk, "h", ["in1", "in2"], name="test")
    assert subgraph == same

    # Differs from ``subgraph`` only in its outkey.
    other_outkey = SubgraphCallable(dsk, "g", ["in1", "in2"], name="test")
    assert subgraph != other_outkey

    assert hash(SubgraphCallable(None, None, [None]))
    assert hash(other_outkey) != hash(same)

    # Calling the subgraph must match evaluating the same graph with the
    # inputs bound as ordinary keys.
    bound = {**dsk, "in1": 1, "in2": 2}
    assert subgraph(1, 2) == get_sync(cull(bound, ["h"])[0], ["h"])[0]
    assert subgraph(1, 2) == subgraph(1, 2)

    # A pickle round trip keeps equality, hash and call result.
    restored = pickle.loads(pickle.dumps(subgraph))
    assert restored == subgraph
    assert hash(restored) == hash(subgraph)
    assert restored(1, 2) == subgraph(1, 2)
def test_fuse_subgraphs_linear_chains_of_duplicate_deps(compare_subgraph_callables):
    """Fusing a chain whose tasks use the same dependency twice gives one subgraph.

    The ``compare_subgraph_callables`` argument is requested by the test but is
    not referenced in its body.
    """
    dsk = {
        "x-1": 1,
        "add-1": (add, "x-1", "x-1"),
        "add-2": (add, "add-1", "add-1"),
        "add-3": (add, "add-2", "add-2"),
        "add-4": (add, "add-3", "add-3"),
        "add-5": (add, "add-4", "add-4"),
    }
    # The expected subgraph holds exactly the tasks of the input graph; take
    # the snapshot before ``fuse`` is handed ``dsk``.
    whole_chain = dict(dsk)

    fused = fuse(dsk, "add-5", fuse_subgraphs=True)

    chain_callable = SubgraphCallable(whole_chain, "add-5", ())
    expected = with_deps(
        {
            "add-x-1": (chain_callable,),
            "add-5": "add-x-1",
        }
    )
    assert fused == expected
def convert(task, dsk, extra_values):
    """Recursively rewrite ``task``, replacing key references with ``tokey(...)``.

    Parameters
    ----------
    task
        The object to rewrite. Tuples, lists and dicts are walked
        recursively; anything else is returned unchanged unless it is a key.
    dsk
        Container whose members are treated as keys.
    extra_values
        Additional container whose members are also treated as keys.

    Returns
    -------
    A rewritten copy of ``task`` in which every element found in ``dsk`` or
    ``extra_values`` has been passed through ``tokey``.
    """

    def recurse(item):
        return convert(item, dsk, extra_values)

    kind = type(task)

    if kind is tuple and task:
        head, rest = task[0], task[1:]
        if type(head) is SubgraphCallable:
            # Rebuild the callable with its inner graph and input keys
            # rewritten, then rewrite the arguments that follow it.
            rebuilt = SubgraphCallable(
                recurse(head.dsk),
                head.outkey,
                recurse(head.inkeys),
                head.name,
            )
            return (rebuilt,) + tuple(recurse(arg) for arg in rest)
        if callable(head):
            # Ordinary task: keep the callable, rewrite only its arguments.
            return (head,) + tuple(recurse(arg) for arg in rest)

    if kind is list:
        return [recurse(item) for item in task]

    if kind is dict:
        return {key: recurse(value) for key, value in task.items()}

    try:
        if task in dsk or task in extra_values:
            return tokey(task)
    except TypeError:
        # The membership test (or ``tokey``) rejected this object, e.g. an
        # unhashable task looked up in a dict; treat it as "not a key".
        pass

    # A tuple that is not itself a key may still contain keys.
    return tuple(recurse(item) for item in task) if kind is tuple else task
def test_fuse_subgraphs_linear_chains_of_duplicate_deps():
    """Fusing a chain whose tasks use the same dependency twice gives one subgraph."""
    # Build 'x-1' -> 'add-1' -> ... -> 'add-5', where every 'add-N' adds the
    # previous key to itself.
    dsk = {'x-1': 1}
    previous = 'x-1'
    for step in range(1, 6):
        current = 'add-%d' % step
        dsk[current] = (add, previous, previous)
        previous = current

    # The expected subgraph holds exactly the tasks of the input graph; take
    # the snapshot before ``fuse`` is handed ``dsk``.
    whole_chain = dict(dsk)

    fused = fuse(dsk, 'add-5', fuse_subgraphs=True)

    expected = with_deps({
        'add-x-1': (SubgraphCallable(whole_chain, 'add-5', ()),),
        'add-5': 'add-x-1',
    })
    assert fused == expected
def test_SubgraphCallable():
    """Check name, repr, evaluation and pickling of a SubgraphCallable."""
    non_hashable = [1, 2, 3]

    # Keyword dictionaries handed to ``partial_by_order`` through ``apply``.
    kwargs_b = {'function': func_with_kwargs, 'other': [(1, 20)], 'c': 4}
    kwargs_c = {'function': func_with_kwargs, 'other': [(1, 20)]}

    dsk = {
        'a': (apply, add, ['in1', 2]),
        'b': (apply, partial_by_order, ['in2'], kwargs_b),
        'c': (apply, partial_by_order, ['in2', 'in1'], kwargs_c),
        'd': (inc, 'a'),
        'e': (add, 'c', 'd'),
        'f': ['a', 2, 'b', (add, 'b', (sum, non_hashable))],
        'h': (add, (sum, 'f'), (sum, ['a', 'b'])),
    }

    subgraph = SubgraphCallable(dsk, 'h', ['in1', 'in2'], name='test')
    assert subgraph.name == 'test'
    assert repr(subgraph) == 'test'

    # Calling the subgraph must match evaluating the same graph with the
    # inputs bound as ordinary keys.
    bound = dict(dsk, in1=1, in2=2)
    assert subgraph(1, 2) == get_sync(cull(bound, ['h'])[0], ['h'])[0]
    assert subgraph(1, 2) == subgraph(1, 2)

    # A pickle round trip must keep the call result.
    restored = pickle.loads(pickle.dumps(subgraph))
    assert restored(1, 2) == subgraph(1, 2)
def test_str_graph():
    """Check that ``str_graph`` turns tuple keys and references into strings."""
    x1, x2 = ("x", 1), ("x", 2)

    # A graph with a string key is returned equal to the input.
    plain = {"x": 1}
    assert str_graph(plain) == plain

    # A tuple key becomes its ``str`` form.
    single = {x1: (inc, 1)}
    assert str_graph(single) == {str(x1): (inc, 1)}

    # References to tuple keys inside tasks are converted as well.
    chained = {x1: (inc, 1), x2: (inc, x1)}
    assert str_graph(chained) == {
        str(x1): (inc, 1),
        str(x2): (inc, str(x1)),
    }

    graphs = [
        {"x": 1},
        {x1: (inc, 1), x2: (inc, x1)},
        {x1: (sum, [1, 2, 3]), x2: (sum, [x1, x1])},
    ]
    for graph in graphs:
        stringified = str_graph(graph)
        keys = list(graph)
        str_keys = list(map(str, keys))
        assert all(isinstance(key, str) for key in stringified)
        # Both graphs must evaluate to the same results.
        assert dask.get(graph, keys) == dask.get(stringified, str_keys)

    # Keys inside a SubgraphCallable and keys listed in ``extra_values`` are
    # converted too.
    y1, z1 = ("y", 1), ("z", 1)
    nested = {y1: (SubgraphCallable({"x": y1}, "x", (y1,)), (z1,))}
    nested = str_graph(nested, extra_values=(z1,))
    assert nested["('y', 1)"][0].dsk["x"] == "('y', 1)"
    assert nested["('y', 1)"][1][0] == "('z', 1)"
def test_SubgraphCallable_with_numpy():
    """Check SubgraphCallable equality when the graph holds numpy arrays."""
    np = pytest.importorskip("numpy")

    # Numpy arrays in the graph use elementwise equality; comparing two
    # callables built on such a graph must still work.
    base_graph = {"a": np.arange(10)}
    first = SubgraphCallable(base_graph, "a", [None], name="test")
    duplicate = SubgraphCallable(base_graph, "a", [None], name="test")
    assert first == duplicate

    # The two graphs hold different arrays, yet the callables compare equal
    # because SubgraphCallable.__eq__() only checks name, outkeys, and inkeys.
    shifted_graph = {"a": np.arange(10) + 1}
    different_graph = SubgraphCallable(shifted_graph, "a", [None], name="test")
    assert first == different_graph

    # A different name makes them unequal.
    renamed = SubgraphCallable(base_graph, "a", [None], name="test2")
    assert first != renamed
def test_SubgraphCallable_eq():
    """Check which differences make two SubgraphCallables compare unequal."""
    add_task = (add, "d", "e")
    constants_graph = {"a": 1, "b": 2, "c": add_task}
    tasks_graph = {"a": (inc, 0), "b": (inc, "a"), "c": add_task}

    reference = SubgraphCallable(constants_graph, "c", ["d", "e"])
    other_graph = SubgraphCallable(tasks_graph, "c", ["d", "e"])
    # Different graphs must compare unequal (when no name given)
    assert reference != other_graph

    # Different inputs must compare unequal
    other_inputs = SubgraphCallable(tasks_graph, "c", ["d", "f"], name=reference.name)
    assert other_inputs != reference

    # Different outputs must compare unequal
    other_output = SubgraphCallable(tasks_graph, "a", ["d", "e"], name=reference.name)
    assert other_output != reference

    # Reordering the inputs must not prevent equality
    reordered = SubgraphCallable(
        constants_graph, "c", ["e", "d"], name=reference.name
    )
    assert reference == reordered
    assert hash(reference) == hash(reordered)

    # Explicitly named graphs with different names must be unequal
    named_first = SubgraphCallable(constants_graph, "c", ["d", "e"], name="first")
    named_second = SubgraphCallable(constants_graph, "c", ["d", "e"], name="second")
    assert named_first != named_second
def _wrap_getter(func, wrap): """ Getters generated from a Blockwise layer might be wrapped in a SubgraphCallable. Make sure that the optimization functions can still work if that is the case. """ if wrap: return SubgraphCallable({"key": (func, "index")}, outkey="key", inkeys="index") else: return func
def test_stringify():
    """Check ``stringify`` on strings, bytes, whole graphs and nested keys."""
    x1, x2 = ("x", 1), ("x", 2)

    # Strings and bytes are handed back as the very same object.
    for text in ("Hello", b"Hello"):
        assert stringify(text) is text

    plain = {"x": 1}
    assert stringify(plain) == str(plain)
    assert stringify(plain, exclusive=()) == plain

    single = {x1: (inc, 1)}
    assert stringify(single) == str({x1: (inc, 1)})
    assert stringify(single, exclusive=()) == {x1: (inc, 1)}

    # With ``exclusive`` given, the dict keys stay tuples while the key
    # reference inside the task is stringified.
    chained = {x1: (inc, 1), x2: (inc, x1)}
    assert stringify(chained, exclusive=chained) == {
        x1: (inc, 1),
        x2: (inc, str(x1)),
    }

    graphs = [
        {"x": 1},
        {x1: (inc, 1), x2: (inc, x1)},
        {x1: (sum, [1, 2, 3]), x2: (sum, [x1, x1])},
    ]
    for graph in graphs:
        stringified = {
            stringify(key): stringify(task, exclusive=graph)
            for key, task in graph.items()
        }
        keys = list(graph)
        str_keys = list(map(str, keys))
        assert all(isinstance(key, str) for key in stringified)
        # Both graphs must evaluate to the same results.
        assert get(graph, keys) == get(stringified, str_keys)

    # Keys inside a SubgraphCallable and the extra key added to ``exclusive``
    # are stringified as well.
    y1, z1 = ("y", 1), ("z", 1)
    nested = {y1: (SubgraphCallable({"x": y1}, "x", (y1,)), (z1,))}
    nested = stringify(nested, exclusive=set(nested) | {z1})
    assert nested[("y", 1)][0].dsk["x"] == "('y', 1)"
    assert nested[("y", 1)][1][0] == "('z', 1)"
def test_fuse_subgraphs(compare_subgraph_callables):
    """Check ``fuse(..., fuse_subgraphs=True)`` for several choices of kept keys.

    The ``compare_subgraph_callables`` argument is requested by the test but is
    not referenced in its body.
    """
    dsk = {
        "x-1": 1,
        "inc-1": (inc, "x-1"),
        "inc-2": (inc, "inc-1"),
        "add-1": (add, "x-1", "inc-2"),
        "inc-3": (inc, "add-1"),
        "inc-4": (inc, "inc-3"),
        "add-2": (add, "add-1", "inc-4"),
        "inc-5": (inc, "add-2"),
        "inc-6": (inc, "inc-5"),
    }

    # Nested task fragments that recur in the expected graphs below.
    inc_twice_x = (inc, (inc, "x-1"))
    fused_add_1 = (add, "x-1", inc_twice_x)
    fused_add_2 = (add, "add-1", (inc, (inc, "add-1")))
    fused_inc_6 = (inc, (inc, fused_add_2))
    inc_twice_add_2 = (inc, (inc, "add-2"))

    # Keep only the final key: everything collapses into one subgraph.
    fused = fuse(dsk, "inc-6", fuse_subgraphs=True)
    expected = with_deps(
        {
            "inc-6": "add-inc-x-1",
            "add-inc-x-1": (
                SubgraphCallable(
                    {"x-1": 1, "add-1": fused_add_1, "inc-6": fused_inc_6},
                    "inc-6",
                    (),
                ),
            ),
        }
    )
    assert fused == expected

    # Same, but without renaming the fused key.
    fused = fuse(dsk, "inc-6", fuse_subgraphs=True, rename_keys=False)
    expected = with_deps(
        {
            "inc-6": (
                SubgraphCallable(
                    {"x-1": 1, "add-1": fused_add_1, "inc-6": fused_inc_6},
                    "inc-6",
                    (),
                ),
            )
        }
    )
    assert fused == expected

    # Keep "add-2".
    fused = fuse(dsk, "add-2", fuse_subgraphs=True)
    expected = with_deps(
        {
            "add-inc-x-1": (
                SubgraphCallable(
                    {"x-1": 1, "add-1": fused_add_1, "add-2": fused_add_2},
                    "add-2",
                    (),
                ),
            ),
            "add-2": "add-inc-x-1",
            "inc-6": inc_twice_add_2,
        }
    )
    assert fused == expected

    # Keep "inc-2".
    fused = fuse(dsk, "inc-2", fuse_subgraphs=True)
    # ordering of arguments is unstable, check all permutations
    candidates = [
        with_deps(
            {
                "x-1": 1,
                "inc-2": inc_twice_x,
                "inc-6": "inc-add-1",
                "inc-add-1": (
                    SubgraphCallable(
                        {"add-1": (add, "x-1", "inc-2"), "inc-6": fused_inc_6},
                        "inc-6",
                        inkeys,
                    ),
                )
                + inkeys,
            }
        )
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert fused in candidates

    # Keep both "inc-2" and "add-2".
    fused = fuse(dsk, ["inc-2", "add-2"], fuse_subgraphs=True)
    # ordering of arguments is unstable, check all permutations
    candidates = [
        with_deps(
            {
                "x-1": 1,
                "inc-2": inc_twice_x,
                "inc-add-1": (
                    SubgraphCallable(
                        {"add-1": (add, "x-1", "inc-2"), "add-2": fused_add_2},
                        "add-2",
                        inkeys,
                    ),
                )
                + inkeys,
                "add-2": "inc-add-1",
                "inc-6": inc_twice_add_2,
            }
        )
        for inkeys in itertools.permutations(("x-1", "inc-2"))
    ]
    assert fused in candidates
def unpack_remotedata(o, byte_keys=False, myset=None):
    """ Replace WrappedKey objects in a collection with their keys

    Walks tuples, the types in ``collection_types`` and dicts recursively.
    Every WrappedKey found is swapped for its ``.key`` (passed through
    ``tokey`` when ``byte_keys`` is true) and recorded.

    When called without ``myset`` this returns a pair: the unpacked collection
    and the set of all WrappedKey objects found. When ``myset`` is supplied,
    found objects are added to it and only the unpacked collection is
    returned.

    Examples
    --------
    >>> rd = WrappedKey('mykey')
    >>> unpack_remotedata(1)
    (1, set())
    >>> unpack_remotedata(())
    ((), set())
    >>> unpack_remotedata(rd)
    ('mykey', {WrappedKey('mykey')})
    >>> unpack_remotedata([1, rd])
    ([1, 'mykey'], {WrappedKey('mykey')})
    >>> unpack_remotedata({1: rd})
    ({1: 'mykey'}, {WrappedKey('mykey')})
    >>> unpack_remotedata({1: [rd]})
    ({1: ['mykey']}, {WrappedKey('mykey')})

    Use the ``byte_keys=True`` keyword to force string keys

    >>> rd = WrappedKey(('x', 1))
    >>> unpack_remotedata(rd, byte_keys=True)
    ("('x', 1)", {WrappedKey('('x', 1)')})
    """
    if myset is None:
        # Top-level call: collect into a fresh set and return it alongside
        # the unpacked object.
        found = set()
        return unpack_remotedata(o, byte_keys, found), found

    kind = type(o)

    if kind is tuple:
        if not o:
            return o

        head = o[0]
        if type(head) is not SubgraphCallable:
            return tuple(unpack_remotedata(item, byte_keys, myset) for item in o)

        # Task led by a SubgraphCallable: gather the wrapped keys of its
        # inner graph and of its arguments into a separate set first.
        nested = set()
        graph = {
            key: unpack_remotedata(value, byte_keys, nested)
            for key, value in head.dsk.items()
        }
        call_args = tuple(unpack_remotedata(arg, byte_keys, nested) for arg in o[1:])
        if not nested:
            # Nothing wrapped anywhere inside: hand back the original task.
            return o

        myset.update(nested)
        # The keys that were found become extra inputs of the rebuilt
        # callable and extra trailing arguments of the task.
        extra_keys = tuple(
            tokey(wrapped.key) if byte_keys else wrapped.key for wrapped in nested
        )
        rebuilt = SubgraphCallable(
            graph, head.outkey, head.inkeys + extra_keys, head.name
        )
        return (rebuilt,) + call_args + extra_keys

    if kind in collection_types:
        if not o:
            return o
        return kind([unpack_remotedata(item, byte_keys, myset) for item in o])

    if kind is dict:
        if not o:
            return o
        unpacked = [unpack_remotedata(value, byte_keys, myset) for value in o.values()]
        return dict(zip(o.keys(), unpacked))

    if issubclass(kind, WrappedKey):  # TODO use type is Future
        key = tokey(o.key) if byte_keys else o.key
        myset.add(o)
        return key

    return o
def test_fuse_subgraphs():
    """Check ``fuse(..., fuse_subgraphs=True)`` for several choices of kept keys."""

    def subgraph_task(graph, outkey, inkeys=()):
        # Task tuple: the SubgraphCallable followed by its input keys.
        return (SubgraphCallable(graph, outkey, inkeys),) + inkeys

    dsk = {
        'x-1': 1,
        'inc-1': (inc, 'x-1'),
        'inc-2': (inc, 'inc-1'),
        'add-1': (add, 'x-1', 'inc-2'),
        'inc-3': (inc, 'add-1'),
        'inc-4': (inc, 'inc-3'),
        'add-2': (add, 'add-1', 'inc-4'),
        'inc-5': (inc, 'add-2'),
        'inc-6': (inc, 'inc-5'),
    }

    # Nested task fragments that recur in the expected graphs below.
    inc_twice_x = (inc, (inc, 'x-1'))
    fused_add_1 = (add, 'x-1', inc_twice_x)
    fused_add_2 = (add, 'add-1', (inc, (inc, 'add-1')))
    fused_inc_6 = (inc, (inc, fused_add_2))
    inc_twice_add_2 = (inc, (inc, 'add-2'))

    # Keep only the final key: everything collapses into one subgraph.
    fused = fuse(dsk, 'inc-6', fuse_subgraphs=True)
    expected = with_deps({
        'inc-6': 'add-inc-x-1',
        'add-inc-x-1': subgraph_task(
            {'x-1': 1, 'add-1': fused_add_1, 'inc-6': fused_inc_6}, 'inc-6'),
    })
    assert fused == expected

    # Same, but without renaming the fused key.
    fused = fuse(dsk, 'inc-6', fuse_subgraphs=True, rename_keys=False)
    expected = with_deps({
        'inc-6': subgraph_task(
            {'x-1': 1, 'add-1': fused_add_1, 'inc-6': fused_inc_6}, 'inc-6'),
    })
    assert fused == expected

    # Keep 'add-2'.
    fused = fuse(dsk, 'add-2', fuse_subgraphs=True)
    expected = with_deps({
        'add-inc-x-1': subgraph_task(
            {'x-1': 1, 'add-1': fused_add_1, 'add-2': fused_add_2}, 'add-2'),
        'add-2': 'add-inc-x-1',
        'inc-6': inc_twice_add_2,
    })
    assert fused == expected

    # Keep 'inc-2'.
    fused = fuse(dsk, 'inc-2', fuse_subgraphs=True)
    # ordering of arguments is unstable, check all permutations
    candidates = [
        with_deps({
            'x-1': 1,
            'inc-2': inc_twice_x,
            'inc-6': 'inc-add-1',
            'inc-add-1': subgraph_task(
                {'add-1': (add, 'x-1', 'inc-2'), 'inc-6': fused_inc_6},
                'inc-6', inkeys),
        })
        for inkeys in itertools.permutations(('x-1', 'inc-2'))
    ]
    assert fused in candidates

    # Keep both 'inc-2' and 'add-2'.
    fused = fuse(dsk, ['inc-2', 'add-2'], fuse_subgraphs=True)
    # ordering of arguments is unstable, check all permutations
    candidates = [
        with_deps({
            'x-1': 1,
            'inc-2': inc_twice_x,
            'inc-add-1': subgraph_task(
                {'add-1': (add, 'x-1', 'inc-2'), 'add-2': fused_add_2},
                'add-2', inkeys),
            'add-2': 'inc-add-1',
            'inc-6': inc_twice_add_2,
        })
        for inkeys in itertools.permutations(('x-1', 'inc-2'))
    ]
    assert fused in candidates