Beispiel #1
0
def test_ordering_keeps_groups_together(abcde):
    a, b, c, d, e = abcde
    d = dict(((a, i), (f, )) for i in range(4))
    d.update({(b, 0): (f, (a, 0), (a, 1)), (b, 1): (f, (a, 2), (a, 3))})
    o = order(d)

    assert abs(o[(a, 0)] - o[(a, 1)]) == 1
    assert abs(o[(a, 2)] - o[(a, 3)]) == 1

    d = dict(((a, i), (f, )) for i in range(4))
    d.update({(b, 0): (f, (a, 0), (a, 2)), (b, 1): (f, (a, 1), (a, 3))})
    o = order(d)

    assert abs(o[(a, 0)] - o[(a, 2)]) == 1
    assert abs(o[(a, 1)] - o[(a, 3)]) == 1
Beispiel #2
0
def test_base_of_reduce_preferred(abcde):
    r"""
               a3
              /|
            a2 |
           /|  |
         a1 |  |
        /|  |  |
      a0 |  |  |
      |  |  |  |
      b0 b1 b2 b3
        \ \ / /
           c

    We really want to run b0 quickly
    """
    a, b, c, d, e = abcde
    dsk = {(a, i): (f, (a, i - 1), (b, i)) for i in [1, 2, 3]}
    dsk[(a, 0)] = (f, (b, 0))
    dsk.update({(b, i): (f, c, 1) for i in [0, 1, 2, 3]})
    dsk[c] = 1

    o = order(dsk)

    assert o[(b, 0)] <= 4
    assert o[(b, 1)] <= 6
def test_many_branches_use_ndependencies(abcde):
    """From https://github.com/dask/dask/pull/5646#issuecomment-562700533.

    Sometimes we need larger or wider DAGs to test behavior.  This test ensures we choose the branch with more work
    twice in successtion. This is important, because ``order`` may search along dependencies and then along dependents.
    """
    a, b, c, d, e = abcde
    dd = d + d
    ee = e + e
    dsk = {
        (a, 0): 0,
        (a, 1): (f, (a, 0)),
        (a, 2): (f, (a, 1)),
        (b, 1): (f, (a, 0)),
        (b, 2): (f, (b, 1)),
        (c, 1): (f, (a, 0)),  # most short and thin; should go last
        (d, 1): (f, (a, 0)),
        (d, 2): (f, (d, 1)),
        (dd, 1): (f, (a, 0)),
        (dd, 2): (f, (dd, 1)),
        (dd, 3): (f, (d, 2), (dd, 2)),
        (e, 1): (f, (a, 0)),
        (e, 2): (f, (e, 1)),
        (ee, 1): (f, (a, 0)),
        (ee, 2): (f, (ee, 1)),
        (ee, 3): (f, (e, 2), (ee, 2)),
        (a, 3): (f, (a, 2), (b, 2), (c, 1), (dd, 3), (ee, 3)),
    }
    o = order(dsk)
    # run all d's and e's first
    expected = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    actual = sorted(v for (letter, _), v in o.items()
                    if letter in {d, dd, e, ee})
    assert actual == expected
    assert o[(c, 1)] == o[(a, 3)] - 1
Beispiel #4
0
def test_avoid_upwards_branching(abcde):
    r"""
         a1
         |
         a2
         |
         a3    d1
        /  \  /
      b1    c1
      |     |
      b2    c2
            |
            c3

    Prefer b1 over c1 because it won't stick around waiting for d1 to complete
    """
    a, b, c, d, e = abcde
    dsk = {
        (a, 1): (f, (a, 2)),
        (a, 2): (f, (a, 3)),
        (a, 3): (f, (b, 1), (c, 1)),
        (b, 1): (f, (b, 2)),
        (c, 1): (f, (c, 2)),
        (c, 2): (f, (c, 3)),
        (d, 1): (f, (c, 1)),
    }

    o = order(dsk)
    assert o[(b, 1)] < o[(c, 1)]
Beispiel #5
0
def test_nearest_neighbor(abcde):
    r"""

    a1  a2  a3  a4  a5  a6  a7 a8  a9
     \  |  /  \ |  /  \ |  / \ |  /
        b1      b2      b3     b4

    Want to finish off a local group before moving on.
    This is difficult because all groups are connected.
    """
    a, b, c, _, _ = abcde
    a1, a2, a3, a4, a5, a6, a7, a8, a9 = [a + i for i in "123456789"]
    b1, b2, b3, b4 = [b + i for i in "1234"]

    dsk = {
        b1: (f, ),
        b2: (f, ),
        b3: (f, ),
        b4: (f, ),
        a1: (f, b1),
        a2: (f, b1),
        a3: (f, b1, b2),
        a4: (f, b2),
        a5: (f, b2, b3),
        a6: (f, b3),
        a7: (f, b3, b4),
        a8: (f, b4),
        a9: (f, b4),
    }

    o = order(dsk)

    assert 3 < sum(o[a + i] < len(o) / 2 for i in "123456789") < 7
    assert 1 < sum(o[b + i] < len(o) / 2 for i in "1234") < 4
    assert o[min([b1, b2, b3, b4])] == 0
Beispiel #6
0
def test_map_overlap(abcde):
    r"""
      b1      b3      b5
       |\    / | \  / |
      c1  c2  c3  c4  c5
       |/  | \ | / | \|
      d1  d2  d3  d4  d5
       |       |      |
      e1      e2      e5

    Want to finish b1 before we start on e5
    """
    a, b, c, d, e = abcde
    dsk = {
        (e, 1): (f, ),
        (d, 1): (f, (e, 1)),
        (c, 1): (f, (d, 1)),
        (b, 1): (f, (c, 1), (c, 2)),
        (d, 2): (f, ),
        (c, 2): (f, (d, 1), (d, 2), (d, 3)),
        (e, 3): (f, ),
        (d, 3): (f, (e, 3)),
        (c, 3): (f, (d, 3)),
        (b, 3): (f, (c, 2), (c, 3), (c, 4)),
        (d, 4): (f, ),
        (c, 4): (f, (d, 3), (d, 4), (d, 5)),
        (e, 5): (f, ),
        (d, 5): (f, (e, 5)),
        (c, 5): (f, (d, 5)),
        (b, 5): (f, (c, 4), (c, 5)),
    }

    o = order(dsk)

    assert o[(b, 1)] < o[(e, 5)] or o[(b, 5)] < o[(e, 1)]
Beispiel #7
0
def test_local_parents_of_reduction(abcde):
    """

            c1
            |
        b1  c2
        |  /|
    a1  b2  c3
    |  /|
    a2  b3
    |
    a3

    Prefer to finish a1 stack before proceeding to b2
    """
    a, b, c, d, e = abcde
    a1, a2, a3 = [a + i for i in "123"]
    b1, b2, b3 = [b + i for i in "123"]
    c1, c2, c3 = [c + i for i in "123"]

    expected = [a3, a2, a1, b3, b2, b1, c3, c2, c1]

    log = []

    def f(x):
        def _(*args):
            log.append(x)

        return _

    dsk = {
        a3: (f(a3), ),
        a2: (f(a2), a3),
        a1: (f(a1), a2),
        b3: (f(b3), ),
        b2: (f(b2), b3, a2),
        b1: (f(b1), b2),
        c3: (f(c3), ),
        c2: (f(c2), c3, b2),
        c1: (f(c1), c2),
    }

    order(dsk)
    get(dsk, [a1, b1, c1])  # trigger computation

    assert log == expected
Beispiel #8
0
def test_prefer_short_ancestor(abcde):
    r"""
    From https://github.com/dask/dask-ml/issues/206#issuecomment-395869929

    Two cases, one where chunks of an array are independent, and one where the
    chunks of an array have a shared source. We handled the independent one
    "well" earlier.

    Good:

                    c2
                   / \ \
                  /   \ \
                c1     \ \
              / | \     \ \
            c0  a0 b0   a1 b1

    Bad:

                    c2
                   / \ \
                  /   \ \
                c1     \ \
              / | \     \ \
            c0  a0 b0   a1 b1
                   \ \   / /
                    \ \ / /
                      a-b


    The difference is that all the `a` and `b` tasks now have a common
    ancestor.

    We would like to choose c1 *before* a1, and b1 because

    * we can release a0 and b0 once c1 is done
    * we don't need a1 and b1 to compute c1.
    """
    a, b, c, _, _ = abcde
    ab = a + b

    dsk = {
        ab: 0,
        (a, 0): (f, ab, 0, 0),
        (b, 0): (f, ab, 0, 1),
        (c, 0): 0,
        (c, 1): (f, (c, 0), (a, 0), (b, 0)),
        (a, 1): (f, ab, 1, 0),
        (b, 1): (f, ab, 1, 1),
        (c, 2): (f, (c, 1), (a, 1), (b, 1)),
    }
    o = order(dsk)

    assert o[(a, 0)] < o[(a, 1)]
    assert o[(b, 0)] < o[(b, 1)]
    assert o[(b, 0)] < o[(c, 2)]
    assert o[(c, 1)] < o[(c, 2)]
    assert o[(c, 1)] < o[(a, 1)]
Beispiel #9
0
def test_string_ordering_dependents():
    """ Prefer ordering tasks by name first even when in dependencies """
    dsk = {
        ("a", 1): (f, "b"),
        ("a", 2): (f, "b"),
        ("a", 3): (f, "b"),
        "b": (f, )
    }
    o = order(dsk)
    assert o == {"b": 0, ("a", 1): 1, ("a", 2): 2, ("a", 3): 3}
Beispiel #10
0
def test_break_ties_by_str(abcde):
    a, b, c, d, e = abcde
    dsk = {("x", i): (inc, i) for i in range(10)}
    x_keys = sorted(dsk)
    dsk["y"] = list(x_keys)

    o = order(dsk)
    expected = {"y": 10}
    expected.update({k: i for i, k in enumerate(x_keys)})

    assert o == expected
Beispiel #11
0
def test_avoid_broker_nodes(abcde):
    r"""

    b0    b1  b2
    |      \  /
    a0      a1

    a0 should be run before a1
    """
    a, b, c, d, e = abcde
    dsk = {
        (a, 0): (f, ),
        (a, 1): (f, ),
        (b, 0): (f, (a, 0)),
        (b, 1): (f, (a, 1)),
        (b, 2): (f, (a, 1)),
    }
    o = order(dsk)
    assert o[(a, 0)] < o[(a, 1)]

    # Switch name of 0, 1 to ensure that this isn't due to string comparison
    dsk = {
        (a, 1): (f, ),
        (a, 0): (f, ),
        (b, 0): (f, (a, 1)),
        (b, 1): (f, (a, 0)),
        (b, 2): (f, (a, 0)),
    }
    o = order(dsk)
    assert o[(a, 0)] > o[(a, 1)]

    # Switch name of 0, 1 for "b"s too
    dsk = {
        (a, 0): (f, ),
        (a, 1): (f, ),
        (b, 1): (f, (a, 0)),
        (b, 0): (f, (a, 1)),
        (b, 2): (f, (a, 1)),
    }
    o = order(dsk)
    assert o[(a, 0)] < o[(a, 1)]
Beispiel #12
0
def test_switching_dependents(abcde):
    r"""

    a7 a8  <-- do these last
    | /
    a6                e6
    |                /
    a5   c5    d5  e5
    |    |    /   /
    a4   c4 d4  e4
    |  \ | /   /
    a3   b3---/
    |
    a2
    |
    a1
    |
    a0  <-- start here

    Test that we are able to switch to better dependents.
    In this graph, we expect to start at a0.  To compute a4, we need to compute b3.
    After computing b3, three "better" paths become available.
    Confirm that we take the better paths before continuing down `a` path.

    This test is pretty specific to how `order` is implemented
    and is intended to increase code coverage.
    """
    a, b, c, d, e = abcde
    dsk = {
        (a, 0): 0,
        (a, 1): (f, (a, 0)),
        (a, 2): (f, (a, 1)),
        (a, 3): (f, (a, 2)),
        (a, 4): (f, (a, 3), (b, 3)),
        (a, 5): (f, (a, 4)),
        (a, 6): (f, (a, 5)),
        (a, 7): (f, (a, 6)),
        (a, 8): (f, (a, 6)),
        (b, 3): 1,
        (c, 4): (f, (b, 3)),
        (c, 5): (f, (c, 4)),
        (d, 4): (f, (b, 3)),
        (d, 5): (f, (d, 4)),
        (e, 4): (f, (b, 3)),
        (e, 5): (f, (e, 4)),
        (e, 6): (f, (e, 5)),
    }
    o = order(dsk)

    assert o[(a, 0)] == 0  # probably
    assert o[(a, 5)] > o[(c, 5)]
    assert o[(a, 5)] > o[(d, 5)]
    assert o[(a, 5)] > o[(e, 6)]
Beispiel #13
0
def test_order_cycle():
    with pytest.raises(RuntimeError, match="Cycle detected"):
        get({"a": (f, "a")}, "a")  # we encounter this in `get`
    with pytest.raises(RuntimeError, match="Cycle detected"):
        order({"a": (f, "a")})  # trivial self-loop
    with pytest.raises(RuntimeError, match="Cycle detected"):
        order({("a", 0): (f, ("a", 0))})  # non-string
    with pytest.raises(RuntimeError, match="Cycle detected"):
        order({
            "a": (f, "b"),
            "b": (f, "c"),
            "c": (f, "a")
        })  # non-trivial loop
    with pytest.raises(RuntimeError, match="Cycle detected"):
        order({"a": (f, "b"), "b": (f, "c"), "c": (f, "a", "d"), "d": 1})
    with pytest.raises(RuntimeError, match="Cycle detected"):
        order({
            "a": (f, "b"),
            "b": (f, "c"),
            "c": (f, "a", "d"),
            "d": (f, "b")
        })
Beispiel #14
0
def test_prefer_short_narrow(abcde):
    # See test_prefer_short_ancestor for a fail case.
    a, b, c, _, _ = abcde
    dsk = {
        (a, 0): 0,
        (b, 0): 0,
        (c, 0): 0,
        (c, 1): (f, (c, 0), (a, 0), (b, 0)),
        (a, 1): 1,
        (b, 1): 1,
        (c, 2): (f, (c, 1), (a, 1), (b, 1)),
    }
    o = order(dsk)
    assert o[(b, 0)] < o[(b, 1)]
    assert o[(b, 0)] < o[(c, 2)]
    assert o[(c, 1)] < o[(c, 2)]
Beispiel #15
0
def test_prefer_deep(abcde):
    """
        c
        |
    e   b
    |   |
    d   a

    Prefer longer chains first so we should start with c
    """
    a, b, c, d, e = abcde
    dsk = {a: 1, b: (f, a), c: (f, b), d: 1, e: (f, d)}

    o = order(dsk)
    assert o[a] < o[d]
    assert o[b] < o[d]
Beispiel #16
0
def test_dont_run_all_dependents_too_early(abcde):
    """ From https://github.com/dask/dask-ml/issues/206#issuecomment-395873372 """
    a, b, c, d, e = abcde
    depth = 10
    dsk = {
        (a, 0): 0,
        (b, 0): 1,
        (c, 0): 2,
        (d, 0): (f, (a, 0), (b, 0), (c, 0))
    }
    for i in range(1, depth):
        dsk[(b, i)] = (f, (b, 0))
        dsk[(c, i)] = (f, (c, 0))
        dsk[(d, i)] = (f, (d, i - 1), (b, i), (c, i))
    o = order(dsk)
    expected = [3, 6, 9, 12, 15, 18, 21, 24, 27, 30]
    actual = sorted(v for (letter, num), v in o.items() if letter == d)
    assert expected == actual
Beispiel #17
0
def test_prefer_short_dependents(abcde):
    r"""

         a
         |
      d  b  e
       \ | /
         c

    Prefer to finish d and e before starting b.  That way c can be released
    during the long computations.
    """
    a, b, c, d, e = abcde
    dsk = {c: (f, ), d: (f, c), e: (f, c), b: (f, c), a: (f, b)}

    o = order(dsk)
    assert o[d] < o[b]
    assert o[e] < o[b]
Beispiel #18
0
def test_deep_bases_win_over_dependents(abcde):
    r"""
    It's not clear who should run first, e or d

    1.  d is nicer because it exposes parallelism
    2.  e is nicer (hypothetically) because it will be sooner released
        (though in this case we need d to run first regardless)

    Regardless of e or d first, we should run b before c.

            a
          / | \   .
         b  c |
        / \ | /
       e    d
    """
    a, b, c, d, e = abcde
    dsk = {a: (f, b, c, d), b: (f, d, e), c: (f, d), d: 1, e: 2}

    o = order(dsk)
    assert o[e] < o[d]  # ambiguous, but this is what we currently expect
    assert o[b] < o[c]
Beispiel #19
0
def test_use_structure_not_keys(abcde):
    """See https://github.com/dask/dask/issues/5584#issuecomment-554963958

    We were using key names to infer structure, which could result in funny behavior.
    """
    a, b, _, _, _ = abcde
    dsk = {
        (a, 0): (f, ),
        (a, 1): (f, ),
        (a, 2): (f, ),
        (a, 3): (f, ),
        (a, 4): (f, ),
        (a, 5): (f, ),
        (a, 6): (f, ),
        (a, 7): (f, ),
        (a, 8): (f, ),
        (a, 9): (f, ),
        (b, 5): (f, (a, 2)),
        (b, 7): (f, (a, 0), (a, 2)),
        (b, 9): (f, (a, 7), (a, 0), (a, 2)),
        (b, 1): (f, (a, 4), (a, 7), (a, 0)),
        (b, 2): (f, (a, 9), (a, 4), (a, 7)),
        (b, 4): (f, (a, 6), (a, 9), (a, 4)),
        (b, 3): (f, (a, 5), (a, 6), (a, 9)),
        (b, 8): (f, (a, 1), (a, 5), (a, 6)),
        (b, 6): (f, (a, 8), (a, 1), (a, 5)),
        (b, 0): (f, (a, 3), (a, 8), (a, 1)),
    }
    o = order(dsk)
    As = sorted(val for (letter, _), val in o.items() if letter == a)
    Bs = sorted(val for (letter, _), val in o.items() if letter == b)
    assert Bs[0] in {1, 3}
    if Bs[0] == 3:
        assert As == [0, 1, 2, 4, 6, 8, 10, 12, 14, 16]
        assert Bs == [3, 5, 7, 9, 11, 13, 15, 17, 18, 19]
    else:
        assert As == [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
        assert Bs == [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
Beispiel #20
0
def test_avoid_upwards_branching_complex(abcde):
    r"""
         a1
         |
    e2   a2  d2  d3
    |    |    \  /
    e1   a3    d1
     \  /  \  /
      b1    c1
      |     |
      b2    c2
            |
            c3

    Prefer c1 over b1 because c1 will stay in memory less long while b1
    computes
    """
    a, b, c, d, e = abcde
    dsk = {
        (a, 1): (f, (a, 2)),
        (a, 2): (f, (a, 3)),
        (a, 3): (f, (b, 1), (c, 1)),
        (b, 1): (f, (b, 2)),
        (b, 2): (f, ),
        (c, 1): (f, (c, 2)),
        (c, 2): (f, (c, 3)),
        (c, 3): (f, ),
        (d, 1): (f, (c, 1)),
        (d, 2): (f, (d, 1)),
        (d, 3): (f, (d, 1)),
        (e, 1): (f, (b, 1)),
        (e, 2): (f, (e, 1)),
    }

    o = order(dsk)
    assert o[(c, 1)] < o[(b, 1)]
    assert abs(o[(d, 2)] - o[(d, 3)]) == 1
Beispiel #21
0
def test_order_with_equal_dependents(abcde):
    """From https://github.com/dask/dask/issues/5859#issuecomment-608422198

    See the visualization of `(maxima, argmax)` example from the above comment.

    This DAG has enough structure to exercise more parts of `order`

    """
    a, b, c, d, e = abcde
    dsk = {}
    abc = [a, b, c, d]
    for x in abc:
        dsk.update({
            (x, 0): 0,
            (x, 1): (f, (x, 0)),
            (x, 2, 0): (f, (x, 0)),
            (x, 2, 1): (f, (x, 1)),
        })
        for i, y in enumerate(abc):
            dsk.update({
                (x, 3, i): (f, (x, 2, 0), (y, 2, 1)),  # cross x and y
                (x, 4, i): (f, (x, 3, i)),
                (x, 5, i, 0): (f, (x, 4, i)),
                (x, 5, i, 1): (f, (x, 4, i)),
                (x, 6, i, 0): (f, (x, 5, i, 0)),
                (x, 6, i, 1): (f, (x, 5, i, 1)),
            })
    o = order(dsk)
    total = 0
    for x in abc:
        for i in range(len(abc)):
            val = o[(x, 6, i, 1)] - o[(x, 6, i, 0)]
            assert val > 0  # ideally, val == 2
            total += val
    assert total <= 32  # ideally, this should be 2 * 16 = 32

    # Add one to the end of the nine bundles
    dsk2 = dict(dsk)
    for x in abc:
        for i in range(len(abc)):
            dsk2[(x, 7, i, 0)] = (f, (x, 6, i, 0))
    o = order(dsk2)
    total = 0
    for x in abc:
        for i in range(len(abc)):
            val = o[(x, 7, i, 0)] - o[(x, 6, i, 1)]
            assert val > 0  # ideally, val == 3
            total += val
    assert total <= 165  # ideally, this should be 3 * 16 == 48

    # Remove one from each of the nine bundles
    dsk3 = dict(dsk)
    for x in abc:
        for i in range(len(abc)):
            del dsk3[(x, 6, i, 1)]
    o = order(dsk3)
    total = 0
    for x in abc:
        for i in range(len(abc)):
            val = o[(x, 6, i, 0)] - o[(x, 5, i, 1)]
            assert val > 0  # ideally, val == 2
            total += val
    assert total <= 119  # ideally, this should be 2 * 16 == 32

    # Remove another one from each of the nine bundles
    dsk4 = dict(dsk3)
    for x in abc:
        for i in range(len(abc)):
            del dsk4[(x, 6, i, 0)]
    o = order(dsk4)
    total = 0
    for x in abc:
        for i in range(len(abc)):
            assert o[(x, 5, i, 1)] - o[(x, 5, i, 0)] == 1
Beispiel #22
0
def test_order_empty():
    assert order({}) == {}
Beispiel #23
0
def test_order_doesnt_fail_on_mixed_type_keys(abcde):
    order({"x": (inc, 1), ("y", 0): (inc, 2), "z": (add, "x", ("y", 0))})
Beispiel #24
0
def test_string_ordering():
    """ Prefer ordering tasks by name first """
    dsk = {("a", 1): (f, ), ("a", 2): (f, ), ("a", 3): (f, )}
    o = order(dsk)
    assert o == {("a", 1): 0, ("a", 2): 1, ("a", 3): 2}
Beispiel #25
0
def test_type_comparisions_ok(abcde):
    a, b, c, d, e = abcde
    dsk = {a: 1, (a, 1): 2, (a, b, 1): 3}
    order(dsk)  # this doesn't err