Example #1
0
def test_prefer_cheap_dependent():
    """After computing 'y' under a Cache, 'x' must score a lower cost than 'y'.

    'y' takes 'x' as an argument, so both keys are evaluated by the
    scheduler while the cache is active.
    """
    graph = {
        'x': (f, 0.01, 10),
        'y': (f, 0.000001, 1, 'x'),
    }
    c = Cache(10000)
    with c:
        get_sync(graph, 'y')

    # Compare the per-key costs recorded by the cache's scorer.
    scorer = c.cache.scorer
    assert scorer.cost['x'] < scorer.cost['y']
Example #2
0
def test_prefer_cheap_dependent():
    """Check that the scorer's cost for 'x' is below its cost for 'y'.

    The graph is run through ``get_sync`` with a ``Cache(10000)`` active as
    a context manager; 'y' depends on 'x'.
    """
    tasks = {}
    tasks['x'] = (f, 0.01, 10)
    tasks['y'] = (f, 0.000001, 1, 'x')

    c = Cache(10000)
    with c:
        get_sync(tasks, 'y')

    cost_x = c.cache.scorer.cost['x']
    cost_y = c.cache.scorer.cost['y']
    assert cost_x < cost_y
Example #3
0
def test_shuffle():
    """Shuffle ``d`` on column ``b`` into two partitions with disjoint ``b`` values."""
    shuffled = shuffle(d, d.b, npartitions=2)
    assert isinstance(shuffled, dd.DataFrame)
    assert shuffled.npartitions == 2

    # Materialize each of the two output partitions separately.
    first = get_sync(shuffled.dask, (shuffled._name, 0))
    second = get_sync(shuffled.dask, (shuffled._name, 1))

    overlap = set(first.b) & set(second.b)
    assert not overlap  # disjoint
Example #4
0
def test_shuffle():
    """The two partitions produced by ``shuffle`` share no value of ``b``."""
    result = shuffle(d, d.b, npartitions=2)
    assert isinstance(result, dd.DataFrame)
    assert result.npartitions == 2

    part0 = get_sync(result.dask, (result._name, 0))
    part1 = get_sync(result.dask, (result._name, 1))

    values0 = set(part0.b)
    values1 = set(part1.b)
    assert not (values0 & values1)  # disjoint
Example #5
0
def test_callback():
    """A Callback subclass has its ``_start`` hook invoked during ``get_sync``."""
    # One-element list used as a mutable cell the nested class can write to.
    started = [False]

    class MyCallback(Callback):
        def _start(self, dsk):
            started[0] = True

    graph = {'x': 1}
    with MyCallback():
        get_sync(graph, 'x')

    assert started[0] is True
Example #6
0
def test_start_callback():
    """Running a graph inside a Callback context triggers its ``_start`` hook."""
    called = [False]  # mutable cell written by the hook below

    class MyCallback(Callback):
        def _start(self, dsk):
            called[0] = True

    with MyCallback():
        get_sync({'x': 1}, 'x')

    was_called = called[0]
    assert was_called is True
Example #7
0
def test_ordering():
    """Tasks keyed ('x', 0) .. ('x', 9) must execute in ascending order."""
    seen = []

    def record(i):
        seen.append(i)

    graph = {}
    for i in range(10):
        graph[('x', i)] = (record, i)

    # 'y' depends on every ('x', i) key, so requesting it runs them all.
    graph['y'] = (lambda *args: None, sorted(graph))

    get_sync(graph, 'y')

    assert seen == sorted(seen)
Example #8
0
def test_cache_options():
    """A cache passed via ``dask.set_options`` holds 'y' while 'x' is computed.

    Returns without checking anything when ``chest`` cannot be imported.
    """
    try:
        from chest import Chest
    except ImportError:
        return

    store = Chest()

    def inc2(x):
        # The dependency must already be in the user-supplied cache.
        assert 'y' in store
        return x + 1

    graph = {'x': (inc2, 'y'), 'y': 1}
    with dask.set_options(cache=store):
        get_sync(graph, 'x')
Example #9
0
def test_shuffle(shuffle):
    """Check partition count, disjointness, graph inclusion and stable naming.

    ``shuffle`` is forwarded to ``shuffle_func`` as its ``shuffle=`` keyword.
    """
    shuffled = shuffle_func(d, d.b, shuffle=shuffle)
    assert isinstance(shuffled, dd.DataFrame)
    assert shuffled.npartitions == d.npartitions

    first = get_sync(shuffled.dask, (shuffled._name, 0))
    second = get_sync(shuffled.dask, (shuffled._name, 1))

    overlap = set(first.b) & set(second.b)
    assert not overlap  # disjoint
    assert set(shuffled.dask).issuperset(d.dask)

    # Two identical calls must produce the same collection name.
    name_a = shuffle_func(d, d.b)._name
    name_b = shuffle_func(d, d.b)._name
    assert name_a == name_b
Example #10
0
def test_shuffle(shuffle):
    """Shuffle ``d`` on ``b`` and verify the result's structure.

    Asserts that the partition count is preserved, partitions 0 and 1 have
    disjoint ``b`` values, the input graph is contained in the output graph,
    and repeated calls yield the same ``_name``.
    """
    result = shuffle_func(d, d.b, shuffle=shuffle)
    assert isinstance(result, dd.DataFrame)
    assert result.npartitions == d.npartitions

    part0 = get_sync(result.dask, (result._name, 0))
    part1 = get_sync(result.dask, (result._name, 1))

    values0 = set(part0.b)
    values1 = set(part1.b)
    assert not (values0 & values1)  # disjoint
    assert set(result.dask).issuperset(d.dask)

    once = shuffle_func(d, d.b)
    again = shuffle_func(d, d.b)
    assert once._name == again._name
Example #11
0
def test_start_state_callback():
    """``_start_state`` is invoked with the graph and a state holding a cache."""
    invoked = [False]  # mutable cell written by the hook below

    class MyCallback(Callback):
        def _start_state(self, dsk, state):
            invoked[0] = True
            assert dsk['x'] == 1
            # The hook expects exactly one entry in the state's cache.
            assert len(state['cache']) == 1

    graph = {'x': 1}
    with MyCallback():
        get_sync(graph, 'x')

    assert invoked[0] is True
Example #12
0
def test_start_state_callback():
    """Verify the ``_start_state`` hook runs and sees the expected arguments.

    The hook checks that ``dsk['x']`` is 1 and that ``state['cache']`` has a
    single entry; the test then confirms the hook was actually called.
    """
    hook_ran = [False]

    class MyCallback(Callback):
        def _start_state(self, dsk, state):
            hook_ran[0] = True
            assert dsk['x'] == 1
            cache = state['cache']
            assert len(cache) == 1

    with MyCallback():
        get_sync({'x': 1}, 'x')

    ran = hook_ran[0]
    assert ran is True
Example #13
0
def test_ordering():
    """``get_sync`` runs the ('x', i) tasks so that their arguments come out sorted."""
    calls = []

    def log_call(i):
        calls.append(i)

    graph = dict((('x', n), (log_call, n)) for n in range(10))
    ordered_keys = sorted(graph)
    # 'y' takes every ('x', n) key as input, forcing all of them to run.
    graph['y'] = (lambda *args: None, ordered_keys)

    get_sync(graph, 'y')

    expected = sorted(calls)
    assert calls == expected
Example #14
0
def test_cache_options():
    """The ``cache=`` option makes the scheduler store 'y' in the given object.

    The check lives inside the task for 'x', which asserts that 'y' is already
    present in the cache. If ``chest`` is not importable the function returns
    early without asserting anything.
    """
    try:
        from chest import Chest
    except ImportError:
        return

    user_cache = Chest()

    def inc2(x):
        assert 'y' in user_cache
        return x + 1

    tasks = {'x': (inc2, 'y'), 'y': 1}
    with dask.set_options(cache=user_cache):
        get_sync(tasks, 'x')
Example #15
0
def test_finish_always_called():
    """``_finish`` must run, with ``errored`` truthy, whenever a task raises.

    Three scenarios are covered: a ZeroDivisionError under ``get_sync``, the
    same error under ``get_threaded``, and a KeyboardInterrupt under
    ``get_sync``.
    """
    finished = [False]  # mutable cell written by the hook below

    class MyCallback(Callback):
        def _finish(self, dsk, state, errored):
            finished[0] = True
            assert errored

    def run_and_check(get, graph, caught, expected):
        # Reset the flag, run the graph under the callback, and verify both
        # the type of any caught exception and that ``_finish`` was reached.
        finished[0] = False
        try:
            with MyCallback():
                get(graph, 'x')
        except caught as err:
            assert isinstance(err, expected)
        assert finished[0]

    failing = {'x': (lambda: 1 / 0, )}

    # `raise_on_exception=True`
    run_and_check(get_sync, failing, Exception, ZeroDivisionError)

    # `raise_on_exception=False`
    run_and_check(get_threaded, failing, Exception, ZeroDivisionError)

    # KeyboardInterrupt
    def raise_keyboard():
        raise KeyboardInterrupt()

    interrupted = {'x': (raise_keyboard, )}
    run_and_check(get_sync, interrupted, BaseException, KeyboardInterrupt)
Example #16
0
def test_finish_always_called():
    """Check that the ``_finish`` hook fires even when the computation fails.

    The hook itself asserts ``errored``; the test body asserts that the hook
    ran for each failure mode exercised below.
    """
    hook_ran = [False]

    class MyCallback(Callback):
        def _finish(self, dsk, state, errored):
            hook_ran[0] = True
            assert errored

    def divide_by_zero():
        return 1 / 0

    graph = {'x': (divide_by_zero,)}

    # `raise_on_exception=True`
    try:
        with MyCallback():
            get_sync(graph, 'x')
    except Exception as exc:
        assert isinstance(exc, ZeroDivisionError)
    assert hook_ran[0]

    # `raise_on_exception=False`
    hook_ran[0] = False
    try:
        with MyCallback():
            get_threaded(graph, 'x')
    except Exception as exc:
        assert isinstance(exc, ZeroDivisionError)
    assert hook_ran[0]

    # KeyboardInterrupt
    def raise_keyboard():
        raise KeyboardInterrupt()

    graph = {'x': (raise_keyboard,)}
    hook_ran[0] = False
    try:
        with MyCallback():
            get_sync(graph, 'x')
    except BaseException as exc:
        # KeyboardInterrupt does not derive from Exception, hence the
        # broader clause here.
        assert isinstance(exc, KeyboardInterrupt)
    assert hook_ran[0]
Example #17
0
def test_rearrange(shuffle):
    """Rearranging by column 'y' keeps each distinct 'y' value in one partition.

    ``shuffle`` is forwarded to ``rearrange_by_column`` as its ``shuffle=``
    keyword. The test also checks that the partition count is unchanged and
    that the input graph is contained in the result's graph.
    """
    df = pd.DataFrame({'x': range(10)})
    ddf = dd.from_pandas(df, npartitions=4)
    ddf2 = ddf.assign(y=ddf.x % 4)

    result = rearrange_by_column(ddf2, 'y', max_branch=32, shuffle=shuffle)
    assert result.npartitions == ddf.npartitions
    assert set(ddf.dask).issubset(result.dask)

    # Every value in exactly one partition
    a = result.compute()
    parts = get_sync(result.dask, result._keys())
    # ``i in series`` tests a pandas Series' index labels, not its values, so
    # membership is checked against the set of each partition's y values.
    part_values = [set(part.y) for part in parts]
    for i in a.y.drop_duplicates():
        assert sum(i in values for values in part_values) == 1
Example #18
0
def test_rearrange(shuffle):
    """Check that ``rearrange_by_column`` groups equal 'y' values together.

    Builds a ten-row frame split into four partitions, adds ``y = x % 4``,
    rearranges on 'y' (passing ``shuffle`` through as ``shuffle=``), and
    asserts that each distinct 'y' value occurs in exactly one output
    partition.
    """
    df = pd.DataFrame({'x': range(10)})
    ddf = dd.from_pandas(df, npartitions=4)
    ddf2 = ddf.assign(y=ddf.x % 4)

    result = rearrange_by_column(ddf2, 'y', max_branch=32, shuffle=shuffle)
    assert result.npartitions == ddf.npartitions
    assert set(ddf.dask).issubset(result.dask)

    # Every value in exactly one partition
    a = result.compute()
    parts = get_sync(result.dask, result._keys())
    for i in a.y.drop_duplicates():
        # The ``in`` operator on a pandas Series looks at index labels rather
        # than values, so convert each partition's column to a set first.
        assert sum(i in set(part.y) for part in parts) == 1
Example #19
0
def test_divisions(ddf):
    """Assert every partition's index lies within its declared divisions.

    Does nothing when ``ddf`` lacks ``divisions`` or ``index``, or when its
    divisions are not known. All partitions but the last must satisfy
    ``divisions[i] <= index < divisions[i + 1]``; the last partition's upper
    bound is inclusive.
    """
    checkable = (
        hasattr(ddf, 'divisions')
        and hasattr(ddf, 'index')
        and ddf.known_divisions
    )
    if not checkable:
        return

    results = get_sync(ddf.dask, ddf._keys())
    for pos, part in enumerate(results[:-1]):
        if not len(part):
            continue  # empty partitions have no index bounds to check
        assert part.index.min() >= ddf.divisions[pos]
        assert part.index.max() < ddf.divisions[pos + 1]

    last = results[-1]
    if len(last):
        assert last.index.min() >= ddf.divisions[-2]
        assert last.index.max() <= ddf.divisions[-1]
Example #20
0
def assert_divisions(ddf):
    """Assert every partition's index lies within its declared divisions.

    Returns immediately when ``ddf`` has no ``divisions`` attribute, no
    ``index`` attribute, or a falsy ``known_divisions``. Otherwise the
    partitions are computed with ``get_sync`` and each non-empty one is
    checked: lower bound inclusive, upper bound exclusive, except for the
    final partition whose upper bound is inclusive.
    """
    for required in ('divisions', 'index'):
        if not hasattr(ddf, required):
            return
    if not ddf.known_divisions:
        return

    results = get_sync(ddf.dask, ddf._keys())
    for pos, part in enumerate(results[:-1]):
        if not len(part):
            continue  # nothing to check for an empty partition
        assert part.index.min() >= ddf.divisions[pos]
        assert part.index.max() < ddf.divisions[pos + 1]

    last = results[-1]
    if len(last):
        assert last.index.min() >= ddf.divisions[-2]
        assert last.index.max() <= ddf.divisions[-1]