Ejemplo n.º 1
0
def test__futures_to_collection(s, a, b):
    """Check ``_futures_to_collection`` against the type-specific helpers.

    For futures holding dataframes, arrays and lists, the collection built
    by the generic ``_futures_to_collection`` is compared with the one built
    by ``_futures_to_dask_dataframe``, ``_futures_to_dask_array`` and
    ``_futures_to_dask_bag`` respectively.

    ``s`` provides the ``ip``/``port`` the executor connects to.  ``a`` and
    ``b`` are not read by the body (presumably the cluster workers injected
    by a test decorator that is not visible here -- confirm).
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    # Dataframes: the generic and the dataframe-specific graph must agree.
    remote_dfs = e.map(identity, dfs)
    ddf = yield _futures_to_collection(remote_dfs, divisions=True)
    ddf2 = yield _futures_to_dask_dataframe(remote_dfs, divisions=True)
    assert isinstance(ddf, dd.DataFrame)

    assert ddf.dask == ddf2.dask

    # Arrays: same comparison against the array-specific helper.
    remote_arrays = e.map(np.arange, range(3, 5))
    x = yield _futures_to_collection(remote_arrays)
    y = yield _futures_to_dask_array(remote_arrays)

    assert type(x) == type(y)
    assert x.dask == y.dask

    # Bags: use ``bag``/``bag2`` so the ``b`` parameter is not rebound.
    remote_lists = yield e._scatter([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    bag = yield _futures_to_collection(remote_lists)
    bag2 = yield _futures_to_dask_bag(remote_lists)

    assert type(bag) == type(bag2)
    # Compare the two graphs with each other, mirroring the dataframe and
    # array checks above; comparing a graph with itself is always true.
    assert bag.dask == bag2.dask

    yield e._shutdown()
Ejemplo n.º 2
0
def test__dask_array_collections(s, a, b):
    """Compare dask array expressions computed locally and via the executor.

    Two block dictionaries of random 3x3 arrays are scattered through the
    executor.  Arrays are then built both from the in-memory blocks and from
    the scattered futures, and each expression must give equal results when
    computed with ``dask.get`` and through ``e.compute``.

    ``s`` provides the ``ip``/``port`` for the executor; ``a`` and ``b`` are
    not read by the body.
    """
    import dask.array as da

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    x_chunks = ((3, 3, 3), (3, 3))
    y_chunks = ((3, 3), (3, 3, 3))

    x_blocks = {
        ('x', row, col): np.random.random((3, 3))
        for row in range(3)
        for col in range(2)
    }
    y_blocks = {
        ('y', row, col): np.random.random((3, 3))
        for row in range(2)
        for col in range(3)
    }
    x_scattered = yield e._scatter(x_blocks)
    y_scattered = yield e._scatter(y_blocks)

    # dtype of whatever ``np.random.random`` produces.
    dtype = np.random.random(0).dtype
    x_local = da.Array(x_blocks, 'x', x_chunks, dtype)
    y_local = da.Array(y_blocks, 'y', y_chunks, dtype)

    x_remote = da.Array(x_scattered, 'x', x_chunks, dtype)
    y_remote = da.Array(y_scattered, 'y', y_chunks, dtype)

    expressions = [
        lambda u, v: u.T + v,
        lambda u, v: u.mean() + v.mean(),
        lambda u, v: u.dot(v).std(axis=0),
        lambda u, v: u - u.mean(axis=1)[:, None],
    ]

    for build in expressions:
        expected = build(x_local, y_local).compute(get=dask.get)

        # ``e.compute`` is unpacked as a one-element sequence here.
        (future,) = e.compute(build(x_remote, y_remote))
        actual = yield future._result()

        assert np.all(expected == actual)

    yield e._shutdown()
Ejemplo n.º 3
0
def test_with_data(s, a, b):
    """Check the memory-load JSON routes served by ``HTTPScheduler``.

    After mapping ``inc`` over three integers and scattering two strings,
    ``memory-load.json`` and ``memory-load-by-key.json`` are fetched and
    their totals are compared with the summed ``sys.getsizeof`` of the five
    values.  Both responses must be keyed by the ``address_string`` of
    ``a`` and ``b``.
    """
    ss = HTTPScheduler(s)
    ss.listen(0)

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    inc_futures = e.map(inc, [1, 2, 3])
    # Bound to a name although never read again -- presumably to keep the
    # scattered futures referenced while the routes are queried (confirm).
    scattered = yield e._scatter(['Hello', 'world!'])
    yield _wait(inc_futures)

    expected_bytes = sum(map(sys.getsizeof, [1, 2, 3, 'Hello', 'world!']))

    client = AsyncHTTPClient()

    # Per-worker totals.
    url = 'http://localhost:%s/memory-load.json' % ss.port
    response = yield client.fetch(url)
    out = json.loads(response.body.decode())

    assert all(isinstance(nbytes, int) for nbytes in out.values())
    assert set(out) == {a.address_string, b.address_string}
    assert sum(out.values()) == expected_bytes

    # Per-worker totals broken down by key prefix.
    url = 'http://localhost:%s/memory-load-by-key.json' % ss.port
    response = yield client.fetch(url)
    out = json.loads(response.body.decode())
    assert set(out) == {a.address_string, b.address_string}
    assert all(isinstance(by_key, dict) for by_key in out.values())
    assert all(key in {'inc', 'data'}
               for by_key in out.values() for key in by_key)
    assert all(isinstance(nbytes, int)
               for by_key in out.values() for nbytes in by_key.values())

    total = sum(nbytes
                for by_key in out.values() for nbytes in by_key.values())
    assert total == expected_bytes

    ss.stop()
    yield e._shutdown()
Ejemplo n.º 4
0
def test__dask_array_collections(s, a, b):
    """Compare dask array expressions computed locally and via the executor.

    Random 3x3 blocks are scattered through the executor.  The same
    expressions are evaluated on arrays backed by the in-memory blocks
    (using ``dask.get``) and on arrays backed by the scattered futures
    (using ``e.compute``), and the results must be equal.

    ``s`` provides the ``ip``/``port`` for the executor; ``a`` and ``b`` are
    not read by the body.
    """
    import dask.array as da

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    x_chunks = ((3, 3, 3), (3, 3))
    y_chunks = ((3, 3), (3, 3, 3))

    x_blocks = {
        ('x', row, col): np.random.random((3, 3))
        for row in range(3)
        for col in range(2)
    }
    y_blocks = {
        ('y', row, col): np.random.random((3, 3))
        for row in range(2)
        for col in range(3)
    }
    x_scattered = yield e._scatter(x_blocks)
    y_scattered = yield e._scatter(y_blocks)

    # dtype of whatever ``np.random.random`` produces.
    dtype = np.random.random(0).dtype
    x_local = da.Array(x_blocks, 'x', x_chunks, dtype)
    y_local = da.Array(y_blocks, 'y', y_chunks, dtype)

    x_remote = da.Array(x_scattered, 'x', x_chunks, dtype)
    y_remote = da.Array(y_scattered, 'y', y_chunks, dtype)

    expressions = [
        lambda u, v: u.T + v,
        lambda u, v: u.mean() + v.mean(),
        lambda u, v: u.dot(v).std(axis=0),
        lambda u, v: u - u.mean(axis=1)[:, None],
    ]

    for build in expressions:
        expected = build(x_local, y_local).compute(get=dask.get)

        # Here ``e.compute`` is used as returning a single future-like object.
        future = e.compute(build(x_remote, y_remote))
        actual = yield future._result()

        assert np.all(expected == actual)

    yield e._shutdown()
Ejemplo n.º 5
0
def test__futures_to_dask_bag(s, a, b):
    """Check that a bag built from scattered futures matches a local bag.

    Three lists are scattered and wrapped with ``_futures_to_dask_bag``.
    The result must be a ``db.Bag`` with one partition per list, and each
    expression must give the same result on it (through ``e.compute``) as on
    a local bag of the same nine integers (through ``dask.get``).
    """
    import dask.bag as db

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    partitions = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    futures = yield e._scatter(partitions)

    remote_bag = yield _futures_to_dask_bag(futures)
    assert isinstance(remote_bag, db.Bag)
    assert remote_bag.npartitions == len(partitions)

    local_bag = db.from_sequence([1, 2, 3, 4, 5, 6, 7, 8, 9], npartitions=3)

    expressions = [
        lambda bag: bag.map(lambda item: item + 1).sum(),
        lambda bag: bag.filter(lambda item: item % 2),
    ]

    for build in expressions:
        expected = build(local_bag).compute(get=dask.get)
        future = e.compute(build(remote_bag))
        actual = yield future._result()

        assert expected == actual

    yield e._shutdown()
Ejemplo n.º 6
0
def test_with_data(s, a, b):
    """Check the memory-load JSON routes served by ``HTTPScheduler``.

    Three ``inc`` results and two scattered strings are placed on the
    cluster; the ``memory-load.json`` and ``memory-load-by-key.json``
    responses must then be keyed by the ``address_string`` of ``a`` and
    ``b`` and add up to the summed ``sys.getsizeof`` of the five values.
    """
    ss = HTTPScheduler(s)
    ss.listen(0)

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    inc_futures = e.map(inc, [1, 2, 3])
    # Never read again; presumably held so the scattered data stays
    # referenced while the HTTP routes are queried (confirm).
    scattered = yield e._scatter(['Hello', 'world!'])
    yield _wait(inc_futures)

    expected_bytes = sum(map(sys.getsizeof, [1, 2, 3, 'Hello', 'world!']))

    client = AsyncHTTPClient()

    # Route 1: one integer byte count per worker.
    load_url = 'http://localhost:%s/memory-load.json' % ss.port
    response = yield client.fetch(load_url)
    out = json.loads(response.body.decode())

    assert all(isinstance(size, int) for size in out.values())
    assert set(out) == {a.address_string, b.address_string}
    assert sum(out.values()) == expected_bytes

    # Route 2: per worker, a dict of byte counts grouped by key name.
    by_key_url = 'http://localhost:%s/memory-load-by-key.json' % ss.port
    response = yield client.fetch(by_key_url)
    out = json.loads(response.body.decode())
    assert set(out) == {a.address_string, b.address_string}
    assert all(isinstance(group, dict) for group in out.values())
    assert all(name in {'inc', 'data'}
               for group in out.values() for name in group)
    assert all(isinstance(size, int)
               for group in out.values() for size in group.values())

    grand_total = sum(size
                      for group in out.values() for size in group.values())
    assert grand_total == expected_bytes

    ss.stop()
    yield e._shutdown()
Ejemplo n.º 7
0
def test__futures_to_collection(s, a, b):
    """Check ``_futures_to_collection`` against the type-specific helpers.

    Dataframe, array and list futures are each converted twice: once with
    the generic ``_futures_to_collection`` and once with the matching
    ``_futures_to_dask_dataframe`` / ``_futures_to_dask_array`` /
    ``_futures_to_dask_bag`` helper.  The resulting types and graphs must
    agree.

    ``s`` provides the ``ip``/``port`` the executor connects to.  ``a`` and
    ``b`` are not read by the body (presumably the cluster workers injected
    by a test decorator that is not visible here -- confirm).
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    # Dataframes.
    remote_dfs = e.map(identity, dfs)
    ddf = yield _futures_to_collection(remote_dfs, divisions=True)
    ddf2 = yield _futures_to_dask_dataframe(remote_dfs, divisions=True)
    assert isinstance(ddf, dd.DataFrame)

    assert ddf.dask == ddf2.dask

    # Arrays.
    remote_arrays = e.map(np.arange, range(3, 5))
    x = yield _futures_to_collection(remote_arrays)
    y = yield _futures_to_dask_array(remote_arrays)

    assert type(x) == type(y)
    assert x.dask == y.dask

    # Bags.  Distinct local names keep the ``b`` parameter from being
    # rebound inside the test.
    remote_lists = yield e._scatter([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    bag = yield _futures_to_collection(remote_lists)
    bag2 = yield _futures_to_dask_bag(remote_lists)

    assert type(bag) == type(bag2)
    # The generic graph is checked against the bag-specific one; a graph
    # compared with itself would pass unconditionally.
    assert bag.dask == bag2.dask

    yield e._shutdown()
Ejemplo n.º 8
0
def test__futures_to_dask_bag(s, a, b):
    """Check that a bag built from scattered futures matches a local bag.

    ``_futures_to_dask_bag`` is applied to three scattered lists; the
    resulting ``db.Bag`` must have one partition per list and must give the
    same results as a local bag holding the same integers.
    """
    import dask.bag as db

    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    chunks = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    futures = yield e._scatter(chunks)

    from_futures = yield _futures_to_dask_bag(futures)
    assert isinstance(from_futures, db.Bag)
    assert from_futures.npartitions == len(chunks)

    from_sequence = db.from_sequence([1, 2, 3, 4, 5, 6, 7, 8, 9],
                                     npartitions=3)

    add_one_and_sum = lambda bag: bag.map(lambda n: n + 1).sum()
    keep_odd = lambda bag: bag.filter(lambda n: n % 2)

    for transform in (add_one_and_sum, keep_odd):
        # Local result comes from ``dask.get``, remote from the executor.
        want = transform(from_sequence).compute(get=dask.get)
        pending = e.compute(transform(from_futures))
        got = yield pending._result()

        assert want == got

    yield e._shutdown()
Ejemplo n.º 9
0
    def f(c, a, b):
        """Compare array expressions evaluated by ``dask.get`` and ``e._get``.

        Random 3x3 blocks are scattered through an executor connected to
        ``(c.ip, c.port)``.  Each expression is built on arrays backed by
        the in-memory blocks and on arrays backed by the scattered futures;
        the graphs are executed with ``dask.get`` and ``e._get``
        respectively, finalized with ``da.Array._finalize`` and compared.

        ``loop`` and ``da`` come from the enclosing scope, which is not
        visible here.  ``a`` and ``b`` are not read by the body.
        """
        e = Executor((c.ip, c.port), start=False, loop=loop)
        yield e._start()

        x_chunks = ((3, 3, 3), (3, 3))
        y_chunks = ((3, 3), (3, 3, 3))

        x_blocks = {
            ('x', row, col): np.random.random((3, 3))
            for row in range(3)
            for col in range(2)
        }
        y_blocks = {
            ('y', row, col): np.random.random((3, 3))
            for row in range(2)
            for col in range(3)
        }
        x_scattered = yield e._scatter(x_blocks)
        y_scattered = yield e._scatter(y_blocks)

        # dtype of whatever ``np.random.random`` produces.
        dtype = np.random.random(0).dtype
        x_local = da.Array(x_blocks, 'x', x_chunks, dtype)
        y_local = da.Array(y_blocks, 'y', y_chunks, dtype)

        x_remote = da.Array(x_scattered, 'x', x_chunks, dtype)
        y_remote = da.Array(y_scattered, 'y', y_chunks, dtype)

        expressions = [
            lambda u, v: u.T + v,
            lambda u, v: u.mean() + v.mean(),
            lambda u, v: u.dot(v).std(axis=0),
            lambda u, v: u - u.mean(axis=1)[:, None],
        ]

        for build in expressions:
            # Local path: run the graph with ``dask.get``.
            local_expr = build(x_local, y_local)
            local_parts = dask.get(local_expr.dask, local_expr._keys())
            expected = da.Array._finalize(local_expr, local_parts)

            # Remote path: run the graph through the executor.
            remote_expr = build(x_remote, y_remote)
            remote_parts = yield e._get(remote_expr.dask,
                                        remote_expr._keys())
            actual = da.Array._finalize(remote_expr, remote_parts)

            assert np.all(expected == actual)

        yield e._shutdown()
Ejemplo n.º 10
0
def test_write_bytes(s, a, b):
    """Check that ``write_bytes`` writes one HDFS file per scattered value.

    Three byte strings are scattered and written twice: once to a path
    containing a ``*`` and once to a bare directory path.  Each target
    directory must end up with three entries, and ``file.1.dat`` must hold
    the second value.

    ``s`` provides the ``ip``/``port`` for the executor; ``a`` and ``b`` are
    not read by the body.
    """
    with make_hdfs() as hdfs:
        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        data = [b'123', b'456', b'789']
        remote_data = yield e._scatter(data)

        # Path with a ``*``: the file read back below is ``file.1.dat``.
        futures = write_bytes('/tmp/test/data/file.*.dat', remote_data,
                              hdfs=hdfs)
        yield _wait(futures)

        assert len(hdfs.ls('/tmp/test/data/')) == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as f:
            assert f.read() == b'456'

        # Bare directory path: only the number of entries is checked.
        futures = write_bytes('/tmp/test/data2/', remote_data, hdfs=hdfs)
        yield _wait(futures)

        assert len(hdfs.ls('/tmp/test/data2/')) == 3

        # Shut the executor down, as the other tests in this file do after
        # starting one with ``_start()``.
        yield e._shutdown()
Ejemplo n.º 11
0
def test_write_binary(s, a, b):
    """Check that ``write_binary`` writes one HDFS file per scattered value.

    Three byte strings are scattered and written twice: once to a path
    containing a ``*`` and once to a bare directory path.  Each target
    directory must end up with three entries, and ``file.1.dat`` must hold
    the second value.

    ``s`` provides the ``ip``/``port`` for the executor; ``a`` and ``b`` are
    not read by the body.
    """
    with make_hdfs() as hdfs:
        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        data = [b'123', b'456', b'789']
        remote_data = yield e._scatter(data)

        # Path with a ``*``: the file read back below is ``file.1.dat``.
        futures = write_binary('/tmp/test/data/file.*.dat',
                               remote_data,
                               hdfs=hdfs)
        yield _wait(futures)

        assert len(hdfs.ls('/tmp/test/data/')) == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as f:
            assert f.read() == b'456'

        # Bare directory path: only the number of entries is checked.
        futures = write_binary('/tmp/test/data2/', remote_data, hdfs=hdfs)
        yield _wait(futures)

        assert len(hdfs.ls('/tmp/test/data2/')) == 3

        # Shut the executor down, as the other tests in this file do after
        # starting one with ``_start()``.
        yield e._shutdown()