Beispiel #1
0
def test_write_bytes(e, s, a, b):
    """Write scattered byte strings to HDFS and check what lands on disk.

    Three byte strings are scattered through ``e`` and handed to
    ``write_bytes`` twice: once with a ``file.*.dat`` pattern and once with
    a bare directory path. Each time the target directory must list three
    entries; for the pattern case ``file.1.dat`` must hold ``b'456'``.

    This is a generator-style test (it yields on the scatter and wait
    calls); the harness that drives it is not visible here.
    """
    with make_hdfs() as hdfs:
        chunks = [b'123', b'456', b'789']
        scattered = yield e._scatter(chunks)

        # First pass: explicit file-name pattern.
        pending = write_bytes('/tmp/test/data/file.*.dat', scattered, hdfs=hdfs)
        yield _wait(pending)

        listing = hdfs.ls('/tmp/test/data/')
        assert len(listing) == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as fh:
            content = fh.read()
            assert content == b'456'

        # Second pass: only a directory is given.
        pending = write_bytes('/tmp/test/data2/', scattered, hdfs=hdfs)
        yield _wait(pending)

        listing = hdfs.ls('/tmp/test/data2/')
        assert len(listing) == 3
Beispiel #2
0
def test_write_bytes(e, s, a, b):
    """Check ``write_bytes`` against HDFS for a pattern path and a directory.

    Scatters ``[b'123', b'456', b'789']`` via ``e._scatter``, writes the
    scattered data with ``write_bytes`` and waits on the returned futures.
    Asserts that each destination directory lists three entries and that
    ``file.1.dat`` contains ``b'456'``.

    Written as a generator: each ``yield`` hands an awaitable back to the
    (not visible) test driver.
    """
    with make_hdfs() as hdfs:
        payloads = [b'123', b'456', b'789']
        remote = yield e._scatter(payloads)

        write_futures = write_bytes(
            '/tmp/test/data/file.*.dat', remote, hdfs=hdfs)
        yield _wait(write_futures)

        entry_count = len(hdfs.ls('/tmp/test/data/'))
        assert entry_count == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as handle:
            assert handle.read() == b'456'

        write_futures = write_bytes('/tmp/test/data2/', remote, hdfs=hdfs)
        yield _wait(write_futures)

        entry_count = len(hdfs.ls('/tmp/test/data2/'))
        assert entry_count == 3
Beispiel #3
0
def test_write_bytes(s3):
    """Round-trip the contents of ``files`` through the S3 test bucket.

    Every value in ``files`` is wrapped with ``delayed`` and written under
    the bucket's ``/more/`` prefix using the corresponding key as the file
    name. The ``accounts.*`` files are then read back and the set of bytes
    read must equal the set of original values.

    ``s3`` is not referenced in the body; presumably it is a fixture that
    prepares the bucket -- confirm against the fixture definition.
    """
    paths = []
    for name in files:
        paths.append('s3://' + test_bucket_name + '/more/' + name)
    lazy_values = [delayed(content) for content in files.values()]

    writes = core.write_bytes(lazy_values, paths)
    compute(*writes)

    sample, blocks = read_bytes(
        's3://' + test_bucket_name + '/more/test/accounts.*')
    fetched = compute(*concat(blocks))

    expected = set(files.values())
    assert expected == set(fetched)
def test_write_bytes(s3):
    """Write the ``files`` values to S3 and verify they can be read back.

    Destination paths are built from ``test_bucket_name``, the ``/more/``
    prefix and each key of ``files``. After computing the write tasks, the
    ``accounts.*`` objects are read with ``read_bytes`` and compared, as a
    set, against the original values.

    ``s3`` is unused inside the function; it looks like a setup fixture --
    verify where it is declared.
    """
    destinations = ['s3://' + test_bucket_name + '/more/' + key
                    for key in files]
    originals = list(files.values())
    pending_values = [delayed(item) for item in originals]

    write_tasks = core.write_bytes(pending_values, destinations)
    compute(*write_tasks)

    pattern = 's3://' + test_bucket_name + '/more/test/accounts.*'
    sample, read_blocks = read_bytes(pattern)
    results = compute(*concat(read_blocks))

    assert set(originals) == set(results)
Beispiel #5
0
def test_write_bytes(hdfs):
    """Write five delayed byte strings under ``basedir`` and read them back.

    After the writes are computed, ``hdfs.ls(basedir)`` must report five
    entries. The ``*.part`` files are then read with ``read_bytes`` and the
    concatenated results must equal the original list, in order.
    """
    target = 'hdfs://%s/' % basedir
    payloads = []
    for i in range(5):
        payloads.append(b'test data %i' % i)

    lazy_payloads = [delayed(item) for item in payloads]
    writes = write_bytes(lazy_payloads, target)
    dask.compute(writes)
    assert len(hdfs.ls(basedir)) == 5

    sample, blocks = read_bytes('hdfs://%s/*.part' % basedir)
    # compute() is called with one argument, so unpack its single result.
    [read_back] = dask.compute(list(concat(blocks)))
    assert payloads == read_back
Beispiel #6
0
def test_simple_write(tmpdir):
    """Write two delayed byte strings to a local directory and inspect it.

    ``write_bytes`` must return two tasks; after computing them the
    directory must contain exactly two files, one of them named
    ``0.part``, and the first listed file must contain ``b'000'``.

    Parameters
    ----------
    tmpdir
        Temporary directory object; converted to ``str`` before use.
    """
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]
    out = write_bytes(data, tmpdir)
    assert len(out) == 2
    compute(*out)
    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert '0.part' in files
    # Use a context manager so the handle is closed even if the assert fails;
    # the original left the file object open.
    with open(os.path.join(tmpdir, files[0]), 'rb') as f:
        d = f.read()
    assert d == b'000'
Beispiel #7
0
def test_simple_write(tmpdir):
    """Check that ``write_bytes`` writes one ``.part`` file per input.

    Two references to the same delayed ``b'000'`` value are written into
    ``tmpdir``. The test asserts that two tasks are returned, that two
    files exist after computing them (including ``0.part``), and that the
    first file in the directory listing holds ``b'000'``.

    Parameters
    ----------
    tmpdir
        Temporary directory object; converted to ``str`` before use.
    """
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]
    out = write_bytes(data, tmpdir)
    assert len(out) == 2
    compute(*out)
    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert '0.part' in files
    # Read through a ``with`` block so the file descriptor is released
    # deterministically instead of being leaked as in the original.
    first_path = os.path.join(tmpdir, files[0])
    with open(first_path, 'rb') as f:
        d = f.read()
    assert d == b'000'
Beispiel #8
0
def test_write_bytes(c, s, a, b):
    """Write scattered bytes to HDFS through ``c.compute`` and verify them.

    Creates ``data/`` and ``data2/`` under ``basedir``, writes three
    scattered byte strings into each (first with a ``file.*.dat`` pattern,
    then with a bare directory URL) and asserts that each directory lists
    three entries. ``file.1.dat`` must contain ``b'456'``.

    Generator-style test: each ``yield`` hands an awaitable to the test
    driver, which is not visible here.
    """
    with make_hdfs() as (hdfs, basedir):
        data_dir = '%s/data/' % basedir
        hdfs.mkdir(data_dir)
        chunks = [b'123', b'456', b'789']
        scattered = yield c._scatter(chunks)

        writes = write_bytes(scattered, 'hdfs://%s/data/file.*.dat' % basedir)
        futures = c.compute(writes)
        yield _wait(futures)

        # Yield on the first future itself after waiting on all of them.
        yield futures[0]

        assert len(hdfs.ls(data_dir)) == 3
        with hdfs.open('%s/data/file.1.dat' % basedir) as fh:
            assert fh.read() == b'456'

        data2_dir = '%s/data2/' % basedir
        hdfs.mkdir(data2_dir)
        writes = write_bytes(scattered, 'hdfs://%s/data2/' % basedir)
        futures = c.compute(writes)
        yield _wait(futures)

        assert len(hdfs.ls(data2_dir)) == 3
Beispiel #9
0
def test_write_bytes(c, s, a, b):
    """Exercise ``write_bytes`` on HDFS with a pattern URL and a directory URL.

    Three byte strings are scattered via ``c._scatter`` and written twice
    under ``basedir``. After each write the destination directory must list
    three entries, and ``file.1.dat`` from the first write must contain
    ``b'456'``.

    The function is a generator; the driver consuming its yields is not
    visible here.
    """
    with make_hdfs() as (hdfs, basedir):
        hdfs.mkdir('%s/data/' % basedir)
        payloads = [b'123', b'456', b'789']
        remote = yield c._scatter(payloads)

        pattern_url = 'hdfs://%s/data/file.*.dat' % basedir
        futures = c.compute(write_bytes(remote, pattern_url))
        yield _wait(futures)

        # Yield on the first future's result after waiting on all of them.
        yield futures[0]._result()

        entries = hdfs.ls('%s/data/' % basedir)
        assert len(entries) == 3
        with hdfs.open('%s/data/file.1.dat' % basedir) as handle:
            second_chunk = handle.read()
            assert second_chunk == b'456'

        hdfs.mkdir('%s/data2/' % basedir)
        directory_url = 'hdfs://%s/data2/' % basedir
        futures = c.compute(write_bytes(remote, directory_url))
        yield _wait(futures)

        entries = hdfs.ls('%s/data2/' % basedir)
        assert len(entries) == 3
Beispiel #10
0
def test_write_bytes(c, s, a, b):
    """Write scattered bytes to fixed HDFS paths under ``/tmp/test``.

    Writes three byte strings to ``/tmp/test/data/`` (using a
    ``file.*.dat`` pattern) and to ``/tmp/test/data2/`` (directory only).
    Each directory must list three entries afterwards and
    ``/tmp/test/data/file.1.dat`` must contain ``b'456'``.

    Generator-style test; its yields are consumed by a driver that is not
    visible here.
    """
    with make_hdfs() as hdfs:
        hdfs.mkdir('/tmp/test/data/')
        chunks = [b'123', b'456', b'789']
        scattered = yield c._scatter(chunks)

        writes = write_bytes(scattered, 'hdfs:///tmp/test/data/file.*.dat')
        futures = c.compute(writes)
        yield _wait(futures)

        yield futures[0]._result()

        listed = hdfs.ls('/tmp/test/data/')
        assert len(listed) == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as fh:
            assert fh.read() == b'456'

        hdfs.mkdir('/tmp/test/data2/')
        writes = write_bytes(scattered, 'hdfs:///tmp/test/data2/')
        futures = c.compute(writes)
        yield _wait(futures)

        listed = hdfs.ls('/tmp/test/data2/')
        assert len(listed) == 3
Beispiel #11
0
def test_compressed_write(tmpdir):
    """Write two delayed byte strings as gzip files and decompress one.

    ``write_bytes`` is called with a ``bytes-*.gz`` pattern inside
    ``tmpdir`` and ``compression='gzip'``. After computing the tasks the
    directory must hold two files (including ``bytes-0.gz``), and the
    first listed file must decompress to ``b'000'``.

    Parameters
    ----------
    tmpdir
        Temporary directory object; converted to ``str`` before use.
    """
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]
    out = write_bytes(data, os.path.join(tmpdir, 'bytes-*.gz'),
                      compression='gzip')
    compute(*out)
    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert 'bytes-0.gz' in files
    import gzip
    # GzipFile is a context manager; closing it explicitly avoids leaking
    # the underlying file handle, which the original never closed.
    with gzip.GzipFile(os.path.join(tmpdir, files[0])) as gz:
        d = gz.read()
    assert d == b'000'
Beispiel #12
0
def test_write_bytes_2(c, s, a, b):
    """Round-trip five delayed byte strings through HDFS using the client.

    The values are written under ``basedir`` via ``write_bytes`` and
    ``c.compute``; afterwards ``hdfs.ls(basedir)`` must list five entries.
    The ``*.part`` files are then read back with ``read_bytes`` and the
    gathered results must equal the original list.

    Generator-style test; the driver consuming its yields is not visible
    here.
    """
    with make_hdfs() as (hdfs, basedir):
        target = 'hdfs://%s/' % basedir
        payloads = [b'test data %i' % i for i in range(5)]
        lazy_payloads = [delayed(item) for item in payloads]

        write_futures = c.compute(write_bytes(lazy_payloads, target))
        # Only completion matters here; the gathered write results are unused.
        yield c._gather(write_futures)
        assert len(hdfs.ls(basedir)) == 5

        sample, blocks = read_bytes('hdfs://%s/*.part' % basedir)
        read_futures = c.compute(list(concat(blocks)))
        read_back = yield c._gather(read_futures)
        assert payloads == read_back
def test_compressed_write(tmpdir):
    """Check gzip-compressed output from ``write_bytes`` on local disk.

    Two references to the same delayed ``b'000'`` value are written using
    the ``bytes-*.gz`` pattern with ``compression='gzip'``. The test then
    asserts that two files exist (one named ``bytes-0.gz``) and that the
    first listed file decompresses to ``b'000'``.

    Parameters
    ----------
    tmpdir
        Temporary directory object; converted to ``str`` before use.
    """
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]
    out = write_bytes(data, os.path.join(tmpdir, 'bytes-*.gz'),
                      compression='gzip')
    compute(*out)
    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert 'bytes-0.gz' in files
    import gzip
    # Open inside ``with`` so the gzip stream (and its file descriptor) is
    # closed deterministically; the original leaked it.
    first_path = os.path.join(tmpdir, files[0])
    with gzip.GzipFile(first_path) as gz:
        d = gz.read()
    assert d == b'000'
Beispiel #14
0
def test_write_bytes_2(c, s, a, b):
    """Write five byte strings to HDFS and read the ``*.part`` files back.

    Each byte string is wrapped with ``delayed``, written under ``basedir``
    and computed through ``c``. The directory must then list five entries,
    and reading ``*.part`` back must reproduce the original data in order.

    This is a generator; what drives it is not visible here.
    """
    with make_hdfs() as (hdfs, basedir):
        destination = 'hdfs://%s/' % basedir
        data = []
        for index in range(5):
            data.append(b'test data %i' % index)
        delayed_values = [delayed(chunk) for chunk in data]

        write_tasks = write_bytes(delayed_values, destination)
        pending = c.compute(write_tasks)
        # The write results themselves are not inspected.
        yield c._gather(pending)
        entry_count = len(hdfs.ls(basedir))
        assert entry_count == 5

        sample, read_blocks = read_bytes('hdfs://%s/*.part' % basedir)
        pending = c.compute(list(concat(read_blocks)))
        results = yield c._gather(pending)
        assert data == results
Beispiel #15
0
def test_write_bytes(e, s, a, b):
    """Round-trip five byte strings through ``hdfs:///tmp/test/``.

    Uses ``write_bytes`` and ``read_bytes`` from ``dask.bytes.core``,
    passing the ``hdfs`` handle explicitly to both. After the writes are
    gathered the directory must list five entries; reading ``*.part`` with
    ``lazy=True`` and gathering must return the original data.

    Generator-style test; the driver consuming its yields is not visible
    here.
    """
    from dask.bytes.core import write_bytes, read_bytes
    with make_hdfs() as hdfs:
        target = 'hdfs:///tmp/test/'
        payloads = [b'test data %i' % i for i in range(5)]
        lazy_payloads = [delayed(item) for item in payloads]

        writes = write_bytes(lazy_payloads, target, hdfs=hdfs)
        write_futures = e.compute(writes)
        # Gathered only to wait for completion; the values are unused.
        yield e._gather(write_futures)
        assert len(hdfs.ls('/tmp/test/')) == 5

        sample, blocks = read_bytes(
            'hdfs:///tmp/test/*.part', hdfs=hdfs, lazy=True)
        read_futures = e.compute(blocks)
        read_back = yield e._gather(read_futures)
        assert payloads == read_back
Beispiel #16
0
def test_write_bytes(e, s, a, b):
    """Write delayed byte strings to HDFS and read them back lazily.

    Five byte strings are written to ``hdfs:///tmp/test/`` with an explicit
    ``hdfs`` handle. The test asserts that ``/tmp/test/`` lists five
    entries and that the ``*.part`` files, read with ``lazy=True`` and
    gathered through ``e``, equal the original list.

    Written as a generator; its driver is not visible here.
    """
    from dask.bytes.core import write_bytes, read_bytes
    with make_hdfs() as hdfs:
        destination = 'hdfs:///tmp/test/'
        data = []
        for index in range(5):
            data.append(b'test data %i' % index)
        delayed_values = [delayed(chunk) for chunk in data]

        write_tasks = write_bytes(delayed_values, destination, hdfs=hdfs)
        pending = e.compute(write_tasks)
        # The result of the write gather is not inspected.
        yield e._gather(pending)
        entry_count = len(hdfs.ls('/tmp/test/'))
        assert entry_count == 5

        sample, read_blocks = read_bytes('hdfs:///tmp/test/*.part',
                                         hdfs=hdfs, lazy=True)
        pending = e.compute(read_blocks)
        results = yield e._gather(pending)
        assert data == results