def test_write_bytes(e, s, a, b):
    """Scatter raw byte blocks and write them to HDFS, first through a
    ``*``-templated path and then through a bare directory target."""
    with make_hdfs() as hdfs:
        blocks = [b'123', b'456', b'789']
        scattered = yield e._scatter(blocks)

        # Templated names: the '*' is expanded to one file per block.
        futures = write_bytes('/tmp/test/data/file.*.dat', scattered, hdfs=hdfs)
        yield _wait(futures)
        assert len(hdfs.ls('/tmp/test/data/')) == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as f:
            assert f.read() == b'456'

        # Bare directory: file names are generated automatically.
        futures = write_bytes('/tmp/test/data2/', scattered, hdfs=hdfs)
        yield _wait(futures)
        assert len(hdfs.ls('/tmp/test/data2/')) == 3
def test_write_bytes(e, s, a, b):
    """Write scattered futures to HDFS via ``write_bytes``.

    Covers both the explicit ``file.*.dat`` name template and writing
    into a plain directory path.
    """
    with make_hdfs() as hdfs:
        data = [b'123', b'456', b'789']
        remote_data = yield e._scatter(data)

        futures = write_bytes('/tmp/test/data/file.*.dat', remote_data,
                              hdfs=hdfs)
        yield _wait(futures)

        # Three input blocks -> three output files.
        assert len(hdfs.ls('/tmp/test/data/')) == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as f:
            # The second block lands in file index 1.
            assert f.read() == b'456'

        futures = write_bytes('/tmp/test/data2/', remote_data, hdfs=hdfs)
        yield _wait(futures)
        assert len(hdfs.ls('/tmp/test/data2/')) == 3
def test_write_bytes(s3):
    """Round-trip: write delayed values to S3, read them back, compare."""
    destinations = ['s3://' + test_bucket_name + '/more/' + f for f in files]
    payloads = [delayed(v) for v in files.values()]

    out = core.write_bytes(payloads, destinations)
    compute(*out)

    # Read everything back under the same prefix and compare as sets
    # (ordering of parts is not guaranteed).
    sample, values = read_bytes('s3://' + test_bucket_name +
                                '/more/test/accounts.*')
    results = compute(*concat(values))
    assert set(files.values()) == set(results)
def test_write_bytes(s3):
    """Write bytes to S3 with ``core.write_bytes`` and verify the
    contents by reading them back with ``read_bytes``."""
    prefix = 's3://' + test_bucket_name + '/more/'
    paths = [prefix + f for f in files]
    values = [delayed(v) for v in files.values()]

    out = core.write_bytes(values, paths)
    compute(*out)

    sample, values = read_bytes(prefix + 'test/accounts.*')
    results = compute(*concat(values))
    # Compare as sets: block order is not significant.
    assert set(list(files.values())) == set(results)
def test_write_bytes(hdfs):
    """Write five delayed byte blocks to HDFS and read them back."""
    target = 'hdfs://%s/' % basedir
    payload = [b'test data %i' % i for i in range(5)]

    tasks = write_bytes([delayed(block) for block in payload], target)
    dask.compute(tasks)

    # One '.part' file per input block.
    assert len(hdfs.ls(basedir)) == 5

    sample, vals = read_bytes('hdfs://%s/*.part' % basedir)
    (results,) = dask.compute(list(concat(vals)))
    assert payload == results
def test_simple_write(tmpdir):
    """Write two delayed byte blocks into a directory.

    Checks that two files are produced, that the default ``0.part``
    naming is used, and that the on-disk contents match the payload.
    """
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]

    out = write_bytes(data, tmpdir)
    assert len(out) == 2
    compute(*out)

    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert '0.part' in files

    # Fix: the original leaked an open file handle
    # (``open(...).read()`` with no close).
    with open(os.path.join(tmpdir, files[0]), 'rb') as f:
        d = f.read()
    assert d == b'000'
def test_simple_write(tmpdir):
    """Smoke test for ``write_bytes`` against a local directory.

    Two identical delayed blocks should produce two ``*.part`` files
    whose contents equal the payload bytes.
    """
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]

    out = write_bytes(data, tmpdir)
    assert len(out) == 2
    compute(*out)

    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert '0.part' in files

    # Fix: close the file via a context manager instead of leaking the
    # handle as the original ``open(...).read()`` did.
    with open(os.path.join(tmpdir, files[0]), 'rb') as f:
        assert f.read() == b'000'
def test_write_bytes(c, s, a, b):
    """Scatter bytes to the cluster and write them to HDFS, with both a
    templated file name and a bare directory target."""
    with make_hdfs() as (hdfs, basedir):
        hdfs.mkdir('%s/data/' % basedir)
        blocks = [b'123', b'456', b'789']
        scattered = yield c._scatter(blocks)

        futures = c.compute(
            write_bytes(scattered, 'hdfs://%s/data/file.*.dat' % basedir))
        yield _wait(futures)
        yield futures[0]

        assert len(hdfs.ls('%s/data/' % basedir)) == 3
        with hdfs.open('%s/data/file.1.dat' % basedir) as f:
            assert f.read() == b'456'

        # Directory target: names are auto-generated.
        hdfs.mkdir('%s/data2/' % basedir)
        futures = c.compute(
            write_bytes(scattered, 'hdfs://%s/data2/' % basedir))
        yield _wait(futures)
        assert len(hdfs.ls('%s/data2/' % basedir)) == 3
def test_write_bytes(c, s, a, b):
    """Write scattered byte futures to HDFS through the client's
    ``compute`` interface; checks templated and directory targets."""
    with make_hdfs() as (hdfs, basedir):
        hdfs.mkdir('%s/data/' % basedir)
        data = [b'123', b'456', b'789']
        remote_data = yield c._scatter(data)

        graph = write_bytes(remote_data, 'hdfs://%s/data/file.*.dat' % basedir)
        futures = c.compute(graph)
        yield _wait(futures)
        # Also surface any exception from the first write task.
        yield futures[0]._result()

        assert len(hdfs.ls('%s/data/' % basedir)) == 3
        with hdfs.open('%s/data/file.1.dat' % basedir) as f:
            assert f.read() == b'456'

        hdfs.mkdir('%s/data2/' % basedir)
        graph = write_bytes(remote_data, 'hdfs://%s/data2/' % basedir)
        futures = c.compute(graph)
        yield _wait(futures)
        assert len(hdfs.ls('%s/data2/' % basedir)) == 3
def test_write_bytes(c, s, a, b):
    """HDFS write test using fixed ``/tmp/test`` paths: templated file
    names first, then a bare directory destination."""
    with make_hdfs() as hdfs:
        hdfs.mkdir('/tmp/test/data/')
        payload = [b'123', b'456', b'789']
        scattered = yield c._scatter(payload)

        futures = c.compute(
            write_bytes(scattered, 'hdfs:///tmp/test/data/file.*.dat'))
        yield _wait(futures)
        # Re-raise any error from the first write task.
        yield futures[0]._result()

        assert len(hdfs.ls('/tmp/test/data/')) == 3
        with hdfs.open('/tmp/test/data/file.1.dat') as f:
            assert f.read() == b'456'

        hdfs.mkdir('/tmp/test/data2/')
        futures = c.compute(write_bytes(scattered, 'hdfs:///tmp/test/data2/'))
        yield _wait(futures)
        assert len(hdfs.ls('/tmp/test/data2/')) == 3
def test_compressed_write(tmpdir):
    """Write two delayed blocks with gzip compression and verify the
    ``bytes-N.gz`` files decompress back to the payload."""
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]

    out = write_bytes(data, os.path.join(tmpdir, 'bytes-*.gz'),
                      compression='gzip')
    compute(*out)

    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert 'bytes-0.gz' in files

    import gzip
    # Fix: close the GzipFile — the original leaked the open handle
    # (``gzip.GzipFile(...).read()`` with no close).
    with gzip.GzipFile(os.path.join(tmpdir, files[0])) as f:
        d = f.read()
    assert d == b'000'
def test_write_bytes_2(c, s, a, b):
    """Write five delayed blocks to HDFS through the client, then read
    them back with ``read_bytes`` and compare."""
    with make_hdfs() as (hdfs, basedir):
        target = 'hdfs://%s/' % basedir
        payload = [b'test data %i' % i for i in range(5)]
        delayed_blocks = [delayed(block) for block in payload]

        out = write_bytes(delayed_blocks, target)
        futures = c.compute(out)
        results = yield c._gather(futures)

        # One '.part' file per block.
        assert len(hdfs.ls(basedir)) == 5

        sample, vals = read_bytes('hdfs://%s/*.part' % basedir)
        futures = c.compute(list(concat(vals)))
        results = yield c._gather(futures)
        assert payload == results
def test_compressed_write(tmpdir):
    """Gzip-compressed ``write_bytes``: two blocks produce two
    ``bytes-N.gz`` files whose decompressed contents match."""
    tmpdir = str(tmpdir)
    make_bytes = lambda: b'000'
    some_bytes = delayed(make_bytes)()
    data = [some_bytes, some_bytes]

    out = write_bytes(data, os.path.join(tmpdir, 'bytes-*.gz'),
                      compression='gzip')
    compute(*out)

    files = os.listdir(tmpdir)
    assert len(files) == 2
    assert 'bytes-0.gz' in files

    import gzip
    # Fix: use a context manager so the gzip handle is closed instead of
    # leaked as in the original one-liner.
    with gzip.GzipFile(os.path.join(tmpdir, files[0])) as f:
        assert f.read() == b'000'
def test_write_bytes_2(c, s, a, b):
    """Round-trip five byte blocks through HDFS via client compute."""
    with make_hdfs() as (hdfs, basedir):
        path = 'hdfs://%s/' % basedir
        data = [b'test data %i' % i for i in range(5)]
        values = [delayed(d) for d in data]

        futures = c.compute(write_bytes(values, path))
        results = yield c._gather(futures)
        assert len(hdfs.ls(basedir)) == 5

        # Read everything back and verify the payload survived intact.
        sample, vals = read_bytes('hdfs://%s/*.part' % basedir)
        futures = c.compute(list(concat(vals)))
        results = yield c._gather(futures)
        assert data == results
def test_write_bytes(e, s, a, b):
    """Round-trip five blocks through HDFS using the explicit ``hdfs=``
    keyword on both ``write_bytes`` and ``read_bytes``."""
    from dask.bytes.core import write_bytes, read_bytes
    with make_hdfs() as hdfs:
        target = 'hdfs:///tmp/test/'
        payload = [b'test data %i' % i for i in range(5)]
        delayed_blocks = [delayed(block) for block in payload]

        out = write_bytes(delayed_blocks, target, hdfs=hdfs)
        futures = e.compute(out)
        results = yield e._gather(futures)
        assert len(hdfs.ls('/tmp/test/')) == 5

        sample, vals = read_bytes('hdfs:///tmp/test/*.part', hdfs=hdfs,
                                  lazy=True)
        futures = e.compute(vals)
        results = yield e._gather(futures)
        assert payload == results
def test_write_bytes(e, s, a, b):
    """Write delayed blocks to HDFS, then lazily read the ``*.part``
    files back and check the data is unchanged."""
    from dask.bytes.core import write_bytes, read_bytes
    with make_hdfs() as hdfs:
        path = 'hdfs:///tmp/test/'
        data = [b'test data %i' % i for i in range(5)]
        values = [delayed(d) for d in data]

        futures = e.compute(write_bytes(values, path, hdfs=hdfs))
        results = yield e._gather(futures)

        # Five blocks -> five part files.
        assert len(hdfs.ls('/tmp/test/')) == 5

        sample, vals = read_bytes('hdfs:///tmp/test/*.part', hdfs=hdfs,
                                  lazy=True)
        futures = e.compute(vals)
        results = yield e._gather(futures)
        assert data == results