def test_get_block_locations():
    """Check that a directory's block locations match those of its files.

    The same payload is written to two files under ``/tmp/test/``; the
    listing for the directory must equal the listing for the first file
    followed by the listing for the second.
    """
    with make_hdfs() as hdfs:
        paths = ('/tmp/test/file1', '/tmp/test/file2')
        # TODO: reduce block size to speed up test
        # NOTE(review): presumably sized so that each file spans two blocks,
        # which is what the index-based assertions below expect -- confirm.
        payload = b'a' * int(1e8)

        for path in paths:
            with hdfs.open(path, 'w', repl=1) as handle:
                handle.write(payload)

        locations = get_block_locations(hdfs, '/tmp/test/')
        per_file = (get_block_locations(hdfs, paths[0]) +
                    get_block_locations(hdfs, paths[1]))
        assert locations == per_file

        # Entries 0-1 must name the first file, entries 2-3 the second.
        assert (locations[0]['filename'] == locations[1]['filename']
                == paths[0])
        assert (locations[2]['filename'] == locations[3]['filename']
                == paths[1])
def test_get_block_locations():
    """Compare directory-level block locations with per-file results.

    Two files with identical contents are created, and the block locations
    reported for their parent directory are expected to be the first file's
    locations concatenated with the second file's.
    """
    with make_hdfs() as hdfs:
        first_file = '/tmp/test/file1'
        second_file = '/tmp/test/file2'
        # TODO: reduce block size to speed up test
        contents = b'a' * int(1e8)

        with hdfs.open(first_file, 'w', repl=1) as out:
            out.write(contents)
        with hdfs.open(second_file, 'w', repl=1) as out:
            out.write(contents)

        blocks = get_block_locations(hdfs, '/tmp/test/')
        first_blocks = get_block_locations(hdfs, first_file)
        second_blocks = get_block_locations(hdfs, second_file)
        assert blocks == first_blocks + second_blocks

        # The first pair of entries belongs to file1, the second pair to file2.
        assert blocks[0]['filename'] == blocks[1]['filename'] == first_file
        assert blocks[2]['filename'] == blocks[3]['filename'] == second_file
def test_get_block_locations_nested():
    """Check that block locations are gathered from nested directories.

    Three sub-directories are created under ``/tmp/test/``, each holding two
    one-byte files; the listing for the top-level directory must contain six
    entries.
    """
    with make_hdfs() as hdfs:
        payload = b'a'

        for dir_idx in range(3):
            subdir = '/tmp/test/data-%d' % dir_idx
            hdfs.mkdir(subdir)
            for file_idx in range(2):
                path = '/tmp/test/data-%d/file-%d.csv' % (dir_idx, file_idx)
                with hdfs.open(path, 'wb', replication=1) as handle:
                    handle.write(payload)

        locations = get_block_locations(hdfs, '/tmp/test/')
        # 3 directories x 2 files were written above.
        assert len(locations) == 6
def test_get_block_locations_nested():
    """Verify the block-location listing descends into sub-directories.

    Writes two single-byte CSV files into each of three sub-directories of
    ``/tmp/test/`` and expects six entries when listing the parent.
    """
    with make_hdfs() as hdfs:
        contents = b'a'

        for outer in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % outer)
            for inner in range(2):
                target = '/tmp/test/data-%d/file-%d.csv' % (outer, inner)
                with hdfs.open(target, 'w', repl=1) as out:
                    out.write(contents)

        blocks = get_block_locations(hdfs, '/tmp/test/')
        # Six files were created in total (three directories, two files each).
        assert len(blocks) == 6
def test_get_block_locations_nested(e, s, a, b):
    """Check nested block locations, then read the files back via ``e``.

    Six one-byte files are spread over three sub-directories.  The test
    expects six block-location entries and six gathered results, each equal
    to ``b'a'``.

    This function is a generator: it yields the object returned by
    ``e._gather`` and resumes with the results.
    NOTE(review): presumably driven by a coroutine/cluster test decorator
    that supplies ``e, s, a, b`` and is not visible here -- confirm.
    """
    with make_hdfs() as hdfs:
        payload = b'a'

        for dir_idx in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % dir_idx)
            for file_idx in range(2):
                path = '/tmp/test/data-%d/file-%d.csv' % (dir_idx, file_idx)
                with hdfs.open(path, 'wb', replication=1) as handle:
                    handle.write(payload)

        locations = get_block_locations(hdfs, '/tmp/test/')
        assert len(locations) == 6

        pending = read_bytes('/tmp/test/', hdfs=hdfs, lazy=False)
        results = yield e._gather(pending)
        assert len(results) == 6
        # Every file was written with the same single byte.
        for chunk in results:
            assert chunk == b'a'
def test_get_block_locations_nested(s, a, b):
    """Check nested block locations, then read the files with an Executor.

    Six one-byte files are written across three sub-directories.  After
    asserting six block-location entries, an ``Executor`` is started against
    ``(s.ip, s.port)`` and the output of ``read_bytes`` is gathered; six
    results equal to ``b'a'`` are expected.

    This function is a generator: it yields the objects returned by
    ``_start`` and ``_gather``.
    NOTE(review): presumably driven by a coroutine/cluster test decorator
    that supplies ``s, a, b`` and is not visible here -- confirm.
    """
    with make_hdfs() as hdfs:
        payload = b'a'

        for dir_idx in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % dir_idx)
            for file_idx in range(2):
                path = '/tmp/test/data-%d/file-%d.csv' % (dir_idx, file_idx)
                with hdfs.open(path, 'w', repl=1) as handle:
                    handle.write(payload)

        locations = get_block_locations(hdfs, '/tmp/test/')
        assert len(locations) == 6

        # Created with start=False, so the executor is started explicitly.
        address = (s.ip, s.port)
        executor = Executor(address, start=False)
        yield executor._start()

        pending = read_bytes('/tmp/test/', hdfs=hdfs)
        results = yield executor._gather(pending)
        assert len(results) == 6
        for chunk in results:
            assert chunk == b'a'
def test_get_block_locations_nested(s, a, b):
    """Check nested block locations, then read the files via ``read_binary``.

    Two single-byte files are written into each of three sub-directories of
    ``/tmp/test/``.  The test expects six block-location entries, then starts
    an ``Executor`` against ``(s.ip, s.port)`` and gathers the output of
    ``read_binary``, expecting six results that each equal ``b'a'``.

    This function is a generator: it yields the objects returned by
    ``_start`` and ``_gather``.
    NOTE(review): presumably driven by a coroutine/cluster test decorator
    that supplies ``s, a, b`` and is not visible here -- confirm.
    """
    with make_hdfs() as hdfs:
        contents = b'a'

        for outer in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % outer)
            for inner in range(2):
                target = '/tmp/test/data-%d/file-%d.csv' % (outer, inner)
                with hdfs.open(target, 'w', repl=1) as out:
                    out.write(contents)

        blocks = get_block_locations(hdfs, '/tmp/test/')
        assert len(blocks) == 6

        # Created with start=False, so the executor is started explicitly.
        client = Executor((s.ip, s.port), start=False)
        yield client._start()

        pending = read_binary('/tmp/test/', hdfs=hdfs)
        results = yield client._gather(pending)
        assert len(results) == 6
        for item in results:
            assert item == b'a'