def test__read_text(e, s, a, b):
    with make_hdfs() as hdfs:
        with hdfs.open('/tmp/test/text.1.txt', 'wb') as f:
            f.write('Alice 100\nBob 200\nCharlie 300'.encode())

        with hdfs.open('/tmp/test/text.2.txt', 'wb') as f:
            f.write('Dan 400\nEdith 500\nFrank 600'.encode())

        with hdfs.open('/tmp/test/other.txt', 'wb') as f:
            f.write('a b\nc d'.encode())

        b = read_text('/tmp/test/text.*.txt', collection=True, lazy=True)
        yield gen.sleep(0.5)
        assert not s.tasks  # lazy read should not submit tasks to the scheduler

        future = e.compute(b.str.strip().str.split().map(len))
        result = yield future._result()
        assert result == [2, 2, 2, 2, 2, 2]

        b = read_text('/tmp/test/other.txt', collection=True, lazy=False)
        future = e.compute(b.str.split().concat())
        result = yield future._result()
        assert result == ['a', 'b', 'c', 'd']

        L = read_text('/tmp/test/text.*.txt', collection=False, lazy=False)
        assert all(isinstance(x, Future) for x in L)

        L = read_text('/tmp/test/text.*.txt', collection=False, lazy=True)
        assert all(isinstance(x, Value) for x in L)

def test_read_text_sync(loop):
    with make_hdfs() as hdfs:
        with hdfs.open('/tmp/test/data.txt', 'wb') as f:
            f.write(b'hello\nworld')

        with cluster(nworkers=3) as (s, [a, b, c]):
            with Executor(('127.0.0.1', s['port']), loop=loop) as e:
                b = read_text('/tmp/test/*.txt', lazy=False)
                assert list(b.str.upper()) == ['HELLO', 'WORLD']

def test__read_text_json_endline(e, s, a):
    import json
    with make_hdfs() as hdfs:
        with hdfs.open('/tmp/test/text.1.txt', 'wb') as f:
            f.write(b'{"x": 1}\n{"x": 2}\n')

        b = read_text('/tmp/test/text.1.txt').map(json.loads)
        result = yield e.compute(b)._result()

        assert result == [{"x": 1}, {"x": 2}]

def test__read_text_unicode(e, s, a, b):
    fn = '/tmp/test/data.txt'
    data = b'abcd\xc3\xa9'  # UTF-8 bytes: five characters, six bytes
    with make_hdfs() as hdfs:
        with hdfs.open(fn, 'wb') as f:
            f.write(b'\n'.join([data, data]))

        f = read_text(fn, collection=False, lazy=False)
        result = yield f[0]._result()
        assert len(result) == 2
        assert list(map(unicode.strip, result)) == [data.decode('utf-8')] * 2
        assert len(result[0]) == 5  # b'\xc3\xa9' decodes to a single character