def test_from_filenames_large_gzip():
    """Chunked from_filenames on a gzip file splits into several tasks and
    yields the same contents as an unchunked read."""
    with tmpfile('gz') as fn:
        with GzipFile(fn, 'wb') as fh:
            fh.write(b'Hello, world!\n' * 100)
        chunked = db.from_filenames(fn, chunkbytes=100, linesep='\n')
        whole = db.from_filenames(fn, linesep='\n')
        # chunkbytes=100 over ~1400 bytes should produce multiple tasks
        assert len(chunked.dask) > 5
        assert list(chunked) == list(whole)
def test_read_text_large_gzip():
    """read_text must refuse a blocksize on compressed input (the stream
    cannot be split), and an unchunked read yields a single partition."""
    with tmpfile('gz') as fn:
        f = GzipFile(fn, 'wb')
        f.write(b'Hello, world!\n' * 100)
        f.close()
        with pytest.raises(ValueError):
            # Dropped the dead `b = ...` assignment: the result is never
            # used, the call is expected to raise.
            db.read_text(fn, blocksize=100, linedelimiter='\n')
        c = db.read_text(fn)
        assert c.npartitions == 1
def test_stream_decompress():
    """Exercise stream_decompress with no compression, bz2, and gzip input."""
    data = 'abc\ndef\n123'.encode()
    # NOTE(review): the first two cases compare stripped items against str
    # values while the gzip case below compares against bytes -- confirm
    # stream_decompress really differs in return type per format; on
    # Python 3, bytes .strip() results would never equal 'abc'.
    # Pass-through (no compression)
    assert [s.strip() for s in stream_decompress('', data)] == \
        ['abc', 'def', '123']
    # bz2-compressed input
    assert [s.strip() for s in stream_decompress('bz2', bz2.compress(data))] == \
        ['abc', 'def', '123']
    # gzip-compressed input: write with GzipFile, read the raw bytes back
    with tmpfile() as fn:
        f = GzipFile(fn, 'wb')
        f.write(data)
        f.close()
        with open(fn, 'rb') as f:
            compressed = f.read()
        assert [s.strip() for s in stream_decompress('gz', compressed)] == \
            [b'abc', b'def', b'123']
def test_read_text_large_gzip():
    """Requesting a blocksize on gzip input raises; the plain read is one
    partition."""
    with tmpfile('gz') as fn:
        gz = GzipFile(fn, 'wb')
        gz.write(b'Hello, world!\n' * 100)
        gz.close()
        # A compressed stream cannot be split into blocks
        with pytest.raises(ValueError):
            db.read_text(fn, blocksize=100, linedelimiter='\n')
        bag = db.read_text(fn)
        assert bag.npartitions == 1
def test_read_text_large_gzip():
    """blocksize is rejected for gzip files; default read is a single
    partition."""
    with tmpfile("gz") as fn:
        handle = GzipFile(fn, "wb")
        handle.write(b"Hello, world!\n" * 100)
        handle.close()
        with pytest.raises(ValueError):
            # splitting a compressed stream is not supported
            db.read_text(fn, blocksize=50, linedelimiter="\n")
        result = db.read_text(fn)
        assert result.npartitions == 1
def test_read_text_large_gzip():
    """Gzip input rejects a blocksize; blocksize=None reads the whole file
    as one partition and round-trips the contents."""
    with tmpfile("gz") as fn:
        payload = b"Hello, world!\n" * 100
        writer = GzipFile(fn, "wb")
        writer.write(payload)
        writer.close()
        with pytest.raises(ValueError):
            # compressed data cannot be read in blocks
            db.read_text(fn, blocksize=50, linedelimiter="\n")
        bag = db.read_text(fn, blocksize=None)
        assert bag.npartitions == 1
        # whole-file read must reproduce the original text exactly
        assert "".join(bag.compute()) == payload.decode()