예제 #1
0
파일: test_bag.py 프로젝트: rla3rd/dask
def test_from_filenames_large_gzip():
    """Check that a chunked read of a gzip file matches an unchunked read.

    Writes 100 identical lines into a gzip file, loads it once with
    ``chunkbytes=100`` and once without, then asserts that
    ``len(b.dask) > 5`` for the chunked bag and that both bags yield the
    same items.
    """
    with tmpfile('gz') as fn:
        # The context manager closes the archive even if the write raises.
        with GzipFile(fn, 'wb') as f:
            f.write(b'Hello, world!\n' * 100)

        b = db.from_filenames(fn, chunkbytes=100, linesep='\n')
        c = db.from_filenames(fn, linesep='\n')
        assert len(b.dask) > 5
        assert list(b) == list(c)
예제 #2
0
파일: test_bag.py 프로젝트: dukebody/dask
def test_read_text_large_gzip():
    """Check how ``db.read_text`` treats a gzip file with and without blocks.

    Writes 100 identical lines into a gzip file, asserts that requesting
    ``blocksize=100`` raises ``ValueError``, and asserts that a plain read
    yields a bag with exactly one partition.
    """
    with tmpfile('gz') as fn:
        # The context manager closes the archive even if the write raises.
        with GzipFile(fn, 'wb') as f:
            f.write(b'Hello, world!\n' * 100)

        with pytest.raises(ValueError):
            # The result is never used; only the raised error matters.
            db.read_text(fn, blocksize=100, linedelimiter='\n')

        c = db.read_text(fn)
        assert c.npartitions == 1
예제 #3
0
파일: test_bag.py 프로젝트: jrenner/dask
def test_from_filenames_large_gzip():
    """Compare chunked and unchunked ``db.from_filenames`` on a gzip file.

    A gzip file holding 100 copies of one line is read twice: once with
    ``chunkbytes=100`` and once with no chunk size. The chunked bag must
    satisfy ``len(b.dask) > 5`` and must list the same items as the
    unchunked one.
    """
    with tmpfile('gz') as fn:
        # Use a context manager so the archive is closed on any exit path.
        with GzipFile(fn, 'wb') as f:
            f.write(b'Hello, world!\n' * 100)

        b = db.from_filenames(fn, chunkbytes=100, linesep='\n')
        c = db.from_filenames(fn, linesep='\n')
        assert len(b.dask) > 5
        assert list(b) == list(c)
예제 #4
0
파일: test_bag.py 프로젝트: jrenner/dask
def test_stream_decompress():
    """Check ``stream_decompress`` for the '', 'bz2' and 'gz' formats.

    The same three-line payload is passed through each format and the
    stripped items are compared with the expected lines. The '' and 'bz2'
    results are compared against ``'abc'``-style literals, while the 'gz'
    result is compared against ``b'abc'``-style bytes literals.
    """
    data = 'abc\ndef\n123'.encode()

    plain = [s.strip() for s in stream_decompress('', data)]
    assert plain == ['abc', 'def', '123']

    from_bz2 = [s.strip() for s in stream_decompress('bz2', bz2.compress(data))]
    assert from_bz2 == ['abc', 'def', '123']

    with tmpfile() as fn:
        # Distinct names for the writer and the reader; the context manager
        # closes the archive before the raw compressed bytes are read back.
        with GzipFile(fn, 'wb') as gz:
            gz.write(data)
        with open(fn, 'rb') as raw:
            compressed = raw.read()

    from_gz = [s.strip() for s in stream_decompress('gz', compressed)]
    assert from_gz == [b'abc', b'def', b'123']
예제 #5
0
파일: test_bag.py 프로젝트: serazing/dask
def test_read_text_large_gzip():
    """Check ``db.read_text`` on a gzip file with and without a blocksize.

    After writing 100 identical lines into a gzip file, the test expects
    ``ValueError`` when ``blocksize=100`` is requested and a single
    partition when the file is read with default arguments.
    """
    with tmpfile('gz') as fn:
        # Close the archive via the context manager, even on a failed write.
        with GzipFile(fn, 'wb') as f:
            f.write(b'Hello, world!\n' * 100)

        with pytest.raises(ValueError):
            db.read_text(fn, blocksize=100, linedelimiter='\n')

        c = db.read_text(fn)
        assert c.npartitions == 1
예제 #6
0
def test_read_text_large_gzip():
    """Check ``db.read_text`` on a gzip file with and without a blocksize.

    Writes 100 identical lines into a gzip file, expects ``ValueError`` when
    ``blocksize=50`` is requested, and expects exactly one partition from a
    read with default arguments.
    """
    with tmpfile("gz") as fn:
        # The context manager guarantees the archive is closed after writing.
        with GzipFile(fn, "wb") as f:
            f.write(b"Hello, world!\n" * 100)

        with pytest.raises(ValueError):
            db.read_text(fn, blocksize=50, linedelimiter="\n")

        c = db.read_text(fn)
        assert c.npartitions == 1
예제 #7
0
파일: test_bag.py 프로젝트: rla3rd/dask
def test_stream_decompress():
    """Run one payload through ``stream_decompress`` in three formats.

    Covers the '' (uncompressed), 'bz2' and 'gz' format codes. Items are
    stripped before comparison; the first two formats are checked against
    ``'abc'``-style literals and the 'gz' format against ``b'abc'``-style
    bytes literals.
    """
    data = 'abc\ndef\n123'.encode()
    expected = ['abc', 'def', '123']

    uncompressed_lines = [s.strip() for s in stream_decompress('', data)]
    assert uncompressed_lines == expected

    bz2_lines = [
        s.strip() for s in stream_decompress('bz2', bz2.compress(data))
    ]
    assert bz2_lines == expected

    with tmpfile() as fn:
        # Write through a context manager so the archive is closed before it
        # is reopened to fetch the raw compressed bytes.
        with GzipFile(fn, 'wb') as archive:
            archive.write(data)
        with open(fn, 'rb') as raw:
            compressed = raw.read()

    gz_lines = [s.strip() for s in stream_decompress('gz', compressed)]
    assert gz_lines == [b'abc', b'def', b'123']
예제 #8
0
파일: test_bag.py 프로젝트: xvr-hlt/dask
def test_read_text_large_gzip():
    """Check ``db.read_text`` on a gzip file with and without a blocksize.

    Writes 100 identical lines into a gzip file, expects ``ValueError`` when
    ``blocksize=50`` is requested, and expects a read with ``blocksize=None``
    to give one partition whose joined contents equal the decoded payload.
    """
    with tmpfile("gz") as fn:
        data = b"Hello, world!\n" * 100
        # The context manager closes the archive even if the write raises.
        with GzipFile(fn, "wb") as f:
            f.write(data)

        with pytest.raises(ValueError):
            # not allowed blocks when compressed
            db.read_text(fn, blocksize=50, linedelimiter="\n")

        c = db.read_text(fn, blocksize=None)
        assert c.npartitions == 1
        assert "".join(c.compute()) == data.decode()