Exemple #1
0
def test_read_bytes_delimited():
    with filetexts(files, mode='b'):
        for bs in [5, 15, 45, 1500]:
            _, values = read_bytes('.test.accounts*',
                                   blocksize=bs,
                                   delimiter=b'\n')
            _, values2 = read_bytes('.test.accounts*',
                                    blocksize=bs,
                                    delimiter=b'foo')
            assert ([a.key for a in concat(values)] !=
                    [b.key for b in concat(values2)])

            results = compute(*concat(values))
            res = [r for r in results if r]
            assert all(r.endswith(b'\n') for r in res)
            ourlines = b''.join(res).split(b'\n')
            testlines = b"".join(files[k] for k in sorted(files)).split(b'\n')
            assert ourlines == testlines

            # delimiter not at the end
            d = b'}'
            _, values = read_bytes('.test.accounts*',
                                   blocksize=bs,
                                   delimiter=d)
            results = compute(*concat(values))
            res = [r for r in results if r]
            # All should end in } except EOF
            assert sum(r.endswith(b'}') for r in res) == len(res) - 2
            ours = b"".join(res)
            test = b"".join(files[v] for v in sorted(files))
            assert ours == test
Exemple #2
0
def test_read_bytes_delimited():
    with filetexts(files, mode='b'):
        for bs in [5, 15, 45, 1500]:
            _, values = read_bytes('.test.accounts*',
                                    blocksize=bs, delimiter=b'\n')
            _, values2 = read_bytes('.test.accounts*',
                                    blocksize=bs, delimiter=b'foo')
            assert ([a.key for a in concat(values)] !=
                    [b.key for b in concat(values2)])

            results = compute(*concat(values))
            res = [r for r in results if r]
            assert all(r.endswith(b'\n') for r in res)
            ourlines = b''.join(res).split(b'\n')
            testlines = b"".join(files[k] for k in sorted(files)).split(b'\n')
            assert ourlines == testlines

            # delimiter not at the end
            d = b'}'
            _, values = read_bytes('.test.accounts*', blocksize=bs, delimiter=d)
            results = compute(*concat(values))
            res = [r for r in results if r]
            # All should end in } except EOF
            assert sum(r.endswith(b'}') for r in res) == len(res) - 2
            ours = b"".join(res)
            test = b"".join(files[v] for v in sorted(files))
            assert ours == test
Exemple #3
0
def test_read_bytes_sample_delimiter():
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*',
                                    sample=80, delimiter=b'\n')
        assert sample.endswith(b'\n')
        sample, values = read_bytes('.test.accounts.1.json',
                                    sample=80, delimiter=b'\n')
        assert sample.endswith(b'\n')
        sample, values = read_bytes('.test.accounts.1.json',
                                    sample=2, delimiter=b'\n')
        assert sample.endswith(b'\n')
Exemple #4
0
def test_read_bytes_sample_delimiter():
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*',
                                    sample=80,
                                    delimiter=b'\n')
        assert sample.endswith(b'\n')
        sample, values = read_bytes('.test.accounts.1.json',
                                    sample=80,
                                    delimiter=b'\n')
        assert sample.endswith(b'\n')
        sample, values = read_bytes('.test.accounts.1.json',
                                    sample=2,
                                    delimiter=b'\n')
        assert sample.endswith(b'\n')
Exemple #5
0
def test_registered_read_bytes():
    from dask.bytes.core import read_bytes
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*')

        results = compute(*concat(values))
        assert set(results) == set(files.values())
Exemple #6
0
def test_registered_read_bytes():
    from dask.bytes.core import read_bytes
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*')

        results = compute(*concat(values))
        assert set(results) == set(files.values())
Exemple #7
0
def test_names():
    with filetexts(files, mode='b'):
        _, a = read_bytes('.test.accounts.*')
        _, b = read_bytes('.test.accounts.*')
        a = list(concat(a))
        b = list(concat(b))

        assert [aa._key for aa in a] == [bb._key for bb in b]

        sleep(1)
        for fn in files:
            with open(fn, 'ab') as f:
                f.write(b'x')

        _, c = read_bytes('.test.accounts.*')
        c = list(concat(c))
        assert [aa._key for aa in a] != [cc._key for cc in c]
Exemple #8
0
def test_names():
    with filetexts(files, mode='b'):
        _, a = read_bytes('.test.accounts.*')
        _, b = read_bytes('.test.accounts.*')
        a = list(concat(a))
        b = list(concat(b))

        assert [aa._key for aa in a] == [bb._key for bb in b]

        sleep(1)
        for fn in files:
            with open(fn, 'ab') as f:
                f.write(b'x')

        _, c = read_bytes('.test.accounts.*')
        c = list(concat(c))
        assert [aa._key for aa in a] != [cc._key for cc in c]
Exemple #9
0
def test_compression(fmt, blocksize):
    compress = compression.compress[fmt]
    files2 = valmap(compress, files)
    with filetexts(files2, mode='b'):
        sample, values = read_bytes('.test.accounts.*.json',
                blocksize=blocksize, delimiter=b'\n', compression=fmt)
        assert sample[:5] == files[sorted(files)[0]][:5]

        results = compute(*concat(values))
        assert (b''.join(results) ==
                b''.join([files[k] for k in sorted(files)]))
Exemple #10
0
def test_read_bytes():
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*')
        assert isinstance(sample, bytes)
        assert sample[:5] == files[sorted(files)[0]][:5]

        assert isinstance(values, (list, tuple))
        assert isinstance(values[0], (list, tuple))
        assert hasattr(values[0][0], 'dask')

        assert sum(map(len, values)) >= len(files)
        results = compute(*concat(values))
        assert set(results) == set(files.values())
Exemple #11
0
def test_compression(fmt, blocksize):
    compress = compression.compress[fmt]
    files2 = valmap(compress, files)
    with filetexts(files2, mode='b'):
        sample, values = read_bytes('.test.accounts.*.json',
                                    blocksize=blocksize,
                                    delimiter=b'\n',
                                    compression=fmt)
        assert sample[:5] == files[sorted(files)[0]][:5]

        results = compute(*concat(values))
        assert (b''.join(results) == b''.join(
            [files[k] for k in sorted(files)]))
Exemple #12
0
def test_read_bytes():
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*')
        assert isinstance(sample, bytes)
        assert sample[:5] == files[sorted(files)[0]][:5]

        assert isinstance(values, (list, tuple))
        assert isinstance(values[0], (list, tuple))
        assert hasattr(values[0][0], 'dask')

        assert sum(map(len, values)) >= len(files)
        results = compute(*concat(values))
        assert set(results) == set(files.values())
Exemple #13
0
def test_read_bytes_block():
    with filetexts(files, mode='b'):
        for bs in [5, 15, 45, 1500]:
            sample, vals = read_bytes('.test.account*', blocksize=bs)
            assert (list(map(len, vals)) ==
                    [(len(v) // bs + 1) for v in files.values()])

            results = compute(*concat(vals))
            assert (sum(len(r) for r in results) ==
                    sum(len(v) for v in files.values()))

            ourlines = b"".join(results).split(b'\n')
            testlines = b"".join(files.values()).split(b'\n')
            assert set(ourlines) == set(testlines)
Exemple #14
0
def test_read_bytes_block():
    with filetexts(files, mode='b'):
        for bs in [5, 15, 45, 1500]:
            sample, vals = read_bytes('.test.account*', blocksize=bs)
            assert (list(map(len, vals)) == [(len(v) // bs + 1)
                                             for v in files.values()])

            results = compute(*concat(vals))
            assert (sum(len(r) for r in results) == sum(
                len(v) for v in files.values()))

            ourlines = b"".join(results).split(b'\n')
            testlines = b"".join(files.values()).split(b'\n')
            assert set(ourlines) == set(testlines)
Exemple #15
0
def test_read_bytes_blocksize_none():
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*', blocksize=None)
        assert sum(map(len, values)) == len(files)
Exemple #16
0
def test_not_found():
    fn = 'not-a-file'
    with pytest.raises(FileNotFoundError) as e:
        read_bytes(fn)
    assert fn in str(e)
Exemple #17
0
def test_not_found():
    fn = 'not-a-file'
    with pytest.raises(FileNotFoundError) as e:
        read_bytes(fn)
    assert fn in str(e)
Exemple #18
0
def test_read_bytes_blocksize_none():
    with filetexts(files, mode='b'):
        sample, values = read_bytes('.test.accounts.*', blocksize=None)
        assert sum(map(len, values)) == len(files)