Ejemplo n.º 1
0
def test_read_bytes_delimited(e, s, a, b):
    """Check that ``read_bytes`` respects the ``delimiter`` argument.

    For several block sizes, the non-empty blocks gathered from the
    matching files must reassemble into the contents of ``files``
    (joined in sorted key order), and changing the delimiter must change
    the keys of the returned futures.
    """
    pattern = test_bucket_name + '/test/accounts*'
    # Reference payload: every fixture file joined in sorted key order.
    expected = b"".join(files[name] for name in sorted(files))

    for blocksize in (5, 15, 45, 1500):
        by_newline = read_bytes(pattern, lazy=False, blocksize=blocksize,
                                delimiter=b'\n')
        by_foo = read_bytes(pattern, lazy=False, blocksize=blocksize,
                            delimiter=b'foo')
        # The delimiter must take part in the future keys.
        newline_keys = [fut.key for fut in by_newline]
        foo_keys = [fut.key for fut in by_foo]
        assert newline_keys != foo_keys

        gathered = yield e._gather(by_newline)
        chunks = [chunk for chunk in gathered if chunk]
        # Every non-empty block has to stop right after a newline.
        assert not [chunk for chunk in chunks if not chunk.endswith(b'\n')]
        assert b''.join(chunks).split(b'\n') == expected.split(b'\n')

        # Now use a delimiter that is not the final byte of the data.
        brace = b'}'
        by_brace = read_bytes(pattern, lazy=False, blocksize=blocksize,
                              delimiter=brace)
        gathered = yield e._gather(by_brace)
        chunks = [chunk for chunk in gathered if chunk]
        # All blocks should end in '}' except the ones that hit EOF.
        # NOTE(review): the expected shortfall is hard-coded to 2,
        # presumably one trailing block per fixture file -- confirm
        # against ``files``.
        closed = [chunk for chunk in chunks if chunk.endswith(b'}')]
        assert len(closed) == len(chunks) - 2
        assert b"".join(chunks) == expected
Ejemplo n.º 2
0
def test_read_bytes_blocksize_on_large_data(e, s, a, b):
    """Check the number of lazy values produced when ``blocksize=None``.

    A single CSV path must yield exactly one value and the 2014 glob
    must yield twelve (presumably one per monthly file -- confirm
    against the bucket contents).
    """
    options = dict(lazy=True, blocksize=None)

    single = read_bytes('dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv',
                        **options)
    assert len(single) == 1

    yearly = read_bytes('dask-data/nyc-taxi/2014/*.csv', **options)
    assert len(yearly) == 12
Ejemplo n.º 3
0
def test_read_bytes_delimited(e, s, a, b):
    """Check that ``read_bytes`` respects the ``delimiter`` argument.

    For several block sizes, the non-empty blocks gathered from the
    matching files must reassemble into the contents of ``files``
    (joined in sorted key order), and changing the delimiter must change
    the keys of the returned futures.
    """
    pattern = test_bucket_name + '/test/accounts*'
    # Reference payload: every fixture file joined in sorted key order.
    expected = b"".join(files[name] for name in sorted(files))

    for blocksize in (5, 15, 45, 1500):
        by_newline = read_bytes(pattern, lazy=False, blocksize=blocksize,
                                delimiter=b'\n')
        by_foo = read_bytes(pattern, lazy=False, blocksize=blocksize,
                            delimiter=b'foo')
        # The delimiter must take part in the future keys.
        newline_keys = [fut.key for fut in by_newline]
        foo_keys = [fut.key for fut in by_foo]
        assert newline_keys != foo_keys

        gathered = yield e._gather(by_newline)
        chunks = [chunk for chunk in gathered if chunk]
        # Every non-empty block has to stop right after a newline.
        assert not [chunk for chunk in chunks if not chunk.endswith(b'\n')]
        assert b''.join(chunks).split(b'\n') == expected.split(b'\n')

        # Now use a delimiter that is not the final byte of the data.
        brace = b'}'
        by_brace = read_bytes(pattern, lazy=False, blocksize=blocksize,
                              delimiter=brace)
        gathered = yield e._gather(by_brace)
        chunks = [chunk for chunk in gathered if chunk]
        # All blocks should end in '}' except the ones that hit EOF.
        # NOTE(review): the expected shortfall is hard-coded to 2,
        # presumably one trailing block per fixture file -- confirm
        # against ``files``.
        closed = [chunk for chunk in chunks if chunk.endswith(b'}')]
        assert len(closed) == len(chunks) - 2
        assert b"".join(chunks) == expected
Ejemplo n.º 4
0
def test_read_bytes_blocksize_on_large_data(e, s, a, b):
    """Check the number of lazy values produced when ``blocksize=None``.

    A single CSV path must yield exactly one value and the 2014 glob
    must yield twelve (presumably one per monthly file -- confirm
    against the bucket contents).
    """
    options = dict(lazy=True, blocksize=None)

    single = read_bytes('dask-data/nyc-taxi/2015/yellow_tripdata_2015-01.csv',
                        **options)
    assert len(single) == 1

    yearly = read_bytes('dask-data/nyc-taxi/2014/*.csv', **options)
    assert len(yearly) == 12
Ejemplo n.º 5
0
def test_read_bytes_block(e, s, a, b):
    """Check block counts and byte totals of ``read_bytes`` per block size.

    For each block size the default (computed via ``e.compute``) and the
    ``lazy=False`` reads must produce the same number of blocks, carry
    the same total number of bytes as ``files``, and contain the same
    set of newline-separated lines.
    """
    contents = files.values()
    total_bytes = sum(len(blob) for blob in contents)

    for blocksize in (5, 15, 45, 1500):
        deferred = read_bytes(test_bucket_name+'/test/account*',
                              blocksize=blocksize)
        # Each file contributes floor(len / blocksize) + 1 blocks.
        expected_blocks = sum(len(blob) // blocksize + 1
                              for blob in files.values())
        assert len(deferred) == expected_blocks

        computed = e.compute(deferred)
        payloads = yield e._gather(computed)
        assert sum(map(len, payloads)) == total_bytes

        eager = read_bytes(test_bucket_name+'/test/accounts*',
                           blocksize=blocksize, lazy=False)
        assert len(deferred) == len(eager)
        payloads = yield e._gather(eager)
        assert sum(map(len, payloads)) == total_bytes

        # Block order is not relied upon here, so compare lines as sets.
        got_lines = set(b"".join(payloads).split(b'\n'))
        want_lines = set(b"".join(files.values()).split(b'\n'))
        assert got_lines == want_lines
Ejemplo n.º 6
0
def test_read_bytes_lazy(e, s, a, b):
    """Check that ``lazy=True`` returns ``Value`` objects that compute.

    Every returned item must be a ``Value``; once computed and gathered,
    the results must include the contents of every entry in ``files``.
    """
    deferred = read_bytes(test_bucket_name+'/test/', lazy=True)
    for item in deferred:
        assert isinstance(item, Value)

    pending = e.compute(deferred, sync=False)
    payloads = yield e._gather(pending)

    # Extra objects under the prefix are tolerated; missing ones are not.
    assert set(payloads) >= set(files.values())
Ejemplo n.º 7
0
def test_read_bytes_block(e, s, a, b):
    """Check block counts and byte totals of ``read_bytes`` per block size.

    For each block size the default (computed via ``e.compute``) and the
    ``lazy=False`` reads must produce the same number of blocks, carry
    the same total number of bytes as ``files``, and contain the same
    set of newline-separated lines.
    """
    contents = files.values()
    total_bytes = sum(len(blob) for blob in contents)

    for blocksize in (5, 15, 45, 1500):
        deferred = read_bytes(test_bucket_name+'/test/account*',
                              blocksize=blocksize)
        # Each file contributes floor(len / blocksize) + 1 blocks.
        expected_blocks = sum(len(blob) // blocksize + 1
                              for blob in files.values())
        assert len(deferred) == expected_blocks

        computed = e.compute(deferred)
        payloads = yield e._gather(computed)
        assert sum(map(len, payloads)) == total_bytes

        eager = read_bytes(test_bucket_name+'/test/accounts*',
                           blocksize=blocksize, lazy=False)
        assert len(deferred) == len(eager)
        payloads = yield e._gather(eager)
        assert sum(map(len, payloads)) == total_bytes

        # Block order is not relied upon here, so compare lines as sets.
        got_lines = set(b"".join(payloads).split(b'\n'))
        want_lines = set(b"".join(files.values()).split(b'\n'))
        assert got_lines == want_lines
Ejemplo n.º 8
0
def test_read_bytes_lazy(e, s, a, b):
    """Check that ``lazy=True`` returns ``Value`` objects that compute.

    Every returned item must be a ``Value``; once computed and gathered,
    the results must include the contents of every entry in ``files``.
    """
    deferred = read_bytes(test_bucket_name+'/test/', lazy=True)
    for item in deferred:
        assert isinstance(item, Value)

    pending = e.compute(deferred, sync=False)
    payloads = yield e._gather(pending)

    # Extra objects under the prefix are tolerated; missing ones are not.
    assert set(payloads) >= set(files.values())
Ejemplo n.º 9
0
def test_read_bytes(s, a, b):
    """Read every object under ``test/`` and compare with ``files``.

    An ``Executor`` is started by hand against the scheduler ``s``. The
    gathered results must include the contents of every entry in
    ``files``; additional objects under the prefix are tolerated.
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()
    try:
        futures = read_bytes(test_bucket_name, prefix='test/', anon=True)
        assert len(futures) >= len(files)
        results = yield e._gather(futures)
        assert set(results).issuperset(set(files.values()))
    finally:
        # Always shut the executor down, even when an assertion or the
        # gather fails, so a failing test does not leak a live executor.
        yield e._shutdown()
Ejemplo n.º 10
0
def test_read_bytes_lazy(s, a, b):
    """Check that a lazy read under ``test/`` returns computable ``Value``s.

    An ``Executor`` is started by hand against the scheduler ``s``. Every
    item returned by ``read_bytes`` must be a ``Value``; once computed
    and gathered, the results must include the contents of every entry
    in ``files``.
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()
    try:
        values = read_bytes(test_bucket_name, 'test/', lazy=True, anon=True)
        assert all(isinstance(v, Value) for v in values)

        results = e.compute(values, sync=False)
        results = yield e._gather(results)

        assert set(results).issuperset(set(files.values()))
    finally:
        # Always shut the executor down, even when an assertion or the
        # gather fails, so a failing test does not leak a live executor.
        yield e._shutdown()
Ejemplo n.º 11
0
def test_read_bytes(s, a, b):
    """Eagerly read every object under ``test/`` and compare with ``files``.

    An ``Executor`` is started by hand against the scheduler ``s``. The
    gathered results must include the contents of every entry in
    ``files``; additional objects under the prefix are tolerated.
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()
    try:
        futures = read_bytes(test_bucket_name,
                             prefix='test/',
                             anon=True,
                             lazy=False)
        assert len(futures) >= len(files)
        results = yield e._gather(futures)
        assert set(results).issuperset(set(files.values()))
    finally:
        # Always shut the executor down, even when an assertion or the
        # gather fails, so a failing test does not leak a live executor.
        yield e._shutdown()
Ejemplo n.º 12
0
def test_read_bytes_lazy(s, a, b):
    """Check that a lazy read under ``test/`` returns computable ``Value``s.

    An ``Executor`` is started by hand against the scheduler ``s``. Every
    item returned by ``read_bytes`` must be a ``Value``; once computed
    and gathered, the results must include the contents of every entry
    in ``files``.
    """
    e = Executor((s.ip, s.port), start=False)
    yield e._start()
    try:
        values = read_bytes(test_bucket_name, 'test/', lazy=True, anon=True)
        assert all(isinstance(v, Value) for v in values)

        results = e.compute(values, sync=False)
        results = yield e._gather(results)

        assert set(results).issuperset(set(files.values()))
    finally:
        # Always shut the executor down, even when an assertion or the
        # gather fails, so a failing test does not leak a live executor.
        yield e._shutdown()
Ejemplo n.º 13
0
def test_read_bytes_blocksize_none(e, s, a, b):
    """With ``blocksize=None`` there must be one future per entry in ``files``."""
    pattern = test_bucket_name + '/test/accounts.*'
    whole_files = read_bytes(pattern, lazy=False, blocksize=None)
    expected_count = len(files)
    assert len(whole_files) == expected_count
Ejemplo n.º 14
0
def test_read_bytes(e, s, a, b):
    """Eagerly read the ``accounts.*`` objects and compare with ``files``.

    There must be at least one future per entry in ``files``, and the
    set of gathered payloads must equal the set of file contents.
    """
    pattern = test_bucket_name + '/test/accounts.*'
    pending = read_bytes(pattern, lazy=False)
    assert not len(pending) < len(files)

    payloads = yield e._gather(pending)
    wanted = set(files.values())
    assert set(payloads) == wanted
Ejemplo n.º 15
0
def test_read_bytes_blocksize_none(e, s, a, b):
    """With ``blocksize=None`` there must be one future per entry in ``files``."""
    pattern = test_bucket_name + '/test/accounts.*'
    whole_files = read_bytes(pattern, lazy=False, blocksize=None)
    expected_count = len(files)
    assert len(whole_files) == expected_count
Ejemplo n.º 16
0
def test_read_bytes(e, s, a, b):
    """Eagerly read the ``accounts.*`` objects and compare with ``files``.

    There must be at least one future per entry in ``files``, and the
    set of gathered payloads must equal the set of file contents.
    """
    pattern = test_bucket_name + '/test/accounts.*'
    pending = read_bytes(pattern, lazy=False)
    assert not len(pending) < len(files)

    payloads = yield e._gather(pending)
    wanted = set(files.values())
    assert set(payloads) == wanted