예제 #1
0
def test_read_text(s, a, b):
    """Exercise ``read_text`` in its bag, lazy-value and future output modes.

    An executor is started by hand against scheduler ``s``.  The body of the
    test runs inside ``try``/``finally`` so the executor is shut down even
    when one of the assertions fails.

    Checks performed:

    * ``lazy=True, collection=True`` returns a ``dask.bag.Bag`` and leaves
      the scheduler without tasks;
    * computing the summed ``amount`` field gives ``(1 + ... + 8) * 100``;
    * ``lazy=True, collection=False`` yields ``Value`` objects;
    * ``lazy=False, collection=False`` yields ``Future`` objects.
    """
    pytest.importorskip('dask.bag')
    import dask.bag as db
    from dask.imperative import Value
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    try:
        # Use a dedicated name instead of rebinding the worker argument ``b``.
        bag = read_text(test_bucket_name, 'test/accounts', lazy=True,
                        collection=True, anon=True)
        assert isinstance(bag, db.Bag)
        # Give the scheduler a moment; a lazy read must not have queued work.
        yield gen.sleep(0.2)
        assert not s.tasks

        # ``filter(None)`` drops falsy (e.g. empty) lines before JSON parsing.
        total = bag.filter(None).map(json.loads).pluck('amount').sum()
        future = e.compute(total)
        result = yield future._result()

        assert result == (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8) * 100

        text = read_text(test_bucket_name, 'test/accounts', lazy=True,
                         collection=False, anon=True)
        assert all(isinstance(v, Value) for v in text)

        text = read_text(test_bucket_name, 'test/accounts', lazy=False,
                         collection=False, anon=True)
        assert all(isinstance(v, Future) for v in text)
    finally:
        # Always release the executor, including on assertion failure.
        yield e._shutdown()
예제 #2
0
def test_read_text(e, s, a, b):
    """Check the three output flavours of ``read_text`` on the accounts files.

    A lazy collection must be a ``dask.bag.Bag`` that leaves the scheduler
    idle, lazy non-collection output must consist of ``Delayed`` objects,
    and eager non-collection output must consist of ``Future`` objects.
    """
    import dask.bag as db

    path = test_bucket_name + '/test/accounts*'
    expected_total = sum(range(1, 9)) * 100

    bag = read_text(path, lazy=True, collection=True, anon=True)
    assert isinstance(bag, db.Bag)
    # Nothing should have reached the scheduler for a lazy read.
    yield gen.sleep(0.2)
    assert not s.tasks

    amounts = bag.map(json.loads).pluck('amount')
    future = e.compute(amounts.sum())
    result = yield future._result()

    assert result == expected_total

    lazy_pieces = read_text(path, lazy=True, collection=False, anon=True)
    assert all(isinstance(piece, Delayed) for piece in lazy_pieces)

    eager_pieces = read_text(path, lazy=False, collection=False, anon=True)
    assert all(isinstance(piece, Future) for piece in eager_pieces)
예제 #3
0
def test_read_text_bucket_key_inputs(loop):
    """Equivalent bucket/key spellings must produce identical keys.

    The same location is given as (bucket, '/key'), (bucket, 'key') and a
    single 'bucket/key' string; all three results must report the same
    ``_keys()``.
    """
    with cluster() as (s, [a, b]), \
            Executor(('127.0.0.1', s['port']), loop=loop) as e:
        leading_slash = read_text(test_bucket_name, '/text/accounts',
                                  lazy=True)
        plain_key = read_text(test_bucket_name, 'text/accounts', lazy=True)
        joined_path = read_text(test_bucket_name + '/text/accounts',
                                lazy=True)

        assert (leading_slash._keys()
                == plain_key._keys()
                == joined_path._keys())
예제 #4
0
def test_read_text_bucket_key_inputs(loop):
    """Verify that three ways of naming one path yield the same task keys."""
    key = 'text/accounts'
    with cluster() as (s, [a, b]):
        address = ('127.0.0.1', s['port'])
        with Executor(address, loop=loop) as e:
            # Separate bucket and key, key written with a leading slash.
            with_slash = read_text(test_bucket_name, '/' + key, lazy=True)
            # Separate bucket and key, no leading slash.
            without_slash = read_text(test_bucket_name, key, lazy=True)
            # Bucket and key combined into one path string.
            combined = read_text(test_bucket_name + '/' + key, lazy=True)

            assert with_slash._keys() == without_slash._keys() == combined._keys()
예제 #5
0
def test_read_text_blocksize(e, s, a, b):
    """Partition count must follow from the requested ``blocksize``.

    For each block size, the resulting bag is expected to have one partition
    per ``blocksize``-sized piece of every entry in ``files``, i.e. the sum
    of ``ceil(len(entry) / blocksize)`` over all entries.
    """
    pattern = test_bucket_name + '/test/accounts*'
    for blocksize in [20, 27, 12]:
        bag = read_text(pattern,
                        lazy=True,
                        blocksize=blocksize,
                        collection=True)
        actual = bag.npartitions
        expected = sum(ceil(len(content) / blocksize)
                       for content in files.values())
        assert actual == expected
예제 #6
0
def test_read_text_compression(e, s, a, b):
    """Read gzip-compressed text and compare it with the known CSV lines.

    The expected value is every non-empty line of every entry in
    ``csv_files`` (visited in sorted key order), each terminated by a
    newline.
    """
    bag = read_text('distributed-test/csv/gzip/*', compression='gzip',
                    blocksize=None, anon=True)
    result = yield e.compute(bag)._result()

    expected = []
    for name in sorted(csv_files):
        for line in csv_files[name].decode().split('\n'):
            if line:
                expected.append(line + '\n')

    assert result == expected
예제 #7
0
def test_read_text_compression(e, s, a, b):
    """Check that gzip-compressed files are read back line by line.

    The computed result must equal the non-empty lines of ``csv_files``
    taken in sorted key order, with a trailing newline on each line.
    """
    compressed = read_text('distributed-test/csv/gzip/*',
                           compression='gzip',
                           blocksize=None)
    result = yield e.compute(compressed)._result()

    expected = []
    for key in sorted(csv_files):
        lines = csv_files[key].decode().split('\n')
        # Skip empty strings produced by splitting on the newline separator.
        expected.extend(line + '\n' for line in lines if line)

    assert result == expected
예제 #8
0
def test_read_text_sync(loop):
    """Run the accounts sum through the executor's synchronous ``get``."""
    import dask.bag as db
    with cluster() as (s, [a, b]), \
            Executor(('127.0.0.1', s['port']), loop=loop) as e:
        bag = read_text(test_bucket_name+'/test/accounts*', lazy=True,
                        collection=True)
        assert isinstance(bag, db.Bag)

        # Parse each line as JSON and add up the 'amount' fields.
        records = bag.map(json.loads)
        total = records.pluck('amount').sum()
        result = total.compute(get=e.get)

        assert result == sum(range(1, 9)) * 100
예제 #9
0
def test_read_text_sync(loop):
    """Compute the summed ``amount`` field synchronously via ``e.get``.

    Falsy lines are filtered out before JSON decoding; the total must equal
    ``(1 + ... + 8) * 100``.
    """
    import dask.bag as db
    expected_total = 36 * 100  # (1 + 2 + ... + 8) * 100
    with cluster() as (s, [a, b]):
        address = ('127.0.0.1', s['port'])
        with Executor(address, loop=loop) as e:
            bag = read_text(test_bucket_name+'/test/accounts*',
                            lazy=True,
                            collection=True)
            assert isinstance(bag, db.Bag)

            non_empty = bag.filter(None)
            amounts = non_empty.map(json.loads).pluck('amount')
            result = amounts.sum().compute(get=e.get)

            assert result == expected_total
예제 #10
0
def test_read_text(e, s, a, b):
    """Cover the lazy-bag, lazy-delayed and eager-future paths of ``read_text``."""
    import dask.bag as db

    location = test_bucket_name+'/test/accounts*'

    # Lazy collection: a Bag, and no work handed to the scheduler yet.
    accounts = read_text(location, lazy=True, collection=True, anon=True)
    assert isinstance(accounts, db.Bag)
    yield gen.sleep(0.2)
    assert not s.tasks

    # Summing the 'amount' field of every JSON record.
    total = accounts.map(json.loads).pluck('amount').sum()
    future = e.compute(total)
    result = yield future._result()

    assert result == 100 * (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8)

    # Lazy, non-collection output: every element is a Delayed.
    delayed_values = read_text(location, lazy=True, collection=False,
                               anon=True)
    assert all(isinstance(item, Delayed) for item in delayed_values)

    # Eager, non-collection output: every element is a Future.
    futures = read_text(location, lazy=False, collection=False, anon=True)
    assert all(isinstance(item, Future) for item in futures)
예제 #11
0
def test_read_text(s, a, b):
    """Verify the bag, ``Value`` and ``Future`` outputs of ``read_text``.

    The executor is created unstarted, started explicitly, and released in a
    ``finally`` clause so that a failing assertion cannot leave it running.

    The test asserts that a lazy collection is a ``dask.bag.Bag`` which adds
    no tasks to the scheduler, that the summed ``amount`` field equals
    ``(1 + ... + 8) * 100``, that lazy non-collection output is made of
    ``Value`` objects, and that eager non-collection output is made of
    ``Future`` objects.
    """
    pytest.importorskip('dask.bag')
    import dask.bag as db
    from dask.imperative import Value
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    try:
        # Named ``bag`` so the worker argument ``b`` is not overwritten.
        bag = read_text(test_bucket_name,
                        'test/accounts',
                        lazy=True,
                        collection=True,
                        anon=True)
        assert isinstance(bag, db.Bag)
        # After a short wait the scheduler must still have no tasks.
        yield gen.sleep(0.2)
        assert not s.tasks

        # Falsy lines are removed before each remaining line is JSON-decoded.
        total = bag.filter(None).map(json.loads).pluck('amount').sum()
        future = e.compute(total)
        result = yield future._result()

        assert result == (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8) * 100

        text = read_text(test_bucket_name,
                         'test/accounts',
                         lazy=True,
                         collection=False,
                         anon=True)
        assert all(isinstance(v, Value) for v in text)

        text = read_text(test_bucket_name,
                         'test/accounts',
                         lazy=False,
                         collection=False,
                         anon=True)
        assert all(isinstance(v, Future) for v in text)
    finally:
        # Shut the executor down whether or not the assertions passed.
        yield e._shutdown()
예제 #12
0
def test_read_text_blocksize(e, s, a, b):
    """Check ``npartitions`` against the block count implied by ``blocksize``."""
    for block_size in [20, 27, 12]:
        bag = read_text(test_bucket_name+'/test/accounts*',
                        lazy=True,
                        blocksize=block_size,
                        collection=True)
        n_partitions = bag.npartitions

        # Each entry in ``files`` contributes ceil(len(entry) / block_size).
        expected = 0
        for payload in files.values():
            expected += ceil(len(payload) / block_size)

        assert n_partitions == expected
예제 #13
0
def test_read_text_compression(e, s, a, b):
    """Compare gzip-read text with the decoded, newline-split ``csv_files``.

    The expected list is built from every piece obtained by splitting each
    decoded entry of ``csv_files`` on newlines, in sorted key order, with no
    filtering and no newline re-appended.
    """
    bag = read_text('distributed-test/csv/gzip/', compression='gzip')
    result = yield e.compute(bag)._result()

    expected = []
    for key in sorted(csv_files):
        expected.extend(csv_files[key].decode().split('\n'))

    assert result == expected