# Test snippets for distributed's HDFS/S3 byte-reading helpers. Names such
# as make_hdfs, read_bytes/read_binary, _read_avro, get_block_locations,
# Executor, Future, Value, gen, pytest, files, data, avro_bytes, and
# test_bucket_name are assumed to be supplied by the surrounding test
# modules.


def test_lazy_values(s, a, b):
    with make_hdfs() as hdfs:
        data = b'a'
        for i in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % i)
            for j in range(2):
                fn = '/tmp/test/data-%d/file-%d.csv' % (i, j)
                with hdfs.open(fn, 'w', repl=1) as f:
                    f.write(data)

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        values = read_bytes('/tmp/test/', hdfs=hdfs, lazy=True)
        assert all(isinstance(v, Value) for v in values)

        # Lazy values register restrictions with the scheduler but submit
        # no tasks until computed.
        while not s.restrictions:
            yield gen.sleep(0.01)
        assert not s.dask

        results = e.compute(*values, sync=False)
        results = yield e._gather(results)
        assert len(results) == 6
        assert all(x == b'a' for x in results)


def dont_test_dataframes(s, a):  # slow
    pytest.importorskip('pandas')
    n = 3000000
    fn = '/tmp/test/file.csv'
    with make_hdfs() as hdfs:
        data = (b'name,amount,id\r\n' +
                b'Alice,100,1\r\nBob,200,2\r\n' * n)
        with hdfs.open(fn, 'w') as f:
            f.write(data)

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        futures = read_bytes(fn, hdfs=hdfs, delimiter=b'\r\n')
        assert len(futures) > 1

        def load(b, **kwargs):
            assert b
            from io import BytesIO
            import pandas as pd
            bio = BytesIO(b)
            return pd.read_csv(bio, **kwargs)

        dfs = e.map(load, futures, names=['name', 'amount', 'id'],
                    skiprows=1)
        dfs2 = yield e._gather(dfs)
        assert sum(map(len, dfs2)) == n * 2 - 1
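

# The load helper above is the crux: read_bytes with delimiter=b'\r\n'
# yields byte chunks that begin and end on record boundaries, and each chunk
# is parsed independently with skiprows=1 (dropping the header in the first
# chunk and one data row in each later chunk, hence n * 2 - 1 rows when the
# file splits into two chunks). A minimal local sketch of that round trip,
# with chunk boundaries chosen by hand rather than by HDFS block size:

def _delimited_csv_sketch():
    from io import BytesIO

    import pandas as pd

    n = 3
    payload = b'name,amount,id\r\n' + b'Alice,100,1\r\nBob,200,2\r\n' * n

    # Split on the record delimiter so every chunk holds whole lines, as
    # read_bytes(..., delimiter=b'\r\n') would.
    lines = payload.split(b'\r\n')
    chunks = [b'\r\n'.join(lines[:4]) + b'\r\n',
              b'\r\n'.join(lines[4:])]

    def load(b, **kwargs):
        assert b
        return pd.read_csv(BytesIO(b), **kwargs)

    dfs = [load(c, names=['name', 'amount', 'id'], skiprows=1)
           for c in chunks]
    assert sum(map(len, dfs)) == n * 2 - 1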


def test_avro(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    avro_files = {'/tmp/test/1.avro': avro_bytes,
                  '/tmp/test/2.avro': avro_bytes}

    with make_hdfs() as hdfs:
        for k, v in avro_files.items():
            with hdfs.open(k, 'w') as f:
                f.write(v)
            assert hdfs.info(k)['size'] > 0

        L = yield _read_avro('/tmp/test/*.avro', lazy=False)
        assert isinstance(L, list)
        assert all(isinstance(x, Future) for x in L)

        results = yield e._gather(L)
        assert all(isinstance(r, list) for r in results)
        assert results[0][:5] == data[:5]
        assert results[-1][-5:] == data[-5:]

        L = yield _read_avro('/tmp/test/*.avro', lazy=True)
        assert isinstance(L, list)
        assert all(isinstance(x, Value) for x in L)


def test_avro(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    avro_files = {'/tmp/test/1.avro': avro_bytes,
                  '/tmp/test/2.avro': avro_bytes}

    with make_hdfs() as hdfs:
        for k, v in avro_files.items():
            with hdfs.open(k, 'wb') as f:
                f.write(v)
            assert hdfs.info(k)['size'] > 0

        L = yield _read_avro('/tmp/test/*.avro', lazy=False)
        assert isinstance(L, list)
        assert all(isinstance(x, Future) for x in L)

        results = yield e._gather(L)
        assert all(isinstance(r, list) for r in results)
        assert results[0][:5] == data[:5]
        assert results[-1][-5:] == data[-5:]

        L = yield _read_avro('/tmp/test/*.avro', lazy=True)
        assert isinstance(L, list)
        assert all(isinstance(x, Value) for x in L)

        yield e._shutdown()
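

# avro_bytes and data are fixtures from the surrounding module: an Avro
# container file serialized to bytes, and the list of records it holds. One
# plausible way to build such a pair, sketched with fastavro (an assumption;
# the original fixtures may be constructed differently):

def _make_avro_fixtures():
    from io import BytesIO

    import fastavro

    schema = {'name': 'Person',
              'type': 'record',
              'fields': [{'name': 'name', 'type': 'string'},
                         {'name': 'amount', 'type': 'int'}]}
    data = [{'name': 'Alice', 'amount': i} for i in range(100)]

    # Serialize the records into an in-memory Avro container file.
    buf = BytesIO()
    fastavro.writer(buf, schema, data)
    avro_bytes = buf.getvalue()

    # Reading the bytes back yields the original records, which is what the
    # futures/values from _read_avro resolve to in the tests above.
    assert list(fastavro.reader(BytesIO(avro_bytes))) == data
    return avro_bytes, data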


def dont_test_dataframes(s, a):  # slow
    pytest.importorskip('pandas')
    n = 3000000
    fn = '/tmp/test/file.csv'
    with make_hdfs() as hdfs:
        data = (b'name,amount,id\r\n' +
                b'Alice,100,1\r\nBob,200,2\r\n' * n)
        with hdfs.open(fn, 'w') as f:
            f.write(data)

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        futures = read_binary(fn, hdfs=hdfs, delimiter=b'\r\n')
        assert len(futures) > 1

        def load(b, **kwargs):
            assert b
            from io import BytesIO
            import pandas as pd
            bio = BytesIO(b)
            return pd.read_csv(bio, **kwargs)

        dfs = e.map(load, futures, names=['name', 'amount', 'id'],
                    skiprows=1)
        dfs2 = yield e._gather(dfs)
        assert sum(map(len, dfs2)) == n * 2 - 1


def test_lazy_values(s, a, b):
    with make_hdfs() as hdfs:
        data = b'a'
        for i in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % i)
            for j in range(2):
                fn = '/tmp/test/data-%d/file-%d.csv' % (i, j)
                with hdfs.open(fn, 'w', repl=1) as f:
                    f.write(data)

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        values = read_binary('/tmp/test/', hdfs=hdfs, lazy=True)
        assert all(isinstance(v, Value) for v in values)

        while not s.restrictions:
            yield gen.sleep(0.01)
        assert not s.dask

        results = e.compute(*values, sync=False)
        results = yield e._gather(results)
        assert len(results) == 6
        assert all(x == b'a' for x in results)


def test_read_bytes(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    futures = read_bytes(test_bucket_name, prefix='test/', anon=True)
    assert len(futures) >= len(files)

    results = yield e._gather(futures)
    assert set(results).issuperset(set(files.values()))

    yield e._shutdown()


def test_read_bytes_lazy(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    values = read_bytes(test_bucket_name, 'test/', lazy=True, anon=True)
    assert all(isinstance(v, Value) for v in values)

    results = e.compute(values, sync=False)
    results = yield e._gather(results)
    assert set(results).issuperset(set(files.values()))

    yield e._shutdown()


def test_read_bytes(s, a, b):
    e = Executor((s.ip, s.port), start=False)
    yield e._start()

    futures = read_bytes(test_bucket_name, prefix='test/', anon=True,
                         lazy=False)
    assert len(futures) >= len(files)

    results = yield e._gather(futures)
    assert set(results).issuperset(set(files.values()))

    yield e._shutdown()
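

# files maps S3 key names to their byte contents, and test_bucket_name is a
# module-level fixture. A rough local analogue of the eager read_bytes path,
# using s3fs directly (assuming anonymous access to the same public bucket):

def _s3_read_sketch():
    import s3fs

    fs = s3fs.S3FileSystem(anon=True)

    # List every key under the prefix and fetch its bytes; read_bytes
    # returns the same payloads, but as futures executing on the cluster.
    keys = fs.ls(test_bucket_name + '/test/')
    return [fs.cat(key) for key in keys]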


def test_read_bytes(s, a, b):
    with make_hdfs() as hdfs:
        data = b'a' * int(1e8)
        fn = '/tmp/test/file'
        with hdfs.open(fn, 'w', repl=1) as f:
            f.write(data)
        blocks = hdfs.get_block_locations(fn)
        assert len(blocks) > 1

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        futures = read_bytes(fn, hdfs=hdfs)
        assert len(futures) == len(blocks)
        assert futures[0].executor is e
        results = yield e._gather(futures)
        assert b''.join(results) == data

        # Each block-read task is restricted to the hosts holding that
        # block, but only loosely, so other workers may still steal it.
        assert s.restrictions
        assert {f.key for f in futures}.issubset(s.loose_restrictions)


def test_get_block_locations_nested(s, a, b):
    with make_hdfs() as hdfs:
        data = b'a'
        for i in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % i)
            for j in range(2):
                fn = '/tmp/test/data-%d/file-%d.csv' % (i, j)
                with hdfs.open(fn, 'w', repl=1) as f:
                    f.write(data)

        L = get_block_locations(hdfs, '/tmp/test/')
        assert len(L) == 6

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        futures = read_bytes('/tmp/test/', hdfs=hdfs)
        results = yield e._gather(futures)
        assert len(results) == 6
        assert all(x == b'a' for x in results)


def test_get_block_locations_nested(s, a, b):
    with make_hdfs() as hdfs:
        data = b'a'
        for i in range(3):
            hdfs.mkdir('/tmp/test/data-%d' % i)
            for j in range(2):
                fn = '/tmp/test/data-%d/file-%d.csv' % (i, j)
                with hdfs.open(fn, 'w', repl=1) as f:
                    f.write(data)

        L = get_block_locations(hdfs, '/tmp/test/')
        assert len(L) == 6

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        futures = read_binary('/tmp/test/', hdfs=hdfs)
        results = yield e._gather(futures)
        assert len(results) == 6
        assert all(x == b'a' for x in results)


def test_read_binary(s, a, b):
    with make_hdfs() as hdfs:
        # A positive handle means the underlying libhdfs connection is live.
        assert hdfs._handle > 0

        data = b'a' * int(1e8)
        fn = '/tmp/test/file'
        with hdfs.open(fn, 'w', repl=1) as f:
            f.write(data)
        blocks = hdfs.get_block_locations(fn)
        assert len(blocks) > 1

        e = Executor((s.ip, s.port), start=False)
        yield e._start()

        futures = read_binary(fn, hdfs=hdfs)
        assert len(futures) == len(blocks)
        assert futures[0].executor is e
        results = yield e._gather(futures)
        assert b''.join(results) == data
        assert s.restrictions
        assert {f.key for f in futures}.issubset(s.loose_restrictions)
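

# All of the tests above are coroutines expecting a live scheduler s and
# workers a/b. In distributed's test suite that wiring comes from a decorator
# in the style of distributed.utils_test.gen_cluster, which starts a
# scheduler and workers, passes them in, and drives the generator on the
# event loop. A minimal sketch; the decorator's arguments and the (s, a, b)
# signature follow the old API and are assumptions that vary across versions:

from distributed import Executor
from distributed.utils_test import gen_cluster


@gen_cluster(timeout=60)
def _example_cluster_test(s, a, b):
    # The decorator supplies a running Scheduler and two Workers and runs
    # this generator to completion; the body mirrors the tests above. The
    # leading underscore keeps pytest from collecting the sketch.
    e = Executor((s.ip, s.port), start=False)
    yield e._start()
    yield e._shutdown()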