Example #1
File: core.py  Project: tym1062/dask-cudf
def optimize(dsk, keys, **kwargs):
    # Normalize the requested keys to a flat list before trimming the graph.
    flatkeys = list(flatten(keys)) if isinstance(keys, list) else [keys]
    # Drop tasks that are not needed to compute the requested keys.
    dsk, dependencies = cull(dsk, flatkeys)
    # Fuse linear chains of tasks; the fusion width comes from dask's global options.
    dsk, dependencies = fuse(dsk, keys, dependencies=dependencies,
                             ave_width=_globals.get('fuse_ave_width', 1))
    # Cull again to discard anything left unreferenced after fusion.
    dsk, _ = cull(dsk, keys)
    return dsk
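This optimize hook is what dask invokes on a collection's graph before handing it to a scheduler: cull drops unneeded tasks and fuse collapses linear chains. The imports below are an assumption for the pre-0.18 dask this snippet targets (current releases expose cull and fuse from dask.optimization and have replaced _globals with dask.config):

# Assumed imports for the snippet above; exact module paths vary across dask versions.
from dask.core import flatten              # flatten nested lists of keys
from dask.optimization import cull, fuse   # graph trimming and task fusion
from dask.context import _globals          # legacy global options dict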
Example #2
def test_set_options_context_manager():
    # As a context manager, the option is visible inside the block and
    # removed again once the block exits.
    with set_options(foo='bar'):
        assert _globals['foo'] == 'bar'
    assert _globals.get('foo', None) is None

    # Called directly, the option is set globally, so the test must clean up.
    try:
        set_options(foo='baz')
        assert _globals['foo'] == 'baz'
    finally:
        del _globals['foo']
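The test only makes sense given how set_options interacts with _globals. A minimal sketch of that behaviour, assuming _globals is a plain module-level dict (this is an illustration, not dask's actual implementation):

class set_options(object):
    def __init__(self, **kwargs):
        self._old = dict(_globals)   # snapshot the current options
        _globals.update(kwargs)      # apply the new options immediately
    def __enter__(self):
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        _globals.clear()
        _globals.update(self._old)   # restore the snapshot on exit

Used as a context manager, the option disappears after the block; called directly, it stays set until removed by hand, which is why the second half of the test deletes the key in a finally clause.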
Example #3
def dask_pipeline(df, schema, canvas, glyph, summary):
    # Build the aggregation graph for this glyph/summary combination.
    dsk, name = glyph_dispatch(glyph, df, schema, canvas, summary)

    # dask 0.18.0 moved scheduler selection to dask.base.get_scheduler();
    # older versions exposed it through the _globals['get'] option.
    if LooseVersion(dask.__version__) >= '0.18.0':
        get = dask.base.get_scheduler() or df.__dask_scheduler__
    else:
        get = _globals.get('get') or getattr(df, '__dask_scheduler__', None) or df._default_get
    keys = getattr(df, '__dask_keys__', None) or df._keys
    optimize = getattr(df, '__dask_optimize__', None) or df._optimize

    # Merge the (optimized) graph of the input dataframe into our own graph.
    dsk.update(optimize(df.dask, keys()))

    # Execute the combined graph and return the aggregated result.
    return get(dsk, name)
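The get callable resolved above follows dask's scheduler interface: it takes a task graph and the key(s) to materialize. A toy illustration with the threaded scheduler (the graph here is made up for the example):

from dask.threaded import get as threaded_get

dsk = {'x': 1, 'y': (lambda a: a + 1, 'x')}
assert threaded_get(dsk, 'y') == 2   # walks the graph and computes key 'y'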
Example #4
def maybe_compress(payload, min_size=1e4, sample_size=1e4, nsamples=5):
    """
    Maybe compress payload

    1.  We don't compress small messages
    2.  We sample the payload in a few spots, compress that, and if it doesn't
        do any good we return the original
    3.  We then compress the full original, it it doesn't compress well then we
        return the original
    4.  We return the compressed result
    """
    compression = _globals.get('compression', default_compression)

    if not compression:
        return None, payload
    if len(payload) < min_size:
        return None, payload
    if len(payload) > 2**31:  # Too large, compression libraries often fail
        return None, payload

    min_size = int(min_size)
    sample_size = int(sample_size)

    compress = compressions[compression]['compress']

    # Compress a sample, return original if not very compressed
    sample = byte_sample(payload, sample_size, nsamples)
    if len(compress(sample)) > 0.9 * len(sample):  # sample not very compressible
        return None, payload

    if type(payload) is memoryview:
        nbytes = payload.itemsize * len(payload)
    else:
        nbytes = len(payload)

    if default_compression and blosc and type(payload) is memoryview:
        # Blosc does itemsize-aware shuffling, resulting in better compression
        compressed = blosc.compress(payload,
                                    typesize=payload.itemsize,
                                    cname='lz4',
                                    clevel=5)
        compression = 'blosc'
    else:
        compressed = compress(ensure_bytes(payload))

    if len(compressed) > 0.9 * nbytes:  # full data not very compressible
        return None, payload
    else:
        return compression, compressed
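A short usage sketch of the function above (the payloads are made up; it assumes the surrounding module's compressions table and default_compression, with an lz4 or snappy backend installed so default_compression is not None):

repetitive = b'abcd' * 50000                    # large and highly compressible
compression, data = maybe_compress(repetitive)  # -> (default_compression, compressed bytes)

tiny = b'hello'
compression, data = maybe_compress(tiny)        # below min_size
assert compression is None and data is tiny     # returned untouched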