Ejemplo n.º 1
0
def test_pure():
    """Keys repeat for identical pure calls and differ for impure ones."""
    # Two independently wrapped pure calls with equal arguments share a key.
    first = do(add, pure=True)(1, 2)
    second = do(add, pure=True)(1, 2)
    assert first.key == second.key

    # Without ``pure=True`` each call is expected to receive its own key.
    lazy_random = do(random)
    key_a = lazy_random().key
    key_b = lazy_random().key
    assert key_a != key_b
Ejemplo n.º 2
0
def test_pure():
    """Check key stability with ``pure=True`` and uniqueness without it."""
    # Build the same pure call twice, each through its own ``do`` wrapper.
    pure_calls = [do(add, pure=True)(1, 2) for _ in range(2)]
    assert pure_calls[0].key == pure_calls[1].key

    # Calls through a wrapper created without ``pure`` must not share a key.
    impure = do(random)
    earlier, later = impure().key, impure().key
    assert earlier != later
Ejemplo n.º 3
0
def test_kwargs():
    """Lazy values are accepted inside list arguments and as keyword values."""
    def mysum(a, b, c=(), **kwargs):
        return a + b + sum(c) + sum(kwargs.values())

    # Run the identical scenario for the default wrapper and the pure one.
    for options in ({}, {'pure': True}):
        lazy_sum = do(mysum, **options)
        total = lazy_sum(1, 2, c=[value(3), 0], four=lazy_sum(2, 2))
        assert total.compute() == 10
Ejemplo n.º 4
0
def test_sync_compute(loop):
    """``Executor.compute(..., sync=True)`` hands back concrete results."""
    with cluster() as (c, [a, b]):
        address = ('127.0.0.1', c['port'])
        with Executor(address) as executor:
            from dask.imperative import do, value
            one = value(1)
            plus_one = do(inc)(one)
            minus_one = do(dec)(one)

            # Both lazy values derive from the same input and are computed
            # together in a single call.
            got_plus, got_minus = executor.compute(plus_one, minus_one,
                                                   sync=True)
            assert (got_plus, got_minus) == (2, 0)
Ejemplo n.º 5
0
def test_kwargs():
    """Keyword arguments holding lazy values are resolved before the call."""
    def mysum(a, b, c=(), **kwargs):
        return a + b + sum(c) + sum(kwargs.values())

    def build_ten(lazy_sum):
        # 1 + 2 + (3 + 0) + (2 + 2), mixing a lazy list element with a
        # lazy keyword argument.
        return lazy_sum(1, 2, c=[value(3), 0], four=lazy_sum(2, 2))

    assert build_ten(do(mysum)).compute() == 10
    assert build_ten(do(mysum, pure=True)).compute() == 10
Ejemplo n.º 6
0
def test_sync_compute(loop):
    """Compute two lazy values synchronously through an ``Executor``."""
    with cluster() as (s, [a, b]), \
            Executor(('127.0.0.1', s['port'])) as e:
        from dask.imperative import do, value
        source = value(1)
        lazy_pair = (do(inc)(source), do(dec)(source))

        # With ``sync=True`` the call returns the results themselves.
        up, down = e.compute(*lazy_pair, sync=True)
        assert (up, down) == (2, 0)
Ejemplo n.º 7
0
    def fit(self, X, y=None):
        """ Lazily fit every step in order and return a new Pipeline

        Each step is fit on the data as transformed by the steps before it.
        The result pairs every original step name with the lazily fitted
        estimator for that step.
        """
        X = value(X)
        if y is not None:
            y = value(y)

        fitted_steps = []
        for step_name, estimator in self.steps:
            fitted = do(fit)(estimator, X, y)
            # Feed the transformed data forward to the next step.
            X = do(transform)(fitted, X)
            fitted_steps.append((step_name, fitted))

        return Pipeline(fitted_steps)
Ejemplo n.º 8
0
def test_iterators():
    """Lazy values inside an iterator argument are resolved for the call."""
    one = value(1)
    two = value(2)

    # Built-in ``sum`` consuming an iterator of lazy values.
    builtin_total = do(sum)(iter([one, two]))
    assert builtin_total.compute() == 3

    def f(seq):
        # The wrapped function must still be handed an iterator.
        assert isinstance(seq, Iterator)
        return sum(seq)

    custom_total = do(f)(iter([one, two]))
    assert custom_total.compute() == 3
Ejemplo n.º 9
0
def test_iterators():
    """Iterator arguments stay iterators and their lazy items get computed."""
    lazy_values = [value(1), value(2)]

    def f(seq):
        assert isinstance(seq, Iterator)
        return sum(seq)

    # Same check for the built-in ``sum`` and for the type-asserting wrapper.
    for func in (sum, f):
        result = do(func)(iter(lazy_values))
        assert result.compute() == 3
Ejemplo n.º 10
0
def test_do():
    """``do`` results compute, nest, support ``+`` and keep input keys."""
    lazy_add = do(add)

    simple = lazy_add(1, 2)
    assert simple.compute() == 3

    with_operator = lazy_add(1, 2) + 3
    assert with_operator.compute() == 6

    nested = lazy_add(lazy_add(1, 2), 3)
    assert nested.compute() == 6

    # The graph of a derived value must contain the key of its lazy input.
    seed = value(1)
    derived = lazy_add(lazy_add(seed, 2), 3)
    assert seed.key in derived.dask
Ejemplo n.º 11
0
def test_do():
    """Exercise plain, operator-combined and nested calls made via ``do``."""
    add2 = do(add)

    # (lazy expression, expected computed result)
    cases = [
        (add2(1, 2), 3),
        (add2(1, 2) + 3, 6),
        (add2(add2(1, 2), 3), 6),
    ]
    for lazy, expected in cases:
        assert lazy.compute() == expected

    # A value created with ``value`` keeps its key in downstream graphs.
    root = value(1)
    chained = add2(add2(root, 2), 3)
    assert root.key in chained.dask
Ejemplo n.º 12
0
def test_async_compute(s, a, b):
    """ Asynchronous compute gives futures for lazy values

    Non-lazy arguments (the literal ``3`` here) are expected to come back
    unchanged.
    """
    executor = Executor((s.ip, s.port), start=False)
    yield executor._start()

    from dask.imperative import do, value
    one = value(1)
    plus_one = do(inc)(one)
    minus_one = do(dec)(one)

    fut_plus, fut_minus, passthrough = executor.compute(
        plus_one, minus_one, 3, sync=False)
    assert isinstance(fut_plus, Future)
    assert isinstance(fut_minus, Future)
    assert passthrough == 3

    gathered = yield executor._gather([fut_plus, fut_minus])
    assert gathered == [2, 0]

    yield executor._shutdown()
Ejemplo n.º 13
0
def test_array_bag_imperative():
    """Sums of numpy arrays, ``da`` arrays and a ``db`` bag combine lazily."""
    dense = np.arange(100).reshape((10, 10))
    gram = dense.dot(dense.T)
    lazy_dense = da.from_array(dense, chunks=(5, 5))
    lazy_gram = da.from_array(gram, chunks=(5, 5))
    bag = db.from_sequence([1, 2, 3])

    partial_sums = [item.sum()
                    for item in [dense, gram, lazy_dense, lazy_gram, bag]]
    total = do(sum)(partial_sums)

    # Each array contributes twice: once as numpy, once wrapped by ``da``.
    actual = total.compute()
    expected = 2 * dense.sum() + 2 * gram.sum() + sum([1, 2, 3])
    assert actual == expected
Ejemplo n.º 14
0
    def f(c, a, b):
        """ Coroutine checking asynchronous compute of lazy values

        Uses ``loop`` from the enclosing scope for the executor.
        """
        executor = Executor((c.ip, c.port), start=False, loop=loop)
        yield executor._start()

        from dask.imperative import do, value
        base = value(1)
        lazy_inc, lazy_dec = do(inc)(base), do(dec)(base)

        yy, zz, aa = executor.compute(lazy_inc, lazy_dec, 3, sync=False)
        futures = [yy, zz]
        # Lazy inputs come back as futures; the plain ``3`` is returned as-is.
        assert all(isinstance(fut, Future) for fut in futures)
        assert aa == 3

        result = yield executor._gather(futures)
        assert result == [2, 0]

        yield executor._shutdown()
Ejemplo n.º 15
0
def test_array_bag_imperative():
    """Mix eager and lazy collections in a single lazily evaluated sum."""
    arr1 = np.arange(100).reshape((10, 10))
    arr2 = arr1.dot(arr1.T)
    arrays = [arr1, arr2]
    wrapped = [da.from_array(arr, chunks=(5, 5)) for arr in arrays]
    numbers = [1, 2, 3]

    # Order matches: numpy arrays, their ``da`` counterparts, then the bag.
    seq = arrays + wrapped + [db.from_sequence(numbers)]
    out = do(sum)([part.sum() for part in seq])

    expected = 2 * arr1.sum() + 2 * arr2.sum() + sum(numbers)
    assert out.compute() == expected
Ejemplo n.º 16
0
def test_from_imperative():
    """Bags and items can be built from lazy values."""
    from dask.imperative import value, do
    chunks = [value([1, 2, 3]), value([4, 5, 6]), value([7, 8, 9])]
    bag = from_imperative(chunks)

    # Building the bag again from the same values must give the same name.
    assert bag.name == from_imperative(chunks).name

    assert isinstance(bag, Bag)
    assert list(bag) == [1, 2, 3, 4, 5, 6, 7, 8, 9]

    # A single lazy result converts to an Item computing the same number.
    lazy_total = do(lambda X: sum(X))(chunks[0])
    item_total = db.Item.from_imperative(lazy_total)
    assert lazy_total.compute() == 6
    assert item_total.compute() == 6
Ejemplo n.º 17
0
def test_from_imperative():
    """``from_imperative`` yields a Bag with a repeatable name."""
    from dask.imperative import value, do
    first, second, third = (value(rows)
                            for rows in ([1, 2, 3], [4, 5, 6], [7, 8, 9]))
    bb = from_imperative([first, second, third])
    rebuilt = from_imperative([first, second, third])
    assert bb.name == rebuilt.name

    assert isinstance(bb, Bag)
    # Elements come out in the order the lazy lists were supplied.
    assert list(bb) == list(range(1, 10))

    asum_value = do(lambda X: sum(X))(first)
    asum_item = db.Item.from_imperative(asum_value)
    value_result = asum_value.compute()
    item_result = asum_item.compute()
    assert value_result == item_result == 6
Ejemplo n.º 18
0
def test_array_imperative():
    """A ``da`` array can be passed to ``do`` alongside numpy data."""
    grid = np.arange(100).reshape((10, 10))
    lazy_grid = da.from_array(grid, chunks=(5, 5))
    expected = grid + grid + 1

    total = do(sum)([grid, lazy_grid, 1])
    assert isinstance(total, Value)
    assert np.allclose(total.compute(), expected)
    assert total.sum().compute() == expected.sum()
    assert total[0, 0].compute() == expected[0, 0]

    # Converting the array must produce a single graph that holds every
    # original key plus exactly one additional key.
    task, dasks = to_task_dasks(lazy_grid)
    assert len(dasks) == 1
    before = set(lazy_grid.dask)
    after = set(dasks[0])
    assert before <= after
    assert len(after - before) == 1
Ejemplo n.º 19
0
def test_array_imperative():
    """Check results and graph contents when ``do`` receives a ``da`` array."""
    arr = np.arange(100).reshape((10, 10))
    darr = da.from_array(arr, chunks=(5, 5))
    doubled_plus_one = arr + arr + 1

    val = do(sum)([arr, darr, 1])
    assert isinstance(val, Value)

    # Whole result, a reduction of it, and a single element must all agree
    # with the eager numpy computation.
    assert np.allclose(val.compute(), doubled_plus_one)
    assert val.sum().compute() == doubled_plus_one.sum()
    assert val[0, 0].compute() == doubled_plus_one[0, 0]

    task, dasks = to_task_dasks(darr)
    assert len(dasks) == 1

    original_keys = set(darr.dask)
    resulting_keys = set(dasks[0])
    assert resulting_keys.issuperset(original_keys)
    added_keys = resulting_keys.difference(original_keys)
    assert len(added_keys) == 1
Ejemplo n.º 20
0
def best_parameters(estimator, cv, X, y, parameter_iterable, scorer,
                    fit_params, iid):
    """ Lazily apply fit-and-score to data on all parameters / folds

    This function does little input checking and it doesn't trigger
    computation.

    Parameters
    ----------
    estimator, scorer, fit_params:
        Forwarded unchanged to every ``_fit_and_score`` call.
    cv: iterable of (train, test) pairs
        Both members of each pair are wrapped with ``value``.
    X:
        Wrapped with ``value``.
    y:
        Wrapped with ``value`` unless it is ``None``.
    parameter_iterable: iterable
        ``_fit_and_score`` is called once per (parameters, fold) combination.
    iid:
        Forwarded to ``pick_best_parameters``.

    Returns
    -------
    A lazy value object.  This should return almost immediately
    """
    X = value(X)
    y = y if y is None else value(y)
    # Materialize the folds: they are iterated once per parameter set and
    # their count is needed below.
    cv = [(value(train), value(test)) for train, test in cv]

    out = [_fit_and_score(estimator, X, y, scorer, train,
                          test, parameters, fit_params)
           for parameters in parameter_iterable
           for train, test in cv]

    return do(pick_best_parameters)(out, len(cv), iid)
Ejemplo n.º 21
0
def best_parameters(estimator, cv, X, y, parameter_iterable, scorer,
                    fit_params, iid):
    """ Lazily apply fit-and-score to data on all parameters / folds

    This function does little input checking and it doesn't trigger
    computation.

    Parameters
    ----------
    estimator, scorer, fit_params:
        Passed as-is to each ``_fit_and_score`` call.
    cv: iterable of (train, test) pairs
        Every train and test member is wrapped with ``value``.
    X:
        Wrapped with ``value``.
    y:
        Wrapped with ``value``; ``None`` is left as ``None``.
    parameter_iterable: iterable
        Each item is combined with every fold from ``cv``.
    iid:
        Passed to ``pick_best_parameters`` together with the fold count.

    Returns
    -------
    A lazy value object.  This should return almost immediately
    """
    X = value(X)
    y = y if y is None else value(y)
    # A concrete list is required: the folds are re-iterated for each
    # parameter set and counted with ``len`` below.
    cv = [(value(train), value(test)) for train, test in cv]

    out = [
        _fit_and_score(estimator, X, y, scorer, train, test, parameters,
                       fit_params) for parameters in parameter_iterable
        for train, test in cv
    ]

    return do(pick_best_parameters)(out, len(cv), iid)
Ejemplo n.º 22
0
def test_do_method_descriptor():
    """Wrapping a method descriptor with ``do`` and calling it must not err."""
    lazy_decode = do(bytes.decode)
    lazy_decode(b'')
Ejemplo n.º 23
0
 def predict(self, X):
     """ Lazily transform X through all but the last step, then predict

     The prediction is made with the estimator of the final step.
     """
     for _, step_est in self.steps[:-1]:
         X = do(transform)(step_est, X)
     final_est = self.steps[-1][1]
     return do(predict)(final_est, X)
Ejemplo n.º 24
0
 def transform(self, X):
     """ Lazily pass X through the estimator of every step, in order """
     result = X
     for _, step_est in self.steps:
         result = do(transform)(step_est, result)
     return result
Ejemplo n.º 25
0
def test_lists_are_concrete():
    """Nested lists holding lazy values reach the function as real lists."""
    one = value(1)
    two = value(2)
    pairs = [[one, 10], [two, 20]]

    # ``max`` orders the pairs by their first (lazy) element.
    largest = do(max)(pairs, key=lambda pair: pair[0])
    second_item = largest[1]

    assert second_item.compute() == 20
Ejemplo n.º 26
0
 def score(self, X, y):
     """ Lazily compute ``accuracy_score`` of the predictions against y """
     lazy_X, lazy_y = value(X), value(y)
     predictions = self.predict(lazy_X)
     lazy_accuracy = do(accuracy_score)
     return lazy_accuracy(predictions, lazy_y)
Ejemplo n.º 27
0
    def to_sklearn(self):
        """ Lazily build an sklearn Pipeline from this object's steps

        The constructor call is wrapped with ``do``, so the result has to
        be computed to obtain the actual pipeline.

        >>> pipeline.to_sklearn().compute()  # doctest: +SKIP
        """
        lazy_constructor = do(sklearn.pipeline.Pipeline)
        return lazy_constructor(self.steps)
Ejemplo n.º 28
0
def test_lists():
    """A list of lazy values passed to ``do(sum)`` is summed on compute."""
    lazy_numbers = [value(1), value(2)]
    total = do(sum)(lazy_numbers)
    assert total.compute() == 3
Ejemplo n.º 29
0
def read_text(fn,
              keyname=None,
              encoding='utf-8',
              errors='strict',
              lineterminator='\n',
              executor=None,
              fs=None,
              lazy=True,
              collection=True,
              blocksize=2**27,
              compression=None):
    """ Read text lines from S3

    Parameters
    ----------
    fn: string
        Path of files on S3, including both bucket, key, or globstring
    keyname: string, optional
        If ``fn`` is only the bucket name, provide key name as second
        argument.  A leading ``/`` is added when missing.
    encoding: string, optional
        Encoding used to decode each block of bytes
    errors: string, optional
        Error handling scheme passed to ``bytes.decode``
    lineterminator: str, optional
        The endline string used to delineate line breaks
    executor: optional
        Executor to use; resolved through ``default_executor``
    fs: optional
        File system object used to glob and read the files.  A new
        ``S3FileSystem`` is created when none is given.
    lazy: boolean, optional
        If False, start reading immediately (``executor.persist`` for a
        collection, ``executor.compute`` otherwise)
    collection: boolean, optional
        Whether or not to return a high level collection
    blocksize: int, optional
        Number of bytes per partition.  Use ``None`` for no blocking.
        Silently ignored if data is compressed with a non-splittable format like gzip.
    compression: str, optional
        Compression to use options include: gzip
        The use of compression will suppress blocking

    Examples
    --------

    Provide bucket and keyname joined by slash.
    >>> b = read_text('bucket/key-directory/')  # doctest: +SKIP

    Alternatively use support globstrings
    >>> b = read_text('bucket/key-directory/2015-*.json').map(json.loads)  # doctest: +SKIP

    Or separate bucket and keyname
    >>> b = read_text('bucket', 'key-directory/2015-*.json').map(json.loads)  # doctest: +SKIP

    Optionally provide blocksizes and delimiter to chunk up large files
    >>> b = read_text('bucket', 'key-directory/2015-*.json',
    ...               lineterminator='\\n', blocksize=2**25)  # doctest: +SKIP

    Specify compression, blocksizes not allowed
    >>> b = read_text('bucket/my-data.*.json.gz',
    ...               compression='gzip', blocksize=None)  # doctest: +SKIP

    Returns
    -------
    Dask bag if collection=True or Futures or dask values otherwise
    """
    if keyname is not None:
        if not keyname.startswith('/'):
            keyname = '/' + keyname
        fn = fn + keyname
    fs = fs or S3FileSystem()
    executor = default_executor(executor)

    if compression:
        # Any requested blocksize is dropped when compression is in use.
        blocksize = None
        decompress = decompressors[compression]

    filenames = sorted(fs.glob(fn))
    delimiter = lineterminator.encode()
    # A distinct loop name keeps the ``fn`` parameter from being rebound.
    blocks = [
        block for filename in filenames
        for block in read_bytes(filename,
                                executor,
                                fs,
                                lazy=True,
                                delimiter=delimiter,
                                blocksize=blocksize)
    ]
    if compression:
        blocks = [do(decompress)(b) for b in blocks]
    strings = [do(bytes.decode)(b, encoding, errors) for b in blocks]
    # NOTE(review): ``unicode`` is not a Python 3 builtin; presumably it is
    # supplied by a compatibility import elsewhere in the file -- confirm.
    lines = [do(unicode.split)(s, lineterminator) for s in strings]

    ensure_default_get(executor)
    from dask.bag import from_imperative
    if collection:
        result = from_imperative(lines)
    else:
        result = lines

    if not lazy:
        if collection:
            result = executor.persist(result)
        else:
            result = executor.compute(result)

    return result
Ejemplo n.º 30
0
def test_key_names_include_function_names():
    """The key of a lazy call starts with the wrapped function's name."""
    def myfunc(x):
        return x + 1

    lazy_result = do(myfunc)(1)
    assert lazy_result.key.startswith('myfunc')
Ejemplo n.º 31
0
def read_text(fn, keyname=None, encoding='utf-8', errors='strict', lineterminator='\n',
               executor=None, fs=None, lazy=True, collection=True,
               blocksize=2**27, compression=None):
    """ Read text lines from S3

    Parameters
    ----------
    fn: string
        Path of files on S3, including both bucket, key, or globstring
    keyname: string, optional
        If ``fn`` is only the bucket name, provide key name as second
        argument.  A leading ``/`` is added when missing.
    encoding: string, optional
        Encoding used to decode each block of bytes
    errors: string, optional
        Error handling scheme passed to ``bytes.decode``
    lineterminator: str, optional
        The endline string used to delineate line breaks
    executor: optional
        Executor to use; resolved through ``default_executor``
    fs: optional
        File system object used to glob and read the files.  A new
        ``S3FileSystem`` is created when none is given.
    lazy: boolean, optional
        If False, start reading immediately (``executor.persist`` for a
        collection, ``executor.compute`` otherwise)
    collection: boolean, optional
        Whether or not to return a high level collection
    blocksize: int, optional
        Number of bytes per partition.  Use ``None`` for no blocking.
        Silently ignored if data is compressed with a non-splittable format like gzip.
    compression: str, optional
        Compression to use options include: gzip
        The use of compression will suppress blocking

    Examples
    --------

    Provide bucket and keyname joined by slash.
    >>> b = read_text('bucket/key-directory/')  # doctest: +SKIP

    Alternatively use support globstrings
    >>> b = read_text('bucket/key-directory/2015-*.json').map(json.loads)  # doctest: +SKIP

    Or separate bucket and keyname
    >>> b = read_text('bucket', 'key-directory/2015-*.json').map(json.loads)  # doctest: +SKIP

    Optionally provide blocksizes and delimiter to chunk up large files
    >>> b = read_text('bucket', 'key-directory/2015-*.json',
    ...               lineterminator='\\n', blocksize=2**25)  # doctest: +SKIP

    Specify compression, blocksizes not allowed
    >>> b = read_text('bucket/my-data.*.json.gz',
    ...               compression='gzip', blocksize=None)  # doctest: +SKIP

    Returns
    -------
    Dask bag if collection=True or Futures or dask values otherwise
    """
    if keyname is not None:
        if not keyname.startswith('/'):
            keyname = '/' + keyname
        fn = fn + keyname
    fs = fs or S3FileSystem()
    executor = default_executor(executor)

    if compression:
        # Any requested blocksize is dropped when compression is in use.
        blocksize = None
        decompress = decompressors[compression]

    filenames = sorted(fs.glob(fn))
    delimiter = lineterminator.encode()
    # A distinct loop name keeps the ``fn`` parameter from being rebound.
    blocks = [block for filename in filenames
                    for block in read_bytes(filename, executor, fs, lazy=True,
                                            delimiter=delimiter,
                                            blocksize=blocksize)]
    if compression:
        blocks = [do(decompress)(b) for b in blocks]
    strings = [do(bytes.decode)(b, encoding, errors) for b in blocks]
    # NOTE(review): ``unicode`` is not a Python 3 builtin; presumably it is
    # supplied by a compatibility import elsewhere in the file -- confirm.
    lines = [do(unicode.split)(s, lineterminator) for s in strings]

    ensure_default_get(executor)
    from dask.bag import from_imperative
    if collection:
        result = from_imperative(lines)
    else:
        result = lines

    if not lazy:
        if collection:
            result = executor.persist(result)
        else:
            result = executor.compute(result)

    return result
Ejemplo n.º 32
0
def test_key_names_include_function_names():
    """Generated keys are prefixed with the name of the wrapped function."""
    def myfunc(x):
        return x + 1

    key = do(myfunc)(1).key
    assert key.startswith('myfunc')
Ejemplo n.º 33
0
def test_do_method_descriptor():
    """``do`` must accept a method descriptor such as ``bytes.decode``."""
    wrapped = do(bytes.decode)
    # Building the lazy call is the whole check; it only has to not raise.
    wrapped(b'')
Ejemplo n.º 34
0
def test_lists():
    """Lazy values inside a plain list argument are resolved on compute."""
    one, two = value(1), value(2)
    lazy_sum = do(sum)
    result = lazy_sum([one, two])
    assert result.compute() == 3
Ejemplo n.º 35
0
def test_lists_are_concrete():
    """Inner lists must be indexable by the ``key`` function given to max."""
    low = value(1)
    high = value(2)

    def first_item(pair):
        return pair[0]

    winner = do(max)([[low, 10], [high, 20]], key=first_item)

    # The pair with the larger first element carries 20 as second element.
    assert winner[1].compute() == 20