Example #1
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3),
              dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3),
              dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
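For context: the module-level pd.rolling_* functions exercised above belong to the old pandas rolling API, deprecated in pandas 0.18 in favour of the method-based API that Example #2 exercises. A minimal illustration of the correspondence, using a throwaway series:

import pandas as pd

s = pd.Series(range(10))
# Old, since-removed module-level call: pd.rolling_mean(s, 3)
# Modern method-based equivalent:
s.rolling(3).mean()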
Example #2
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(3).skew(), d.rolling(3).skew(), check_less_precise=True)
    eq(p.rolling(3).kurt(), d.rolling(3).kurt(), check_less_precise=True)
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(p.rolling(3, win_type='boxcar').sum(),
           d.rolling(3, win_type='boxcar').sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
    # Test with center
    eq(p.rolling(3, center=True).max(), d.rolling(3, center=True).max())
    eq(p.rolling(3, center=False).std(), d.rolling(3, center=False).std())
    eq(p.rolling(6, center=True).var(), d.rolling(6, center=True).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(7, center=True).skew(), d.rolling(7, center=True).skew(),
       check_less_precise=True)
Example #3
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3),
              dd.rolling_skew(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3),
              dd.rolling_kurt(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
Example #4
def test_reductions_2D(dtype):
    x = np.arange(1, 122).reshape((11, 11)).astype(dtype)
    a = da.from_array(x, chunks=(4, 4))

    b = a.sum(keepdims=True)
    assert b._keys() == [[(b.name, 0, 0)]]

    reduction_2d_test(da.sum, a, np.sum, x)
    reduction_2d_test(da.prod, a, np.prod, x)
    reduction_2d_test(da.mean, a, np.mean, x)
    reduction_2d_test(da.var, a, np.var, x, False)  # Difference in dtype algo
    reduction_2d_test(da.std, a, np.std, x, False)  # Difference in dtype algo
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x,
                      False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x,
                      False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)
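The reduction_2d_test helper is not shown in these snippets; it lives in dask's test suite. Below is a plausible minimal reconstruction, inferred from the call sites. The names of the two boolean flags are assumptions: the fourth argument evidently disables dtype checking where the comments note a difference in dtype algorithms, and the fifth, seen in the NaN tests below, presumably toggles exercising dask's split_every= keyword. reduction_1d_test is presumably the one-dimensional analogue.

import numpy as np

def reduction_2d_test(da_func, darr, np_func, narr,
                      use_dtype=True, split_every=True):
    # Hypothetical reconstruction: compare the dask reduction
    # against the numpy reference on the same data.
    assert np.allclose(da_func(darr).compute(), np_func(narr),
                       equal_nan=True)
    if use_dtype:
        # Skipped where dask and numpy pick different accumulator dtypes.
        assert da_func(darr).dtype == np_func(narr).dtype
    if split_every:
        # Assumed: also exercise dask's tree-reduction split_every= keyword.
        assert np.allclose(da_func(darr, split_every=4).compute(),
                           np_func(narr), equal_nan=True)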
Example #5
def test_reductions_2D_nans():
    # chunks are a mix of some/all/no NaNs
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    reduction_2d_test(da.sum, a, np.sum, x, False, False)
    reduction_2d_test(da.prod, a, np.prod, x, False, False)
    reduction_2d_test(da.mean, a, np.mean, x, False, False)
    reduction_2d_test(da.var, a, np.var, x, False, False)
    reduction_2d_test(da.std, a, np.std, x, False, False)
    reduction_2d_test(da.min, a, np.min, x, False, False)
    reduction_2d_test(da.max, a, np.max, x, False, False)
    reduction_2d_test(da.any, a, np.any, x, False, False)
    reduction_2d_test(da.all, a, np.all, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x, False, False)
    reduction_2d_test(da.nanmean, a, np.nanmean, x, False, False)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False, False)
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False, False)
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False, False)
Example #6
def test_reductions_1D_int():
    x = np.arange(5).astype('i4')
    a = da.from_array(x, chunks=(2,))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
Example #7
def test_reductions_2D(dtype):
    x = np.arange(1, 122).reshape((11, 11)).astype(dtype)
    a = da.from_array(x, chunks=(4, 4))

    b = a.sum(keepdims=True)
    assert b._keys() == [[(b.name, 0, 0)]]

    reduction_2d_test(da.sum, a, np.sum, x)
    reduction_2d_test(da.prod, a, np.prod, x)
    reduction_2d_test(da.mean, a, np.mean, x)
    reduction_2d_test(da.var, a, np.var, x, False)  # Difference in dtype algo
    reduction_2d_test(da.std, a, np.std, x, False)  # Difference in dtype algo
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)
Example #8
def test_reductions_2D_nans():
    # chunks are a mix of some/all/no NaNs
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    reduction_2d_test(da.sum, a, np.sum, x, False)
    reduction_2d_test(da.prod, a, np.prod, x, False)
    reduction_2d_test(da.mean, a, np.mean, x, False)
    reduction_2d_test(da.var, a, np.var, x, False)
    reduction_2d_test(da.std, a, np.std, x, False)
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x, False)
    reduction_2d_test(da.nanmean, a, np.nanmean, x, False)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)
Example #9
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(3).skew(), d.rolling(3).skew(), check_less_precise=True)
    eq(p.rolling(3).kurt(), d.rolling(3).kurt(), check_less_precise=True)
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(
            p.rolling(3, win_type='boxcar').sum(),
            d.rolling(3, win_type='boxcar').sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
    # Test with center
    eq(p.rolling(3, center=True).max(), d.rolling(3, center=True).max())
    eq(p.rolling(3, center=False).std(), d.rolling(3, center=False).std())
    eq(p.rolling(6, center=True).var(), d.rolling(6, center=True).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(7, center=True).skew(),
       d.rolling(7, center=True).skew(),
       check_less_precise=True)
Example #10
def test_reductions_1D_int():
    x = np.arange(5).astype('i4')
    a = da.from_array(x, chunks=(2, ))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
Example #11
def test_reductions_2D_int():
    x = np.arange(1, 122).reshape((11, 11)).astype('i4')
    a = da.from_array(x, chunks=(4, 4))

    reduction_2d_test(da.sum, a, np.sum, x)
    reduction_2d_test(da.prod, a, np.prod, x)
    reduction_2d_test(da.mean, a, np.mean, x)
    reduction_2d_test(da.var, a, np.var, x, False)  # Difference in dtype algo
    reduction_2d_test(da.std, a, np.std, x, False)  # Difference in dtype algo
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.argmax(a, axis=1), np.argmax(x, axis=1))
    assert eq(da.argmin(a, axis=1), np.argmin(x, axis=1))
    assert eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1))
    assert eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1))
Example #12
def test_reductions_2D_int():
    x = np.arange(1, 122).reshape((11, 11)).astype('i4')
    a = da.from_array(x, chunks=(4, 4))

    reduction_2d_test(da.sum, a, np.sum, x)
    reduction_2d_test(da.prod, a, np.prod, x)
    reduction_2d_test(da.mean, a, np.mean, x)
    reduction_2d_test(da.var, a, np.var, x, False)  # Difference in dtype algo
    reduction_2d_test(da.std, a, np.std, x, False)  # Difference in dtype algo
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x,
                      False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x,
                      False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.argmax(a, axis=1), np.argmax(x, axis=1))
    assert eq(da.argmin(a, axis=1), np.argmin(x, axis=1))
    assert eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1))
    assert eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1))
Example #13
def test_tokenize_ordered_dict():
    with ignoring(ImportError):
        from collections import OrderedDict
        a = OrderedDict([('a', 1), ('b', 2)])
        b = OrderedDict([('a', 1), ('b', 2)])
        c = OrderedDict([('b', 2), ('a', 1)])

        assert tokenize(a) == tokenize(b)
        assert tokenize(a) != tokenize(c)
Example #14
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {"x": (div, 1, 1), "y": (div, "x", 2), "z": (div, "y", 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            out = get(dsk, "z")

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
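prof and get are module-level fixtures that the snippet above does not show. A minimal standalone setup, assuming the threaded scheduler:

from dask.threaded import get
from dask.diagnostics import Profiler

# Each entry in prof.results is a five-field TaskData record
# (key, task, start_time, end_time, worker_id), which is what the
# len(v) == 5 assertion checks. Only "x" and "y" finish before "z"
# divides by zero, hence the expected two results.
prof = Profiler()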
Example #15
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {'x': (div, 1, 1), 'y': (div, 'x', 2), 'z': (div, 'y', 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            out = get(dsk, 'z')

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
Example #16
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {"x": (div, 1, 1), "y": (div, "x", 2), "z": (div, "y", 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            get(dsk, "z")

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
Example #17
def test_nan():
    x = np.array([[1, np.nan, 3, 4], [5, 6, 7, np.nan], [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #18
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
Example #19
def test_dtype_complex():
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def eq(a, b):
        return (isinstance(a, np.dtype) and
                isinstance(b, np.dtype) and
                str(a) == str(b))

    assert eq(a._dtype, x.dtype)
    assert eq(b._dtype, y.dtype)

    assert eq((a + 1)._dtype, (x + 1).dtype)
    assert eq((a + b)._dtype, (x + y).dtype)
    assert eq(a.T._dtype, x.T.dtype)
    assert eq(a[:3]._dtype, x[:3].dtype)
    assert eq((a.dot(b.T))._dtype, (x.dot(y.T)).dtype)

    assert eq(stack([a, b])._dtype, np.vstack([x, y]).dtype)
    assert eq(concatenate([a, b])._dtype, np.concatenate([x, y]).dtype)

    assert eq(b.std()._dtype, y.std().dtype)
    assert eq(c.sum()._dtype, z.sum().dtype)
    assert eq(a.min()._dtype, a.min().dtype)
    assert eq(b.std()._dtype, b.std().dtype)
    assert eq(a.argmin(axis=0)._dtype, a.argmin(axis=0).dtype)

    assert eq(da.sin(c)._dtype, np.sin(z).dtype)
    assert eq(da.exp(b)._dtype, np.exp(y).dtype)
    assert eq(da.floor(a)._dtype, np.floor(x).dtype)
    assert eq(da.isnan(b)._dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b)._dtype == 'bool'
        assert da.notnull(b)._dtype == 'bool'

    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1,))

    assert eq(d['text']._dtype, x['text'].dtype)
    assert eq(d[['numbers', 'text']]._dtype, x[['numbers', 'text']].dtype)
Example #20
def update_bar(elapsed, prev_completed, prev_estimated, pb):
    total = 0
    completed = 0
    estimated = 0.0
    time_guess = 0.0

    # update
    with pb._lock:
        for k, v in pb.task_data.items():
            total += v.total
            completed += v.completed

            if v.completed > 0:
                avg_time = v.time_sum / v.completed
                estimated += avg_time * v.total
                time_guess += v.time_sum

    # If we've completed some new tasks, update our estimate
    # otherwise use previous estimate. This prevents jumps
    # relative to the elapsed time
    if completed != prev_completed:
        estimated = estimated * elapsed / time_guess
    else:
        estimated = prev_estimated

    # For the first 10 seconds, tell the user estimates improve over time
    # then display the bar
    if elapsed < 10.0:
        fraction = 0.0
        bar = " estimate improves over time"
    else:
        # Print out the progress bar
        fraction = elapsed / estimated if estimated > 0.0 else 0.0
        bar = "#" * int(pb._width * fraction)

    percent = int(100 * fraction)
    msg = "\r[{0:{1}.{1}}] | {2}% Complete (Estimate) | {3} / ~{4}".format(
        bar, pb._width, percent, format_time(elapsed),
        "???" if estimated == 0.0 else format_time(estimated))

    with ignoring(ValueError):
        pb._file.write(msg)
        pb._file.flush()

    return completed, estimated
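To make the rescaling step concrete: if elapsed = 10 s of wall-clock time have passed, the completed tasks sum to time_guess = 5 s of task time, and the per-task averages project estimated = 20 s of total task time, the update yields estimated = 20 * 10 / 5 = 40 s of wall clock, i.e. the task-time projection scaled by the observed ratio of wall-clock time to task time.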
Example #21
def test_dtype_complex():
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def eq(a, b):
        return (isinstance(a, np.dtype) and isinstance(b, np.dtype)
                and str(a) == str(b))

    assert eq(a._dtype, x.dtype)
    assert eq(b._dtype, y.dtype)

    assert eq((a + 1)._dtype, (x + 1).dtype)
    assert eq((a + b)._dtype, (x + y).dtype)
    assert eq(a.T._dtype, x.T.dtype)
    assert eq(a[:3]._dtype, x[:3].dtype)
    assert eq((a.dot(b.T))._dtype, (x.dot(y.T)).dtype)

    assert eq(stack([a, b])._dtype, np.vstack([x, y]).dtype)
    assert eq(concatenate([a, b])._dtype, np.concatenate([x, y]).dtype)

    assert eq(b.std()._dtype, y.std().dtype)
    assert eq(c.sum()._dtype, z.sum().dtype)
    assert eq(a.min()._dtype, a.min().dtype)
    assert eq(b.std()._dtype, b.std().dtype)
    assert eq(a.argmin(axis=0)._dtype, a.argmin(axis=0).dtype)

    assert eq(da.sin(c)._dtype, np.sin(z).dtype)
    assert eq(da.exp(b)._dtype, np.exp(y).dtype)
    assert eq(da.floor(a)._dtype, np.floor(x).dtype)
    assert eq(da.isnan(b)._dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b)._dtype == 'bool'
        assert da.notnull(b)._dtype == 'bool'

    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1, ))

    assert eq(d['text']._dtype, x['text'].dtype)
    assert eq(d[['numbers', 'text']]._dtype, x[['numbers', 'text']].dtype)
Example #22
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
Example #23
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
Example #24
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    eq(p.rolling(3).skew(), d.rolling(3).skew())
    eq(p.rolling(3).kurt(), d.rolling(3).kurt())
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(p.rolling(3, win_type="boxcar").sum(), d.rolling(3, win_type="boxcar").sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
Example #25
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
Example #26
def test_reductions_1D(dtype):
    x = np.arange(5).astype(dtype)
    a = da.from_array(x, chunks=(2,))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)
Example #27
def test_reductions_1D(dtype):
    x = np.arange(5).astype(dtype)
    a = da.from_array(x, chunks=(2, ))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)
Example #28
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    eq(p.rolling(3).skew(), d.rolling(3).skew())
    eq(p.rolling(3).kurt(), d.rolling(3).kurt())
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(p.rolling(3, win_type='boxcar').sum(),
           d.rolling(3, win_type='boxcar').sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
Example #29
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3),
       dd.rolling_sum(d, 3, min_periods=3))
Example #30
def import_or_none(path):
    with ignoring():
        return pytest.importorskip(path)
    return None
Example #31
def test_ignoring_deprecated():
    with pytest.warns(FutureWarning, match="contextlib.suppress"):
        with ignoring(ValueError):
            pass
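For reference, ignoring is dask's old exception-suppressing context manager, which the test above shows being deprecated in favour of contextlib.suppress. A minimal sketch of its behaviour, assuming it mirrors suppress:

from contextlib import contextmanager

@contextmanager
def ignoring(*exceptions):
    # Run the body and silently swallow any of the given exceptions;
    # with no arguments, except () matches nothing and suppresses nothing.
    try:
        yield
    except exceptions:
        pass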
Example #32
def import_or_none(path):
    with ignoring(BaseException):
        return pytest.importorskip(path)
    return None
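Note the behavioural difference between this variant and the no-argument one in Example #30, assuming the sketch above: with no arguments ignoring() suppresses nothing, so a missing module lets pytest.importorskip's skip exception propagate and skip the calling test; ignoring(BaseException) swallows it instead (pytest's skip outcome derives from BaseException) and the function falls through to return None.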
Example #33
def test_isnull_result_is_an_array():
    # regression test for https://github.com/dask/dask/issues/3822
    arr = da.from_array(np.arange(3, dtype=np.int64), chunks=-1)
    with ignoring(ImportError):
        result = da.isnull(arr[0]).compute()
        assert type(result) is np.ndarray
Example #34
def _close_job(cls, job_id):
    if job_id:
        with ignoring(RuntimeError):  # deleting job when job already gone
            cls._call(shlex.split(cls.cancel_command) + [job_id])
        logger.debug("Closed job %s", job_id)
Example #35
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name, pid_file, resources,
         dashboard, bokeh, bokeh_port, scheduler_file, dashboard_prefix,
         tls_ca_file, tls_cert, tls_key, dashboard_address, **kwargs):
    # https://github.com/dask/distributed/issues/1653
    g0, g1, g2 = gc.get_threshold()
    gc.set_threshold(g0 * 3, g1 * 3, g2 * 3)

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    if bokeh_port is not None:
        warnings.warn(
            "The --bokeh-port flag has been renamed to --dashboard-address. "
            "Consider adding ``--dashboard-address :%d`` " % bokeh_port)
        dashboard_address = bokeh_port
    if bokeh is not None:
        warnings.warn(
            "The --bokeh/--no-bokeh flag has been renamed to --dashboard/--no-dashboard. "
        )
        dashboard = bokeh

    sec = Security(
        **{
            k: v
            for k, v in [
                ("tls_ca_file", tls_ca_file),
                ("tls_worker_cert", tls_cert),
                ("tls_worker_key", tls_key),
            ] if v is not None
        })

    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker.  You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker.  You cannot use the --no-nanny argument when nprocs > 1."
        )
        exit(1)

    if contact_address and not listen_address:
        logger.error(
            "Failed to launch worker. "
            "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when --worker-port or --host is given."
        )
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address,
                                                        strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if not nthreads:
        nthreads = CPU_COUNT // nprocs

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs.update({
            "worker_port": worker_port,
            "listen_address": listen_address
        })
        t = Nanny
    else:
        if nanny_port:
            kwargs["service_ports"] = {"nanny": nanny_port}
        t = Worker

    if (not scheduler and not scheduler_file
            and dask.config.get("scheduler-address", None) is None):
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    with ignoring(TypeError, ValueError):
        name = int(name)

    nannies = [
        t(scheduler,
          scheduler_file=scheduler_file,
          nthreads=nthreads,
          loop=loop,
          resources=resources,
          security=sec,
          contact_address=contact_address,
          host=host,
          port=port,
          dashboard_address=dashboard_address if dashboard else None,
          service_kwargs={"dashboard": {
              "prefix": dashboard_prefix
          }},
          name=name if nprocs == 1 or name is None or name == "" else
          str(name) + "-" + str(i),
          **kwargs) for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n.close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield nannies
        yield [n.finished() for n in nannies]

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except TimeoutError:
        # We already log the exception in nanny / worker. Don't do it again.
        raise TimeoutError("Timed out starting worker.") from None
    except KeyboardInterrupt:
        pass
    finally:
        logger.info("End worker")
Example #36
def test_isnull():
    x = np.array([1, np.nan])
    a = from_array(x, chunks=(2,))
    with ignoring(ImportError):
        assert eq(isnull(a), np.isnan(x))
        assert eq(notnull(a), ~np.isnan(x))
Example #37
class RandomState(object):
    """
    Mersenne Twister pseudo-random number generator

    This object contains state to deterministically generate pseudo-random
    numbers from a variety of probability distributions.  It is identical to
    ``Random_State_class`` except that all functions also take a ``chunks=``
    keyword argument.

    Examples
    --------

    >>> import dask.array as da
    >>> state = da.random.RandomState(1234)  # a seed
    >>> x = state.normal(10, 0.1, size=3, chunks=(2,))
    >>> x.compute()
    array([ 10.01867852,  10.04812289,   9.89649746])

    See Also:
        Random_State_class
    """
    def __init__(self, seed=None):
        self._numpy_state = Random_State_class(seed)

    def seed(self, seed=None):
        self._numpy_state.seed(seed)

    def _wrap(self, func, *args, **kwargs):
        """ Wrap numpy random function to produce dask.array random function

        extra_chunks should be a chunks tuple to append to the end of chunks
        """
        size = kwargs.pop('size', None)
        chunks = kwargs.pop('chunks')
        extra_chunks = kwargs.pop('extra_chunks', ())

        if size is not None and not isinstance(size, (tuple, list)):
            size = (size, )

        args_shapes = {
            ar.shape
            for ar in args if isinstance(ar, (Array, np.ndarray))
        }
        # In-place union; a bare set.union call would discard its result.
        args_shapes |= {
            ar.shape
            for ar in kwargs.values() if isinstance(ar, (Array, np.ndarray))
        }

        shapes = list(args_shapes)
        if size is not None:
            shapes += [size]
        # broadcast to the final size(shape)
        size = broadcast_shapes(*shapes)
        chunks = normalize_chunks(chunks, size)
        slices = slices_from_chunks(chunks)

        def _broadcast_any(ar, shape, chunks):
            if isinstance(ar, Array):
                return broadcast_to(ar, shape).rechunk(chunks)
            if isinstance(ar, np.ndarray):
                return np.ascontiguousarray(np.broadcast_to(ar, shape))

        # Broadcast all arguments, get tiny versions as well
        # Start adding the relevant bits to the graph
        dsk = {}
        dsks = []
        lookup = {}
        small_args = []
        for i, ar in enumerate(args):
            if isinstance(ar, (np.ndarray, Array)):
                res = _broadcast_any(ar, size, chunks)
                if isinstance(res, Array):
                    dsks.append(res.dask)
                    lookup[i] = res.name
                elif isinstance(res, np.ndarray):
                    name = 'array-{}'.format(tokenize(res))
                    lookup[i] = name
                    dsk[name] = res
                small_args.append(ar[tuple(0 for _ in ar.shape)])
            else:
                small_args.append(ar)

        small_kwargs = {}
        for key, ar in kwargs.items():
            if isinstance(ar, (np.ndarray, Array)):
                res = _broadcast_any(ar, size, chunks)
                if isinstance(res, Array):
                    dsks.append(res.dask)
                    lookup[key] = res.name
                elif isinstance(res, np.ndarray):
                    name = 'array-{}'.format(tokenize(res))
                    lookup[key] = name
                    dsk[name] = res
                small_kwargs[key] = ar[tuple(0 for _ in ar.shape)]
            else:
                small_kwargs[key] = ar

        # Get dtype
        small_kwargs['size'] = (0, )
        dtype = func(xoroshiro128plus.RandomState(), *small_args,
                     **small_kwargs).dtype

        sizes = list(product(*chunks))
        state_data = random_state_data(len(sizes), self._numpy_state)
        token = tokenize(state_data, size, chunks, args, kwargs)
        name = 'da.random.{0}-{1}'.format(func.__name__, token)

        keys = product([name],
                       *([range(len(bd))
                          for bd in chunks] + [[0]] * len(extra_chunks)))
        blocks = product(*[range(len(bd)) for bd in chunks])
        vals = []
        for state, size, slc, block in zip(state_data, sizes, slices, blocks):
            arg = []
            for i, ar in enumerate(args):
                if i not in lookup:
                    arg.append(ar)
                else:
                    if isinstance(ar, Array):
                        arg.append((lookup[i], ) + block)
                    else:  # np.ndarray
                        arg.append((getitem, lookup[i], slc))
            kwrg = {}
            for k, ar in kwargs.items():
                if k not in lookup:
                    kwrg[k] = ar
                else:
                    if isinstance(ar, Array):
                        kwrg[k] = (lookup[k], ) + block
                    else:  # np.ndarray
                        kwrg[k] = (getitem, lookup[k], slc)
            vals.append((_apply_random, func.__name__, state, size, arg, kwrg))
        dsk.update(dict(zip(keys, vals)))
        dsk = sharedict.merge((name, dsk), *dsks)
        return Array(dsk, name, chunks + extra_chunks, dtype=dtype)

    @doc_wraps(Random_State_class.beta)
    def beta(self, a, b, size=None, chunks=None):
        return self._wrap(Random_State_class.beta,
                          a,
                          b,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.binomial)
    def binomial(self, n, p, size=None, chunks=None):
        return self._wrap(Random_State_class.binomial,
                          n,
                          p,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.chisquare)
    def chisquare(self, df, size=None, chunks=None):
        return self._wrap(Random_State_class.chisquare,
                          df,
                          size=size,
                          chunks=chunks)

    with ignoring(AttributeError):

        @doc_wraps(Random_State_class.choice)
        def choice(self, a, size=None, replace=True, p=None, chunks=None):
            dsks = []
            # Normalize and validate `a`
            if isinstance(a, Integral):
                # On windows the output dtype differs if p is provided or
                # absent, see https://github.com/numpy/numpy/issues/9867
                dummy_p = np.array([1]) if p is not None else p
                dtype = np.random.choice(1, size=(), p=dummy_p).dtype
                len_a = a
                if a < 0:
                    raise ValueError("a must be greater than 0")
            else:
                a = asarray(a).rechunk(a.shape)
                dtype = a.dtype
                if a.ndim != 1:
                    raise ValueError("a must be one dimensional")
                len_a = len(a)
                dsks.append(a.dask)
                a = a.__dask_keys__()[0]

            # Normalize and validate `p`
            if p is not None:
                if not isinstance(p, Array):
                    # If p is not a dask array, first check the sum is close
                    # to 1 before converting.
                    p = np.asarray(p)
                    if not np.isclose(p.sum(), 1, rtol=1e-7, atol=0):
                        raise ValueError("probabilities do not sum to 1")
                    p = asarray(p)
                else:
                    p = p.rechunk(p.shape)

                if p.ndim != 1:
                    raise ValueError("p must be one dimensional")
                if len(p) != len_a:
                    raise ValueError("a and p must have the same size")

                dsks.append(p.dask)
                p = p.__dask_keys__()[0]

            if size is None:
                size = ()
            elif not isinstance(size, (tuple, list)):
                size = (size, )

            chunks = normalize_chunks(chunks, size)
            sizes = list(product(*chunks))
            state_data = random_state_data(len(sizes), self._numpy_state)

            name = 'da.random.choice-%s' % tokenize(state_data, size, chunks,
                                                    a, replace, p)
            keys = product([name], *(range(len(bd)) for bd in chunks))
            dsk = {
                k: (_choice, state, a, size, replace, p)
                for k, state, size in zip(keys, state_data, sizes)
            }

            return Array(sharedict.merge((name, dsk), *dsks),
                         name,
                         chunks,
                         dtype=dtype)

    # @doc_wraps(Random_State_class.dirichlet)
    # def dirichlet(self, alpha, size=None, chunks=None):

    @doc_wraps(Random_State_class.exponential)
    def exponential(self, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.exponential,
                          scale,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.f)
    def f(self, dfnum, dfden, size=None, chunks=None):
        return self._wrap(Random_State_class.f,
                          dfnum,
                          dfden,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.gamma)
    def gamma(self, shape, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.gamma,
                          shape,
                          scale,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.geometric)
    def geometric(self, p, size=None, chunks=None):
        return self._wrap(Random_State_class.geometric,
                          p,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.gumbel)
    def gumbel(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.gumbel,
                          loc,
                          scale,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.hypergeometric)
    def hypergeometric(self, ngood, nbad, nsample, size=None, chunks=None):
        return self._wrap(Random_State_class.hypergeometric,
                          ngood,
                          nbad,
                          nsample,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.laplace)
    def laplace(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.laplace,
                          loc,
                          scale,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.logistic)
    def logistic(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.logistic,
                          loc,
                          scale,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.lognormal)
    def lognormal(self, mean=0.0, sigma=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.lognormal,
                          mean,
                          sigma,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.logseries)
    def logseries(self, p, size=None, chunks=None):
        return self._wrap(Random_State_class.logseries,
                          p,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.multinomial)
    def multinomial(self, n, pvals, size=None, chunks=None):
        return self._wrap(Random_State_class.multinomial,
                          n,
                          pvals,
                          size=size,
                          chunks=chunks,
                          extra_chunks=((len(pvals), ), ))

    @doc_wraps(Random_State_class.negative_binomial)
    def negative_binomial(self, n, p, size=None, chunks=None):
        return self._wrap(Random_State_class.negative_binomial,
                          n,
                          p,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.noncentral_chisquare)
    def noncentral_chisquare(self, df, nonc, size=None, chunks=None):
        return self._wrap(Random_State_class.noncentral_chisquare,
                          df,
                          nonc,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.noncentral_f)
    def noncentral_f(self, dfnum, dfden, nonc, size=None, chunks=None):
        return self._wrap(Random_State_class.noncentral_f,
                          dfnum,
                          dfden,
                          nonc,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.normal)
    def normal(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.normal,
                          loc,
                          scale,
                          size=size,
                          chunks=chunks,
                          method='zig')

    @doc_wraps(Random_State_class.pareto)
    def pareto(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.pareto,
                          a,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.poisson)
    def poisson(self, lam=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.poisson,
                          lam,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.power)
    def power(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.power,
                          a,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.randint)
    def randint(self, low, high=None, size=None, chunks=None):
        return self._wrap(Random_State_class.randint,
                          low,
                          high,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.random_integers)
    def random_integers(self, low, high=None, size=None, chunks=None):
        return self._wrap(Random_State_class.random_integers,
                          low,
                          high,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.random_sample)
    def random_sample(self, size=None, chunks=None):
        return self._wrap(Random_State_class.random_sample,
                          size=size,
                          chunks=chunks)

    random = random_sample

    @doc_wraps(Random_State_class.rayleigh)
    def rayleigh(self, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.rayleigh,
                          scale,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.standard_cauchy)
    def standard_cauchy(self, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_cauchy,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.standard_exponential)
    def standard_exponential(self, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_exponential,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.standard_gamma)
    def standard_gamma(self, shape, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_gamma,
                          shape,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.standard_normal)
    def standard_normal(self, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_normal,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.standard_t)
    def standard_t(self, df, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_t,
                          df,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.tomaxint)
    def tomaxint(self, size=None, chunks=None):
        return self._wrap(Random_State_class.tomaxint,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.triangular)
    def triangular(self, left, mode, right, size=None, chunks=None):
        return self._wrap(Random_State_class.triangular,
                          left,
                          mode,
                          right,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.uniform)
    def uniform(self, low=0.0, high=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.uniform,
                          low,
                          high,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.vonmises)
    def vonmises(self, mu, kappa, size=None, chunks=None):
        return self._wrap(Random_State_class.vonmises,
                          mu,
                          kappa,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.wald)
    def wald(self, mean, scale, size=None, chunks=None):
        return self._wrap(Random_State_class.wald,
                          mean,
                          scale,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.weibull)
    def weibull(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.weibull,
                          a,
                          size=size,
                          chunks=chunks)

    @doc_wraps(Random_State_class.zipf)
    def zipf(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.zipf, a, size=size, chunks=chunks)
Example #38
def test_isnull():
    x = np.array([1, np.nan])
    a = da.from_array(x, chunks=(2, ))
    with ignoring(ImportError):
        assert_eq(da.isnull(a), np.isnan(x))
        assert_eq(da.notnull(a), ~np.isnan(x))
Example #39
def test_query():
    df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]})
    a = dd.from_pandas(df, npartitions=2)
    q = a.query('x**2 > y')
    with ignoring(ImportError):
        assert eq(q, df.query('x**2 > y'))
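The ImportError guard here is presumably for environments lacking the optional numexpr package, which older pandas versions required for DataFrame.query's default expression engine.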
Example #40
def test_query():
    df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    a = dd.from_pandas(df, npartitions=2)
    q = a.query("x**2 > y")
    with ignoring(ImportError):
        assert eq(q, df.query("x**2 > y"))