def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
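The rolling snippets here call a module-level `mad` helper without defining it. A minimal sketch, assuming it matches the inline lambda that `rolling_tests` later in this section uses for the same purpose:

import numpy as np

def mad(x):
    # Mean absolute deviation, consistent with the inline definition
    # in rolling_tests below.
    return np.fabs(x - x.mean()).mean()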
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(3).skew(), d.rolling(3).skew(), check_less_precise=True)
    eq(p.rolling(3).kurt(), d.rolling(3).kurt(), check_less_precise=True)
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(p.rolling(3, win_type='boxcar').sum(),
           d.rolling(3, win_type='boxcar').sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
    # Test with center
    eq(p.rolling(3, center=True).max(), d.rolling(3, center=True).max())
    eq(p.rolling(3, center=False).std(), d.rolling(3, center=False).std())
    eq(p.rolling(6, center=True).var(), d.rolling(6, center=True).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(7, center=True).skew(), d.rolling(7, center=True).skew(),
       check_less_precise=True)
def test_reductions_2D(dtype):
    x = np.arange(1, 122).reshape((11, 11)).astype(dtype)
    a = da.from_array(x, chunks=(4, 4))

    b = a.sum(keepdims=True)
    assert b._keys() == [[(b.name, 0, 0)]]

    reduction_2d_test(da.sum, a, np.sum, x)
    reduction_2d_test(da.prod, a, np.prod, x)
    reduction_2d_test(da.mean, a, np.mean, x)
    reduction_2d_test(da.var, a, np.var, x, False)  # Difference in dtype algo
    reduction_2d_test(da.std, a, np.std, x, False)  # Difference in dtype algo
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)
def test_reductions_2D_nans():
    # chunks are a mix of some/all/no NaNs
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    reduction_2d_test(da.sum, a, np.sum, x, False, False)
    reduction_2d_test(da.prod, a, np.prod, x, False, False)
    reduction_2d_test(da.mean, a, np.mean, x, False, False)
    reduction_2d_test(da.var, a, np.var, x, False, False)
    reduction_2d_test(da.std, a, np.std, x, False, False)
    reduction_2d_test(da.min, a, np.min, x, False, False)
    reduction_2d_test(da.max, a, np.max, x, False, False)
    reduction_2d_test(da.any, a, np.any, x, False, False)
    reduction_2d_test(da.all, a, np.all, x, False, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False, False)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x, False, False)
    reduction_2d_test(da.nanmean, a, np.nanmean, x, False, False)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False, False)
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False, False)
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False, False)
def test_reductions_1D_int():
    x = np.arange(5).astype('i4')
    a = da.from_array(x, chunks=(2,))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
def test_reductions_2D_nans():
    # chunks are a mix of some/all/no NaNs
    x = np.full((4, 4), np.nan)
    x[:2, :2] = np.array([[1, 2], [3, 4]])
    x[2, 2] = 5
    x[3, 3] = 6
    a = da.from_array(x, chunks=(2, 2))

    reduction_2d_test(da.sum, a, np.sum, x, False)
    reduction_2d_test(da.prod, a, np.prod, x, False)
    reduction_2d_test(da.mean, a, np.mean, x, False)
    reduction_2d_test(da.var, a, np.var, x, False)
    reduction_2d_test(da.std, a, np.std, x, False)
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x, False)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x, False)
    reduction_2d_test(da.nanmean, a, np.nanmean, x, False)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(3).skew(), d.rolling(3).skew(), check_less_precise=True)
    eq(p.rolling(3).kurt(), d.rolling(3).kurt(), check_less_precise=True)
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(p.rolling(3, win_type='boxcar').sum(),
           d.rolling(3, win_type='boxcar').sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
    # Test with center
    eq(p.rolling(3, center=True).max(), d.rolling(3, center=True).max())
    eq(p.rolling(3, center=False).std(), d.rolling(3, center=False).std())
    eq(p.rolling(6, center=True).var(), d.rolling(6, center=True).var())
    # see note around test_rolling_dataframe for logic concerning precision
    eq(p.rolling(7, center=True).skew(), d.rolling(7, center=True).skew(),
       check_less_precise=True)
def test_reductions_1D_int():
    x = np.arange(5).astype('i4')
    a = da.from_array(x, chunks=(2,))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
def test_reductions_2D_int():
    x = np.arange(1, 122).reshape((11, 11)).astype('i4')
    a = da.from_array(x, chunks=(4, 4))

    reduction_2d_test(da.sum, a, np.sum, x)
    reduction_2d_test(da.prod, a, np.prod, x)
    reduction_2d_test(da.mean, a, np.mean, x)
    reduction_2d_test(da.var, a, np.var, x, False)  # Difference in dtype algo
    reduction_2d_test(da.std, a, np.std, x, False)  # Difference in dtype algo
    reduction_2d_test(da.min, a, np.min, x, False)
    reduction_2d_test(da.max, a, np.max, x, False)
    reduction_2d_test(da.any, a, np.any, x, False)
    reduction_2d_test(da.all, a, np.all, x, False)

    reduction_2d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_2d_test(da.nanprod, a, np.nanprod, x)
    reduction_2d_test(da.nanmean, a, np.mean, x)
    reduction_2d_test(da.nanvar, a, np.nanvar, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanstd, a, np.nanstd, x, False)  # Difference in dtype algo
    reduction_2d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_2d_test(da.nanmax, a, np.nanmax, x, False)

    assert eq(da.argmax(a, axis=0), np.argmax(x, axis=0))
    assert eq(da.argmin(a, axis=0), np.argmin(x, axis=0))
    assert eq(da.nanargmax(a, axis=0), np.nanargmax(x, axis=0))
    assert eq(da.nanargmin(a, axis=0), np.nanargmin(x, axis=0))
    assert eq(da.argmax(a, axis=1), np.argmax(x, axis=1))
    assert eq(da.argmin(a, axis=1), np.argmin(x, axis=1))
    assert eq(da.nanargmax(a, axis=1), np.nanargmax(x, axis=1))
    assert eq(da.nanargmin(a, axis=1), np.nanargmin(x, axis=1))
def test_tokenize_ordered_dict():
    with ignoring(ImportError):
        from collections import OrderedDict
        a = OrderedDict([('a', 1), ('b', 2)])
        b = OrderedDict([('a', 1), ('b', 2)])
        c = OrderedDict([('b', 2), ('a', 1)])

        assert tokenize(a) == tokenize(b)
        assert tokenize(a) != tokenize(c)
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {"x": (div, 1, 1), "y": (div, "x", 2), "z": (div, "y", 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            out = get(dsk, "z")

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {'x': (div, 1, 1), 'y': (div, 'x', 2), 'z': (div, 'y', 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            out = get(dsk, 'z')

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
def test_profiler_works_under_error():
    div = lambda x, y: x / y
    dsk = {"x": (div, 1, 1), "y": (div, "x", 2), "z": (div, "y", 0)}

    with ignoring(ZeroDivisionError):
        with prof:
            get(dsk, "z")

    assert all(len(v) == 5 for v in prof.results)
    assert len(prof.results) == 2
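Every snippet in this section leans on the `ignoring` context manager from `dask.utils`. A minimal sketch consistent with all of its usages here (variadic exception types, silently swallowed):

from contextlib import contextmanager

@contextmanager
def ignoring(*exceptions):
    # Run the block; suppress only the listed exception types and
    # let everything else propagate.
    try:
        yield
    except exceptions:
        pass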
def test_nan():
    x = np.array([[1, np.nan, 3, 4],
                  [5, 6, 7, np.nan],
                  [9, 10, 11, 12]])
    d = da.from_array(x, chunks=(2, 2))

    assert eq(np.nansum(x), da.nansum(d))
    assert eq(np.nansum(x, axis=0), da.nansum(d, axis=0))
    assert eq(np.nanmean(x, axis=1), da.nanmean(d, axis=1))
    assert eq(np.nanmin(x, axis=1), da.nanmin(d, axis=1))
    assert eq(np.nanmax(x, axis=(0, 1)), da.nanmax(d, axis=(0, 1)))
    assert eq(np.nanvar(x), da.nanvar(d))
    assert eq(np.nanstd(x, axis=0), da.nanstd(d, axis=0))
    assert eq(np.nanargmin(x, axis=0), da.nanargmin(d, axis=0))
    assert eq(np.nanargmax(x, axis=0), da.nanargmax(d, axis=0))
    with ignoring(AttributeError):
        assert eq(np.nanprod(x), da.nanprod(d))
def test_dtype_complex():
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def eq(a, b):
        return (isinstance(a, np.dtype) and
                isinstance(b, np.dtype) and
                str(a) == str(b))

    assert eq(a._dtype, x.dtype)
    assert eq(b._dtype, y.dtype)

    assert eq((a + 1)._dtype, (x + 1).dtype)
    assert eq((a + b)._dtype, (x + y).dtype)
    assert eq(a.T._dtype, x.T.dtype)
    assert eq(a[:3]._dtype, x[:3].dtype)
    assert eq((a.dot(b.T))._dtype, (x.dot(y.T)).dtype)

    assert eq(stack([a, b])._dtype, np.vstack([x, y]).dtype)
    assert eq(concatenate([a, b])._dtype, np.concatenate([x, y]).dtype)

    assert eq(b.std()._dtype, y.std().dtype)
    assert eq(c.sum()._dtype, z.sum().dtype)
    assert eq(a.min()._dtype, a.min().dtype)
    assert eq(b.std()._dtype, b.std().dtype)
    assert eq(a.argmin(axis=0)._dtype, a.argmin(axis=0).dtype)

    assert eq(da.sin(c)._dtype, np.sin(z).dtype)
    assert eq(da.exp(b)._dtype, np.exp(y).dtype)
    assert eq(da.floor(a)._dtype, np.floor(x).dtype)
    assert eq(da.isnan(b)._dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b)._dtype == 'bool'
        assert da.notnull(b)._dtype == 'bool'

    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1,))

    assert eq(d['text']._dtype, x['text'].dtype)
    assert eq(d[['numbers', 'text']]._dtype, x[['numbers', 'text']].dtype)
def update_bar(elapsed, prev_completed, prev_estimated, pb):
    total = 0
    completed = 0
    estimated = 0.0
    time_guess = 0.0

    # update
    with pb._lock:
        for k, v in pb.task_data.items():
            total += v.total
            completed += v.completed
            if v.completed > 0:
                avg_time = v.time_sum / v.completed
                estimated += avg_time * v.total
                time_guess += v.time_sum

    # If we've completed some new tasks, update our estimate,
    # otherwise use the previous estimate. This prevents jumps
    # relative to the elapsed time.
    if completed != prev_completed:
        estimated = estimated * elapsed / time_guess
    else:
        estimated = prev_estimated

    # For the first 10 seconds, tell the user estimates improve over time,
    # then display the bar
    if elapsed < 10.0:
        fraction = 0.0
        bar = " estimate improves over time"
    else:
        # Print out the progress bar
        fraction = elapsed / estimated if estimated > 0.0 else 0.0
        bar = "#" * int(pb._width * fraction)

    percent = int(100 * fraction)
    msg = "\r[{0:{1}.{1}}] | {2}% Complete (Estimate) | {3} / ~{4}".format(
        bar, pb._width, percent, format_time(elapsed),
        "???" if estimated == 0.0 else format_time(estimated))

    with ignoring(ValueError):
        pb._file.write(msg)
        pb._file.flush()

    return completed, estimated
def test_dtype_complex():
    x = np.arange(24).reshape((4, 6)).astype('f4')
    y = np.arange(24).reshape((4, 6)).astype('i8')
    z = np.arange(24).reshape((4, 6)).astype('i2')

    a = da.from_array(x, chunks=(2, 3))
    b = da.from_array(y, chunks=(2, 3))
    c = da.from_array(z, chunks=(2, 3))

    def eq(a, b):
        return (isinstance(a, np.dtype) and
                isinstance(b, np.dtype) and
                str(a) == str(b))

    assert eq(a._dtype, x.dtype)
    assert eq(b._dtype, y.dtype)

    assert eq((a + 1)._dtype, (x + 1).dtype)
    assert eq((a + b)._dtype, (x + y).dtype)
    assert eq(a.T._dtype, x.T.dtype)
    assert eq(a[:3]._dtype, x[:3].dtype)
    assert eq((a.dot(b.T))._dtype, (x.dot(y.T)).dtype)

    assert eq(stack([a, b])._dtype, np.vstack([x, y]).dtype)
    assert eq(concatenate([a, b])._dtype, np.concatenate([x, y]).dtype)

    assert eq(b.std()._dtype, y.std().dtype)
    assert eq(c.sum()._dtype, z.sum().dtype)
    assert eq(a.min()._dtype, a.min().dtype)
    assert eq(b.std()._dtype, b.std().dtype)
    assert eq(a.argmin(axis=0)._dtype, a.argmin(axis=0).dtype)

    assert eq(da.sin(c)._dtype, np.sin(z).dtype)
    assert eq(da.exp(b)._dtype, np.exp(y).dtype)
    assert eq(da.floor(a)._dtype, np.floor(x).dtype)
    assert eq(da.isnan(b)._dtype, np.isnan(y).dtype)
    with ignoring(ImportError):
        assert da.isnull(b)._dtype == 'bool'
        assert da.notnull(b)._dtype == 'bool'

    x = np.array([('a', 1)], dtype=[('text', 'S1'), ('numbers', 'i4')])
    d = da.from_array(x, chunks=(1,))

    assert eq(d['text']._dtype, x['text'].dtype)
    assert eq(d[['numbers', 'text']]._dtype, x[['numbers', 'text']].dtype)
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    eq(p.rolling(3).skew(), d.rolling(3).skew())
    eq(p.rolling(3).kurt(), d.rolling(3).kurt())
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(p.rolling(3, win_type="boxcar").sum(),
           d.rolling(3, win_type="boxcar").sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def test_reductions_1D(dtype):
    x = np.arange(5).astype(dtype)
    a = da.from_array(x, chunks=(2,))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)
def test_reductions_1D(dtype):
    x = np.arange(5).astype(dtype)
    a = da.from_array(x, chunks=(2,))

    reduction_1d_test(da.sum, a, np.sum, x)
    reduction_1d_test(da.prod, a, np.prod, x)
    reduction_1d_test(da.mean, a, np.mean, x)
    reduction_1d_test(da.var, a, np.var, x)
    reduction_1d_test(da.std, a, np.std, x)
    reduction_1d_test(da.min, a, np.min, x, False)
    reduction_1d_test(da.max, a, np.max, x, False)
    reduction_1d_test(da.any, a, np.any, x, False)
    reduction_1d_test(da.all, a, np.all, x, False)

    reduction_1d_test(da.nansum, a, np.nansum, x)
    with ignoring(AttributeError):
        reduction_1d_test(da.nanprod, a, np.nanprod, x)
    reduction_1d_test(da.nanmean, a, np.mean, x)
    reduction_1d_test(da.nanvar, a, np.var, x)
    reduction_1d_test(da.nanstd, a, np.std, x)
    reduction_1d_test(da.nanmin, a, np.nanmin, x, False)
    reduction_1d_test(da.nanmax, a, np.nanmax, x, False)
def basic_rolling_tests(p, d):  # Works for series or df
    # New rolling API
    eq(p.rolling(3).count(), d.rolling(3).count())
    eq(p.rolling(3).sum(), d.rolling(3).sum())
    eq(p.rolling(3).mean(), d.rolling(3).mean())
    eq(p.rolling(3).median(), d.rolling(3).median())
    eq(p.rolling(3).min(), d.rolling(3).min())
    eq(p.rolling(3).max(), d.rolling(3).max())
    eq(p.rolling(3).std(), d.rolling(3).std())
    eq(p.rolling(3).var(), d.rolling(3).var())
    eq(p.rolling(3).skew(), d.rolling(3).skew())
    eq(p.rolling(3).kurt(), d.rolling(3).kurt())
    eq(p.rolling(3).quantile(0.5), d.rolling(3).quantile(0.5))
    eq(p.rolling(3).apply(mad), d.rolling(3).apply(mad))
    with ignoring(ImportError):
        eq(p.rolling(3, win_type='boxcar').sum(),
           d.rolling(3, win_type='boxcar').sum())
    # Test with edge-case window sizes
    eq(p.rolling(0).sum(), d.rolling(0).sum())
    eq(p.rolling(1).sum(), d.rolling(1).sum())
    # Test with kwargs
    eq(p.rolling(3, min_periods=2).sum(), d.rolling(3, min_periods=2).sum())
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def import_or_none(path):
    with ignoring():
        return pytest.importorskip(path)
    return None
def test_ignoring_deprecated():
    with pytest.warns(FutureWarning, match="contextlib.suppress"):
        with ignoring(ValueError):
            pass
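The deprecation warning above points at the standard-library replacement, so migration is mechanical. A sketch; the exception type and module name are only illustrative:

from contextlib import suppress

# `with ignoring(ImportError):` becomes:
with suppress(ImportError):
    import numexpr  # optional dependency; silently absent if not installed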
def import_or_none(path):
    with ignoring(BaseException):
        return pytest.importorskip(path)
    return None
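For illustration, a hypothetical call site for `import_or_none`; because `pytest.importorskip` raises a `BaseException`-derived skip when the module is missing, suppressing it turns the skip into a plain `None`:

# Hypothetical usage; the module name is just an example.
numpy_mod = import_or_none("numpy")
if numpy_mod is None:
    print("numpy not available; dependent tests can guard on this")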
def test_isnull_result_is_an_array():
    # regression test for https://github.com/dask/dask/issues/3822
    arr = da.from_array(np.arange(3, dtype=np.int64), chunks=-1)
    with ignoring(ImportError):
        result = da.isnull(arr[0]).compute()
        assert type(result) is np.ndarray
def _close_job(cls, job_id):
    if job_id:
        with ignoring(RuntimeError):  # deleting job when job already gone
            cls._call(shlex.split(cls.cancel_command) + [job_id])
        logger.debug("Closed job %s", job_id)
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name, pid_file, resources,
         dashboard, bokeh, bokeh_port, scheduler_file, dashboard_prefix,
         tls_ca_file, tls_cert, tls_key, dashboard_address, **kwargs):
    g0, g1, g2 = gc.get_threshold()  # https://github.com/dask/distributed/issues/1653
    gc.set_threshold(g0 * 3, g1 * 3, g2 * 3)

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    if bokeh_port is not None:
        warnings.warn(
            "The --bokeh-port flag has been renamed to --dashboard-address. "
            "Consider adding ``--dashboard-address :%d`` " % bokeh_port)
        dashboard_address = bokeh_port
    if bokeh is not None:
        warnings.warn(
            "The --bokeh/--no-bokeh flag has been renamed to "
            "--dashboard/--no-dashboard. ")
        dashboard = bokeh

    sec = Security(**{k: v
                      for k, v in [("tls_ca_file", tls_ca_file),
                                   ("tls_worker_cert", tls_cert),
                                   ("tls_worker_key", tls_key)]
                      if v is not None})

    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker. "
            "You cannot use the --port argument when nprocs > 1.")
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker. "
            "You cannot use the --no-nanny argument when nprocs > 1.")
        exit(1)

    if contact_address and not listen_address:
        logger.error(
            "Failed to launch worker. "
            "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when --worker-port or --host is given.")
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address, strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if not nthreads:
        nthreads = CPU_COUNT // nprocs

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs.update({"worker_port": worker_port,
                       "listen_address": listen_address})
        t = Nanny
    else:
        if nanny_port:
            kwargs["service_ports"] = {"nanny": nanny_port}
        t = Worker

    if (not scheduler and not scheduler_file and
            dask.config.get("scheduler-address", None) is None):
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    with ignoring(TypeError, ValueError):
        name = int(name)

    nannies = [
        t(scheduler,
          scheduler_file=scheduler_file,
          nthreads=nthreads,
          loop=loop,
          resources=resources,
          security=sec,
          contact_address=contact_address,
          host=host,
          port=port,
          dashboard_address=dashboard_address if dashboard else None,
          service_kwargs={"dashboard": {"prefix": dashboard_prefix}},
          name=name if nprocs == 1 or name is None or name == ""
          else str(name) + "-" + str(i),
          **kwargs)
        for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n.close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        yield nannies
        yield [n.finished() for n in nannies]

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except TimeoutError:
        # We already log the exception in nanny / worker. Don't do it again.
        raise TimeoutError("Timed out starting worker.") from None
    except KeyboardInterrupt:
        pass
    finally:
        logger.info("End worker")
def test_isnull():
    x = np.array([1, np.nan])
    a = from_array(x, chunks=(2,))
    with ignoring(ImportError):
        assert eq(isnull(a), np.isnan(x))
        assert eq(notnull(a), ~np.isnan(x))
class RandomState(object):
    """
    Mersenne Twister pseudo-random number generator

    This object contains state to deterministically generate pseudo-random
    numbers from a variety of probability distributions. It is identical to
    ``Random_State_class`` except that all functions also take a ``chunks=``
    keyword argument.

    Examples
    --------
    >>> import dask.array as da
    >>> state = da.random.RandomState(1234)  # a seed
    >>> x = state.normal(10, 0.1, size=3, chunks=(2,))
    >>> x.compute()
    array([ 10.01867852,  10.04812289,   9.89649746])

    See Also:
        Random_State_class
    """
    def __init__(self, seed=None):
        self._numpy_state = Random_State_class(seed)

    def seed(self, seed=None):
        self._numpy_state.seed(seed)

    def _wrap(self, func, *args, **kwargs):
        """ Wrap numpy random function to produce dask.array random function

        extra_chunks should be a chunks tuple to append to the end of chunks
        """
        size = kwargs.pop('size', None)
        chunks = kwargs.pop('chunks')
        extra_chunks = kwargs.pop('extra_chunks', ())

        if size is not None and not isinstance(size, (tuple, list)):
            size = (size,)

        args_shapes = {ar.shape for ar in args
                       if isinstance(ar, (Array, np.ndarray))}
        # Include array-valued kwargs as well; note that `set.union`
        # returns a new set, so the result must be assigned back.
        args_shapes = args_shapes.union(
            {ar.shape for ar in kwargs.values()
             if isinstance(ar, (Array, np.ndarray))})

        shapes = list(args_shapes)
        if size is not None:
            shapes += [size]
        # broadcast to the final size(shape)
        size = broadcast_shapes(*shapes)
        chunks = normalize_chunks(chunks, size)
        slices = slices_from_chunks(chunks)

        def _broadcast_any(ar, shape, chunks):
            if isinstance(ar, Array):
                return broadcast_to(ar, shape).rechunk(chunks)
            if isinstance(ar, np.ndarray):
                return np.ascontiguousarray(np.broadcast_to(ar, shape))

        # Broadcast all arguments, get tiny versions as well
        # Start adding the relevant bits to the graph
        dsk = {}
        dsks = []
        lookup = {}
        small_args = []
        for i, ar in enumerate(args):
            if isinstance(ar, (np.ndarray, Array)):
                res = _broadcast_any(ar, size, chunks)
                if isinstance(res, Array):
                    dsks.append(res.dask)
                    lookup[i] = res.name
                elif isinstance(res, np.ndarray):
                    name = 'array-{}'.format(tokenize(res))
                    lookup[i] = name
                    dsk[name] = res
                small_args.append(ar[tuple(0 for _ in ar.shape)])
            else:
                small_args.append(ar)

        small_kwargs = {}
        for key, ar in kwargs.items():
            if isinstance(ar, (np.ndarray, Array)):
                res = _broadcast_any(ar, size, chunks)
                if isinstance(res, Array):
                    dsks.append(res.dask)
                    lookup[key] = res.name
                elif isinstance(res, np.ndarray):
                    name = 'array-{}'.format(tokenize(res))
                    lookup[key] = name
                    dsk[name] = res
                small_kwargs[key] = ar[tuple(0 for _ in ar.shape)]
            else:
                small_kwargs[key] = ar

        # Get dtype
        small_kwargs['size'] = (0,)
        dtype = func(xoroshiro128plus.RandomState(), *small_args,
                     **small_kwargs).dtype

        sizes = list(product(*chunks))
        state_data = random_state_data(len(sizes), self._numpy_state)
        token = tokenize(state_data, size, chunks, args, kwargs)
        name = 'da.random.{0}-{1}'.format(func.__name__, token)

        keys = product([name], *([range(len(bd)) for bd in chunks] +
                                 [[0]] * len(extra_chunks)))
        blocks = product(*[range(len(bd)) for bd in chunks])
        vals = []
        for state, size, slc, block in zip(state_data, sizes, slices, blocks):
            arg = []
            for i, ar in enumerate(args):
                if i not in lookup:
                    arg.append(ar)
                else:
                    if isinstance(ar, Array):
                        arg.append((lookup[i],) + block)
                    else:  # np.ndarray
                        arg.append((getitem, lookup[i], slc))
            kwrg = {}
            for k, ar in kwargs.items():
                if k not in lookup:
                    kwrg[k] = ar
                else:
                    if isinstance(ar, Array):
                        kwrg[k] = (lookup[k],) + block
                    else:  # np.ndarray
                        kwrg[k] = (getitem, lookup[k], slc)
            vals.append((_apply_random, func.__name__, state, size, arg, kwrg))
        dsk.update(dict(zip(keys, vals)))

        dsk = sharedict.merge((name, dsk), *dsks)
        return Array(dsk, name, chunks + extra_chunks, dtype=dtype)

    @doc_wraps(Random_State_class.beta)
    def beta(self, a, b, size=None, chunks=None):
        return self._wrap(Random_State_class.beta, a, b,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.binomial)
    def binomial(self, n, p, size=None, chunks=None):
        return self._wrap(Random_State_class.binomial, n, p,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.chisquare)
    def chisquare(self, df, size=None, chunks=None):
        return self._wrap(Random_State_class.chisquare, df,
                          size=size, chunks=chunks)

    # Only define `choice` when the underlying RandomState provides it;
    # `doc_wraps` raises AttributeError otherwise.
    with ignoring(AttributeError):
        @doc_wraps(Random_State_class.choice)
        def choice(self, a, size=None, replace=True, p=None, chunks=None):
            dsks = []
            # Normalize and validate `a`
            if isinstance(a, Integral):
                # On windows the output dtype differs if p is provided or
                # absent, see https://github.com/numpy/numpy/issues/9867
                dummy_p = np.array([1]) if p is not None else p
                dtype = np.random.choice(1, size=(), p=dummy_p).dtype
                len_a = a
                if a < 0:
                    raise ValueError("a must be greater than 0")
            else:
                a = asarray(a).rechunk(a.shape)
                dtype = a.dtype
                if a.ndim != 1:
                    raise ValueError("a must be one dimensional")
                len_a = len(a)
                dsks.append(a.dask)
                a = a.__dask_keys__()[0]

            # Normalize and validate `p`
            if p is not None:
                if not isinstance(p, Array):
                    # If p is not a dask array, first check the sum is close
                    # to 1 before converting.
                    p = np.asarray(p)
                    if not np.isclose(p.sum(), 1, rtol=1e-7, atol=0):
                        raise ValueError("probabilities do not sum to 1")
                    p = asarray(p)
                else:
                    p = p.rechunk(p.shape)

                if p.ndim != 1:
                    raise ValueError("p must be one dimensional")
                if len(p) != len_a:
                    raise ValueError("a and p must have the same size")

                dsks.append(p.dask)
                p = p.__dask_keys__()[0]

            if size is None:
                size = ()
            elif not isinstance(size, (tuple, list)):
                size = (size,)

            chunks = normalize_chunks(chunks, size)
            sizes = list(product(*chunks))
            state_data = random_state_data(len(sizes), self._numpy_state)

            name = 'da.random.choice-%s' % tokenize(state_data, size, chunks,
                                                    a, replace, p)
            keys = product([name], *(range(len(bd)) for bd in chunks))
            dsk = {k: (_choice, state, a, size, replace, p)
                   for k, state, size in zip(keys, state_data, sizes)}

            return Array(sharedict.merge((name, dsk), *dsks),
                         name, chunks, dtype=dtype)

    # @doc_wraps(Random_State_class.dirichlet)
    # def dirichlet(self, alpha, size=None, chunks=None):

    @doc_wraps(Random_State_class.exponential)
    def exponential(self, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.exponential, scale,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.f)
    def f(self, dfnum, dfden, size=None, chunks=None):
        return self._wrap(Random_State_class.f, dfnum, dfden,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.gamma)
    def gamma(self, shape, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.gamma, shape, scale,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.geometric)
    def geometric(self, p, size=None, chunks=None):
        return self._wrap(Random_State_class.geometric, p,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.gumbel)
    def gumbel(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.gumbel, loc, scale,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.hypergeometric)
    def hypergeometric(self, ngood, nbad, nsample, size=None, chunks=None):
        return self._wrap(Random_State_class.hypergeometric,
                          ngood, nbad, nsample,
                          size=size, chunks=chunks)
    @doc_wraps(Random_State_class.laplace)
    def laplace(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.laplace, loc, scale,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.logistic)
    def logistic(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.logistic, loc, scale,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.lognormal)
    def lognormal(self, mean=0.0, sigma=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.lognormal, mean, sigma,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.logseries)
    def logseries(self, p, size=None, chunks=None):
        return self._wrap(Random_State_class.logseries, p,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.multinomial)
    def multinomial(self, n, pvals, size=None, chunks=None):
        return self._wrap(Random_State_class.multinomial, n, pvals,
                          size=size, chunks=chunks,
                          extra_chunks=((len(pvals),),))

    @doc_wraps(Random_State_class.negative_binomial)
    def negative_binomial(self, n, p, size=None, chunks=None):
        return self._wrap(Random_State_class.negative_binomial, n, p,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.noncentral_chisquare)
    def noncentral_chisquare(self, df, nonc, size=None, chunks=None):
        return self._wrap(Random_State_class.noncentral_chisquare, df, nonc,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.noncentral_f)
    def noncentral_f(self, dfnum, dfden, nonc, size=None, chunks=None):
        return self._wrap(Random_State_class.noncentral_f, dfnum, dfden, nonc,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.normal)
    def normal(self, loc=0.0, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.normal, loc, scale,
                          size=size, chunks=chunks, method='zig')

    @doc_wraps(Random_State_class.pareto)
    def pareto(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.pareto, a,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.poisson)
    def poisson(self, lam=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.poisson, lam,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.power)
    def power(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.power, a,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.randint)
    def randint(self, low, high=None, size=None, chunks=None):
        return self._wrap(Random_State_class.randint, low, high,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.random_integers)
    def random_integers(self, low, high=None, size=None, chunks=None):
        return self._wrap(Random_State_class.random_integers, low, high,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.random_sample)
    def random_sample(self, size=None, chunks=None):
        return self._wrap(Random_State_class.random_sample,
                          size=size, chunks=chunks)

    random = random_sample

    @doc_wraps(Random_State_class.rayleigh)
    def rayleigh(self, scale=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.rayleigh, scale,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.standard_cauchy)
    def standard_cauchy(self, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_cauchy,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.standard_exponential)
    def standard_exponential(self, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_exponential,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.standard_gamma)
    def standard_gamma(self, shape, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_gamma, shape,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.standard_normal)
    def standard_normal(self, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_normal,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.standard_t)
    def standard_t(self, df, size=None, chunks=None):
        return self._wrap(Random_State_class.standard_t, df,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.tomaxint)
    def tomaxint(self, size=None, chunks=None):
        return self._wrap(Random_State_class.tomaxint,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.triangular)
    def triangular(self, left, mode, right, size=None, chunks=None):
        return self._wrap(Random_State_class.triangular, left, mode, right,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.uniform)
    def uniform(self, low=0.0, high=1.0, size=None, chunks=None):
        return self._wrap(Random_State_class.uniform, low, high,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.vonmises)
    def vonmises(self, mu, kappa, size=None, chunks=None):
        return self._wrap(Random_State_class.vonmises, mu, kappa,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.wald)
    def wald(self, mean, scale, size=None, chunks=None):
        return self._wrap(Random_State_class.wald, mean, scale,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.weibull)
    def weibull(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.weibull, a,
                          size=size, chunks=chunks)

    @doc_wraps(Random_State_class.zipf)
    def zipf(self, a, size=None, chunks=None):
        return self._wrap(Random_State_class.zipf, a,
                          size=size, chunks=chunks)
def test_isnull():
    x = np.array([1, np.nan])
    a = da.from_array(x, chunks=(2,))
    with ignoring(ImportError):
        assert_eq(da.isnull(a), np.isnan(x))
        assert_eq(da.notnull(a), ~np.isnan(x))
def test_query():
    df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]})
    a = dd.from_pandas(df, npartitions=2)
    q = a.query('x**2 > y')
    with ignoring(ImportError):
        assert eq(q, df.query('x**2 > y'))
def test_query():
    df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    a = dd.from_pandas(df, npartitions=2)
    q = a.query("x**2 > y")
    with ignoring(ImportError):
        assert eq(q, df.query("x**2 > y"))