def cgls_project(A, x, y, tol=1e-8, **options):
    r"""Project the pair (x, y) onto the graph of ``A`` using CGLS.

    The graph is :math:`G = \{(y, x) \mid y = Ax\}`.  The outputs are
    formed as:

    :math:`x_{out} = x + argmin_{x'} 1/2 \|Ax' - (y - Ax)\| + 1/2 \|x'\|_2^2`

    :math:`y_{out} = Ax_{out}`

    Parameters
    ----------
    A, x, y
        Operands exposing ``.shape`` and ``.chunks`` (presumably dask
        arrays -- TODO confirm).  ``A`` must agree with ``y`` along its
        first axis and with ``x`` along its second axis, in both shape and
        chunking.
    tol : float
        Tolerance forwarded to ``cg.cgls``.
    **options
        ``name`` (optional) replaces the generated token used to label the
        intermediate arrays.  Any other keyword only contributes to the
        generated token.

    Returns
    -------
    x_out, y_out
        The projected pair.
    res, iters
        Second and third values returned by ``cg.cgls``, passed through
        unchanged.

    Raises
    ------
    AssertionError
        If the dimensions or the chunks of the inputs are incompatible.
    """
    fmt = 'array {} compatible'
    assert (A.shape[0], A.shape[1]) == (y.shape[0], x.shape[0]), \
        fmt.format('dims')
    assert (A.chunks[0], A.chunks[1]) == (y.chunks[0], x.chunks[0]), \
        fmt.format('chunks')

    # The default token is always computed (before 'name' is removed from
    # the options), exactly as when it is written inline in the pop call.
    default_token = 'cgls-project-' + dask.base.tokenize(
        A, x, y, tol, **options)
    token = options.pop('name', default_token)
    nm_b, nm_x, nm_y = [prefix + '-' + token for prefix in ('b', 'x', 'y')]

    # b = y - Ax
    residual = atoms2.gemv(-1, A, x, 1, y, name=nm_b)

    # x_cg = argmin \|Ax' - (b)\| + \|x'\|_2^2
    x_cg, res, iters = cg.cgls(A, residual, 1, tol=tol)

    x_out = da.add(x, x_cg, name=nm_x)
    y_out = atoms2.dot(A, x_out, name=nm_y)
    return x_out, y_out, res, iters
def _tree_sum_list(list_to_sum):
    """Reduce a sequence pairwise with ``da.add`` until one item remains.

    On every pass, neighbouring items are added two by two; when the
    number of items is odd, the last one is carried over untouched to the
    next pass.

    Parameters
    ----------
    list_to_sum : sequence
        Items accepted by ``dask.array.add``.

    Returns
    -------
    sequence
        A list holding the single combined item.  If the input has fewer
        than two items it is returned as is.
    """
    import dask.array as da

    level = list_to_sum
    while len(level) > 1:
        count = len(level)
        merged = [
            da.add(level[idx], level[idx + 1])
            for idx in range(0, count - 1, 2)
        ]
        if count % 2:
            # Odd number of items: the last one has no partner this round.
            merged.append(level[count - 1])
        level = merged
    return level
def test_dtype_kwarg(dt):
    """``dtype=`` must be honoured when adding dask arrays.

    Both entry points are checked against plain NumPy: the NumPy ufunc
    called on dask arrays, and ``da.add`` itself.
    """
    np_lhs = np.array([1, 2, 3])
    np_rhs = np.array([4, 5, 6])
    expected = np.add(np_lhs, np_rhs, dtype=dt)

    da_lhs = da.from_array(np_lhs)
    da_rhs = da.from_array(np_rhs)

    for add in (np.add, da.add):
        result = add(da_lhs, da_rhs, dtype=dt)
        assert_eq(expected, result)
def test_ufunc_where_doesnt_mutate_out():
    """Dask arrays are immutable: the NumPy array backing ``out`` must not
    be written to when ``out`` is passed to a ufunc."""
    backing = np.zeros(4, dtype="i8")
    out = da.from_array(backing, chunks=2)

    lhs = da.from_array(np.arange(4, dtype="i8"), chunks=2)
    rhs = da.from_array(np.arange(4, 8, dtype="i8"), chunks=2)
    mask = da.from_array(np.array([1, 0, 0, 1], dtype="bool"), chunks=2)

    result = da.add(lhs, rhs, where=mask, out=out)

    # The very same dask array object comes back, now holding the sums
    # where the mask is set.
    assert out is result
    assert_eq(out, np.array([4, 0, 0, 10], dtype="i8"))

    # The NumPy array that `out` was created from is still all zeros.
    assert np.equal(backing, 0).all()
def _remove_bad_pixels(dask_array, bad_pixel_array):
    """Replace the values at bad pixels with the mean of their neighbours.

    The four neighbours along the last two axes are fetched with
    ``da.roll``, so a pixel on an edge takes its missing neighbour from
    the opposite edge.

    Parameters
    ----------
    dask_array : Dask array
        Must have at least two dimensions.
    bad_pixel_array : array-like
        Marks the bad pixels.  Must have either the same shape as
        dask_array, or the shape of the two last dimensions of dask_array,
        in which case it is broadcast to the full shape.

    Returns
    -------
    data_output : Dask array

    Raises
    ------
    ValueError
        If dask_array has fewer than two dimensions, or if the shape of
        bad_pixel_array matches neither of the accepted shapes.

    Examples
    --------
    >>> import pyxem.utils.dask_tools as dt
    >>> s = pxm.dummy_data.dummy_data.get_dead_pixel_signal(lazy=True)
    >>> dead_pixels = dt._find_dead_pixels(s.data)
    >>> data_output = dt._remove_bad_pixels(s.data, dead_pixels)

    """
    data_shape = dask_array.shape
    if len(data_shape) < 2:
        raise ValueError("dask_array {0} must be at least 2 dimensions".format(
            dask_array.shape))

    mask_shape = bad_pixel_array.shape
    if not (mask_shape == data_shape):
        if mask_shape == data_shape[-2:]:
            # Adding to a zero array of the full shape broadcasts the
            # mask over the leading dimensions.
            bad_pixel_array = da.add(da.zeros_like(dask_array),
                                     bad_pixel_array)
        else:
            raise ValueError(
                "bad_pixel_array {0} must either 2-D and have the same shape "
                "as the two last dimensions in dask_array {1}. Or be "
                "the same shape as dask_array {2}".format(
                    bad_pixel_array.shape,
                    dask_array.shape[-2:],
                    dask_array.shape))

    # Copies shifted by one pixel each way along the last two axes.
    shifted = [
        da.roll(dask_array, shift=step, axis=axis)
        for axis in (-2, -1)
        for step in (1, -1)
    ]
    neighbour_mean = (shifted[0] + shifted[1] + shifted[2] + shifted[3]) / 4

    # Keep the neighbour mean only at bad pixels, zero the bad pixels in
    # the data, and combine the two.
    replacement = neighbour_mean * bad_pixel_array
    kept = da.multiply(dask_array, da.logical_not(bad_pixel_array))
    return kept + replacement
def test_ufunc_where_broadcasts(left_is_da, right_is_da, where_is_da):
    """``where`` with more dimensions than the operands must broadcast.

    Each of the operands and the mask is passed either as a dask array or
    as a NumPy array, depending on the corresponding flag.
    """

    def pick(array, as_dask):
        # Wrap in a dask array (chunks of 2) only when asked to.
        return da.from_array(array, chunks=2) if as_dask else array

    left = np.arange(4)
    right = np.arange(4, 8)
    where = np.array(
        [[0, 1, 1, 0], [1, 0, 0, 1], [0, 1, 0, 1]]
    ).astype("bool")

    np_out = np.zeros(where.shape, dtype=left.dtype)
    d_out = da.zeros(where.shape, dtype=left.dtype)

    d_where = pick(where, where_is_da)
    d_left = pick(left, left_is_da)
    d_right = pick(right, right_is_da)

    expected = np.add(left, right, where=where, out=np_out)
    result = da.add(d_left, d_right, where=d_where, out=d_out)

    assert result is d_out
    assert_eq(expected, result)
def test_ufunc_where(dtype, left_is_da, right_is_da, where_kind):
    """``da.add`` with ``where``, ``out`` and ``dtype`` must match NumPy.

    ``where_kind`` is either a plain boolean, used directly as the mask,
    or a label selecting an array mask; the label ``"dask"`` makes the
    mask given to dask a dask array.
    """
    left = np.arange(12).reshape((3, 4))
    right = np.arange(4)

    np_out = np.zeros_like(left, dtype=dtype)
    d_out = da.zeros_like(left, dtype=dtype)

    if where_kind in (True, False):
        where = where_kind
        d_where = where_kind
    else:
        where = np.array([False, True, True, False])
        if where_kind == "dask":
            d_where = da.from_array(where, chunks=2)
        else:
            d_where = where

    if left_is_da:
        d_left = da.from_array(left, chunks=2)
    else:
        d_left = left
    if right_is_da:
        d_right = da.from_array(right, chunks=2)
    else:
        d_right = right

    expected = np.add(left, right, where=where, out=np_out, dtype=dtype)
    result = da.add(d_left, d_right, where=d_where, out=d_out, dtype=dtype)

    assert result is d_out
    assert_eq(expected, result)
def test_ufunc_where_no_out():
    """``da.add`` with ``where`` but without ``out`` must match NumPy on
    the selected elements and must actually apply the mask."""
    left = np.arange(4)
    right = np.arange(4, 8)
    where = np.array([[0, 1, 1, 0], [1, 0, 0, 1], [0, 1, 0, 1]]).astype("bool")

    d_where = da.from_array(where, chunks=2)
    d_left = da.from_array(left, chunks=2)
    d_right = da.from_array(right, chunks=2)

    expected = np.add(left, right, where=where)
    result = da.add(d_left, d_right, where=d_where)

    # If no `out` is provided, numpy leaves elements that don't match `where`
    # uninitialized, so they effectively may be any random value. We test
    # that the set values match, and that the result differs from what would
    # be produced if `where` wasn't provided (to test that `where` was
    # actually passed).
    computed = result.compute()

    expected_masked = np.where(where, expected, 0)
    # Mask the *dask* result here: masking `expected` again would compare
    # NumPy's output with itself and never look at what dask produced.
    result_masked = np.where(where, computed, 0)
    assert_eq(expected_masked, result_masked)

    expected_no_where = np.add(left, right)
    assert not np.equal(computed, expected_no_where).all()
# Wrap the NumPy array `np_arr` (defined outside this snippet) in a Dask
# array using a chunk size of 5.
dask_arr_from_np = da.from_array(np_arr, chunks=5)
print(dask_arr_from_np.compute())  # evaluate and show the values
print(dask_arr_from_np.chunks)  # chunk sizes along each dimension

# Draw the task graphs, laid out left to right, of a reduction and of an
# element-wise operation on that array.
dask_arr_from_np.sum().visualize(rankdir="LR")
(dask_arr_from_np + 1).visualize(rankdir="LR")

# Mean of the array: build it, evaluate it, then draw its task graph.
dask_arr_mean = da.mean(dask_arr_from_np)
dask_arr_mean.compute()
dask_arr_mean.visualize(rankdir="LR")

# Two random arrays of 10 elements in chunks of 2, added element-wise and
# then averaged.
x = da.random.random(10, chunks=2)
y = da.random.random(10, chunks=2)
sum_x_y = da.add(x, y)
mean_x_y = da.mean(sum_x_y)
sum_x_y.compute()
sum_x_y.visualize()
mean_x_y.visualize()

# A 50000 x 50000 array of random integers in chunks of 5000 x 1000.
da_arr_large = da.random.randint(10000, size=(50000, 50000), chunks=(5000, 1000))
da_sum_large = da_arr_large.sum()
print(da_arr_large.nbytes / 1e+9)  # array size in units of 1e9 bytes
# NOTE(review): the full sum is left disabled, presumably because it is
# expensive to evaluate -- confirm before enabling.
# print(da_sum_large.compute())

# Going Deeper
import numpy as np
import dask.array as da

# A plain NumPy array of 10 random values, for comparison.
arr1 = np.random.random(10)
print(arr1)

# A Dask array of 10 random values with a chunk size of 3. Printing the
# array object shows its description; `.compute()` produces the values.
dask_arr1 = da.random.random(10, chunks=3)
print(dask_arr1)
print(dask_arr1.compute())
print(dask_arr1.chunks)  # chunk sizes along each dimension

# Wrap the existing NumPy array in a Dask array with a chunk size of 2.
dask_arr2 = da.from_array(arr1, chunks=2)
print(dask_arr2.compute())

# Two arrays of 1,000,000 random values in chunks of 100, added
# element-wise.
dask_arr3 = da.random.random(1000000, chunks=100)
dask_arr4 = da.random.random(1000000, chunks=100)
dask_arr5 = da.add(dask_arr3, dask_arr4)
# NOTE(review): evaluating the sum is left disabled, presumably because of
# the large number of chunks -- confirm before enabling.
#print(dask_arr5.compute())
print(dask_arr5.chunks)