def test_apply_along_axis(func1d_name, func1d, shape, axis):
    a = np.random.randint(0, 10, shape)
    d = da.from_array(a, chunks=(len(shape) * (5,)))

    if (
        func1d_name == "range2"
        and LooseVersion(np.__version__) < LooseVersion("1.13.0")
    ):
        with pytest.raises(ValueError):
            da.apply_along_axis(func1d, axis, d)
    else:
        assert_eq(
            da.apply_along_axis(func1d, axis, d),
            np.apply_along_axis(func1d, axis, a),
        )
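# Hedged sketch of the parametrization this test relies on (the decorators are
# not shown in the snippet; the exact cases in dask's suite may differ). The
# "range2" case returns a 2-d result, which numpy < 1.13 rejected:
@pytest.mark.parametrize("func1d_name, func1d", [
    ("ndim", lambda x: x.ndim),
    ("sum", lambda x: x.sum()),
    ("range", lambda x: [x.min(), x.max()]),
    ("range2", lambda x: [[x.min(), x.max()], [x.max(), x.min()]]),
])
@pytest.mark.parametrize("shape, axis", [((10, 15, 20), a) for a in range(3)])
def test_apply_along_axis(func1d_name, func1d, shape, axis):
    ...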
import datetime as dt

import dask.array as da
import numpy as np
from tqdm import tqdm


def local_times(coords, time_index, hour_range=(7, 17)):
    """Return a 2d array of local datetimes for a set of points between the
    hours given by ``hour_range`` (defaults to 7 AM-5 PM).
    """
    # Day one: parse the first timestamp
    t1 = time_index[0][:16].decode("utf-8")
    d1 = dt.datetime.strptime(t1, "%Y-%m-%dT%H:%M")

    # Offset the start date by each point's time zone
    coords["day1"] = coords["zone"].apply(lambda x: d1 + dt.timedelta(hours=x))

    # An array with every day in the hour range would be useful...
    # An array of first days
    # first_days = [pd.Timestamp(d).to_pydatetime() for d in coords["day1"]]
    first_days = coords["day1"].values

    # Every half-hour increment in a day for one start date
    rt = range(len(time_index))
    periods = [30 * i for i in rt]

    def single(d):
        return np.array([d + np.timedelta64(periods[i], "m") for i in rt])

    # Longest way: build the array row by row
    ndarray = []
    for i in tqdm(range(len(first_days))):
        array = single(first_days[i])
        ndarray.append(array)

    # With dask?
    ddays = da.from_array(first_days, chunks="auto")
    fddays = da.apply_along_axis(single, 0, ddays, dtype="<M8[ns]")
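# Hedged caveat on the exploratory dask call above: for a 1-d input array,
# apply_along_axis hands func1d the entire axis in one call, so `single`
# receives all start dates at once rather than one date per call.
import dask.array as da

probe = da.apply_along_axis(lambda v: v.shape[0], 0, da.arange(8, chunks=4))
print(probe.compute())  # 8 -> func1d saw the whole axis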
def least_squares(lhs, rhs, rcond=None, skipna=False):
    import dask.array as da

    lhs_da = da.from_array(lhs, chunks=(rhs.chunks[0], lhs.shape[1]))
    if skipna:
        added_dim = rhs.ndim == 1
        if added_dim:
            rhs = rhs.reshape(rhs.shape[0], 1)
        results = da.apply_along_axis(
            nputils._nanpolyfit_1d,
            0,
            rhs,
            lhs_da,
            dtype=float,
            shape=(lhs.shape[1] + 1,),
            rcond=rcond,
        )
        coeffs = results[:-1, ...]
        residuals = results[-1, ...]
        if added_dim:
            coeffs = coeffs.reshape(coeffs.shape[0])
            residuals = residuals.reshape(residuals.shape[0])
    else:
        coeffs, residuals, _, _ = da.linalg.lstsq(lhs_da, rhs)
    return coeffs, residuals
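# Hedged usage sketch for least_squares above: data and shapes are
# illustrative, and the skipna=False path is used so the (assumed
# xarray.core.nputils) helper is not needed.
import numpy as np
import dask.array as da

lhs = np.vander(np.linspace(0.0, 1.0, 10), 2)              # design matrix, (10, 2)
rhs = da.from_array(np.random.rand(10, 3), chunks=(5, 3))  # targets, (10, 3)
coeffs, residuals = least_squares(lhs, rhs)
print(coeffs.compute().shape)  # (2, 3): one coefficient set per target column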
def test_apply_along_axis(func1d_name, func1d, shape, axis):
    a = np.random.randint(0, 10, shape)
    d = da.from_array(a, chunks=(len(shape) * (5,)))

    assert_eq(
        da.apply_along_axis(func1d, axis, d),
        np.apply_along_axis(func1d, axis, a),
    )
def detrend(cube, dimension='time', method='linear'):
    """
    Detrend data along a given dimension.

    Parameters
    ----------
    cube: iris.cube.Cube
        Input cube.
    dimension: str
        Dimension to detrend.
    method: str
        Method to detrend. Available: linear, constant. See documentation of
        'scipy.signal.detrend' for details.

    Returns
    -------
    iris.cube.Cube
        Detrended cube.
    """
    coord = cube.coord(dimension)
    axis = cube.coord_dims(coord)[0]
    detrended = da.apply_along_axis(
        scipy.signal.detrend,
        axis=axis,
        arr=cube.lazy_data(),
        type=method,
        shape=(cube.shape[axis],)
    )
    return cube.copy(detrended)
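# The same pattern without iris: a minimal, hedged sketch of mapping
# scipy.signal.detrend over one axis of a plain dask array.
import dask.array as da
import scipy.signal

arr = da.random.random((120, 4, 5), chunks=(30, 4, 5))  # e.g. time, lat, lon
detrended = da.apply_along_axis(
    scipy.signal.detrend, 0, arr,
    type="linear",           # forwarded to scipy.signal.detrend
    shape=(arr.shape[0],),   # each 1-d slice keeps its length
    dtype=arr.dtype,
)
assert detrended.shape == arr.shape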
def calibrate_posterior_predictive(post_pred, qc):
    """Function to calibrate posterior predictive.

    This allows the calibrated model to make predictions. This function is
    required to compute the mean and log likelihood of the calibrated model.

    Args:
        post_pred: posterior predictive of shape (num samples, num X values)
        qc: calibration object as defined in class QuantileCalibration

    Returns:
        calibrated posterior predictive of shape (num samples, num X values)
    """
    # Need to convert from jax array to dask array to avoid an out of memory
    # error (on a 32GB machine for 8000 samples) in the next step. This also
    # helps to parallelize the task to all CPU cores.
    post_pred_shape = post_pred.shape
    res_main_post_pred = da.from_array(
        np.array(post_pred),
        chunks=(
            1000,  # reduce this value if out of memory!
            np.ceil(post_pred_shape[1] / dask.system.cpu_count()),
        ),
    )

    # Expand to 3D: axis 0: num observations; axis 1: num samples;
    # axis 2: num samples
    uncalibrated_pp_quantiles = (
        da.sum(
            res_main_post_pred.T[:, :, np.newaxis]
            <= res_main_post_pred.T[:, np.newaxis, :],
            axis=1,
        ).T
        / post_pred_shape[0]
    )

    # Calculate inverse R
    inverse_calibrated_pp_quantiles = da.apply_along_axis(
        qc.inverse_transform, 0, uncalibrated_pp_quantiles
    )

    # Inverse CDF by looking up existing samples with np.quantile()
    da_combined = da.vstack(
        [res_main_post_pred, inverse_calibrated_pp_quantiles.compute()]
    )
    calibrated_post_pred = da.apply_along_axis(
        lambda q: np.quantile(
            q[:post_pred_shape[0]], q[post_pred_shape[0]:], axis=0
        ),
        0,
        da_combined,
    ).compute()

    return calibrated_post_pred
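# Hedged numpy demo of the pairwise-comparison step above (toy data): for one
# X value, a sample's empirical quantile is the fraction of samples <= it.
import numpy as np

s = np.array([0.1, 0.5, 0.3])
q = (s[:, None] >= s[None, :]).sum(axis=1) / len(s)
print(q)  # [0.333..., 1.0, 0.666...]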
def apply_distance_using_dask(t1, f1):
    # Compute the grid for any configuration.
    import numpy as np
    import dask.array as da

    # gps = da.stack(
    #     da.meshgrid(
    #         da.linspace(-0.5, 0.5, t1.nx_total),
    #         da.linspace(-0.5, 0.5, t1.ny_total),
    #         da.linspace(-0.5, 0.5, t1.nz_total),
    #     ), -1,
    # ).reshape(-1, 3)

    # * Only the unit cell.
    gps = da.stack(
        da.meshgrid(t1.x_grid, t1.y_grid, t1.z_grid, indexing='ij'), -1
    ).reshape(-1, 3)
    gps = gps.rechunk((10000, 3))  # chunks must be passed as a single tuple

    grid = da.apply_along_axis(
        func1d=grid_point_distance,
        frameda=da.from_array(t1.coord),
        Ada=da.from_array(t1.A),
        sig=da.from_array(f1.sigma),
        sigda=da.from_array(f1.sigma_array),
        epsda=da.from_array(f1.epsilon_array),
        axis=1,
        arr=gps,
    )
    return grid
def _qg_dask_array(x, axis, inplace):
    import dask.array as da
    from scipy.stats import norm
    from numpy_sugar import nanrankdata

    if inplace:
        raise NotImplementedError()

    x = x.swapaxes(1, axis)
    x = dask.array_shape_reveal(x)
    shape = da.compute(*x.shape)
    x = da.ma.masked_array(x)
    x *= -1
    x = da.apply_along_axis(_dask_apply, 0, x, nanrankdata, shape[0])
    x = x / (da.isfinite(x).sum(axis=0) + 1)
    x = da.apply_along_axis(_dask_apply, 0, x, norm.isf, shape[0])
    return x.swapaxes(1, axis)
def lazy_func(data, axis, x_data):
    """Calculate trend standard error lazily."""
    trend_std_arr = da.apply_along_axis(
        _get_slope_stderr, axis, data, x_data, dtype=data.dtype, shape=()
    )
    trend_std_arr = da.ma.masked_invalid(trend_std_arr)
    return trend_std_arr
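# Hedged aside on shape=() as used above: it declares that each 1-d slice
# reduces to a scalar, so the fitted axis is dropped from the result.
import numpy as np
import dask.array as da

out = da.apply_along_axis(np.ptp, 0, da.ones((10, 3), chunks=5),
                          shape=(), dtype=float)
assert out.shape == (3,)  # axis 0 collapsed to one scalar per column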
def code_range(path_dict):
    """
    path_dict = RANGE_CATEGORIES[5]
    """
    # Create a numeric dictionary for these keys
    key_dict = {key: i + 1 for i, key in enumerate(path_dict.keys())}
    number_dict = {k: i for i, k in key_dict.items()}
    vals = key_dict.values()
    combos = [[c for c in combinations(vals, i + 1)] for i in range(len(vals))]
    combos = [c for sc in combos for c in sc]
    combo_keys = {}
    for combo in combos:
        key = "-".join([number_dict[c] for c in combo])
        value = seq_int(combo)
        combo_keys[key] = value

    # Assign each raster a unique value
    arrays = []
    for key, path in path_dict.items():
        value = key_dict[key]
        full_path = DP.join(path)
        array = xr.open_rasterio(full_path, chunks=CHUNKS)[0].data
        array[da.isnan(array)] = 0
        array[array > 0] = value
        arrays.append(array)

    # Stack everything together - we might have to save this as a temporary file
    stack = da.stack(arrays, axis=0)
    stack = stack.rechunk((stack.shape[0], 5000, 5000))
    stack = stack[:, 4000:10000, 4000:10000]

    # Try to map the function to each point
    client = Client()
    codes = da.apply_along_axis(seq_int, 0, stack, dtype="uint8")
    future = client.compute(codes)
    result = future.result()
    client.shutdown()
    client.close()

    # Save to temp and delete
    template = rasterio.open(full_path)
    temp_path = DP2.join("test.tif")
    with rasterio.Env():
        profile = template.profile
        profile.update(dtype=rasterio.uint8, count=1, compress='lzw')
        with rasterio.open(temp_path, 'w', **profile) as dst:
            dst.write(result, 1)  # 2-d arrays need an explicit band index
def xarray_MK_trend(self):
    """Compute the linear trend over 'dim' of an xr.DataArray.

    Slope and intercept of the least-squares fit are added to an array
    which contains the slope, the significance mask and the p-test.
    """
    da = self.DataArray.copy().transpose(*self.ordered_dims)
    axis_num = da.get_axis_num(self.dim)
    data = dsa.apply_along_axis(
        self._calc_slope_MK, axis_num, da.data, dtype=np.float64, shape=(4,)
    )
    return data
def test_apply_along_axis(func1d_name, func1d, specify_output_props, input_shape, axis):
    a = np.random.randint(0, 10, input_shape)
    d = da.from_array(a, chunks=(len(input_shape) * (5,)))

    output_shape = None
    output_dtype = None

    if specify_output_props:
        slices = [0] * a.ndim
        slices[axis] = slice(None)
        slices = tuple(slices)
        sample = np.array(func1d(a[slices]))
        output_shape = sample.shape
        output_dtype = sample.dtype

    assert_eq(
        da.apply_along_axis(func1d, axis, d, dtype=output_dtype, shape=output_shape),
        np.apply_along_axis(func1d, axis, a),
    )
def to_xarray(zarr_data: xr.Dataset, channel_index: int,
              bottom_depth: xr.DataArray, attributes: dict) -> xr.Dataset:
    heave_corrected_transducer_depth = (
        zarr_data['heave'] + zarr_data['transducer_draft'][channel_index]
    )
    bottom_range = bottom_depth - heave_corrected_transducer_depth

    # Get indices of the bottom_range
    bottom_range_idx = da.searchsorted(
        da.from_array(zarr_data.range), bottom_range.data
    )

    # Append indices to the last range
    bottom_range_1 = zarr_data.sv.isel(frequency=0).data
    bottom_range_1[:, -1] = bottom_range_idx

    # Convert annotation to 2d array
    bottom_range_2 = da.apply_along_axis(
        _bottom_2d, 1, bottom_range_1,
        dtype='float32', shape=(bottom_range_1.shape[1],)
    )

    # Create dataset
    ds = xr.Dataset(
        data_vars=dict(
            bottom_range=(['ping_time', 'range'], bottom_range_2),
        ),
        coords=dict(
            frequency=zarr_data['frequency'][0],
            range=zarr_data['range'],
            ping_time=zarr_data['ping_time'],
        ),
    )

    # Remove unused dims
    remove_list = list(
        filter(lambda s: s not in ['frequency', 'ping_time', 'range'],
               list(ds.coords))
    )
    ds = ds.drop_vars(remove_list)

    for key in attributes.keys():
        ds.attrs[key] = attributes[key]

    return ds
def _mean_standardize_dask_array(x, axis, inplace):
    import warnings

    import dask.array as da
    from numpy_sugar import epsilon
    from numpy import nanmean, clip, nanstd, inf

    if inplace:
        raise NotImplementedError()

    x = x.swapaxes(1, axis)
    x = dask.array_shape_reveal(x)
    shape = da.compute(*x.shape)

    def func(a):
        a -= nanmean(a, axis=0)
        a /= clip(nanstd(a, axis=0), epsilon.tiny, inf)
        return a

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        x = da.apply_along_axis(_dask_apply, 0, x, func, shape[0])

    return x.swapaxes(1, axis)
    pass

logger.info('Initiating temporary file - ' + tmp_f)
f = h5py.File(tmp_f, 'a')  # explicit mode; h5py no longer accepts a bare File(path)
betavec = f.create_dataset('betavec', data=np.zeros([bim.shape[0], 1]),
                           chunks=(bim.shape[0] // nchunk, 1))  # chunk sizes must be integers
zvec = f.create_dataset('zvec', data=np.zeros([bim.shape[0], 1]),
                        chunks=(bim.shape[0] // nchunk, 1))
meanvec = f.create_dataset('meanvec', data=np.zeros([bim.shape[0], 1]),
                           chunks=(bim.shape[0] // nchunk, 1))
hetvec = f.create_dataset('hetvec', data=np.zeros([bim.shape[0], 1]),
                          chunks=(bim.shape[0] // nchunk, 1))

#################################### Calculation by Chunk #################
#
# Begin chunk processing
for chunk1 in chunk_array:
    print('Processing Chunk - ' + str(chunk_ind) + ' of ' + str(nchunk))

    # Compute
    geno_chunk = geno_ia[chunk1, :]
    g = da.apply_along_axis(SimpleImpute, 1, geno_chunk)
    with ProgressBar():
        # 'get=' is the legacy scheduler keyword; modern dask uses 'scheduler='
        results = da.compute(g, get=get)
    gtmp = np.stack(results[0][:, 0])

    if args.apr_flag == 'N':
        vtmp = da.diag(da.matmul(gtmp, da.matmul(gtmp, C).transpose())).compute()
    else:
        # assuming the difference is identity
        vtmp = np.diagonal(np.matmul(gtmp, gtmp.transpose()))

    beta_tmp = np.matmul(gtmp, res_surv)
    betavec[chunk1, 0] = beta_tmp
    zvec[chunk1, 0] = beta_tmp / np.sqrt(vtmp)
    meanvec[chunk1, 0] = np.stack(results[0][:, 1])
    hetvec[chunk1, 0] = np.stack(results[0][:, 2])
    chunk_ind += 1
##########################################################################
""" Function: Author: Du Fei Create Time: 2020/5/31 11:08 """ import numpy as np import dask.array as da from dask.dataframe.utils import make_meta if __name__ == '__main__': source_array = np.random.randint(0, 10, (2, 4)) index_array = np.asarray([[0, 0], [1, 0], [2, 1], [3, 2]]) b = np.apply_along_axis(lambda a: a[index_array], 1, source_array) print(b) source_array = da.from_array(source_array) # b = da.apply_along_axis(lambda a: a[index_array], 1, source_array) res = da.apply_along_axis(lambda a: a[index_array], 1, source_array, shape=make_meta(source_array).shape, dtype=make_meta(source_array).dtype).compute() print(res)
print(res)

# start = time.time()
# res = d_da.map_blocks(
#     lambda df: ruzicka_mat(cp.array(df), vector_new), dtype=cp.float32
# ).compute()
# print("dask_da_cupy:", time.time() - start)
# print(res)

if cupy_raw:  # 3 CUPY
    start = time.time()
    y = cp.zeros(len(d_da), dtype=cp.float32)
    ruzicka_kernel(
        ((len(d_da) + 31) // 32, (len(d_da) + 31) // 32),
        (32, 32),
        (cp.array(d_da), vector_new, y, 1024, len(d_da)),
    )
    print("cupy_raw:", time.time() - start)
    # print(y, len(y))

if dask_apply:  # 4 DASK ARRAY WITH apply_along_axis
    start = time.time()
    res = da.apply_along_axis(
        lambda df: ruzicka_vec(df, vector_new), 1, d_da
    ).compute()
    print("dask_apply:", time.time() - start)
    # print(res, y)

client.close()