def multidim_lazy_stack(stack):
    """
    Recursively build a multidimensional stacked dask array.

    This is needed because dask.array.stack only accepts a 1-dimensional list.

    Args:

    * stack:
        An ndarray of dask arrays.

    Returns:
        The input array converted to a lazy dask array.

    """
    if stack.ndim == 0:
        # A 0-d array cannot be stacked.
        result = stack.item()
    elif stack.ndim == 1:
        # Another base case: a simple 1-d list goes directly into dask.
        result = da.stack(list(stack))
    else:
        # Recurse because dask.stack does not do multi-dimensional.
        result = da.stack([multidim_lazy_stack(subarray)
                           for subarray in stack])
    return result
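# A minimal usage sketch for multidim_lazy_stack (not from the original source):
# build a 2 x 3 object array of small dask arrays and stack it into one lazy
# 4-d array. The tile sizes and chunking below are arbitrary choices.
import numpy as np
import dask.array as da

tiles = np.empty((2, 3), dtype=object)
for i in range(2):
    for j in range(3):
        tiles[i, j] = da.zeros((10, 10), chunks=(5, 5))

stacked = multidim_lazy_stack(tiles)
print(stacked.shape)  # (2, 3, 10, 10), still lazy until .compute()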
def haversines(x1, x2, y1, y2, z1=None, z2=None):
    x1, x2 = da.deg2rad(x1), da.deg2rad(x2)
    y1, y2 = da.deg2rad(y1), da.deg2rad(y2)
    x = (x2 - x1) * da.cos((y1 + y2) * 0.5) * cst.r_earth
    y = (y2 - y1) * cst.r_earth * da.ones_like(x1) * da.ones_like(x2)
    if z1 is None or z2 is None:
        return da.stack((x, y), axis=-1)
    else:
        z1 = da.where(da.isnan(z1), 0, z1)
        z2 = da.where(da.isnan(z2), 0, z2)
        z = (z2 - z1) * da.ones_like(x)
        return da.stack((x, y, z), axis=-1)
def __call__(self, times=None):
    """Run the filtering process on this experiment."""
    # run over the full range of valid time indices unless specified otherwise
    tgrid = self.fieldset.gridset.grids[0].time
    if times is None:
        times = tgrid.copy()

    if self.uneven_window:
        raise NotImplementedError("uneven windows aren't supported")

    # restrict to period covered by window
    times = np.array(times)
    window_left = times - tgrid[0] >= self.window_size
    window_right = times <= tgrid[-1] - self.window_size
    times = times[window_left & window_right]

    da_out = {v: [] for v in self.sample_variables}

    # do the filtering at each timestep
    for idx, time in enumerate(times):
        # returns a dictionary of sample_variable -> dask array
        filtered = self.filter_step(idx, time)
        for v, a in filtered.items():
            da_out[v].append(a)

    # dump all to disk
    da.to_hdf5(self.name + ".h5", {v: da.stack(a) for v, a in da_out.items()})
def cutOffAtDEM(self, assign=True):
    topo = self.gridObj.topo + self.gridObj.mdata["datum"]
    #topo = da.flipud(topo)
    removalIndex = []

    #verify that there is presently a DEM
    if (not (self.gridObj.gridTopography)):
        print("no dem present in grid file, please assign one to continue")
        return

    #do it one complete layer at a time looping downward
    for i in range(self.gridObj.z.shape[2] - 1, -1, -1):
        #the greater than is because the indexing of the rFile is flipped from what you would expect
        #the transposition is to deal with the fact that the 2d mesh grid is flipped relative to the 3d one
        removalIndex.append(self.gridObj.z[:, :, i] >= (topo))

    #join into a single dask array
    removalIndex = da.stack(removalIndex, axis=-1)
    removalIndex = removalIndex.flatten()

    #drop values
    self.gridObj.vp[removalIndex] = -999
    self.gridObj.vs[removalIndex] = -999
    self.gridObj.p[removalIndex] = -999
    self.gridObj.qp[removalIndex] = -999
    self.gridObj.qs[removalIndex] = -999

    if (assign):
        print("assigning dem cut off to gridfile")
        self.gridObj.assignNewGridProperties(self.gridObj.vp, self.gridObj.vs,
                                             self.gridObj.p, self.gridObj.qp,
                                             self.gridObj.qs)
def hdulists_to_dask_cube(all_hduls, plane_shape, ext=0, dtype=float):
    cube = da.stack([
        da.from_delayed(hdul[ext].data, shape=plane_shape, dtype=dtype)
        for hdul in all_hduls
    ])
    log.info(f"Dask Array of shape {cube.shape} created from HDULists")
    return cube
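# A hedged sketch of how the all_hduls argument might be built, assuming the
# FITS files are opened lazily with dask.delayed and astropy.io.fits. The file
# names and plane shape here are placeholders, not from the original source.
import dask
from astropy.io import fits

paths = ["frame_000.fits", "frame_001.fits"]  # hypothetical inputs
all_hduls = [dask.delayed(fits.open)(p) for p in paths]
cube = hdulists_to_dask_cube(all_hduls, plane_shape=(512, 512))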
def cat_arrays(self):
    no_rain = self.dask_array(self.no_rain_path)
    normal = self.dask_array(self.normal_path)
    night = self.dask_array(self.night_path)
    heavy = self.dask_array(self.heavy_path)
    datasets = da.stack([no_rain, normal, heavy, night], axis=0)
    return datasets
def stats_for_tiff_images(filenames, use_test_data=False):
    def read_image(filename, band, use_test_data):
        filename = filename.format(band)
        if use_test_data:
            num = int(band)
            band_shape = (120, 120)
            return np.full(band_shape, num)
        band_ds = rasterio.open(filename)
        return np.array(band_ds.read(1))

    def images_for_band(band):
        delayed_read = dask.delayed(read_image, pure=True)
        lazy_images = [
            da.from_delayed(delayed_read(filename, band, use_test_data),
                            dtype=np.uint16, shape=(120, 120))
            for filename in filenames
        ]
        stack = da.stack(lazy_images, axis=0).rechunk('auto')
        return stack.flatten()

    all_bands = da.stack(
        [images_for_band("02"), images_for_band("03"), images_for_band("04")],
        axis=-1)

    stats = defaultdict(dict)
    for stat_name, stat_func in [('mean', da.mean), ('std', da.std),
                                 ('min', da.min), ('max', da.max)]:
        stats[stat_name] = stat_func(all_bands, axis=0).compute()
    return pd.DataFrame(stats, index=['red', 'blue', 'green'])
def as_stitched_array(self, channel_index=0, channel_name=None, t_index=0, verbose=True):
    if channel_name is not None:
        channel_index = self._channel_name_to_index(channel_name)
    z_list = []
    for z in self.z_indices:
        # this doesn't work with explore acquisitions and would need to be updated
        rows, cols = self.get_num_rows_and_cols()
        empty_tile = np.zeros((self.image_height, self.image_width), self.pixel_type)
        row_list = []
        for row in range(rows):
            if verbose:
                print('stitching row {} of {}'.format(row + 1, rows))
            col_list = []
            for col in range(cols):
                pos_index_array = np.nonzero(np.logical_and(
                    self.row_col_array[:, 0] == row,
                    self.row_col_array[:, 1] == col))[0]
                pos_index = None if pos_index_array.size == 0 else pos_index_array[0]
                if pos_index is not None and self.has_image(
                        channel_index=channel_index, z_index=z,
                        t_index=t_index, pos_index=pos_index):
                    img = self.read_image(channel_index=channel_index, z_index=z,
                                          t_index=t_index, pos_index=pos_index,
                                          memmapped=True)
                else:
                    img = empty_tile
                # crop to center of tile
                col_list.append(img[self.overlap[0] // 2: -self.overlap[0] // 2,
                                    self.overlap[1] // 2: -self.overlap[1] // 2])
            stitched_col = da.concatenate(col_list, axis=1)
            row_list.append(stitched_col)
        stitched = da.concatenate(row_list, axis=0)
        z_list.append(stitched)
    return da.stack(z_list)
def get_pet(from_time, to_time, da_pet_mask, zarr_path):
    import dask.array as da
    from_time = str2datetime(from_time)
    to_time = str2datetime(to_time)
    ds_pet = xr.open_zarr(gcsfs.GCSMap('pangeo-data/cgiar_pet'))
    da_pet = ds_pet['PET']
    pet = get_ws_p(1 / 120, da_pet_mask, da_pet, tolerance=0.000001).chunk({
        'label': 1
    }).compute()
    date_range = pd.date_range(start=from_time + timedelta(minutes=15),
                               end=to_time, freq='30min')
    arrays = [
        da.from_delayed(dask.delayed(get_pet_for_label)(date_range, label, pet),
                        dtype='float32', shape=(len(date_range), ))
        for label in pet.label.values
    ]
    stack = da.stack(arrays, axis=0)
    pet_over_time = xr.DataArray(stack,
                                 coords=[pet.label.values, date_range],
                                 dims=['label', 'time'])
    return pet_over_time
def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype, n_timepoints):
    channel_dict = dict(zip(channels, [[] for _ in range(len(channels))]))
    for i, channel in enumerate(channels):
        arr = da.stack([
            da.from_delayed(delayed(nd2_func(path, i))(j),
                            shape=frame_shape, dtype=frame_dtype)
            for j in range(n_timepoints)
        ])
        channel_dict[channel] = dask.optimize(arr)[0]

    layer_list = []
    for channel_name, channel in channel_dict.items():
        visible = channel_name in VISIBLE
        blending = 'additive' if visible else 'translucent'
        channel_color = list(CHANNEL_COLORS[channel_name])
        color = Colormap([[0, 0, 0], channel_color])
        meta = get_metadata(path)
        add_kwargs = {
            "name": channel_name,
            "visible": visible,
            "colormap": color,
            "blending": blending,
            **meta
        }
        layer_type = "image"
        layer_list.append((channel, add_kwargs, layer_type))
    return layer_list
def position_grid(shape, blocksize):
    """
    """
    coords = da.meshgrid(*[range(x) for x in shape], indexing='ij')
    coords = da.stack(coords, axis=-1).astype(np.int16)
    return da.rechunk(coords, chunks=tuple(blocksize) + (3, ))
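# Illustrative call (not from the original source; assumes da and np are
# imported as in the function above): a 64^3 voxel grid with 32^3 blocks
# gives a lazy (64, 64, 64, 3) array of integer voxel coordinates.
grid = position_grid((64, 64, 64), blocksize=(32, 32, 32))
print(grid.shape)   # (64, 64, 64, 3)
print(grid.chunks)  # 32-sized chunks along the three spatial axes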
def stretch_logarithmic(self, factor=100.):
    """Move data into range [1:factor] through normalized logarithm."""
    logger.debug("Perform a logarithmic contrast stretch.")
    crange = (0., 1.0)

    b__ = float(crange[1] - crange[0]) / np.log(factor)
    c__ = float(crange[0])

    def _band_log(arr):
        slope = (factor - 1.) / float(arr.max() - arr.min())
        arr = 1. + (arr - arr.min()) * slope
        arr = c__ + b__ * da.log(arr)
        return arr

    band_results = []
    for band in self.data['bands'].values:
        if band == 'A':
            continue
        band_data = self.data.sel(bands=band)
        res = _band_log(band_data.data)
        band_results.append(res)

    if 'A' in self.data.coords['bands'].values:
        band_results.append(self.data.sel(bands='A'))
    self.data.data = da.stack(band_results,
                              axis=self.data.dims.index('bands'))
def classify(texts):
    batch_x_text = [clearstring(t) for t in texts]
    batch_x = str_idx(batch_x_text, dict_sentiment['dictionary'], 100)
    output_sentiment = sess_sentiment.run(logits_sentiment,
                                          feed_dict={x_sentiment: batch_x})
    labels = [sentiment_label[l] for l in np.argmax(output_sentiment, 1)]
    return da.stack(labels, axis=0)
def da_linregress(x, y):
    """
    Refactor of the scipy linregress with numba, fewer checks for speed's sake,
    and done with dask arrays
    :param x: array for the independent variable
    :param y: array for the dependent variable
    :return: slope, r-squared and p-value of the fit
    """
    TINY = 1.0e-20
    # x = np.asarray(x)
    # y = np.asarray(y)
    arr = da.stack([x, y], axis=1)
    n = len(x)
    # average sum of squares:
    ssxm, ssxym, ssyxm, ssym = (da.dot(arr.T, arr) / n).ravel()
    r_num = ssxym
    r_den = np.sqrt(ssxm * ssym)
    if r_den == 0.0:
        r = 0.0
    else:
        r = r_num / r_den
        # test for numerical error propagation
        if r > 1.0:
            r = 1.0
        elif r < -1.0:
            r = -1.0

    df = n - 2
    slope = r_num / ssxm
    r_t = r + TINY
    t = r * da.sqrt(df / ((1.0 - r_t) * (1.0 + r_t)))
    prob = 2 * stats.distributions.t.sf(np.abs(t), df)
    return slope, r**2, prob
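# Small usage sketch with synthetic data (illustrative only; assumes the
# module imports numpy as np, dask.array as da and scipy.stats as stats,
# as the function body requires).
rng = np.random.RandomState(0)
x = da.from_array(rng.normal(size=1000), chunks=250)
y = 2.0 * x + 0.1 * da.from_array(rng.normal(size=1000), chunks=250)
slope, r2, prob = da_linregress(x, y)
print(float(slope), float(r2), float(prob))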
def transform_lonlats(self, lons, lats):
    R = 6370997.0
    x_coords = R * da.cos(da.deg2rad(lats)) * da.cos(da.deg2rad(lons))
    y_coords = R * da.cos(da.deg2rad(lats)) * da.sin(da.deg2rad(lons))
    z_coords = R * da.sin(da.deg2rad(lats))
    return da.stack((x_coords, y_coords, z_coords), axis=-1)
def load_data(path, chunks):
    raw_bag = read_text(path) \
        .str.strip() \
        .map(row_to_numpy)
    return da.stack(raw_bag, axis=0)
def calculatePk(number):
    print("working on {0}".format(number))
    sys.stdout.flush()
    cat = CSVCatalog(path=inpath.format(number),
                     names=["x", "y", "z", "vx", "vy", "vz", "M"])
    cat['position'] = da.stack([cat['x'], cat['y'], cat['z']]).T
    mesh = cat.to_mesh(window='cic', Nmesh=1024, compensated=True,
                       BoxSize=1500, position='position')
    print("start making power")
    sys.stdout.flush()
    # compute the power, specifying desired linear k-binning
    r = FFTPower(mesh, mode='1d', dk=0.05, kmin=0.01)
    Pk = r.power
    k = Pk['k']
    power = Pk['power'].real - Pk.attrs['shotnoise']
    print("start writing power")
    sys.stdout.flush()
    f = open(outpath + "Pk_{0}.pk".format(number), "w")
    f.write("k(h/Mpc) Pk(h/Mpc)^3\n")
    for i in range(len(Pk['k'])):
        f.write("{0} {1}\n".format(k[i], power[i]))
    f.close()
def set_norm_factors(self, data_group, fold_group, overwrite=False):
    # Get Zarr arrays
    train_indexes = fold_group['train'][:]
    X = da.from_zarr(data_group['X'])
    norm_shape = X.shape[1:]

    # Create normalization data Zarr arrays
    norm_group = fold_group.require_group('norm_data')
    norm_group.require_dataset('s1', shape=norm_shape, dtype='float32', chunks=None)
    norm_group.require_dataset('s2', shape=norm_shape, dtype='float32', chunks=None)
    norm_group.require_dataset('mean', shape=norm_shape, dtype='float32', chunks=None)
    norm_group.require_dataset('std', shape=norm_shape, dtype='float32', chunks=None)

    # Stop processing if already done AND we don't want to overwrite the dataset
    if (norm_group['s1'].nchunks == norm_group['s1'].nchunks_initialized) and not overwrite:
        return

    # Compute normalization factors
    fold_num = pathlib.PurePath(fold_group.name).name[-1]
    print(f'Computing the normalization factors for the cross-validation fold #{fold_num}.\nThis may take some time...')

    # Compute sum and squared sum
    s1 = X[train_indexes,].sum(axis=0)
    s2 = (X[train_indexes,] ** 2).sum(axis=0)
    S = da.stack([s1, s2], axis=0).compute()
    s1 = S[0,]
    s2 = S[1,]
    n = train_indexes.size

    # Fill Zarr arrays with the normalization factors
    norm_group['s1'][:] = s1
    norm_group['s2'][:] = s2
    norm_group['mean'][:] = s1 / n
    norm_group['std'][:] = np.sqrt((n * s2 - (s1 * s1)) / (n * (n - 1)))
def _get_schema(self):
    from dask.bytes import open_files
    import dask.array as da
    if self._arr is None:
        path = self._get_cache(self.path)[0]

        files = open_files(path, 'rb', compression=None, **self.storage)
        if self.shape is None:
            arr = NumpyAccess(files[0])
            self.shape = arr.shape
            self.dtype = arr.dtype
            arrs = [arr] + [NumpyAccess(f, self.shape, self.dtype)
                            for f in files[1:]]
        else:
            arrs = [NumpyAccess(f, self.shape, self.dtype) for f in files]
        self.chunks = (self._chunks, ) + (-1, ) * (len(self.shape) - 1)
        self._arrs = [da.from_array(arr, self.chunks) for arr in arrs]

        if len(self._arrs) > 1:
            self._arr = da.stack(self._arrs)
        else:
            self._arr = self._arrs[0]
        self.chunks = self._arr.chunks
    return Schema(dtype=str(self.dtype), shape=self.shape,
                  extra_metadata=self.metadata,
                  npartitions=self._arr.npartitions,
                  chunks=self.chunks)
def test_clock_tec_solve_dask():
    np.random.seed(1234)
    import pylab as plt
    times = np.arange(2)
    freqs = np.linspace(110e6, 170e6, 1000)
    cs = np.array([1, 1])
    tec = np.array([0.1, 0.2])
    delay = np.ones(len(times)) * 2e-9  # 2 ns
    phase = np.multiply.outer(np.ones(len(freqs)), cs) \
        + 8.44797256e-7 * TECU * np.multiply.outer(1. / freqs, tec) \
        + 2. * np.pi * np.multiply.outer(freqs, delay)
    phase += 15 * np.pi / 180. * np.random.normal(size=[len(freqs), len(times)])
    #plt.imshow(phase,origin='lower',extent=(times[0],times[-1],freqs[0],freqs[-1]),aspect='auto')
    #plt.colorbar()
    #plt.xlabel('times (s)')
    #plt.ylabel('freqs (Hz)')
    #plt.show()
    m, cov = least_squares_solve(phase, freqs, times, 15, Ct_ratio=0.01)
    m_exact = np.array([delay, tec, cs]).T
    import dask.array as da
    solsMH = [
        da.from_delayed(clock_tec_solve_dask(phase[:, i], freqs, m[i, :],
                                             cov[i, :, :], 15, 0.01),
                        shape=(3, ), dtype=np.double)
        for i in range(len(times))
    ]
    sol_stacked = da.stack(solsMH, axis=0)
    sol = sol_stacked.compute()
    print(sol)
def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype, n_timepoints):
    # channel_dict = dict(zip(channels, [[] for _ in range(len(channels))]))
    channel_dict = {}
    for i, channel in enumerate(channels):
        arr = da.stack([
            da.from_delayed(delayed(nd2_func(path, i))(j),
                            shape=frame_shape, dtype=frame_dtype)
            for j in range(n_timepoints)
        ])
        channel_dict[color_maps[i % len(color_maps)]] = dask.optimize(arr)[0]

    layer_list = []
    print("channel_dict", channel_dict)
    for channel_name, channel in channel_dict.items():
        blending = 'additive'
        meta = get_metadata(path)
        add_kwargs = {
            "name": channel_name,
            "colormap": channel_name,
            "blending": blending,
            "rendering": "mip",
            **meta
        }
        layer_type = "image"
        layer_list.append((channel, add_kwargs, layer_type))
    return layer_list
def get_layer_list(channels, nd2_func, path, frame_shape, frame_dtype, n_timepoints):
    channel_dict = dict(zip(channels, [[] for _ in range(len(channels))]))
    for i, channel in enumerate(channels):
        arr = da.stack([
            da.from_delayed(delayed(nd2_func(path, i))(j),
                            shape=frame_shape, dtype=frame_dtype)
            for j in range(n_timepoints)
        ])
        channel_dict[channel] = dask.optimize(arr)[0]

    layer_list = []
    for channel_name, channel in channel_dict.items():
        visible = True
        blending = 'additive' if visible else 'translucent'
        meta = get_metadata(path)
        channel_color = meta['channels'][channel_name]
        color = Colormap([[0, 0, 0], channel_color[:-1]])  # ignore alpha
        add_kwargs = {
            "name": channel_name,
            "visible": visible,
            "colormap": color,
            "blending": blending,
            "scale": meta['scale'],
            "translate": meta['translate'],
        }
        layer_type = "image"
        layer_list.append((channel, add_kwargs, layer_type))
    return layer_list
def _generate_volume(self, image_list, scale=1):
    """
    """
    image_info = self._volume_info['coordinates_list']

    # get the shape of each plane in the x y axis
    shape = self._shape
    shape = np.round(np.array(shape) * scale).astype(int)
    shape = (shape[1], shape[2])

    # find out if the image is RGB shaped
    if isinstance(image_list[0], Delayed):
        image = image_list[0].compute()
    else:
        image = image_list[0]

    # get the data type
    dtype = image.dtype

    # add the RGB dim if necessary
    if image.shape[-1] == 3:
        shape = (shape[0], shape[1], 3)
    del image

    # get a list of delayed arrays representing padded images
    arrays = [
        da.from_delayed(self._padded_image(z, image_list, image_info, shape, scale),
                        shape, dtype=dtype)
        for z in range(self._n)
    ]

    # get dask array representing image volume
    volume = da.stack(arrays, axis=0)

    return volume
def read_raster(path, block_size=1):
    """Read all bands from raster"""
    bands = range(1, get_band_count(path) + 1)
    return da.stack([
        read_raster_band(path, band=band, block_size=block_size)
        for band in bands
    ])
def black_scholes(nopt, price, strike, t, rate, vol, schd=None):
    mr = -rate
    sig_sig_two = vol * vol * 2

    P = price
    S = strike
    T = t

    a = log(P / S)
    b = T * mr

    z = T * sig_sig_two
    c = 0.25 * z
    y = da.map_blocks(invsqrt, z)

    w1 = (a - b + c) * y
    w2 = (a - b - c) * y

    d1 = 0.5 + 0.5 * da.map_blocks(erf, w1)
    d2 = 0.5 + 0.5 * da.map_blocks(erf, w2)

    Se = exp(b) * S

    call = P * d1 - Se * d2
    put = call - P + Se

    return da.compute(da.stack((put, call)), get=schd)
def get_da_background(files, shape=ZTF_IMAGE_SHAPE, dtype="float32"):
    """ Get a dask.array stacked for each of the ziff images you want.
    = Works only with single ziff =
    """
    lazy_array = [dask.delayed(get_ziff_single_background)(f_) for f_ in files]
    lazy_arrays = [da.from_delayed(x_, shape=shape, dtype=dtype) for x_ in lazy_array]
    return da.stack(lazy_arrays)
def read_raster(path, band=None, block_size=1):
    """Read all or some bands from raster

    Arguments:
        path {string} -- path to raster file

    Keyword Arguments:
        band {int, iterable(int)} -- band number or iterable of bands.
            When passing None, it reads all bands (default: {None})
        block_size {int} -- block size multiplier (default: {1})

    Returns:
        dask.array.Array -- a Dask array

    """
    if isinstance(band, int):
        return read_raster_band(path, band=band, block_size=block_size)
    else:
        if band is None:
            bands = range(1, get_band_count(path) + 1)
        else:
            bands = list(band)
        return da.stack([
            read_raster_band(path, band=band, block_size=block_size)
            for band in bands
        ])
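# Hypothetical usage of read_raster; "scene.tif" is a placeholder path and the
# band numbers are arbitrary. Nothing is read until .compute() is called.
rgb = read_raster("scene.tif", band=[4, 3, 2], block_size=2)
band_means = rgb.mean(axis=(1, 2)).compute()  # one mean per requested band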
def correlations_multiple(data, correlations, periodic_boundary=True, cutoff=None):
    """Calculate 2-point stats for multiple auto/cross correlations

    Args:
      data: the discretized data (n_samples, n_x, n_y, n_correlation)
      correlations: the correlation pairs
      periodic_boundary: whether to assume a periodic boundary (default is True)
      cutoff: the subarray of the 2-point stats to keep

    Returns:
      the 2-point stats array

    >>> data = np.arange(18).reshape(1, 3, 3, 2)
    >>> out = correlations_multiple(data, [[0, 1], [1, 1]])
    >>> out
    dask.array<stack, shape=(1, 3, 3, 2), dtype=float64, chunksize=(1, 3, 3, 1)>
    >>> answer = np.array([[[58, 62, 58], [94, 98, 94], [58, 62, 58]]]) + 1. / 3.
    >>> assert np.allclose(out.compute()[..., 0], answer)
    """
    return pipe(
        range(data.shape[-1]),
        map_(lambda x: (0, x)),
        lambda x: correlations if correlations else x,
        map_(
            lambda x: two_point_stats(
                data[..., x[0]],
                data[..., x[1]],
                periodic_boundary=periodic_boundary,
                cutoff=cutoff,
            )
        ),
        list,
        lambda x: da.stack(x, axis=-1),
    )
def compute_adjoint_dask(rays, g, dobs, i0, K_ne, m_tci, m_prior, CdCt, sigma_m, Nkernel, size_cell):
    L_m = Nkernel * size_cell
    # #i not eq i0 mask
    # mask = np.ones(rays.shape[0],dtype=np.bool)
    # mask[i0] = False
    # rays = rays[mask,:,:,:,:]
    # g = g[mask,:,:]
    # dobs = dobs[mask,:,:]
    # CdCt = CdCt[mask,:,:]

    # residuals
    # g.shape, dobs.shape [Na,Nt,Nd]
    dd = g - dobs

    # weighted residuals
    # Cd.shape [Na,Nt,Nd] i.e. diagonal
    # CdCt^-1 = 1./CdCt
    dd /= (CdCt + 1e-15)

    # get ray info
    Na, Nt, Nd, _, Ns = rays.shape

    # parallelize over directions
    gradient = da.sum(da.stack([
        da.from_delayed(delayed(do_adjoint)(rays[:, :, d, :, :], dd[:, :, d],
                                            K_ne, m_tci, sigma_m, Nkernel,
                                            size_cell, i0),
                        (m_tci.nx, m_tci.ny, m_tci.nz), dtype=np.double)
        for d in range(Nd)
    ], axis=-1), axis=-1)
    gradient = gradient.compute(get=get)
    gradient += m_tci.M
    gradient -= m_prior
    return gradient
def est_sh_part(varr, max_sh, npart, local):
    if varr.shape[0] <= 1:
        return varr.squeeze(), np.array([[0, 0]])
    idx_spt = np.array_split(np.arange(varr.shape[0]), npart)
    fm_ls, sh_ls = [], []
    for idx in idx_spt:
        if len(idx) > 0:
            fm, sh = est_sh_part(varr[idx, :, :], max_sh, npart, local)
            fm_ls.append(fm)
            sh_ls.append(sh)
    mid = int(len(sh_ls) / 2)
    sh_add_ls = [np.array([0, 0])] * len(sh_ls)
    for i, fm in enumerate(fm_ls):
        if i < mid:
            temp = fm_ls[i + 1]
            sh_idx = np.arange(i + 1)
        elif i > mid:
            temp = fm_ls[i - 1]
            sh_idx = np.arange(i, len(sh_ls))
        else:
            continue
        sh_add = darr.from_delayed(
            delayed(match_temp)(fm, temp, max_sh, local), (2,), float
        )
        for j in sh_idx:
            sh_ls[j] = sh_ls[j] + sh_add.reshape((1, -1))
            sh_add_ls[j] = sh_add_ls[j] + sh_add
    for i, (fm, sh) in enumerate(zip(fm_ls, sh_add_ls)):
        fm_ls[i] = darr.nan_to_num(
            darr.from_delayed(delayed(shift_perframe)(fm, sh), fm.shape, fm.dtype)
        )
    sh_ret = darr.concatenate(sh_ls)
    fm_ret = darr.stack(fm_ls)
    return fm_ret.max(axis=0), sh_ret
def wavg_full_t(data, flags, weights, solint, times=None, threshold=0.8):
    """Perform weighted average of data, flags and weights, over axis 0.

    This applies flags and uses specified solution interval increments.

    Parameters
    ----------
    data : array of complex
    flags : array of boolean
    weights : array of floats
    solint : index interval over which to average, integer
    times : optional array of times to average, array of floats
    threshold : optional float

    Returns
    -------
    av_data : weighted average of data
    av_flags : weighted average of flags
    av_weights : weighted average of weights
    av_times : optional average of times
    """
    # ensure solint is an integer
    solint = int(solint)
    inc_array = range(0, data.shape[0], solint)

    av_data = []
    av_flags = []
    av_weights = []
    # TODO: might be more efficient to use reduceat?
    for ti in inc_array:
        w_out = wavg_full(data[ti:ti + solint], flags[ti:ti + solint],
                          weights[ti:ti + solint], threshold=threshold)
        av_data.append(w_out[0])
        av_flags.append(w_out[1])
        av_weights.append(w_out[2])
    av_data = da.stack(av_data)
    av_flags = da.stack(av_flags)
    av_weights = da.stack(av_weights)

    if times is not None:
        av_times = np.array(
            [np.average(times[ti:ti + solint], axis=0) for ti in inc_array])
        return av_data, av_flags, av_weights, av_times
    else:
        return av_data, av_flags, av_weights
def lonlat2xyz(lons, lats):
    R = 6370997.0
    x_coords = R * da.cos(da.deg2rad(lats)) * da.cos(da.deg2rad(lons))
    y_coords = R * da.cos(da.deg2rad(lats)) * da.sin(da.deg2rad(lons))
    z_coords = R * da.sin(da.deg2rad(lats))

    return da.stack(
        (x_coords.ravel(), y_coords.ravel(), z_coords.ravel()), axis=-1)
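# Quick check (not from the original source): three lon/lat points in degrees
# map to an (N, 3) array of Cartesian coordinates on a sphere of radius R.
import numpy as np
import dask.array as da

lons = da.from_array(np.array([0.0, 90.0, 180.0]), chunks=3)
lats = da.from_array(np.array([0.0, 45.0, -45.0]), chunks=3)
xyz = lonlat2xyz(lons, lats)
print(xyz.compute().shape)  # (3, 3)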
def _map_iterate(self, function, iterating_kwargs=(), show_progressbar=None,
                 parallel=None, ragged=None, inplace=True, **kwargs):
    if ragged not in (True, False):
        raise ValueError('"ragged" kwarg has to be bool for lazy signals')
    _logger.debug("Entering '_map_iterate'")
    size = max(1, self.axes_manager.navigation_size)
    from hyperspy.misc.utils import (create_map_objects,
                                     map_result_construction)
    func, iterators = create_map_objects(function, size, iterating_kwargs,
                                         **kwargs)
    iterators = (self._iterate_signal(), ) + iterators
    res_shape = self.axes_manager._navigation_shape_in_array
    # no navigation
    if not len(res_shape) and ragged:
        res_shape = (1,)

    all_delayed = [dd(func)(data) for data in zip(*iterators)]

    if ragged:
        sig_shape = ()
        sig_dtype = np.dtype('O')
    else:
        one_compute = all_delayed[0].compute()
        sig_shape = one_compute.shape
        sig_dtype = one_compute.dtype
    pixels = [
        da.from_delayed(res, shape=sig_shape, dtype=sig_dtype)
        for res in all_delayed
    ]

    for step in reversed(res_shape):
        _len = len(pixels)
        starts = range(0, _len, step)
        ends = range(step, _len + step, step)
        pixels = [
            da.stack(pixels[s:e], axis=0)
            for s, e in zip(starts, ends)
        ]
    result = pixels[0]

    res = map_result_construction(
        self, inplace, result, ragged, sig_shape, lazy=True)
    return res
def stretch_hist_equalize(self, approximate=False):
    """Stretch the current image's colors through histogram equalization.

    Args:
        approximate (bool): Use a faster less-accurate percentile
            calculation. At the time of writing the dask version of
            `percentile` is not as accurate as the numpy version.
            This will likely change in the future. Current dask
            version 0.17.

    """
    logger.info("Perform a histogram equalized contrast stretch.")

    nwidth = 2048.
    logger.debug("Make histogram bins having equal amount of data, "
                 "using numpy percentile function:")

    def _band_hist(band_data):
        cdf = da.arange(0., 1., 1. / nwidth, chunks=nwidth)
        if approximate:
            # need a 1D array
            flat_data = band_data.ravel()
            # replace with nanpercentile in the future, if available
            # dask < 0.17 returns all NaNs for this
            bins = da.percentile(flat_data[da.notnull(flat_data)], cdf * 100.)
        else:
            bins = dask.delayed(np.nanpercentile)(band_data, cdf * 100.)
            bins = da.from_delayed(bins, shape=(nwidth,), dtype=cdf.dtype)
        res = dask.delayed(np.interp)(band_data, bins, cdf)
        res = da.from_delayed(res, shape=band_data.shape,
                              dtype=band_data.dtype)
        return res

    band_results = []
    for band in self.data['bands'].values:
        if band == 'A':
            continue
        band_data = self.data.sel(bands=band)
        res = _band_hist(band_data.data)
        band_results.append(res)

    if 'A' in self.data.coords['bands'].values:
        band_results.append(self.data.sel(bands='A'))
    self.data.data = da.stack(band_results,
                              axis=self.data.dims.index('bands'))
def load(s, measure, dset_name, transpose_lst, df_attr='demog_df'):
    ''' given measure, h5 dataset name, transpose list: load data '''
    df = getattr(s, df_attr)
    if measure in dir(s):
        print(measure, 'already loaded')
        if df.shape[0] != getattr(s, measure).shape[0]:
            print('shape of loaded data does not match demogs, reloading')
        else:
            return np.array([])
    dsets = [h5py.File(fn, 'r')[dset_name] for fn in df['path'].values]
    arrays = [da.from_array(dset, chunks=dset.shape) for dset in dsets]
    stack = da.stack(arrays, axis=-1)        # concatenate along last axis
    stack = stack.transpose(transpose_lst)   # do transposition
    data = np.empty(stack.shape)
    da.store(stack, data)
    print(data.shape)
    return data
def dec10216(inbuf):
    """Decode 10 bits data into 16 bits words.

    ::

        /*
         * pack 4 10-bit words in 5 bytes into 4 16-bit words
         *
         * 0       1       2       3       4       5
         * 01234567890123456789012345678901234567890
         * 0         1         2         3         4
         */
        ip = &in_buffer[i];
        op = &out_buffer[j];

        op[0] = ip[0]*4 + ip[1]/64;
        op[1] = (ip[1] & 0x3F)*16 + ip[2]/16;
        op[2] = (ip[2] & 0x0F)*64 + ip[3]/4;
        op[3] = (ip[3] & 0x03)*256 + ip[4];

    """
    arr10 = inbuf.astype(np.uint16)
    arr16_len = int(len(arr10) * 4 / 5)
    arr10_len = int((arr16_len * 5) / 4)
    arr10 = arr10[:arr10_len]  # adjust size

    # dask is slow with indexing
    arr10_0 = arr10[::5]
    arr10_1 = arr10[1::5]
    arr10_2 = arr10[2::5]
    arr10_3 = arr10[3::5]
    arr10_4 = arr10[4::5]

    arr16_0 = (arr10_0 << 2) + (arr10_1 >> 6)
    arr16_1 = ((arr10_1 & 63) << 4) + (arr10_2 >> 4)
    arr16_2 = ((arr10_2 & 15) << 6) + (arr10_3 >> 2)
    arr16_3 = ((arr10_3 & 3) << 8) + arr10_4
    arr16 = da.stack([arr16_0, arr16_1, arr16_2, arr16_3], axis=-1).ravel()
    arr16 = da.rechunk(arr16, arr16.shape[0])

    return arr16
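# Round-trip sanity sketch for dec10216 (synthetic input, not from the source):
# pack four 10-bit values into 5 bytes by hand, then decode them back.
import numpy as np
import dask.array as da

vals = np.array([1023, 512, 3, 700], dtype=np.uint16)
packed = np.array([
    vals[0] >> 2,
    ((vals[0] & 0x3) << 6) | (vals[1] >> 4),
    ((vals[1] & 0xF) << 4) | (vals[2] >> 6),
    ((vals[2] & 0x3F) << 2) | (vals[3] >> 8),
    vals[3] & 0xFF,
], dtype=np.uint8)
decoded = dec10216(da.from_array(packed, chunks=5)).compute()
assert (decoded == vals).all()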
def test_stack_scalars():
    d = da.arange(4, chunks=2)
    s = da.stack([d.mean(), d.sum()])
    assert s.compute().tolist() == [np.arange(4).mean(), np.arange(4).sum()]
def test_short_stack():
    x = np.array([1])
    d = da.from_array(x, chunks=(1,))
    s = da.stack([d])
    assert s.shape == (1, 1)
    assert Array._get(s.dask, s._keys())[0][0].shape == (1, 1)
def test_gh4043(lock, asarray, fancy):
    a1 = da.from_array(np.zeros(3,), chunks=1, asarray=asarray, lock=lock, fancy=fancy)
    a2 = da.from_array(np.ones(3,), chunks=1, asarray=asarray, lock=lock, fancy=fancy)
    al = da.stack([a1, a2])
    assert_eq(al, al)
def __init__(
    self,
    dirname,
    iters=None,
    deltaT=1,
    prefix=None,
    ref_date=None,
    calendar=None,
    ignore_pickup=True,
    geometry="Cartesian",
    skip_vars=[],
):
    """iters: list of iteration numbers
    deltaT: timestep
    prefix: list of file prefixes (if None use all)
    """
    assert geometry in _valid_geometry
    self.geometry = geometry

    # the directory where the files live
    self.dirname = dirname

    # storage dicts for variables and attributes
    self._variables = xray.core.pycompat.OrderedDict()
    self._attributes = xray.core.pycompat.OrderedDict()
    self._dimensions = []

    ### figure out the mapping between diagnostics names and variable properties

    ### read grid files
    for k in _grid_variables:
        dims, desc, units = _grid_variables[k]
        data = _read_and_shape_grid_data(k, dirname)
        if data is not None:
            self._variables[k] = Variable(dims, MemmapArrayWrapper(data),
                                          {"description": desc, "units": units})
            self._dimensions.append(k)

    ## check for layers
    Nlayers = None
    for varname, dims, desc, units, data in _get_layers_grid_variables(dirname):
        self._variables[varname] = Variable(dims, MemmapArrayWrapper(data),
                                            {"description": desc, "units": units})
        self._dimensions.append(varname)
        # if there are multiple layers coordinates, they all have the same
        # size, so this works (although it is sloppy)
        if varname[-7:] == "_bounds":
            Nlayers = len(data)

    ## load metadata for all possible diagnostics
    diag_meta = _parse_available_diagnostics(
        os.path.join(dirname, "available_diagnostics.log"), Nlayers=Nlayers)

    # now get variables from our iters
    if iters is not None:

        # create iteration array
        iterdata = np.asarray(iters)
        self._variables["iter"] = Variable(("time",), iterdata,
                                           {"description": "model timestep number"})

        # create time array
        timedata = np.asarray(iters) * deltaT
        time_attrs = {"description": "model time"}
        if ref_date is not None:
            time_attrs["units"] = "seconds since %s" % ref_date
        else:
            time_attrs["units"] = "seconds"
        if calendar is not None:
            time_attrs["calendar"] = calendar
        self._variables["time"] = Variable(("time",), timedata, time_attrs)
        self._dimensions.append("time")

        varnames = []
        fnames = []
        _data_vars = xray.core.pycompat.OrderedDict()
        # look at first iter to get variable metadata
        for f in glob(os.path.join(dirname, "*.%010d.meta" % iters[0])):
            if ignore_pickup and re.search("pickup", f):
                pass
            else:
                go = True
                if prefix is not None:
                    bname = os.path.basename(f[:-16])
                    matches = [bname == p for p in prefix]
                    if not any(matches):
                        go = False
                if go:
                    meta = _parse_meta(f)
                    if "fldList" in meta:
                        # we have multiple variables per file
                        flds = meta["fldList"]
                        [varnames.append(fl) for fl in flds]
                    else:
                        # just use the filename as the variable name
                        varnames.append(meta["basename"])
                    fnames.append(os.path.join(dirname, meta["basename"]))

        # strip unwanted variables
        for v in skip_vars:
            try:
                varnames.remove(v)
            except ValueError:
                pass

        # read data as dask arrays (should be an option)
        vardata = {}
        for k in varnames:
            vardata[k] = []
        for i in iters:
            for f in fnames:
                try:
                    data = _read_mds(f, i, force_dict=True)
                    # this can screw up if the same variable appears in
                    # multiple diagnostic files
                    for k in data:
                        if k in varnames:
                            mwrap = MemmapArrayWrapper(data[k])
                            # for some reason, da.from_array does not
                            # necessarily give a unique name
                            # need to specify array name
                            myda = da.from_array(mwrap, mwrap.shape,
                                                 name="%s_%010d" % (k, i))
                            vardata[k].append(myda)
                except IOError:
                    # couldn't find the variable, remove it from the list
                    # print 'Removing %s from list (iter %g)' % (k, i)
                    varnames.remove(k)

        # final loop to create Variable objects
        for k in varnames:
            try:
                dims, desc, units = _state_variables[k]
            except KeyError:
                try:
                    dims, desc, units = _ptracers[k]
                except KeyError:
                    dims, desc, units = diag_meta[k]

            # check for shape compatibility
            varshape = vardata[k][0].shape
            varndims = len(varshape)
            # maybe promote 2d data to 3d
            if (len(dims) == 3) and (varndims == 2):
                if len(self._variables[dims[0]]) == 1:
                    vardata[k] = [v.reshape((1,) + varshape) for v in vardata[k]]
                    warnings.warn("Promoting 2D data to 3D data "
                                  "for variable %s" % k)
                    varndims += 1
            if len(dims) != varndims:
                warnings.warn(
                    "Shape of variable data is not compatible "
                    "with expected number of dimensions. This "
                    "can arise if the 'levels' option is used "
                    "in data.diagnostics. Right now we have no "
                    "way to infer the level, so the variable is "
                    "skipped: " + k
                )
            else:
                # add time to dimension
                dims_time = ("time",) + dims
                # wrap variable in dask array
                # -- why? it's already a dask array
                # vardask = da.stack([da.from_array(d, varshape) for d in vardata[k]])
                vardask = da.stack(vardata[k])
                # for nkdsk in range(len(vardata[k])):
                #     print 'Key %s, vardata[%g] sum %g, name %s' % (k, nkdsk,
                #         vardata[k][nkdsk].sum(), vardata[k][nkdsk].name)
                #     print 'Key %s, vardask[%g] sum %g' % (k, nkdsk,
                #         vardask[nkdsk].sum())
                newvar = Variable(dims_time, vardask,
                                  {"description": desc, "units": units})
                self._variables[k] = newvar

    self._attributes = {"history": "Some made up attribute"}
def __init__(self, dirname, iters=None, deltaT=1,
             prefix=None, ref_date=None, calendar=None,
             ignore_pickup=True, geometry='Cartesian'):
    """iters: list of iteration numbers
    deltaT: timestep
    prefix: list of file prefixes (if None use all)
    """
    assert geometry in _valid_geometry
    self.geometry = geometry

    # the directory where the files live
    self.dirname = dirname

    # storage dicts for variables and attributes
    self._variables = OrderedDict()
    self._attributes = OrderedDict()
    self._dimensions = []

    ### figure out the mapping between diagnostics names and variable properties
    # all possible diagnostics
    diag_meta = _parse_available_diagnostics(
        os.path.join(dirname, 'available_diagnostics.log'))

    ### read grid files
    for k in _grid_variables:
        if _grid_special_mapping.has_key(k):
            fname = _grid_special_mapping[k][0]
            sl = _grid_special_mapping[k][1]
        else:
            fname = k
            sl = None
        data = None
        try:
            data = _read_mds(os.path.join(dirname, fname), force_dict=False)
        except IOError:
            try:
                data = _read_mds(os.path.join(dirname, fname.upper()),
                                 force_dict=False)
            except IOError:
                warnings.warn("Couldn't load grid variable " + k)
        if data is not None:
            data = data[sl] if sl is not None else data.squeeze()
            dims, desc, units = _grid_variables[k]
            self._variables[k] = Variable(
                dims, MemmapArrayWrapper(data),
                {'description': desc, 'units': units})
            self._dimensions.append(k)

    # now get variables from our iters
    if iters is not None:

        # create iteration array
        iterdata = np.asarray(iters)
        self._variables['iter'] = Variable(('time',), iterdata,
                                           {'description': 'model timestep number'})

        # create time array
        timedata = np.asarray(iters) * deltaT
        time_attrs = {'description': 'model time'}
        if ref_date is not None:
            time_attrs['units'] = 'seconds since %s' % ref_date
        else:
            time_attrs['units'] = 'seconds'
        if calendar is not None:
            time_attrs['calendar'] = calendar
        self._variables['time'] = Variable(
            ('time',), timedata, time_attrs)
        self._dimensions.append('time')

        varnames = []
        fnames = []
        _data_vars = OrderedDict()
        # look at first iter to get variable metadata
        for f in glob(os.path.join(dirname, '*.%010d.meta' % iters[0])):
            if ignore_pickup and re.search('pickup', f):
                pass
            else:
                go = True
                if prefix is not None:
                    bname = os.path.basename(f[:-16])
                    matches = [bname == p for p in prefix]
                    if not any(matches):
                        go = False
                if go:
                    meta = _parse_meta(f)
                    if meta.has_key('fldList'):
                        flds = meta['fldList']
                        [varnames.append(fl) for fl in flds]
                    else:
                        varnames.append(meta['basename'])
                    fnames.append(os.path.join(dirname, meta['basename']))

        # read data as dask arrays (should be an option)
        vardata = {}
        for k in varnames:
            vardata[k] = []
        for i in iters:
            for f in fnames:
                try:
                    data = _read_mds(f, i, force_dict=True)
                    for k in data.keys():
                        mwrap = MemmapArrayWrapper(data[k])
                        vardata[k].append(
                            da.from_array(mwrap, mwrap.shape))
                except IOError:
                    # couldn't find the variable, remove it from the list
                    #print 'Removing %s from list (iter %g)' % (k, i)
                    varnames.remove(k)

        # final loop to create Variable objects
        for k in varnames:
            try:
                dims, desc, units = _state_variables[k]
            except KeyError:
                dims, desc, units = diag_meta[k]

            # check for shape compatibility
            varshape = vardata[k][0].shape
            varndims = len(varshape)
            if len(dims) != varndims:
                warnings.warn("Shape of variable data is not compatible "
                              "with expected number of dimensions. This "
                              "can arise if the 'levels' option is used "
                              "in data.diagnostics. Right now we have no "
                              "way to infer the level, so the variable is "
                              "skipped: " + k)
            else:
                # add time to dimension
                dims_time = ('time',) + dims
                # wrap variable in dask array
                vardask = da.stack([da.from_array(d, varshape) for d in vardata[k]])
                self._variables[k] = Variable(
                    dims_time, vardask,
                    {'description': desc, 'units': units})

    self._attributes = {'history': 'Some made up attribute'}
def CartesianToEquatorial(pos, observer=[0,0,0], frame='icrs'):
    """
    Convert Cartesian position coordinates to equatorial right ascension
    and declination, using the specified observer location.

    .. note::
        RA and DEC will be returned in degrees, with RA in the range [0,360]
        and DEC in the range [-90, 90].

    Parameters
    ----------
    pos : array_like
        a N x 3 array holding the Cartesian position coordinates
    observer : array_like
        a length 3 array holding the observer location
    frame : string
        A string, 'icrs' or 'galactic'. The frame of the input position.
        Use 'icrs' if the cartesian position is already in Equatorial.

    Returns
    -------
    ra, dec : array_like
        the right ascension and declination coordinates, in degrees. RA
        will be in the range [0,360] and DEC in the range [-90, 90]
    """

    # split x, y, z to signify that we do not need to have pos
    # as a full chunk in the last dimension.
    # this is useful when we use apply_gufunc.
    x, y, z = [pos[..., i] - observer[i] for i in range(3)]

    if frame == 'icrs':
        # FIXME: Convert these to a gufunc that uses astropy?
        # might be a step backward.

        # from equatorial to equatorial
        s = da.hypot(x, y)
        lon = da.arctan2(y, x)
        lat = da.arctan2(z, s)

        # convert to degrees
        lon = da.rad2deg(lon)
        lat = da.rad2deg(lat)

        # wrap lon to [0,360]
        lon = da.mod(lon - 360., 360.)

        ra, dec = lon, lat
    else:
        from astropy.coordinates import SkyCoord

        def cart_to_eq(x, y, z):
            try:
                sc = SkyCoord(x, y, z, representation_type='cartesian', frame=frame)
                scg = sc.transform_to(frame='icrs')
                scg.representation_type = 'unitspherical'
            except:
                sc = SkyCoord(x, y, z, representation='cartesian', frame=frame)
                scg = sc.transform_to(frame='icrs')
                scg.representation = 'unitspherical'

            ra, dec = scg.ra.value, scg.dec.value
            return ra, dec

        dtype = pos.dtype
        ra, dec = da.apply_gufunc(cart_to_eq, '(),(),()->(),()', x, y, z,
                                  output_dtypes=[dtype, dtype])

    return da.stack((ra, dec), axis=0)
def stack(signal_list, axis=None, new_axis_name='stack_element', lazy=None,
          **kwargs):
    """Concatenate the signals in the list over a given axis or a new axis.

    The title is set to that of the first signal in the list.

    Parameters
    ----------
    signal_list : list of BaseSignal instances
    axis : {None, int, str}
        If None, the signals are stacked over a new axis. The data must
        have the same dimensions. Otherwise the signals are stacked over the
        axis given by its integer index or its name. The data must have the
        same shape, except in the dimension corresponding to `axis`.
    new_axis_name : string
        The name of the new axis when `axis` is None.
        If an axis with this name already exists it automatically append '-i',
        where `i` are integers, until it finds a name that is not yet in use.
    lazy: {bool, None}
        Returns a LazySignal if True. If None, only returns lazy result if at
        least one is lazy.

    Returns
    -------
    signal : BaseSignal instance (or subclass, determined by the objects in
        signal list)

    Examples
    --------
    >>> data = np.arange(20)
    >>> s = hs.stack([hs.signals.Signal1D(data[:10]),
    ...               hs.signals.Signal1D(data[10:])])
    >>> s
    <Signal1D, title: Stack of , dimensions: (2, 10)>
    >>> s.data
    array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

    """
    from itertools import zip_longest
    from hyperspy.signals import BaseSignal
    import dask.array as da
    from numbers import Number
    # TODO: remove next time
    deprecated = ['mmap', 'mmap_dir']
    warn_str = "'{}' argument is deprecated, please use 'lazy' instead"
    for k in deprecated:
        if k in kwargs:
            lazy = True
            warnings.warn(warn_str.format(k), VisibleDeprecationWarning)

    axis_input = copy.deepcopy(axis)
    signal_list = list(signal_list)
    # Get the real signal with the most axes to get metadata/class/etc
    # first = sorted(filter(lambda _s: isinstance(_s, BaseSignal), signal_list),
    #                key=lambda _s: _s.data.ndim)[-1]
    first = next(filter(lambda _s: isinstance(_s, BaseSignal), signal_list))

    # Cast numbers as signals. Will broadcast later
    for i, _s in enumerate(signal_list):
        if isinstance(_s, BaseSignal):
            pass
        elif isinstance(_s, Number):
            sig = BaseSignal(_s)
            signal_list[i] = sig
        else:
            raise ValueError("{} type cannot be stacked.".format(type(_s)))

    if lazy is None:
        lazy = any(_s._lazy for _s in signal_list)
    if not isinstance(lazy, bool):
        raise ValueError("'lazy' argument has to be None, True or False")

    # Cast all as lazy if required
    for i, _s in enumerate(signal_list):
        if not _s._lazy:
            signal_list[i] = _s.as_lazy()

    if len(signal_list) > 1:
        newlist = broadcast_signals(*signal_list, ignore_axis=axis_input)
        if axis is not None:
            step_sizes = [s.axes_manager[axis].size for s in newlist]
            axis = newlist[0].axes_manager[axis]
        datalist = [s.data for s in newlist]
        newdata = da.stack(datalist, axis=0) if axis is None else \
            da.concatenate(datalist, axis=axis.index_in_array)
        if axis_input is None:
            signal = first.__class__(newdata)
            signal._lazy = True
            signal._assign_subclass()
            signal.axes_manager._axes[1:] = copy.deepcopy(
                newlist[0].axes_manager._axes)
            axis_name = new_axis_name
            axis_names = [axis_.name for axis_ in signal.axes_manager._axes[1:]]
            j = 1
            while axis_name in axis_names:
                axis_name = new_axis_name + "_%i" % j
                j += 1
            eaxis = signal.axes_manager._axes[0]
            eaxis.name = axis_name
            eaxis.navigate = True  # This triggers _update_parameters
            signal.metadata = copy.deepcopy(first.metadata)
            # Get the title from 1st object
            signal.metadata.General.title = (
                "Stack of " + first.metadata.General.title)
            signal.original_metadata = DictionaryTreeBrowser({})
        else:
            signal = newlist[0]._deepcopy_with_new_data(newdata)
            signal._lazy = True
            signal._assign_subclass()
            signal.get_dimensions_from_data()
        signal.original_metadata.add_node('stack_elements')

        for i, obj in enumerate(signal_list):
            signal.original_metadata.stack_elements.add_node('element%i' % i)
            node = signal.original_metadata.stack_elements['element%i' % i]
            node.original_metadata = \
                obj.original_metadata.as_dictionary()
            node.metadata = \
                obj.metadata.as_dictionary()

        if axis_input is None:
            axis_input = signal.axes_manager[-1 + 1j].index_in_axes_manager
            step_sizes = 1

        signal.metadata._HyperSpy.set_item('Stacking_history.axis', axis_input)
        signal.metadata._HyperSpy.set_item('Stacking_history.step_sizes',
                                           step_sizes)
        if np.all([
                s.metadata.has_item('Signal.Noise_properties.variance')
                for s in signal_list
        ]):
            variance = stack([
                s.metadata.Signal.Noise_properties.variance
                for s in signal_list
            ], axis)
            signal.metadata.set_item('Signal.Noise_properties.variance',
                                     variance)
    else:
        signal = signal_list[0]

    # Leave as lazy or compute
    if lazy:
        signal = signal.as_lazy()
    else:
        signal.compute(False)

    return signal
def CartesianToSky(pos, cosmo, velocity=None, observer=[0,0,0], zmax=100.,
                   frame='icrs'):
    r"""
    Convert Cartesian position coordinates to RA/Dec and redshift,
    using the specified cosmology to convert radial distances from
    the origin into redshift.

    If velocity is supplied, the returned redshift accounts for the
    additional peculiar velocity shift.

    Users should ensure that ``zmax`` is larger than the largest possible
    redshift being considered to avoid an interpolation exception.

    .. note::
        Cartesian coordinates should be in units of Mpc/h and velocity
        should be in units of km/s.

    Parameters
    ----------
    pos : dask array
        a N x 3 array holding the Cartesian position coordinates in Mpc/h
    cosmo : :class:`~nbodykit.cosmology.cosmology.Cosmology`
        the cosmology used to measure the comoving distance from ``redshift``
    velocity : array_like
        a N x 3 array holding velocity in km/s
    observer : array_like, optional
        a length 3 array holding the observer location
    zmax : float, optional
        the maximum possible redshift, should be set to a reasonably large
        value to avoid interpolation failure going from comoving distance
        to redshift
    frame : string ('icrs' or 'galactic')
        specifies which frame the Cartesian coordinates are in. Useful if you
        know the simulation (usually cartesian) is in galactic units but you
        want to convert to the icrs (ra, dec) usually used in surveys.

    Returns
    -------
    ra, dec, z : dask array
        the right ascension (in degrees), declination (in degrees), and
        redshift coordinates. RA will be in the range [0,360] and DEC in the
        range [-90, 90]

    Notes
    -----
    If velocity is provided, redshift-space distortions are added to the
    real-space redshift :math:`z_\mathrm{real}`, via:

    .. math::

        z_\mathrm{redshift} = ( v_\mathrm{pec} / c ) (1 + z_\mathrm{real})

    Raises
    ------
    TypeError
        If the input columns are not dask arrays
    """
    from astropy.constants import c
    from scipy.interpolate import interp1d

    if not isinstance(pos, da.Array):
        pos = da.from_array(pos, chunks=100000)

    pos = pos - observer

    # RA,dec coordinates (in degrees)
    ra, dec = CartesianToEquatorial(pos, frame=frame)

    # the distance from the origin
    r = da.linalg.norm(pos, axis=-1)

    def z_from_comoving_distance(x):
        zgrid = numpy.logspace(-8, numpy.log10(zmax), 1024)
        zgrid = numpy.concatenate([[0.], zgrid])
        rgrid = cosmo.comoving_distance(zgrid)
        return interp1d(rgrid, zgrid)(x)

    # invert distance - redshift relation
    z = r.map_blocks(z_from_comoving_distance)

    # add in velocity offsets?
    if velocity is not None:
        vpec = (pos * velocity).sum(axis=-1) / r
        z += vpec / c.to('km/s').value * (1 + z)

    return da.stack((ra, dec, z), axis=0)
import h5py
from glob import glob
import os

filenames = sorted(glob(os.path.join('data', 'weather-big', '*.hdf5')))
dsets = [h5py.File(filename, mode='r')['/t2m'] for filename in filenames]

import dask.array as da

arrays = [da.from_array(dset, chunks=(500, 500)) for dset in dsets]
x = da.stack(arrays, axis=0)

result = x[:, ::2, ::2]
da.to_hdf5(os.path.join('data', 'myfile.hdf5'), '/output', result)