def test_diff_append(n):
    x = cupy.arange(5) + 1
    a = da.from_array(x, chunks=2)
    assert_eq(da.diff(a, n, append=0), cupy.diff(x, n, append=0))
    assert_eq(da.diff(a, n, append=[0]), cupy.diff(x, n, append=[0]))
    assert_eq(da.diff(a, n, append=[-1, 0]), cupy.diff(x, n, append=[-1, 0]))

    x = cupy.arange(16).reshape(4, 4)
    a = da.from_array(x, chunks=2)
    assert_eq(da.diff(a, n, axis=1, append=0), cupy.diff(x, n, axis=1, append=0))
    assert_eq(
        da.diff(a, n, axis=1, append=[[0], [0], [0], [0]]),
        cupy.diff(x, n, axis=1, append=[[0], [0], [0], [0]]),
    )
    assert_eq(da.diff(a, n, axis=0, append=0), cupy.diff(x, n, axis=0, append=0))
    assert_eq(
        da.diff(a, n, axis=0, append=[[0, 0, 0, 0]]),
        cupy.diff(x, n, axis=0, append=[[0, 0, 0, 0]]),
    )

    if n > 0:
        with pytest.raises(ValueError):
            # When the order n is 0 the result is the input array,
            # so no error is raised
            da.diff(a, n, append=cupy.zeros((3, 3)))
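# The test above is parameterized on ``n`` but its decorator and imports were
# not captured in the snippet. A minimal sketch of the surrounding scaffolding;
# the parameter grid [0, 1, 2] is an assumption, not taken from the source,
# and running it requires a CUDA GPU with CuPy installed.
import cupy
import pytest
import dask.array as da
from dask.array.utils import assert_eq


@pytest.mark.parametrize("n", [0, 1, 2])  # assumed values
def test_diff_append_scaffold(n):
    x = cupy.arange(5) + 1
    a = da.from_array(x, chunks=2)
    assert_eq(da.diff(a, n, append=0), cupy.diff(x, n, append=0))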
def test_diff(shape, n, axis):
    x = np.random.randint(0, 10, shape)
    a = da.from_array(x, chunks=(len(shape) * (5,)))
    assert_eq(da.diff(a, n, axis), np.diff(x, n, axis))
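# A concrete instantiation of the NumPy-backed check above, with shape, order
# and axis chosen purely for illustration (the real parametrization used by
# the test suite is not shown in the snippet).
import numpy as np
import dask.array as da
from dask.array.utils import assert_eq

shape, n, axis = (10, 10), 2, 1                      # illustrative values only
x = np.random.randint(0, 10, shape)
a = da.from_array(x, chunks=(len(shape) * (5,)))     # 5-element chunks per axis
assert_eq(da.diff(a, n, axis), np.diff(x, n, axis))  # dask result matches NumPy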
def new_grid_mapping_from_coords(
        x_coords: xr.DataArray,
        y_coords: xr.DataArray,
        crs: Union[str, pyproj.crs.CRS],
        *,
        tile_size: Union[int, Tuple[int, int]] = None,
        tolerance: float = DEFAULT_TOLERANCE,
) -> GridMapping:
    crs = _normalize_crs(crs)
    assert_instance(x_coords, xr.DataArray, name='x_coords')
    assert_instance(y_coords, xr.DataArray, name='y_coords')
    assert_true(x_coords.ndim in (1, 2),
                'x_coords and y_coords must be either 1D or 2D arrays')
    assert_instance(tolerance, float, name='tolerance')
    assert_true(tolerance > 0.0, 'tolerance must be greater than zero')

    if x_coords.name and y_coords.name:
        xy_var_names = str(x_coords.name), str(y_coords.name)
    else:
        xy_var_names = _default_xy_var_names(crs)

    tile_size = _normalize_int_pair(tile_size, default=None)

    is_lon_360 = None  # None means "not yet known"
    if crs.is_geographic:
        is_lon_360 = bool(np.any(x_coords > 180))

    x_res = 0
    y_res = 0

    if x_coords.ndim == 1:
        # We have 1D x,y coordinates
        cls = Coords1DGridMapping

        assert_true(x_coords.size >= 2 and y_coords.size >= 2,
                    'sizes of x_coords and y_coords 1D arrays must be >= 2')

        size = x_coords.size, y_coords.size

        x_dim, y_dim = x_coords.dims[0], y_coords.dims[0]

        x_diff = _abs_no_zero(x_coords.diff(dim=x_dim).values)
        y_diff = _abs_no_zero(y_coords.diff(dim=y_dim).values)

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = np.any(np.nanmax(x_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x_diff = _abs_no_zero(x_coords.diff(dim=x_dim))
                is_lon_360 = True

        x_res, y_res = x_diff[0], y_diff[0]
        x_diff_equal = np.allclose(x_diff, x_res, atol=tolerance)
        y_diff_equal = np.allclose(y_diff, y_res, atol=tolerance)
        is_regular = x_diff_equal and y_diff_equal
        if is_regular:
            x_res = round_to_fraction(x_res, 5, 0.25)
            y_res = round_to_fraction(y_res, 5, 0.25)
        else:
            x_res = round_to_fraction(float(np.nanmedian(x_diff)), 2, 0.5)
            y_res = round_to_fraction(float(np.nanmedian(y_diff)), 2, 0.5)

        if tile_size is None \
                and x_coords.chunks is not None \
                and y_coords.chunks is not None:
            tile_size = (max(0, *x_coords.chunks[0]),
                         max(0, *y_coords.chunks[0]))

        # Guess j axis direction
        is_j_axis_up = bool(y_coords[0] < y_coords[-1])

    else:
        # We have 2D x,y coordinates
        cls = Coords2DGridMapping

        assert_true(x_coords.shape == y_coords.shape,
                    'shapes of x_coords and y_coords'
                    ' 2D arrays must be equal')
        assert_true(x_coords.dims == y_coords.dims,
                    'dimensions of x_coords and y_coords'
                    ' 2D arrays must be equal')

        y_dim, x_dim = x_coords.dims

        height, width = x_coords.shape
        size = width, height

        x = da.asarray(x_coords)
        y = da.asarray(y_coords)

        x_x_diff = _abs_no_nan(da.diff(x, axis=1))
        x_y_diff = _abs_no_nan(da.diff(x, axis=0))
        y_x_diff = _abs_no_nan(da.diff(y, axis=1))
        y_y_diff = _abs_no_nan(da.diff(y, axis=0))

        if not is_lon_360 and crs.is_geographic:
            is_anti_meridian_crossed = da.any(da.max(x_x_diff) > 180) \
                                       or da.any(da.max(x_y_diff) > 180)
            if is_anti_meridian_crossed:
                x_coords = to_lon_360(x_coords)
                x = da.asarray(x_coords)
                x_x_diff = _abs_no_nan(da.diff(x, axis=1))
                x_y_diff = _abs_no_nan(da.diff(x, axis=0))
                is_lon_360 = True

        is_regular = False

        if da.all(x_y_diff == 0) and da.all(y_x_diff == 0):
            x_res = x_x_diff[0, 0]
            y_res = y_y_diff[0, 0]
            is_regular = \
                da.allclose(x_x_diff[0, :], x_res, atol=tolerance) \
                and da.allclose(x_x_diff[-1, :], x_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, 0], y_res, atol=tolerance) \
                and da.allclose(y_y_diff[:, -1], y_res, atol=tolerance)

        if not is_regular:
            # Let diff arrays have same shape as original by
            # doubling last rows and columns.
            x_x_diff_c = da.concatenate([x_x_diff, x_x_diff[:, -1:]], axis=1)
            y_x_diff_c = da.concatenate([y_x_diff, y_x_diff[:, -1:]], axis=1)
            x_y_diff_c = da.concatenate([x_y_diff, x_y_diff[-1:, :]], axis=0)
            y_y_diff_c = da.concatenate([y_y_diff, y_y_diff[-1:, :]], axis=0)

            # Find resolution via area
            x_abs_diff = da.sqrt(da.square(x_x_diff_c) + da.square(x_y_diff_c))
            y_abs_diff = da.sqrt(da.square(y_x_diff_c) + da.square(y_y_diff_c))

            if crs.is_geographic:
                # Convert degrees into meters
                x_abs_diff_r = da.radians(x_abs_diff)
                y_abs_diff_r = da.radians(y_abs_diff)
                x_abs_diff = _ER * da.cos(x_abs_diff_r) * y_abs_diff_r
                y_abs_diff = _ER * y_abs_diff_r

            xy_areas = (x_abs_diff * y_abs_diff).flatten()
            xy_areas = da.where(xy_areas > 0, xy_areas, np.nan)

            # Get indices of min and max area
            xy_area_index_min = da.nanargmin(xy_areas)
            xy_area_index_max = da.nanargmax(xy_areas)

            # Convert area to edge length
            xy_res_min = math.sqrt(xy_areas[xy_area_index_min])
            xy_res_max = math.sqrt(xy_areas[xy_area_index_max])

            # Empirically weight min more than max
            xy_res = 0.7 * xy_res_min + 0.3 * xy_res_max

            if crs.is_geographic:
                # Convert meters back into degrees
                # print(f'xy_res in meters: {xy_res}')
                xy_res = math.degrees(xy_res / _ER)
                # print(f'xy_res in degrees: {xy_res}')

            # Because this is an estimation, we can round to a nice number
            xy_res = round_to_fraction(xy_res, digits=1, resolution=0.5)

            x_res, y_res = float(xy_res), float(xy_res)

        if tile_size is None and x_coords.chunks is not None:
            j_chunks, i_chunks = x_coords.chunks
            tile_size = max(0, *i_chunks), max(0, *j_chunks)

        if tile_size is not None:
            tile_width, tile_height = tile_size
            x_coords = x_coords.chunk((tile_height, tile_width))
            y_coords = y_coords.chunk((tile_height, tile_width))

        # Guess j axis direction
        is_j_axis_up = np.all(y_coords[0, :] < y_coords[-1, :]) or None

    assert_true(x_res > 0 and y_res > 0,
                'internal error: x_res and y_res could not be determined',
                exception_type=RuntimeError)

    x_res, y_res = _to_int_or_float(x_res), _to_int_or_float(y_res)
    x_res_05, y_res_05 = x_res / 2, y_res / 2
    x_min = _to_int_or_float(x_coords.min() - x_res_05)
    y_min = _to_int_or_float(y_coords.min() - y_res_05)
    x_max = _to_int_or_float(x_coords.max() + x_res_05)
    y_max = _to_int_or_float(y_coords.max() + y_res_05)

    return cls(x_coords=x_coords,
               y_coords=y_coords,
               crs=crs,
               size=size,
               tile_size=tile_size,
               xy_bbox=(x_min, y_min, x_max, y_max),
               xy_res=(x_res, y_res),
               xy_var_names=xy_var_names,
               xy_dim_names=(str(x_dim), str(y_dim)),
               is_regular=is_regular,
               is_lon_360=is_lon_360,
               is_j_axis_up=is_j_axis_up)
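# A minimal usage sketch for the factory above, assuming it is imported from
# xcube's grid-mapping module (the exact import path and the attribute names
# in the trailing comments are assumptions, not taken from the snippet).
import numpy as np
import xarray as xr

lon = xr.DataArray(np.arange(-179.5, 180.0, 1.0), dims='lon', name='lon')
lat = xr.DataArray(np.arange(-89.5, 90.0, 1.0), dims='lat', name='lat')

gm = new_grid_mapping_from_coords(lon, lat, crs='EPSG:4326')
# For a regular 1-degree global grid one would expect roughly:
#   gm.size       -> (360, 180)
#   gm.xy_res     -> (1.0, 1.0)
#   gm.is_regular -> True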
# not convertible to dask easily:
fVabs_old = lambda Gxyz, kVabs: np.polyval(kVabs.flat,
                                           np.sqrt(np.tan(fInclination(Gxyz))))
rep2mean = lambda x, bOk: np.interp(np.arange(len(x)),
                                    np.flatnonzero(bOk), x[bOk], np.NaN, np.NaN)

fForce2Vabs_fitted = lambda x: da.where(x > 2, 2,
                                        da.where(x < 1, 0.25 * x,
                                                 0.25 * x + 0.3 * (x - 1) ** 4))
fIncl2Force = lambda incl: da.sqrt(da.tan(incl))
fVabs = lambda Gxyz, kVabs: fForce2Vabs_fitted(fIncl2Force(fInclination(Gxyz)))

f = lambda fun, *args: fun(*args)
positiveInd = lambda i, L: np.int32(da.where(i < 0, L - i, i))
minInterval = lambda iLims1, iLims2, L: f(
    lambda iL1, iL2: da.transpose([max(iL1[:, 0], iL2[:, 0]),
                                   min(iL1[:, -1], iL2[:, -1])]),
    positiveInd(iLims1, L), positiveInd(iLims2, L))

fStEn2bool = lambda iStEn, length: da.hstack(
    [(da.ones(iEn2iSt, dtype=np.bool8) if b
      else da.zeros(iEn2iSt, dtype=np.bool8))
     for iEn2iSt, b in da.vstack((
         da.diff(da.hstack((0, iStEn.flat, length))),
         da.hstack((da.repeat([(False, True)], np.size(iStEn, 0), 0).flat,
                    False))
     )).T])

TimeShift_Log_sec = 60
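# A small illustration of the ``rep2mean`` helper defined above: it linearly
# interpolates across samples flagged as bad. The data values here are made up.
import numpy as np

x = np.array([1.0, 2.0, -999.0, 4.0, 5.0])   # -999.0 marks a bad sample
bOk = x > -100.0                             # mask of good samples

print(rep2mean(x, bOk))                      # [1. 2. 3. 4. 5.] -- gap filled by interpolation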
def dataset_chunks(datasets, time_bin_secs, max_row_chunks):
    """
    Given ``max_row_chunks`` determine a chunking strategy
    for each dataset that prevents binning unique times in
    separate chunks.
    """
    # Calculate (utime, idx, counts) tuple for each dataset
    # then transpose to get lists for each tuple entry
    if len(datasets) == 0:
        return (), ()

    utimes = []
    interval_avg = []
    counts = []
    monotonicity_checks = []

    for ds in datasets:
        # Compute unique times, their counts and interval sum
        # for each row chunk
        block_values = da.blockwise(_time_interval_sum, "r",
                                    ds.TIME.data, "r",
                                    ds.INTERVAL.data, "r",
                                    meta=np.empty((0,), dtype=np.object),
                                    dtype=np.object)

        # Reduce each row chunk's values
        reduction = da.reduction(block_values,
                                 chunk=_chunk,
                                 combine=_time_int_combine,
                                 aggregate=_time_int_agg,
                                 concatenate=False,
                                 split_every=16,
                                 meta=np.empty((0,), dtype=np.object),
                                 dtype=np.object)

        # Pull out the final unique times, counts and interval average
        utime = reduction.map_blocks(getitem, 0, dtype=ds.TIME.dtype)
        count = reduction.map_blocks(getitem, 1, dtype=np.int32)
        int_avg = reduction.map_blocks(getitem, 2, dtype=ds.INTERVAL.dtype)

        # Check monotonicity of TIME while we're at it
        is_monotonic = da.all(da.diff(ds.TIME.data) >= 0.0)

        utimes.append(utime)
        counts.append(count)
        interval_avg.append(int_avg)
        monotonicity_checks.append(is_monotonic)

    # Work out the unique times, average intervals for those times
    # and the frequency of those times
    (ds_utime, ds_avg_intervals,
     ds_counts, ds_monotonicity_checks) = dask.compute(utimes, interval_avg,
                                                        counts,
                                                        monotonicity_checks)

    if not all(ds_monotonicity_checks):
        raise ValueError("TIME is not monotonically increasing. "
                         "This is required.")

    # Produce row and time chunking strategies for each dataset
    ds_row_chunks = []
    ds_time_chunks = []
    ds_interval_secs = []

    it = zip(ds_utime, ds_avg_intervals, ds_counts)

    for di, (utime, avg_interval, counts) in enumerate(it):
        # Maintain row and time chunks for this dataset
        row_chunks = []
        time_chunks = []
        interval_secs = []

        # Start out with first entries
        bin_rows = counts[0]
        bin_times = 1
        bin_secs = avg_interval[0]

        dsit = enumerate(zip(utime[1:], avg_interval[1:], counts[1:]))

        for ti, (ut, avg_int, count) in dsit:
            if count > max_row_chunks:
                logger.warning("Unique time {:3f} occurred {:d} times "
                               "in dataset {:d} but this exceeds the "
                               "requested row chunks {:d}. "
                               "Consider increasing --row-chunks",
                               ut, count, di, max_row_chunks)

            if avg_int > time_bin_secs:
                logger.warning("The average INTERVAL associated with "
                               "unique time {:3f} in dataset {:d} "
                               "is {:3f} but this exceeds the requested "
                               "number of seconds in a time bin {:3f}s. "
                               "Consider increasing --time-bin-secs",
                               ut, di, avg_int, time_bin_secs)

            next_rows = bin_rows + count

            # If we're still within the number of rows for this bin
            # keep going
            if next_rows < max_row_chunks:
                bin_rows = next_rows
                bin_times += 1
                bin_secs += avg_int
            # Otherwise finalize this bin and
            # start a new one with the counts
            # we were trying to add
            else:
                row_chunks.append(bin_rows)
                time_chunks.append(bin_times)
                interval_secs.append(bin_secs)
                bin_rows = count
                bin_times = 1
                bin_secs = avg_int

        # Finish any remaining bins
        if bin_rows > 0:
            assert bin_times > 0
            row_chunks.append(bin_rows)
            time_chunks.append(bin_times)
            interval_secs.append(bin_secs)

        row_chunks = tuple(row_chunks)
        time_chunks = tuple(time_chunks)
        interval_secs = tuple(interval_secs)

        ds_row_chunks.append(row_chunks)
        ds_time_chunks.append(time_chunks)
        ds_interval_secs.append(interval_secs)

    logger.info("Dataset Chunking: (r)ow - (t)imes - (s)econds")

    it = zip(datasets, ds_row_chunks, ds_time_chunks, ds_interval_secs)

    for di, (ds, ds_rcs, ds_tcs, ds_int_secs) in enumerate(it):
        ds_rows = ds.dims['row']
        ds_crows = sum(ds_rcs)

        if not ds_rows == ds_crows:
            raise ValueError("Number of dataset rows %d "
                             "does not match the sum %d "
                             "of the row chunks %s"
                             % (ds_rows, ds_crows, ds_rcs))

        log_str = ", ".join("(%dr,%dt,%.1fs)" % (rc, tc, its)
                            for rc, tc, its
                            in zip(*(ds_rcs, ds_tcs, ds_int_secs)))

        logger.info("Dataset {d}: {s}", d=di, s=log_str)

    return ds_row_chunks, ds_time_chunks
def dataset_chunks(datasets, time_bin_secs, max_row_chunks):
    """
    Given ``max_row_chunks`` determine a chunking strategy
    for each dataset that prevents binning unique times in
    separate chunks.
    """
    # Calculate (utime, idx, counts) tuple for each dataset
    # then transpose to get lists for each tuple entry
    if len(datasets) == 0:
        return (), ()

    utimes = []
    interval_avg = []
    counts = []
    monotonicity_checks = []

    for ds in datasets:
        # Compute unique times, their counts and interval sum
        # for each row chunk
        block_values = da.blockwise(_time_interval_sum, "r",
                                    ds.TIME.data, "r",
                                    ds.INTERVAL.data, "r",
                                    meta=np.empty((0,), dtype=np.object),
                                    dtype=np.object)

        # Reduce each row chunk's values
        reduction = da.reduction(block_values,
                                 chunk=_chunk,
                                 combine=_time_int_combine,
                                 aggregate=_time_int_agg,
                                 concatenate=False,
                                 split_every=16,
                                 meta=np.empty((0,), dtype=np.object),
                                 dtype=np.object)

        # Pull out the final unique times, counts and interval average
        utime = reduction.map_blocks(getitem, 0, dtype=ds.TIME.dtype)
        count = reduction.map_blocks(getitem, 1, dtype=np.int32)
        int_avg = reduction.map_blocks(getitem, 2, dtype=ds.INTERVAL.dtype)

        # Check monotonicity of TIME while we're at it
        is_monotonic = da.all(da.diff(ds.TIME.data) >= 0.0)

        utimes.append(utime)
        counts.append(count)
        interval_avg.append(int_avg)
        monotonicity_checks.append(is_monotonic)

    # Work out the unique times, average intervals for those times
    # and the frequency of those times
    (ds_utime, ds_avg_intervals,
     ds_counts, ds_monotonicity_checks) = dask.compute(utimes, interval_avg,
                                                        counts,
                                                        monotonicity_checks)

    if not all(ds_monotonicity_checks):
        raise ValueError("TIME is not monotonically increasing. "
                         "This is required.")

    grouper = DatasetGrouper(time_bin_secs, max_row_chunks)
    res = grouper.group(ds_utime, ds_avg_intervals, ds_counts)
    ds_row_chunks, ds_time_chunks, ds_interval_secs = res

    logger.info("Dataset Chunking: (r)ow - (t)imes - (s)econds")

    it = zip(datasets, ds_row_chunks, ds_time_chunks, ds_interval_secs)

    for di, (ds, ds_rcs, ds_tcs, ds_int_secs) in enumerate(it):
        ds_rows = ds.dims['row']
        ds_crows = sum(ds_rcs)

        if not ds_rows == ds_crows:
            raise ValueError("Number of dataset rows %d "
                             "does not match the sum %d "
                             "of the row chunks %s"
                             % (ds_rows, ds_crows, ds_rcs))

        log_str = ", ".join("(%dr,%dt,%.1fs)" % (rc, tc, its)
                            for rc, tc, its
                            in zip(*(ds_rcs, ds_tcs, ds_int_secs)))

        logger.info("Dataset {d}: {s}", d=di, s=log_str)

    return ds_row_chunks, ds_time_chunks
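# The refactored version above delegates the binning loop to ``DatasetGrouper``,
# whose definition is not included in the snippet. The sketch below is an
# assumption about its interface, reconstructed from the call site and from the
# inline binning loop of the earlier version; it is not the project's actual class.
class DatasetGrouper:
    def __init__(self, time_bin_secs, max_row_chunks):
        self.time_bin_secs = time_bin_secs
        self.max_row_chunks = max_row_chunks

    def group(self, ds_utime, ds_avg_intervals, ds_counts):
        ds_row_chunks, ds_time_chunks, ds_interval_secs = [], [], []

        for utime, avg_interval, counts in zip(ds_utime,
                                               ds_avg_intervals,
                                               ds_counts):
            row_chunks, time_chunks, interval_secs = [], [], []

            # Seed the first bin with the first unique time
            bin_rows, bin_times, bin_secs = counts[0], 1, avg_interval[0]

            for ut, avg_int, count in zip(utime[1:], avg_interval[1:],
                                          counts[1:]):
                if bin_rows + count < self.max_row_chunks:
                    # Still within the row budget: grow the current bin
                    bin_rows += count
                    bin_times += 1
                    bin_secs += avg_int
                else:
                    # Finalize this bin and start a new one
                    row_chunks.append(bin_rows)
                    time_chunks.append(bin_times)
                    interval_secs.append(bin_secs)
                    bin_rows, bin_times, bin_secs = count, 1, avg_int

            # Finish any remaining bin
            if bin_rows > 0:
                row_chunks.append(bin_rows)
                time_chunks.append(bin_times)
                interval_secs.append(bin_secs)

            ds_row_chunks.append(tuple(row_chunks))
            ds_time_chunks.append(tuple(time_chunks))
            ds_interval_secs.append(tuple(interval_secs))

        return ds_row_chunks, ds_time_chunks, ds_interval_secs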