def test_raise_on_bad_kwargs():
    """Unsupported keyword arguments must raise a TypeError naming the ufunc and the kwarg."""
    x = da.ones(5, chunks=3)
    try:
        da.minimum(x, out=None)
    except TypeError as e:
        # The message should identify both the function and the offending keyword.
        assert 'minimum' in str(e)
        assert 'out' in str(e)
    else:
        # BUG FIX: previously the test silently passed when no exception was
        # raised at all -- fail explicitly in that case.
        raise AssertionError("da.minimum(x, out=None) should have raised TypeError")
def __call__(self, projectables, optional_datasets=None, **info):
    """Get the corrected reflectance when removing Rayleigh scattering.

    Uses pyspectral.
    """
    from pyspectral.rayleigh import Rayleigh
    # Angles may be supplied as four optional datasets; otherwise compute them
    # from the visible band's geometry.
    if not optional_datasets or len(optional_datasets) != 4:
        vis, red = self.match_data_arrays(projectables)
        sata, satz, suna, sunz = self.get_angles(vis)
        # rechunk the red band so it lines up with the visible band's chunks
        red.data = da.rechunk(red.data, vis.data.chunks)
    else:
        vis, red, sata, satz, suna, sunz = self.match_data_arrays(
            projectables + optional_datasets)
        # NOTE(review): the matched angle arrays are immediately overwritten
        # with the raw optional datasets -- confirm this is intentional.
        sata, satz, suna, sunz = optional_datasets
        # get the dask array underneath
        sata = sata.data
        satz = satz.data
        suna = suna.data
        sunz = sunz.data

    # First make sure the two azimuth angles are in the range 0-360:
    sata = sata % 360.
    suna = suna % 360.
    # relative sun-satellite azimuth difference, folded into [0, 180]
    ssadiff = da.absolute(suna - sata)
    ssadiff = da.minimum(ssadiff, 360 - ssadiff)
    del sata, suna

    atmosphere = self.attrs.get('atmosphere', 'us-standard')
    aerosol_type = self.attrs.get('aerosol_type', 'marine_clean_aerosol')
    # cache correctors keyed on platform/sensor/atmosphere/aerosol so repeated
    # calls do not rebuild the pyspectral lookup tables
    rayleigh_key = (vis.attrs['platform_name'],
                    vis.attrs['sensor'], atmosphere, aerosol_type)
    logger.info(
        "Removing Rayleigh scattering with atmosphere '%s' and "
        "aerosol type '%s' for '%s'",
        atmosphere, aerosol_type, vis.attrs['name'])
    if rayleigh_key not in self._rayleigh_cache:
        corrector = Rayleigh(vis.attrs['platform_name'], vis.attrs['sensor'],
                             atmosphere=atmosphere,
                             aerosol_type=aerosol_type)
        self._rayleigh_cache[rayleigh_key] = corrector
    else:
        corrector = self._rayleigh_cache[rayleigh_key]

    # Prefer looking up the correction by band name; fall back to the central
    # wavelength if pyspectral does not know the name.
    try:
        refl_cor_band = corrector.get_reflectance(sunz, satz, ssadiff,
                                                  vis.attrs['name'],
                                                  red.data)
    except (KeyError, IOError):
        logger.warning(
            "Could not get the reflectance correction using band name: %s",
            vis.attrs['name'])
        logger.warning(
            "Will try use the wavelength, however, this may be ambiguous!")
        refl_cor_band = corrector.get_reflectance(
            sunz, satz, ssadiff, vis.attrs['wavelength'][1], red.data)

    # subtract the Rayleigh contribution and preserve the original metadata
    proj = vis - refl_cor_band
    proj.attrs = vis.attrs
    self.apply_modifier_info(vis, proj)
    return proj
def searchdask(a, v, how=None, atol=None):
    """Search a sorted dask array ``a`` for the positions of values ``v``.

    Parameters
    ----------
    a : sorted 1-D array to search in (length ``n_a``)
    v : values to locate
    how : None, 'nearest', 'bfill' or 'ffill' -- matching strategy.
        None requires exact matches; non-matches map to ``n_a`` (out of range).
    atol : optional absolute tolerance; matches farther than ``atol`` from the
        looked-up element are replaced by ``n_a``.

    Returns
    -------
    dask array of indices into ``a`` (``n_a`` marks "no match").

    Raises
    ------
    NotImplementedError if ``how`` is not one of the supported strategies.
    """
    n_a = a.shape[0]
    searchfunc, args = presearch(a, v)
    if how == 'nearest':
        # candidate neighbours on either side, clamped into range
        l_index = da.maximum(searchfunc(*args, side='right') - 1, 0)
        r_index = da.minimum(searchfunc(*args), n_a - 1)
        # pick whichever neighbour is closer (midpoint test avoids division)
        cond = 2 * v < (select(a, r_index) + select(a, l_index))
        indexer = da.maximum(da.where(cond, l_index, r_index), 0)
    elif how == 'bfill':
        indexer = searchfunc(*args)
    elif how == 'ffill':
        indexer = searchfunc(*args, side='right') - 1
        # values before the first element have no forward-fill source
        indexer = da.where(indexer == -1, n_a, indexer)
    elif how is None:
        l_index = searchfunc(*args)
        r_index = searchfunc(*args, side='right')
        # equal left/right insertion points mean the value is absent
        indexer = da.where(l_index == r_index, n_a, l_index)
    else:
        # BUG FIX: the exception class was previously *returned* instead of raised.
        raise NotImplementedError(f"unsupported fill method: {how!r}")
    if atol is not None:
        # append a sentinel larger than any v so index n_a selects a value
        # guaranteed to violate the tolerance
        a2 = da.concatenate([a, [atol + da.max(v) + 1]])
        indexer = da.where(
            da.absolute(select(a2, indexer) - v) > atol, n_a, indexer)
    return indexer
def get_value(self, group, corr, extras, flag, flag_row, chanslice):
    """Extract, map, slice, flag and optionally discretize this axis' data for one MS group."""
    coldata = self.get_column_data(group)
    # correlation may be pre-set by plot type, or may be passed to us
    corr = self.corr if self.corr is not None else corr
    # apply correlation reduction
    if coldata is not None and coldata.ndim == 3:
        assert corr is not None
        # the mapper can't have a specific axis set
        if self.mapper.axis is not None:
            raise TypeError(f"{self.name}: unexpected column with ndim=3")
        coldata = self.ms.corr_data_mappers[corr](coldata)
    # apply mapping function
    coldata = self.mapper.mapper(
        coldata, **{name: extras[name] for name in self.mapper.extras})
    # scalar expanded to row vector
    if numpy.isscalar(coldata):
        coldata = da.full_like(flag_row, fill_value=coldata,
                               dtype=type(coldata))
        flag = flag_row
    else:
        # apply channel slicing, if there's a channel axis in the array (and the array is a DataArray)
        if type(coldata) is xarray.DataArray and 'chan' in coldata.dims:
            coldata = coldata[dict(chan=chanslice)]
        # determine flags -- start with original flags
        if flag is not None:
            if coldata.ndim == 2:
                flag = self.ms.corr_flag_mappers[corr](flag)
            elif coldata.ndim == 1:
                # 1-D data follows either the row axis (use row flags) or the
                # channel axis (no per-channel flags available)
                if not self.mapper.axis:
                    flag = flag_row
                elif self.mapper.axis == 1:
                    flag = None
            # shapes must now match
            if flag is not None and coldata.shape != flag.shape:
                raise TypeError(f"{self.name}: unexpected column shape")
    # discretize
    if self.nlevels:
        # minmax set? discretize over that
        if self.discretized_delta is not None:
            coldata = da.floor(
                (coldata - self.minmax[0]) / self.discretized_delta)
            coldata = da.minimum(da.maximum(coldata, 0),
                                 self.nlevels - 1).astype(COUNT_DTYPE)
        else:
            # NOTE(review): `coldata.dtype is bool` is an identity check of a
            # numpy dtype against the builtin bool and looks like it can never
            # be True for a dask array -- confirm the intended condition.
            if coldata.dtype is bool:
                if not numpy.issubdtype(coldata.dtype, numpy.integer):
                    raise TypeError(
                        f"{self.name}: min/max must be set to colour by non-integer values"
                    )
            # no explicit min/max: wrap integer values onto the level range
            coldata = da.remainder(coldata, self.nlevels).astype(COUNT_DTYPE)
    # mask out non-finite data in addition to any MS flags
    if flag is not None:
        flag |= ~da.isfinite(coldata)
        return dama.masked_array(coldata, flag)
    else:
        return dama.masked_array(coldata, ~da.isfinite(coldata))
def missing_spectrum(  # pylint: disable=too-many-locals
        df: DataArray, bins: int) -> Dict[str, da.Array]:
    """Calculate a missing spectrum for each column.

    Bins the rows of the null-mask and computes, per bin and per column, the
    fraction of missing values, along with the bin locations.
    """
    nrows, ncols = df.shape
    data = df.nulls
    if nrows > 1:
        num_bins = min(bins, nrows - 1)
        bin_size = nrows // num_bins
        chunk_size = min(
            1024 * 1024 * 128,
            nrows * ncols)  # max 1024 x 1024 x 128 Bytes bool values
        # pack a whole number of bins into each dask chunk
        nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)
        chunk_size = nbins_per_chunk * bin_size
        data = data.rechunk((chunk_size, None))
        # sep = largest row index that is a multiple of chunk_size;
        # the tail [sep:] is handled separately below
        sep = nrows // chunk_size * chunk_size
    else:
        # avoid division or module by zero
        bin_size = 1
        nbins_per_chunk = 1
        chunk_size = 1
        data = data.rechunk((chunk_size, None))
        sep = 1
    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.chunksize[1:]),
        dtype=float,
    )
    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)
    num_bins = spectrum_missing_percs.shape[0]
    # bin start/end/midpoint locations in row coordinates
    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2
    return {
        "column":
            da.repeat(da.from_array(df.columns.values, (1, )), num_bins),
        "location":
            da.tile(locs_middle, ncols),
        "missing_rate":
            spectrum_missing_percs.T.ravel().rechunk(locs_middle.shape[0]),
        "loc_start":
            da.tile(locs0, ncols),
        "loc_end":
            da.tile(locs1, ncols),
    }
def searchdaskuniform(a0, step, n_a, v, how=None, atol=None):
    """Locate values ``v`` on a uniform grid starting at ``a0`` with spacing ``step``.

    Mirrors the non-uniform search: ``how`` selects exact (None), 'nearest',
    'bfill' or 'ffill' matching, ``atol`` limits the acceptable distance, and
    out-of-range / unmatched positions are mapped to the sentinel ``n_a``.
    Returns an integer dask array of indices.
    """
    # fractional position of each value on the grid
    pos = (v - a0) / step
    if how is None:
        # exact match only: a non-integral position marks a miss
        idx = da.ceil(pos)
        idx = da.where(idx != pos, n_a, idx)
    elif how == 'nearest':
        idx = da.maximum(da.minimum(da.around(pos), n_a - 1), 0)
    elif how == 'bfill':
        idx = da.maximum(da.ceil(pos), 0)
    elif how == 'ffill':
        idx = da.minimum(da.floor(pos), n_a - 1)
    # positions outside the grid always map to the sentinel; with a tolerance,
    # matches farther than atol (in grid units) are rejected as well
    invalid = (idx < 0) | (idx >= n_a)
    if atol is not None:
        invalid = invalid | (da.absolute(idx - pos) * step > atol)
    idx = da.where(invalid, n_a, idx)
    return idx.astype(int)
def __call__(self, projectables, optional_datasets=None, **info):
    """Get the corrected reflectance when removing Rayleigh scattering.

    Uses pyspectral.
    """
    from pyspectral.rayleigh import Rayleigh
    # Angles may be supplied as four optional datasets; otherwise compute them
    # from the visible band's geometry.
    if not optional_datasets or len(optional_datasets) != 4:
        vis, red = self.check_areas(projectables)
        sata, satz, suna, sunz = self.get_angles(vis)
        # rechunk the red band so it lines up with the visible band's chunks
        red.data = da.rechunk(red.data, vis.data.chunks)
    else:
        vis, red, sata, satz, suna, sunz = self.check_areas(
            projectables + optional_datasets)
        # NOTE(review): the checked angle arrays are immediately overwritten
        # with the raw optional datasets -- confirm this is intentional.
        sata, satz, suna, sunz = optional_datasets
        # get the dask array underneath
        sata = sata.data
        satz = satz.data
        suna = suna.data
        sunz = sunz.data

    LOG.info('Removing Rayleigh scattering and aerosol absorption')

    # First make sure the two azimuth angles are in the range 0-360:
    sata = sata % 360.
    suna = suna % 360.
    # relative sun-satellite azimuth difference, folded into [0, 180]
    ssadiff = da.absolute(suna - sata)
    ssadiff = da.minimum(ssadiff, 360 - ssadiff)
    del sata, suna

    atmosphere = self.attrs.get('atmosphere', 'us-standard')
    aerosol_type = self.attrs.get('aerosol_type', 'marine_clean_aerosol')
    # cache correctors keyed on platform/sensor/atmosphere/aerosol so repeated
    # calls do not rebuild the pyspectral lookup tables
    rayleigh_key = (vis.attrs['platform_name'],
                    vis.attrs['sensor'], atmosphere, aerosol_type)
    if rayleigh_key not in self._rayleigh_cache:
        corrector = Rayleigh(vis.attrs['platform_name'], vis.attrs['sensor'],
                             atmosphere=atmosphere,
                             aerosol_type=aerosol_type)
        self._rayleigh_cache[rayleigh_key] = corrector
    else:
        corrector = self._rayleigh_cache[rayleigh_key]

    # Prefer looking up the correction by band name; fall back to the central
    # wavelength if pyspectral does not know the name.
    try:
        refl_cor_band = corrector.get_reflectance(sunz, satz, ssadiff,
                                                  vis.attrs['name'],
                                                  red.data)
    except (KeyError, IOError):
        LOG.warning("Could not get the reflectance correction using band name: %s",
                    vis.attrs['name'])
        LOG.warning("Will try use the wavelength, however, this may be ambiguous!")
        refl_cor_band = corrector.get_reflectance(sunz, satz, ssadiff,
                                                  vis.attrs['wavelength'][1],
                                                  red.data)

    # subtract the Rayleigh contribution and preserve the original metadata
    proj = vis - refl_cor_band
    proj.attrs = vis.attrs
    self.apply_modifier_info(vis, proj)
    return proj
def missing_spectrum(  # pylint: disable=too-many-locals
        data: da.Array, cols: np.ndarray, bins: int) -> dd.DataFrame:
    """
    Calculate a missing spectrum for each column

    Bins the rows of the boolean null-mask ``data`` and computes, per bin and
    per column, the fraction of missing values; returns a tidy dask DataFrame
    with one row per (column, bin).
    """
    nrows, ncols = data.shape
    num_bins = min(bins, nrows - 1)
    bin_size = nrows // num_bins
    chunk_size = min(
        1024 * 1024 * 128,
        nrows * ncols)  # max 1024 x 1024 x 128 Bytes bool values
    # pack a whole number of bins into each dask chunk
    nbins_per_chunk = max(chunk_size // (bin_size * data.shape[1]), 1)

    chunk_size = nbins_per_chunk * bin_size

    data = data.rechunk((chunk_size, None))

    # sep = largest row index that is a multiple of chunk_size;
    # the tail [sep:] is handled separately below
    sep = nrows // chunk_size * chunk_size
    # NOTE(review): the output chunks use data.shape[1:] here, not
    # data.chunksize[1:]; if the columns are chunked these may differ --
    # confirm against the other variant of this function.
    spectrum_missing_percs = data[:sep].map_blocks(
        missing_perc_blockwise(bin_size),
        chunks=(nbins_per_chunk, *data.shape[1:]),
        dtype=float,
    )

    # calculation for the last chunk
    if sep != nrows:
        spectrum_missing_percs_remain = data[sep:].map_blocks(
            missing_perc_blockwise(bin_size),
            chunks=(int(np.ceil((nrows - sep) / bin_size)), *data.shape[1:]),
            dtype=float,
        )
        spectrum_missing_percs = da.concatenate(
            [spectrum_missing_percs, spectrum_missing_percs_remain], axis=0)

    num_bins = spectrum_missing_percs.shape[0]

    # bin start/end/midpoint locations in row coordinates
    locs0 = da.arange(num_bins) * bin_size
    locs1 = da.minimum(locs0 + bin_size, nrows)
    locs_middle = locs0 + bin_size / 2

    df = dd.from_dask_array(
        da.repeat(da.from_array(cols, (1, )), num_bins),
        columns=["column"],
    )
    df = df.assign(
        location=da.tile(locs_middle, ncols),
        missing_rate=spectrum_missing_percs.T.ravel().rechunk(
            locs_middle.shape[0]),
        loc_start=da.tile(locs0, ncols),
        loc_end=da.tile(locs1, ncols),
    )
    return df
def missing_spectrum(df: dd.DataFrame, bins: int, ncols: int) -> Tuple[dd.DataFrame, dd.DataFrame]: """ Calculate a missing spectrum for each column """ # pylint: disable=too-many-locals num_bins = min(bins, len(df) - 1) df = df.iloc[:, :ncols] cols = df.columns[:ncols] ncols = len(cols) nrows = len(df) chunk_size = len(df) // num_bins data = df.isnull().to_dask_array() data.compute_chunk_sizes() data = data.rechunk((chunk_size, None)) notnull_counts = data.sum(axis=0) / data.shape[0] total_missing_percs = { col: notnull_counts[idx] for idx, col in enumerate(cols) } spectrum_missing_percs = data.map_blocks(missing_perc_blockwise, chunks=(1, data.shape[1]), dtype=float) nsegments = len(spectrum_missing_percs) locs0 = da.arange(nsegments) * chunk_size locs1 = da.minimum(locs0 + chunk_size, nrows) locs_middle = locs0 + chunk_size / 2 df = dd.from_dask_array( da.repeat(da.from_array(cols.values, (1, )), nsegments), columns=["column"], ) df = df.assign( location=da.tile(locs_middle, ncols), missing_rate=spectrum_missing_percs.T.ravel(), loc_start=da.tile(locs0, ncols), loc_end=da.tile(locs1, ncols), ) return df, total_missing_percs
def test_arithmetic():
    """Dask array arithmetic and ufuncs must agree with NumPy on the same data."""
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2,))
    b = da.from_array(y, chunks=(2,))
    c = da.from_array(z, chunks=(2,))
    assert eq(a + b, x + y)
    assert eq(a * b, x * y)
    assert eq(a - b, x - y)
    assert eq(a / b, x / y)
    assert eq(b & b, y & y)
    assert eq(b | b, y | y)
    assert eq(b ^ b, y ^ y)
    assert eq(a // b, x // y)
    assert eq(a ** b, x ** y)
    assert eq(a % b, x % y)
    assert eq(a > b, x > y)
    assert eq(a < b, x < y)
    assert eq(a >= b, x >= y)
    assert eq(a <= b, x <= y)
    assert eq(a == b, x == y)
    assert eq(a != b, x != y)

    assert eq(a + 2, x + 2)
    assert eq(a * 2, x * 2)
    assert eq(a - 2, x - 2)
    assert eq(a / 2, x / 2)
    assert eq(b & True, y & True)
    assert eq(b | True, y | True)
    assert eq(b ^ True, y ^ True)
    assert eq(a // 2, x // 2)
    assert eq(a ** 2, x ** 2)
    assert eq(a % 2, x % 2)
    assert eq(a > 2, x > 2)
    assert eq(a < 2, x < 2)
    assert eq(a >= 2, x >= 2)
    assert eq(a <= 2, x <= 2)
    assert eq(a == 2, x == 2)
    assert eq(a != 2, x != 2)

    assert eq(2 + b, 2 + y)
    assert eq(2 * b, 2 * y)
    assert eq(2 - b, 2 - y)
    assert eq(2 / b, 2 / y)
    assert eq(True & b, True & y)
    assert eq(True | b, True | y)
    assert eq(True ^ b, True ^ y)
    assert eq(2 // b, 2 // y)
    assert eq(2 ** b, 2 ** y)
    assert eq(2 % b, 2 % y)
    assert eq(2 > b, 2 > y)
    assert eq(2 < b, 2 < y)
    assert eq(2 >= b, 2 >= y)
    assert eq(2 <= b, 2 <= y)
    assert eq(2 == b, 2 == y)
    assert eq(2 != b, 2 != y)

    assert eq(-a, -x)
    assert eq(abs(a), abs(x))
    assert eq(~(a == b), ~(x == y))
    assert eq(~(a == b), ~(x == y))

    assert eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert eq(da.exp(b), np.exp(y))
    assert eq(da.log(a), np.log(x))
    assert eq(da.log10(a), np.log10(x))
    assert eq(da.log1p(a), np.log1p(x))
    assert eq(da.expm1(b), np.expm1(y))
    assert eq(da.sqrt(a), np.sqrt(x))
    assert eq(da.square(a), np.square(x))

    assert eq(da.sin(a), np.sin(x))
    assert eq(da.cos(b), np.cos(y))
    assert eq(da.tan(a), np.tan(x))
    assert eq(da.arcsin(b/10), np.arcsin(y/10))
    assert eq(da.arccos(b/10), np.arccos(y/10))
    assert eq(da.arctan(b/10), np.arctan(y/10))
    assert eq(da.arctan2(b*10, a), np.arctan2(y*10, x))
    assert eq(da.hypot(b, a), np.hypot(y, x))
    assert eq(da.sinh(a), np.sinh(x))
    assert eq(da.cosh(b), np.cosh(y))
    assert eq(da.tanh(a), np.tanh(x))
    assert eq(da.arcsinh(b*10), np.arcsinh(y*10))
    assert eq(da.arccosh(b*10), np.arccosh(y*10))
    assert eq(da.arctanh(b/10), np.arctanh(y/10))
    assert eq(da.deg2rad(a), np.deg2rad(x))
    assert eq(da.rad2deg(a), np.rad2deg(x))

    assert eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert eq(da.logical_not(a < 1), np.logical_not(x < 1))

    # BUG FIX: the numpy side previously received the dask arrays (a) instead
    # of the numpy data (x), making these comparisons vacuous.
    assert eq(da.maximum(a, 5 - a), np.maximum(x, 5 - x))
    assert eq(da.minimum(a, 5 - a), np.minimum(x, 5 - x))
    assert eq(da.fmax(a, 5 - a), np.fmax(x, 5 - x))
    assert eq(da.fmin(a, 5 - a), np.fmin(x, 5 - x))

    assert eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert eq(da.isfinite(a), np.isfinite(x))
    assert eq(da.isinf(a), np.isinf(x))
    assert eq(da.isnan(a), np.isnan(x))
    assert eq(da.signbit(a - 3), np.signbit(x - 3))
    assert eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert eq(da.ldexp(c, c), np.ldexp(z, z))
    assert eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert eq(da.ceil(a), np.ceil(x))
    assert eq(da.trunc(a / 2), np.trunc(x / 2))

    assert eq(da.degrees(b), np.degrees(y))
    assert eq(da.radians(a), np.radians(x))

    assert eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert eq(da.angle(a + 1j), np.angle(x + 1j))
    assert eq(da.real(a + 1j), np.real(x + 1j))
    assert eq((a + 1j).real, np.real(x + 1j))
    assert eq(da.imag(a + 1j), np.imag(x + 1j))
    assert eq((a + 1j).imag, np.imag(x + 1j))
    assert eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert eq(da.fabs(b), np.fabs(y))
    assert eq(da.sign(b - 2), np.sign(y - 2))

    l1, l2 = da.frexp(a)
    r1, r2 = np.frexp(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    l1, l2 = da.modf(a)
    r1, r2 = np.modf(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    assert eq(da.around(a, -1), np.around(x, -1))
def test_arithmetic():
    """Dask array arithmetic and ufuncs must agree with NumPy on the same data."""
    x = np.arange(5).astype('f4') + 2
    y = np.arange(5).astype('i8') + 2
    z = np.arange(5).astype('i4') + 2
    a = da.from_array(x, chunks=(2, ))
    b = da.from_array(y, chunks=(2, ))
    c = da.from_array(z, chunks=(2, ))
    assert eq(a + b, x + y)
    assert eq(a * b, x * y)
    assert eq(a - b, x - y)
    assert eq(a / b, x / y)
    assert eq(b & b, y & y)
    assert eq(b | b, y | y)
    assert eq(b ^ b, y ^ y)
    assert eq(a // b, x // y)
    assert eq(a**b, x**y)
    assert eq(a % b, x % y)
    assert eq(a > b, x > y)
    assert eq(a < b, x < y)
    assert eq(a >= b, x >= y)
    assert eq(a <= b, x <= y)
    assert eq(a == b, x == y)
    assert eq(a != b, x != y)

    assert eq(a + 2, x + 2)
    assert eq(a * 2, x * 2)
    assert eq(a - 2, x - 2)
    assert eq(a / 2, x / 2)
    assert eq(b & True, y & True)
    assert eq(b | True, y | True)
    assert eq(b ^ True, y ^ True)
    assert eq(a // 2, x // 2)
    assert eq(a**2, x**2)
    assert eq(a % 2, x % 2)
    assert eq(a > 2, x > 2)
    assert eq(a < 2, x < 2)
    assert eq(a >= 2, x >= 2)
    assert eq(a <= 2, x <= 2)
    assert eq(a == 2, x == 2)
    assert eq(a != 2, x != 2)

    assert eq(2 + b, 2 + y)
    assert eq(2 * b, 2 * y)
    assert eq(2 - b, 2 - y)
    assert eq(2 / b, 2 / y)
    assert eq(True & b, True & y)
    assert eq(True | b, True | y)
    assert eq(True ^ b, True ^ y)
    assert eq(2 // b, 2 // y)
    assert eq(2**b, 2**y)
    assert eq(2 % b, 2 % y)
    assert eq(2 > b, 2 > y)
    assert eq(2 < b, 2 < y)
    assert eq(2 >= b, 2 >= y)
    assert eq(2 <= b, 2 <= y)
    assert eq(2 == b, 2 == y)
    assert eq(2 != b, 2 != y)

    assert eq(-a, -x)
    assert eq(abs(a), abs(x))
    assert eq(~(a == b), ~(x == y))
    assert eq(~(a == b), ~(x == y))

    assert eq(da.logaddexp(a, b), np.logaddexp(x, y))
    assert eq(da.logaddexp2(a, b), np.logaddexp2(x, y))
    assert eq(da.exp(b), np.exp(y))
    assert eq(da.log(a), np.log(x))
    assert eq(da.log10(a), np.log10(x))
    assert eq(da.log1p(a), np.log1p(x))
    assert eq(da.expm1(b), np.expm1(y))
    assert eq(da.sqrt(a), np.sqrt(x))
    assert eq(da.square(a), np.square(x))

    assert eq(da.sin(a), np.sin(x))
    assert eq(da.cos(b), np.cos(y))
    assert eq(da.tan(a), np.tan(x))
    assert eq(da.arcsin(b / 10), np.arcsin(y / 10))
    assert eq(da.arccos(b / 10), np.arccos(y / 10))
    assert eq(da.arctan(b / 10), np.arctan(y / 10))
    assert eq(da.arctan2(b * 10, a), np.arctan2(y * 10, x))
    assert eq(da.hypot(b, a), np.hypot(y, x))
    assert eq(da.sinh(a), np.sinh(x))
    assert eq(da.cosh(b), np.cosh(y))
    assert eq(da.tanh(a), np.tanh(x))
    assert eq(da.arcsinh(b * 10), np.arcsinh(y * 10))
    assert eq(da.arccosh(b * 10), np.arccosh(y * 10))
    assert eq(da.arctanh(b / 10), np.arctanh(y / 10))
    assert eq(da.deg2rad(a), np.deg2rad(x))
    assert eq(da.rad2deg(a), np.rad2deg(x))

    assert eq(da.logical_and(a < 1, b < 4), np.logical_and(x < 1, y < 4))
    assert eq(da.logical_or(a < 1, b < 4), np.logical_or(x < 1, y < 4))
    assert eq(da.logical_xor(a < 1, b < 4), np.logical_xor(x < 1, y < 4))
    assert eq(da.logical_not(a < 1), np.logical_not(x < 1))

    # BUG FIX: the numpy side previously received the dask arrays (a) instead
    # of the numpy data (x), making these comparisons vacuous.
    assert eq(da.maximum(a, 5 - a), np.maximum(x, 5 - x))
    assert eq(da.minimum(a, 5 - a), np.minimum(x, 5 - x))
    assert eq(da.fmax(a, 5 - a), np.fmax(x, 5 - x))
    assert eq(da.fmin(a, 5 - a), np.fmin(x, 5 - x))

    assert eq(da.isreal(a + 1j * b), np.isreal(x + 1j * y))
    assert eq(da.iscomplex(a + 1j * b), np.iscomplex(x + 1j * y))
    assert eq(da.isfinite(a), np.isfinite(x))
    assert eq(da.isinf(a), np.isinf(x))
    assert eq(da.isnan(a), np.isnan(x))
    assert eq(da.signbit(a - 3), np.signbit(x - 3))
    assert eq(da.copysign(a - 3, b), np.copysign(x - 3, y))
    assert eq(da.nextafter(a - 3, b), np.nextafter(x - 3, y))
    assert eq(da.ldexp(c, c), np.ldexp(z, z))
    assert eq(da.fmod(a * 12, b), np.fmod(x * 12, y))
    assert eq(da.floor(a * 0.5), np.floor(x * 0.5))
    assert eq(da.ceil(a), np.ceil(x))
    assert eq(da.trunc(a / 2), np.trunc(x / 2))

    assert eq(da.degrees(b), np.degrees(y))
    assert eq(da.radians(a), np.radians(x))

    assert eq(da.rint(a + 0.3), np.rint(x + 0.3))
    assert eq(da.fix(a - 2.5), np.fix(x - 2.5))

    assert eq(da.angle(a + 1j), np.angle(x + 1j))
    assert eq(da.real(a + 1j), np.real(x + 1j))
    assert eq((a + 1j).real, np.real(x + 1j))
    assert eq(da.imag(a + 1j), np.imag(x + 1j))
    assert eq((a + 1j).imag, np.imag(x + 1j))
    assert eq(da.conj(a + 1j * b), np.conj(x + 1j * y))
    assert eq((a + 1j * b).conj(), (x + 1j * y).conj())

    assert eq(da.clip(b, 1, 4), np.clip(y, 1, 4))
    assert eq(da.fabs(b), np.fabs(y))
    assert eq(da.sign(b - 2), np.sign(y - 2))

    l1, l2 = da.frexp(a)
    r1, r2 = np.frexp(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    l1, l2 = da.modf(a)
    r1, r2 = np.modf(x)
    assert eq(l1, r1)
    assert eq(l2, r2)

    assert eq(da.around(a, -1), np.around(x, -1))
def volume_curvature(self, darray_il, darray_xl, dip_factor=10, kernel=(3, 3, 3),
                     preview=None):
    """
    Description
    -----------
    Compute volume curvature attributes from 3D seismic dips

    Parameters
    ----------
    darray_il : Array-like, Inline dip - acceptable inputs include Numpy, HDF5, or Dask Arrays
    darray_xl : Array-like, Crossline dip - acceptable inputs include Numpy, HDF5, or Dask Arrays

    Keywork Arguments
    -----------------
    dip_factor : Number, scalar for dip values
    kernel : tuple (len 3), operator size
    preview : str, enables or disables preview mode and specifies direction
        Acceptable inputs are (None, 'inline', 'xline', 'z')
        Optimizes chunk size in different orientations to facilitate rapid
        screening of algorithm output

    Returns
    -------
    H, K, Kmax, Kmin, KMPos, KMNeg : Dask Array,
        {H : 'Mean Curvature', K : 'Gaussian Curvature',
         Kmax : 'Max Curvature', Kmin : 'Min Curvature',
         KMPos : Most Positive Curvature, KMNeg : Most Negative Curvature}
    """
    # silence divide/invalid warnings from the pointwise curvature algebra
    np.seterr(all='ignore')

    # Generate Dask Array as necessary
    darray_il, chunks_init = self.create_array(darray_il, kernel, preview=preview)
    darray_xl, chunks_init = self.create_array(darray_xl, kernel, preview=preview)

    # normalized dip components; w is the (unit) vertical component
    u = -darray_il / dip_factor
    v = -darray_xl / dip_factor
    w = da.ones_like(u, chunks=u.chunks)

    # Compute Gradients
    ux = sp().first_derivative(u, axis=0)
    uy = sp().first_derivative(u, axis=1)
    uz = sp().first_derivative(u, axis=2)
    vx = sp().first_derivative(v, axis=0)
    vy = sp().first_derivative(v, axis=1)
    vz = sp().first_derivative(v, axis=2)

    # Smooth Gradients
    ux = ux.map_blocks(ndi.uniform_filter, size=kernel, dtype=ux.dtype)
    uy = uy.map_blocks(ndi.uniform_filter, size=kernel, dtype=ux.dtype)
    uz = uz.map_blocks(ndi.uniform_filter, size=kernel, dtype=ux.dtype)
    vx = vx.map_blocks(ndi.uniform_filter, size=kernel, dtype=ux.dtype)
    vy = vy.map_blocks(ndi.uniform_filter, size=kernel, dtype=ux.dtype)
    vz = vz.map_blocks(ndi.uniform_filter, size=kernel, dtype=ux.dtype)

    # trim the halo added for the stencil operations
    u = util.trim_dask_array(u, kernel)
    v = util.trim_dask_array(v, kernel)
    w = util.trim_dask_array(w, kernel)
    ux = util.trim_dask_array(ux, kernel)
    uy = util.trim_dask_array(uy, kernel)
    uz = util.trim_dask_array(uz, kernel)
    vx = util.trim_dask_array(vx, kernel)
    vy = util.trim_dask_array(vy, kernel)
    vz = util.trim_dask_array(vz, kernel)

    # w is constant, so its gradients are identically zero
    wx = da.zeros_like(ux, chunks=ux.chunks, dtype=ux.dtype)
    wy = da.zeros_like(ux, chunks=ux.chunks, dtype=ux.dtype)
    wz = da.zeros_like(ux, chunks=ux.chunks, dtype=ux.dtype)

    # common subexpressions of the fundamental-form coefficients
    uv = u * v
    vw = v * w
    u2 = u * u
    v2 = v * v
    w2 = w * w
    u2pv2 = u2 + v2
    v2pw2 = v2 + w2
    s = da.sqrt(u2pv2 + w2)

    # Measures of surfaces
    E = da.ones_like(u, chunks=u.chunks, dtype=u.dtype)
    F = -(u * w) / (da.sqrt(u2pv2) * da.sqrt(v2pw2))
    G = da.ones_like(u, chunks=u.chunks, dtype=u.dtype)
    D = -(-uv * vx + u2 * vy + v2 * ux - uv * uy) / (u2pv2 * s)
    Di = -(vw * (uy + vx) - 2 * u * w * vy - v2 * (uz + wx) +
           uv * (vz + wy)) / (2 * da.sqrt(u2pv2) * da.sqrt(v2pw2) * s)
    Dii = -(-vw * wy + v2 * wz + w2 * vy - vw * vz) / (v2pw2 * s)
    # mean (H) and Gaussian (K) curvature from the two fundamental forms
    H = (E * Dii - 2 * F * Di + G * D) / (2 * (E * G - F * F))
    K = (D * Dii - Di * Di) / (E * G - F * F)
    # principal curvatures
    Kmin = H - da.sqrt(H * H - K)
    Kmax = H + da.sqrt(H * H - K)

    # zero out NaNs produced by degenerate (flat/undefined) dip regions
    # NOTE(review): boolean-mask assignment on dask arrays requires a
    # sufficiently recent dask version -- confirm supported here.
    H[da.isnan(H)] = 0
    K[da.isnan(K)] = 0
    Kmax[da.isnan(Kmax)] = 0
    Kmin[da.isnan(Kmin)] = 0

    # most positive / most negative principal curvature
    KMPos = da.maximum(Kmax, Kmin)
    KMNeg = da.minimum(Kmax, Kmin)

    return (H, K, Kmax, Kmin, KMPos, KMNeg)
def get_plot_data(msinfo, group_cols, mytaql, chan_freqs, chanslice, subset,
                  noflags, noconj,
                  iter_field, iter_spw, iter_scan, iter_ant, iter_baseline,
                  join_corrs=False,
                  row_chunk_size=100000):
    """Read the MS group by group and build per-frame dask dataframes of plot points.

    Returns (output_dataframes, output_subsets, total_num_points), where the
    dataframes and subsets are keyed by (field, spw, scan, antenna-or-baseline)
    with None in any position not being iterated over.
    """
    ms_cols = {'ANTENNA1', 'ANTENNA2'}
    ms_cols.update(msinfo.indexing_columns.keys())
    if not noflags:
        ms_cols.update({'FLAG', 'FLAG_ROW'})
    # get visibility columns
    for axis in DataAxis.all_axes.values():
        ms_cols.update(axis.columns)

    total_num_points = 0  # total number of points to plot

    # output dataframes, indexed by (field, spw, scan, antenna_or_baseline)
    # If any of these axes is not being iterated over, then the index at that position is None
    output_dataframes = OrderedDict()

    # number of rows per each dataframe
    output_rows = OrderedDict()

    # output subsets of indexing columns, indexed by same tuple
    output_subsets = OrderedDict()

    # iterate per antenna (sub-MS) if requested, else one pass over the whole MS
    if iter_ant:
        antenna_subsets = zip(subset.ant.numbers, subset.ant.names)
    else:
        antenna_subsets = [(None, None)]
    taql = mytaql

    for antenna, antname in antenna_subsets:
        if antenna is not None:
            taql = f"({mytaql})&&(ANTENNA1=={antenna} || ANTENNA2=={antenna})" if mytaql else \
                f"(ANTENNA1=={antenna} || ANTENNA2=={antenna})"
        # add baselines to group columns
        # NOTE(review): this rebinding accumulates across antenna iterations;
        # presumably iter_ant and iter_baseline are mutually exclusive -- confirm.
        if iter_baseline:
            group_cols = list(group_cols) + ["ANTENNA1", "ANTENNA2"]
        # get MS data
        msdata = daskms.xds_from_ms(msinfo.msname, columns=list(ms_cols),
                                    group_cols=group_cols, taql_where=taql,
                                    chunks=dict(row=row_chunk_size))
        nrow = sum([len(group.row) for group in msdata])
        if not nrow:
            continue

        if antenna is not None:
            log.info(f': Indexing sub-MS (antenna {antname}) and building dataframes ({nrow} rows, chunk size is {row_chunk_size})')
        else:
            log.info(f': Indexing MS and building dataframes ({nrow} rows, chunk size is {row_chunk_size})')

        # iterate over groups
        for group in msdata:
            if not len(group.row):
                continue
            ddid = group.DATA_DESC_ID  # always present
            fld = group.FIELD_ID  # always present
            if fld not in subset.field or ddid not in subset.spw:
                log.debug(f"field {fld} ddid {ddid} not in selection, skipping")
                continue
            scan = getattr(group, 'SCAN_NUMBER', None)  # will be present if iterating over scans
            if iter_baseline:
                ant1 = getattr(group, 'ANTENNA1', None)  # will be present if iterating over baselines
                ant2 = getattr(group, 'ANTENNA2', None)  # will be present if iterating over baselines
                baseline = msinfo.baseline_number(ant1, ant2)
            else:
                baseline = None

            # Make frame key -- data subset corresponds to this frame
            dataframe_key = (fld if iter_field else None,
                             ddid if iter_spw else None,
                             scan if iter_scan else None,
                             antenna if antenna is not None else baseline)

            # update subsets of MS indexing columns that we've seen for this dataframe
            output_subset1 = output_subsets.setdefault(
                dataframe_key,
                {column: set() for column in msinfo.indexing_columns.keys()})
            for column, _ in msinfo.indexing_columns.items():
                value = getattr(group, column)
                if np.isscalar(value):
                    output_subset1[column].add(value)
                else:
                    output_subset1[column].update(value.compute().data)

            # number of rows in dataframe
            nrows0 = output_rows.setdefault(dataframe_key, 0)

            # always read flags -- easier that way
            flag = group.FLAG if not noflags else None
            flag_row = group.FLAG_ROW if not noflags else None

            # canonical (min, max) antenna ordering for baseline numbering
            a1 = da.minimum(group.ANTENNA1.data, group.ANTENNA2.data)
            a2 = da.maximum(group.ANTENNA1.data, group.ANTENNA2.data)
            baselines = msinfo.baseline_number(a1, a2)

            freqs = chan_freqs[ddid]
            chans = xarray.DataArray(range(len(freqs)), dims=("chan",))
            wavel = freq_to_wavel(freqs)
            extras = dict(chans=chans, freqs=freqs, wavel=wavel,
                          rows=group.row, baselines=baselines)

            nchan = len(group.chan)
            # channel slicing shrinks the expected shape
            if flag is not None:
                flag = flag[dict(chan=chanslice)]
                nchan = flag.shape[1]

            shape = (len(group.row), nchan)

            arrays = OrderedDict()
            shapes = OrderedDict()
            ddf = None
            num_points = 0  # counts number of new points generated

            for corr in subset.corr.numbers:
                # make dictionary of extra values for DataMappers
                extras['corr'] = corr
                # loop over datums to be computed
                for axis in DataAxis.all_axes.values():
                    value = arrays.get(axis.label)
                    # a datum was already computed?
                    if value is not None:
                        # if not joining correlations, then that's the only one we'll need, so continue
                        if not join_corrs:
                            continue
                        # joining correlations, and datum has a correlation dependence: compute another one
                        if axis.corr is None:
                            value = None
                    if value is None:
                        value = axis.get_value(group, corr, extras, flag=flag,
                                               flag_row=flag_row, chanslice=chanslice)
                        # print(axis.label, value.compute().min(), value.compute().max())
                        num_points = max(num_points, value.size)
                        # record the dataframe dimension labels implied by the
                        # value's rank, and sanity-check against the group shape
                        if value.ndim == 0:
                            shapes[axis.label] = ()
                        elif value.ndim == 1:
                            timefreq_axis = axis.mapper.axis or 0
                            assert value.shape[0] == shape[timefreq_axis], \
                                f"{axis.mapper.fullname}: size {value.shape[0]}, expected {shape[timefreq_axis]}"
                            shapes[axis.label] = ("row",) if timefreq_axis == 0 else ("chan",)
                        # else 2D value better match expected shape
                        else:
                            assert value.shape == shape, \
                                f"{axis.mapper.fullname}: shape {value.shape}, expected {shape}"
                            shapes[axis.label] = ("row", "chan")
                        arrays[axis.label] = value
                # any new data generated for this correlation? Make dataframe
                if num_points:
                    total_num_points += num_points
                    # flatten (array, dims) pairs into positional args
                    args = (v for pair in ((array, shapes[key])
                                           for key, array in arrays.items()) for v in pair)
                    df1 = dataframe_factory(("row", "chan"), *args, columns=arrays.keys())
                    # if any axis needs to be conjugated, double up all of them
                    if not noconj and any([axis.conjugate for axis in DataAxis.all_axes.values()]):
                        arr_shape = [(-arrays[axis.label] if axis.conjugate else arrays[axis.label],
                                      shapes[axis.label])
                                     for axis in DataAxis.all_axes.values()]
                        args = (v for pair in arr_shape for v in pair)
                        df2 = dataframe_factory(("row", "chan"), *args, columns=arrays.keys())
                        df1 = dask_df.concat([df1, df2], axis=0)
                    ddf = dask_df.concat([ddf, df1], axis=0) if ddf is not None else df1

            # do we already have a frame for this key
            ddf0 = output_dataframes.get(dataframe_key)
            if ddf0 is None:
                log.debug(f"first frame for {dataframe_key}")
                output_dataframes[dataframe_key] = ddf
            else:
                log.debug(f"appending to frame for {dataframe_key}")
                output_dataframes[dataframe_key] = dask_df.concat([ddf0, ddf], axis=0)

    # convert discrete axes into categoricals
    if data_mappers.USE_COUNT_CAT:
        categorical_axes = [axis.label for axis in DataAxis.all_axes.values() if axis.nlevels]
        if categorical_axes:
            log.info(": counting colours")
            for key, ddf in list(output_dataframes.items()):
                output_dataframes[key] = ddf.categorize(categorical_axes)

    # print("===")
    # for ddf in output_dataframes.values():
    #     for axis in DataAxis.all_axes.values():
    #         value = ddf[axis.label].values.compute()
    #         print(axis.label, np.nanmin(value), np.nanmax(value))

    log.info(": complete")
    return output_dataframes, output_subsets, total_num_points