def test_nangeomedian_fixed(self):
    """Geomedian of int16 fixed-point input should agree with the float result.

    The fixture is multiplied by 10000 and cast to ``int16``. The mean of the
    geomedian computed from that integer cube is compared, to 4 significant
    digits, with the mean of the float geomedian after the same scaling and
    cast have been applied to it.
    """
    scale = 10000
    as_int16 = (self.data * scale).astype(np.int16)

    from_int = hdstats.nangeomedian_pcm(as_int16)
    from_float = (hdstats.nangeomedian_pcm(self.data) * scale).astype(np.int16)

    npt.assert_approx_equal(
        np.nanmean(from_int),
        np.nanmean(from_float),
        significant=4,
    )
def gm_tmad(arr, **kw):
    """
    Compute the geomedian of ``arr`` together with its three MAD statistics.

    arr: a high dimensional numpy array where the last dimension will be
         reduced.
    **kw: forwarded unchanged to ``hdstats.nangeomedian_pcm``; only
          ``num_threads`` (``None`` when absent) is also handed to the MAD
          functions.

    returns: a numpy array with one less dimension than input. Along its last
             axis it holds the geomedian followed by one slice each for emad,
             smad and bcmad, in that order.
    """
    gm = hdstats.nangeomedian_pcm(arr, **kw)
    threads = kw.get('num_threads', None)

    # Each MAD result gets a trailing axis of length one so that it can be
    # stacked behind the geomedian.
    mad_layers = [
        mad(arr, gm, num_threads=threads)[:, :, np.newaxis]
        for mad in (hdstats.emad_pcm, hdstats.smad_pcm, hdstats.bcmad_pcm)
    ]
    return np.concatenate([gm, *mad_layers], axis=-1)
def _gm_mads_compute_f32(yxbt,
                         compute_mads=True,
                         compute_count=True,
                         nodata=None,
                         scale=1,
                         offset=0,
                         **kw):
    """
    Geomedian of ``yxbt`` with optional MAD and observation-count layers.

    Output axis order is: y, x, band

    When extra stats are computed they are returned in the following order:
    [*bands, smad, emad, bcmad, count]

    Note that when supplying non-float input, it is converted with
    ``to_float_np`` according to the scale/offset/nodata parameters; the
    output is returned in that scaled range.

    Remaining keyword arguments are forwarded to ``nangeomedian_pcm`` (which
    is always called with ``nocheck=True``); ``num_threads`` (default 1) is
    also passed to the MAD functions.
    """
    import hdstats

    if yxbt.dtype.kind != "f":
        yxbt = to_float_np(yxbt, scale=scale, offset=offset, nodata=nodata)

    gm = hdstats.nangeomedian_pcm(yxbt, nocheck=True, **kw)

    extras = []

    if compute_mads:
        num_threads = kw.get("num_threads", 1)
        for mad_op in (hdstats.smad_pcm, hdstats.emad_pcm, hdstats.bcmad_pcm):
            extras.append(mad_op(yxbt, gm, num_threads=num_threads))

    if compute_count:
        # First reduction collapses axis 2 with a boolean accumulator, so an
        # entry is True when any value along that axis is NaN. The second
        # reduction counts those entries along the remaining last axis.
        has_nan = np.isnan(yxbt).sum(axis=2, dtype="bool")
        n_bad = has_nan.sum(axis=2, dtype="uint16")
        extras.append(yxbt.dtype.type(yxbt.shape[-1]) - n_bad)

    if not extras:
        return gm

    # Give every 2-d layer a trailing axis so it stacks behind the geomedian.
    return np.concatenate(
        [gm, *(layer[..., np.newaxis] for layer in extras)], axis=2
    )
def int_geomedian_np(*bands, nodata=None, scale=1, offset=0, wk_rows=-1, **kw):
    """
    Geomedian of integer bands, computed in float32 a block of rows at a time.

    On input each band is expected to be same shape and dtype with
    3 dimensions: time, y, x

    On output: band, y, x (same dtype as the input bands)

    ``wk_rows`` is the number of rows processed per step; a value that is
    non-positive or larger than the image height means "all rows at once".
    ``nodata``/``scale``/``offset`` are passed to ``to_float_np`` on the way
    in and, inverted, to ``from_float_np`` on the way out. Remaining keyword
    arguments go to ``nangeomedian_pcm``.
    """
    from hdstats import nangeomedian_pcm

    reference = bands[0]
    nt, ny, nx = reference.shape
    dtype = reference.dtype
    nb = len(bands)

    gm_int = np.empty((nb, ny, nx), dtype=dtype)

    if wk_rows <= 0 or wk_rows > ny:
        wk_rows = ny

    # One float32 scratch buffer, reused for every block of rows.
    scratch = np.empty((wk_rows, nx, nb, nt), dtype='float32')
    to_float_args = dict(nodata=nodata, scale=scale, offset=offset,
                         dtype='float32')

    for rows in _slices(wk_rows, ny):
        n_rows = rows.stop - rows.start
        # The final block may be shorter than the scratch buffer.
        block = scratch[:n_rows, ...]

        # Extract part of the image with scaling.
        for b_idx, band in enumerate(bands):
            for t_idx in range(nt):
                block[:, :, b_idx, t_idx] = to_float_np(band[t_idx, rows, :],
                                                        **to_float_args)

        # Run partial computation.
        gm_block = nangeomedian_pcm(block, **kw)

        # Extract results with scaling back (inverse of the input transform).
        from_float_args = dict(nodata=nodata,
                               scale=1/scale,
                               offset=-offset/scale)
        for b_idx in range(nb):
            gm_int[b_idx, rows, :] = from_float_np(gm_block[:, :, b_idx],
                                                   dtype,
                                                   **from_float_args)

    return gm_int
def int_geomedian_np(*bands, nodata=None, scale=1, offset=0, **kw):
    """
    Geomedian of integer bands, computed in float32 over the whole image.

    On input each band is expected to be same shape and dtype with
    3 dimensions: time, y, x

    On output: band, y, x (same dtype as the input bands)

    ``nodata``/``scale``/``offset`` are passed to ``to_float_np`` on the way
    in and, inverted, to ``from_float_np`` on the way out. Remaining keyword
    arguments go to ``nangeomedian_pcm``; unless supplied, ``nocheck=True``,
    ``num_threads=1`` and ``eps=0.5 * scale`` are used.
    """
    from hdstats import nangeomedian_pcm

    reference = bands[0]
    nt, ny, nx = reference.shape
    dtype = reference.dtype
    nb = len(bands)

    # Gather every band/time slice into one y, x, band, time float32 cube.
    stacked = np.empty((ny, nx, nb, nt), dtype='float32')
    for b_idx, band in enumerate(bands):
        for t_idx in range(nt):
            stacked[:, :, b_idx, t_idx] = to_float_np(band[t_idx, :, :],
                                                      nodata=nodata,
                                                      scale=scale,
                                                      offset=offset,
                                                      dtype='float32')

    defaults = (
        ('nocheck', True),
        ('num_threads', 1),
        ('eps', 0.5 * scale),
    )
    for key, value in defaults:
        kw.setdefault(key, value)

    gm_f32 = nangeomedian_pcm(stacked, **kw)
    del stacked  # free temp memory early

    # Convert back band by band using the inverse of the input transform.
    gm_int = np.empty((nb, ny, nx), dtype=dtype)
    for b_idx, out_band in enumerate(gm_int):
        out_band[...] = from_float_np(gm_f32[:, :, b_idx],
                                      dtype,
                                      nodata=nodata,
                                      scale=1 / scale,
                                      offset=-offset / scale)

    return gm_int
def xr_geomedian(ds, axis="time", where=None, **kw):
    """
    Compute the geomedian over the last dimension of ``ds``.

    :param ds: xr.Dataset|xr.DataArray|numpy array
               Arrays must be 4-dimensional (y, x, band, time); a Dataset is
               first converted with ``reshape_for_geomedian(ds, axis)``.
    :param axis: name of the dimension to reduce; for a DataArray it has to
                 be the last dimension (pass ``None`` to skip that check).
    :param where: optional boolean mask with the same shape as the first two
                  dimensions of the data; output pixels where it is False are
                  set to NaN. Not supported for dask inputs.

    Other parameters:
    **kw -- passed on to ``hdstats.nangeomedian_pcm`` (e.g. ``maxiters``,
            ``eps``, ``num_threads``). Unless supplied, this function uses
            ``nocheck=True``, ``num_threads=1`` and ``eps=1e-6``.

    :returns: same kind of object as the input: a plain array for array
              input, an ``xr.DataArray`` (attributes copied) for DataArray
              input, or an ``xr.Dataset`` split along ``band`` (per-variable
              attributes copied) for Dataset input.
    """
    from hdstats import nangeomedian_pcm

    def norm_input(obj, reduce_dim):
        # Returns (source dataset or None, 4-d DataArray or None, raw data).
        if isinstance(obj, xr.DataArray):
            if len(obj.dims) != 4:
                raise ValueError("Expect 4 dimensions on input: y,x,band,time")
            if reduce_dim is not None and obj.dims[3] != reduce_dim:
                raise ValueError(
                    f"Can only reduce last dimension, expect: y,x,band,{reduce_dim}")
            return None, obj, obj.data

        if isinstance(obj, xr.Dataset):
            reshaped = reshape_for_geomedian(obj, reduce_dim)
            return obj, reshaped, reshaped.data

        # assume numpy or similar
        if obj.ndim != 4:
            raise ValueError("Expect 4 dimensions on input: y,x,band,time")
        return None, None, obj

    def block_geomedian(block):
        return nangeomedian_pcm(block, **kw)

    for option, default in (("nocheck", True), ("num_threads", 1), ("eps", 1e-6)):
        kw.setdefault(option, default)

    src_ds, xx, xx_data = norm_input(ds, axis)
    is_dask = dask.is_dask_collection(xx_data)

    set_nan = None
    if where is not None:
        if is_dask:
            raise NotImplementedError(
                "Dask version doesn't support output masking currently")
        if where.shape != xx_data.shape[:2]:
            raise ValueError("Shape for `where` parameter doesn't match")
        set_nan = ~where

    if not is_dask:
        data = block_geomedian(xx_data)
    else:
        # Rechunk when the last two axes are split, so that each block
        # spans them completely.
        if xx_data.shape[-2:] != xx_data.chunksize[-2:]:
            xx_data = xx_data.rechunk(xx_data.chunksize[:2] + (-1, -1))

        data = da.map_blocks(
            block_geomedian,
            xx_data,
            name=randomize("geomedian"),
            dtype=xx_data.dtype,
            drop_axis=3,
        )

    if set_nan is not None:
        data[set_nan, :] = np.nan

    # Plain array in, plain array out.
    if xx is None:
        return data

    out_dims = xx.dims[:-1]
    out_coords = {name: xx.coords[name] for name in out_dims}
    xx_out = xr.DataArray(data, dims=out_dims, coords=out_coords)

    # DataArray in, DataArray out.
    if src_ds is None:
        xx_out.attrs.update(xx.attrs)
        return xx_out

    # Dataset in: split back into per-band variables and restore attributes.
    ds_out = xx_out.to_dataset(dim="band")
    for var_name in src_ds.data_vars:
        ds_out[var_name].attrs.update(src_ds[var_name].attrs)

    return ds_out
class TestMedianAbsoluteDeviation:
    """Tests for the ``emad_pcm``, ``smad_pcm`` and ``bcmad_pcm`` statistics.

    Each statistic gets the same three checks: output shape on the float
    fixture, agreement between float input and scaled ``uint16`` input that
    uses ``nodata=0``, and NaN output for a pixel whose input contains NaN.
    """

    # Fixture and its geomedian are computed once, at class-definition time.
    # The path is relative to the current working directory.
    data = joblib.load('data/landchar-small.pkl')
    gm = hdstats.nangeomedian_pcm(data)

    def test_emad(self):
        """emad_pcm returns one value per pixel."""
        emad = hdstats.emad_pcm(self.data, self.gm)
        assert emad.shape == (200, 200)

    def test_emad_uint16(self):
        """emad_pcm on scaled uint16 data matches the float result."""
        stat = hdstats.emad_pcm(self.data, self.gm)
        intdata = (self.data * 10000).astype(np.uint16)
        # Mark one pixel/band as nodata across the whole last axis.
        intdata[1, 1, 0, :] = 0
        intstat = hdstats.emad_pcm(intdata, self.gm, nodata=0)
        npt.assert_approx_equal(np.nanmean(stat), np.nanmean(intstat),
                                significant=4)

    def test_emad_baddata(self):
        """emad_pcm yields NaN for a pixel with a NaN-filled band."""
        baddata = self.data[:3, :3, :, :].copy()
        baddata[1, 1, 0, :] = np.nan
        emad = hdstats.emad_pcm(baddata, self.gm)
        assert np.isnan(emad[1, 1])

    def test_smad(self):
        """smad_pcm returns one value per pixel."""
        smad = hdstats.smad_pcm(self.data, self.gm)
        assert smad.shape == (200, 200)

    def test_smad_uint16(self):
        """smad_pcm on scaled uint16 data matches the float result."""
        stat = hdstats.smad_pcm(self.data, self.gm)
        intdata = (self.data * 10000).astype(np.uint16)
        # Mark one pixel/band as nodata across the whole last axis.
        intdata[1, 1, 0, :] = 0
        intstat = hdstats.smad_pcm(intdata, self.gm, nodata=0)
        npt.assert_approx_equal(np.nanmean(stat), np.nanmean(intstat),
                                significant=4)

    def test_smad_baddata(self):
        """smad_pcm yields NaN for a pixel with a NaN-filled band."""
        baddata = self.data[:3, :3, :, :].copy()
        baddata[1, 1, 0, :] = np.nan
        smad = hdstats.smad_pcm(baddata, self.gm)
        assert np.isnan(smad[1, 1])

    def test_bcmad(self):
        """bcmad_pcm returns one value per pixel."""
        # Previously called smad_pcm, so bcmad_pcm's shape was never checked.
        bcmad = hdstats.bcmad_pcm(self.data, self.gm)
        assert bcmad.shape == (200, 200)

    def test_bcmad_uint16(self):
        """bcmad_pcm on scaled uint16 data matches the float result."""
        stat = hdstats.bcmad_pcm(self.data, self.gm)
        intdata = (self.data * 10000).astype(np.uint16)
        # Mark one pixel/band as nodata across the whole last axis.
        intdata[1, 1, 0, :] = 0
        intstat = hdstats.bcmad_pcm(intdata, self.gm, nodata=0)
        npt.assert_approx_equal(np.nanmean(stat), np.nanmean(intstat),
                                significant=4)

    def test_bcmad_baddata(self):
        """bcmad_pcm yields NaN for a pixel with a NaN-filled band."""
        baddata = self.data[:3, :3, :, :].copy()
        baddata[1, 1, 0, :] = np.nan
        # Previously called smad_pcm, so bcmad_pcm's NaN handling was untested.
        bcmad = hdstats.bcmad_pcm(baddata, self.gm)
        assert np.isnan(bcmad[1, 1])
def test_nangeomedian_baddata(self):
    """A band that is NaN along the whole last axis gives a NaN geomedian.

    Works on a copy of the 3x3 corner of the fixture so the shared data is
    left untouched.
    """
    corner = self.data[:3, :3, :, :].copy()
    corner[1, 1, 0, :] = np.nan

    result = hdstats.nangeomedian_pcm(corner)
    poisoned = result[1, 1, 0]

    print(poisoned)
    assert np.isnan(poisoned)
def test_nangeomedian_ro(self):
    """The geomedian can be computed from a read-only input array."""
    readonly = self.data.copy()
    readonly.flags.writeable = False

    result = hdstats.nangeomedian_pcm(readonly)

    assert result.shape == (200, 200, 8)
def test_nangeomedian_value(self):
    """Check geomedian values against a per-pixel reference and a known mean.

    The first pixel of the ``nangeomedian_pcm`` output must match
    ``hdstats.nangeomedian`` run on that pixel alone (tolerance 1e-4), and
    the NaN-ignoring mean of the full result must equal 0.1432 to 4
    significant digits.
    """
    result = hdstats.nangeomedian_pcm(self.data, nodata=np.nan)

    first_pixel_expected = hdstats.nangeomedian(self.data[0, 0, :, :])
    npt.assert_allclose(
        result[0, 0, :],
        first_pixel_expected,
        rtol=1e-4,
        atol=1e-4,
    )

    npt.assert_approx_equal(np.nanmean(result), 0.1432, significant=4)
def test_nangeomedian_shape(self):
    """The geomedian of the fixture has shape (200, 200, 8)."""
    expected_shape = (200, 200, 8)

    result = hdstats.nangeomedian_pcm(self.data)

    assert result.shape == expected_shape