def _compute_image_stats_chunked(dataset: 'DatasetReader') -> Optional[Dict[str, Any]]:
    """Compute statistics for band 1 of a rasterio dataset, one block window at a time.

    Each block window of band 1 is read as a masked array, invalid values
    (NaN / inf) are additionally masked, and the remaining valid values are
    fed into a running ``TDigest`` and ``SummaryStats``. In parallel, a convex
    hull around the valid data is accumulated across blocks.

    Args:
        dataset: Open dataset providing ``block_windows``, ``read``,
            ``transform`` and ``crs``.

    Returns:
        ``None`` if no valid value was accumulated. Otherwise a dict with the
        keys ``valid_percentage``, ``range`` (min, max), ``mean``, ``stdev``,
        ``percentiles`` (quantiles from 0.01 up to, but excluding, 1 in steps
        of 0.01) and ``convex_hull`` (the hull geometry transformed from the
        dataset CRS to ``epsg:4326``).
    """
    from rasterio import features, warp, windows
    from shapely import geometry

    n_pixels = 0
    n_valid = 0
    digest = TDigest()
    summary = SummaryStats()
    hull = geometry.Polygon()  # starts out empty; grown block by block

    # Collect every block window of band 1 before any pixel data is read.
    all_windows = [win for _, win in dataset.block_windows(1)]

    for win in all_windows:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='invalid value encountered.*')
            block = dataset.read(1, window=win, masked=True)

        # handle NaNs for float rasters
        block = np.ma.masked_invalid(block, copy=False)

        n_pixels += int(block.size)
        valid_values = block.compressed()

        if not valid_values.size:
            # Entirely masked block: counts towards the total, nothing else.
            continue

        n_valid += int(valid_values.size)

        if not np.any(block.mask):
            # Nothing is masked, so the block's bounding box is its hull.
            west, south, east, north = windows.bounds(win, dataset.transform)
            pieces = [
                geometry.Polygon([(west, south), (east, south), (east, north), (west, north)])
            ]
        else:
            # NOTE(review): _hull_candidate_mask is defined elsewhere; presumably it
            # narrows the valid-data mask to the pixels relevant for the hull - confirm.
            candidates = RasterDriver._hull_candidate_mask(~block.mask)
            polygonized = features.shapes(
                np.ones(candidates.shape, 'uint8'),
                mask=candidates,
                transform=windows.transform(win, dataset.transform),
            )
            pieces = [geometry.shape(geom) for geom, _ in polygonized]

        # Merge this block's shapes into the running hull.
        hull = geometry.MultiPolygon([hull] + pieces).convex_hull

        digest.update(valid_values)
        summary.update(valid_values)

    if summary.count() == 0:
        return None

    hull_wgs = warp.transform_geom(dataset.crs, 'epsg:4326', geometry.mapping(hull))

    valid_percentage = n_valid / n_pixels * 100
    value_range = (summary.min(), summary.max())
    mean = summary.mean()
    stdev = summary.std()
    percentiles = digest.quantile(np.arange(0.01, 1, 0.01))

    return {
        'valid_percentage': valid_percentage,
        'range': value_range,
        'mean': mean,
        'stdev': stdev,
        'percentiles': percentiles,
        'convex_hull': hull_wgs,
    }
def test_basic_stats(x):
    """Check that ``SummaryStats`` agrees with NumPy's NaN-aware reductions.

    After a single ``update(x)``, the count, sum, min, max, mean, variance and
    standard deviation are compared against ``np.count_nonzero(~np.isnan(x))``
    and the matching ``np.nan*`` functions. For empty ``x`` every statistic
    except count and sum is expected to be NaN.

    Args:
        x: Input values; must support ``len`` and ``np.isnan``
            (presumably a float array supplied by the test harness - confirm).
    """
    stats = SummaryStats()
    stats.update(x)

    def reference(nan_func):
        # NumPy's answer, or NaN when there is nothing to reduce.
        return nan_func(x) if len(x) else np.nan

    assert stats.count() == np.count_nonzero(~np.isnan(x))
    np.testing.assert_allclose(stats.sum(), np.nansum(x), rtol=RTOL, atol=ATOL)

    # Extremes are checked for exact equality (NaN compares equal to NaN here).
    np.testing.assert_equal(stats.min(), reference(np.nanmin))
    np.testing.assert_equal(stats.max(), reference(np.nanmax))

    # Moments are compared within the module-level tolerances.
    moment_checks = (
        (stats.mean, np.nanmean),
        (stats.var, np.nanvar),
        (stats.std, np.nanstd),
    )
    for method, nan_func in moment_checks:
        np.testing.assert_allclose(method(), reference(nan_func), rtol=RTOL, atol=ATOL)