Example #1
0
    def _compute_image_stats_chunked(dataset: 'DatasetReader') -> Optional[Dict[str, Any]]:
        """Accumulate band-1 statistics of a rasterio dataset one block window at a time.

        Every block window of band 1 is read as a masked array, and non-finite
        values are masked in addition. The unmasked pixels of each block are
        fed into a ``SummaryStats`` and a ``TDigest`` accumulator, while a
        convex hull is grown from the per-block shapes.

        Returns:
            ``None`` when no valid pixel was accumulated. Otherwise a dict with
            the keys ``valid_percentage``, ``range`` (min, max), ``mean``,
            ``stdev``, ``percentiles`` (quantiles 0.01 through 0.99) and
            ``convex_hull`` (the hull geometry transformed to EPSG:4326).
        """
        from rasterio import features, warp, windows
        from shapely import geometry

        num_pixels = 0
        num_valid = 0
        digest = TDigest()
        summary = SummaryStats()
        hull = geometry.Polygon()

        all_windows = [window for _, window in dataset.block_windows(1)]

        for window in all_windows:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', message='invalid value encountered.*')
                block = dataset.read(1, window=window, masked=True)

            # Float rasters may contain NaN / inf values that are not masked yet.
            block = np.ma.masked_invalid(block, copy=False)

            num_pixels += int(block.size)
            valid_pixels = block.compressed()

            if valid_pixels.size == 0:
                # Block holds no usable data: it only counts towards the total.
                continue

            num_valid += int(valid_pixels.size)

            if not np.any(block.mask):
                # Nothing is masked, so the block contributes its full bounding rectangle.
                west, south, east, north = windows.bounds(window, dataset.transform)
                corners = [(west, south), (east, south), (east, north), (west, north)]
                pieces = [geometry.Polygon(corners)]
            else:
                # Partially masked block: polygonize the hull candidate pixels
                # in the coordinates of this window.
                candidates = RasterDriver._hull_candidate_mask(~block.mask)
                ones = np.ones(candidates.shape, 'uint8')
                window_transform = windows.transform(window, dataset.transform)
                pieces = [
                    geometry.shape(geom)
                    for geom, _ in features.shapes(
                        ones, mask=candidates, transform=window_transform
                    )
                ]

            # Merge the running hull with this block's shapes and re-hull.
            hull = geometry.MultiPolygon([hull, *pieces]).convex_hull

            digest.update(valid_pixels)
            summary.update(valid_pixels)

        if summary.count() == 0:
            return None

        hull_wgs = warp.transform_geom(dataset.crs, 'epsg:4326', geometry.mapping(hull))

        stats: Dict[str, Any] = {
            'valid_percentage': num_valid / num_pixels * 100,
            'range': (summary.min(), summary.max()),
            'mean': summary.mean(),
            'stdev': summary.std(),
            'percentiles': digest.quantile(np.arange(0.01, 1, 0.01)),
            'convex_hull': hull_wgs
        }
        return stats
def test_basic_stats(x):
    """Check ``SummaryStats`` on input ``x`` against NumPy's NaN-aware reductions."""
    stats = SummaryStats()
    stats.update(x)

    non_empty = len(x) > 0

    def reference(nan_reduction):
        # For empty input the expected value is NaN instead of a NumPy reduction.
        return nan_reduction(x) if non_empty else np.nan

    assert stats.count() == np.count_nonzero(~np.isnan(x))
    np.testing.assert_allclose(stats.sum(), np.nansum(x), rtol=RTOL, atol=ATOL)

    # Extrema are compared for exact equality.
    for accessor, reduction in ((stats.min, np.nanmin), (stats.max, np.nanmax)):
        np.testing.assert_equal(accessor(), reference(reduction))

    # Moments are compared within the configured tolerances.
    moment_checks = (
        (stats.mean, np.nanmean),
        (stats.var, np.nanvar),
        (stats.std, np.nanstd),
    )
    for accessor, reduction in moment_checks:
        np.testing.assert_allclose(
            accessor(), reference(reduction), rtol=RTOL, atol=ATOL
        )