def test_skew(x, bias):
    """Compare ``SummaryStats.skew`` with ``scipy.stats.skew``.

    The reference value is computed on the non-NaN entries of ``x``; for an
    empty ``x`` the expected result is NaN. The test is skipped when
    ``scipy.stats`` cannot be imported.
    """
    stats = pytest.importorskip('scipy.stats')

    summary = SummaryStats()
    summary.update(x)
    actual = summary.skew(bias=bias)

    if len(x):
        expected = stats.skew(x[~np.isnan(x)], bias=bias)
    else:
        # Nothing to compute a reference from.
        expected = np.nan

    np.testing.assert_allclose(actual, expected, rtol=RTOL, atol=ATOL)
def test_kurt(x, bias, fisher):
    """Compare ``SummaryStats.kurt`` with ``scipy.stats.kurtosis``.

    The reference value is computed on the non-NaN entries of ``x``; for an
    empty ``x`` the expected result is NaN. The test is skipped when
    ``scipy.stats`` cannot be imported.
    """
    stats = pytest.importorskip('scipy.stats')

    summary = SummaryStats()
    summary.update(x)
    actual = summary.kurt(bias=bias, fisher=fisher)

    expected = (
        stats.kurtosis(x[~np.isnan(x)], bias=bias, fisher=fisher)
        if len(x)
        else np.nan
    )

    np.testing.assert_allclose(actual, expected, rtol=RTOL, atol=ATOL)
def _compute_image_stats_chunked(dataset: 'DatasetReader') -> Optional[Dict[str, Any]]:
    """Compute statistics for the given rasterio dataset by looping over chunks.

    Band 1 is read one block window at a time, so only a single block has to
    be held in memory. Masked and non-finite pixels are excluded from all
    statistics.

    Args:
        dataset: Open rasterio dataset; only band 1 is read.

    Returns:
        ``None`` if no valid pixel was found. Otherwise a dict with the keys

        - ``'valid_percentage'``: valid pixels as a percentage of all pixels read
        - ``'range'``: ``(min, max)`` of the valid pixels
        - ``'mean'`` / ``'stdev'``: as reported by ``SummaryStats``
        - ``'percentiles'``: ``TDigest`` quantiles at 0.01, 0.02, ... below 1
        - ``'convex_hull'``: hull geometry of the valid data, transformed
          from ``dataset.crs`` to ``'epsg:4326'``
    """
    from rasterio import features, warp, windows
    from shapely import geometry

    total_count = valid_data_count = 0
    tdigest = TDigest()
    sstats = SummaryStats()
    convex_hull = geometry.Polygon()

    # Iterate the windows lazily; there is no need to collect them up front.
    for _, window in dataset.block_windows(1):
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='invalid value encountered.*')
            block_data = dataset.read(1, window=window, masked=True)

        # handle NaNs for float rasters
        block_data = np.ma.masked_invalid(block_data, copy=False)

        total_count += int(block_data.size)
        valid_data = block_data.compressed()

        if valid_data.size == 0:
            # Block contains no usable pixels; it only counts towards the total.
            continue

        valid_data_count += int(valid_data.size)

        if np.any(block_data.mask):
            # Partially masked block: vectorize only the candidate pixels
            # selected by the helper (its exact criterion is defined elsewhere).
            hull_candidates = RasterDriver._hull_candidate_mask(~block_data.mask)
            hull_shapes = [
                geometry.shape(shape_geom)
                for shape_geom, _ in features.shapes(
                    np.ones(hull_candidates.shape, 'uint8'),
                    mask=hull_candidates,
                    transform=windows.transform(window, dataset.transform),
                )
            ]
        else:
            # Fully valid block: its bounding rectangle is enough.
            # Distinct names keep ``window`` intact for the rest of the loop body.
            west, south, east, north = windows.bounds(window, dataset.transform)
            hull_shapes = [
                geometry.Polygon(
                    [(west, south), (east, south), (east, north), (west, north)]
                )
            ]

        # Fold this block's shapes into the running hull.
        convex_hull = geometry.MultiPolygon([convex_hull, *hull_shapes]).convex_hull

        tdigest.update(valid_data)
        sstats.update(valid_data)

    if sstats.count() == 0:
        return None

    convex_hull_wgs = warp.transform_geom(
        dataset.crs, 'epsg:4326', geometry.mapping(convex_hull)
    )

    return {
        'valid_percentage': valid_data_count / total_count * 100,
        'range': (sstats.min(), sstats.max()),
        'mean': sstats.mean(),
        'stdev': sstats.std(),
        'percentiles': tdigest.quantile(np.arange(0.01, 1, 0.01)),
        'convex_hull': convex_hull_wgs
    }
def test_add_update_errors():
    """Check that ``add``/``update`` reject invalid second arguments.

    Each call below passes a negative, zero, or NaN value (scalar or inside
    an array) as the second positional argument and must raise ``ValueError``.
    """
    summary = SummaryStats()
    values = np.array([1, 2, 3])

    # (method, first argument, invalid second argument), in execution order.
    failing_calls = [
        (summary.update, values, -1),
        (summary.update, values, 0),
        (summary.update, values, np.array([1, 1, -1])),
        (summary.update, 1, -1),
        (summary.update, 1, 0),
        (summary.update, values, np.nan),
        (summary.add, 1, np.nan),
    ]

    for method, data, invalid in failing_calls:
        with pytest.raises(ValueError):
            method(data, invalid)
def test_weights():
    """Check ``count`` and ``sum`` when a second argument is passed to ``add``/``update``."""
    weighted = SummaryStats()

    # Scalar value with a scalar second argument.
    weighted.add(10, 2)
    assert weighted.count() == 2
    assert weighted.sum() == 10

    # Array of values sharing one scalar second argument.
    values = np.array([1, 2, 3, 4, 5])
    weighted.update(values, 2)
    assert weighted.count() == 12
    assert weighted.sum() == values.sum() + 10

    # Array of values with an element-wise second argument.
    elementwise = SummaryStats()
    data = np.array([1, 2, 3, 4, 5])
    elementwise.update(data, data)
    assert elementwise.count() == data.sum()
def test_repr():
    """Check the string form of ``SummaryStats`` before and after adding one value."""
    summary = SummaryStats()

    text_when_empty = str(summary)
    assert text_when_empty == 'SummaryStats<count=0>'

    summary.add(10)

    text_after_add = str(summary)
    assert text_after_add == 'SummaryStats<count=1>'
def test_pickle(x):
    """Check that a pickle round trip (protocol 2) preserves every statistic."""
    original = SummaryStats()
    original.update(x)

    payload = pickle.dumps(original, protocol=2)
    restored = pickle.loads(payload)

    # Compare each accessor on the original and on the restored copy.
    for accessor in ('count', 'sum', 'min', 'max', 'var', 'skew', 'kurt'):
        np.testing.assert_equal(
            getattr(original, accessor)(),
            getattr(restored, accessor)(),
        )
def test_basic_stats(x):
    """Compare the basic ``SummaryStats`` results with NumPy's NaN-aware functions.

    For an empty ``x`` the expected min, max, mean, var and std are NaN.
    """
    def reference(nan_func):
        # Empty input: expect NaN without calling the NumPy function.
        return nan_func(x) if len(x) else np.nan

    summary = SummaryStats()
    summary.update(x)

    assert summary.count() == np.count_nonzero(~np.isnan(x))
    np.testing.assert_allclose(summary.sum(), np.nansum(x), rtol=RTOL, atol=ATOL)

    # Extremes must match exactly.
    np.testing.assert_equal(summary.min(), reference(np.nanmin))
    np.testing.assert_equal(summary.max(), reference(np.nanmax))

    # Moments are compared within the configured tolerances.
    for accessor, nan_func in (
        ('mean', np.nanmean),
        ('var', np.nanvar),
        ('std', np.nanstd),
    ):
        np.testing.assert_allclose(
            getattr(summary, accessor)(),
            reference(nan_func),
            rtol=RTOL,
            atol=ATOL,
        )
def test_merge():
    """Check ``SummaryStats.merge`` against a single pass over the same data.

    Two instances are fed one half of ``normal`` each and merged; the result
    must agree (within tolerance) with an instance fed all of ``normal``.
    Merging with an empty instance, in either direction, must reproduce the
    original statistics exactly.
    """
    accessors = ('count', 'sum', 'min', 'max', 'var', 'skew', 'kurt')

    first_half = SummaryStats()
    midpoint = len(normal) // 2
    first_half.update(normal[:midpoint])

    second_half = SummaryStats()
    second_half.update(normal[midpoint:])

    sol = SummaryStats()
    sol.update(normal)

    first_half.merge(second_half)

    for accessor in accessors:
        np.testing.assert_allclose(
            getattr(first_half, accessor)(),
            getattr(sol, accessor)(),
            rtol=RTOL,
            atol=ATOL,
        )

    # Merging with an empty instance must not change anything, whichever
    # side the empty instance is on.
    empty_with_full = SummaryStats()
    empty_with_full.merge(sol)

    full_with_empty = copy(sol)
    full_with_empty.merge(SummaryStats())

    for merged in (empty_with_full, full_with_empty):
        for accessor in accessors:
            np.testing.assert_equal(
                getattr(merged, accessor)(),
                getattr(sol, accessor)(),
            )