def _test_stats_invalid_custom_stat():
    """Expect ``stats`` to raise when handed the custom ``sum`` statistic.

    ``zones`` and ``values`` are not assigned in this function, so they are
    resolved from the enclosing module scope.

    NOTE(review): the leading underscore presumably keeps pytest from
    collecting this function -- confirm that disabling it is intentional.
    """
    def cal_sum(values):
        return values.sum()

    # custom stat only takes 1 argument. Thus, raise error
    with pytest.raises(Exception):
        stats(zones=zones, values=values, stat_funcs={'sum': cal_sum})
def test_stats_invalid_stat_input():
    """Check that ``stats`` raises for several kinds of invalid input.

    Scenarios, in order:

    1. ``stat_funcs`` given as the list ``['some_stat']``.
    2. ``values`` holding strings.
    3. ``zones`` containing the non-integer value ``0.5``.
    4. ``zones`` and ``values`` of different lengths.

    Each scenario only requires that *some* ``Exception`` is raised; the
    exception type and message are not checked.
    """
    zones, values = stats_create_zones_values()

    # invalid stats
    custom_stats = ['some_stat']
    with pytest.raises(Exception):
        stats(zones=zones, values=values, stat_funcs=custom_stats)

    # invalid values:
    # Use the builtin ``int`` as dtype. The ``np.int`` alias was removed in
    # NumPy 1.24, and because this array is built outside ``pytest.raises``
    # the resulting AttributeError would error the test before ``stats`` runs.
    zones = xa.DataArray(np.array([1, 2, 0], dtype=int))
    values = xa.DataArray(np.array(['apples', 'foobar', 'cowboy']))
    with pytest.raises(Exception):
        stats(zones=zones, values=values)

    # invalid zones
    zones = xa.DataArray(np.array([1, 2, 0.5]))
    values = xa.DataArray(np.array([1, 2, 0.5]))
    with pytest.raises(Exception):
        stats(zones=zones, values=values)

    # mismatch shape between zones and values:
    zones = xa.DataArray(np.array([1, 2, 0]))
    values = xa.DataArray(np.array([1, 2, 0, np.nan]))
    with pytest.raises(Exception):
        stats(zones=zones, values=values)
def test_stats_custom_stat():
    """Run ``stats`` with two custom functions and compare per-zone output.

    ``zones``, ``values``, ``unique_values`` and ``zone_vals_1..3`` are not
    assigned here; they are resolved from the enclosing module scope.
    """
    def cal_sum(values):
        return values.sum()

    def cal_double_sum(values):
        return values.sum() * 2

    per_zone_values = (zone_vals_1, zone_vals_2, zone_vals_3)
    zone_sums = [cal_sum(zone_vals) for zone_vals in per_zone_values]
    zone_double_sums = [
        cal_double_sum(zone_vals) for zone_vals in per_zone_values
    ]

    df = stats(
        zones=zones,
        values=values,
        stat_funcs={'sum': cal_sum, 'double sum': cal_double_sum},
    )
    assert isinstance(df, pd.DataFrame)

    # the index of the output DataFrame lists the unique values in `zones`
    assert df.index.tolist() == unique_values

    # one column per custom statistic
    assert len(df.columns) == 2

    assert zone_sums == df['sum'].tolist()
    assert zone_double_sums == df['double sum'].tolist()
def test_zone_ids_stats(backend, data_zones, data_values_2d,
                        result_zone_ids_stats):
    """Compare ``stats`` restricted to given ``zone_ids`` with the expectation.

    ``result_zone_ids_stats`` unpacks into the zone ids to request and the
    expected result handed to ``check_results``.
    """
    requested_ids, expected = result_zone_ids_stats
    check_results(
        backend,
        stats(zones=data_zones, values=data_values_2d,
              zone_ids=requested_ids),
        expected,
    )
def test_stats_dtypes():
    """Check that ``stats`` returns a DataFrame for ``float16`` values."""
    zones, raw_values = stats_create_zones_values()
    half_precision_values = raw_values.astype(np.float16)

    # default stat_funcs=['mean', 'max', 'min', 'std', 'var', 'count']
    result = stats(zones=zones, values=half_precision_values)
    assert isinstance(result, pd.DataFrame)
def test_zone_ids_stats(backend, data_zones, data_values_2d,
                        result_zone_ids_stats):
    """Compare ``stats`` restricted to given ``zone_ids`` with the expectation.

    The test is skipped when ``backend`` is ``'cupy'`` and
    ``has_cuda_and_cupy()`` is falsy.
    """
    if backend == 'cupy':
        if not has_cuda_and_cupy():
            pytest.skip("Requires CUDA and CuPy")

    requested_ids, expected = result_zone_ids_stats
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        zone_ids=requested_ids,
    )
    check_results(backend, actual, expected)
def test_zone_ids_stats_dataarray(backend, data_zones, data_values_2d,
                                  result_zone_ids_stats_dataarray):
    """Check ``stats`` with ``zone_ids`` and ``return_type='xarray.DataArray'``.

    The output is validated with ``general_output_checks``, with dtype and
    attribute verification switched off.
    """
    requested_ids, expected = result_zone_ids_stats_dataarray
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        zone_ids=requested_ids,
        return_type='xarray.DataArray',
    )
    general_output_checks(
        data_values_2d,
        actual,
        expected,
        verify_dtype=False,
        verify_attrs=False,
    )
def test_default_stats_dataarray(backend, data_zones, data_values_2d,
                                 result_default_stats_dataarray):
    """Check default ``stats`` output with ``return_type='xarray.DataArray'``.

    The output is validated with ``general_output_checks``, with dtype and
    attribute verification switched off.
    """
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        return_type='xarray.DataArray',
    )
    check_options = {'verify_dtype': False, 'verify_attrs': False}
    general_output_checks(
        data_values_2d,
        actual,
        result_default_stats_dataarray,
        **check_options,
    )
def test_custom_stats(backend, data_zones, data_values_2d,
                      result_custom_stats):
    """Check ``stats`` with the ``_double_sum`` and ``_range`` custom stats.

    ``result_custom_stats`` unpacks into the nodata values, the zone ids to
    request, and the expected result handed to ``check_results``.
    """
    # ---- custom stats (NumPy only) ----
    nodata, requested_ids, expected = result_custom_stats
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=requested_ids,
        nodata_values=nodata,
    )
    check_results(backend, actual, expected)
def test_custom_stats(backend, data_zones, data_values_2d,
                      result_custom_stats):
    """Check ``stats`` with the ``_double_sum`` and ``_range`` custom stats.

    The test is skipped when ``backend`` is ``'cupy'`` and
    ``has_cuda_and_cupy()`` is falsy. ``result_custom_stats`` unpacks into
    the nodata values, the zone ids to request, and the expected result.
    """
    # ---- custom stats (NumPy and CuPy only) ----
    if backend == 'cupy':
        if not has_cuda_and_cupy():
            pytest.skip("Requires CUDA and CuPy")

    nodata, requested_ids, expected = result_custom_stats
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=requested_ids,
        nodata_values=nodata,
    )
    check_results(backend, actual, expected)
def test_custom_stats_dataarray(backend, data_zones, data_values_2d,
                                result_custom_stats_dataarray):
    """Check custom ``stats`` with ``return_type='xarray.DataArray'``.

    Uses the ``_double_sum`` and ``_range`` custom stats and validates the
    output with ``general_output_checks``, with dtype and attribute
    verification switched off.
    """
    # ---- custom stats returns a xr.DataArray (NumPy only) ----
    nodata, requested_ids, expected = result_custom_stats_dataarray
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=requested_ids,
        nodata_values=nodata,
        return_type='xarray.DataArray',
    )
    general_output_checks(
        data_values_2d,
        actual,
        expected,
        verify_dtype=False,
        verify_attrs=False,
    )
def test_stats_default():
    """Check the DataFrame that ``stats`` returns with default statistics.

    ``zones``, ``values``, ``unique_values`` and the ``zone_*`` expectation
    lists are not assigned here; they are resolved from the enclosing module
    scope.
    """
    # default stat_funcs=['mean', 'max', 'min', 'std', 'var']
    result = stats(zones=zones, values=values)
    assert isinstance(result, pd.DataFrame)

    # the index of the output DataFrame lists the unique values in `zones`
    assert result.index.tolist() == unique_values

    # the default setting yields 5 statistics, one column each
    assert len(result.columns) == 5

    assert zone_means == result['mean'].tolist()
    assert zone_maxes == result['max'].tolist()
    assert zone_mins == result['min'].tolist()
    assert zone_stds == result['std'].tolist()
    assert zone_vars == result['var'].tolist()
def test_default_stats(backend, data_zones, data_values_2d,
                       result_default_stats):
    """Compare default ``stats`` output with ``result_default_stats``.

    The test is skipped when ``backend`` is ``'cupy'`` and
    ``has_cuda_and_cupy()`` is falsy.
    """
    if backend == 'cupy':
        if not has_cuda_and_cupy():
            pytest.skip("Requires CUDA and CuPy")

    actual = stats(zones=data_zones, values=data_values_2d)
    check_results(backend, actual, result_default_stats)
def test_stats_mismatch_zones_values_shape():
    """Expect ``stats`` to raise when ``zones`` and ``values`` differ in length.

    ``zones`` has three elements and ``values`` has four.
    """
    three_zones = xa.DataArray(np.array([1, 2, 0]))
    four_values = xa.DataArray(np.array([1, 2, 0, np.nan]))

    with pytest.raises(Exception):
        stats(zones=three_zones, values=four_values)
def test_stats_invalid_values():
    """Expect ``stats`` to raise when ``values`` holds strings.

    Only requires that some ``Exception`` is raised; the exception type and
    message are not checked.
    """
    # Use the builtin ``int`` as dtype. The ``np.int`` alias was removed in
    # NumPy 1.24, and because this array is built outside ``pytest.raises``
    # the resulting AttributeError would error the test before ``stats`` runs.
    zones = xa.DataArray(np.array([1, 2, 0], dtype=int))
    values = xa.DataArray(np.array(['apples', 'foobar', 'cowboy']))

    with pytest.raises(Exception):
        stats(zones=zones, values=values)
def test_stats_invalid_zones():
    """Expect ``stats`` to raise when ``zones`` contains the value ``0.5``."""
    fractional_zones = xa.DataArray(np.array([1, 2, 0.5]))
    sample_values = xa.DataArray(np.array([1, 2, 0.5]))

    with pytest.raises(Exception):
        stats(zones=fractional_zones, values=sample_values)
def test_stats_invalid_stat_list():
    """Expect ``stats`` to raise for the stat list ``['some_stat']``.

    ``zones`` and ``values`` are not assigned here; they are resolved from
    the enclosing module scope.
    """
    with pytest.raises(Exception):
        stats(zones=zones, values=values, stat_funcs=['some_stat'])
def test_default_stats(backend, data_zones, data_values_2d,
                       result_default_stats):
    """Compare default ``stats`` output with ``result_default_stats``."""
    check_results(
        backend,
        stats(zones=data_zones, values=data_values_2d),
        result_default_stats,
    )
def test_stats_default():
    """Check ``stats`` with default statistics and with custom functions.

    Expected per-zone figures are computed from NumPy masked arrays: invalid
    entries of ``values`` are masked first, then everything outside the zone
    in question. Both the default run (6 columns) and the custom run
    (2 columns) must return a DataFrame indexed by the zone ids 1, 2 and 4.
    """
    zones, values = stats_create_zones_values()
    unique_values = [1, 2, 4]

    masked_values = np.ma.masked_invalid(values.values)
    # one masked array per zone id, keeping only that zone's cells unmasked
    zone_vals_1, zone_vals_2, zone_vals_3 = (
        np.ma.masked_where(zones != zone_id, masked_values)
        for zone_id in unique_values
    )
    per_zone_values = (zone_vals_1, zone_vals_2, zone_vals_3)

    expected_defaults = {
        'mean': [zone_vals.mean() for zone_vals in per_zone_values],
        'max': [zone_vals.max() for zone_vals in per_zone_values],
        'min': [zone_vals.min() for zone_vals in per_zone_values],
        'std': [zone_vals.std() for zone_vals in per_zone_values],
        'var': [zone_vals.var() for zone_vals in per_zone_values],
        'count': [np.ma.count(zone_vals) for zone_vals in per_zone_values],
    }

    # default stat_funcs=['mean', 'max', 'min', 'std', 'var', 'count']
    df = stats(zones=zones, values=values)
    assert isinstance(df, pd.DataFrame)

    # the index of the output DataFrame lists the unique values in `zones`
    assert df.index.tolist() == unique_values

    # there are 6 statistics in the default setting
    assert len(df.columns) == 6

    for stat_name, expected in expected_defaults.items():
        assert expected == df[stat_name].tolist()

    # custom stats
    def cal_sum(values):
        return values.sum()

    def cal_double_sum(values):
        return values.sum() * 2

    zone_sums = [cal_sum(zone_vals) for zone_vals in per_zone_values]
    zone_double_sums = [
        cal_double_sum(zone_vals) for zone_vals in per_zone_values
    ]

    df = stats(
        zones=zones,
        values=values,
        stat_funcs={'sum': cal_sum, 'double sum': cal_double_sum},
    )
    assert isinstance(df, pd.DataFrame)

    # the index of the output DataFrame lists the unique values in `zones`
    assert df.index.tolist() == unique_values

    # there are 2 statistics
    assert len(df.columns) == 2

    assert zone_sums == df['sum'].tolist()
    assert zone_double_sums == df['double sum'].tolist()
def test_stats():
    """Check ``stats`` on numpy- and dask-backed inputs in three scenarios.

    Scenarios: default statistics over all zones, default statistics limited
    to zones 0 and 3, and custom statistics on zones 1 and 2 with
    ``nodata_values=0``. The custom scenario computes a numpy result only and
    passes ``None`` to ``check_results`` in place of a dask result.
    """
    def _double_sum(values):
        return values.sum() * 2

    def _range(values):
        return values.max() - values.min()

    # inputs for both backends; the third returned item is not used here
    zones_np, values_np, _ = create_zones_values(backend='numpy')
    # default stats_funcs, numpy case
    default_np = stats(zones=zones_np, values=values_np)

    zones_da, values_da, _ = create_zones_values(backend='dask')
    # default stats_funcs, dask case
    default_da = stats(zones=zones_da, values=values_da)

    check_results(
        default_np,
        default_da,
        {
            'zone': [0, 1, 2, 3],
            'mean': [0, 1, 2, 2.4],
            'max': [0, 1, 2, 3],
            'min': [0, 1, 2, 0],
            'sum': [0, 6, 8, 12],
            'std': [0, 0, 0, 1.2],
            'var': [0, 0, 0, 1.44],
            'count': [5, 6, 4, 5],
        },
    )

    # ---- default stats restricted to zones 0 and 3 ----
    selected_zone_ids = [0, 3]
    selected_np = stats(zones=zones_np, values=values_np,
                        zone_ids=selected_zone_ids)
    selected_da = stats(zones=zones_da, values=values_da,
                        zone_ids=selected_zone_ids)

    check_results(
        selected_np,
        selected_da,
        {
            'zone': [0, 3],
            'mean': [0, 2.4],
            'max': [0, 3],
            'min': [0, 0],
            'sum': [0, 12],
            'std': [0, 1.2],
            'var': [0, 1.44],
            'count': [5, 5],
        },
    )

    # ---- custom stats (NumPy only) ----
    custom_np = stats(
        zones=zones_np,
        values=values_np,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=[1, 2],
        nodata_values=0,
    )

    # no dask result is computed for custom stats; None stands in for it
    check_results(
        custom_np,
        None,
        {
            'zone': [1, 2],
            'double_sum': [12, 16],
            'range': [0, 0],
        },
    )