Exemple #1
0
def _test_stats_invalid_custom_stat():
    """Disabled check that ``stats`` raises for a custom statistic.

    ``zones`` and ``values`` are not created here; they are resolved
    from the enclosing module scope.  Because the name does not start
    with ``test``, pytest's default collection rules should skip it.
    """
    # The parameter keeps the name ``values`` because the callable is
    # handed to ``stats`` and its signature is part of what is tested.
    def zone_total(values):
        return values.sum()

    stat_funcs = {'sum': zone_total}

    # NOTE(review): the original comment says the call must fail because
    # the custom stat only takes one argument -- confirm against `stats`.
    with pytest.raises(Exception):
        stats(zones=zones, values=values, stat_funcs=stat_funcs)
Exemple #2
0
def test_stats_invalid_stat_input():
    """Check that ``stats`` raises for several kinds of malformed input.

    Four cases run in sequence: a list naming an unknown statistic,
    string-typed values, zones containing a fractional id, and zones and
    values of different lengths.  Each call must raise some exception.
    """
    zones, values = stats_create_zones_values()

    # invalid stats: a list holding a name that is not a statistic
    custom_stats = ['some_stat']
    with pytest.raises(Exception):
        stats(zones=zones, values=values, stat_funcs=custom_stats)

    # invalid values: strings instead of numbers.
    # The builtin ``int`` is used instead of ``np.int``: that alias was
    # deprecated in NumPy 1.20 and removed in 1.24, and since it sat
    # outside the ``pytest.raises`` block it made the test itself error.
    zones = xa.DataArray(np.array([1, 2, 0], dtype=int))
    values = xa.DataArray(np.array(['apples', 'foobar', 'cowboy']))
    with pytest.raises(Exception):
        stats(zones=zones, values=values)

    # invalid zones: one zone id is fractional (0.5)
    zones = xa.DataArray(np.array([1, 2, 0.5]))
    values = xa.DataArray(np.array([1, 2, 0.5]))
    with pytest.raises(Exception):
        stats(zones=zones, values=values)

    # mismatch shape between zones (3 items) and values (4 items)
    zones = xa.DataArray(np.array([1, 2, 0]))
    values = xa.DataArray(np.array([1, 2, 0, np.nan]))
    with pytest.raises(Exception):
        stats(zones=zones, values=values)
Exemple #3
0
def test_stats_custom_stat():
    """Compare ``stats`` output for two custom statistics with expectations.

    ``zones``, ``values``, ``unique_values`` and ``zone_vals_1..3`` are
    read from the enclosing module scope.
    """
    def zone_total(values):
        return values.sum()

    def zone_double_total(values):
        return values.sum() * 2

    per_zone_values = (zone_vals_1, zone_vals_2, zone_vals_3)
    expected_sums = [zone_total(v) for v in per_zone_values]
    expected_double_sums = [zone_double_total(v) for v in per_zone_values]

    result = stats(
        zones=zones,
        values=values,
        stat_funcs={'sum': zone_total, 'double sum': zone_double_total},
    )

    assert isinstance(result, pd.DataFrame)

    # the DataFrame index must list the unique zone ids
    assert result.index.tolist() == unique_values

    # one column per custom statistic
    assert len(result.columns) == 2

    assert expected_sums == result['sum'].tolist()
    assert expected_double_sums == result['double sum'].tolist()
Exemple #4
0
def test_zone_ids_stats(backend, data_zones, data_values_2d,
                        result_zone_ids_stats):
    """Run ``stats`` restricted to given zone ids and check the result.

    ``result_zone_ids_stats`` is unpacked into the zone ids to request
    and the expected output; the arguments are presumably pytest
    fixtures defined elsewhere.
    """
    requested_ids, expected = result_zone_ids_stats
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        zone_ids=requested_ids,
    )
    check_results(backend, actual, expected)
Exemple #5
0
def test_stats_dtypes():
    """Run ``stats`` on float16 values and check a DataFrame comes back."""
    zones, raw_values = stats_create_zones_values()
    half_precision_values = raw_values.astype(np.float16)

    # No stat_funcs is passed, so `stats` uses its defaults (the original
    # comment lists mean, max, min, std, var and count).
    result = stats(zones=zones, values=half_precision_values)

    assert isinstance(result, pd.DataFrame)
def test_zone_ids_stats(backend, data_zones, data_values_2d,
                        result_zone_ids_stats):
    """Run ``stats`` restricted to given zone ids and check the result.

    The test is skipped for the ``'cupy'`` backend when
    ``has_cuda_and_cupy()`` reports that CUDA/CuPy is unavailable.
    """
    cupy_unavailable = backend == 'cupy' and not has_cuda_and_cupy()
    if cupy_unavailable:
        pytest.skip("Requires CUDA and CuPy")

    requested_ids, expected = result_zone_ids_stats
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        zone_ids=requested_ids,
    )
    check_results(backend, actual, expected)
def test_zone_ids_stats_dataarray(backend, data_zones, data_values_2d,
                                  result_zone_ids_stats_dataarray):
    """Request ``stats`` as an ``xarray.DataArray`` for given zone ids.

    The output is validated with ``general_output_checks``, with dtype
    and attribute verification switched off.
    """
    requested_ids, expected = result_zone_ids_stats_dataarray
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        zone_ids=requested_ids,
        return_type='xarray.DataArray',
    )
    general_output_checks(
        data_values_2d,
        actual,
        expected,
        verify_dtype=False,
        verify_attrs=False,
    )
def test_default_stats_dataarray(backend, data_zones, data_values_2d,
                                 result_default_stats_dataarray):
    """Request default ``stats`` as an ``xarray.DataArray`` and check it.

    The output is validated with ``general_output_checks``, with dtype
    and attribute verification switched off.
    """
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        return_type='xarray.DataArray',
    )
    general_output_checks(data_values_2d,
                          actual,
                          result_default_stats_dataarray,
                          verify_dtype=False,
                          verify_attrs=False)
Exemple #9
0
def test_custom_stats(backend, data_zones, data_values_2d,
                      result_custom_stats):
    """Run ``stats`` with two custom statistics and check the result.

    ``result_custom_stats`` is unpacked into the nodata value(s), the
    zone ids to request and the expected output.  The original comment
    marks this case as NumPy only.
    """
    nodata_values, requested_ids, expected = result_custom_stats
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=requested_ids,
        nodata_values=nodata_values,
    )
    check_results(backend, actual, expected)
Exemple #10
0
def test_custom_stats(backend, data_zones, data_values_2d,
                      result_custom_stats):
    """Run ``stats`` with two custom statistics and check the result.

    The test is skipped for the ``'cupy'`` backend when
    ``has_cuda_and_cupy()`` reports that CUDA/CuPy is unavailable.  The
    original comment marks this case as NumPy and CuPy only.
    """
    cupy_unavailable = backend == 'cupy' and not has_cuda_and_cupy()
    if cupy_unavailable:
        pytest.skip("Requires CUDA and CuPy")

    nodata_values, requested_ids, expected = result_custom_stats
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=requested_ids,
        nodata_values=nodata_values,
    )
    check_results(backend, actual, expected)
Exemple #11
0
def test_custom_stats_dataarray(backend, data_zones, data_values_2d,
                                result_custom_stats_dataarray):
    """Request custom ``stats`` as an ``xarray.DataArray`` and check it.

    ``result_custom_stats_dataarray`` is unpacked into the nodata
    value(s), the zone ids to request and the expected output.  The
    original comment marks this case as NumPy only.
    """
    nodata_values, requested_ids, expected = result_custom_stats_dataarray
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=requested_ids,
        nodata_values=nodata_values,
        return_type='xarray.DataArray',
    )
    general_output_checks(
        data_values_2d,
        actual,
        expected,
        verify_dtype=False,
        verify_attrs=False,
    )
Exemple #12
0
def test_stats_default():
    """Compare the default ``stats`` output with expected per-zone values.

    ``zones``, ``values``, ``unique_values`` and the ``zone_*`` expected
    lists are read from the enclosing module scope.
    """
    # No stat_funcs is passed; the defaults are expected to be
    # mean, max, min, std and var.
    result = stats(zones=zones, values=values)

    assert isinstance(result, pd.DataFrame)

    # the DataFrame index must list the unique zone ids
    assert result.index.tolist() == unique_values

    # five statistics in the default setting
    assert len(result.columns) == 5

    expected_by_stat = (
        ('mean', zone_means),
        ('max', zone_maxes),
        ('min', zone_mins),
        ('std', zone_stds),
        ('var', zone_vars),
    )
    for stat_name, expected in expected_by_stat:
        assert expected == result[stat_name].tolist()
Exemple #13
0
def test_default_stats(backend, data_zones, data_values_2d,
                       result_default_stats):
    """Run ``stats`` with default settings and check the result.

    The test is skipped for the ``'cupy'`` backend when
    ``has_cuda_and_cupy()`` reports that CUDA/CuPy is unavailable.
    """
    cupy_unavailable = backend == 'cupy' and not has_cuda_and_cupy()
    if cupy_unavailable:
        pytest.skip("Requires CUDA and CuPy")

    actual = stats(zones=data_zones, values=data_values_2d)
    check_results(backend, actual, result_default_stats)
Exemple #14
0
def test_stats_mismatch_zones_values_shape():
    """``stats`` must raise when zones and values differ in length."""
    three_zones = xa.DataArray(np.array([1, 2, 0]))
    four_values = xa.DataArray(np.array([1, 2, 0, np.nan]))

    with pytest.raises(Exception):
        stats(zones=three_zones, values=four_values)
Exemple #15
0
def test_stats_invalid_values():
    """``stats`` must raise when the values array holds strings."""
    # The builtin ``int`` is used instead of ``np.int``: that alias was
    # deprecated in NumPy 1.20 and removed in 1.24, and since it sat
    # outside the ``pytest.raises`` block it made the test itself error.
    zones = xa.DataArray(np.array([1, 2, 0], dtype=int))
    values = xa.DataArray(np.array(['apples', 'foobar', 'cowboy']))

    with pytest.raises(Exception):
        stats(zones=zones, values=values)
Exemple #16
0
def test_stats_invalid_zones():
    """``stats`` must raise when a zone id is fractional (0.5)."""
    fractional_zones = xa.DataArray(np.array([1, 2, 0.5]))
    sample_values = xa.DataArray(np.array([1, 2, 0.5]))

    with pytest.raises(Exception):
        stats(zones=fractional_zones, values=sample_values)
Exemple #17
0
def test_stats_invalid_stat_list():
    """``stats`` must raise when ``stat_funcs`` is ``['some_stat']``.

    ``zones`` and ``values`` are read from the enclosing module scope.
    """
    unknown_stats = ['some_stat']
    with pytest.raises(Exception):
        stats(zones=zones, values=values, stat_funcs=unknown_stats)
Exemple #18
0
def test_default_stats(backend, data_zones, data_values_2d,
                       result_default_stats):
    """Run ``stats`` with default settings and check the result."""
    actual = stats(
        zones=data_zones,
        values=data_values_2d,
    )
    check_results(backend, actual, result_default_stats)
Exemple #19
0
def test_stats_default():
    """Check ``stats`` with default statistics, then with custom ones.

    Expected values are computed per zone on masked arrays: invalid
    entries of ``values`` are masked first, then every cell outside the
    zone is masked as well.
    """
    zones, values = stats_create_zones_values()

    unique_values = [1, 2, 4]
    masked_values = np.ma.masked_invalid(values.values)
    # one masked array per zone id, in the same order as `unique_values`
    zone_vals = [
        np.ma.masked_where(zones != zone_id, masked_values)
        for zone_id in unique_values
    ]

    zone_means = [zv.mean() for zv in zone_vals]
    zone_maxes = [zv.max() for zv in zone_vals]
    zone_mins = [zv.min() for zv in zone_vals]
    zone_stds = [zv.std() for zv in zone_vals]
    zone_vars = [zv.var() for zv in zone_vals]
    zone_counts = [np.ma.count(zv) for zv in zone_vals]

    # ---- default statistics: mean, max, min, std, var, count ----
    default_df = stats(zones=zones, values=values)

    assert isinstance(default_df, pd.DataFrame)

    # the DataFrame index must list the unique zone ids
    assert default_df.index.tolist() == unique_values

    # six statistics in the default setting
    assert len(default_df.columns) == 6

    assert zone_means == default_df['mean'].tolist()
    assert zone_maxes == default_df['max'].tolist()
    assert zone_mins == default_df['min'].tolist()
    assert zone_stds == default_df['std'].tolist()
    assert zone_vars == default_df['var'].tolist()
    assert zone_counts == default_df['count'].tolist()

    # ---- custom statistics ----
    def zone_total(values):
        return values.sum()

    def zone_double_total(values):
        return values.sum() * 2

    zone_sums = [zone_total(zv) for zv in zone_vals]
    zone_double_sums = [zone_double_total(zv) for zv in zone_vals]

    custom_df = stats(
        zones=zones,
        values=values,
        stat_funcs={'sum': zone_total, 'double sum': zone_double_total},
    )

    assert isinstance(custom_df, pd.DataFrame)
    # the DataFrame index must list the unique zone ids
    assert custom_df.index.tolist() == unique_values
    # one column per custom statistic
    assert len(custom_df.columns) == 2
    assert zone_sums == custom_df['sum'].tolist()
    assert zone_double_sums == custom_df['double sum'].tolist()
def test_stats():
    """Check ``stats`` for NumPy and Dask inputs against fixed expectations.

    Three scenarios run in order: default statistics over all zones,
    default statistics restricted to zones 0 and 3, and two custom
    statistics over zones 1 and 2 with 0 as the nodata value.  The
    custom scenario has no Dask result; ``None`` is passed in its place.
    """
    # ---- default statistics, every zone ----
    expected_default = {
        'zone': [0, 1, 2, 3],
        'mean': [0, 1, 2, 2.4],
        'max': [0, 1, 2, 3],
        'min': [0, 1, 2, 0],
        'sum': [0, 6, 8, 12],
        'std': [0, 0, 0, 1.2],
        'var': [0, 0, 0, 1.44],
        'count': [5, 6, 4, 5]
    }

    zones_np, values_np, _ = create_zones_values(backend='numpy')
    numpy_default = stats(zones=zones_np, values=values_np)

    zones_da, values_da, _ = create_zones_values(backend='dask')
    dask_default = stats(zones=zones_da, values=values_da)

    check_results(numpy_default, dask_default, expected_default)

    # ---- default statistics, zones 0 and 3 only ----
    expected_zone_0_3 = {
        'zone': [0, 3],
        'mean': [0, 2.4],
        'max': [0, 3],
        'min': [0, 0],
        'sum': [0, 12],
        'std': [0, 1.2],
        'var': [0, 1.44],
        'count': [5, 5]
    }

    selected_ids = [0, 3]
    numpy_zone_0_3 = stats(
        zones=zones_np, values=values_np, zone_ids=selected_ids
    )
    dask_zone_0_3 = stats(
        zones=zones_da, values=values_da, zone_ids=[0, 3]
    )
    check_results(numpy_zone_0_3, dask_zone_0_3, expected_zone_0_3)

    # ---- custom stats (NumPy only) ----
    expected_custom = {
        'zone': [1, 2],
        'double_sum': [12, 16],
        'range': [0, 0],
    }

    def _double_sum(values):
        return values.sum() * 2

    def _range(values):
        return values.max() - values.min()

    numpy_custom = stats(
        zones=zones_np,
        values=values_np,
        stats_funcs={'double_sum': _double_sum, 'range': _range},
        zone_ids=[1, 2],
        nodata_values=0,
    )
    # no Dask result is computed for custom stats
    dask_custom = None
    check_results(numpy_custom, dask_custom, expected_custom)