def test_crosstab_mismatch_zones_values_shape():
    """Expect crosstab to raise when zones and values differ in shape."""
    # zones is a 1x2 raster; values is 1x1 with three category layers
    zones_raster = xa.DataArray(np.array([[1, 2]]))
    values_raster = xa.DataArray(
        np.array([[[1, 2, np.nan]]]),
        dims=['lat', 'lon', 'race'],
    )
    values_raster['race'] = ['cat1', 'cat2', 'cat3']

    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones_raster, values_agg=values_raster)
def test_crosstab_invalid_layer():
    """Expect crosstab to raise for a `layer` absent from the values raster."""
    zones_raster = xa.DataArray(np.array([[1, 2]]))
    values_raster = xa.DataArray(
        np.array([[[1, 2, np.nan]]]),
        dims=['lat', 'lon', 'race'],
    )
    values_raster['race'] = ['cat1', 'cat2', 'cat3']

    # 'cat' is neither a dimension name nor one of the category labels
    missing_layer = 'cat'

    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(
            zones_agg=zones_raster,
            values_agg=values_raster,
            layer=missing_layer,
        )
def test_crosstab_invalid_values():
    """Expect crosstab to raise when the values raster holds strings."""
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the exact
    # type that alias used to point at.
    zones = xa.DataArray(np.array([[1, 2, 0]], dtype=int))

    # values must be either int or float, so string data is invalid
    # NOTE(review): zones is 1x3 while values is 1x1x3, so a shape-mismatch
    # error could also satisfy this test -- confirm which check fires.
    values = xa.DataArray(
        np.array([[['apples', 'foobar', 'cowboy']]]),
        dims=['lat', 'lon', 'race'],
    )
    values['race'] = ['cat1', 'cat2', 'cat3']

    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones, values_agg=values)
def test_crosstab_3d():
    """Compare numpy and dask crosstab results on the 3D values raster.

    Runs once with the data as-is and once with ``nodata_values=1``, where
    every expected count is zero.
    """
    category_names = ('cat1', 'cat2', 'cat3', 'cat4')

    # ----- regular case ------
    expected = {'zone': [1, 2, 3]}
    expected.update({name: [6, 5, 6] for name in category_names})

    np_zones, _, np_values = create_zones_values(backend='numpy')
    numpy_df = crosstab(
        zones=np_zones,
        values=np_values,
        zone_ids=[1, 2, 3],
        layer=-1,
    )

    da_zones, _, da_values = create_zones_values(backend='dask')
    dask_df = crosstab(
        zones=da_zones,
        values=da_values,
        zone_ids=[1, 2, 3],
        cat_ids=['cat1', 'cat2', 'cat3', 'cat4'],
        layer=-1,
    )
    check_results(numpy_df, dask_df, expected)

    # ----- no values case ------
    expected_novalues = {'zone': [1, 2, 3]}
    expected_novalues.update({name: [0, 0, 0] for name in category_names})

    np_zones, _, np_values = create_zones_values(backend='numpy')
    numpy_df = crosstab(
        zones=np_zones,
        values=np_values,
        layer=-1,
        zone_ids=[1, 2, 3],
        nodata_values=1,
    )

    da_zones, _, da_values = create_zones_values(backend='dask')
    dask_df = crosstab(
        zones=da_zones,
        values=da_values,
        layer=-1,
        zone_ids=[1, 2, 3],
        nodata_values=1,
    )
    check_results(numpy_df, dask_df, expected_novalues)
def test_crosstab_invalid_zones():
    """Expect crosstab to raise for a 1D zones raster and for float zones."""
    values_raster = xa.DataArray(
        np.array([[[1, 2, 0.5]]]),
        dims=['lat', 'lon', 'race'],
    )
    values_raster['race'] = ['cat1', 'cat2', 'cat3']

    # invalid dims: zones must be 2D, this one is 1D
    zones_1d = xa.DataArray(np.array([1, 2, 0]))
    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones_1d, values_agg=values_raster)

    # invalid values: zone ids must be int, 0.5 makes the array float
    zones_float = xa.DataArray(np.array([[1, 2, 0.5]]))
    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones_float, values_agg=values_raster)
def test_crosstab_2d():
    """Check count and percentage crosstabs of the 2D raster on both backends."""
    np_zones, np_values, _ = create_zones_values(backend='numpy')

    # count aggregation: `agg` is left at its default
    expected_counts = {
        'zone': [1, 2, 3],
        0: [0, 0, 1],
        1: [6, 0, 0],
        2: [0, 4, 0],
    }
    numpy_df = crosstab(
        zones=np_zones,
        values=np_values,
        zone_ids=[1, 2, 3],
        cat_ids=[0, 1, 2],
    )

    da_zones, da_values, _ = create_zones_values(backend='dask')
    dask_df = crosstab(
        zones=da_zones,
        values=da_values,
        zone_ids=[1, 2, 3],
        nodata_values=3,
    )
    check_results(numpy_df, dask_df, expected_counts)

    # percentage aggregation, reusing the rasters created above
    expected_percentages = {
        'zone': [1, 2],
        1: [100, 0],
        2: [0, 100],
    }
    numpy_df = crosstab(
        zones=np_zones,
        values=np_values,
        zone_ids=[1, 2],
        cat_ids=[1, 2],
        nodata_values=3,
        agg='percentage',
    )
    dask_df = crosstab(
        zones=da_zones,
        values=da_values,
        zone_ids=[1, 2],
        cat_ids=[1, 2],
        nodata_values=3,
        agg='percentage',
    )
    check_results(numpy_df, dask_df, expected_percentages)
def test_crosstab_3d():
    """Check shape, NaN-freeness and row sums of crosstab on an all-ones raster."""
    # valid `values_agg`: every cell of every category layer is 1
    values_agg = xa.DataArray(
        np.ones(24).reshape(2, 3, 4),
        dims=['lat', 'lon', 'race'],
    )
    values_agg['race'] = ['cat1', 'cat2', 'cat3', 'cat4']
    layer = 'race'

    # valid `zones_agg` with a compatible shape; `np.int` was removed in
    # NumPy 1.24, and the builtin `int` is what that alias pointed at
    zones_arr = np.arange(6, dtype=int).reshape(2, 3)
    zones_agg = xa.DataArray(zones_arr)

    df = crosstab(zones_agg, values_agg, layer)

    # Size of the category axis. The previous `len(values_agg.dims[-1])`
    # measured the dimension *name* ('race' has 4 characters) and matched
    # the 4 categories only by coincidence.
    num_cats = values_agg.shape[-1]
    # number of columns = number of categories
    assert len(df.columns) == num_cats

    # exclude region with 0 zone id; np.unique is sorted, so the order of
    # `zone_idx` is deterministic
    zone_idx = [zone for zone in np.unique(zones_arr) if zone != 0]
    num_zones = len(zone_idx)
    # number of rows = number of zones
    assert len(df.index) == num_zones

    # no NaN
    num_nans = df.isnull().sum().sum()
    assert num_nans == 0

    # values_agg are all 1s, so all categories have same percentage over zones
    for col in df.columns:
        assert len(df[col].unique()) == 1

    # vectorised row sum, adding the columns in the same order as before
    df['check_sum'] = df['cat1'] + df['cat2'] + df['cat3'] + df['cat4']
    # sum of a row is 1.0
    assert df['check_sum'][zone_idx[0]] == 1.0
def test_crosstab_no_values():
    """Check that an all-zero values raster yields an all-zero crosstab."""
    # valid `values_agg` of 0s
    values_agg = xa.DataArray(
        np.zeros(24).reshape(2, 3, 4),
        dims=['lat', 'lon', 'race'],
    )
    values_agg['race'] = ['cat1', 'cat2', 'cat3', 'cat4']

    # valid `zones_agg` with a compatible shape; `np.int` was removed in
    # NumPy 1.24, and the builtin `int` is what that alias pointed at
    zones_arr = np.arange(6, dtype=int).reshape(2, 3)
    zones_agg = xa.DataArray(zones_arr)

    df = crosstab(zones_agg, values_agg)

    # Size of the category axis. The previous `len(values_agg.dims[-1])`
    # measured the dimension *name* ('race' has 4 characters) and matched
    # the 4 categories only by coincidence.
    num_cats = values_agg.shape[-1]
    # number of columns = number of categories
    assert len(df.columns) == num_cats

    # exclude region with 0 zone id
    zone_idx = set(np.unique(zones_arr)) - {0}
    num_zones = len(zone_idx)
    # number of rows = number of zones
    assert len(df.index) == num_zones

    # all are 0s
    num_zeros = (df == 0).sum().sum()
    assert num_zeros == num_zones * num_cats
def test_crosstab_3d(backend, data_zones, data_values_3d, result_crosstab_3d):
    """Run crosstab on the 3D values fixture and compare with the expectation."""
    # the fixture bundles the call arguments with the expected outcome
    layer, zone_ids, expected_result = result_crosstab_3d

    crosstab_kwargs = {
        'zones': data_zones,
        'values': data_values_3d,
        'zone_ids': zone_ids,
        'layer': layer,
    }
    df_result = crosstab(**crosstab_kwargs)

    check_results(backend, df_result, expected_result)
def test_crosstab_2d():
    """Check the shape and a row sum of crosstab output for 2D rasters."""
    # values raster, deliberately containing one inf and one nan cell
    values_rows = [
        [0, 0, 10, 20],
        [0, 0, 0, 10],
        [np.inf, 30, 20, 50],
        [10, 30, 40, 40],
        [10, np.nan, 50, 0],
    ]
    values_agg = xa.DataArray(np.asarray(values_rows), dims=['lat', 'lon'])

    zones_rows = [
        [1, 1, 6, 6],
        [1, 1, 6, 6],
        [3, 5, 6, 6],
        [3, 5, 7, 7],
        [3, 7, 7, 0],
    ]
    zones_agg = xa.DataArray(np.asarray(zones_rows), dims=['lat', 'lon'])

    df = crosstab(zones_agg, values_agg)

    # categories are 0, 10, 20, 30, 40, 50 -> one column each
    num_cats = 6
    assert len(df.columns) == num_cats

    # one row per zone, the 0 zone id being excluded
    zone_idx = list(set(np.unique(zones_agg.data)) - {0})
    num_zones = len(zone_idx)
    assert len(df.index) == num_zones

    # sum of a row is 1.0
    df.loc[:, 'check_sum'] = df.sum(axis=1)
    assert df['check_sum'][zone_idx[0]] == 1.0
def test_percentage_crosstab_2d(backend, data_zones, data_values_2d,
                                result_percentage_crosstab_2d):
    """Run crosstab with ``agg='percentage'`` on the 2D values fixture."""
    # the fixture bundles the call arguments with the expected outcome
    (nodata_values,
     zone_ids,
     cat_ids,
     expected_result) = result_percentage_crosstab_2d

    crosstab_kwargs = {
        'zones': data_zones,
        'values': data_values_2d,
        'zone_ids': zone_ids,
        'cat_ids': cat_ids,
        'nodata_values': nodata_values,
        'agg': 'percentage',
    }
    df_result = crosstab(**crosstab_kwargs)

    check_results(backend, df_result, expected_result)
def test_count_crosstab_2d(backend, data_zones, data_values_2d,
                           result_count_crosstab_2d):
    """Run crosstab on the 2D values fixture without passing `agg`."""
    # the fixture bundles the call arguments with the expected outcome
    zone_ids, cat_ids, expected_result = result_count_crosstab_2d

    crosstab_kwargs = {
        'zones': data_zones,
        'values': data_values_2d,
        'zone_ids': zone_ids,
        'cat_ids': cat_ids,
    }
    df_result = crosstab(**crosstab_kwargs)

    check_results(backend, df_result, expected_result)
def test_crosstab_3d_agg_method(backend, data_zones, data_values_3d,
                                result_crosstab_3d):
    """Run crosstab on the 3D values fixture once per aggregation method."""
    # here the expected result is indexed by aggregation name
    layer, zone_ids, expected_by_agg = result_crosstab_3d

    agg_methods = ['min', 'max', 'mean', 'sum', 'std', 'var', 'count']
    for agg_name in agg_methods:
        df_result = crosstab(
            zones=data_zones,
            values=data_values_3d,
            zone_ids=zone_ids,
            layer=layer,
            agg=agg_name,
        )
        check_results(backend, df_result, expected_by_agg[agg_name])
def test_crosstab_invalid_input():
    """Expect crosstab to raise for each kind of invalid input.

    Covers 1D zones, float zones, string values, mismatched zones/values
    shapes, and a `layer` that does not exist in the values raster.
    """
    # invalid zones dims (must be 2d)
    zones = xa.DataArray(np.array([1, 2, 0]))
    values = xa.DataArray(
        np.array([[[1, 2, 0.5]]]),
        dims=['lat', 'lon', 'race'],
    )
    values['race'] = ['cat1', 'cat2', 'cat3']
    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones, values_agg=values)

    # invalid zones dtype (must be int)
    zones = xa.DataArray(np.array([[1, 2, 0.5]]))
    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones, values_agg=values)

    # invalid values
    # `np.int` was removed in NumPy 1.24; with it this line raised
    # AttributeError outside the `pytest.raises` block. The builtin `int`
    # is what that alias pointed at.
    zones = xa.DataArray(np.array([[1, 2, 0]], dtype=int))
    # values must be either int or float
    values = xa.DataArray(
        np.array([[['apples', 'foobar', 'cowboy']]]),
        dims=['lat', 'lon', 'race'],
    )
    values['race'] = ['cat1', 'cat2', 'cat3']
    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones, values_agg=values)

    # mismatch shape zones and values
    zones = xa.DataArray(np.array([[1, 2]]))
    values = xa.DataArray(
        np.array([[[1, 2, np.nan]]]),
        dims=['lat', 'lon', 'race'],
    )
    values['race'] = ['cat1', 'cat2', 'cat3']
    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones, values_agg=values)

    # invalid layer
    zones = xa.DataArray(np.array([[1, 2]]))
    values = xa.DataArray(
        np.array([[[1, 2, np.nan]]]),
        dims=['lat', 'lon', 'race'],
    )
    values['race'] = ['cat1', 'cat2', 'cat3']
    # this layer does not exist in values agg
    layer = 'cat'
    with pytest.raises(Exception) as e_info:  # noqa
        crosstab(zones_agg=zones, values_agg=values, layer=layer)
def test_crosstab_2d_dtypes():
    """Check that crosstab returns a DataFrame for a float16 values raster."""
    # float16 values, including one inf and one nan cell
    values_rows = [
        [0, 0, 10, 20],
        [0, 0, 0, 10],
        [np.inf, 30, 20, 50],
        [10, 30, 40, 40],
        [10, np.nan, 50, 0],
    ]
    values_agg = xa.DataArray(
        np.asarray(values_rows, dtype=np.float16),
        dims=['lat', 'lon'],
    )

    zones_rows = [
        [1, 1, 6, 6],
        [1, 1, 6, 6],
        [3, 5, 6, 6],
        [3, 5, 7, 7],
        [3, 7, 7, 0],
    ]
    zones_agg = xa.DataArray(np.asarray(zones_rows), dims=['lat', 'lon'])

    result = crosstab(zones_agg, values_agg)
    assert isinstance(result, pd.DataFrame)
def test_crosstab_no_zones():
    """Check that an all-zero zones raster yields a crosstab with no rows."""
    # valid `values_agg`
    values_agg = xa.DataArray(
        np.zeros(24).reshape(2, 3, 4),
        dims=['lat', 'lon', 'race'],
    )
    values_agg['race'] = ['cat1', 'cat2', 'cat3', 'cat4']

    # valid `zones_agg` with a compatible shape but no zone at all (every
    # cell is 0); `np.int` was removed in NumPy 1.24, and the builtin `int`
    # is what that alias pointed at
    zones_arr = np.zeros((2, 3), dtype=int)
    zones_agg = xa.DataArray(zones_arr)

    # Size of the category axis. The previous `len(values_agg.dims[-1])`
    # measured the dimension *name* ('race' has 4 characters) and matched
    # the 4 categories only by coincidence.
    num_cats = values_agg.shape[-1]

    df = crosstab(zones_agg, values_agg)

    # number of columns = number of categories
    assert len(df.columns) == num_cats
    # no row as no zone
    assert len(df.index) == 0