def test_structured_data_list():
    """UdfData holding two StructuredData items serializes and reprs as expected."""
    first = StructuredData([1, 2, 3, 5, 8])
    second = StructuredData({"a": [3, 5], "b": "red"})
    data = UdfData(structured_data_list=[first, second])
    expected = {
        "datacubes": None,
        "feature_collection_list": None,
        "structured_data_list": [
            {"data": [1, 2, 3, 5, 8], "description": "list", "type": "list"},
            {"data": {"a": [3, 5], "b": "red"}, "description": "dict", "type": "dict"},
        ],
        "proj": None,
        "user_context": {},
    }
    assert data.to_dict() == expected
    assert repr(data) == (
        '<UdfData datacube_list:None feature_collection_list:None '
        'structured_data_list:[<StructuredData with list>, <StructuredData with dict>]>'
    )
def test_datacube_list():
    """A single XarrayDataCube plus user context round-trips through to_dict()."""
    arr = xarray.DataArray(
        numpy.zeros((2, 3)),
        coords={"x": [1, 2], "y": [3, 4, 5]},
        dims=("x", "y"),
        name="testdata",
    )
    data = UdfData(datacube_list=[XarrayDataCube(arr)], user_context={"kernel": 3})
    assert data.to_dict() == {
        "datacubes": [
            {
                "id": "testdata",
                "data": [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
                "dimensions": [
                    {"name": "x", "coordinates": [1, 2]},
                    {"name": "y", "coordinates": [3, 4, 5]},
                ],
            }
        ],
        "feature_collection_list": None,
        "structured_data_list": None,
        "proj": None,
        "user_context": {"kernel": 3},
    }
    assert repr(data) == (
        '<UdfData datacube_list:[<XarrayDataCube shape:(2, 3)>] '
        'feature_collection_list:None structured_data_list:None>'
    )
def hyper_min_median_max(udf_data: UdfData):
    """Compute the min, median and max of the time dimension of a hyper cube

    Hypercubes with time dimensions are required. The min, median and max
    reduction of the time axis will be applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector
            tiles as well as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.
    """
    cube_list = []
    # Reduce every input cube along its time dimension "t" into three new cubes.
    for cube in udf_data.get_datacube_list():
        # Prefixed local names: the original used `min`, `median` and `max`,
        # which shadowed the Python builtins of the same name.
        t_min = cube.array.min(dim="t")
        t_median = cube.array.median(dim="t")
        t_max = cube.array.max(dim="t")
        # Suffix the cube id so each reduction is identifiable downstream.
        t_min.name = cube.id + "_min"
        t_median.name = cube.id + "_median"
        t_max.name = cube.id + "_max"
        cube_list.append(XarrayDataCube(array=t_min))
        cube_list.append(XarrayDataCube(array=t_median))
        cube_list.append(XarrayDataCube(array=t_max))
    # Replace the input cubes with the reduced results.
    udf_data.set_datacube_list(cube_list)
def rct_stats(udf_data: UdfData):
    """Compute univariate statistics for each hypercube

    Args:
        udf_data (UdfData): The UDF data object that contains raster and
            vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.
    """
    # Per-cube statistics, keyed by cube id.
    stats = {}
    for cube in udf_data.get_datacube_list():
        arr = cube.array
        # Cast to plain floats: numpy scalars are not JSON-serializable.
        stats[cube.id] = {
            "sum": float(arr.sum()),
            "mean": float(arr.mean()),
            "min": float(arr.min()),
            "max": float(arr.max()),
        }
    # Wrap the statistics in a structured data object.
    sd = StructuredData(
        data=stats,
        type="dict",
        description="Statistical data sum, min, max and mean for each raster collection cube as dict",
    )
    # Drop all cubes and publish only the structured results.
    udf_data.set_datacube_list(None)
    udf_data.set_structured_data_list([sd])
def test_feature_collection_list():
    """A FeatureCollection of two points serializes to GeoJSON-style dicts."""
    gdf = GeoDataFrame(
        {"a": [1, 4], "b": [2, 16]},
        geometry=[Point(1, 2), Point(3, 5)],
    )
    collection = FeatureCollection(id="test", data=gdf)
    data = UdfData(feature_collection_list=[collection])
    assert data.to_dict() == {
        'datacubes': None,
        'feature_collection_list': [
            {
                'data': {
                    'type': 'FeatureCollection',
                    'features': [
                        {
                            'id': '0',
                            'type': 'Feature',
                            'geometry': {'coordinates': (1.0, 2.0), 'type': 'Point'},
                            'properties': {'a': 1, 'b': 2},
                            'bbox': (1.0, 2.0, 1.0, 2.0),
                        },
                        {
                            'id': '1',
                            'type': 'Feature',
                            'geometry': {'coordinates': (3.0, 5.0), 'type': 'Point'},
                            'properties': {'a': 4, 'b': 16},
                            'bbox': (3.0, 5.0, 3.0, 5.0),
                        },
                    ],
                    'bbox': (1.0, 2.0, 3.0, 5.0),
                },
                'id': 'test',
            }
        ],
        'structured_data_list': None,
        'proj': None,
        'user_context': {},
    }
    assert repr(data) == (
        '<UdfData datacube_list:None '
        'feature_collection_list:[<FeatureCollection with GeoDataFrame>] '
        'structured_data_list:None>'
    )
def test_run_udf_code_reduce_time_mean():
    """The reduce_time_mean UDF averages each cube over the time dimension."""
    udf_code = _get_udf_code("reduce_time_mean.py")
    temp = _build_txy_data(
        ts=[2018, 2019, 2020, 2021], xs=[2, 3], ys=[10, 20, 30],
        name="temp", offset=2,
    )
    prec = _build_txy_data(
        ts=[2018, 2019, 2020, 2021], xs=[2, 3], ys=[10, 20, 30],
        name="prec", offset=4,
    )
    result = run_udf_code(code=udf_code, data=UdfData(datacube_list=[temp, prec]))
    temp_mean, prec_mean = result.get_datacube_list()
    # Time dimension is reduced away: (t, x, y) -> (x, y).
    assert temp_mean.id == "temp_mean"
    assert temp_mean.array.name == "temp_mean"
    assert temp_mean.array.shape == (2, 3)
    assert list(temp_mean.array.values.ravel()) == [152.0, 153.0, 154.0, 162.0, 163.0, 164.0]
    assert prec_mean.id == "prec_mean"
    assert prec_mean.array.name == "prec_mean"
    assert prec_mean.array.shape == (2, 3)
    assert list(prec_mean.array.values.ravel()) == [154.0, 155.0, 156.0, 164.0, 165.0, 166.0]
def test_run_udf_code_apply_timeseries_tb():
    """An inline apply_timeseries UDF centers each (t, bands) series on its mean."""
    udf_code = textwrap.dedent("""
        import pandas as pd
        def apply_timeseries(series: pd.Series, context: dict) -> pd.Series:
            return series - series.mean()
    """)
    xdc = _build_xdc(ts=[2018, 2019, 2020, 2021], bands=["red", "green", "blue"])
    result = run_udf_code(code=udf_code, data=UdfData(datacube_list=[xdc]))
    cube, = result.get_datacube_list()
    assert isinstance(cube, XarrayDataCube)
    expected_dims = [
        {'name': 't', 'coordinates': [2018, 2019, 2020, 2021]},
        {'name': 'bands', 'coordinates': ["red", "green", "blue"]},
    ]
    assert cube.to_dict() == {
        'data': [[-15, -15, -15], [-5, -5, -5], [5, 5, 5], [15, 15, 15]],
        'dimensions': expected_dims,
    }
def test_run_udf_code_statistics():
    """The statistics UDF replaces the cube with a dict of per-cube stats."""
    udf_code = _get_udf_code("statistics.py")
    xdc = _build_txy_data(
        ts=[2018, 2019], xs=[0, 1], ys=[0, 1, 2],
        name="temp", offset=2,
    )
    result = run_udf_code(code=udf_code, data=UdfData(datacube_list=[xdc]))
    structured, = result.structured_data_list
    assert structured.to_dict() == {
        "data": {"temp": {"min": 2, "mean": 58, "max": 114, "sum": 696}},
        "type": "dict",
        "description": "Statistical data sum, min, max and mean for each raster collection cube as dict",
    }
def test_run_udf_code_ndvi():
    """The ndvi02 UDF combines "red" and "nir" cubes into a single NDVI cube.

    Fix: removed leftover debug `print(...)` calls that polluted test output.
    """
    udf_code = _get_udf_code("ndvi02.py")
    red = _build_txy_data(ts=[2018], xs=[0, 1], ys=[0, 1, 2], name="red", offset=2)
    nir = _build_txy_data(ts=[2018], xs=[0, 1], ys=[0, 1, 2], name="nir", offset=4)
    result = run_udf_code(code=udf_code, data=UdfData(datacube_list=[red, nir]))
    ndvi_cube, = result.get_datacube_list()
    assert ndvi_cube.id == "NDVI"
    assert ndvi_cube.array.shape == (1, 2, 3)

    def ndvi(red_value, nir_value):
        # Reference NDVI formula to compute the expected values.
        return (nir_value - red_value) / (nir_value + red_value)

    assert list(ndvi_cube.array.values.ravel()) == [
        ndvi(2, 4), ndvi(3, 5), ndvi(4, 6),
        ndvi(12, 14), ndvi(13, 15), ndvi(14, 16),
    ]
def test_udf_data_from_dict_empty():
    """Deserializing an empty dict yields an all-None/empty UdfData."""
    data = UdfData.from_dict({})
    assert data.to_dict() == {
        'datacubes': None,
        "feature_collection_list": None,
        'structured_data_list': None,
        'proj': None,
        'user_context': {},
    }
def hyper_map_fabs(udf_data: UdfData):
    """Compute the absolute values of each hyper cube in the provided data

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector
            tiles as well as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.
    """
    result_cubes = []
    # Map numpy.fabs element-wise over every cube.
    for cube in udf_data.get_datacube_list():
        absolute = numpy.fabs(cube.array)
        # Suffix the id so the output cube is identifiable downstream.
        absolute.name = cube.id + "_fabs"
        result_cubes.append(XarrayDataCube(array=absolute))
    udf_data.set_datacube_list(result_cubes)
def test_run_udf_code_apply_timeseries_txy():
    """An inline apply_timeseries UDF centers (t, x, y) cubes on their time mean."""
    udf_code = textwrap.dedent("""
        import pandas as pd
        def apply_timeseries(series: pd.Series, context: dict) -> pd.Series:
            return series - series.mean()
    """)
    temp = _build_txy_data(
        ts=[2018, 2019, 2020, 2021], xs=[2, 3], ys=[10, 20, 30],
        name="temp", t_factor=2,
    )
    prec = _build_txy_data(
        ts=[2018, 2019, 2020, 2021], xs=[2, 3], ys=[10, 20, 30],
        name="prec", t_factor=10,
    )
    result = run_udf_code(code=udf_code, data=UdfData(datacube_list=[temp, prec]))
    temp_out, prec_out = result.get_datacube_list()
    expected_dims = [
        {'name': 't', 'coordinates': [2018, 2019, 2020, 2021]},
        {'name': 'x', 'coordinates': [2, 3]},
        {'name': 'y', 'coordinates': [10, 20, 30]},
    ]
    assert isinstance(temp_out, XarrayDataCube)
    assert temp_out.to_dict() == {
        'id': 'temp',
        'data': [
            [[-3, -3, -3], [-3, -3, -3]],
            [[-1, -1, -1], [-1, -1, -1]],
            [[1, 1, 1], [1, 1, 1]],
            [[3, 3, 3], [3, 3, 3]],
        ],
        'dimensions': expected_dims,
    }
    assert isinstance(prec_out, XarrayDataCube)
    assert prec_out.to_dict() == {
        'id': 'prec',
        'data': [
            [[-15, -15, -15], [-15, -15, -15]],
            [[-5, -5, -5], [-5, -5, -5]],
            [[5, 5, 5], [5, 5, 5]],
            [[15, 15, 15], [15, 15, 15]],
        ],
        'dimensions': expected_dims,
    }
def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR hypercubes

    Hypercubes with ids "red" and "nir" are required. The NDVI computation
    will be applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector
            tiles as well as hypercubes and structured data.

    Raises:
        ValueError: If the "red" or "nir" hypercube is missing from the input.

    Returns:
        This function will not return anything, the UdfData object "udf_data"
        must be used to store the resulting data.
    """
    red = None
    nir = None
    # Locate the red and nir cubes by (case-insensitive) substring of their id.
    for cube in udf_data.get_datacube_list():
        cube_id = cube.id.lower()
        if "red" in cube_id:
            red = cube
        if "nir" in cube_id:
            nir = cube
    # Raise a specific exception type instead of the original bare `Exception`
    # (ValueError is a subclass, so existing `except Exception` callers still work).
    if red is None:
        raise ValueError("Red hypercube is missing in input")
    if nir is None:
        raise ValueError("Nir hypercube is missing in input")
    ndvi = (nir.array - red.array) / (nir.array + red.array)
    ndvi.name = "NDVI"
    # Replace all input cubes with the single NDVI result cube.
    udf_data.set_datacube_list([XarrayDataCube(array=ndvi)])
def test_run_udf_code_map_fabs():
    """The map_fabs UDF replaces cube values with their absolute values."""
    udf_code = _get_udf_code("map_fabs.py")
    xdc = _build_txy_data(
        ts=[2019, 2020, 2021], xs=[2, 3, 4], ys=[10, 20, 30],
        name="temp", x_factor=-10,
    )
    result = run_udf_code(code=udf_code, data=UdfData(datacube_list=[xdc]))
    # Input contains negative values...
    assert list(xdc.array[0, :2, :].values.ravel()) == [0, 1, 2, -10, -9, -8]
    output, = result.get_datacube_list()
    assert output.id == "temp_fabs"
    assert output.array.name == "temp_fabs"
    assert output.array.shape == (3, 3, 3)
    # ...and the output contains their absolute values.
    assert list(output.array[0, :2, :].values.ravel()) == [0, 1, 2, 10, 9, 8]
def test_udf_data_from_dict_structured_data():
    """Structured data deserializes with defaulted type/description fields."""
    data = UdfData.from_dict({"structured_data_list": [{"data": [1, 2, 3]}]})
    assert data.to_dict() == {
        'datacubes': None,
        "feature_collection_list": None,
        'structured_data_list': [
            {"data": [1, 2, 3], "type": "list", "description": "list"},
        ],
        'proj': None,
        'user_context': {},
    }
def test_udf_data_from_dict_datacube():
    """A datacube dict round-trips through from_dict/to_dict unchanged."""
    cube_dict = {"data": [1, 2, 3], "dimensions": [{"name": "x"}]}
    data = UdfData.from_dict({"datacubes": [cube_dict]})
    assert data.to_dict() == {
        'datacubes': [{"data": [1, 2, 3], "dimensions": [{"name": "x"}]}],
        "feature_collection_list": None,
        'structured_data_list': None,
        'proj': None,
        'user_context': {},
    }
def test_run_udf_code_reduce_time_min_median_max():
    """The reduce_time_min_median_max UDF emits three reduced cubes per input."""
    udf_code = _get_udf_code("reduce_time_min_median_max.py")
    temp = _build_txy_data(
        ts=[2018, 2019, 2020], xs=[2, 3], ys=[10, 20, 30],
        name="temp", offset=2,
    )
    result = run_udf_code(code=udf_code, data=UdfData(datacube_list=[temp]))
    cube_min, cube_median, cube_max = result.get_datacube_list()
    assert cube_min.id == "temp_min"
    assert cube_min.array.shape == (2, 3)
    assert list(cube_min.array.values.ravel()) == [2, 3, 4, 12, 13, 14]
    assert cube_median.id == "temp_median"
    assert cube_median.array.shape == (2, 3)
    assert list(cube_median.array.values.ravel()) == [102., 103., 104., 112., 113., 114.]
    assert cube_max.id == "temp_max"
    assert cube_max.array.shape == (2, 3)
    assert list(cube_max.array.values.ravel()) == [202., 203., 204., 212., 213., 214.]
def test_udf_data_from_dict_datacube_repr():
    """A datacube dict round-trips through from_dict/to_dict and reprs correctly.

    Fix: this function originally reused the name `test_udf_data_from_dict_datacube`,
    which shadowed the earlier test of the same name so pytest collected only one
    of them; renamed so both tests run.
    """
    udf_data = UdfData.from_dict(
        {"datacubes": [{"data": [1, 2, 3], "dimensions": [{"name": "x"}]}]}
    )
    assert udf_data.to_dict() == {
        'datacubes': [{"data": [1, 2, 3], "dimensions": [{"name": "x"}]}],
        "feature_collection_list": None,
        'structured_data_list': None,
        'proj': None,
        'user_context': {},
    }
    assert repr(udf_data) == (
        "<UdfData datacube_list:[<XarrayDataCube shape:(3,)>] "
        "feature_collection_list:[] structured_data_list:[]>"
    )