def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    from scipy.signal import savgol_filter

    array: xarray.DataArray = cube.get_array()
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
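# A hedged local-run sketch for the smoothing UDF above: build a small synthetic
# cube and call apply_hypercube directly. The coordinate values and the
# openeo_udf.api.datacube.DataCube import are assumptions made for local testing,
# not part of the UDF contract itself.
import numpy
import xarray
from openeo_udf.api.datacube import DataCube

times = [numpy.datetime64('2020-08-01') + numpy.timedelta64(10 * i, 'D') for i in range(6)]
synthetic = xarray.DataArray(numpy.random.rand(6, 1, 4, 4),
                             dims=['t', 'bands', 'x', 'y'],
                             coords={'t': times, 'bands': ['ndvi']})
smoothed = apply_hypercube(DataCube(synthetic), context={})
print(smoothed.get_array().shape)  # dimensionality is preserved: (6, 1, 4, 4)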
def test_coordinateOrderChanged(self):
    inpcube = DataCube(self.inpcube.get_array().transpose())
    refcube = DataCube(self.refcube.get_array().transpose())
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=False, do_phenology=True))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def test_missingCoordinates(self):
    inpcube = DataCube(self.inpcube.get_array()[:, :, 0, :])
    refcube = DataCube(self.refcube.get_array()[:, :, 0, :])
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=False, do_phenology=True))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies a rolling window median composite to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal dimension 't' as input.
    """
    array: xarray.DataArray = cube.get_array()
    import pandas as pd
    import numpy as np

    # compute dekad start dates; these can be used to resample the data to the desired frequency
    time_dimension_index = array.get_index('t')
    d = time_dimension_index.day - np.clip((time_dimension_index.day - 1) // 10, 0, 2) * 10 - 1
    date = time_dimension_index.values - np.array(d, dtype="timedelta64[D]")

    # replace each value with the median of a 30-sample rolling window;
    # the rolling median also fills gaps on all dates
    composited = array.rolling(t=30, min_periods=1, center=True).median().dropna("t")

    # resample the rolling window medians to dekads
    ten_daily_composite = composited.groupby_bins("t", date).median()
    return DataCube(ten_daily_composite)
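# Standalone illustration of the dekad arithmetic above (not part of the UDF):
# every timestamp is snapped back to the start of its dekad, i.e. the 1st,
# 11th or 21st of the month. The dates below are invented for the example.
import numpy as np
import pandas as pd

idx = pd.DatetimeIndex(['2020-08-03', '2020-08-15', '2020-08-28'])
d = idx.day - np.clip((idx.day - 1) // 10, 0, 2) * 10 - 1
dekad_start = idx.values - np.array(d, dtype='timedelta64[D]')
print(dekad_start)  # 2020-08-01, 2020-08-11, 2020-08-21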
def hyper_min_median_max(udf_data: UdfData):
    """Compute the min, median and max of the time dimension of a hyper cube

    Hypercubes with time dimensions are required. The min, median and max reduction of the
    time axis will be applied to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well
        as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the
        resulting data.

    """
    # Iterate over each tile
    cube_list = []
    for cube in udf_data.get_datacube_list():
        cube_min = cube.array.min(dim="t")
        cube_median = cube.array.median(dim="t")
        cube_max = cube.array.max(dim="t")
        cube_min.name = cube.id + "_min"
        cube_median.name = cube.id + "_median"
        cube_max.name = cube.id + "_max"
        cube_list.append(DataCube(array=cube_min))
        cube_list.append(DataCube(array=cube_median))
        cube_list.append(DataCube(array=cube_max))
    udf_data.set_datacube_list(cube_list)
def test_xy_nolabels(self):
    ref = self.buildData()
    ref = DataCube(ref.get_array().drop('x').drop('y'))
    for ifmt in formats:
        fn = os.path.join(self.tmpdir, 'test_xy_nolabels.' + ifmt)
        print("Testing " + fn)
        datacube_to_file(ref, fn, fmt=ifmt)
        res = datacube_from_file(fn, fmt=ifmt)
        xarray.testing.assert_allclose(res.get_array(), ref.get_array())
def test_hasNoDataTimeSeries(self):
    inpcube = DataCube(self.inpcube.get_array().where(
        self.inpcube.get_array().x != 3, numpy.nan, drop=False))
    refcube = DataCube(self.refcube.get_array().where(
        self.refcube.get_array().x != 3, 0., drop=False))
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=False, do_phenology=True))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def test_oldnewPhenologyIsSame(self):
    optcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        DataCube(self.inpcube.get_array().drop(['x', 'y'])),
        dict(do_smoothing=False, do_phenology=True))
    oldcube = udf_phenology_old.apply_datacube(
        DataCube(self.inpcube.get_array().drop(['x', 'y'])), {})
    optarr = optcube.get_array().squeeze('t', drop=True)
    oldarr = oldcube.get_array().dt.dayofyear.astype(numpy.float64)
    xarray.testing.assert_allclose(optarr, oldarr)
def test_band_nodim(self):
    ref = self.buildData()
    ref = DataCube(ref.get_array()[:, 0].drop('bands'))
    for ifmt in formats:
        fn = os.path.join(self.tmpdir, 'test_band_nodim.' + ifmt)
        print("Testing " + fn)
        datacube_to_file(ref, fn, fmt=ifmt)
        res = datacube_from_file(fn, fmt=ifmt)
        xarray.testing.assert_allclose(res.get_array(), ref.get_array())
def test_typing_float(self):
    ref = self.buildData()
    ref = DataCube(ref.get_array().astype(numpy.float64))
    for ifmt in formats:
        fn = os.path.join(self.tmpdir, 'test_typing_float.' + ifmt)
        print("Testing " + fn)
        datacube_to_file(ref, fn, fmt=ifmt)
        res = datacube_from_file(fn, fmt=ifmt)
        xarray.testing.assert_allclose(res.get_array(), ref.get_array())
        self.assertEqual(res.get_array().dtype, ref.get_array().dtype)
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies Savitzky-Golay smoothing to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal dimension 't' as input.
    """
    from scipy.signal import savgol_filter

    array: xarray.DataArray = cube.get_array()
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
def test_multiBand(self):
    inparr1 = self.inpcube.get_array()
    inparr2 = self.inpcube.get_array().assign_coords(bands=['extraband'])
    refarr1 = self.refcube.get_array()
    refarr2 = self.refcube.get_array().assign_coords(bands=['extraband'])
    inpcube = DataCube(xarray.concat([inparr1, inparr2], dim='bands'))
    refcube = DataCube(xarray.concat([refarr1, refarr2], dim='bands'))
    outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
        inpcube, dict(do_smoothing=True, do_phenology=False))
    xarray.testing.assert_allclose(outcube.get_array(), refcube.get_array())
def apply_datacube(cube: DataCube, context: dict) -> DataCube:
    """Compute the NDVI based on Sentinel-2 tiles

    Bands with the labels "TOC-B04_10M" (red) and "TOC-B08_10M" (NIR) are required. The NDVI
    computation will be applied to all time stamped 2D raster tiles that have equal time stamps.

    """
    array: xarray.DataArray = cube.get_array()
    red = array.sel(bands="TOC-B04_10M")
    nir = array.sel(bands="TOC-B08_10M")
    ndvi = (nir - red) / (nir + red)
    return DataCube(ndvi)
def datacube_from_file(filename, fmt='netcdf') -> 'openeo_udf.api.datacube.DataCube':
    """
    Converts source files of different formats into openeo_udf.api.datacube.DataCube in memory

    :param filename: the file on disk
    :param fmt: format to load from
    :return: openeo_udf.api.datacube.DataCube
    """
    from openeo_udf.api.datacube import DataCube
    if fmt.lower() == 'netcdf':
        return DataCube(_load_DataArray_from_NetCDF(filename))
    if fmt.lower() == 'json':
        return DataCube(_load_DataArray_from_JSON(filename))
    raise ValueError("Unsupported format: " + str(fmt))
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    # access the underlying xarray
    inarr = cube.get_array()

    # ndvi
    B4 = inarr.loc[:, 'TOC-B04_10M']
    B8 = inarr.loc[:, 'TOC-B08_10M']
    ndvi = (B8 - B4) / (B8 + B4)

    # extend bands dim
    ndvi = ndvi.expand_dims(dim='bands', axis=-3).assign_coords(bands=['ndvi'])

    # wrap back to datacube and return
    return DataCube(ndvi)
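# Hedged usage sketch for the NDVI UDF above: run it on a tiny synthetic cube
# that carries the two expected band labels. Shapes, values and the local
# DataCube import are assumptions made only for this illustration.
import numpy
import xarray
from openeo_udf.api.datacube import DataCube

arr = xarray.DataArray(numpy.ones((2, 2, 3, 3)),
                       dims=['t', 'bands', 'y', 'x'],
                       coords={'bands': ['TOC-B04_10M', 'TOC-B08_10M']})
out = apply_datacube(DataCube(arr), context={})
print(out.get_array().bands.values)  # ['ndvi']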
def test_hypercube_api(self):
    """Test the hypercube mean reduction"""

    dcm = create_data_collection_model_example()
    dc = DataCube.from_data_collection(data_collection=dcm)
    print(dc[0].get_array())
    print(dc[1].get_array())

    dc1: DataCube = dc[0]
    dc2: DataCube = dc[1]

    self.assertEqual(dc1.id, dcm.variables_collections[0].variables[0].name)
    self.assertEqual(dc2.id, dcm.variables_collections[0].variables[1].name)

    a1: xarray.DataArray = dc1.get_array()
    a1 = numpy.asarray(a1).reshape([27])
    v1 = dcm.variables_collections[0].variables[0].values
    v1 = numpy.asarray(v1)
    self.assertTrue((a1 == v1).all())

    a2: xarray.DataArray = dc2.get_array()
    a2 = numpy.asarray(a2).reshape([27])
    v2 = dcm.variables_collections[0].variables[1].values
    v2 = numpy.asarray(v2)
    self.assertTrue((a2 == v2).all())
def apply_timeseries_generic(udf_data: UdfData, callback: Callable = apply_timeseries):
    """
    Implements the UDF contract by calling a user provided time series transformation function (apply_timeseries).
    Multiple bands are currently handled separately, another approach could provide a dataframe with a timeseries for each band.

    :param udf_data:
    :return:
    """
    # The list of tiles that were created
    tile_results = []

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        array3d = []
        # use rollaxis to make the time dimension the last one
        for time_x_slice in numpy.rollaxis(cube.array.values, 1):
            time_x_result = []
            for time_slice in time_x_slice:
                series = pandas.Series(time_slice)
                transformed_series = callback(series, udf_data.user_context)
                time_x_result.append(transformed_series)
            array3d.append(time_x_result)

        # We need to create a new 3D array with the correct shape for the computed aggregate
        result_tile = numpy.rollaxis(numpy.asarray(array3d), 1)
        assert result_tile.shape == cube.array.shape

        # Create the new raster collection cube
        rct = DataCube(xarray.DataArray(result_tile))
        tile_results.append(rct)
    # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_datacube_list(tile_results)
    return udf_data
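# A hedged example of a callback that apply_timeseries_generic could dispatch to:
# it receives one pandas.Series per pixel (the values along time) plus the user
# context, and must return a Series of the same length. The gap-filling and
# window size below are illustrative choices, not a prescribed implementation.
def apply_timeseries_example(series: pandas.Series, context: dict) -> pandas.Series:
    filled = series.interpolate(limit_direction='both')
    return filled.rolling(3, min_periods=1, center=True).mean()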
def hyper_pytorch_ml(udf_data: UdfData):
    """Apply a pre-trained pytorch machine learning model on a hypercube

    The model must be a pytorch model, and its prediction (call) method must accept
    a torch.autograd.Variable as input.

    Args:
        udf_data (UdfData): The UDF data object that contains hypercubes and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the
        resulting data.

    """
    cube = udf_data.get_datacube_list()[0]

    # This is the input data of the model.
    input = torch.autograd.Variable(torch.Tensor(cube.array.values))
    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m(input)
    result = xarray.DataArray(data=pred.detach().numpy(), dims=cube.array.dims,
                              coords=cube.array.coords, name=cube.id + "_pytorch")
    # Create the new raster collection tile
    result_cube = DataCube(array=result)
    # Insert the new hypercube in the input object.
    udf_data.set_datacube_list([result_cube])
def reduceXY(xskip, yskip, datacube):
    dataarray = datacube.get_array()
    dataarray = dataarray.loc[{'x': dataarray.x[::xskip], 'y': dataarray.y[::yskip]}]
    return DataCube(dataarray)
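# Hedged usage sketch for reduceXY: thin a small synthetic cube by keeping every
# second coordinate along x and y. The array content is invented for illustration.
import numpy
import xarray
from openeo_udf.api.datacube import DataCube

small = DataCube(xarray.DataArray(numpy.arange(16.).reshape(4, 4),
                                  dims=['x', 'y'],
                                  coords={'x': [0., 1., 2., 3.], 'y': [0., 1., 2., 3.]}))
print(reduceXY(2, 2, small).get_array().shape)  # (2, 2)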
def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR hypercubes

    Hypercubes with ids "red" and "nir" are required. The NDVI computation will be applied
    to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well
        as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the
        resulting data.

    """
    red = None
    nir = None

    # Iterate over each tile
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red hypercube is missing in input")
    if nir is None:
        raise Exception("Nir hypercube is missing in input")

    ndvi = (nir.array - red.array) / (nir.array + red.array)
    ndvi.name = "NDVI"

    hc = DataCube(array=ndvi)
    udf_data.set_datacube_list([hc, ])
def apply_datacube(udf_cube: DataCube, context: dict) -> DataCube:
    """
    Apply the BFASTmonitor method to detect a break at the end of the time series of the datacube.
    This UDF reduces the time dimension of the input datacube.

    :param udf_cube: the openEO virtual DataCube object
    :return DataCube(breaks_xr):
    """
    from datetime import datetime

    # convert the openEO datacube into the xarray DataArray structure
    my_xarray: xr.DataArray = udf_cube.get_array()
    # select single band, removes band dimension
    my_xarray = my_xarray.sel(bands='VV')

    # define the history and monitoring periods:
    start_hist = datetime(2017, 5, 1)
    start_monitor = datetime(2019, 1, 1)
    end_monitor = datetime(2019, 12, 29)

    # get the dates from the data cube:
    dates = [pd.Timestamp(date).to_pydatetime() for date in my_xarray.coords['t'].values]

    # pre-processing - crop the input data cube according to the history and monitor periods:
    data, dates = crop_data_dates(my_xarray.values, dates, start_hist, end_monitor)
    # !!! Note !!! that data has the shape 91, and not 92 for our dataset. The reason is the definition in
    # the bfast utils.py script where the start_hist is set < than dates, and not <= than dates.
    # -------------------------------------

    # specify the BFASTmonitor parameters:
    model = BFASTMonitor(start_monitor, freq=31, k=3, verbose=1, hfrac=0.25, trend=True,
                         level=0.05, backend='python')

    # run the monitoring:
    # model.fit(data, dates, nan_value=udf_data.nodatavals[0])
    model.fit(data, dates)

    # get the detected breaks as an xarray DataArray:
    breaks_xr = xr.DataArray(model.breaks,
                             coords=[my_xarray.coords['x'].values, my_xarray.coords['y'].values],
                             dims=['x', 'y'])

    # return the breaks as openEO DataCube:
    return DataCube(breaks_xr)
def test_run_local_udf_frommemory(self):
    from openeo_udf.api.datacube import DataCube

    dc = self.buildData()
    r = rest_DataCube.execute_local_udf(udfcode, dc)
    result = r.get_datacube_list()[0].get_array()

    exec(udfcode)
    ref = locals()["apply_datacube"](
        DataCube(dc.get_array().astype(numpy.float64).drop(labels='x').drop(labels='y')), {}).get_array()

    xarray.testing.assert_allclose(result, ref)
def test_generate_merged_output(self):
    merge = load_DataCube('tests/merged_cube.json').get_array()
    hasPV = merge[:, 3].dropna('t', how='all').t
    merge = merge.loc[{'t': hasPV.values}][19:21]
    merge = (merge * 100.).astype(numpy.int64).astype(numpy.float64) / 100.
    merge = merge.where(merge > -1.e10).where(merge < 1.e10)
    save_DataCube('tests/test01_merged.json', DataCube(merge))
    plot_xarray_dataarray(merge)
def rct_sklearn_ml(udf_data: UdfData):
    """Apply a pre-trained sklearn machine learning model on RED and NIR tiles

    The model must be a sklearn model that has a prediction method: m.predict(X)
    The prediction method must accept a pandas.DataFrame as input.

    Tiles with ids "red" and "nir" are required. The machine learning model will be applied to all
    spatio-temporal pixels of the two input raster collections.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the
        resulting data.

    """
    red = None
    nir = None

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red data cube is missing in input")
    if nir is None:
        raise Exception("Nir data cube is missing in input")

    # We need to reshape the data for prediction into one dimensional arrays
    three_dim_shape = red.array.shape
    one_dim_shape = numpy.prod(three_dim_shape)

    red_reshape = red.array.values.reshape((one_dim_shape))
    nir_reshape = nir.array.values.reshape((one_dim_shape))

    # This is the input data of the model. It must be trained with a DataFrame using the same names.
    X = pandas.DataFrame()
    X["red"] = red_reshape
    X["nir"] = nir_reshape

    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m.predict(X)
    # Reshape the one dimensional predicted values to three dimensions based on the input shape
    pred_reshape = pred.reshape(three_dim_shape)

    result = xarray.DataArray(data=pred_reshape, dims=red.array.dims,
                              coords=red.array.coords, name=red.id + "_sklearn")
    # Create the new raster collection cube
    h = DataCube(array=result)
    # Insert the new hypercube in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_datacube_list([h, ])
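# A hedged sketch of how a model compatible with rct_sklearn_ml could be trained:
# any sklearn estimator exposing predict() works, provided it was fit on a
# DataFrame with the same "red" and "nir" column names. The training data and
# the NDVI-like target below are invented purely for illustration.
import numpy
import pandas
from sklearn.linear_model import LinearRegression

train = pandas.DataFrame({"red": numpy.random.rand(100), "nir": numpy.random.rand(100)})
target = (train["nir"] - train["red"]) / (train["nir"] + train["red"])
model = LinearRegression().fit(train, target)
# such a model would then be registered in the UdfData machine-learning model list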
def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    """Reduce the time dimension for each tile and compute min, mean, max and sum for each pixel
    over time.

    Each raster tile in the data cube will be reduced by time. Minimum, maximum, mean and sum are
    computed for each pixel over time.

    Args:
        cube (DataCube): The data cube with a temporal dimension 't'.

    Returns:
        DataCube: A new data cube whose 'bands' dimension holds the min, max, sum and mean reductions.

    """
    array: xarray.DataArray = cube.get_array()
    result = xarray.concat(
        [array.min(dim='t'), array.max(dim='t'), array.sum(dim='t'), array.mean(dim='t')],
        dim='bands'
    )
    return DataCube(result)
def test_run_local_udf_fromfile(self):
    from openeo_udf.api.datacube import DataCube

    with TemporaryDirectory() as td:
        dc = self.buildData()
        tmpfile = os.path.join(td, 'test_data')
        dc.to_file(tmpfile)
        r = rest_DataCube.execute_local_udf(udfcode, tmpfile)
        result = r.get_datacube_list()[0].get_array()

        exec(udfcode)
        ref = locals()["apply_datacube"](
            DataCube(dc.get_array().astype(numpy.float64).drop(labels='x').drop(labels='y')), {}).get_array()

        xarray.testing.assert_allclose(result, ref)
def apply_datacube(cube: DataCube, context) -> DataCube:
    import xarray
    import numpy as np

    # Get the xarray DataArray containing the time series
    array: xarray.DataArray = cube.get_array()

    # bucket the values relative to the median of the series
    low = 0.85
    high = 1.15
    step = 0.1
    mean = array.median(skipna=True)
    bins = np.arange(low, high + step, step) * mean.values.tolist()
    bins = np.concatenate([[0], bins, [255]])
    buckets = np.digitize(array.values, bins=bins).astype(float)

    return DataCube(xarray.DataArray(
        buckets,
        coords={
            't': array.t.values,
            'bands': array.bands.values,
            'y': array.y.values,
            'x': array.x.values,
        },
        dims=['t', 'bands', 'y', 'x']))
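# Standalone illustration of the bucketing above (not part of the UDF): the bin
# edges sit at roughly 85%, 95%, 105% and 115% of the median, padded with 0 and
# 255, and numpy.digitize maps each value to its bucket index. The median value
# of 100 is invented for the example.
import numpy as np

median = 100.0
bins = np.concatenate([[0], np.arange(0.85, 1.15 + 0.1, 0.1) * median, [255]])
print(bins)                                # ~[0, 85, 95, 105, 115, 255]
print(np.digitize([80, 100, 120], bins))   # [1 3 5]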
def hyper_map_fabs(udf_data: UdfData):
    """Compute the absolute values of each hyper cube in the provided data

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well
        as hypercubes and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the
        resulting data.

    """
    # Iterate over each tile
    cube_list = []
    for cube in udf_data.get_datacube_list():
        result = numpy.fabs(cube.array)
        result.name = cube.id + "_fabs"
        cube_list.append(DataCube(array=result))
    udf_data.set_datacube_list(cube_list)
def test_gan(self):
    # load and inverse scale according to UDF
    arr = self.build_array()
    arr.loc[{'bands': 'ndvi'}] = 250. * (arr.loc[{'bands': 'ndvi'}] + 0.08)
    arr.loc[{'bands': 'VH'}] = 10.**(arr.loc[{'bands': 'VH'}] / 10.)
    arr.loc[{'bands': 'VV'}] = 10.**(arr.loc[{'bands': 'VV'}] / 10.)

    # Create a simple model that averages over time and then over bands
    inS1 = tf.keras.Input(shape=[19, 128, 128, 2])
    avS1 = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1, keepdims=False))(inS1)
    inS2 = tf.keras.Input(shape=[19, 128, 128, 1])
    avS2 = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1, keepdims=False))(inS2)
    inPV = tf.keras.Input(shape=[19, 128, 128, 1])
    avPV = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1, keepdims=False))(inPV)
    ct = tf.keras.layers.Concatenate(axis=3)([avS1, avS2, avPV])
    av = tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=3, keepdims=True))(ct)
    model = tf.keras.Model([inS1, inS2, inPV], av)
    model.save('/tmp/test_gan_model.h5')

    # run gan
    result = apply_datacube(DataCube(arr), dict(
        prediction_model='/tmp/test_gan_model.h5',
        gan_window_half='9D',
        gan_steps='2D',
        gan_samples=19,
        acquisition_steps='10D',
        scaler='passthrough'
    ))

    # compute check: with this setup
    #   6 front NaNs
    #   5 data of 2020-06-29
    #   3 data of 2020-07-01
    #   5 trailing NaNs
    #   NaNs get filled with zeros
    check = arr.dropna('t')
    check = 5. / 19. * check[0] + 3. / 19. * check[1]
    check = check.mean('bands').expand_dims({'bands': ['predictions']})
    check = check.expand_dims({'t': [numpy.datetime64('2020-07-04')]})
    xarray.testing.assert_allclose(check.astype(numpy.float32), result.get_array())
def buildData(self):
    a = numpy.zeros((3, 2, 5, 6), numpy.int32)
    for t in range(a.shape[0]):
        for b in range(a.shape[1]):
            for x in range(a.shape[2]):
                for y in range(a.shape[3]):
                    a[t, b, x, y] = t * 1000 + b * 100 + x * 10 + y
    return DataCube(xarray.DataArray(
        a,
        dims=['t', 'bands', 'x', 'y'],
        coords={
            't': [numpy.datetime64('2020-08-01'), numpy.datetime64('2020-08-11'), numpy.datetime64('2020-08-21')],
            'bands': ['bandzero', 'bandone'],
            'x': [10., 11., 12., 13., 14.],
            'y': [20., 21., 22., 23., 24., 25.]
        }))