Example #1
def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    import xarray
    from scipy.signal import savgol_filter

    array: xarray.DataArray = cube.get_array()
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
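As a side note, the call above smooths along axis 0 (the time dimension) with a window of 5 samples and a quadratic fit. A minimal standalone sketch of that behaviour, with made-up values:

import numpy as np
from scipy.signal import savgol_filter

series = np.array([1.0, 1.1, 5.0, 1.2, 1.3, 1.2, 1.1])
print(savgol_filter(series, window_length=5, polyorder=2))
# the outlier at index 2 is pulled towards its neighbours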
Example #2
 def test_coordinateOrderChanged(self):
     inpcube = DataCube(self.inpcube.get_array().transpose())
     refcube = DataCube(self.refcube.get_array().transpose())
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=False, do_phenology=True))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #3
 def test_missingCoordinates(self):
     inpcube = DataCube(self.inpcube.get_array()[:, :, 0, :])
     refcube = DataCube(self.refcube.get_array()[:, :, 0, :])
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=False, do_phenology=True))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #4
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies a rolling window median composite to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal dimension 't' as input.
    """

    array: xarray.DataArray = cube.get_array()

    import pandas as pd
    import numpy as np

    # this method computes dekads; it can be used to resample data to the desired frequency

    time_dimension_index = array.get_index('t')

    d = time_dimension_index.day - np.clip(
        (time_dimension_index.day - 1) // 10, 0, 2) * 10 - 1
    date = time_dimension_index.values - np.array(d, dtype="timedelta64[D]")

    #replace each value with 30-day window median
    #first median rolling window to fill gaps on all dates
    composited = array.rolling(t=30, min_periods=1,
                               center=True).median().dropna("t")
    #resample rolling window medians to dekads
    ten_daily_composite = composited.groupby_bins("t", date).median()
    return DataCube(ten_daily_composite)
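To make the dekad arithmetic above concrete, here is the same expression on a few arbitrary dates: it snaps each day of month to the start of its dekad (the 1st, 11th or 21st), with days 31 and beyond folded into the third dekad.

import numpy as np
import pandas as pd

idx = pd.DatetimeIndex(["2021-03-05", "2021-03-17", "2021-03-25", "2021-03-31"])
d = idx.day - np.clip((idx.day - 1) // 10, 0, 2) * 10 - 1
print(idx.values - np.array(d, dtype="timedelta64[D]"))
# -> 2021-03-01, 2021-03-11, 2021-03-21, 2021-03-21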
Example #5
def hyper_min_median_max(udf_data: UdfData):
    """Compute the min, median and max of the time dimension of a hyper cube

    Hypercubes with time dimensions are required. The min, median and max reduction of the time axis will be applied
    to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # Iterate over each tile
    cube_list = []
    for cube in udf_data.get_datacube_list():
        min = cube.array.min(dim="t")
        median = cube.array.median(dim="t")
        max = cube.array.max(dim="t")

        min.name = cube.id + "_min"
        median.name = cube.id + "_median"
        max.name = cube.id + "_max"

        cube_list.append(DataCube(array=min))
        cube_list.append(DataCube(array=median))
        cube_list.append(DataCube(array=max))

    udf_data.set_datacube_list(cube_list)
Example #6
 def test_xy_nolabels(self):
     ref = self.buildData()
     ref = DataCube(ref.get_array().drop('x').drop('y'))
     for ifmt in formats:
         fn = os.path.join(self.tmpdir, 'test_xy_nolabels.' + ifmt)
         print("Testing " + fn)
         datacube_to_file(ref, fn, fmt=ifmt)
         res = datacube_from_file(fn, fmt=ifmt)
     xarray.testing.assert_allclose(res.get_array(), ref.get_array())
Example #7
 def test_hasNoDataTimeSeries(self):
     inpcube = DataCube(self.inpcube.get_array().where(
         self.inpcube.get_array().x != 3, numpy.nan, drop=False))
     refcube = DataCube(self.refcube.get_array().where(
         self.refcube.get_array().x != 3, 0., drop=False))
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=False, do_phenology=True))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #8
 def test_oldnewPhenologyIsSame(self):
     optcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         DataCube(self.inpcube.get_array().drop(['x', 'y'])),
         dict(do_smoothing=False, do_phenology=True))
     oldcube = udf_phenology_old.apply_datacube(
         DataCube(self.inpcube.get_array().drop(['x', 'y'])), {})
     optarr = optcube.get_array().squeeze('t', drop=True)
     oldarr = oldcube.get_array().dt.dayofyear.astype(numpy.float64)
     xarray.testing.assert_allclose(optarr, oldarr)
Example #9
 def test_band_nodim(self):
     ref = self.buildData()
     ref = DataCube(ref.get_array()[:, 0].drop('bands'))
     for ifmt in formats:
         fn = os.path.join(self.tmpdir, 'test_band_nodim.' + ifmt)
         print("Testing " + fn)
         datacube_to_file(ref, fn, fmt=ifmt)
         res = datacube_from_file(fn, fmt=ifmt)
     xarray.testing.assert_allclose(res.get_array(), ref.get_array())
Example #10
 def test_typing_float(self):
     ref = self.buildData()
     ref = DataCube(ref.get_array().astype(numpy.float64))
     for ifmt in formats:
         fn = os.path.join(self.tmpdir, 'test_typing_float.' + ifmt)
         print("Testing " + fn)
         datacube_to_file(ref, fn, fmt=ifmt)
         res = datacube_from_file(fn, fmt=ifmt)
     xarray.testing.assert_allclose(res.get_array(), ref.get_array())
     self.assertEqual(res.get_array().dtype, ref.get_array().dtype)
Example #11
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies a savitzky-golay smoothing to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal dimension 't' as input.
    """
    array: xarray.DataArray = cube.get_array()
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(
        xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
Example #12
 def test_multiBand(self):
     inparr1 = self.inpcube.get_array()
     inparr2 = self.inpcube.get_array().assign_coords(bands=['extraband'])
     refarr1 = self.refcube.get_array()
     refarr2 = self.refcube.get_array().assign_coords(bands=['extraband'])
     inpcube = DataCube(xarray.concat([inparr1, inparr2], dim='bands'))
     refcube = DataCube(xarray.concat([refarr1, refarr2], dim='bands'))
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=True, do_phenology=False))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #13
def apply_datacube(cube: DataCube, context: dict) -> DataCube:
    """Compute the NDVI based on sentinel2 tiles

    Tiles with ids "red" and "nir" are required. The NDVI computation will be applied
    to all time stamped 2D raster tiles that have equal time stamps.

    """
    array: xarray.DataArray = cube.get_array()
    red = array.sel(bands="TOC-B04_10M")
    nir = array.sel(bands="TOC-B08_10M")
    ndvi = (nir - red) / (nir + red)
    return DataCube(ndvi)
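One way to exercise this UDF outside openEO is to call it directly on a small hand-built cube; the shapes and values below are arbitrary, the band labels are the ones the code expects, and apply_datacube is assumed to be the function defined above.

import numpy
import xarray
from openeo_udf.api.datacube import DataCube

data = numpy.random.rand(2, 2, 3, 3)
array = xarray.DataArray(data, dims=['t', 'bands', 'y', 'x'],
                         coords={'bands': ['TOC-B04_10M', 'TOC-B08_10M']})
ndvi_cube = apply_datacube(DataCube(array), {})
print(ndvi_cube.get_array().dims)  # the 'bands' dimension has been reduced away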
Example #14
def datacube_from_file(filename,
                       fmt='netcdf') -> 'openeo_udf.api.datacube.DataCube':
    """
    Converts source files of different formats into openeo_udf.api.datacube.DataCube in memory
    :param filename: the file on disk
    :param fmt: format to load from
    
    :return: openeo_udf.api.datacube.DataCube
    """
    from openeo_udf.api.datacube import DataCube
    if fmt.lower() == 'netcdf':
        return DataCube(_load_DataArray_from_NetCDF(filename))
    if fmt.lower() == 'json':
        return DataCube(_load_DataArray_from_JSON(filename))
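A round-trip sketch using the companion datacube_to_file helper that the tests above rely on; the array layout, the temporary path, and the format are arbitrary.

import numpy
import xarray
from openeo_udf.api.datacube import DataCube

arr = xarray.DataArray(numpy.zeros((1, 1, 2, 2)), dims=['t', 'bands', 'x', 'y'],
                       coords={'t': [numpy.datetime64('2020-08-01')],
                               'bands': ['bandzero'],
                               'x': [10., 11.], 'y': [20., 21.]})
cube = DataCube(arr)
datacube_to_file(cube, '/tmp/roundtrip.nc', fmt='netcdf')  # helper from the same module
restored = datacube_from_file('/tmp/roundtrip.nc', fmt='netcdf')
xarray.testing.assert_allclose(restored.get_array(), cube.get_array())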
Example #15
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:

    # access the underlying xarray
    inarr=cube.get_array()

    # ndvi
    B4=inarr.loc[:,'TOC-B04_10M']
    B8=inarr.loc[:,'TOC-B08_10M']
    ndvi=(B8-B4)/(B8+B4)
    
    # extend bands dim
    ndvi=ndvi.expand_dims(dim='bands', axis=-3).assign_coords(bands=['ndvi'])
    
    # wrap back to datacube and return
    return DataCube(ndvi)
Example #16
    def test_hypercube_api(self):
        """Test the hypercube mean reduction"""

        dcm = create_data_collection_model_example()
        dc = DataCube.from_data_collection(data_collection=dcm)
        print(dc[0].get_array())
        print(dc[1].get_array())

        dc1: DataCube = dc[0]
        dc2: DataCube = dc[1]

        self.assertEqual(dc1.id,
                         dcm.variables_collections[0].variables[0].name)
        self.assertEqual(dc2.id,
                         dcm.variables_collections[0].variables[1].name)

        a1: xarray.DataArray = dc1.get_array()
        a1 = numpy.asarray(a1).reshape([27])
        v1 = dcm.variables_collections[0].variables[0].values
        v1 = numpy.asarray(v1)
        self.assertTrue(a1.all() == v1.all())

        a2: xarray.DataArray = dc2.get_array()
        a2 = numpy.asarray(a2).reshape([27])
        v2 = dcm.variables_collections[0].variables[1].values
        v2 = numpy.asarray(v2)
        self.assertTrue(a2.all() == v2.all())
Example #17
def apply_timeseries_generic(udf_data: UdfData, callback: Callable = apply_timeseries):
    """
    Implements the UDF contract by calling a user-provided time series transformation function (apply_timeseries).
    Multiple bands are currently handled separately; another approach could provide a dataframe with a time series for each band.

    :param udf_data:
    :return:
    """
    # The list of tiles that were created
    tile_results = []

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        array3d = []
        #use rollaxis to make the time dimension the last one
        for time_x_slice in numpy.rollaxis(cube.array.values, 1):
            time_x_result = []
            for time_slice in time_x_slice:
                series = pandas.Series(time_slice)
                transformed_series = callback(series,udf_data.user_context)
                time_x_result.append(transformed_series)
            array3d.append(time_x_result)

        # We need to create a new 3D array with the correct shape for the computed aggregate
        result_tile = numpy.rollaxis(numpy.asarray(array3d),1)
        assert result_tile.shape == cube.array.shape
        # Create the new raster collection cube
        rct = DataCube(xarray.DataArray(result_tile))
        tile_results.append(rct)
    # Insert the new tiles as list of raster collection tiles in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_datacube_list(tile_results)
    return udf_data
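A sketch of what a user-provided callback for this contract could look like; the gap-filling logic is purely illustrative. Each call receives the time series of one pixel and band as a pandas.Series plus the user context.

import pandas

def apply_timeseries(series: pandas.Series, context: dict) -> pandas.Series:
    # illustrative only: fill gaps by linear interpolation, keep everything else unchanged
    return series.interpolate(limit_direction='both')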
Example #18
def hyper_pytorch_ml(udf_data: UdfData):
    """Apply a pre-trained pytorch machine learn model on a hypercube

    The model must be a pytorch model that has expects the input data in the constructor
    The prediction method must accept a torch.autograd.Variable as input.

    Args:
        udf_data (UdfData): The UDF data object that hypercubes and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    cube = udf_data.get_datacube_list()[0]

    # This is the input data of the model.
    input = torch.autograd.Variable(torch.Tensor(cube.array.values))
    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m(input)
    result = xarray.DataArray(data=pred.detach().numpy(),
                              dims=cube.array.dims,
                              coords=cube.array.coords,
                              name=cube.id + "_pytorch")
    # Create the new raster collection tile
    result_cube = DataCube(array=result)
    # Insert the new  hypercube in the input object.
    udf_data.set_datacube_list([result_cube])
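For illustration, a minimal model object that satisfies the contract described above (callable on a tensor and returning a tensor of the same shape); the single learned scale factor is a stand-in for a real network.

import torch

class IdentityScale(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.scale = torch.nn.Parameter(torch.tensor(1.0))

    def forward(self, x):
        # multiply every pixel by one learned scalar, preserving the cube shape
        return x * self.scale

pred = IdentityScale()(torch.autograd.Variable(torch.rand(3, 4, 5)))
print(pred.shape)  # torch.Size([3, 4, 5])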
Example #19
def reduceXY(xskip, yskip, datacube):
    dataarray = datacube.get_array()
    dataarray = dataarray.loc[{
        'x': dataarray.x[::xskip],
        'y': dataarray.y[::yskip]
    }]
    return DataCube(dataarray)
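A usage sketch with an arbitrary 5 x 6 grid: keeping every second x label and every third y label reduces the array to 3 x 2. The DataCube import path matches the one used elsewhere on this page.

import numpy
import xarray
from openeo_udf.api.datacube import DataCube

arr = xarray.DataArray(numpy.zeros((5, 6)), dims=['x', 'y'],
                       coords={'x': numpy.arange(5.), 'y': numpy.arange(6.)})
print(reduceXY(2, 3, DataCube(arr)).get_array().shape)  # (3, 2)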
Example #20
def hyper_ndvi(udf_data: UdfData):
    """Compute the NDVI based on RED and NIR hypercubes

    Hypercubes with ids "red" and "nir" are required. The NDVI computation will be applied
    to all hypercube dimensions.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    red = None
    nir = None

    # Iterate over each tile
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red hypercube is missing in input")
    if nir is None:
        raise Exception("Nir hypercube is missing in input")

    ndvi = (nir.array - red.array) / (nir.array + red.array)
    ndvi.name = "NDVI"

    hc = DataCube(array=ndvi)
    udf_data.set_datacube_list([hc, ])
Example #21
def apply_datacube(udf_cube: DataCube, context: dict) -> DataCube:
    """
    Apply the BFASTmonitor method to detect a break at the end of time-series of the datacube.
    This UDF reduce the time dimension of the input datacube. 
    :param udf_cube: the openEO virtual DataCube object 
    :return DataCube(breaks_xr):
    """
    from datetime import datetime
    # convert the openEO datacube into the xarray DataArray structure
    my_xarray: xr.DataArray = udf_cube.get_array()
    #select single band, removes band dimension
    my_xarray = my_xarray.sel(bands='VV')
    #
    start_hist = datetime(2017, 5, 1)
    start_monitor = datetime(2019, 1, 1)
    end_monitor = datetime(2019, 12, 29)
    # get the dates from the data cube:
    dates = [
        pd.Timestamp(date).to_pydatetime()
        for date in my_xarray.coords['t'].values
    ]
    # pre-processing - crop the input data cube according to the history and monitor periods:
    data, dates = crop_data_dates(my_xarray.values, dates, start_hist,
                                  end_monitor)
    # !!! Note !!! that data has the shape 91, and not 92 for our dataset. The reason is the definition in
    # the bfast utils.py script where the start_hist is set < than dates, and not <= than dates.
    # -------------------------------------
    # specify the BFASTmonitor parameters:
    model = BFASTMonitor(start_monitor,
                         freq=31,
                         k=3,
                         verbose=1,
                         hfrac=0.25,
                         trend=True,
                         level=0.05,
                         backend='python')
    # run the monitoring:
    # model.fit(data, dates, nan_value=udf_data.nodatavals[0])
    model.fit(data, dates)
    # get the detected breaks as an xarray Data Array:
    breaks_xr = xr.DataArray(
        model.breaks,
        coords=[my_xarray.coords['x'].values, my_xarray.coords['y'].values],
        dims=['x', 'y'])
    # return the breaks as openEO DataCube:
    return DataCube(breaks_xr)
Example #22
 def test_run_local_udf_frommemory(self):
     from openeo_udf.api.datacube import DataCube
     dc=self.buildData()
     r=rest_DataCube.execute_local_udf(udfcode, dc)
     result=r.get_datacube_list()[0].get_array()
     exec(udfcode)
     ref=locals()["apply_datacube"](DataCube(dc.get_array().astype(numpy.float64).drop(labels='x').drop(labels='y')), {}).get_array()
     xarray.testing.assert_allclose(result,ref)
Example #23
 def test_generate_merged_output(self):
     merge = load_DataCube('tests/merged_cube.json').get_array()
     hasPV = merge[:, 3].dropna('t', how='all').t
     merge = merge.loc[{'t': hasPV.values}][19:21]
     merge = (merge * 100.).astype(numpy.int64).astype(numpy.float64) / 100.
     merge = merge.where(merge > -1.e10).where(merge < 1.e10)
     save_DataCube('tests/test01_merged.json', DataCube(merge))
     plot_xarray_dataarray(merge)
Example #24
def rct_sklearn_ml(udf_data: UdfData):
    """Apply a pre-trained sklearn machine learn model on RED and NIR tiles

    The model must be a sklearn model that has a prediction method: m.predict(X)
    The prediction method must accept a pandas.DataFrame as input.

    Tiles with ids "red" and "nir" are required. The machine learn model will be applied to all spatio-temporal pixel
    of the two input raster collections.

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    red = None
    nir = None

    # Iterate over each cube
    for cube in udf_data.get_datacube_list():
        if "red" in cube.id.lower():
            red = cube
        if "nir" in cube.id.lower():
            nir = cube
    if red is None:
        raise Exception("Red data cube is missing in input")
    if nir is None:
        raise Exception("Nir data cube is missing in input")

    # We need to reshape the data for prediction into one dimensional arrays
    three_dim_shape = red.array.shape
    one_dim_shape = numpy.prod(three_dim_shape)

    red_reshape = red.array.values.reshape((one_dim_shape))
    nir_reshape = nir.array.values.reshape((one_dim_shape))

    # This is the input data of the model. It must be trained with a DataFrame using the same names.
    X = pandas.DataFrame()
    X["red"] = red_reshape
    X["nir"] = nir_reshape

    # Get the first model
    mlm = udf_data.get_ml_model_list()[0]
    m = mlm.get_model()
    # Predict the data
    pred = m.predict(X)
    # Reshape the one dimensional predicted values to three dimensions based on the input shape
    pred_reshape = pred.reshape(three_dim_shape)

    result = xarray.DataArray(data=pred_reshape, dims=red.array.dims,
                              coords=red.array.coords, name=red.id + "_sklearn")
    # Create the new raster collection cube
    h = DataCube(array=result)
    # Insert the new hypercubes in the input object. The new tiles will
    # replace the original input tiles.
    udf_data.set_datacube_list([h, ])
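For completeness, a sketch of how a compatible sklearn model could be trained; the training data is synthetic, but the DataFrame column names must match the "red"/"nir" names used for prediction above.

import numpy
import pandas
from sklearn.ensemble import RandomForestRegressor

train = pandas.DataFrame({"red": numpy.random.rand(100),
                          "nir": numpy.random.rand(100)})
target = (train["nir"] - train["red"]) / (train["nir"] + train["red"])  # toy target
model = RandomForestRegressor(n_estimators=10).fit(train, target)
# model.predict(X) now accepts a DataFrame with the same "red"/"nir" columns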
Example #25
def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    """Reduce the time dimension for each tile and compute min, mean, max and sum for each pixel
    over time.
    Each raster tile in the udf data object will be reduced by time. Minimum, maximum, mean and sum are
    computed for each pixel over time.
    Args:
        cube (DataCube): the input datacube with a temporal dimension 't'
        context (dict): the UDF context (unused here)
    Returns:
        DataCube: the four per-pixel statistics stacked along the 'bands' dimension
    """
    import xarray

    array: xarray.DataArray = cube.get_array()
    result = xarray.concat(
        [array.min(dim='t'), array.max(dim='t'), array.sum(dim='t'), array.mean(dim='t')],
        dim='bands'
    )
    return DataCube(result)
Example #26
 def test_run_local_udf_fromfile(self):
     from openeo_udf.api.datacube import DataCube
     with TemporaryDirectory() as td:
         dc=self.buildData()
         tmpfile=os.path.join(td,'test_data')
         dc.to_file(tmpfile)
         r=rest_DataCube.execute_local_udf(udfcode, tmpfile)
         result=r.get_datacube_list()[0].get_array()
         exec(udfcode)
         ref=locals()["apply_datacube"](DataCube(dc.get_array().astype(numpy.float64).drop(labels='x').drop(labels='y')), {}).get_array()
         xarray.testing.assert_allclose(result,ref)
Example #27
def apply_datacube(cube: DataCube, context) -> DataCube:
    import xarray
    import numpy as np

    # Get the xarray DataArray containing the time series
    array: xarray.DataArray = cube.get_array()
    low = 0.85
    high = 1.15
    step = 0.1
    median = array.median(skipna=True)
    bins = np.arange(low, high + step, step) * median.values.tolist()
    bins = np.concatenate([[0], bins, [255]])
    buckets = np.digitize(array.values, bins=bins).astype(float)
    return DataCube(
        xarray.DataArray(buckets,
                         coords={
                             't': array.t.values,
                             'bands': array.bands.values,
                             'y': array.y.values,
                             'x': array.x.values,
                         },
                         dims=['t', 'bands', 'y', 'x']))
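To make the bucketing above concrete, here is the bin construction in isolation with an assumed median of 100: the edges run from 85 % to 115 % of the median in 10 % steps, framed by 0 and 255, and np.digitize assigns each value the index of the bucket it falls into.

import numpy as np

median = 100.0
bins = np.arange(0.85, 1.15 + 0.1, 0.1) * median   # ~[85, 95, 105, 115]
bins = np.concatenate([[0], bins, [255]])          # [0, 85, 95, 105, 115, 255]
print(np.digitize([10.0, 90.0, 100.0, 120.0, 300.0], bins=bins))  # [1 2 3 5 6]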
Example #28
def hyper_map_fabs(udf_data: UdfData):
    """Compute the absolute values of each hyper cube in the provided data

    Args:
        udf_data (UdfData): The UDF data object that contains raster and vector tiles as well as hypercubes
        and structured data.

    Returns:
        This function will not return anything, the UdfData object "udf_data" must be used to store the resulting
        data.

    """
    # Iterate over each tile
    cube_list = []
    for cube in udf_data.get_datacube_list():
        result = numpy.fabs(cube.array)
        result.name = cube.id + "_fabs"
        cube_list.append(DataCube(array=result))
    udf_data.set_datacube_list(cube_list)
Example #29
    def test_gan(self):

        # load and inverse scale according to UDF
        arr=self.build_array()
        arr.loc[{'bands':'ndvi'}]=250.*(arr.loc[{'bands':'ndvi'}]+0.08)
        arr.loc[{'bands':'VH'}]  =10.**(arr.loc[{'bands':'VH'}]/10.)
        arr.loc[{'bands':'VV'}]  =10.**(arr.loc[{'bands':'VV'}]/10.)        

        # Create a simple model that averages over time and then over bands 
        inS1=tf.keras.Input(shape=[19,128,128,2])
        avS1=tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1, keepdims=False))(inS1)
        inS2=tf.keras.Input(shape=[19,128,128,1])
        avS2=tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1, keepdims=False))(inS2)
        inPV=tf.keras.Input(shape=[19,128,128,1])
        avPV=tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=1, keepdims=False))(inPV)
        ct=tf.keras.layers.Concatenate(axis=3)([avS1,avS2,avPV])
        av=tf.keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=3, keepdims=True))(ct)
        model=tf.keras.Model([inS1,inS2,inPV],av)
        model.save('/tmp/test_gan_model.h5')

        # run gan
        result=apply_datacube(DataCube(arr), dict(
            prediction_model='/tmp/test_gan_model.h5',
            gan_window_half='9D',
            gan_steps='2D',
            gan_samples=19,
            acquisition_steps='10D',
            scaler='passthrough'
        ))

        # compute check: with this setup
        # 6 front NaNs
        # 5 data of 2020-06-29
        # 3 data of 2020-07-01
        # 5 trailing Nans
        # NaNs get filled with zeros
        check=arr.dropna('t')
        check=5./19.*check[0]+3./19.*check[1]
        check=check.mean('bands').expand_dims({'bands':['predictions']})
        check=check.expand_dims({'t':[numpy.datetime64('2020-07-04')]})
        
        xarray.testing.assert_allclose(check.astype(numpy.float32), result.get_array())
Example #30
 def buildData(self):
     a = numpy.zeros((3, 2, 5, 6), numpy.int32)
     for t in range(a.shape[0]):
         for b in range(a.shape[1]):
             for x in range(a.shape[2]):
                 for y in range(a.shape[3]):
                     a[t, b, x, y] = t * 1000 + b * 100 + x * 10 + y
     return DataCube(
         xarray.DataArray(a,
                          dims=['t', 'bands', 'x', 'y'],
                          coords={
                              't': [
                                  numpy.datetime64('2020-08-01'),
                                  numpy.datetime64('2020-08-11'),
                                  numpy.datetime64('2020-08-21')
                              ],
                              'bands': ['bandzero', 'bandone'],
                              'x': [10., 11., 12., 13., 14.],
                              'y': [20., 21., 22., 23., 24., 25.]
                          }))