Example #1
 def test_band_nodim(self):
     ref = self.buildData()
     ref = DataCube(ref.get_array()[:, 0].drop('bands'))
     for ifmt in formats:
         fn = os.path.join(self.tmpdir, 'test_band_nodim.' + ifmt)
         print("Testing " + fn)
         datacube_to_file(ref, fn, fmt=ifmt)
         res = datacube_from_file(fn, fmt=ifmt)
         xarray.testing.assert_allclose(res.get_array(), ref.get_array())
Example #2
 def test_xy_nolabels(self):
     ref = self.buildData()
     ref = DataCube(ref.get_array().drop('x').drop('y'))
     for ifmt in formats:
         fn = os.path.join(self.tmpdir, 'test_xy_nolabels.' + ifmt)
         print("Testing " + fn)
         datacube_to_file(ref, fn, fmt=ifmt)
         res = datacube_from_file(fn, fmt=ifmt)
         xarray.testing.assert_allclose(res.get_array(), ref.get_array())
Example #3
 def test_typing_float(self):
     ref = self.buildData()
     ref = DataCube(ref.get_array().astype(numpy.float64))
     for ifmt in formats:
         fn = os.path.join(self.tmpdir, 'test_typing_float.' + ifmt)
         print("Testing " + fn)
         datacube_to_file(ref, fn, fmt=ifmt)
         res = datacube_from_file(fn, fmt=ifmt)
         xarray.testing.assert_allclose(res.get_array(), ref.get_array())
         self.assertEqual(res.get_array().dtype, ref.get_array().dtype)
Example #4
 def test_coordinateOrderChanged(self):
     inpcube = DataCube(self.inpcube.get_array().transpose())
     refcube = DataCube(self.refcube.get_array().transpose())
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=False, do_phenology=True))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #5
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies a rolling window median composite to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal dimension 't' as input.
    """

    array: xarray.DataArray = cube.get_array()

    import pandas as pd
    import numpy as np

    # this method computes dekads; it can be used to resample the data to the desired frequency

    time_dimension_index = array.get_index('t')

    d = time_dimension_index.day - np.clip(
        (time_dimension_index.day - 1) // 10, 0, 2) * 10 - 1
    date = time_dimension_index.values - np.array(d, dtype="timedelta64[D]")

    # replace each value with a 30-day rolling-window median;
    # the rolling median is applied first so that gaps are filled on all dates
    composited = array.rolling(t=30, min_periods=1,
                               center=True).median().dropna("t")
    # resample the rolling-window medians to dekads
    ten_daily_composite = composited.groupby_bins("t", date).median()
    return DataCube(ten_daily_composite)
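The dekad arithmetic above is compact but hard to read; the following standalone sketch, using a hypothetical 16-day time index, shows how each timestamp is mapped to the first day of its dekad:

import numpy as np
import pandas as pd

# hypothetical daily timestamps spanning two dekads
idx = pd.date_range("2021-01-05", periods=16, freq="D")

# same arithmetic as in the UDF: days 1-10 map to the 1st, 11-20 to the 11th, 21-31 to the 21st
d = idx.day - np.clip((idx.day - 1) // 10, 0, 2) * 10 - 1
dekad_start = idx.values - np.array(d, dtype="timedelta64[D]")
print(dekad_start)  # 2021-01-01 for Jan 5-10, 2021-01-11 for Jan 11-20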
Example #6
def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    from scipy.signal import savgol_filter

    array: xarray.DataArray = cube.get_array()
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
Example #7
 def test_missingCoordinates(self):
     inpcube = DataCube(self.inpcube.get_array()[:, :, 0, :])
     refcube = DataCube(self.refcube.get_array()[:, :, 0, :])
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=False, do_phenology=True))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #8
 def test_hasNoDataTimeSeries(self):
     inpcube = DataCube(self.inpcube.get_array().where(
         self.inpcube.get_array().x != 3, numpy.nan, drop=False))
     refcube = DataCube(self.refcube.get_array().where(
         self.refcube.get_array().x != 3, 0., drop=False))
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=False, do_phenology=True))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #9
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:
    """
    Applies a savitzky-golay smoothing to a timeseries datacube.
    This UDF preserves dimensionality, and assumes a datacube with a temporal dimension 't' as input.
    """
    from scipy.signal import savgol_filter

    array: xarray.DataArray = cube.get_array()
    filled = array.interpolate_na(dim='t')
    smoothed_array = savgol_filter(filled.values, 5, 2, axis=0)
    return DataCube(
        xarray.DataArray(smoothed_array, dims=array.dims, coords=array.coords))
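As a quick sanity check, the interpolation and Savitzky-Golay steps can be run on a hypothetical one-dimensional series outside of the DataCube wrapper:

import numpy as np
import xarray
from scipy.signal import savgol_filter

# hypothetical 10-step series with one gap along 't'
series = xarray.DataArray(
    np.array([0.2, 0.3, np.nan, 0.5, 0.55, 0.6, 0.58, 0.62, 0.65, 0.7]),
    dims=['t'], coords={'t': np.arange(10)})

filled = series.interpolate_na(dim='t')                # linear gap filling, as in the UDF
smoothed = savgol_filter(filled.values, 5, 2, axis=0)  # window length 5, polynomial order 2
print(np.round(smoothed, 3))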
Example #10
 def test_multiBand(self):
     inparr1 = self.inpcube.get_array()
     inparr2 = self.inpcube.get_array().assign_coords(bands=['extraband'])
     refarr1 = self.refcube.get_array()
     refarr2 = self.refcube.get_array().assign_coords(bands=['extraband'])
     inpcube = DataCube(xarray.concat([inparr1, inparr2], dim='bands'))
     refcube = DataCube(xarray.concat([refarr1, refarr2], dim='bands'))
     outcube = udf_savitzkygolaysmooth_phenology.apply_datacube(
         inpcube, dict(do_smoothing=True, do_phenology=False))
     xarray.testing.assert_allclose(outcube.get_array(),
                                    refcube.get_array())
Example #11
def apply_datacube(cube: DataCube, context: dict) -> DataCube:
    """Compute the NDVI based on sentinel2 tiles

    The bands "TOC-B04_10M" (red) and "TOC-B08_10M" (near infrared) are required. The NDVI
    is computed per pixel for every time-stamped 2D raster tile.

    """
    array: xarray.DataArray = cube.get_array()
    red = array.sel(bands="TOC-B04_10M")
    nir = array.sel(bands="TOC-B08_10M")
    ndvi = (nir - red) / (nir + red)
    return DataCube(ndvi)
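The band selection and NDVI arithmetic can be tried in isolation on a hypothetical 2x2 tile (the DataCube wrapper and the time dimension are omitted for brevity):

import numpy as np
import xarray

arr = xarray.DataArray(
    np.array([[[0.10, 0.12], [0.11, 0.09]],    # TOC-B04_10M (red)
              [[0.45, 0.50], [0.48, 0.40]]]),  # TOC-B08_10M (near infrared)
    dims=['bands', 'y', 'x'],
    coords={'bands': ['TOC-B04_10M', 'TOC-B08_10M']})

red = arr.sel(bands='TOC-B04_10M')
nir = arr.sel(bands='TOC-B08_10M')
print(((nir - red) / (nir + red)).values)  # per-pixel NDVI, roughly 0.6-0.65 here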
Example #12
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:

    # access the underlying xarray
    inarr=cube.get_array()

    # ndvi
    B4=inarr.loc[:,'TOC-B04_10M']
    B8=inarr.loc[:,'TOC-B08_10M']
    ndvi=(B8-B4)/(B8+B4)
    
    # extend bands dim
    ndvi=ndvi.expand_dims(dim='bands', axis=-3).assign_coords(bands=['ndvi'])
    
    # wrap back to datacube and return
    return DataCube(ndvi)
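The expand_dims call restores the 'bands' dimension that the band arithmetic removed; a minimal sketch on a hypothetical (t, y, x) array:

import numpy as np
import xarray

# hypothetical shape left over after selecting a single band: ('t', 'y', 'x')
ndvi = xarray.DataArray(np.zeros((3, 4, 5)), dims=['t', 'y', 'x'])

restored = ndvi.expand_dims(dim='bands', axis=-3).assign_coords(bands=['ndvi'])
print(restored.dims)  # ('t', 'bands', 'y', 'x')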
Example #13
def apply_datacube(udf_cube: DataCube, context: dict) -> DataCube:
    """
    Apply the BFASTmonitor method to detect a break at the end of the time series in the datacube.
    This UDF reduces the time dimension of the input datacube.
    :param udf_cube: the openEO virtual DataCube object
    :return: a DataCube wrapping the detected breaks (breaks_xr)
    """
    from datetime import datetime
    import pandas as pd
    import xarray as xr
    # BFASTMonitor and crop_data_dates are assumed to come from the 'bfast' package
    from bfast import BFASTMonitor
    from bfast.utils import crop_data_dates
    # convert the openEO datacube into the xarray DataArray structure
    my_xarray: xr.DataArray = udf_cube.get_array()
    # select a single band; this removes the band dimension
    my_xarray = my_xarray.sel(bands='VV')
    start_hist = datetime(2017, 5, 1)
    start_monitor = datetime(2019, 1, 1)
    end_monitor = datetime(2019, 12, 29)
    # get the dates from the data cube:
    dates = [
        pd.Timestamp(date).to_pydatetime()
        for date in my_xarray.coords['t'].values
    ]
    # pre-processing - crop the input data cube according to the history and monitor periods:
    data, dates = crop_data_dates(my_xarray.values, dates, start_hist,
                                  end_monitor)
    # Note: for our dataset 'data' ends up with 91 time steps instead of 92, because bfast's utils.py
    # compares start_hist with a strict '<' rather than '<=', so the first date is dropped.
    # -------------------------------------
    # specify the BFASTmonitor parameters:
    model = BFASTMonitor(start_monitor,
                         freq=31,
                         k=3,
                         verbose=1,
                         hfrac=0.25,
                         trend=True,
                         level=0.05,
                         backend='python')
    # run the monitoring:
    # model.fit(data, dates, nan_value=udf_data.nodatavals[0])
    model.fit(data, dates)
    # get the detected breaks as an xarray Data Array:
    breaks_xr = xr.DataArray(
        model.breaks,
        coords=[my_xarray.coords['x'].values, my_xarray.coords['y'].values],
        dims=['x', 'y'])
    # return the breaks as openEO DataCube:
    return DataCube(breaks_xr)
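The conversion between the cube's 't' coordinate and plain Python datetimes, and the wrapping of the result back into an xarray DataArray, can be sketched without the bfast dependency (all values below are hypothetical):

import numpy as np
import pandas as pd
import xarray as xr

# hypothetical time coordinate as it would appear on the cube
t = np.array(['2019-01-05', '2019-01-17', '2019-01-29'], dtype='datetime64[ns]')
dates = [pd.Timestamp(date).to_pydatetime() for date in t]
print(dates[0])  # 2019-01-05 00:00:00

# hypothetical per-pixel break indices wrapped the same way as model.breaks
breaks = xr.DataArray(np.zeros((2, 2), dtype=int),
                      coords=[[0.0, 10.0], [0.0, 10.0]], dims=['x', 'y'])
print(breaks.dims)  # ('x', 'y')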
Example #14
def apply_hypercube(cube: DataCube, context: dict) -> DataCube:
    """Reduce the time dimension for each tile and compute min, mean, max and sum for each pixel
    over time.

    Args:
        cube (DataCube): the input datacube with a temporal dimension 't'
        context (dict): the user context (unused here)
    Returns:
        DataCube: a cube whose 'bands' dimension holds the per-pixel min, max, sum and mean over time
    """
    # reduce along 't' and stack the four statistics along the 'bands' dimension
    array: xarray.DataArray = cube.get_array()
    result = xarray.concat(
        [array.min(dim='t'), array.max(dim='t'), array.sum(dim='t'), array.mean(dim='t')],
        dim='bands'
    )
    return DataCube(result)
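Because each reduction keeps the (length-1) 'bands' dimension, concatenating the four results along 'bands' yields one band per statistic; a minimal sketch on a hypothetical 3x1x2x2 cube:

import numpy as np
import xarray

arr = xarray.DataArray(np.arange(12.).reshape(3, 1, 2, 2),
                       dims=['t', 'bands', 'y', 'x'])

stats = xarray.concat(
    [arr.min(dim='t'), arr.max(dim='t'), arr.sum(dim='t'), arr.mean(dim='t')],
    dim='bands')
print(stats.shape)  # (4, 2, 2): one 'bands' entry per statistic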
Example #15
def apply_datacube(cube: DataCube, context) -> DataCube:
    import xarray
    import numpy as np

    # Get the xarray DataArray containing the time series
    array: xarray.DataArray = cube.get_array()
    low = 0.85
    high = 1.15
    step = 0.1
    median = array.median(skipna=True)
    bins = np.arange(low, high + step, step) * median.values.tolist()
    bins = np.concatenate([[0], bins, [255]])
    buckets = np.digitize(array.values, bins=bins).astype(float)
    return DataCube(
        xarray.DataArray(buckets,
                         coords={
                             't': array.t.values,
                             'bands': array.bands.values,
                             'y': array.y.values,
                             'x': array.x.values,
                         },
                         dims=['t', 'bands', 'y', 'x']))
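np.digitize replaces every pixel by the index of the bin it falls into; a small worked example with hypothetical values and bin edges of the kind the UDF builds around the median:

import numpy as np

values = np.array([0.0, 0.8, 0.9, 1.0, 1.1, 1.3])
bins = np.array([0.0, 0.85, 0.95, 1.05, 1.15, 255.0])

print(np.digitize(values, bins=bins).astype(float))  # [1. 1. 2. 3. 4. 5.]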
Example #16
 def test_LoadSave(self):
     cube1 = DataCube(self.build_array(32, 16))
     save_DataCube('/tmp/test_LoadSave.json', cube1)
     cube2 = load_DataCube('/tmp/test_LoadSave.json')
     xarray.testing.assert_allclose(cube1.get_array(), cube2.get_array())
Example #17
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:

    import pandas
    import xarray
    import numpy

    class PhenologyParams:
        def __init__(self, year):
            self.year = year  # year of the season, int
            self.sStart = pandas.DateOffset(
                months=4, days=2)  # Start date of interval for start of season
            self.sEnd = pandas.DateOffset(
                months=6,
                days=10)  # End date of the interval for start of season
            self.mStart = pandas.DateOffset(
                months=6, days=10)  # Start date of interval for mid of season
            self.mEnd = pandas.DateOffset(
                months=9,
                days=1)  # End date of the interval for mid of season
            self.eStart = pandas.DateOffset(
                months=9, days=1)  # Start date of interval for end of season
            self.eEnd = pandas.DateOffset(
                months=12,
                days=31)  # End date of the interval for end of season
            self.tSos = 10.  # Threshold for start of season
            self.tEos = 10.  # Threshold for end of season

    """
        sStartDate: First date of the interval for getting season start
        sEndDate: Last date of the interval for getting season start
    
        mStartDate: First date of the interval for getting maximum greenness
        mEndDate: Last date of the interval for getting maximum greenness
    
        eStartDate: First date of the interval for getting season end
        eEndDate: Last date of the interval for getting season end
    
        tSos: The offset (%) to add to the start date minimum to set the start of the season
        tEos: The offset (%) to subtract from the end date minimum to set the end of the season
    """

    class CropPhenology:
        def extractSeasonDates(self, timeseries, args):

            if timeseries is None:
                return None
            else:

                # Get the local maximum greenness
                mMax = self.getLocalMax(
                    timeseries,
                    pandas.Timestamp(args.year, args.mStart.months,
                                     args.mStart.days),
                    pandas.Timestamp(args.year, args.mEnd.months,
                                     args.mEnd.days))
                dmMax = mMax['Times']
                ymMax = mMax['Greenness']

                # Get the start of season dates
                sos = self.getStartOfSeason(
                    timeseries,
                    pandas.Timestamp(args.year, args.sStart.months,
                                     args.sStart.days),
                    pandas.Timestamp(args.year, args.sEnd.months,
                                     args.sEnd.days), float(args.tSos),
                    float(ymMax))

                # Get the end of season dates
                eos = self.getEndOfSeason(
                    timeseries,
                    pandas.Timestamp(args.year, args.eStart.months,
                                     args.eStart.days),
                    pandas.Timestamp(args.year, args.eEnd.months,
                                     args.eEnd.days), float(args.tEos),
                    float(ymMax))

                #return result
                return [sos[3], eos[3]]

        def getLocalMax(self, df, start, end):
            df_range = df.loc[df['Times'].between(start, end)]
            return df_range.loc[df_range['Greenness'].idxmax()]

        """
            Calculate the start of the season based on selected interval [start, end] and a greenness curve (df). 
            Within this interval we will first look for the local minimum greenness, marked by (dsMin, ysMin). In the
            second step we will use the offset (%) to calculate the amount of greenness offset that needs to be applied to
            the minimum value in order to get the start of the season. This offset is calculated as a percentage of the
            difference between the maximum greenness and the local minimum.
        """

        def getStartOfSeason(self, df, start, end, offset, yMax):
            # Get the local minimum greenness in the start season interval
            df_sRange = df.loc[df['Times'].between(start, end)]
            sMin = df_sRange.loc[df_sRange['Greenness'].idxmin()]
            dsMin = sMin['Times']
            ysMin = sMin['Greenness']

            # Calculate the greenness value corresponding to the start of the season
            ySos = ysMin + ((yMax - ysMin) * (offset / 100.0))

            # Get the closest value to this greenness
            df_sRange = df_sRange.loc[df_sRange['Times'] >= dsMin]
            sos = df_sRange.iloc[(df_sRange['Greenness'] -
                                  ySos).abs().argsort()[:1]]
            return (dsMin, ysMin, ySos,
                    pandas.to_datetime(str(sos['Times'].values[0])))

        """
            Calculate the end of the season based on selected interval [start, end] and a greenness curve (df). 
            Within this interval we will first look for the local minimum greenness, marked by (deMin, yeMin). In the
            second step we will use the offset (%) to calculate the amount of greenness offset that needs to be applied to
            the minimum value in order to get the end of the season. This offset is calculated as a percentage of the
            difference between the maximum greenness and the local minimum.
        """

        def getEndOfSeason(self, df, start, end, offset, yMax):
            # Get the local minimum greenness in the end season interval
            df_eRange = df.loc[df['Times'].between(start, end)]
            eMin = df_eRange.loc[df_eRange['Greenness'].idxmin()]
            deMin = eMin['Times']
            yeMin = eMin['Greenness']

            # Calculate the greenness value corresponding to the end of the season
            yEos = yeMin + ((yMax - yeMin) * (offset / 100.0))

            # Get the closest value to this greenness
            df_eRange = df_eRange.loc[df_eRange['Times'] <= deMin]
            eos = df_eRange.iloc[(df_eRange['Greenness'] -
                                  yEos).abs().argsort()[:1]]
            return (deMin, yeMin, yEos,
                    pandas.to_datetime(str(eos['Times'].values[0])))

    array = cube.get_array()
    cropphenology = CropPhenology()
    phenologyparams = PhenologyParams(int(array.t.dt.year[0]))
    season = xarray.DataArray(numpy.zeros(
        (2, array.x.shape[0], array.y.shape[0]), dtype=numpy.datetime64),
                              dims=('bands', 'x', 'y'),
                              coords={'bands': ['sos', 'eos']})

    for ix in array.x.values:
        for iy in array.y.values:
            iserie = pandas.DataFrame(data={
                'Greenness': array[:, 0, ix, iy].values,
                'Times': array.t.values
            })
            iseason = cropphenology.extractSeasonDates(iserie, phenologyparams)
            #season.values[:,ix,iy]=[iseason[0].dayofyear,iseason[1].dayofyear]
            season.values[:, ix, iy] = iseason

    return DataCube(season)
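The interval lookups used by CropPhenology (getLocalMax and the analogous minimum searches) can be tried on a hypothetical greenness table:

import pandas as pd

# hypothetical greenness time series
df = pd.DataFrame({
    'Times': pd.date_range('2019-06-01', periods=6, freq='15D'),
    'Greenness': [0.30, 0.45, 0.62, 0.70, 0.55, 0.40],
})

# same pattern as getLocalMax: restrict to the interval, then take the row with the peak value
window = df.loc[df['Times'].between(pd.Timestamp(2019, 6, 10), pd.Timestamp(2019, 8, 1))]
print(window.loc[window['Greenness'].idxmax()])  # the 2019-07-16 row, greenness 0.70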
Example #18
 def test_Reduce(self):
     cube1 = reduceXY(8, 2, DataCube(self.build_array(32, 16)))
     cube2 = DataCube(self.build_array(4, 8, mult=(8, 2)))
     xarray.testing.assert_allclose(cube1.get_array(), cube2.get_array())
Example #19
def apply_datacube(cube: DataCube, context: Dict) -> DataCube:

    import functools
    import xarray
    import numpy
    from xarray.core.dataarray import DataArray
    import pandas
    from tensorflow.python.keras.models import load_model

    # BUILTIN CONFIG #########################

    NDVI='ndvi'
    PVid='ndvi'
    S2id='S2ndvi'
    VHid='VH'
    VVid='VV'

    prediction_model=""
    
    gan_window_half='90D'
    gan_steps='5D'
    gan_samples=37 # this is 2*gan_window_half/gan_steps+1
    acquisition_steps='10D'
    
    scaler='default'

    # FILL FROM CONTEXT IF THERE IS #########################

    if context is not None:
        prediction_model=context.get('prediction_model',prediction_model)
        gan_window_half=context.get('gan_window_half',gan_window_half)
        gan_steps=context.get('gan_steps',gan_steps)
        gan_samples=context.get('gan_samples',gan_samples)
        acquisition_steps=context.get('acquisition_steps',acquisition_steps)
        scaler=context.get('scaler',scaler)

    # HELPER FUNCTIONS #########################    
    
    @functools.lru_cache(maxsize=25)
    def load_datafusion_model(prediction_model):
        return load_model(prediction_model)

    
    class default_scaler():
        
        def minmaxscaler(self,data, source):
            ranges = {}
            ranges[NDVI] = [-0.08, 1]
            ranges[VVid] = [-20, -2]
            ranges[VHid] = [-33, -8]
            # Scale between -1 and 1
            datarescaled = 2*(data - ranges[source][0])/(ranges[source][1] - ranges[source][0]) - 1
            return datarescaled
    
        def minmaxunscaler(self,data, source):
            ranges = {}
            ranges[NDVI] = [-0.08, 1]
            ranges[VVid] = [-20, -2]
            ranges[VHid] = [-33, -8]
            # Unscale
            dataunscaled = 0.5*(data + 1) * (ranges[source][1] - ranges[source][0]) + ranges[source][0]
            return dataunscaled

    class passthrough_scaler():
        
        def minmaxscaler(self,data, source):
            return data
    
        def minmaxunscaler(self,data, source):
            return data


    def process_window(inarr, model, scaler, windowsize=128, nodata=0):
    
        inarr=inarr.ffill(dim='t').resample(t='1D').ffill().resample(t=gan_steps).ffill()
        
        # older tensorflows expect exact number of samples in every dimension
        if len(inarr.t)>gan_samples:
            trimfront=int((len(inarr.t)-gan_samples)/2)
            trimback=trimfront + (0 if (len(inarr.t)-gan_samples)%2==0 else 1)
            inarr=inarr.sel(t=inarr.t[trimfront:-trimback])
        if len(inarr.t)<gan_samples:
            trimfront=int((gan_samples-len(inarr.t))/2)
            trimback=trimfront + (0 if (gan_samples-len(inarr.t))%2==0 else 1)
            front=pandas.date_range(end=inarr.t.values.min()-pandas.to_timedelta(gan_steps), periods=trimfront, freq=gan_steps).values.astype(inarr.t.dtype)
            back=pandas.date_range(start=inarr.t.values.max()+pandas.to_timedelta(gan_steps), periods=trimback, freq=gan_steps).values.astype(inarr.t.dtype)
            inarr=inarr.reindex({'t':numpy.concatenate((front,inarr.t.values,back))})
        
        # grow it to 5 dimensions
        inarr=inarr.expand_dims(dim=['d0','d5'],axis=[0,5])
        
        # select bands
        PV=inarr.sel(bands=PVid)
        S2=inarr.sel(bands=S2id)
        VH=inarr.sel(bands=VHid)
        VV=inarr.sel(bands=VVid)
     
        # Scale S1
        VV = scaler.minmaxscaler(VV, VVid)
        VH = scaler.minmaxscaler(VH, VHid)
    
        # Concatenate s1 data
        s1_backscatter = xarray.concat((VV, VH), dim='d5')
        
        # Scale NDVI
        s2_ndvi = scaler.minmaxscaler(S2, NDVI)
        probav_ndvi = scaler.minmaxscaler(PV, NDVI)
    
        # Remove any nan values
        # Pass plain numpy arrays: this reduces RAM usage (newer TensorFlow versions copy xarray input into numpy anyway) and keeps compatibility with older TensorFlow releases
        s2_ndvi=s2_ndvi.fillna(nodata).values
        s1_backscatter=s1_backscatter.fillna(nodata).values
        probav_ndvi=probav_ndvi.fillna(nodata).values
    
        # Run neural network
        predictions = model.predict((s1_backscatter, s2_ndvi, probav_ndvi))
    
        # Unscale
        predictions = scaler.minmaxunscaler(predictions, NDVI)
    
        return predictions.reshape((windowsize, windowsize))

    # MAIN CODE #########################


    # extract xarray
    inarr=cube.get_array()
            
    # rescale
    inarr.loc[{'bands':PVid}]=0.004*inarr.sel(bands=PVid)-0.08
    inarr.loc[{'bands':VHid}]=10.*xarray.ufuncs.log10(inarr.sel(bands=VHid))
    inarr.loc[{'bands':VVid}]=10.*xarray.ufuncs.log10(inarr.sel(bands=VVid))
    
    # compute windows
    xsize,ysize=inarr.x.shape[0],inarr.y.shape[0]
    windowlist=[((0,128),(0,128))]

    # init scaler
    sc=default_scaler()
    if scaler=='passthrough': sc=passthrough_scaler()
        
    # load the model
    model=load_datafusion_model(prediction_model)

    # compute acquisition dates
    acquisition_dates = pandas.date_range(
        inarr.t.values.min() + pandas.to_timedelta(gan_window_half),
        inarr.t.values.max() - pandas.to_timedelta(gan_window_half),
        freq=acquisition_steps
    )

    # result buffer
    shape=[len(acquisition_dates),1,1,1]
    shape[inarr.dims.index('x')]=xsize
    shape[inarr.dims.index('y')]=ysize
    predictions=DataArray(numpy.full(shape,numpy.nan, dtype=numpy.float32),dims=inarr.dims,coords={'bands':['predictions'],'t':acquisition_dates})
    
    # run processing
    for idate in acquisition_dates:
        for iwin in windowlist:
            data=inarr.sel({
                'x':slice(iwin[0][0],iwin[0][1]),
                'y':slice(iwin[1][0],iwin[1][1]),
                't':slice(idate-pandas.to_timedelta(gan_window_half), idate+pandas.to_timedelta(gan_window_half))
            })
            ires = process_window(data, model, sc, 128, 0.).astype(numpy.float32)
            predictions.loc[{'t':idate,'x':range(iwin[0][0],iwin[0][1]),'y':range(iwin[1][0],iwin[1][1])}]=ires
            
    # return the predictions
    return DataCube(predictions)
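The symmetric trimming in process_window is easiest to see with plain numbers; a minimal sketch assuming a hypothetical series of 41 samples that has to be reduced to the 37 the network expects:

n_t, gan_samples = 41, 37

trimfront = int((n_t - gan_samples) / 2)                           # samples dropped at the front
trimback = trimfront + (0 if (n_t - gan_samples) % 2 == 0 else 1)  # samples dropped at the back
print(trimfront, trimback, n_t - trimfront - trimback)             # 2 2 37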
Example #20
def resampleXY(xskip, yskip, datacube: DataCube):
    dataarray = datacube.get_array()
    return DataCube(dataarray.coarsen({'x': xskip, 'y': yskip}).mean())
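A minimal usage sketch on a hypothetical 4x4 grid: coarsen groups the array into xskip-by-yskip blocks and mean() averages each block, so resampleXY(2, 2, cube) halves both spatial dimensions:

import numpy as np
import xarray

arr = xarray.DataArray(np.arange(16.).reshape(4, 4), dims=['x', 'y'])
print(arr.coarsen({'x': 2, 'y': 2}).mean().values)
# [[ 2.5  4.5]
#  [10.5 12.5]]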