Esempio n. 1
0
def run_bfast(indice_array):
    '''
    Run BFASTMonitor on a stack of index images.

    Input:
    indice_array: Array[] sorted 3D array with image arrays for the whole
        period; the first axis is compared against the number of dates.
        Missing observations are expected to be NaN.
    Output:
    Tuple (breaks, means) taken from the fitted BFASTMonitor model
    '''
    print("starting Bfast Process")
    # set parameters
    k = 3
    freq = 30
    trend = True
    hfrac = 0.25
    level = 0.05
    nan_value = -9999  # sentinel passed to BFASTMonitor.fit for missing data
    start_hist = datetime(2013, 8, 1)  # set start of history period
    start_monitor = datetime(2019, 7, 1)  # set start of monitoring period
    end_monitor = datetime(2020, 7, 31)  # set end of monitoring period

    dates = pd.date_range('2013-08-01', '2020-07-31',
                          freq='MS')  # monthly (month start) time axis
    dates2 = [pd.to_datetime(date) for date in dates]

    nan_mask = numpy.isnan(indice_array)
    nancount = numpy.count_nonzero(nan_mask)
    print(f"Timeseries has {nancount} NANs")

    # Scale the valid values first and only then write the sentinel.
    # Substituting before the multiplication would turn the sentinel into
    # -9999 * 10000, which no longer matches the nan_value given to fit(),
    # so missing pixels would be treated as real observations.
    indice_array = numpy.where(nan_mask, nan_value, indice_array * 10000)
    indice_array = indice_array.astype(int)

    data, dates = crop_data_dates(indice_array, dates2, start_hist,
                                  end_monitor)  # crop array

    # Reconcile the uncropped date list with the cropped data length.
    # NOTE(review): dates2 is only used for the diagnostics below; the model
    # is fitted with the cropped `dates` returned by crop_data_dates.
    while len(dates2) > data.shape[0]:
        dates2.pop()
    if len(dates2) < data.shape[0]:
        dates2.insert(0, datetime(2013, 1, 1))

    print("First date: {}".format(dates2[0]))
    print("Last date: {}".format(dates2[-1]))
    print("Shape of data array: {}".format(data.shape))
    print(f'Number of dates {len(dates2)}')

    # create BFAST model
    model = BFASTMonitor(start_monitor,
                         freq=freq,
                         k=k,
                         hfrac=hfrac,
                         trend=trend,
                         level=level,
                         backend='python',
                         verbose=1)
    model.fit(data, dates, n_chunks=5,
              nan_value=nan_value)  # fit model to timeseries

    # return detected breaks in timeseries and mean values of pixel
    return model.breaks, model.means
Esempio n. 2
0
def apply_datacube(udf_cube: DataCube, context: dict) -> DataCube:
    """
    Apply the BFASTmonitor method to detect a break at the end of the time series of the datacube.

    This UDF reduces the time dimension of the input datacube.

    :param udf_cube: the openEO virtual DataCube object; it must provide a
        'VV' entry on the 'bands' dimension and a 't' coordinate
    :param context: not used by this function
    :return: DataCube wrapping the detected breaks, labelled with the
        non-time dimensions of the input
    """
    from datetime import datetime
    # convert the openEO datacube into the xarray DataArray structure
    my_xarray: xr.DataArray = udf_cube.get_array()
    # select single band, removes band dimension
    my_xarray = my_xarray.sel(bands='VV')

    start_hist = datetime(2017, 5, 1)
    start_monitor = datetime(2019, 1, 1)
    end_monitor = datetime(2019, 12, 29)
    # get the dates from the data cube:
    dates = [
        pd.Timestamp(date).to_pydatetime()
        for date in my_xarray.coords['t'].values
    ]
    # pre-processing - crop the input data cube according to the history and monitor periods:
    data, dates = crop_data_dates(my_xarray.values, dates, start_hist,
                                  end_monitor)
    # !!! Note !!! that data has the shape 91, and not 92 for our dataset. The reason is the definition in
    # the bfast utils.py script where the start_hist is set < than dates, and not <= than dates.
    # -------------------------------------
    # specify the BFASTmonitor parameters:
    model = BFASTMonitor(start_monitor,
                         freq=31,
                         k=3,
                         verbose=1,
                         hfrac=0.25,
                         trend=True,
                         level=0.05,
                         backend='python')
    # run the monitoring:
    model.fit(data, dates)

    # Label the result with the input's own spatial dimensions instead of a
    # hard-coded ('x', 'y') order, so the labels follow the actual axis
    # layout of the raw values that were fitted.
    # Assumes time is the leading dimension of my_xarray (the raw values are
    # cropped and fitted along axis 0) - TODO confirm against the backend.
    spatial_dims = list(my_xarray.dims[1:])
    breaks_xr = xr.DataArray(
        model.breaks,
        coords=[my_xarray.coords[dim].values for dim in spatial_dims],
        dims=spatial_dims)
    # return the breaks as openEO DataCube:
    return DataCube(breaks_xr)
def run_bfast(indice_array):
    """Fit a BFASTMonitor model to ``indice_array`` and return its results.

    NaN entries are replaced by -9999, the array is cropped to the
    history/monitoring window, cast to int and fitted with
    ``nan_value=-9999``.

    Returns the tuple ``(model.breaks, model.means)``.
    """
    # history and monitoring windows
    hist_begin = datetime(2013, 4, 1)  # no data for the first three months of 2013
    monitor_begin = datetime(2016, 7, 1)
    monitor_end = datetime(2018, 12, 31)

    # monthly (month start) time axis
    monthly = pd.date_range('2013-04-01', '2018-7-31', freq='MS')
    dates2 = [pd.to_datetime(stamp) for stamp in monthly]

    filled = numpy.where(numpy.isnan(indice_array), -9999, indice_array)
    data, dates2 = crop_data_dates(filled, dates2, hist_begin, monitor_end)
    data = data.astype(int)

    # make the date list as long as the time axis of the cropped data
    n_steps = data.shape[0]
    while len(dates2) > n_steps:
        dates2.pop()
    if len(dates2) < n_steps:
        dates2.insert(0, datetime(2013, 1, 1))

    print(f"First date: {dates2[0]}")
    print(f"Last date: {dates2[-1]}")
    print(f"Shape of data array: {data.shape}")
    print(f'Number of dates {len(dates2)}')

    model = BFASTMonitor(
        monitor_begin,
        freq=30,
        k=3,
        hfrac=0.25,
        trend=True,
        level=0.05,
        backend='python',
        verbose=1,
    )
    model.fit(data, dates2, nan_value=-9999)

    return model.breaks, model.means
Esempio n. 4
0
def bfast_window(window, read_lock, write_lock, src, dst, segment_dir,
                 monitor_params, crop_params, out):
    """Run the bfast model on one image window and write the result to ``dst``.

    Two bands are written: the breaks converted through
    ``break_to_decimal_year`` and the model magnitudes, both as float32.
    """
    # reading is serialised through read_lock to avoid duplicate reading and
    # corruption of the data
    with read_lock:
        pixels = src.read(window=window).astype(np.int16)
        # NOTE(review): the original author asks whether NaNs, which the
        # int16 cast is said to turn into 0, should use the int16 minimum
        # instead - unresolved.

    # observation dates of this segment, one "%Y-%m-%d" entry per non-blank line
    dates_file = segment_dir / 'dates.csv'
    with dates_file.open() as handle:
        observed = [
            datetime.strptime(line, "%Y-%m-%d")
            for line in handle.read().splitlines()
            if line.rstrip()
        ]

    # keep only the dates (and matching layers) that are actually used
    pixels, observed = crop_data_dates(pixels, observed, **crop_params)

    # build and fit the model
    monitor = BFASTMonitor(**monitor_params)
    monitor.fit(pixels, observed)

    # dates of the monitoring period, both ends included
    monitor_start = monitor_params['start_monitor']
    monitor_end = crop_params['end']
    first = observed.index(monitor_start)
    last = observed.index(monitor_end)
    monitoring_dates = observed[first:last + 1]

    # element-wise conversion of the breaks to decimal years
    # (e.g. mid 2015 becomes 2015.5); the dates argument is not vectorised
    as_decimal_year = np.vectorize(break_to_decimal_year, excluded=[1])
    decimal_breaks = as_decimal_year(monitor.breaks, monitoring_dates)

    # stack the results as 2 bands
    bands = np.stack((decimal_breaks, monitor.magnitudes)).astype(np.float32)

    with write_lock:
        dst.write(bands, window=window)
        out.update_progress()
Esempio n. 5
0
def bfast4openeo(udf_data):
    """Run BFASTMonitor on ``udf_data`` and return the breaks as a DataArray.

    ``udf_data`` must expose ``.values`` and the coordinates 'time', 'y'
    and 'x'. The result is labelled with the 'y' and 'x' coordinates.
    """
    from bfast import BFASTMonitor
    from bfast.utils import crop_data_dates
    from datetime import datetime
    import xarray as xr
    import pandas as pd
    import numpy as np

    history_start = datetime(2016, 12, 31)
    monitor_start = datetime(2019, 1, 1)
    monitor_end = datetime(2019, 12, 29)

    # observation dates of the cube as plain datetime objects
    time_values = udf_data.coords['time'].values
    observed = [pd.Timestamp(stamp).to_pydatetime() for stamp in time_values]

    # crop the cube to the history + monitoring period
    # !!! Note !!! the cropped data has the shape 91, and not 92 for our
    # dataset, because bfast's utils.py compares start_hist with < instead
    # of <= against the dates.
    cropped, observed = crop_data_dates(udf_data.values, observed,
                                        history_start, monitor_end)

    # BFASTMonitor settings
    monitor_settings = dict(
        freq=31,
        k=3,
        verbose=1,
        hfrac=0.25,
        trend=True,
        level=0.05,
        backend='python',
    )
    model = BFASTMonitor(monitor_start, **monitor_settings)
    model.fit(cropped, observed)

    # !!! question !!! are those breaks identical to the breaks we get from R script?
    detected = model.breaks
    y_values = udf_data.coords['y'].values
    x_values = udf_data.coords['x'].values
    return xr.DataArray(detected, coords=[y_values, x_values], dims=['y', 'x'])
Esempio n. 6
0
    def set_bfast_parameters(self,
                             start_monitor,
                             end_monitor,
                             start_hist,
                             freq,
                             k,
                             hfrac,
                             trend,
                             level,
                             backend='opencl',
                             verbose=1,
                             device_id=0):
        '''Store the BFAST settings on the instance and build the BFASTMonitor model.

        Every argument is saved as an attribute of the same name. All of
        them except end_monitor and start_hist are also forwarded to
        BFASTMonitor, which is stored as self.model.

        parameters:
        -----------

        start_monitor : datetime object
            A datetime object specifying the start of
            the monitoring phase.

        end_monitor : datetime object
            A datetime object specifying the end of
            the monitoring phase. Only stored, not passed to the model.

        start_hist : datetime object
            A datetime object specifying the start of
            the history phase. Only stored, not passed to the model.

        freq : int
            The frequency for the seasonal model.

        k : int
            The number of harmonic terms.

        hfrac : float
            Float in the interval (0,1) specifying the
            bandwidth relative to the sample size in
            the MOSUM/ME monitoring processes.

        trend : bool
            Whether a trend offset term shall be used or not.

        level : float
            Significance level of the monitoring (and ROC,
            if selected) procedure, i.e., probability of
            type I error.

        backend : string, either 'opencl' or 'python' (default='opencl')
            Chooses what backend to use. opencl uses the GPU
            implementation, which is much faster.

        verbose : int, optional (default=1)
            The verbosity level (0=no output, 1=output)

        device_id : int, optional (default=0)
            Forwarded unchanged to BFASTMonitor.
        '''

        self.start_monitor = start_monitor
        self.end_monitor = end_monitor
        self.start_hist = start_hist
        self.freq = freq
        self.k = k
        self.hfrac = hfrac
        self.trend = trend
        self.level = level
        self.backend = backend
        self.verbose = verbose
        self.device_id = device_id

        self.model = BFASTMonitor(
            self.start_monitor,
            freq=freq,
            k=k,
            hfrac=hfrac,
            trend=trend,
            level=level,
            backend=backend,
            verbose=verbose,
            device_id=device_id,
        )

        # Only probe for an OpenCL device when that backend was requested;
        # otherwise the "You selected openCL" hint would be misleading.
        if backend == 'opencl':
            try:
                devices = pyopencl.get_platforms()[0].get_devices()
            except Exception:
                # best-effort diagnostic: covers a missing pyopencl module
                # as well as a machine without any OpenCL platform
                print(
                    "You selected  openCL, but no device was found, are you sure you set up a gpu session?"
                )
            else:
                print("device: ", devices)
Esempio n. 7
0
def run_bfast_(backend,
               k=3,
               freq=365,
               trend=False,
               hfrac=0.25,
               level=0.05,
               start_hist=datetime(2002, 1, 1),
               start_monitor=datetime(2010, 1, 1),
               end_monitor=datetime(2018, 1, 1)):
    """Fit BFASTMonitor to the peru_small sample data with the given backend.

    The sample files are downloaded into ``data/peru_small`` when they are
    not present yet.

    Parameters
    ----------
    backend : str
        Backend name forwarded to BFASTMonitor. When it equals "opencl"
        the fit is run with ``n_chunks=5``.
    k, freq, trend, hfrac, level
        Forwarded unchanged to BFASTMonitor.
    start_hist, start_monitor, end_monitor : datetime
        ``start_hist`` and ``end_monitor`` bound the cropped series;
        ``start_monitor`` is forwarded to BFASTMonitor.

    Returns
    -------
    tuple
        ``(breaks, means, magnitudes, valids)`` read from the fitted model.
    """
    print("Running the {} backend".format(backend))

    # download and parse input data
    data_dir = "data/peru_small"
    ifile_meta = "data/peru_small/dates.txt"
    ifile_data = "data/peru_small/data.npy"

    os.makedirs(data_dir, exist_ok=True)

    # Check every file on its own: previously these checks only ran when the
    # directory had just been created, so an existing directory with a
    # missing file was never repaired and np.load failed.
    downloads = (
        (ifile_meta, 'https://sid.erda.dk/share_redirect/fcwjD77gUY/dates.txt'),
        (ifile_data, 'https://sid.erda.dk/share_redirect/fcwjD77gUY/data.npy'),
    )
    for target, url in downloads:
        if not os.path.exists(target):
            wget.download(url, target)

    data_orig = np.load(ifile_data)
    with open(ifile_meta) as f:
        # one '%Y-%m-%d' date per non-empty line
        dates = [
            datetime.strptime(d, '%Y-%m-%d') for d in f.read().split('\n')
            if len(d) > 0
        ]

    data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)

    # fit BFASTMonitor model
    model = BFASTMonitor(
        start_monitor,
        freq=freq,
        k=k,
        hfrac=hfrac,
        trend=trend,
        level=level,
        backend=backend,
        verbose=0,
        device_id=0,
    )

    # only the OpenCL run is given an explicit chunk count
    fit_kwargs = {'nan_value': -32768}
    if backend == "opencl":
        fit_kwargs['n_chunks'] = 5

    start_time = time.time()
    model.fit(data, dates, **fit_kwargs)
    end_time = time.time()
    print("All computations have taken {} seconds.".format(end_time -
                                                           start_time))

    breaks = model.breaks
    means = model.means
    magnitudes = model.magnitudes
    valids = model.valids
    return breaks, means, magnitudes, valids
Esempio n. 8
0
# read the acquisition dates: one '%Y-%m-%d' entry per non-empty line
with open(ifile_meta) as f:
    dates = [
        datetime.strptime(d, '%Y-%m-%d')
        for d in f.read().split('\n')
        if d
    ]

# restrict data and dates to the history + monitoring period
data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)
print(f"First date: {dates[0]}")
print(f"Last date: {dates[-1]}")
print(f"Shape of data array: {data.shape}")

# BFASTMonitor on the OpenCL backend, first device
model = BFASTMonitor(start_monitor,
                     freq=freq,
                     k=k,
                     hfrac=hfrac,
                     trend=trend,
                     level=level,
                     backend='opencl',
                     verbose=1,
                     device_id=0)

# time the fit
start_time = time.time()
model.fit(data, dates, n_chunks=5, nan_value=-32768)
end_time = time.time()
print(f"All computations have taken {end_time - start_time} seconds.")

# results of the fit
breaks = model.breaks
means = model.means
Esempio n. 9
0
# history and monitoring period; the input data is cropped to this span
from bfast.monitor.utils import crop_data_dates
start_hist = datetime(2002, 1, 1)
start_monitor = datetime(2010, 1, 1)
end_monitor = datetime(2018, 1, 1)
data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)
print(f"First date: {dates[0]}")
print(f"Last date: {dates[-1]}")
print(f"Shape of data array: {data.shape}")

# run BFASTMonitor with the OpenCL backend on the first device (e.g., GPU)
from bfast import BFASTMonitor

model = BFASTMonitor(start_monitor,
                     freq=365,
                     k=3,
                     hfrac=0.25,
                     trend=False,
                     level=0.05,
                     backend='opencl',
                     device_id=0)
model.fit(data, dates, n_chunks=5, nan_value=-32768)

print("Detected breaks")
# -2 corresponds to not enough data for a pixel
# -1 corresponds to "no breaks detected"
# idx with idx >= 0 corresponds to the position of the first break
print(model.breaks)
Esempio n. 10
0
def calc_trend_breakpoints():
    """Compute BFAST breaks and piecewise-trend residuals and append them to a CSV.

    For every row of ``ds_lai.lut`` the LAI and (quality-filtered) VOD
    series are resampled to monthly means, passed through ``normalize`` and
    then:

    * fitted with BFASTMonitor (monitoring starts at ``start_date``) to
      record the first break date and magnitude;
    * for each candidate year in 2010..2019, split at that year, a
      Mann-Kendall line is fitted to each part and the residual sum of
      squares is recorded.

    One row per location is appended to ``fout``; the header is written
    only when the file does not exist yet. Returns nothing.
    """
    fout = Path('/Users/u0116961/Documents/work/deforestation_paper/breakpoints.csv')

    ds_lai = io('LAI')
    ds_vod = io('SMOS_IC')

    date_from = '2010-01-01'
    date_to = '2019-12-31'

    x0s = np.arange(2010, 2020)
    rss_cols = [f'RSS_LAI_{x0}' for x0 in x0s] + [f'RSS_VOD_{x0}' for x0 in x0s] + \
               ['bfast_lai_b1', 'bfast_lai_b2', 'bfast_lai_m1', 'bfast_lai_m2'] + \
               ['bfast_vod_b1', 'bfast_vod_b2', 'bfast_vod_m1', 'bfast_vod_m2']

    start_date = datetime(2012, 1, 1)  # start of the BFAST monitoring period
    bfast = BFASTMonitor(
        start_date,
        freq=365,
        k=3,
        hfrac=0.25,
        trend=False,
        level=0.05,
        verbose=0,
        backend='python',
        device_id=0,
    )

    for i, _ in ds_lai.lut.iterrows():
        print(f'{i} / {len(ds_lai.lut)}')

        lai = ds_lai.read('LAI', i, date_from=date_from, date_to=date_to).dropna()

        vod = ds_vod.read('VOD', i, date_from=date_from, date_to=date_to)
        if len(vod) > 0:
            # drop VOD retrievals that are flagged or exceed the error limits
            invalid = (ds_vod.read('Flags', i, date_from=date_from, date_to=date_to) > 0) | \
                      (ds_vod.read('RMSE', i, date_from=date_from, date_to=date_to) > 8) | \
                      (ds_vod.read('VOD_StdErr', i, date_from=date_from, date_to=date_to) > 1.2)
            vod[invalid] = np.nan
            vod = vod.dropna()

        if len(lai) < 1 or len(vod) < 1:
            continue

        mlai = pd.DataFrame(normalize(lai.resample('M').mean()), columns=['LAI'])
        mvod = pd.DataFrame(normalize(vod.resample('M').mean()), columns=['VOD'])

        rss = pd.DataFrame(columns=rss_cols, index=(i,))

        # --- BFAST on VOD (single pixel: shape (time, 1, 1)) ---
        data = np.reshape(mvod.values, (len(mvod), 1, 1))
        dates = mvod.index.to_pydatetime()
        bfast.fit(data, dates, n_chunks=5, nan_value=-32768)
        # Break indices are mapped to dates relative to the monitoring start,
        # exactly as done for LAI below. The previous hard-coded 2011-01-01
        # did not match the monitor's start_date and shifted VOD break dates.
        monitor_dates = dates[dates >= start_date]
        rss.loc[i, 'bfast_vod_m1'] = bfast.magnitudes[0][0]
        if bfast.breaks[0][0] >= 0:
            dt = monitor_dates[bfast.breaks[0][0]]
            rss.loc[i, 'bfast_vod_b1'] = dt.year + dt.month / 12
        # NOTE(review): with a single-pixel input the breaks array presumably
        # has a single row, so this branch likely never runs - confirm.
        if len(bfast.breaks) > 1:
            dt = monitor_dates[bfast.breaks.flatten()[1]]
            rss.loc[i, 'bfast_vod_b2'] = dt.year + dt.month / 12
            rss.loc[i, 'bfast_vod_m2'] = bfast.magnitudes.flatten()[1]

        # --- BFAST on LAI ---
        data = np.reshape(mlai.values, (len(mlai), 1, 1))
        dates = mlai.index.to_pydatetime()
        bfast.fit(data, dates, n_chunks=None, nan_value=-32768)
        rss.loc[i, 'bfast_lai_m1'] = bfast.magnitudes[0][0]
        if bfast.breaks[0][0] >= 0:
            dt = dates[dates >= start_date][bfast.breaks[0][0]]
            rss.loc[i, 'bfast_lai_b1'] = dt.year + dt.month / 12

        # --- piecewise Mann-Kendall trends for every candidate split year ---
        for x0 in x0s:

            mlai[f'LAI_trend_{x0}'] = np.nan
            mvod[f'VOD_trend_{x0}'] = np.nan

            lai_l = mlai[date_from:f'{x0}']['LAI']
            lai_u = mlai[f'{x0 + 1}':date_to]['LAI']

            vod_l = mvod[date_from:f'{x0}']['VOD']
            vod_u = mvod[f'{x0 + 1}':date_to]['VOD']

            for lai_part, vod_part in zip([lai_l, lai_u], [vod_l, vod_u]):

                # Best effort: a segment on which the trend test fails keeps
                # its NaN trend. Catch Exception rather than everything so
                # that KeyboardInterrupt/SystemExit still stop the run.
                try:
                    mk_lai = mk.original_test(lai_part)
                    mk_vod = mk.original_test(vod_part)

                    # months elapsed since the first year of the segment
                    x = (lai_part.index.year - lai_part.index.year.min()) * 12 + lai_part.index.month
                    mlai.loc[lai_part.index, f'LAI_trend_{x0}'] = mk_lai.intercept + mk_lai.slope * x.values

                    x = (vod_part.index.year - vod_part.index.year.min()) * 12 + vod_part.index.month
                    mvod.loc[vod_part.index, f'VOD_trend_{x0}'] = mk_vod.intercept + mk_vod.slope * x.values

                except Exception:
                    continue

            rss.loc[i, f'RSS_LAI_{x0}'] = ((mlai['LAI'] - mlai[f'LAI_trend_{x0}']) ** 2).sum()
            rss.loc[i, f'RSS_VOD_{x0}'] = ((mvod['VOD'] - mvod[f'VOD_trend_{x0}']) ** 2).sum()

        # append to an existing file, otherwise create it with a header
        if fout.exists():
            rss.to_csv(fout, float_format='%0.4f', mode='a', header=False)
        else:
            rss.to_csv(fout, float_format='%0.4f')
Esempio n. 11
0
# load the image stack and its acquisition dates (one '%Y-%m-%d' per line)
data_orig = numpy.load(ifile_data)
with open(ifile_meta) as f:
    dates = f.read().split('\n')
    dates = [datetime.strptime(d, '%Y-%m-%d') for d in dates if len(d) > 0]

# restrict data and dates to the history + monitoring period
data, dates = crop_data_dates(data_orig, dates, start_hist, end_monitor)
print("First date: {}".format(dates[0]))
print("Last date: {}".format(dates[-1]))
print("Shape of data array: {}".format(data.shape))

# fit BFAST using the CPU implementation (single pixel)
model = BFASTMonitor(start_monitor,
                     freq=freq,
                     k=k,
                     hfrac=hfrac,
                     trend=trend,
                     level=level,
                     backend='python',
                     verbose=1)

# only apply on a small subset
data = data[:, :50, :50]
model.fit(data, dates, nan_value=-32768)

# visualize results
breaks = model.breaks
means = model.means
# zero the means where no break was detected (-1) and wherever they are positive
no_breaks_indices = (breaks == -1)
means[no_breaks_indices] = 0
means[means > 0] = 0
# numpy.float was only an alias of the builtin float and was removed in
# NumPy 1.24; the builtin gives the same dtype on every NumPy version.
breaks_plot = breaks.astype(float)