def smooth(self):
    """
    Smooth the time series in ``self.data[self.dataset_name]`` and save it.

    The smoother is chosen by ``self.smoothing_method``:

    * ``'smoothn'`` applies ``smoothn`` (robust mode) along axis 0;
    * any other value is looked up as an attribute of ``tsa`` and fitted
      with ``smoothing_level=self.s``.

    In both cases ``self.s`` is the smoothing parameter. The computation is
    wrapped with ``xr.apply_ufunc`` (``dask='parallelized'``), the input
    attributes are copied to the result and the result is written to
    ``self.output_fname`` through ``save_dask_array``. Nothing is returned.
    """
    def __smoothn(_data, s):
        """Robust ``smoothn`` along axis 0, cast back to the input dtype."""
        return smoothn(_data, isrobust=True, s=s,
                       TolZ=1e-6, axis=0)[0].astype(_data.dtype)

    def __smooth_tsa(_data, _method, s):
        """Fit ``_method`` on ``_data`` and return the shifted fitted values."""
        fit = _method(_data).fit(smoothing_level=s)

        # Fitted values are moved one step back; the last position, which
        # has no fitted value after the shift, keeps the last observation.
        fittedvalues = np.zeros_like(fit.fittedvalues)
        fittedvalues[0:-1] = fit.fittedvalues[1::]
        # Use the block that was actually passed in rather than the
        # enclosing full DataArray, so shapes always agree.
        fittedvalues[-1] = _data[-1]

        return fittedvalues

    # Input time series
    y = self.data[self.dataset_name]

    if self.smoothing_method == 'smoothn':
        smoothed_data = xr.apply_ufunc(__smoothn, y, self.s,
                                       dask='parallelized',
                                       output_dtypes=[y.data.dtype])
    else:
        _method = getattr(tsa, self.smoothing_method)
        smoothed_data = xr.apply_ufunc(__smooth_tsa, y, _method, self.s,
                                       dask='parallelized',
                                       output_dtypes=[y.data.dtype])

    # Copy attributes
    smoothed_data.attrs = y.attrs

    save_dask_array(fname=self.output_fname,
                    data=smoothed_data,
                    data_var=self.dataset_name,
                    method=self.smoothing_method,
                    progressBar=self.progressBar)
def on_pbClimatology_click(self):
    """
    Compute the climatology and save every derived product to disk.

    Products written (all file names are derived from ``self.fname``
    without its extension):

    * per day-of-year boolean masks of values below ``Q1 - 1.5 * IQR``
      (lower boundary outliers) and above ``Q3 + 1.5 * IQR`` (upper
      boundary outliers);
    * the five quartile layers: Q1, median, Q3 and the two boundaries;
    * climatology mean and standard deviation (``self.ts``);
    * standard anomalies for every year that has as many time steps as
      there are day-of-year groups.

    The wait cursor and the progress bar are always reset, even when an
    error interrupts the processing.
    """
    # Wait cursor
    QtWidgets.QApplication.setOverrideCursor(Qt.WaitCursor)

    try:
        # Compute climatology
        self.ts.climatology()

        self.progressBar.setEnabled(True)
        self.progressBar.setValue(1)
        self.progressBar.setFormat(
            "Computing quartiles and saving outliers...")

        base_fname = os.path.splitext(self.fname)[0]
        var = self.data_vars.currentText()
        data = self.ts.data[var]

        quartiles = [0.25, 0.5, 0.75]
        # Layers of q_data, in order:
        # 0 - Q1
        # 1 - Median
        # 2 - Q3
        # 3 - Q1 - 1.5 * IQR
        # 4 - Q3 + 1.5 * IQR
        # NOTE: layer 2 used to be labelled 'Q2' although it stores the
        # 0.75 quantile; the output file is now '..._quartile_Q3.tif'.
        quartile_names = ['Q1', 'median', 'Q3', 'minimum', 'maximum']

        # Group by day of year to compute quartiles
        grouped_by_doy = data.time.groupby("time.dayofyear")
        # Get the number of time steps
        n_time_steps = len(grouped_by_doy.groups.keys())
        # rows and cols
        rows, cols = data.shape[1:]

        # Create output array
        q_data = np.zeros((len(quartile_names), n_time_steps, rows, cols))

        for i, (doy, _times) in enumerate(grouped_by_doy):
            self.progressBar.setValue(int((i / n_time_steps) * 100))

            # Get time series for DoY
            ts_doy = data.sel(time=_times.data)

            # Get quartiles for DoY
            q_data[0:3, i] = np.quantile(ts_doy, q=quartiles, axis=0)
            # Get IQR
            iqr = q_data[2, i] - q_data[0, i]
            # Get boundaries
            q_data[3, i] = q_data[0, i] - (1.5 * iqr)
            q_data[4, i] = q_data[2, i] + (1.5 * iqr)

            # Upper boundary outliers: values above Q3 + 1.5 * IQR
            upper_boundary_outliers = ts_doy > q_data[4, i]
            upper_boundary_outliers.attrs = ts_doy.attrs
            fname = (f'{base_fname}'
                     f'_upper_boundary_outliers_DoY_{doy:03d}.tif')
            save_dask_array(fname=fname, data=upper_boundary_outliers,
                            data_var=var, method=None, n_workers=4)

            # Lower boundary outliers: values below Q1 - 1.5 * IQR
            lower_boundary_outliers = ts_doy < q_data[3, i]
            lower_boundary_outliers.attrs = ts_doy.attrs
            fname = (f'{base_fname}'
                     f'_lower_boundary_outliers_DoY_{doy:03d}.tif')
            save_dask_array(fname=fname, data=lower_boundary_outliers,
                            data_var=var, method=None, n_workers=4)

        # Save quartiles
        self.progressBar.setValue(0)
        self.progressBar.setFormat("Saving quartiles...")

        for i, quartile_name in enumerate(quartile_names):
            self.progressBar.setValue(int((i / len(quartile_names)) * 100))

            fname = (f'{base_fname}'
                     f'_climatology_quartile_{quartile_name}.tif')
            # Reuse the climatology layout for coordinates and attributes
            tmp_ds = xr.zeros_like(self.ts.climatology_mean)
            tmp_ds.data = q_data[i]
            save_dask_array(fname=fname, data=tmp_ds,
                            data_var=var, method=None, n_workers=4)

        # Save climatology and per-year standard anomalies
        self.progressBar.setValue(0)
        self.progressBar.setFormat(
            "Saving climatologies and per-year anomalies...")

        fname = f'{base_fname}_climatology_mean.tif'
        save_dask_array(fname=fname, data=self.ts.climatology_mean,
                        data_var=var, method=None, n_workers=4)

        fname = f'{base_fname}_climatology_std.tif'
        save_dask_array(fname=fname, data=self.ts.climatology_std,
                        data_var=var, method=None, n_workers=4)

        grouped_by_year = data.time.groupby("time.year")
        n_years = len(grouped_by_year)

        for i, (_year, _times) in enumerate(grouped_by_year):
            self.progressBar.setValue(int((i / n_years) * 100))

            # Get time series for year
            ts_year = data.sel(time=_times.data)

            # Anomalies (only for full years)
            if len(ts_year.time) != n_time_steps:
                continue

            anomalies = ((ts_year - self.ts.climatology_mean.data)
                         / self.ts.climatology_std.data)
            anomalies.attrs = ts_year.attrs

            fname = f'{base_fname}_anomalies{_year}.tif'
            save_dask_array(fname=fname, data=anomalies,
                            data_var=var, method=None, n_workers=4)
    finally:
        # Always leave the UI in its idle state
        self.progressBar.setValue(0)
        self.progressBar.setEnabled(False)

        # Standard cursor
        QtWidgets.QApplication.restoreOverrideCursor()
def on_pbDecomposition_click(self):
    """
    Save decomposition products: Trend, Seasonality, Residuals

    Steps:

    1. run the annual peak frequency analysis;
    2. trend: centred rolling mean over ``period`` time steps, where
       ``period`` is read from the ``bandwidth`` widget;
    3. seasonality: day-of-year averages, centred (additive model) or
       normalised (any other model) by their own mean;
    4. residuals: data minus trend and seasonality (additive) or data
       divided by seasonality and trend (otherwise).

    The model is additive when the text of the ``model`` widget starts
    with ``'a'``. Each product is written to
    ``<fname>_seasonal_decomposition_<product>.tif``. The wait cursor and
    the progress bar are always reset, even if an error occurs.
    """
    # Wait cursor
    QtWidgets.QApplication.setOverrideCursor(Qt.WaitCursor)

    try:
        # Annual frequency of peaks and valleys
        self.__frequency_analysis()

        self.progressBar.setEnabled(True)
        self.progressBar.setFormat("Computing time series decomposition...")
        self.progressBar.setValue(1)

        # Extract period from the current single year
        period = int(self.bandwidth.currentText())
        nobs = len(self.left_ds)

        # Get data type
        dtype = self.left_ds.dtype

        # Get trend based on a moving window
        trend = self.left_ds.rolling(time=period, min_periods=1,
                                     center=True).mean().astype(dtype)
        trend.attrs = self.left_ds.attrs

        period_averages = self.left_ds.groupby("time.dayofyear")
        period_averages = period_averages.mean(axis=0).astype(dtype)

        if self.model.currentText()[0] == 'a':
            # Additive model
            period_averages -= period_averages.mean(axis=0).astype(dtype)
            # Repeat the seasonal cycle to cover all observations
            seasonal = np.tile(period_averages.T,
                               nobs // period + 1).T[:nobs]
            residuals = (self.left_ds - trend - seasonal).astype(dtype)
        else:
            # Multiplicative model
            period_averages /= period_averages.mean(axis=0).astype(dtype)
            seasonal = np.tile(period_averages.T,
                               nobs // period + 1).T[:nobs]
            residuals = (self.left_ds / seasonal / trend).astype(dtype)

        # The tiled seasonal component is only needed for the residuals
        del seasonal

        period_averages.attrs = self.left_ds.attrs
        residuals.attrs = self.left_ds.attrs

        # Save to disk
        products = [trend, period_averages, residuals]
        product_names = ['trend', 'seasonality', 'residuals']
        var = self.data_vars.currentText()
        base_fname = os.path.splitext(self.fname)[0]

        for product, data in zip(product_names, products):
            fname = f'{base_fname}_seasonal_decomposition_{product}.tif'

            msg = f"Computing time series decomposition - {product}..."
            self.progressBar.setFormat(msg)
            self.progressBar.setValue(1)

            save_dask_array(fname=fname, data=data,
                            data_var=var, method=None, n_workers=4,
                            progressBar=self.progressBar)

            self.progressBar.setValue(1)
    finally:
        # Always leave the UI in its idle state
        self.progressBar.setValue(0)
        self.progressBar.setEnabled(False)

        # Standard cursor
        QtWidgets.QApplication.restoreOverrideCursor()
def __frequency_analysis(self):
    """
    Compute and save the annual frequency of peaks.

    For every pixel of ``self.left_ds`` the peaks of its time series are
    located with ``find_peaks``. Peaks are then counted per calendar
    year and three products are written next to ``self.fname``:

    * ``_peaks.tif``: number of peaks for each year;
    * ``_one_peak_frequency.tif``: fraction of years with exactly one peak;
    * ``_two_peaks_frequency.tif``: fraction of years with exactly two.

    Only peaks are detected here; valleys are not computed.
    """
    self.progressBar.setEnabled(True)
    self.progressBar.setFormat(
        "Computing one and two-peak annual frequencies...")
    self.progressBar.setValue(1)

    _, rows, cols = self.left_ds.shape

    # Set required distance to be consider an independent peak
    # The assumption is that a peak should occur on a different
    # season, hence getting all time steps on a single year / 4
    distance = int(np.ceil(len(self.single_year_ds.time) / 4))

    # Peak flags are collected in a plain NumPy buffer; every column of
    # the data set is loaded once instead of indexing the xarray object
    # for each single pixel.
    # TODO still a per-pixel loop, an array-based solution is desirable
    peak_flags = np.zeros(self.left_ds.shape, dtype=self.left_ds.dtype)
    for c in range(cols):
        self.progressBar.setValue(int(((c + 1) / cols) * 100))
        column = self.left_ds[:, :, c].values
        for r in range(rows):
            idx, _ = find_peaks(column[:, r], distance=distance)
            peak_flags[idx, r, c] = 1

    # Same coordinates, name and attributes as the input data
    peaks = self.left_ds.copy(data=peak_flags)

    # Get the number of peaks on a calendar year
    annual_peaks = peaks.groupby("time.year")
    annual_peaks = annual_peaks.sum(dim='time').astype(np.int8)
    # Copy attributes
    annual_peaks.attrs = self.left_ds.attrs

    n_years = len(annual_peaks.year)

    def _frequency_of(n_peaks):
        """Fraction of years in which exactly ``n_peaks`` were found."""
        freq = annual_peaks.where(annual_peaks == n_peaks).count(dim='year')
        freq = freq / n_years
        # Copy attributes
        freq.attrs = self.left_ds.attrs
        # Add time dimension
        return freq.expand_dims(dim='time', axis=0)

    # Get the frequency of one and two peaks
    freq_one_peak = _frequency_of(1)
    freq_two_peak = _frequency_of(2)

    self.progressBar.setFormat("Saving peaks and valleys frequencies...")
    self.progressBar.setValue(1)

    base_fname = os.path.splitext(self.fname)[0]
    var = self.data_vars.currentText()
    outputs = (('_peaks.tif', annual_peaks),
               ('_one_peak_frequency.tif', freq_one_peak),
               ('_two_peaks_frequency.tif', freq_two_peak))

    for suffix, product in outputs:
        save_dask_array(fname=f'{base_fname}{suffix}', data=product,
                        data_var=var, method=None, n_workers=4,
                        progressBar=self.progressBar)

    self.progressBar.setValue(0)
def on_pbMKTest_click(self):
    """
    Compute and save Mann-Kendall test products

    The test runs on ``self.left_ds`` or, when the extent of the plot
    differs from the extent of the full image, on the spatial subset that
    is currently displayed. Four products are written to
    ``<fname>_mann-kendall_test_<product>.tif``:

    * ``z``: normalised test statistic;
    * ``p``: two-tailed p-value;
    * ``h``: 1 where ``|z|`` exceeds the critical value (alpha = 0.05);
    * ``trend``: -1 / 0 / 1 for a significant decreasing trend, no
      significant trend, or a significant increasing trend.

    The wait cursor and the progress bar are always reset, even if an
    error occurs.
    """
    # Wait cursor
    QtWidgets.QApplication.setOverrideCursor(Qt.WaitCursor)

    try:
        self.progressBar.setEnabled(True)
        self.progressBar.setFormat("Computing Mann-Kendall test...")
        self.progressBar.setValue(1)

        # Check if we have to subset the data
        if self.left_imshow.get_extent() != self.left_p.get_extent():
            # Create subset
            w, e, s, n = self.left_p.get_extent()
            _data = self.left_ds.sel(longitude=slice(int(w), int(e)),
                                     latitude=slice(int(n), int(s)))
            new_gt = get_geotransform_from_xarray(_data)
            _data.attrs['transform'] = new_gt
        else:
            _data = self.left_ds

        def __get_z(x):
            """
            Normalised Mann-Kendall statistic along the last axis of x.

            The sum of signs is accumulated in float64: accumulating in
            the input dtype overflows for narrow integer types (int16
            with 257 or more time steps) and unsigned differences wrap.
            """
            n = x.shape[-1]
            s = np.zeros(x.shape[:-1], dtype=np.float64)
            for k in range(n - 1):
                reference = x[..., k].astype(np.float64)
                for j in range(k + 1, n):
                    s += np.sign(x[..., j] - reference)

            # Variance of S (no correction for ties)
            var_s = (n * (n - 1) * (2 * n + 5)) / 18

            z = np.where(s > 0,
                         (s - 1) / np.sqrt(var_s),
                         (s + 1) / np.sqrt(var_s)).astype(np.float32)
            z[s == 0] = 0.0

            return z

        def __get_p(z):
            """Two-tailed p-value of z."""
            return (2 * (1 - norm.cdf(abs(z)))).astype(np.float32)

        def __get_h(z, alpha=0.05):
            """1 where |z| exceeds the two-tailed critical value, else 0."""
            return (abs(z) > norm.ppf(1 - alpha / 2)).astype(np.int8)

        def __get_trend(z, h):
            """-1 / 0 / 1 trend sign, non-zero only where h is set."""
            significant = (h == 1)
            trend = np.where((z < 0) & significant, -1, 0).astype(np.int16)
            return np.where((z > 0) & significant, 1, trend)

        # Time is moved to the last axis by input_core_dims
        z = xr.apply_ufunc(__get_z, _data,
                           input_core_dims=[['time']],
                           dask='parallelized',
                           output_dtypes=[np.float32])
        z = z.compute()

        p = xr.apply_ufunc(__get_p, z,
                           dask='parallelized',
                           output_dtypes=[np.float32])

        # Declared dtype matches what __get_h actually returns
        h = xr.apply_ufunc(__get_h, z,
                           dask='parallelized',
                           output_dtypes=[np.int8])

        trend = xr.apply_ufunc(__get_trend, z, h,
                               dask='parallelized',
                               output_dtypes=[np.int16])

        # Save products
        var = self.data_vars.currentText()
        base_fname = os.path.splitext(self.fname)[0]
        products = [z, p, h, trend]
        product_names = ['z', 'p', 'h', 'trend']

        for product, data in zip(product_names, products):
            fname = f'{base_fname}_mann-kendall_test_{product}.tif'

            msg = f"Saving Mann-Kendal test - {product}..."
            self.progressBar.setFormat(msg)
            self.progressBar.setValue(1)

            # Set attributes from input data
            data.attrs = _data.attrs
            # Add time dimension
            data = data.expand_dims(dim='time', axis=0)

            save_dask_array(fname=fname, data=data,
                            data_var=var, method=None, n_workers=4,
                            progressBar=self.progressBar)

            self.progressBar.setValue(1)
    finally:
        # Always leave the UI in its idle state
        self.progressBar.setValue(0)
        self.progressBar.setEnabled(False)

        # Standard cursor
        QtWidgets.QApplication.restoreOverrideCursor()
def on_pbCPD_click(self):
    """
    Compute change points on the trend of the time series

    The trend is the centred rolling mean of ``self.left_ds`` over
    ``period`` time steps (read from the ``bandwidth`` widget). For every
    pixel the trend is passed to ``self.cpt.cpt_meanvar`` (method
    'BinSeg', penalty 'SIC') and the detected positions are flagged in an
    int16 array that is saved as ``<fname>_change_points.tif``.

    The wait cursor and the progress bar are always reset, even if an
    error occurs.
    """
    # Wait cursor
    QtWidgets.QApplication.setOverrideCursor(Qt.WaitCursor)

    try:
        msg = "Identifying change points..."
        self.progressBar.setEnabled(True)
        self.progressBar.setFormat(msg)
        self.progressBar.setValue(1)

        # Compute first the trend
        period = int(self.bandwidth.currentText())

        # Get data type
        dtype = self.left_ds.dtype

        # Get trend based on a moving window
        trend = self.left_ds.rolling(time=period, min_periods=1,
                                     center=True).mean().astype(dtype)
        trend = trend.compute()
        trend.attrs = self.left_ds.attrs

        # Output data
        output = xr.zeros_like(trend).astype(np.int16).load()
        output.attrs = self.left_ds.attrs

        _, rows, cols = trend.shape

        # CPD methods
        _method = 'BinSeg'
        _penalty = 'SIC'

        for x in range(cols):
            self.progressBar.setValue(int((x / cols) * 100))
            for y in range(rows):
                r_vector = FloatVector(trend[:, y, x])

                changepoints_r = self.cpt.cpt_meanvar(r_vector,
                                                      test_stat='Normal',
                                                      method=_method,
                                                      penalty=_penalty)

                changepoints = numpy2ri.rpy2py(
                    self.cpt.cpts(changepoints_r)).astype(int)

                if changepoints.shape[0] > 0:
                    # NOTE(review): positions reported by R are shifted
                    # by one before being flagged - confirm this is the
                    # intended index convention.
                    output[changepoints + 1, y, x] = True

        fname = f'{os.path.splitext(self.fname)[0]}_change_points.tif'

        self.progressBar.setFormat("Saving change points...")
        self.progressBar.setValue(1)

        save_dask_array(fname=fname, data=output,
                        data_var=self.data_vars.currentText(),
                        method=None, n_workers=4,
                        progressBar=self.progressBar)
    finally:
        # Always leave the UI in its idle state
        self.progressBar.setValue(0)
        self.progressBar.setEnabled(False)

        # Standard cursor
        QtWidgets.QApplication.restoreOverrideCursor()