def calc_anom(df, variable=None):
    """
    Compute the climatology-based anomaly of one column of a DataFrame.

    Parameters
    ----------
    df : pandas DataFrame
        Dataframe containing the time series. It is modified in place:
        the selected column is overwritten with its anomaly and every
        column name receives the suffix ``_anomaly``.
    variable : str, optional
        Column to convert. When omitted, the first column of ``df``
        is used.

    Returns
    -------
    df : pandas DataFrame
        The object that was passed in, after the modification.
    """
    # The climatology is derived from the complete frame, before the
    # target column is chosen.
    clim = calc_climatology(df)

    target = df.keys()[0] if variable is None else variable
    df[target] = calc_anomaly(df[target], climatology=clim)

    # Every column is renamed, not only the converted one.
    df.columns = [name + '_anomaly' for name in df.columns]
    return df
def calc_anom(self, data):
    """
    Replace columns of ``data`` by their climatology-based anomalies.

    The columns listed in ``self.columns`` are processed; when that
    attribute is None, every column of ``data`` is processed. For each
    column a climatology is computed with ``calc_climatology`` (called
    with ``self.kwargs``) and handed to ``calc_anomaly``. ``data`` is
    updated in place and returned.
    """
    names = data if self.columns is None else self.columns
    for name in names:
        climatology = calc_climatology(data[name], **self.kwargs)
        data[name] = calc_anomaly(data[name], climatology=climatology)
    return data
def calc_anom(self, data):
    """
    Replace columns of ``data`` by their moving-window anomalies.

    The columns listed in ``self.columns`` are processed; when that
    attribute is None, every column of ``data`` is processed. Each
    column is passed to ``calc_anomaly`` together with
    ``self.window_size``. ``data`` is updated in place and returned.
    """
    names = data if self.columns is None else self.columns
    for name in names:
        data[name] = calc_anomaly(data[name], window_size=self.window_size)
    return data
def test_anomaly_calc_given_climatology_no_leap_year():
    """
    Anomaly against a supplied 366-entry climatology for a 365-day year.

    The data of 2007 equal ``0..364`` and the climatology, indexed by
    ``1..366``, equals ``0..365``; the expected anomaly is zero on every
    day when ``respect_leap_years=True``.
    """
    n_days = 365
    dates = pd.date_range('2007-01-01', periods=n_days)

    clim = pd.Series(np.arange(366), name='clim', index=np.arange(366) + 1)
    data = pd.Series(np.arange(n_days), index=dates)
    expected = pd.Series(np.zeros(n_days), index=dates)

    result = anomaly.calc_anomaly(
        data, climatology=clim, respect_leap_years=True)

    pdt.assert_series_equal(expected, result, check_dtype=False)
def _adapt(self, data):
    """
    Run the parent adapter, then convert columns to climatology anomalies.

    After ``super()._adapt(data)``, the columns listed in
    ``self.columns`` (or every column when that attribute is None) are
    replaced by ``calc_anomaly`` of the column, using a climatology
    computed by ``calc_climatology`` with ``self.kwargs``. The adapted
    data is returned.
    """
    data = super()._adapt(data)
    names = data if self.columns is None else self.columns
    for name in names:
        climatology = calc_climatology(data[name], **self.kwargs)
        data[name] = calc_anomaly(data[name], climatology=climatology)
    return data
def _adapt(self, data):
    """
    Run the parent adapter, then convert columns to window anomalies.

    After ``super()._adapt(data)``, the columns listed in
    ``self.columns`` (or every column when that attribute is None) are
    replaced by ``calc_anomaly`` of the column, called with
    ``window_size=self.window_size``. The adapted data is returned.
    """
    data = super()._adapt(data)
    names = data if self.columns is None else self.columns
    for name in names:
        data[name] = calc_anomaly(data[name], window_size=self.window_size)
    return data
def plot_innov_ts_example():
    """
    Plot an example observation/forecast time series and its O-F residuals.

    Upper panel: the 'obs_fcst' and 'obs_obs' series of species 0 at one
    hard-coded location. Lower panel: observation-minus-forecast after
    rescaling with (a) the climatology and (b) the seasonality returned
    by ``calc_anomaly`` without an explicit climatology. The figure is
    shown interactively; nothing is returned.
    """
    lat, lon = 41.70991616028507, -92.39133686043398
    sources = ('obs_fcst', 'obs_obs')

    io_smap = LDAS_io('ObsFcstAna', exp='US_M36_SMOS40_DA_cal_scaled')
    # alternative experiment: exp='US_M36_SMOS_DA_cal_scaled_yearly'
    idx_lon, idx_lat = io_smap.grid.lonlat2colrow(lon, lat, domain=True)
    print(idx_lon, idx_lat)

    point = io_smap.timeseries.isel(lat=idx_lat, lon=idx_lon, species=0)
    ts = point.to_dataframe()[['obs_fcst', 'obs_obs']].dropna()

    plt.figure(figsize=(21, 8))

    ax = plt.subplot(2, 1, 1)
    sns.lineplot(data=ts, dashes=False, ax=ax)
    plt.title(f'{lat:.2f} N, {lon:.2f} W')
    plt.xlabel('')
    plt.ylabel('Tb')

    # ---- climatology ----
    for src in sources:
        clim = calc_climatology(ts[src])
        with_clim = calc_anomaly(ts[src], return_clim=True, climatology=clim)
        ts[src + '_clim'] = with_clim['climatology']

    # ---- seasonality: original series minus its default anomaly ----
    for src in sources:
        ts[src + '_seas'] = ts[src] - calc_anomaly(ts[src])

    ax = plt.subplot(2, 1, 2)
    ts['climatology_scaled'] = (
        ts['obs_obs'] - ts['obs_obs_clim'] + ts['obs_fcst_clim'] - ts['obs_fcst']
    )
    ts['seasonality_scaled'] = (
        ts['obs_obs'] - ts['obs_obs_seas'] + ts['obs_fcst_seas'] - ts['obs_fcst']
    )
    residuals = ts[['climatology_scaled', 'seasonality_scaled']]
    sns.lineplot(data=residuals, dashes=False, ax=ax)
    plt.axhline(color='black', linewidth=1, linestyle='--')
    plt.xlabel('')
    plt.ylabel('O-F')

    plt.tight_layout()
    plt.show()
import matplotlib.pyplot as plt
import os

if __name__ == '__main__':
    # Example script: read one grid point from a time series dataset,
    # compute its climatology and the anomaly relative to it, and plot both.
    #
    # All output uses single-argument print(...) calls so that the script
    # parses on Python 3 and prints the same text on Python 2.
    cur_path = os.path.abspath(os.path.curdir)
    input_dataidx_file = os.path.join(cur_path, 'data/ssmi/0673')
    gpi = 720360

    dat_obj = datasets.DatasetTs(input_dataidx_file)
    print(dat_obj.dat_data.dtype)
    print('{} {}'.format(type(dat_obj.dat_data), dat_obj.dat_data.shape))

    gpi_data = dat_obj.read_ts(gpi)
    print('{} {}'.format(gpi_data.dtype, gpi_data.shape))

    # Series of the 'sm' field indexed by the 'jd' field of the record array.
    Ser = pd.Series(gpi_data['sm'], index=gpi_data['jd'])

    # Adjust the parameters
    climatology = anomaly.calc_climatology(Ser)
    print(climatology)
    climatology.plot()
    plt.show()

    anom = anomaly.calc_anomaly(Ser, climatology=climatology)
    anom.plot()
    plt.show()
    print(anom)
def plot_clim_anom(df, clim=None, axes=None, markersize=0.75,
                   mfc='0.3', mec='0.3', clim_color='0.0',
                   clim_linewidth=0.5, clim_linestyle='-',
                   pos_anom_color='#799ADA', neg_anom_color='#FD8086',
                   anom_linewidth=0.2, add_titles=True):
    """
    Takes a pandas DataFrame and calculates the climatology and anomaly
    and plots them in a nice way for each column

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        A Series (or Series subclass) is converted to a one-column
        DataFrame.
    clim : pandas.DataFrame, optional
        if given these climatologies will be used
        if not given then climatologies will be calculated
        this DataFrame must have the same number of columns as df
        and also the column names.
        each climatology must have doy as index.
    axes : list of matplotlib.Axes, optional
        list of axes on which each column should be plotted
        if not given a standard layout is generated
    markersize : float, optional
        size of the markers for the datapoints
    mfc : matplotlib color, optional
        markerfacecolor, color of the marker face
    mec : matplotlib color, optional
        markeredgecolor
    clim_color : matplotlib color, optional
        color of the climatology
    clim_linewidth : float, optional
        linewidth of the climatology
    clim_linestyle : string, optional
        linestyle of the climatology
    pos_anom_color : matplotlib color, optional
        color of the positive anomaly
    neg_anom_color : matplotlib color, optional
        color of the negative anomaly
    anom_linewidth : float, optional
        linewidth of the anomaly lines
    add_titles : boolean, optional
        if set each subplot will have its column name as title
        Default : True

    Returns
    -------
    Figure : matplotlib.Figure
        the generated figure if no axes were given, otherwise None
    axes : list of matplotlib.Axes
        the generated axes if no axes were given, otherwise None
    """
    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of pandas.Series.
    if isinstance(df, pd.Series):
        df = pd.DataFrame(df)

    nr_columns = len(df.columns)

    # make own axis if necessary
    if axes is None:
        own_axis = True
        gs = gridspec.GridSpec(nr_columns, 1, right=0.8)

        fig = plt.figure(num=None, figsize=(6, 2 * nr_columns),
                         dpi=150, facecolor='w', edgecolor='k')

        # The bottom axis is created first so that all others can share
        # its x axis; it is appended last to keep top-to-bottom order.
        last_axis = fig.add_subplot(gs[nr_columns - 1])
        axes = []
        for i, grid in enumerate(gs):
            if i < nr_columns - 1:
                ax = fig.add_subplot(grid, sharex=last_axis)
                axes.append(ax)
                ax.xaxis.set_visible(False)
        axes.append(last_axis)
    else:
        own_axis = False

    for i, column in enumerate(df):
        Ser = df[column]
        ax = axes[i]

        if clim is None:
            clima = anom.calc_climatology(Ser)
        else:
            clima = pd.Series(clim[column])

        # With return_clim=True the result is indexed by 'climatology'
        # below; the original values are added under the series name.
        anomaly = anom.calc_anomaly(Ser, climatology=clima, return_clim=True)
        anomaly[Ser.name] = Ser
        anomaly = anomaly.dropna()

        values = anomaly[Ser.name].values
        clim_values = anomaly['climatology'].values
        pos_anom = values > clim_values
        neg_anom = values < clim_values

        ax.plot(anomaly.index, values, 'o',
                markersize=markersize, mfc=mfc, mec=mec)

        ax.plot(anomaly.index, clim_values,
                linestyle=clim_linestyle,
                color=clim_color,
                linewidth=clim_linewidth)

        ax.fill_between(anomaly.index, values, clim_values,
                        interpolate=True, where=pos_anom,
                        color=pos_anom_color,
                        linewidth=anom_linewidth)
        ax.fill_between(anomaly.index, values, clim_values,
                        interpolate=True, where=neg_anom,
                        color=neg_anom_color,
                        linewidth=anom_linewidth)
        if add_titles:
            ax.set_title(column)

    if own_axis:
        return fig, axes
    else:
        return None, None
def compare_data(ismn_data, validation_data, scaling='linreg', anomaly=None):
    """
    Compare data from an ISMN station to the defined validation datasets.

    Parameters
    ----------
    ismn_data: pandas.Dataframe
        Data from the ISMN used as a reference
    validation_data: dict
        Dictionary of pandas.DataFrames, One for each dataset to
        compare against
    scaling: string, optional
        Scaling method to use. Must be a key of the ``scaling_options``
        dictionary defined in the function body.
    anomaly: string
        If set then the validation is done for anomalies.
        The values checked for are 'climatology' and 'average'.

    Returns
    -------
    output_data : dict
        Dictionary with the keys 'validation_data', 'masking_data',
        'statistics' and 'settings'.
    status : int
        Always 1.
    """
    insitu_label = 'soil moisture'

    # NOTE(review): ``ascat_masked`` and ``ISMN_data`` are assigned further
    # down in this function, which makes them local names; they are read
    # here before any assignment, so this branch cannot run as written.
    # ``ascat_label`` is not defined in this function either (presumably a
    # module-level name -- verify). ``anomaly != None`` would conventionally
    # be ``anomaly is not None``.
    if anomaly != None:

        if anomaly == 'climatology':
            ascat_clim = anomaly_calc.calc_climatology(
                ascat_masked[ascat_label])
            insitu_clim = anomaly_calc.calc_climatology(
                ismn_data['soil moisture'])

            ascat_anom = anomaly_calc.calc_anomaly(ascat_masked[ascat_label],
                                                   climatology=ascat_clim)
            ascat_masked[ascat_label] = ascat_anom.values

            insitu_anom = anomaly_calc.calc_anomaly(ISMN_data['insitu'],
                                                    climatology=insitu_clim)
            ISMN_data['insitu'] = insitu_anom.values

        if anomaly == 'average':
            ascat_anom = anomaly_calc.calc_anomaly(ascat_masked[ascat_label])
            ascat_masked[ascat_label] = ascat_anom.values

            insitu_anom = anomaly_calc.calc_anomaly(ISMN_data['insitu'])
            ISMN_data['insitu'] = insitu_anom.values

        ascat_masked = ascat_masked.dropna()
        ISMN_data = ISMN_data.dropna()

    for dname in validation_data:
        vdata = validation_data[dname]
        # The column compared against is hard-coded, whatever the dataset name.
        vdata_label = 'cci_sm'

        matched_data = temp_match.matching(ismn_data, vdata, window=1)

        # NOTE(review): scaling == 'porosity' matches neither branch, so
        # ``scaled_data`` and ``scaled_label`` stay unassigned for it.
        if scaling != 'noscale' and scaling != 'porosity':
            scaled_data = scale.add_scaled(matched_data,
                                           label_in=vdata_label,
                                           label_scale=insitu_label,
                                           method=scaling)
            scaled_label = vdata_label + '_scaled_' + scaling
            scaled_data = scaled_data[[insitu_label, scaled_label]]
        elif scaling == 'noscale':
            scaled_data = matched_data[[insitu_label, vdata_label]]
            scaled_label = vdata_label

    # scaled_data.rename(columns={'insitu': ISMN_ts_name}, inplace=True)

    labels, values = scaled_data.to_dygraph_format()

    ascat_insitu = {'labels': labels, 'data': values}

    # x: in situ reference values, y: (scaled) validation dataset values
    x, y = scaled_data[insitu_label].values, scaled_data[scaled_label].values

    kendall, p_kendall = sc_stats.kendalltau(x.tolist(), y.tolist())
    spearman, p_spearman = sc_stats.spearmanr(x, y)
    pearson, p_pearson = sc_stats.pearsonr(x, y)
    # NOTE(review): ``rmsd`` is not used below; the reported 'rmsd' values
    # are square roots of the ``metrics.mse`` outputs.
    rmsd = metrics.rmsd(x, y)
    bias = metrics.bias(y, x)
    mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)

    # All statistics are stored as formatted strings.
    statistics = {
        'kendall': {
            'v': '%.2f' % kendall,
            'p': '%.4f' % p_kendall
        },
        'spearman': {
            'v': '%.2f' % spearman,
            'p': '%.4f' % p_spearman
        },
        'pearson': {
            'v': '%.2f' % pearson,
            'p': '%.4f' % p_pearson
        },
        'bias': '%.4f' % bias,
        'rmsd': {
            'rmsd': '%.4f' % np.sqrt(mse),
            'rmsd_corr': '%.4f' % np.sqrt(mse_corr),
            'rmsd_bias': '%.4f' % np.sqrt(mse_bias),
            'rmsd_var': '%.4f' % np.sqrt(mse_var)
        },
        'mse': {
            'mse': '%.4f' % mse,
            'mse_corr': '%.4f' % mse_corr,
            'mse_bias': '%.4f' % mse_bias,
            'mse_var': '%.4f' % mse_var
        }
    }

    # Display text for each accepted ``scaling`` value.
    scaling_options = {
        'noscale': 'No scaling',
        'porosity': 'Scale using porosity',
        'linreg': 'Linear Regression',
        'mean_std': 'Mean - standard deviation',
        'min_max': 'Minimum,maximum',
        'lin_cdf_match': 'Piecewise <br> linear CDF matching',
        'cdf_match': 'CDF matching'
    }

    settings = {
        'scaling': scaling_options[scaling],
        # 'snow_depth': mask['snow_depth'],
        # 'surface_temp': mask['st_l1'],
        # 'air_temp': mask['air_temp']
    }

    # Empty placeholder returned under the 'masking_data' key.
    era_data = {'labels': [], 'data': []}

    output_data = {
        'validation_data': ascat_insitu,
        'masking_data': era_data,
        'statistics': statistics,
        'settings': settings
    }

    return output_data, 1
# Calculate SWI T=10 ts_ascat_sm['swi_t10'] = exp_filter(ts_ascat_sm['sm'].values, jd, ctime=10) ts_ascat_sm['swi_t50'] = exp_filter(ts_ascat_sm['sm'].values, jd, ctime=50) fig1, ax = plt.subplots(1, 1, figsize=(15, 5)) ts_ascat_sm['sm'].plot(ax=ax, alpha=0.4, marker='o', color='#00bfff', label='SSM') ts_ascat_sm['swi_t10'].plot(ax=ax, lw=2, label='SWI T=10') ts_ascat_sm['swi_t50'].plot(ax=ax, lw=2, label='SWI T=50') plt.legend() # Calculate anomaly based on moving +- 17 day window anomaly_ascat = anomaly.calc_anomaly(ts_ascat['sm'], window_size=35) fig2, ax = plt.subplots(1, 1, figsize=(15, 5)) anomaly_ascat.plot(ax=ax, lw=2, label='ASCAT SM Anomaly') plt.legend() # Calculate climatology ts_ascat_clim = ts_ascat.dropna() climatology_ascat = anomaly.calc_climatology(ts_ascat_clim['sm']) fig3, ax = plt.subplots(1, 1, figsize=(15, 5)) climatology_ascat.plot(ax=ax, lw=2, label='ASCAT SM Climatology') plt.legend() # Calculate anomaly based on climatology ts_ascat_clim = ts_ascat.dropna() anomaly_clim_ascat = anomaly.calc_anomaly(ts_ascat_clim['sm'], climatology=climatology_ascat)
if calcstd == True: s1_ts_std = s1_ts_std[start:end] if len(s1_ts) < 1: continue s1_ts_res = s1_ts.resample('D').mean() station_ts_res = station_ts.resample('D').mean() if calc_anomalies == True: from pytesmo.time_series import anomaly as pyan s1_clim = pyan.calc_climatology(s1_ts_res.interpolate()) station_clim = pyan.calc_climatology( station_ts_res.interpolate()) s1_ts = pyan.calc_anomaly(s1_ts, climatology=s1_clim) s1_ts_res = pyan.calc_anomaly(s1_ts_res, climatology=s1_clim) station_ts = pyan.calc_anomaly(station_ts, climatology=station_clim) station_ts_res = pyan.calc_anomaly(station_ts_res, climatology=station_clim) # calculate error metrics ts_bias = s1_ts_res.subtract(station_ts_res).mean() tobemerged = [s1_ts_res.dropna(), station_ts_res.dropna()] s1_and_station = pd.concat(tobemerged, axis=1, join='inner') ts_bias = s1_and_station[0].subtract(s1_and_station[1]).median() xytmp = pd.concat( {
# Min-max normalise the series with the backscatter range, then map the
# result linearly onto the interval 5..40 (factor 35, offset 5).
bcksct_rng = bcksct_max - bcksct_min
s1pd_scaled = (s1pd - bcksct_min) / bcksct_rng
s1pd_scaled = (s1pd_scaled * 35.) + 5.
# s1pd_scaled = (s1pd - bcksct_min) / bcksct_rng
# s1pd_scaled = (s1pd_scaled * 15.) + 3.

# Daily means; gaps introduced by the resampling are interpolated in place.
s1pd_scaled = s1pd_scaled.resample('D').mean()
s1pd_scaled.interpolate(inplace=True)

# anomalies
#bcksct_mean = s1pd_scaled.mean()
#s1pd_anom = (s1pd_scaled / bcksct_mean) - 1
climatology = anomaly.calc_climatology(s1pd_scaled,
                                       moving_avg_orig=5,
                                       moving_avg_clim=30)
s1pd_anom = anomaly.calc_anomaly(s1pd_scaled, climatology=climatology)

# Figure 1: the scaled series
plt.figure(figsize=(6.3, 3.7))
s1pd_scaled.plot()
plt.xlabel('Date', fontsize=10)
plt.ylabel('SMC [m3/m-3]')
plt.ylim((0, 50))
# NOTE(review): plt.show() runs before plt.savefig(); with an interactive
# backend the saved file may be empty -- confirm the intended order.
plt.show()
plt.savefig('/mnt/SAT/Workspaces/GrF/02_Documents/ISRSE2017/s1ts.png',
            dpi=300)
plt.close()

# Figure 2: the anomaly with a dashed red zero line over a fixed date range
plt.figure(figsize=(6.3, 3.7))
s1pd_anom.plot()
timex = pd.date_range('30/9/2014', '30/4/2017', freq='D')
zerolist = pd.Series([0] * len(timex), index=timex)
zerolist.plot(linestyle='--', color='r')
def mazia_vaildation_run(bsize=500, name='500m', fvect1=None, fvect2=None):
    """
    Validate Sentinel-1 soil moisture estimates against the Mazia stations.

    For every station in the hard-coded ``m_stations`` table the function
    reads the 2015 and 2016 in situ CSV files, averages three SWC columns,
    extracts a Sentinel-1 series with ``extract_time_series_gee`` and a
    GLDAS series with ``extr_GLDAS_ts_GEE``, computes R, RMSE, bias and
    ubRMSE on daily means, appends a line to a text report and saves a
    time series plot. Afterwards the collected series are pickled, a
    scatter plot over all stations is saved and the overall scores are
    appended to the report.

    Parameters
    ----------
    bsize : int, optional
        Passed to ``extract_time_series_gee`` as ``footprint``.
    name : str, optional
        Inserted into the model, output and pickle paths.
    fvect1, fvect2 : optional
        Passed to ``extract_time_series_gee`` as ``feature_vect1`` and
        ``feature_vect2``.

    Returns
    -------
    None
        All results are written to hard-coded file locations.
    """
    # outpath
    outpath = '//projectdata.eurac.edu/projects/ESA_TIGER/S1_SMC_DEV/Processing/S1ALPS/ISMN/S1AB_' + name + '_reprocess_lt_05/w_GLDAS_station_validation/'

    # Calculate prediction standar deviations
    calcstd = False

    # use descending orbits
    desc = False

    # calculate anomalies instead of absolute values
    calc_anomalies = False

    # initialise S1 SM retrieval
    # mlmodel = pickle.load(open('/mnt/SAT/Workspaces/GrF/Processing/S1ALPS/ASCAT/gee/mlmodel0.p', 'rb'))
    mlmodel = "//projectdata.eurac.edu/projects/ESA_TIGER/S1_SMC_DEV/Processing/S1ALPS/ISMN/S1AB_" + name + "_reprocess_lt_05/w_GLDAS_RFmlmodelNoneSVR_2step.p"

    # initialse text report
    # NOTE(review): the file is closed only on the last line of the function;
    # an exception outside the per-station try block leaves it open.
    txtrep = open(outpath + '2_Mazia_report.txt', 'w')
    txtrep.write(
        'Accuracy report for Soil Moisture validation based on ISMN stations\n\n'
    )
    txtrep.write('Model used: ' + mlmodel + '\n')
    txtrep.write(
        '------------------------------------------------------------------------\n\n'
    )
    txtrep.write('Name, R, RMSE\n')

    # xyplot accumulates the matched value pairs of all stations;
    # cntr == 1 marks the first station that produced data.
    xyplot = pd.DataFrame()
    cntr = 1

    # define mazia station locations
    # (the first entry is passed where ismn_validation_run passes a
    # longitude and the second where it passes a latitude)
    m_stations = {
        'I1': [10.57978, 46.68706],
        'I3': [10.58359, 46.68197],
        'P1': [10.58295, 46.68586],
        'P2': [10.58525, 46.68433],
        'P3': [10.58562, 46.68511]
    }
    m_station_paths = '//projectdata.eurac.edu/projects/ESA_TIGER/S1_SMC_DEV/01_Data/InSitu/MaziaValley_SWC_2015_16/'

    s1_ts_list = list()
    station_ts_list = list()
    station_name_list = list()
    gldas_ts_list = list()

    for vstation in m_stations:

        st_name = vstation
        st_coordinates = m_stations[vstation]

        try:

            # get in situ data
            full_path2015 = m_station_paths + st_name + '_YEAR_2015.csv'

            insitu2015 = pd.read_csv(full_path2015,
                                     header=0,
                                     skiprows=[0, 2, 3],
                                     index_col=0,
                                     parse_dates=True,
                                     sep=',')

            full_path2016 = m_station_paths + st_name + '_YEAR_2016.csv'

            insitu2016 = pd.read_csv(full_path2016,
                                     header=0,
                                     skiprows=[0, 2, 3],
                                     index_col=0,
                                     parse_dates=True,
                                     sep=',')

            # NOTE(review): DataFrame.append is not available in recent
            # pandas releases (pd.concat is the replacement) -- confirm
            # the pandas version in use.
            insitu = insitu2015.append(insitu2016)

            # get station ts (row-wise mean of the three SWC columns)
            station_ts = pd.Series(
                insitu[['SWC_02_A_Avg', 'SWC_02_B_Avg',
                        'SWC_02_C_Avg']].mean(axis=1))

            plotpath = outpath + st_name + '.png'

            # The same model path is passed for both model arguments.
            s1_ts, s1_ts_std, outliers = extract_time_series_gee(
                mlmodel,
                mlmodel,
                '/mnt/SAT4/DATA/S1_EODC/',
                outpath,
                st_coordinates[1],
                st_coordinates[0],
                name=st_name,
                footprint=bsize,
                calcstd=calcstd,
                desc=desc,
                target=station_ts,
                feature_vect1=fvect1,
                feature_vect2=fvect2)  # ,

            # skip stations without a usable Sentinel-1 series
            if s1_ts is None:
                continue

            if len(s1_ts) < 5:
                continue

            gldas_ts = extr_GLDAS_ts_GEE(st_coordinates[1],
                                         st_coordinates[0],
                                         bufferSize=150,
                                         yearlist=[2015, 2016])
            gldas_ts = gldas_ts / 100.

            # restrict all series to the period covered by both the
            # Sentinel-1 and the station series
            start = np.array([s1_ts.index[0], station_ts.index[0]]).max()
            end = np.array([s1_ts.index[-1], station_ts.index[-1]]).min()
            if start > end:
                continue
            station_ts = station_ts[start:end]
            s1_ts = s1_ts[start:end]
            gldas_ts = gldas_ts[start:end]
            if calcstd == True:
                s1_ts_std = s1_ts_std[start:end]
            if len(s1_ts) < 1:
                continue

            # daily means are used for all metrics below
            s1_ts_res = s1_ts.resample('D').mean()
            station_ts_res = station_ts.resample('D').mean()

            if calc_anomalies == True:
                from pytesmo.time_series import anomaly as pyan
                s1_clim = pyan.calc_climatology(s1_ts_res.interpolate())
                station_clim = pyan.calc_climatology(
                    station_ts_res.interpolate())
                s1_ts = pyan.calc_anomaly(s1_ts, climatology=s1_clim)
                s1_ts_res = pyan.calc_anomaly(s1_ts_res, climatology=s1_clim)
                station_ts = pyan.calc_anomaly(station_ts,
                                               climatology=station_clim)
                station_ts_res = pyan.calc_anomaly(station_ts_res,
                                                   climatology=station_clim)

            # calculate error metrics
            # (this mean bias is overwritten by the median bias below)
            ts_bias = s1_ts_res.subtract(station_ts_res).mean()

            # column 0: Sentinel-1, column 1: station; only common dates
            tobemerged = [s1_ts_res.dropna(), station_ts_res.dropna()]
            s1_and_station = pd.concat(tobemerged, axis=1, join='inner')
            ts_bias = s1_and_station[0].subtract(s1_and_station[1]).median()

            # the scatter data uses the bias-corrected Sentinel-1 values
            xytmp = pd.concat(
                {
                    'y': s1_and_station[0] - ts_bias,
                    'x': s1_and_station[1]
                },
                join='inner',
                axis=1)

            if cntr == 1:
                xyplot = xytmp
            else:
                xyplot = pd.concat([xyplot, xytmp], axis=0)

            cntr = cntr + 1

            ts_cor = s1_and_station[0].corr(s1_and_station[1])
            ts_rmse = np.sqrt(
                np.nanmean(
                    np.square(s1_and_station[0].subtract(s1_and_station[1]))))
            # RMSE after removing the mean of each series
            ts_ubrmse = np.sqrt(
                np.sum(
                    np.square((s1_and_station[0] - s1_and_station[0].mean()) -
                              (s1_and_station[1] - s1_and_station[1].mean())))
                / len(s1_and_station[0]))

            print('R: ' + str(ts_cor))
            print('RMSE: ' + str(ts_rmse))
            print('Bias: ' + str(ts_bias))
            txtrep.write(st_name + ', ' + str(ts_cor) + ', ' + str(ts_rmse) +
                         '\n')

            s1_ts_list.append(s1_ts)
            station_ts_list.append(station_ts)
            station_name_list.append(st_name)
            gldas_ts_list.append(gldas_ts)

            # plot
            fig, ax1 = plt.subplots(figsize=(7.16, 1.4), dpi=300)
            line1, = ax1.plot(s1_ts.index,
                              s1_ts,
                              color='b',
                              linestyle='-',
                              marker='+',
                              label='Sentinel-1',
                              linewidth=0.2)
            line2, = ax1.plot(station_ts.index,
                              station_ts,
                              label='In-Situ',
                              linewidth=0.4)
            if np.any(outliers) and outliers is not None:
                line6, = ax1.plot(s1_ts.index[outliers],
                                  s1_ts.iloc[outliers],
                                  color='r',
                                  linestyle='',
                                  marker='o')
            if calcstd == True:
                line4, = ax1.plot(s1_ts.index,
                                  s1_ts - np.sqrt(s1_ts_std),
                                  color='k',
                                  linestyle='--',
                                  linewidth=0.2)
                line5, = ax1.plot(s1_ts.index,
                                  s1_ts + np.sqrt(s1_ts_std),
                                  color='k',
                                  linestyle='--',
                                  linewidth=0.2)

            line6, = ax1.plot(gldas_ts.index,
                              gldas_ts,
                              color='g',
                              linestyle='--',
                              label='GLDAS',
                              linewidth=0.2)

            ax1.set_ylabel('Soil Moisture [m3m-3]', size=8)
            # the y axis spans at least 0 .. 0.5
            smc_max = np.max([s1_ts.max(), station_ts.max()])
            if smc_max <= 0.5:
                smc_max = 0.5
            ax1.set_ylim((0, smc_max))
            ax1.text(
                0.85,
                0.4,
                'R=' + '{:03.2f}'.format(ts_cor) +
                # '\nRMSE=' + '{:03.2f}'.format(ts_rmse) +
                '\nBias=' + '{:03.2f}'.format(ts_bias) + '\nubRMSE=' +
                '{:03.2f}'.format(ts_ubrmse),
                transform=ax1.transAxes,
                fontsize=8)

            plt.title(st_name, fontsize=8)
            plt.tight_layout()
            plt.savefig(plotpath, dpi=300)
            plt.close()
        # NOTE(review): the bare except reports every failure inside the
        # block (including programming errors and KeyboardInterrupt) as
        # missing data.
        except:
            print('No data for: ' + st_name)

    # NOTE(review): the file object passed to pickle.dump is never closed
    # explicitly.
    pickle.dump(
        (s1_ts_list, station_ts_list, gldas_ts_list, station_name_list),
        open(
            'C:/Users/FGreifeneder/OneDrive - Scientific Network South Tyrol/1_THESIS/pub3/images_submission2/w_GLDAS_validation_tss_mazia'
            + name + '.p', 'wb'))

    # overall scores over the value pairs of all stations
    # (urmse_scatter is computed but not used below)
    urmse_scatter = np.sqrt(
        np.sum(
            np.square((xyplot['y'] - xyplot['y'].mean()) -
                      (xyplot['x'] - xyplot['x'].mean()))) / len(xyplot['y']))
    rmse_scatter = np.sqrt(
        np.nanmean(np.square(xyplot['x'].subtract(xyplot['y']))))
    r_scatter = xyplot['x'].corr(xyplot['y'])
    # plt.figure(figsize=(3.5, 3), dpi=600)
    xyplot.plot.scatter(x='x',
                        y='y',
                        color='k',
                        xlim=(0, 1),
                        ylim=(0, 1),
                        figsize=(3.5, 3),
                        s=1,
                        marker='.')
    # the limits given to scatter() are overridden here
    plt.xlim(0, 0.7)
    plt.ylim(0, 0.7)
    plt.xlabel("$SMC_{Tot}$ [m$^3$m$^{-3}$]", size=8)
    plt.ylabel("$SMC^*_{Tot}$ [m$^3$m$^{-3}$]", size=8)
    plt.plot([0, 0.7], [0, 0.7], 'k--')
    plt.text(0.1,
             0.5,
             'R=' + '{:03.2f}'.format(r_scatter) + '\nRMSE=' +
             '{:03.2f}'.format(rmse_scatter),
             fontsize=8)  # +
    # '\nRMSE=' + '{:03.2f}'.format(rmse_scatter), fontsize=8)
    plt.tick_params(labelsize=8)
    plt.title('True vs. estimated SMC', size=8)
    # NOTE(review): plt.axes() without arguments may create a new axes in
    # recent matplotlib versions instead of returning the current one
    # (plt.gca()) -- confirm.
    plt.axes().set_aspect('equal', 'box')
    plt.tight_layout()
    plt.savefig(outpath + '1_Mazia_scatterplot.png', dpi=600)
    plt.close()

    txtrep.write(
        '------------------------------------------------------------------------\n\n'
    )
    txtrep.write('Overall performance:\n')
    txtrep.write('R = ' + str(xyplot['x'].corr(xyplot['y'])) + '\n')
    # No newline is written after the RMSE value, so the ubRMSE entry
    # follows on the same line of the report.
    txtrep.write(
        'RMSE = ' +
        str(np.sqrt(np.nanmean(np.square(xyplot['x'].subtract(xyplot['y']))))))
    txtrep.write('ubRMSE = ' + str(
        np.sqrt(
            np.sum(
                np.square((xyplot['y'] - xyplot['y'].mean()) -
                          (xyplot['x'] - xyplot['x'].mean()))) /
            len(xyplot['y']))))

    txtrep.close()
def ismn_validation_run(bsize=500,
                        name='500m',
                        fvect1=None,
                        fvect2=None,
                        fvect1desc=None,
                        fvect2desc=None):
    """
    Validate Sentinel-1 soil moisture estimates against ISMN stations.

    The stations are taken from the pickled ``testset_meta.p`` file. For
    each station the in situ soil moisture series is read, Sentinel-1
    series are extracted with ``extract_time_series_gee`` once with
    ``desc=desc`` and once with ``desc=True`` and merged, a GLDAS series
    is extracted with ``extr_GLDAS_ts_GEE``, the Sentinel-1 values are
    linearly rescaled to the mean and standard deviation of the station
    data, scores are written to a text report and a time series plot is
    saved. Afterwards the collected series are pickled, a scatter plot
    over all stations is saved and the overall scores are appended to the
    report.

    Parameters
    ----------
    bsize : int, optional
        Passed to ``extract_time_series_gee`` as ``footprint``.
    name : str, optional
        Inserted into the base path and the pickle file name.
    fvect1, fvect2 : optional
        ``feature_vect1`` / ``feature_vect2`` of the first extraction.
    fvect1desc, fvect2desc : optional
        ``feature_vect1`` / ``feature_vect2`` of the second extraction
        (the one called with ``desc=True``).

    Returns
    -------
    None
        All results are written to hard-coded file locations.
    """
    # only referenced in a commented-out argument further down
    s1path = 117
    #bsize=250

    basepath = '//projectdata.eurac.edu/projects/ESA_TIGER/S1_SMC_DEV/Processing/S1ALPS/ISMN/S1AB_' + name + '_reprocess_lt_05/'

    # outpath
    outpath = basepath + 'w_GLDAS_asc_desc/'

    # Calculate prediction standar deviations
    calcstd = False

    # use descending orbits
    desc = False

    # calculate anomalies?
    calc_anomalies = False

    # initialise available ISMN data
    ismn = ismn_interface.ISMN_Interface(
        'T:/ECOPOTENTIAL/reference_data/ISMN/')

    # get list of networks (not used below)
    networks = ismn.list_networks()

    # initialise S1 SM retrieval
    # mlmodel = pickle.load(open('/mnt/SAT/Workspaces/GrF/Processing/S1ALPS/ASCAT/gee/mlmodel0.p', 'rb'))
    mlmodel_avg = "//projectdata.eurac.edu/projects/ESA_TIGER/S1_SMC_DEV/Processing/S1ALPS/ISMN/S1AB_1km_reprocess_lt_05/no_GLDAS_RFmlmodelNoneSVR_2step.p"
    mlmodel = basepath + "w_GLDAS_RFmlmodelNoneSVR_2step.p"
    mlmodel_desc = "//projectdata.eurac.edu/projects/ESA_TIGER/S1_SMC_DEV/Processing/S1ALPS/ISMN/S1AB_50m_reprocess_lt_05_descending/w_GLDAS_RFmlmodelNoneSVR_2step.p"

    # initialse text report
    # NOTE(review): the file is closed only on the last line of the function;
    # an exception outside the per-station try block leaves it open.
    txtrep = open(outpath + '2_report.txt', 'w')
    txtrep.write(
        'Accuracy report for Soil Moisture validation based on ISMN stations\n\n'
    )
    txtrep.write('Model used: ' + mlmodel + '\n')
    txtrep.write(
        '------------------------------------------------------------------------\n\n'
    )
    txtrep.write('Name, R, RMSE\n')

    # xyplot accumulates the matched value pairs of all stations;
    # cntr == 1 marks the first station that produced data.
    xyplot = pd.DataFrame()
    cntr = 1

    #used_stations = np.load('X:/Workspaces/GrF/Processing/S1ALPS/ISMN/gee_global_all_no_deep/ValidStaions.npy')
    # Indexed below as used_stations[0][i] (station name) and
    # used_stations[1][i] (network name).
    # NOTE(review): the file handle is not closed, and unpickling is only
    # safe for trusted files.
    used_stations = pickle.load(open(basepath + "testset_meta.p", 'rb'))
    #invalid_col = pickle.load(open('/mnt/SAT/Workspaces/GrF/Processing/S1ALPS/ISMN/gee_global005_highdbtollerance/invalid_col.p', 'rb'))
    #used_stations = [invalid_col['ntwkname'][i] + ', ' + invalid_col['stname'][i] for i in range(len(invalid_col['ntwkname']))]

    #for ntwk in networks:
    #for vstation in used_stations:
    s1_ts_list = list()
    station_ts_list = list()
    station_name_list = list()
    gldas_ts_list = list()
    s1_failed_list = list()
    s1_ub_list = list()
    for st_i in range(len(used_stations[0])):
        #for st_i in [0]:
        #ntwk, st_name = [x.strip() for x in vstation.split(',')]
        ntwk = used_stations[1][st_i]
        st_name = used_stations[0][st_i]
        # if st_name != 'ROSASCYN':# and st_name != 'Salem-10-W':
        #     continue

        # get list of available stations (not used below)
        available_stations = ismn.list_stations(ntwk)

        # iterate through all available ISMN stations
        #for st_name in available_stations:
        try:

            station = ismn.get_station(st_name, ntwk)
            station_vars = station.get_variables()

            # if st_name != '2.10':
            #     continue
            # if (st_name != 'Boulder-14-W'):
            #     continue
            #if st_name not in ['ANTIMONYFL', 'CALIVALLEY', 'Bethlehem']:
            # if st_name not in ['CALIVALLEY']:
            #     continue

            if 'soil moisture' not in station_vars:
                continue

            station_depths = station.get_depths('soil moisture')

            # if 0.0 in station_depths[0]:
            #     sm_sensors = station.get_sensors('soil moisture', depth_from=0, depth_to=0.24)
            #     station_ts = station.read_variable('soil moisture', depth_from=0, depth_to=0.24, sensor=sm_sensors[0])
            # else:
            #     continue

            # Prefer a sensor whose depth range starts at 0.0, fall back to
            # one at 0.05; stations with neither are skipped. The first
            # sensor returned is used.
            if 0.0 in station_depths[0]:
                did = np.where(station_depths[0] == 0.0)
                dto = station_depths[1][did]
                sm_sensors = station.get_sensors('soil moisture',
                                                 depth_from=0,
                                                 depth_to=dto[0])
                print(sm_sensors[0])
                station_ts = station.read_variable('soil moisture',
                                                   depth_from=0,
                                                   depth_to=dto[0],
                                                   sensor=sm_sensors[0])
            elif 0.05 in station_depths[0]:
                sm_sensors = station.get_sensors('soil moisture',
                                                 depth_from=0.05,
                                                 depth_to=0.05)
                station_ts = station.read_variable('soil moisture',
                                                   depth_from=0.05,
                                                   depth_to=0.05,
                                                   sensor=sm_sensors[0])
            else:
                continue

            print(st_name)

            plotpath = outpath + st_name + '.png'

            # if os.path.exists(plotpath):
            #     continue

            # get station ts
            station_ts = station_ts.data['soil moisture']

            # get S1 time series
            s1_ts, s1_ts_std, outliers, s1_failed = extract_time_series_gee(
                mlmodel,
                mlmodel_avg,
                '/mnt/SAT4/DATA/S1_EODC/',
                outpath,
                station.latitude,
                station.longitude,
                name=st_name,
                footprint=bsize,
                calcstd=calcstd,
                desc=desc,
                target=station_ts,
                feature_vect1=fvect1,
                feature_vect2=fvect2)  #,
            #s1path=s1path)
            # second extraction with the descending-orbit model and desc=True
            s1_ts2, s1_ts_std2, outliers2, s1_failed2 = extract_time_series_gee(
                mlmodel_desc,
                mlmodel_avg,
                '/mnt/SAT4/DATA/S1_EODC/',
                outpath,
                station.latitude,
                station.longitude,
                name=st_name,
                footprint=bsize,
                calcstd=calcstd,
                desc=True,
                target=station_ts,
                feature_vect1=fvect1desc,
                feature_vect2=fvect2desc)  # ,

            if (s1_ts is not None) and (s1_ts2 is not None):
                # correct the mean offset between time-series from ascending and descending orbits
                meandiff = s1_ts.mean() - s1_ts2.mean()
                s1_ts2 = s1_ts2 + meandiff
                meandiff_failed = s1_failed.median() - s1_failed2.median()
                s1_failed2 = s1_failed2 + meandiff_failed
                s1_ts = pd.concat([s1_ts, s1_ts2])
                s1_failed = pd.concat([s1_failed, s1_failed2])
            elif (s1_ts is None) and (s1_ts2 is not None):
                s1_ts = s1_ts2
                s1_failed = s1_failed2

            # NOTE(review): sort_index is called before the None check
            # below; if both extractions returned None this raises and the
            # station ends up in the except branch instead.
            s1_ts.sort_index(inplace=True)
            s1_failed.sort_index(inplace=True)

            if s1_ts is None:
                continue

            if len(s1_ts) < 5:
                continue

            #evi_ts = extr_MODIS_MOD13Q1_ts_GEE(station.longitude, station.latitude, bufferSize=150)
            #evi_ts = pd.Series(evi_ts[1]['EVI'], index=evi_ts[0])
            gldas_ts = extr_GLDAS_ts_GEE(
                station.longitude,
                station.latitude,
                bufferSize=150,
                yearlist=[2014, 2015, 2016, 2017, 2018, 2019])
            gldas_ts = gldas_ts / 100.

            # restrict all series to the period covered by both the
            # Sentinel-1 and the station series
            start = np.array([s1_ts.index[0], station_ts.index[0]]).max()
            end = np.array([s1_ts.index[-1], station_ts.index[-1]]).min()
            if start > end:
                continue
            station_ts = station_ts[start:end]
            s1_ts = s1_ts[start:end]
            s1_failed = s1_failed[start:end]
            #outliers = outliers[start:end]
            #evi_ts = evi_ts[start:end]
            gldas_ts = gldas_ts[start:end]
            if calcstd == True:
                s1_ts_std = s1_ts_std[start:end]
            if len(s1_ts) < 1:
                continue

            #station_ts = station_ts.iloc[np.where(station_ts > 0.1)]
            #s1_ts = s1_ts[np.where(error_ts == error_ts.min())[0]]
            # daily means, named so that the merged frame below has the
            # columns 's1', 'ismn' and 'gldas'
            s1_ts_res = s1_ts.resample('D').mean().rename('s1')
            station_ts_res = station_ts.resample('D').mean().rename('ismn')
            gldas_ts_res = gldas_ts.resample('D').mean().rename('gldas')

            if calc_anomalies == True:
                from pytesmo.time_series import anomaly as pyan
                s1_clim = pyan.calc_climatology(s1_ts_res.interpolate())
                station_clim = pyan.calc_climatology(
                    station_ts_res.interpolate())
                s1_ts = pyan.calc_anomaly(s1_ts, climatology=s1_clim)
                s1_ts_res = pyan.calc_anomaly(s1_ts_res, climatology=s1_clim)
                station_ts = pyan.calc_anomaly(station_ts,
                                               climatology=station_clim)
                station_ts_res = pyan.calc_anomaly(station_ts_res,
                                                   climatology=station_clim)

            # calculate error metrics
            # (this mean bias is overwritten by the median bias below)
            ts_bias = s1_ts_res.subtract(station_ts_res).mean()

            # cdf matching
            # The code below is a linear rescaling: p2 is the ratio of the
            # standard deviations and p1 the offset that aligns the means,
            # both estimated from rows where 'ismn' and 's1' lie in (0, 1).
            tobemerged = [
                s1_ts_res.dropna(),
                gldas_ts_res.dropna(),
                station_ts_res.dropna()
            ]
            s1_and_station = pd.concat(tobemerged, axis=1, join='inner')
            statmask = (s1_and_station['ismn'] > 0) & (
                s1_and_station['ismn'] < 1) & (s1_and_station['s1'] >
                                               0) & (s1_and_station['s1'] < 1)
            p2 = s1_and_station['ismn'][statmask].std(
            ) / s1_and_station['s1'][statmask].std()
            p1 = s1_and_station['ismn'][statmask].mean() - (
                p2 * s1_and_station['s1'][statmask].mean())
            s1_ts_ub = p1 + (p2 * s1_ts)
            s1_and_station['s1ub'] = p1 + (p2 * s1_and_station['s1'])
            ts_bias = s1_and_station['s1ub'].subtract(
                s1_and_station['ismn']).median()
            s1_failed = p1 + (p2 * s1_failed)

            xytmp = pd.concat(
                {
                    'y': s1_and_station['s1ub'],
                    'x': s1_and_station['ismn']
                },
                join='inner',
                axis=1)

            if cntr == 1:
                xyplot = xytmp
            else:
                xyplot = pd.concat([xyplot, xytmp], axis=0)

            cntr = cntr + 1

            ts_cor = s1_and_station['s1ub'].corr(s1_and_station['ismn'])
            ts_rmse = np.sqrt(
                np.nanmean(
                    np.square(s1_and_station['s1ub'].subtract(
                        s1_and_station['ismn']))))
            #ts_ubrmse = np.sqrt(np.sum(np.square(s1_ts_res.subtract(s1_ts_res.mean()).subtract(station_ts_res.subtract(station_ts_res.mean())))))
            # RMSE after removing the mean of each series
            # (computed but not reported below)
            ts_ubrmse = np.sqrt(
                np.sum(
                    np.square((s1_and_station['s1ub'] -
                               s1_and_station['s1ub'].mean()) -
                              (s1_and_station['ismn'] -
                               s1_and_station['ismn'].mean()))) /
                len(s1_and_station['s1ub']))

            print('R: ' + str(ts_cor))
            print('RMSE: ' + str(ts_rmse))
            print('Bias: ' + str(ts_bias))
            txtrep.write(st_name + ', ' + str(ts_cor) + ', ' + str(ts_rmse) +
                         '\n')

            s1_ts_list.append(s1_ts)
            station_ts_list.append(station_ts)
            station_name_list.append(st_name)
            gldas_ts_list.append(gldas_ts)
            s1_failed_list.append(s1_failed)
            s1_ub_list.append(s1_ts_ub)

            # plot
            # plt.figure(figsize=(18, 6))
            fig, ax1 = plt.subplots(figsize=(7.16, 1.4), dpi=300)
            line1, = ax1.plot(s1_ts_ub.index,
                              s1_ts_ub,
                              color='b',
                              linestyle='',
                              marker='+',
                              label='Sentinel-1',
                              linewidth=0.2)
            line8, = ax1.plot(s1_failed.index,
                              s1_failed,
                              color='r',
                              linestyle='',
                              marker='+',
                              label='fail',
                              linewidth=0.2)
            line2, = ax1.plot(station_ts.index,
                              station_ts,
                              label='In-Situ',
                              linewidth=0.4)
            # NOTE(review): ``outliers`` comes from the first extraction
            # only, while ``s1_ts`` may hold the merged series at this
            # point -- confirm the index still matches.
            if np.any(outliers) and outliers is not None:
                line6, = ax1.plot(s1_ts.index[outliers],
                                  s1_ts.iloc[outliers],
                                  color='r',
                                  linestyle='',
                                  marker='o')
            #line3, = ax1.plot(outliers.index, outliers, color='r', linestyle='', marker='*', label='Outliers')
            if calcstd == True:
                line4, = ax1.plot(s1_ts.index,
                                  s1_ts - np.sqrt(s1_ts_std),
                                  color='k',
                                  linestyle='--',
                                  linewidth=0.2)
                line5, = ax1.plot(s1_ts.index,
                                  s1_ts + np.sqrt(s1_ts_std),
                                  color='k',
                                  linestyle='--',
                                  linewidth=0.2)

            #ax3 = ax1.twinx()
            line6, = ax1.plot(gldas_ts.index,
                              gldas_ts,
                              color='g',
                              linestyle='--',
                              label='GLDAS',
                              linewidth=0.2)
            #line3, = ax3.plot(evi_ts.index, evi_ts, linewidth=0.4, color='r', linestyle='--', label='MOD13Q1:EVI')
            # ax3.axes.tick_params(axis='y', direction='in', labelcolor='r', right='off', labelright='off')

            ax1.set_ylabel('Soil Moisture [m3m-3]', size=8)
            # the y axis spans at least 0 .. 0.5
            smc_max = np.max([s1_ts.max(), station_ts.max()])
            if smc_max <= 0.5:
                smc_max = 0.5
            ax1.set_ylim((0, smc_max))
            ax1.text(
                0.85,
                0.4,
                'R=' + '{:03.2f}'.format(ts_cor) +
                #'\nRMSE=' + '{:03.2f}'.format(ts_rmse) +
                '\nBias=' + '{:03.2f}'.format(ts_bias) + '\nRMSE=' +
                '{:03.2f}'.format(ts_rmse),
                transform=ax1.transAxes,
                fontsize=8)
            # ax2.set_ylabel('In-Situ [m3/m3]')
            #ax3.set_ylabel('EVI')
            #fig.tight_layout()
            #plt.legend(handles=[line1, line2], loc='upper left', fontsize=8)#, line3, line6])
            plt.title(st_name, fontsize=8)
            #plt.show()
            plt.tight_layout()
            plt.savefig(plotpath, dpi=300)
            plt.close()
        # NOTE(review): the bare except reports every failure inside the
        # block (including programming errors and KeyboardInterrupt) as
        # missing data.
        except:
            print('No data for: ' + st_name)

    # NOTE(review): ``s1_failed`` (the value left over from the last
    # processed station) is stored here, while ``s1_failed_list`` is filled
    # above but never used -- confirm which one is intended. The file object
    # is never closed explicitly.
    pickle.dump(
        (s1_ts_list, s1_ub_list, s1_failed, station_ts_list, gldas_ts_list,
         station_name_list),
        open(
            'C:/Users/FGreifeneder/OneDrive - Scientific Network South Tyrol/1_THESIS/pub3/images_submission2/w_GLDAS_validation_tss_'
            + name + '.p', 'wb'))

    # keep only pairs where both values lie strictly between 0 and 1
    scatter_valid = np.where(((xyplot['x'] > 0) & (xyplot['x'] < 1))
                             & ((xyplot['y'] > 0) & (xyplot['y'] < 1)))
    xyplot = xyplot.iloc[scatter_valid]
    # overall scores (urmse_scatter is computed but not used below)
    urmse_scatter = np.sqrt(
        np.sum(
            np.square((xyplot['y'] - xyplot['y'].mean()) -
                      (xyplot['x'] - xyplot['x'].mean()))) / len(xyplot['y']))
    rmse_scatter = np.sqrt(
        np.nanmean(np.square(xyplot['x'].subtract(xyplot['y']))))
    r_scatter = xyplot['x'].corr(xyplot['y'])
    #plt.figure(figsize=(3.5, 3), dpi=600)
    xyplot.plot.scatter(x='x',
                        y='y',
                        color='k',
                        xlim=(0, 1),
                        ylim=(0, 1),
                        figsize=(3.5, 3),
                        s=1,
                        marker='*')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.xlabel("$SMC_{Tot}$ [m$^3$m$^{-3}$]", size=8)
    plt.ylabel("$SMC^*_{Tot}$ [m$^3$m$^{-3}$]", size=8)
    plt.plot([0, 1], [0, 1], 'k--', linewidth=0.8)
    plt.text(0.1,
             0.5,
             'R=' + '{:03.2f}'.format(r_scatter) + '\nRMSE=' +
             '{:03.2f}'.format(rmse_scatter),
             fontsize=8)  # +
    #'\nRMSE=' + '{:03.2f}'.format(rmse_scatter), fontsize=8)
    plt.tick_params(labelsize=8)
    plt.title('True vs. estimated SMC', size=8)
    # NOTE(review): plt.axes() without arguments may create a new axes in
    # recent matplotlib versions instead of returning the current one
    # (plt.gca()) -- confirm.
    plt.axes().set_aspect('equal', 'box')
    plt.tight_layout()
    plt.savefig(outpath + '1_scatterplot.png', dpi=600)
    plt.close()

    txtrep.write(
        '------------------------------------------------------------------------\n\n'
    )
    txtrep.write('Overall performance:\n')
    txtrep.write('R = ' + str(xyplot['x'].corr(xyplot['y'])) + '\n')
    # No newline is written after the RMSE value, so the ubRMSE entry
    # follows on the same line of the report.
    txtrep.write(
        'RMSE = ' +
        str(np.sqrt(np.nanmean(np.square(xyplot['x'].subtract(xyplot['y']))))))
    txtrep.write('ubRMSE = ' + str(
        np.sqrt(
            np.sum(
                np.square((xyplot['y'] - xyplot['y'].mean()) -
                          (xyplot['x'] - xyplot['x'].mean()))) /
            len(xyplot['y']))))

    txtrep.close()
ascat_SSM_reader = ascat.AscatSsmCdr(ascat_data_folder, ascat_grid_folder, grid_filename='TUW_WARP5_grid_info_2_1.nc', static_layer_path=static_layers_folder) # <codecell> # Read data for location in northern Italy ascat_ts = ascat_SSM_reader.read(11, 45) #plot soil moisture ascat_ts.data['sm'].plot(title='SSM data') plt.show() # <codecell> #calculate anomaly based on moving +- 17 day window anom = ts_anomaly.calc_anomaly(ascat_ts.data['sm'], window_size=35) anom.plot(title='Anomaly (35-day window)') plt.show() # <codecell> #calculate climatology climatology = ts_anomaly.calc_climatology(ascat_ts.data['sm']) climatology.plot(title='Climatology') plt.show() # <codecell> #calculate anomaly based on climatology anomaly_clim = ts_anomaly.calc_anomaly(ascat_ts.data['sm'], climatology=climatology) anomaly_clim.plot(title='Anomaly (climatology)')