def plot_alltogether(time_lag, lon, lat, ts1, ts2, scale_ts=False,
                     save_fig=False, *args):
    """
    Temporally match two time series and plot them in one figure.

    Parameters
    ----------
    time_lag : int
        Only used to build the file name of the saved figure.
    lon, lat : float
        Only used to build the file name of the saved figure.
    ts1, ts2 : pd.DataFrame or pd.Series
        Time series handed to temp_match.matching (ts1 first).
    scale_ts : bool, optional
        If True the matched data is rescaled with
        scaling.scale(..., method="mean_std") before plotting.
    save_fig : bool, optional
        If True the figure is written to a fixed directory and cleared,
        otherwise it is shown interactively.
    *args
        Further positional arguments forwarded to temp_match.matching.

    Returns
    -------
    None
        Nothing is plotted when the matched dataset is empty.
    """
    matched_data = temp_match.matching(ts1, ts2, *args)
    if len(matched_data) == 0:
        # call form works on both Python 2 and Python 3
        print("Empty dataset.")
        return

    if scale_ts:
        matched_data = scaling.scale(matched_data, method="mean_std")

    matched_data.plot(figsize=(15, 5))
    plt.title('SWI and Vegetation indices comparison (rescaled)')

    if save_fig:
        fig_path = ("C:\\Users\\i.pfeil\\Desktop\\TS_plots\\lon_" + str(lon)
                    + "_lat_" + str(lat) + '_' + str(time_lag) + ".png")
        plt.savefig(fig_path, bbox_inches='tight')
        # clear the figure so that the next call starts from an empty one
        plt.clf()
    else:
        plt.show()
def calc_rho(ascat_ssm, FP_df, hoal_df):
    """
    Match ASCAT, Parrot (FP) and HOAL soil moisture, compute Spearman
    correlations against the Parrot data and plot the scaled series.

    Parameters
    ----------
    ascat_ssm : pd.DataFrame
        Must contain the column 'ssm_ascat'. NOTE: this column is
        multiplied by 0.54 in place, i.e. the caller's frame is modified.
    FP_df : pd.DataFrame
        Must contain the column 'Parrot_vwc'.
    hoal_df : pd.DataFrame
        Must contain the column 'HOAL_sm0.05'.

    Returns
    -------
    None
        Two figures are shown (matched data, scaled data).
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    matched_data = matching(ascat_ssm, FP_df['Parrot_vwc'],
                            hoal_df['HOAL_sm0.05'])
    matched_data.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    data_together = scale(matched_data)

    # Correlations are computed without the last three rows. They are not
    # displayed at the moment (the title annotation using them is disabled).
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']
    # .loc replaces the removed DataFrame.ix indexer (label based selection)
    plot_columns = data_together.columns.difference(exclude)
    data_together.loc[:, plot_columns].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, '
              'station 22', fontsize=24)
    plt.ylabel('Volumetric Water Content [%]', fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.ylim([0, 60])
    plt.show()
def calc_rho(ascat_ssm, FP_df, hoal_df):
    """
    Match ASCAT, Parrot (FP) and HOAL soil moisture, compute Spearman
    correlations against the Parrot data and plot the scaled series.

    Parameters
    ----------
    ascat_ssm : pd.DataFrame
        Must contain the column 'ssm_ascat'. NOTE: this column is
        multiplied by 0.54 in place, i.e. the caller's frame is modified.
    FP_df : pd.DataFrame
        Must contain the column 'Parrot_vwc'.
    hoal_df : pd.DataFrame
        Must contain the column 'HOAL_sm0.05'.

    Returns
    -------
    None
        Two figures are shown (matched data, scaled data).
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    matched_data = matching(ascat_ssm, FP_df['Parrot_vwc'],
                            hoal_df['HOAL_sm0.05'])
    matched_data.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    data_together = scale(matched_data)

    # Correlations are computed without the last three rows. They are not
    # displayed at the moment (the title annotation using them is disabled).
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']
    # .loc replaces the removed DataFrame.ix indexer (label based selection)
    plot_columns = data_together.columns.difference(exclude)
    data_together.loc[:, plot_columns].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, '
              'station 22', fontsize=24)
    plt.ylabel('Volumetric Water Content [%]', fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=18)
    plt.ylim([0, 60])
    plt.show()
def test_matching_series():
    """
    Check matching() with pd.Series input: four of the five daily
    reference steps are expected in the result, the NaN one is missing.
    """
    ref_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    ref_index = pd.date_range(datetime(2007, 1, 1, 0), "2007-01-05",
                              freq="D")
    ref_ser = pd.Series(ref_values, index=ref_index)

    # candidate observations lie nine hours after each reference step
    match_index = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_ser = pd.Series(np.arange(5), index=match_index,
                          name="matched_data")

    matched = tmatching.matching(ref_ser, match_ser)

    expected = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected, matched.matched_data)
    assert len(matched) == 4
def test_matching():
    """
    Check matching() with pd.DataFrame input: four of the five daily
    reference steps are expected in the result, the NaN one is missing.
    """
    ref_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    ref_index = pd.date_range(datetime(2007, 1, 1, 0), "2007-01-05",
                              freq="D")
    ref_df = pd.DataFrame({"data": ref_values}, index=ref_index)

    # candidate observations lie nine hours after each reference step
    match_index = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_df = pd.DataFrame({"matched_data": np.arange(5)},
                            index=match_index)

    matched = tmatching.matching(ref_df, match_df)

    expected = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected, matched.matched_data)
    assert len(matched) == 4
def calc_rho(ascat_ssm, FP_df, hoal_df, hoal_raw):
    """
    Match ASCAT, Parrot (FP), HOAL and raw HOAL data, print Spearman
    correlations and plot the matched series.

    Three matchings are produced to compare the effect of the matching
    order: ASCAT as reference (used for the statistics), FP as reference,
    and FP matched with HOAL only.

    Parameters
    ----------
    ascat_ssm : pd.DataFrame
        Must contain the column 'ssm_ascat'. NOTE: this column is
        multiplied by 0.54 in place, i.e. the caller's frame is modified.
    FP_df : pd.DataFrame
        Must provide 'Parrot_vwc' and 'air_temperature_celsius'.
    hoal_df : pd.DataFrame
        Must provide 'HOAL_sm0.05' and 'HOAL_ts0.05'.
    hoal_raw : pd.DataFrame
        Must provide 'HOAL_raw_sm1'.

    Returns
    -------
    None
        Correlations are printed and three figures are shown.
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    # which order should be used for matching? The first argument is the
    # reference of the temporal matching.
    data_together1 = matching(FP_df, ascat_ssm, hoal_df, hoal_raw)
    data_together = matching(ascat_ssm, FP_df, hoal_df, hoal_raw)
    data_together2 = matching(FP_df, hoal_df)

    # labels follow the reference (first) argument of each matching call
    print('ref: ASCAT')
    print(data_together)
    print('ref: FP')
    print(data_together1)

    # statistics are computed without the last three rows
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])
    hoal_rho_ts = metrics.spearmanr(
        data_together['air_temperature_celsius'].iloc[:-3],
        data_together['HOAL_ts0.05'].iloc[:-3])
    hoal_raw_rho_sm = metrics.spearmanr(
        parrot, data_together['HOAL_raw_sm1'].iloc[:-3])

    print(ascat_rho)
    print(hoal_rho_sm)
    print(hoal_rho_ts)
    print(hoal_raw_rho_sm)

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']

    # .loc replaces the removed DataFrame.ix indexer (label based selection)
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22'
              + '\n rho_ASCAT_Parrot: ' + str(np.round(ascat_rho[0], 3))
              + ', rho_HOAL_Parrot: ' + str(np.round(hoal_rho_sm[0], 3))
              + ', rho_HOAL_raw_Parrot: '
              + str(np.round(hoal_raw_rho_sm[0], 3)))
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()

    data_together1.loc[:, data_together1.columns.difference(exclude)].plot()
    plt.show()

    data_together2.loc[:, data_together2.columns.difference(exclude)].plot()
    plt.show()
def calc_rho(ascat_ssm, FP_df, hoal_df, hoal_raw):
    """
    Match ASCAT, Parrot (FP), HOAL and raw HOAL data, print Spearman
    correlations and plot the matched series.

    Three matchings are produced to compare the effect of the matching
    order: ASCAT as reference (used for the statistics), FP as reference,
    and FP matched with HOAL only.

    Parameters
    ----------
    ascat_ssm : pd.DataFrame
        Must contain the column 'ssm_ascat'. NOTE: this column is
        multiplied by 0.54 in place, i.e. the caller's frame is modified.
    FP_df : pd.DataFrame
        Must provide 'Parrot_vwc' and 'air_temperature_celsius'.
    hoal_df : pd.DataFrame
        Must provide 'HOAL_sm0.05' and 'HOAL_ts0.05'.
    hoal_raw : pd.DataFrame
        Must provide 'HOAL_raw_sm1'.

    Returns
    -------
    None
        Correlations are printed and three figures are shown.
    """
    # multiply ASCAT with porosity (0.54) to get same units
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * 0.54

    # which order should be used for matching? The first argument is the
    # reference of the temporal matching.
    data_together1 = matching(FP_df, ascat_ssm, hoal_df, hoal_raw)
    data_together = matching(ascat_ssm, FP_df, hoal_df, hoal_raw)
    data_together2 = matching(FP_df, hoal_df)

    # labels follow the reference (first) argument of each matching call
    print('ref: ASCAT')
    print(data_together)
    print('ref: FP')
    print(data_together1)

    # statistics are computed without the last three rows
    parrot = data_together['Parrot_vwc'].iloc[:-3]
    ascat_rho = metrics.spearmanr(parrot,
                                  data_together['ssm_ascat'].iloc[:-3])
    hoal_rho_sm = metrics.spearmanr(parrot,
                                    data_together['HOAL_sm0.05'].iloc[:-3])
    hoal_rho_ts = metrics.spearmanr(
        data_together['air_temperature_celsius'].iloc[:-3],
        data_together['HOAL_ts0.05'].iloc[:-3])
    hoal_raw_rho_sm = metrics.spearmanr(
        parrot, data_together['HOAL_raw_sm1'].iloc[:-3])

    print(ascat_rho)
    print(hoal_rho_sm)
    print(hoal_rho_ts)
    print(hoal_raw_rho_sm)

    exclude = ['HOAL_ts0.05', 'air_temperature_celsius', 'par_umole_m2s',
               'merge_key']

    # .loc replaces the removed DataFrame.ix indexer (label based selection)
    data_together.loc[:, data_together.columns.difference(exclude)].plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen, station 22'
              + '\n rho_ASCAT_Parrot: ' + str(np.round(ascat_rho[0], 3))
              + ', rho_HOAL_Parrot: ' + str(np.round(hoal_rho_sm[0], 3))
              + ', rho_HOAL_raw_Parrot: '
              + str(np.round(hoal_raw_rho_sm[0], 3)))
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()

    data_together1.loc[:, data_together1.columns.difference(exclude)].plot()
    plt.show()

    data_together2.loc[:, data_together2.columns.difference(exclude)].plot()
    plt.show()
def rescale_df(ascat_ssm, FP_df):
    """
    Show three figures: the ASCAT data, its temporal match with the Parrot
    data and the matched data after mean/std scaling.

    The 'ssm_ascat' column of ascat_ssm is multiplied by 0.54 in place
    before anything is plotted.
    """
    porosity_factor = 0.54
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * porosity_factor
    ascat_ssm.plot()
    plt.show()

    parrot_vwc = FP_df['Parrot_vwc']
    matched = matching(ascat_ssm, parrot_vwc)
    matched.plot()
    plt.show()

    rescaled = scale(matched, method="mean_std")
    rescaled.plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen')
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()
def rescale_df(ascat_ssm, FP_df):
    """
    Show three figures: the ASCAT data, its temporal match with the Parrot
    data and the matched data after mean/std scaling.

    The 'ssm_ascat' column of ascat_ssm is multiplied by 0.54 in place
    before anything is plotted.
    """
    porosity_factor = 0.54
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * porosity_factor
    ascat_ssm.plot()
    plt.show()

    parrot_vwc = FP_df['Parrot_vwc']
    matched = matching(ascat_ssm, parrot_vwc)
    matched.plot()
    plt.show()

    rescaled = scale(matched, method="mean_std")
    rescaled.plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen')
    plt.ylabel('Volumetric Water Content [%]')
    plt.show()
def calc_corr_IDSI_SWI(lat_min, lat_max, lon_min, lon_max,
                       swi_stack_path='C:\\Users\\s.hochstoger\\Desktop\\0_IWMI_DATASETS\\Dataset_stacks\\SWI_stack.nc'):
    """
    Print Spearman's rho and p-value between the (sign-inverted) drought
    series of an area and the SWI anomalies for several T-values.

    Parameters
    ----------
    lat_min, lat_max, lon_min, lon_max : float
        Bounding box handed to create_drought_dist and read_ts_area.
    swi_stack_path : str, optional
        Path of the SWI stack file. Defaults to the location that used to
        be hard coded in this function.

    Returns
    -------
    None
        For each T-value the T-value itself and then "rho p" are printed.
    """
    df = create_drought_dist(lat_min, lat_max, lon_min, lon_max)
    # invert the sign of the drought values before correlating
    df['drought'] = df['drought'] * (-1)
    drought = pd.DataFrame(df['drought'])

    for t in [1, 5, 10, 15, 20, 40, 60, 100]:
        print(t)
        df_swi = read_ts_area(swi_stack_path, 'SWI_' + str(t).zfill(3),
                              lat_min, lat_max, lon_min, lon_max)
        anomaly_swi = anomaly(df_swi)
        # anomalies up to 2015-06-26, divided by 100
        df_anom = anomaly_swi.loc[:'20150626'] / 100

        match = temp_match.matching(drought, df_anom)
        s_rho, s_p = metrics.spearmanr(match.iloc[:, 0], match.iloc[:, 1])
        # single formatted argument prints identically on Python 2 and 3
        print("%s %s" % (s_rho, s_p))
def calc_corr_IDSI_SWI(lat_min, lat_max, lon_min, lon_max,
                       swi_stack_path='C:\\Users\\s.hochstoger\\Desktop\\0_IWMI_DATASETS\\Dataset_stacks\\SWI_stack.nc'):
    """
    Print Spearman's rho and p-value between the (sign-inverted) drought
    series of an area and the SWI anomalies for several T-values.

    Parameters
    ----------
    lat_min, lat_max, lon_min, lon_max : float
        Bounding box handed to create_drought_dist and read_ts_area.
    swi_stack_path : str, optional
        Path of the SWI stack file. Defaults to the location that used to
        be hard coded in this function.

    Returns
    -------
    None
        For each T-value the T-value itself and then "rho p" are printed.
    """
    df = create_drought_dist(lat_min, lat_max, lon_min, lon_max)
    # invert the sign of the drought values before correlating
    df['drought'] = df['drought'] * (-1)
    drought = pd.DataFrame(df['drought'])

    for t in [1, 5, 10, 15, 20, 40, 60, 100]:
        print(t)
        df_swi = read_ts_area(swi_stack_path, 'SWI_' + str(t).zfill(3),
                              lat_min, lat_max, lon_min, lon_max)
        anomaly_swi = anomaly(df_swi)
        # anomalies up to 2015-06-26, divided by 100
        df_anom = anomaly_swi.loc[:'20150626'] / 100

        match = temp_match.matching(drought, df_anom)
        s_rho, s_p = metrics.spearmanr(match.iloc[:, 0], match.iloc[:, 1])
        # single formatted argument prints identically on Python 2 and 3
        print("%s %s" % (s_rho, s_p))
def rescale_df(ascat_ssm, FP_df, hoal_df):
    """
    Show two figures: ASCAT, Parrot and HOAL data after temporal matching,
    and the same data after scaling with the default method of scale().

    The 'ssm_ascat' column of ascat_ssm is multiplied by 0.54 in place
    before the matching.
    """
    porosity_factor = 0.54
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * porosity_factor

    parrot_vwc = FP_df['Parrot_vwc']
    matched = matching(ascat_ssm, parrot_vwc, hoal_df)
    matched.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    rescaled = scale(matched)
    rescaled.plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen')
    plt.ylabel('Volumetric Water Content [%]')
    plt.ylim([0, 60])
    plt.show()
def rescale_df(ascat_ssm, FP_df, hoal_df):
    """
    Show two figures: ASCAT, Parrot and HOAL data after temporal matching,
    and the same data after scaling with the default method of scale().

    The 'ssm_ascat' column of ascat_ssm is multiplied by 0.54 in place
    before the matching.
    """
    porosity_factor = 0.54
    ascat_ssm['ssm_ascat'] = ascat_ssm['ssm_ascat'] * porosity_factor

    parrot_vwc = FP_df['Parrot_vwc']
    matched = matching(ascat_ssm, parrot_vwc, hoal_df)
    matched.plot()
    plt.title('Matched data: ASCAT, FP, HOAL')
    plt.show()

    rescaled = scale(matched)
    rescaled.plot()
    plt.title('Satellite and in-situ soil moisture, HOAL Petzenkirchen')
    plt.ylabel('Volumetric Water Content [%]')
    plt.ylim([0, 60])
    plt.show()
def test_matching():
    """
    Check matching() with pd.DataFrame input: four of the five daily
    reference steps are expected in the result, the NaN one is missing.
    """
    ref_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    ref_index = pd.date_range(datetime(2007, 1, 1, 0), "2007-01-05",
                              freq="D")
    ref_df = pd.DataFrame({"data": ref_values}, index=ref_index)

    # candidate observations lie nine hours after each reference step
    match_index = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_df = pd.DataFrame({"matched_data": np.arange(5)},
                            index=match_index)

    matched = tmatching.matching(ref_df, match_df)

    expected = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected, matched.matched_data)
    assert len(matched) == 4
def test_matching_series():
    """
    Check matching() with pd.Series input: four of the five daily
    reference steps are expected in the result, the NaN one is missing.
    """
    ref_values = np.array([0.0, 1.0, 2.0, np.nan, 4.0])
    ref_index = pd.date_range(datetime(2007, 1, 1, 0), "2007-01-05",
                              freq="D")
    ref_ser = pd.Series(ref_values, index=ref_index)

    # candidate observations lie nine hours after each reference step
    match_index = [datetime(2007, 1, day, 9) for day in range(1, 6)]
    match_ser = pd.Series(np.arange(5), index=match_index,
                          name='matched_data')

    matched = tmatching.matching(ref_ser, match_ser)

    expected = np.array([0, 1, 2, 4])
    nptest.assert_allclose(expected, matched.matched_data)
    assert len(matched) == 4
def corr(paths, corr_df, start_date, end_date, lon=None, lat=None,
         vi_str='NDVI', time_lags=(0, 10, 20, 30, 40, 50, 60, 100),
         plot_time_lags=False):
    """
    Calculate Spearman's Rho and p-value for SWI (all t-values) and
    specified VI (default NDVI).

    If plot_time_lags is True, plots of the VI (for different time lags)
    over the SWI (all t-values) are created and saved.

    Parameters
    ----------
    paths : dict
        Paths to datasets; the keys 'SWI' and vi_str are read.
    corr_df : pd.DataFrame
        DataFrame where the correlation coefficients are stored. It is
        modified in place and also returned.
    start_date, end_date : datetime
        Start and end date handed to read_ts.
    lon, lat : float, optional
        Location handed to read_ts (and used in plot file names).
    vi_str : str, optional
        Vegetation index to use, default: NDVI
    time_lags : sequence of int, optional
        Time lags in days by which the VI index is shifted.
    plot_time_lags : bool, optional
        Plot (shifted) VI over SWIs, default: False

    Returns
    -------
    corr_df : pd.DataFrame
        DataFrame containing the correlation coefficients in the columns
        '<vi>_<swi>_rho' and '<vi>_<swi>_p'.
    """
    swi_path = paths['SWI']
    vi_path = paths[vi_str]

    # read SWI for different T-values and VI
    swi_list = ['SWI_001', 'SWI_010', 'SWI_020', 'SWI_040', 'SWI_060',
                'SWI_100']
    swi_df = read_ts(swi_path, lon=lon, lat=lat, params=swi_list,
                     start_date=start_date, end_date=end_date)
    vi = read_ts(vi_path, lon=lon, lat=lat, params=vi_str,
                 start_date=start_date, end_date=end_date)

    # replace the value 255 by NaN; one positional write instead of a
    # chained assignment, which may silently act on a copy
    rows_255 = np.where(vi == 255)[0]
    vi.iloc[rows_255, vi.columns.get_loc(vi_str)] = np.nan

    # rescale SWI and VI using method from Peng et al., 2014
    water = {}
    for swi_key in swi_list:
        swi_series = swi_df[swi_key]
        water[swi_key] = rescale_peng(swi_series, np.nanmin(swi_series),
                                      np.nanmax(swi_series))
    vi_resc = rescale_peng(vi, np.nanmin(vi), np.nanmax(vi))

    # NOTE(review): as in the original code, lags <= 0 use the VI as read
    # (not rescaled), whereas positive lags use the rescaled VI. It is now
    # independent of the order of time_lags; confirm that the non-rescaled
    # series is really wanted for lag 0.
    vi_unshifted = vi

    for time_lag in time_lags:
        # insert time lag
        if time_lag > 0:
            vi_lagged = vi_resc.copy()
            vi = pd.DataFrame(vi_lagged.values, columns=[vi_str],
                              index=vi_lagged.index
                              + timedelta(days=time_lag))
        else:
            vi = vi_unshifted

        # plot vi time lags over SWI (done once, while processing lag 0)
        if plot_time_lags and time_lag == 0:
            plot_alltogether(0, lon, lat, swi_df, vi.copy(), save_fig=True)
            for plot_lag in (10, 20, 30, 40, 50, 60, 100):
                vi_plot = pd.DataFrame(
                    vi.values, columns=['vi%d' % plot_lag],
                    index=vi.index + timedelta(days=plot_lag))
                plot_alltogether(plot_lag, lon, lat, swi_df, vi_plot,
                                 save_fig=True)

        # calculate Spearman's Rho and p-value for VI and SWIs
        for ds_water in sorted(water):
            data_together = temp_match.matching(water[ds_water], vi)
            rho, p = metrics.spearmanr(data_together[ds_water],
                                       data_together[vi_str])
            # mask values with p-value > 0.05
            if p > 0.05:
                rho = np.nan

            rho_col = vi_str + '_' + ds_water + '_rho'
            p_col = vi_str + '_' + ds_water + '_p'
            if rho_col in corr_df.columns:
                # boolean mask: only existing rows of this lag are written
                at_lag = corr_df.index == time_lag
                corr_df.loc[at_lag, rho_col] = rho
                corr_df.loc[at_lag, p_col] = p
            else:
                corr_df[rho_col] = pd.Series(rho, index=[time_lag])
                corr_df[p_col] = pd.Series(p, index=[time_lag])

    return corr_df
def _anomaly_series(series, anomaly):
    """Return the anomaly of series for the given mode, else series itself."""
    if anomaly == 'climatology':
        climatology = anomaly_calc.calc_climatology(series)
        return anomaly_calc.calc_anomaly(series, climatology=climatology)
    if anomaly == 'average':
        return anomaly_calc.calc_anomaly(series)
    return series


def compare_data(ismn_data, validation_data, scaling='linreg', anomaly=None):
    """
    Compare data from an ISMN station to the defined validation datasets.

    Parameters
    ----------
    ismn_data : pandas.DataFrame
        Data from the ISMN used as a reference; the column
        'soil moisture' is used.
    validation_data : dict
        Dictionary of pandas.DataFrames, one for each dataset to compare
        against. The column 'cci_sm' of each dataset is used.
    scaling : string, optional
        Scaling method to use; 'noscale' disables scaling.
    anomaly : string, optional
        If set ('climatology' or 'average') the validation is done for
        anomalies.

    Returns
    -------
    output_data : dict
        Keys 'validation_data', 'masking_data', 'statistics', 'settings'.
    status : int
        Always 1.

    Raises
    ------
    ValueError
        If validation_data is empty.
    NotImplementedError
        If scaling == 'porosity' (listed as an option, but no code path
        implements it).

    Notes
    -----
    NOTE(review): the indentation of the original was lost. This version
    assumes the statistics are computed after the loop, i.e. for the last
    dataset in validation_data - confirm for multi-dataset callers.
    """
    insitu_label = 'soil moisture'
    vdata_label = 'cci_sm'

    if scaling == 'porosity':
        raise NotImplementedError("scaling='porosity' is not implemented")
    if not validation_data:
        raise ValueError("validation_data must contain at least one dataset")

    if anomaly is not None:
        # NOTE(review): the original branch read names that are never
        # defined here (ascat_masked, ascat_label, ISMN_data) and therefore
        # always raised. The same steps are applied to the in situ column
        # (and to each validation dataset below) - confirm the intent.
        ismn_data = ismn_data.copy()
        ismn_data[insitu_label] = _anomaly_series(
            ismn_data[insitu_label], anomaly).values
        ismn_data = ismn_data.dropna()

    for dname in validation_data:
        vdata = validation_data[dname]
        if anomaly is not None:
            vdata = vdata.copy()
            vdata[vdata_label] = _anomaly_series(
                vdata[vdata_label], anomaly).values
            vdata = vdata.dropna()

        matched_data = temp_match.matching(ismn_data, vdata, window=1)

        if scaling != 'noscale':
            scaled_data = scale.add_scaled(matched_data,
                                           label_in=vdata_label,
                                           label_scale=insitu_label,
                                           method=scaling)
            scaled_label = vdata_label + '_scaled_' + scaling
            scaled_data = scaled_data[[insitu_label, scaled_label]]
        else:
            scaled_data = matched_data[[insitu_label, vdata_label]]
            scaled_label = vdata_label

    labels, values = scaled_data.to_dygraph_format()
    ascat_insitu = {'labels': labels, 'data': values}

    x = scaled_data[insitu_label].values
    y = scaled_data[scaled_label].values

    kendall, p_kendall = sc_stats.kendalltau(x.tolist(), y.tolist())
    spearman, p_spearman = sc_stats.spearmanr(x, y)
    pearson, p_pearson = sc_stats.pearsonr(x, y)
    bias = metrics.bias(y, x)
    mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)

    statistics = {
        'kendall': {'v': '%.2f' % kendall, 'p': '%.4f' % p_kendall},
        'spearman': {'v': '%.2f' % spearman, 'p': '%.4f' % p_spearman},
        'pearson': {'v': '%.2f' % pearson, 'p': '%.4f' % p_pearson},
        'bias': '%.4f' % bias,
        # the RMSD values are derived from the MSE decomposition
        'rmsd': {
            'rmsd': '%.4f' % np.sqrt(mse),
            'rmsd_corr': '%.4f' % np.sqrt(mse_corr),
            'rmsd_bias': '%.4f' % np.sqrt(mse_bias),
            'rmsd_var': '%.4f' % np.sqrt(mse_var),
        },
        'mse': {
            'mse': '%.4f' % mse,
            'mse_corr': '%.4f' % mse_corr,
            'mse_bias': '%.4f' % mse_bias,
            'mse_var': '%.4f' % mse_var,
        },
    }

    scaling_options = {
        'noscale': 'No scaling',
        'porosity': 'Scale using porosity',
        'linreg': 'Linear Regression',
        'mean_std': 'Mean - standard deviation',
        'min_max': 'Minimum,maximum',
        'lin_cdf_match': 'Piecewise <br> linear CDF matching',
        'cdf_match': 'CDF matching',
    }

    settings = {'scaling': scaling_options[scaling]}

    # no masking data is provided by this function
    era_data = {'labels': [], 'data': []}
    output_data = {
        'validation_data': ascat_insitu,
        'masking_data': era_data,
        'statistics': statistics,
        'settings': settings,
    }

    return output_data, 1
def compare_ssm_index(index, warp_gpi, sm_dataset, start=None, end=None,
                      weekly=True, plot=False):
    """
    Compare IWMI bare/crop soil moisture of one index with a satellite
    soil moisture time series.

    The IWMI series are rescaled to the satellite series with
    scaling.lin_cdf_match. In weekly mode Spearman correlations between
    the matched series are computed as well.

    Parameters
    ----------
    index : int
        Index handed to ssm_iwmi.IWMI_ts_index.
    warp_gpi : int
        Grid point handed to the ssm_TUW reader.
    sm_dataset : str
        One of 'cci', 'ascat', 'ers'.
    start, end : optional
        Only forwarded to the 'cci' reader.
    weekly : bool, optional
        Resample all series to weekly means before comparing.
    plot : bool, optional
        If True a figure is saved and the current figure is cleared.

    Returns
    -------
    iwmi_bare_plot, iwmi_crop_plot, sm_ts_plot
        The (weekly or original) series that are used for plotting.

    Raises
    ------
    ValueError
        If sm_dataset is not one of the supported names.
    """
    df = ssm_iwmi.IWMI_read_csv()
    iwmi_bare, iwmi_crop = ssm_iwmi.IWMI_ts_index(df, index)

    if sm_dataset == 'cci':
        sm_ts = ssm_TUW.read_CCI(warp_gpi, start, end)
    elif sm_dataset == 'ascat':
        sm_ts = ssm_TUW.read_ASCAT_ssm(warp_gpi)
    elif sm_dataset == 'ers':
        # NOTE(review): start/end of this function are not forwarded here
        sm_ts = ssm_TUW.read_ERS_ssm(warp_gpi, args=['sm'], start=None,
                                     end=None)
    else:
        raise ValueError("sm_dataset must be 'cci', 'ascat' or 'ers', "
                         "got %r" % (sm_dataset,))

    # None means "not computed"; replaces the former locals() look-ups
    corr_bare = None
    corr_crop = None
    corr_crop_bare = None

    if weekly:
        # resample(...).mean() replaces the removed how='mean' keyword
        iwmi_bare_weekly = iwmi_bare.resample('W').mean().dropna()
        iwmi_crop_weekly = iwmi_crop.resample('W').mean().dropna()
        sm_ts_weekly = sm_ts.resample('W').mean().dropna()

        # correlation of bare and crop
        if len(iwmi_bare) != 0 and len(iwmi_crop) != 0:
            # name the first bare column 1 before matching it against crop
            # (presumably to avoid a column name clash)
            bare_columns = list(iwmi_bare_weekly.columns)
            bare_columns[0] = 1
            iwmi_bare_weekly.columns = bare_columns
            match_bare_crop = temp_match.matching(iwmi_crop_weekly,
                                                  iwmi_bare_weekly)
            corr_crop_bare = metrics.spearmanr(
                match_bare_crop.iloc[:, 0], match_bare_crop.iloc[:, 1])[0]
            print(corr_crop_bare)

        if len(iwmi_bare) != 0:
            match_bare = temp_match.matching(sm_ts_weekly, iwmi_bare_weekly)
            iwmi_bare_weekly_match = match_bare.iloc[:, 1]
            sm_ts_weekly_bare = match_bare.iloc[:, 0]
            iwmi_bare_resc = scaling.lin_cdf_match(
                iwmi_bare_weekly.iloc[:, 0], sm_ts_weekly)
            iwmi_bare_weekly = pd.DataFrame(iwmi_bare_resc,
                                            index=iwmi_bare_weekly.index)
            corr_bare = metrics.spearmanr(iwmi_bare_weekly_match,
                                          sm_ts_weekly_bare)[0]

        if len(iwmi_crop) != 0:
            match_crop = temp_match.matching(sm_ts_weekly, iwmi_crop_weekly)
            iwmi_crop_weekly_match = match_crop.iloc[:, 1]
            sm_ts_weekly_crop = match_crop.iloc[:, 0]
            iwmi_crop_resc = scaling.lin_cdf_match(
                iwmi_crop_weekly.iloc[:, 0], sm_ts_weekly)
            iwmi_crop_weekly = pd.DataFrame(iwmi_crop_resc,
                                            index=iwmi_crop_weekly.index)
            corr_crop = metrics.spearmanr(iwmi_crop_weekly_match,
                                          sm_ts_weekly_crop)[0]

        sm_ts_plot = sm_ts_weekly
        iwmi_bare_plot = iwmi_bare_weekly
        iwmi_crop_plot = iwmi_crop_weekly
    else:
        if len(iwmi_bare) == 0:
            iwmi_bare_resc = iwmi_bare
        else:
            iwmi_bare_resc = scaling.lin_cdf_match(iwmi_bare, sm_ts)

        if len(iwmi_crop) == 0:
            iwmi_crop_resc = iwmi_crop
        else:
            iwmi_crop_resc = scaling.lin_cdf_match(iwmi_crop, sm_ts)

        iwmi_bare = pd.DataFrame(iwmi_bare_resc, index=iwmi_bare.index)
        iwmi_crop = pd.DataFrame(iwmi_crop_resc, index=iwmi_crop.index)

        sm_ts_plot = sm_ts
        iwmi_bare_plot = iwmi_bare
        iwmi_crop_plot = iwmi_crop

    if plot:
        ax = sm_ts_plot.plot(color='b')
        if len(iwmi_crop_plot) != 0:
            iwmi_crop_plot.plot(color='r', ax=ax)
        if len(iwmi_bare_plot) != 0:
            iwmi_bare_plot.plot(color='g', ax=ax)
        plt.legend([sm_dataset + ' ts',
                    'iwmi crop, index ' + str(index),
                    'iwmi bare, index ' + str(index)])

        if sm_dataset in ['ers', 'ascat']:
            plt.ylabel('degree of saturation [%]')
            if corr_crop_bare is not None:
                plt.title('corr_bare_crop =' + str(round(corr_crop_bare, 3)))
            plt.ylim([0, 140])
        else:
            plt.ylabel('volumetric soil moisture [m3/m3]')
            if corr_bare is not None and corr_crop is not None:
                # both correlations exist only if both series are non-empty,
                # in which case corr_crop_bare was computed as well
                plt.title('corr_bare = ' + str(round(corr_bare, 3))
                          + ' corr_crop = ' + str(round(corr_crop, 3))
                          + '\n corr_bare_crop = '
                          + str(round(corr_crop_bare, 3)))
            elif corr_bare is not None:
                plt.title('corr_bare = ' + str(round(corr_bare, 3)))
            elif corr_crop is not None:
                plt.title('corr_crop = ' + str(round(corr_crop, 3)))
            plt.ylim([0, 100])

        plt.grid()
        plt.savefig(os.path.join(root.x, 'staff', 'ipfeil', 'iwmi_plots',
                                 sm_dataset,
                                 sm_dataset + '_cdf_' + str(index) + '.png'))
        plt.clf()

    return iwmi_bare_plot, iwmi_crop_plot, sm_ts_plot
def start_pred(paths, region, pred_date, vi_str='NDVI', t_val='SWI_040',
               monthly=False, spatial_res=0.1):
    """
    Run the vegetation prediction (zribi_kd / zribi_sim) for every grid
    point inside the bounding box of a region and save the results.

    Parameters
    ----------
    paths : dict
        Paths to datasets; the keys 'SWI', 'NDVI', 'lc' and vi_str are read.
    region : str
        Region handed to Shape; its bounding box selects the grid points.
    pred_date
        SWI and VI series are cut at this date (slice end).
    vi_str : str, optional
        Vegetation index to use, default: NDVI
    t_val : str, optional
        SWI column to use, default: SWI_040
    monthly : bool, optional
        Resample SWI and VI to monthly means first.
    spatial_res : {0.1, 500}, optional
        Selects the grid: 0.1 uses the SWI file, 500 the NDVI file.

    Returns
    -------
    None
        The three result lists are written to results.npy, results2.npy
        and results3.npy in a fixed directory.

    Raises
    ------
    ValueError
        If spatial_res is neither 0.1 nor 500.
    """
    if spatial_res == 0.1:
        grid_key = 'SWI'
    elif spatial_res == 500:
        grid_key = 'NDVI'
    else:
        raise ValueError("spatial_res must be 0.1 or 500, got %r"
                         % (spatial_res,))

    with Dataset(paths[grid_key], 'r') as ncfile:
        res_lons = ncfile.variables['lon'][:]
        res_lats = ncfile.variables['lat'][:]

    with Dataset(paths['lc']) as ncfile:
        lccs = ncfile.variables['lccs_class'][:]
        lc_lons = ncfile.variables['lon'][:]
        lc_lats = ncfile.variables['lat'][:]
    # NOTE: a scatter plot of the whole land cover grid used to be here; it
    # was disabled because it made the computer hang for a while.

    # districts
    shapefile = os.path.join('C:\\', 'Users', 'i.pfeil', 'Documents',
                             '0_IWMI_DATASETS', 'shapefiles', 'IND_adm',
                             'IND_adm2')
    shpfile = Shape(region, shapefile=shapefile)
    lon_min, lat_min, lon_max, lat_max = shpfile.bbox
    lons = res_lons[np.where((res_lons >= lon_min) & (res_lons <= lon_max))]
    lats = res_lats[np.where((res_lats >= lat_min) & (res_lats <= lat_max))]

    start_date = datetime(2007, 7, 1)
    end_date = datetime(2015, 7, 1)

    # loop invariants
    swi_path = paths['SWI']
    vi_path = paths[vi_str]
    swi_list = [t_val]
    # urban | water | snow and ice | no data
    masked_classes = (-66, -46, -36, 0)

    results = []
    results2 = []
    results3 = []
    for lon in lons:
        # presumably the index of the nearest land cover column - verify
        # against find_nearest
        nearest_lon = find_nearest(lc_lons, lon)
        for lat in lats:
            print("%s %s" % (lon, lat))
            # debugging marker for one specific grid point
            if round(lon, 2) == 76.35 and round(lat, 2) == 19.55:
                print('danger')

            nearest_lat = find_nearest(lc_lats, lat)
            lc = lccs[nearest_lat, nearest_lon]
            if lc in masked_classes:
                print('lc mask')
                continue

            swi_df = read_ts(swi_path, lon=lon, lat=lat, params=swi_list,
                             start_date=start_date, end_date=end_date)
            vi_all = read_ts(vi_path, lon=lon, lat=lat, params=vi_str,
                             start_date=start_date, end_date=end_date)

            # only data up to the prediction date is used
            vi = vi_all[:pred_date]
            vi_min = np.nanmin(vi)
            vi_max = np.nanmax(vi)
            vi = rescale_peng(vi, vi_min, vi_max)

            swi = swi_df[t_val][:pred_date]
            swi = rescale_peng(swi, np.nanmin(swi), np.nanmax(swi))

            # resample monthly
            if monthly:
                swi = swi.resample("M").mean()
                vi = vi.resample("M").mean()

            # calculate differences between VIs of consecutive time steps
            # NOTE(review): the original indentation was lost; this is
            # assumed to run for monthly and non-monthly data - confirm.
            dvi = np.ediff1d(vi, to_end=np.nan)
            vi['D_VI'] = pd.Series(dvi, index=vi.index)

            matched_data = temp_match.matching(swi, vi)
            kd = zribi_kd(swi, vi, matched_data)
            results, results2, results3 = zribi_sim(
                lon, lat, swi, vi, matched_data, kd, vi_min, vi_max,
                results=results, results2=results2, results3=results3)

    # NOTE(review): assumed to run once after all grid points - confirm.
    np.save('C:\\Users\\i.pfeil\\Desktop\\veg_prediction\\results.npy',
            results)
    np.save('C:\\Users\\i.pfeil\\Desktop\\veg_prediction\\results2.npy',
            results2)
    np.save('C:\\Users\\i.pfeil\\Desktop\\veg_prediction\\results3.npy',
            results3)
mask_frozen_prob=5, mask_snow_prob=5) # drop nan values before doing any matching ascat_time_series.data = ascat_time_series.data.dropna() ISMN_time_series.data = ISMN_time_series.data.dropna() # rename the soil moisture column in ISMN_time_series.data to insitu_sm # to clearly differentiate the time series when they are plotted together ISMN_time_series.data.rename(columns={'soil moisture':label_insitu}, inplace=True) # get ISMN data that was observerd within +- 1 hour(1/24. day) of the ASCAT observation # do not include those indexes where no observation was found matched_data = temp_match.matching(ascat_time_series.data, ISMN_time_series.data, window=1 / 24.) # matched ISMN data is now a dataframe with the same datetime index # as ascat_time_series.data and the nearest insitu observation # continue only with relevant columns matched_data = matched_data[[label_ascat, label_insitu]] # the plot shows that ISMN and ASCAT are observed in different units matched_data.plot(figsize=(15, 5), secondary_y=[label_ascat], title='temporally merged data') plt.show() # this takes the matched_data DataFrame and scales all columns to the # column with the given reference_index, in this case in situ scaled_data = scaling.scale(matched_data, method='lin_cdf_match', reference_index=1)
def validate(params, timespan=('2009-01', '2009-12'), gpi=None,
             rescaling=None, y_axis_range=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every time step and compare the resulting soil moisture with in
    situ and GLDAS data.

    For each row of the resampled ASCAT data sig_sqr_diff is minimised
    (Nelder-Mead) starting at (m_veg_x0, m_soil_x0). The optimised
    'm_soil' is temporally matched with the in situ and GLDAS series,
    optionally rescaled, and shown as a time series plot with metric
    tables and as a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. Must contain 'sand', 'clay', 'temp', 's_vol',
        'f_rms', 'm_veg_x0' and 'm_soil_x0'. The dict is not modified.
    timespan : tuple, optional
        timespan to analyze
    gpi : int, optional
        Grid point index. If specified, the data is read with read_resam,
        otherwise a CSV file shipped in the 'data' directory is used.
    rescaling : string, optional
        rescaling method, one of 'min_max', 'linreg', 'mean_std' and
        'lin_cdf_match'
        Default: None
        insitu is the reference to which is scaled
    y_axis_range : tuple, optional
        specify (min, max) of y axis

    Returns
    -------
    None
        The function only creates matplotlib figures.
    """
    # raw strings: '\c' is not a valid escape sequence
    unit_dict = {'freq': 'GHz',
                 'sand': '',
                 'clay': '',
                 'temp': r'$^\circ$C',
                 'eps': '',
                 'theta': r'$^\circ$',
                 'f_rms': '',
                 'sig_bare': 'dB',
                 'm_soil': '%',
                 'm_veg': '%',
                 'm_soil_x0': '%',
                 'm_veg_x0': '%',
                 's_vol': '$m^3ha^{-1}$',
                 'sig_canopy': 'dB',
                 'sig_for': 'dB',
                 'sig_floor': 'dB',
                 'polarization': ''}

    param_should = ['sand', 'clay', 'temp', 's_vol', 'f_rms',
                    'm_veg_x0', 'm_soil_x0']
    for param in param_should:
        assert param in params, "missing parameter %r" % (param,)

    data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'data')

    if gpi is None:
        ts_resam = pd.read_csv(os.path.join(data_dir, '2011528_2009.csv'),
                               index_col=0,
                               parse_dates=True)[timespan[0]:timespan[1]]
    else:
        ts_resam = read_resam(gpi)[timespan[0]:timespan[1]]

    m_veg_x0 = params['m_veg_x0']
    m_soil_x0 = params['m_soil_x0']
    # the model gets the parameters without the start values; a new dict
    # is built so that the caller's dict stays untouched
    model_params = dict((key, value) for key, value in params.items()
                        if key not in ('m_veg_x0', 'm_soil_x0'))

    x0 = np.array([m_veg_x0, m_soil_x0])
    df = pd.DataFrame(np.nan, index=ts_resam.index,
                      columns=['m_veg', 'm_soil'], dtype=float)

    # optimise m_soil and m_veg
    for index, row in ts_resam.iterrows():
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))
        args = (ascat_inc, ascat_sig, model_params, '')
        res = minimize(sig_sqr_diff, x0, args=args, method='Nelder-Mead')

        if res['success']:
            # single .loc write instead of chained assignment
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    ismn_file = os.path.join(
        data_dir,
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})

    gldas = pd.read_csv(os.path.join(data_dir, 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas']) / 100.0

    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # insertion order must agree with row_labels below
    metric_results = OrderedDict()
    metric_results['bias'] = df_metrics.bias(scaled)
    metric_results['pearson'] = df_metrics.pearsonr(scaled)
    metric_results['spearman'] = df_metrics.spearmanr(scaled)
    metric_results['ubrmsd'] = df_metrics.rmsd(scaled)
    metric_results['std_ratio'] = df_std_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
        rmsd_title = 'unbiased RMSD'
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])
        rmsd_title = 'RMSD'

    ts_axes = scaled.plot(title=ts_title, figsize=(18, 8))
    plt.legend()

    # these are matplotlib.patch.Patch properties
    props = dict(facecolor='white', alpha=0)

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'spearman rho', rmsd_title,
                  'stddev ratio']

    cell_text = []
    for metric_values in metric_results.values():
        # Exact type check on purpose: results that are plain tuples hold
        # the values in their first element, whereas namedtuples (tuple
        # subclasses) are used directly. isinstance() would treat both alike.
        if type(metric_values) is tuple:
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])

    plt.table(cellText=cell_text,
              colLabels=columns,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=row_labels, loc='bottom',
              bbox=(0.2, -0.5, 0.5, 0.3))

    plt.table(cellText=[["%.2f" % tcol_error['ascat'],
                         "%.2f" % tcol_error['gldas'],
                         "%.2f" % tcol_error['insitu']]],
              colLabels=('ascat     ', 'gldas     ', 'insitu     '),
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'], loc='bottom',
              bbox=(0.2, -0.6, 0.5, 0.1))

    plt.subplots_adjust(left=0.08, bottom=0.35, right=0.85)
    plt.draw()

    # NOTE(review): in the flattened original this statement follows a lone
    # '#'; it is treated as active code here - confirm.
    if y_axis_range is not None:
        ts_axes.set_ylim(y_axis_range)

    infotext = '\n'.join('%s = %s %s' % (label, params[label],
                                         unit_dict[label])
                         for label in sorted(param_should))

    # place a text box right of the axes (axes coordinates)
    ts_axes.text(1.03, 1, infotext, transform=ts_axes.transAxes,
                 fontsize=12, verticalalignment='top', bbox=props)

    matrix_axes = scatter_matrix(scaled)
    matrix_axes.flat[0].figure.suptitle(ts_title)

    # draw a 1:1 line in every off-diagonal panel of the 3x3 matrix
    for j, ax in enumerate(matrix_axes.flatten()):
        if y_axis_range is not None:
            ax.set_xlim(y_axis_range)

        # panels 0, 4 and 8 are the diagonal of the 3x3 matrix
        if np.remainder(j + 1, 3 + 1) != 1:
            if y_axis_range is not None:
                ax.set_ylim(y_axis_range)

            min_x, max_x = ax.get_xlim()
            min_y, max_y = ax.get_ylim()

            # find minimum lower left coordinate and maximum upper right
            min_ll = min([min_x, min_y])
            max_ur = max([max_x, max_y])

            ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')
def optimise(params, timespan=('2009-01', '2009-12'), gpi=None,
             rescaling=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation and compare the result with reference data.

    For each row of the resampled time series, 'm_veg' and 'm_soil' are
    fitted with a Nelder-Mead minimisation of `sig_sqr_diff`. The fitted
    'm_soil' series is then temporally matched with the in-situ and GLDAS
    series read from the ``data`` directory, optionally rescaled, and
    shown in two figures: the time series with metric tables below it
    and a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. Must contain the start values 'm_veg_x0' and
        'm_soil_x0'; the remaining entries are handed to `sig_sqr_diff`
        unchanged. The dict itself is not modified.
    timespan : tuple of str, optional
        Start and end label used to slice the time series index.
    gpi : int, optional
        Grid point index passed to `read_resam`. If None, the time
        series is read from ``data/2011528_2009.csv`` instead.
    rescaling : str, optional
        Rescaling method passed to `scaling.scale` with the in-situ
        column (index 1) as reference. If None, the matched data is
        used without rescaling.

    Returns
    -------
    df : pandas.DataFrame
        Columns 'm_veg' and 'm_soil', indexed like the sliced input time
        series. Rows whose minimisation did not succeed are NaN.

    Raises
    ------
    KeyError
        If 'm_veg_x0' or 'm_soil_x0' is missing from `params`.
    """
    if gpi is None:
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0, parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # Work on a copy: popping the start values from the caller's dict
    # would make a second call with the same dict fail with a KeyError.
    params = dict(params)
    m_veg_x0 = params.pop('m_veg_x0')
    m_soil_x0 = params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    # Results are collected by position in a float array instead of
    # chained `df[col][index] = ...` assignment, which pandas does not
    # guarantee to write through to the frame.
    result_columns = ['m_veg', 'm_soil']
    estimates = np.empty((len(ts_resam), len(result_columns)))
    estimates.fill(np.nan)

    # optimise m_soil and m_veg
    for pos, (_, row) in enumerate(ts_resam.iterrows()):
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(
            np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        res = minimize(sig_sqr_diff, x0,
                       args=(ascat_inc, ascat_sig, params, ''),
                       method='Nelder-Mead')

        if res.success:
            estimates[pos] = res.x

    df = pd.DataFrame(estimates, index=ts_resam.index,
                      columns=result_columns)

    # reference data: in-situ soil moisture and GLDAS
    ismn_file = os.path.join(
        'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_'
        'Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})

    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])

    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # Each table row label is kept next to the metric it describes so
    # the two cannot get out of step.
    metric_rows = [
        ('bias', df_metrics.bias(scaled)),
        ('pearson R', df_metrics.pearsonr(scaled)),
        ('kendall tau', df_metrics.kendalltau(scaled)),
        ('unbiased RMSD', df_metrics.ubrmsd(scaled)),
        ('variance ratio', df_var_ratio(scaled)),
    ]
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    pair_labels = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    pair_keys = ('ascat_and_insitu', 'ascat_and_gldas', 'insitu_and_gldas')

    cell_text = []
    for _, result in metric_rows:
        # A plain tuple is unwrapped to its first element (presumably
        # (statistic, p-value) -- confirm against df_metrics). The check
        # must stay exact: isinstance() would also match the
        # namedtuple-like results that `_asdict()` is called on.
        if type(result) is tuple:
            result = result[0]
        values = result._asdict()
        cell_text.append(["%.2f" % values[key] for key in pair_keys])

    plt.table(cellText=cell_text,
              colLabels=pair_labels,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=[label for label, _ in metric_rows],
              loc='bottom',
              bbox=(0.2, -1.25, 0.5, 0.8))

    tcol_labels = ('ascat', 'gldas', 'insitu')
    plt.table(cellText=[["%.2f" % tcol_error[name]
                         for name in tcol_labels]],
              colLabels=tcol_labels,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'],
              loc='bottom',
              bbox=(0.2, -1.65, 0.5, 0.3))

    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for (row_idx, col_idx), ax in np.ndenumerate(axes):
            # skip the diagonal panels of the matrix
            if row_idx == col_idx:
                continue
            min_x, max_x = ax.get_xlim()
            min_y, max_y = ax.get_ylim()

            # find minimum lower left coordinate and maximum upper right
            min_ll = min([min_x, min_y])
            max_ur = max([max_x, max_y])

            ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df
# Reduce the ASCAT data to the variable of interest and discard missing
# values so that only valid observations take part in the matching.
ascat_time_series.data = ascat_time_series.data[label_ascat].dropna()

# Same clean-up for the in-situ data; its 'soil moisture' column is
# renamed to label_insitu so both series stay distinguishable in plots.
ISMN_time_series.data = ISMN_time_series.data.dropna()
ISMN_time_series.data.rename(inplace=True,
                             columns={'soil moisture': label_insitu})

# Match in-situ observations to the ASCAT observations using a window of
# 1/24 day (+- 1 hour) and continue with the two relevant columns only.
matched_data = temp_match.matching(
    ascat_time_series.data,
    ISMN_time_series.data,
    window=1 / 24.)[[label_ascat, label_insitu]]

# ASCAT goes on a secondary y axis because the two series are observed
# in different units.
matched_data.plot(title='temporally merged data',
                  secondary_y=[label_ascat],
                  figsize=(15, 5))
plt.show()

# Scale every column to the column at reference_index (here the in-situ
# series) using the 'lin_cdf_match' method.
scaled_data = scaling.scale(matched_data,
                            reference_index=1,
                            method='lin_cdf_match')
def optimise(params, timespan=('2009-01', '2009-12'), gpi=None,
             rescaling=None):
    """
    Optimise vegetation water content 'm_veg' and soil moisture 'm_soil'
    for every observation and compare the result with reference data.

    For each row of the resampled time series, 'm_veg' and 'm_soil' are
    fitted with a Nelder-Mead minimisation of `sig_sqr_diff`. The fitted
    'm_soil' series is then temporally matched with the in-situ and GLDAS
    series read from the ``data`` directory, optionally rescaled, and
    shown in two figures: the time series with metric tables below it
    and a scatter matrix.

    Parameters
    ----------
    params : dict
        Model parameters. Must contain the start values 'm_veg_x0' and
        'm_soil_x0'; the remaining entries are handed to `sig_sqr_diff`
        unchanged. The dict itself is not modified.
    timespan : tuple of str, optional
        Start and end label used to slice the time series index.
    gpi : int, optional
        Grid point index passed to `read_resam`. If None, the time
        series is read from ``data/2011528_2009.csv`` instead.
    rescaling : str, optional
        Rescaling method passed to `scaling.scale` with the in-situ
        column (index 1) as reference. If None, the matched data is
        used without rescaling.

    Returns
    -------
    df : pandas.DataFrame
        Columns 'm_veg' and 'm_soil', indexed like the sliced input time
        series. Rows whose minimisation did not succeed are NaN.

    Raises
    ------
    KeyError
        If 'm_veg_x0' or 'm_soil_x0' is missing from `params`.
    """
    if gpi is None:
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0, parse_dates=True)
    else:
        ts_resam = read_resam(gpi)
    ts_resam = ts_resam[timespan[0]:timespan[1]]

    # Work on a copy: popping the start values from the caller's dict
    # would make a second call with the same dict fail with a KeyError.
    params = dict(params)
    m_veg_x0 = params.pop('m_veg_x0')
    m_soil_x0 = params.pop('m_soil_x0')
    x0 = np.array([m_veg_x0, m_soil_x0])

    # Results are collected by position in a float array instead of
    # chained `df[col][index] = ...` assignment, which pandas does not
    # guarantee to write through to the frame.
    result_columns = ['m_veg', 'm_soil']
    estimates = np.empty((len(ts_resam), len(result_columns)))
    estimates.fill(np.nan)

    # optimise m_soil and m_veg
    for pos, (_, row) in enumerate(ts_resam.iterrows()):
        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = db2lin(
            np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        res = minimize(sig_sqr_diff, x0,
                       args=(ascat_inc, ascat_sig, params, ''),
                       method='Nelder-Mead')

        if res.success:
            estimates[pos] = res.x

    df = pd.DataFrame(estimates, index=ts_resam.index,
                      columns=result_columns)

    # reference data: in-situ soil moisture and GLDAS
    ismn_file = os.path.join(
        'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_'
        'Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm')
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})

    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])

    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    # Each table row label is kept next to the metric it describes so
    # the two cannot get out of step.
    metric_rows = [
        ('bias', df_metrics.bias(scaled)),
        ('pearson R', df_metrics.pearsonr(scaled)),
        ('kendall tau', df_metrics.kendalltau(scaled)),
        ('unbiased RMSD', df_metrics.ubrmsd(scaled)),
        ('variance ratio', df_var_ratio(scaled)),
    ]
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    pair_labels = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    pair_keys = ('ascat_and_insitu', 'ascat_and_gldas', 'insitu_and_gldas')

    cell_text = []
    for _, result in metric_rows:
        # A plain tuple is unwrapped to its first element (presumably
        # (statistic, p-value) -- confirm against df_metrics). The check
        # must stay exact: isinstance() would also match the
        # namedtuple-like results that `_asdict()` is called on.
        if type(result) is tuple:
            result = result[0]
        values = result._asdict()
        cell_text.append(["%.2f" % values[key] for key in pair_keys])

    plt.table(cellText=cell_text,
              colLabels=pair_labels,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=[label for label, _ in metric_rows],
              loc='bottom',
              bbox=(0.2, -1.25, 0.5, 0.8))

    tcol_labels = ('ascat', 'gldas', 'insitu')
    plt.table(cellText=[["%.2f" % tcol_error[name]
                         for name in tcol_labels]],
              colLabels=tcol_labels,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'],
              loc='bottom',
              bbox=(0.2, -1.65, 0.5, 0.3))

    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for (row_idx, col_idx), ax in np.ndenumerate(axes):
            # skip the diagonal panels of the matrix
            if row_idx == col_idx:
                continue
            min_x, max_x = ax.get_xlim()
            min_y, max_y = ax.get_ylim()

            # find minimum lower left coordinate and maximum upper right
            min_ll = min([min_x, min_y])
            max_ur = max([max_x, max_y])

            ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df