def test_add_scale(method):
    """
    Scale two linearly related columns onto each other with ``method``.

    ``y`` is exactly ``0.5 * x``, so after scaling ``x`` onto ``y`` (and
    ``y`` onto ``x``) the scaled column has to match the reference column.
    For "lin_cdf_match" and "cdf_match" the calls are additionally
    expected to emit a deprecation warning.
    """
    size = 1000
    x_values = np.arange(size, dtype=float)
    y_values = np.arange(size) * 0.5
    frame = pd.DataFrame({'x': x_values, 'y': y_values}, columns=['x', 'y'])

    def scale_both_ways():
        # default labels first, then the scaling the other way round
        forward = scaling.add_scaled(frame, method=method)
        backward = scaling.add_scaled(frame, method=method,
                                      label_in='y', label_scale='x')
        return forward, backward

    if method in ("lin_cdf_match", "cdf_match"):
        # both calls run inside the same deprecation check
        with pytest.deprecated_call():
            df_scaled, df_scaled2 = scale_both_ways()
    else:
        df_scaled, df_scaled2 = scale_both_ways()

    nptest.assert_almost_equal(df_scaled['y'].values,
                               df_scaled['x_scaled_' + method].values)
    nptest.assert_almost_equal(df_scaled2['x'].values,
                               df_scaled2['y_scaled_' + method].values)
def test_add_scale(method):
    """
    Scale two linearly related columns onto each other with ``method``.

    ``y`` is exactly ``0.5 * x``, so the scaled ``x`` column must equal
    ``y`` and, when scaling the other way round, the scaled ``y`` column
    must equal ``x``.
    """
    n = 1000
    # Use the builtin float: the ``np.float`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    x = np.arange(n, dtype=float)
    y = np.arange(n) * 0.5

    df = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])
    df_scaled = scaling.add_scaled(df, method=method)
    nptest.assert_almost_equal(df_scaled['y'].values,
                               df_scaled['x_scaled_' + method].values)

    # test the scaling the other way round
    df_scaled = scaling.add_scaled(df, method=method,
                                   label_in='y', label_scale='x')
    nptest.assert_almost_equal(df_scaled['x'].values,
                               df_scaled['y_scaled_' + method].values)
def test_add_scale(method):
    """
    Verify that ``scaling.add_scaled`` maps ``x`` onto ``y`` and back.

    The input frame holds ``x = 0..999`` and ``y = 0.5 * x``; each scaled
    column is compared against the column it was scaled to.
    """
    size = 1000
    frame = pd.DataFrame(
        {'x': np.arange(size, dtype=float), 'y': np.arange(size) * 0.5},
        columns=['x', 'y'],
    )

    # scaling with the default labels
    forward = scaling.add_scaled(frame, method=method)
    expected_y = forward['y'].values
    scaled_x = forward['x_scaled_' + method].values
    nptest.assert_almost_equal(expected_y, scaled_x)

    # scaling the other way round
    backward = scaling.add_scaled(frame, method=method,
                                  label_in='y', label_scale='x')
    expected_x = backward['x'].values
    scaled_y = backward['y_scaled_' + method].values
    nptest.assert_almost_equal(expected_x, scaled_y)
def test_add_scale_error(method):
    """
    Expect a ``KeyError`` when scaling the test frame with ``method``.
    """
    n = 1000
    # Use the builtin float: the ``np.float`` alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    x = np.arange(n, dtype=float)
    y = np.arange(n) * 0.5

    df = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])
    with pytest.raises(KeyError):
        df_scaled = scaling.add_scaled(df, method=method)
        # NOTE(review): this lookup is also inside the raises block, so a
        # KeyError from the column access would satisfy the test as well -
        # confirm whether only add_scaled is meant to be guarded.
        nptest.assert_almost_equal(df_scaled['y'].values,
                                   df_scaled['x_scaled_' + method].values)
# matched_ISMN_data is now a DataFrame with the same datetime index as
# ascat_time_series.data, holding the nearest in-situ observation.
# Temporal matching also includes distance information, but we are not
# interested in it right now, so let's drop it.
matched_ISMN_data = matched_ISMN_data.drop(['distance'], axis=1)

# Join the SSM column of the ASCAT data to the matched ISMN data.
matched_data = matched_ISMN_data.join(ascat_time_series.data[label_ascat])

# The plot shows that ISMN and ASCAT are observed in different units.
matched_data.plot(secondary_y=[label_ascat])
plt.show()

# add_scaled takes the matched_data DataFrame and adds a scaled column.
scaled_data = scaling.add_scaled(matched_data, method='lin_cdf_match',
                                 label_in=label_ascat,
                                 label_scale=label_insitu)
# The label of the scaled data is constructed as
# label_in + '_scaled_' + method.
scaled_ascat_label = label_ascat + '_scaled_' + 'lin_cdf_match'

# Now the scaled ASCAT data and insitu_sm are in the same space.
scaled_data.plot(secondary_y=[label_ascat])
plt.show()

# Read the scaled column from the frame add_scaled returned, so this does
# not rely on add_scaled also having modified matched_data in place.
plt.scatter(scaled_data[scaled_ascat_label].values,
            scaled_data[label_insitu].values)
plt.xlabel(scaled_ascat_label)
plt.ylabel(label_insitu)
plt.show()

# Inputs for the correlation coefficients, RMSD, bias and Nash Sutcliffe.
x = scaled_data[scaled_ascat_label].values
y = scaled_data[label_insitu].values

# Single pre-formatted argument: prints the same text whether print is the
# Python 2 statement or the Python 3 function.
print("ISMN time series: %s" % (ISMN_time_series,))
def _anomaly_frame(frame, label, anomaly):
    """
    Return a copy of ``frame`` with column ``label`` converted to anomalies.

    Parameters
    ----------
    frame: pandas.DataFrame
        Data containing the column ``label``.
    label: string
        Name of the column to convert.
    anomaly: string
        'climatology' subtracts the climatology computed by
        ``anomaly_calc.calc_climatology``; 'average' calls
        ``anomaly_calc.calc_anomaly`` with its defaults. Any other value
        leaves the column unchanged.

    Returns
    -------
    frame: pandas.DataFrame
        The converted copy with all rows containing NaN dropped.
    """
    # work on a copy so the caller's data is not modified
    frame = frame.copy()
    if anomaly == 'climatology':
        climatology = anomaly_calc.calc_climatology(frame[label])
        frame[label] = anomaly_calc.calc_anomaly(
            frame[label], climatology=climatology).values
    elif anomaly == 'average':
        frame[label] = anomaly_calc.calc_anomaly(frame[label]).values
    return frame.dropna()


def compare_data(ismn_data, validation_data, scaling='linreg', anomaly=None):
    """
    Compare data from an ISMN station to the defined validation datasets.

    Parameters
    ----------
    ismn_data: pandas.Dataframe
        Data from the ISMN used as a reference
    validation_data: dict
        Dictionary of pandas.DataFrames, One for each
        dataset to compare against
    scaling: string, optional
        Scaling method to use.
    anomaly: string
        If set then the validation is done for anomalies.
        'climatology' and 'average' are recognised.

    Returns
    -------
    output_data: dict
        Dictionary with the keys 'validation_data', 'masking_data',
        'statistics' and 'settings'.
    status: int
        Always 1.

    Raises
    ------
    ValueError
        If ``validation_data`` is empty.
    NotImplementedError
        If ``scaling`` is 'porosity', which has no implementation here.
    """
    insitu_label = 'soil moisture'
    # NOTE(review): every validation dataset is looked up under this fixed
    # column name - confirm this holds for all datasets passed in.
    vdata_label = 'cci_sm'

    # Fail early with a clear message instead of hitting an unbound local
    # further down.
    if not validation_data:
        raise ValueError('validation_data must contain at least one dataset')
    if scaling == 'porosity':
        raise NotImplementedError(
            "scaling method 'porosity' is not implemented")

    if anomaly is not None:
        ismn_data = _anomaly_frame(ismn_data, insitu_label, anomaly)

    for vdata in validation_data.values():
        if anomaly is not None:
            vdata = _anomaly_frame(vdata, vdata_label, anomaly)

        matched_data = temp_match.matching(ismn_data, vdata, window=1)

        if scaling != 'noscale':
            scaled_data = scale.add_scaled(matched_data,
                                           label_in=vdata_label,
                                           label_scale=insitu_label,
                                           method=scaling)
            scaled_label = vdata_label + '_scaled_' + scaling
            scaled_data = scaled_data[[insitu_label, scaled_label]]
        else:
            scaled_data = matched_data[[insitu_label, vdata_label]]
            scaled_label = vdata_label

    # NOTE(review): only the dataset handled last in the loop above ends up
    # in the returned output; earlier datasets are matched and scaled but
    # their results are overwritten.
    labels, values = scaled_data.to_dygraph_format()
    ascat_insitu = {'labels': labels, 'data': values}

    x = scaled_data[insitu_label].values
    y = scaled_data[scaled_label].values

    kendall, p_kendall = sc_stats.kendalltau(x.tolist(), y.tolist())
    spearman, p_spearman = sc_stats.spearmanr(x, y)
    pearson, p_pearson = sc_stats.pearsonr(x, y)
    bias = metrics.bias(y, x)
    mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)

    # all numbers are pre-formatted as strings for the consumer of the output
    statistics = {
        'kendall': {
            'v': '%.2f' % kendall,
            'p': '%.4f' % p_kendall
        },
        'spearman': {
            'v': '%.2f' % spearman,
            'p': '%.4f' % p_spearman
        },
        'pearson': {
            'v': '%.2f' % pearson,
            'p': '%.4f' % p_pearson
        },
        'bias': '%.4f' % bias,
        'rmsd': {
            'rmsd': '%.4f' % np.sqrt(mse),
            'rmsd_corr': '%.4f' % np.sqrt(mse_corr),
            'rmsd_bias': '%.4f' % np.sqrt(mse_bias),
            'rmsd_var': '%.4f' % np.sqrt(mse_var)
        },
        'mse': {
            'mse': '%.4f' % mse,
            'mse_corr': '%.4f' % mse_corr,
            'mse_bias': '%.4f' % mse_bias,
            'mse_var': '%.4f' % mse_var
        }
    }

    scaling_options = {
        'noscale': 'No scaling',
        'porosity': 'Scale using porosity',
        'linreg': 'Linear Regression',
        'mean_std': 'Mean - standard deviation',
        'min_max': 'Minimum,maximum',
        'lin_cdf_match': 'Piecewise <br> linear CDF matching',
        'cdf_match': 'CDF matching'
    }

    settings = {
        'scaling': scaling_options[scaling],
    }

    era_data = {'labels': [], 'data': []}
    output_data = {
        'validation_data': ascat_insitu,
        'masking_data': era_data,
        'statistics': statistics,
        'settings': settings
    }

    return output_data, 1
#temporal matching also includes distance information #but we are not interested in it right now so let's drop it matched_ISMN_data = matched_ISMN_data.drop(['distance'], axis=1) #this joins the SSM column of the ASCAT data to the matched ISMN data matched_data = matched_ISMN_data.join( ascat_time_series.data[label_ascat]) #the plot shows that ISMN and ASCAT are observed in different units matched_data.plot(secondary_y=[label_ascat]) plt.show() #this takes the matched_data DataFrame and adds a column scaled_data = scaling.add_scaled(matched_data, method='lin_cdf_match', label_in=label_ascat, label_scale=label_insitu) #the label of the scaled data is construced as label_in+'_scaled_'+method scaled_ascat_label = label_ascat + '_scaled_' + 'lin_cdf_match' #now the scaled ascat data and insitu_sm are in the same space scaled_data.plot(secondary_y=[label_ascat]) plt.show() plt.scatter(matched_data[scaled_ascat_label].values, matched_data[label_insitu].values) plt.xlabel(scaled_ascat_label) plt.ylabel(label_insitu) plt.show() #calculate correlation coefficients, RMSD, bias, Nash Sutcliffe x, y = matched_data[scaled_ascat_label].values, matched_data[