Exemple #1
0
def test_add_scale(method):
    """Scale ``x`` onto ``y`` and ``y`` onto ``x`` and compare with the target.

    ``y`` is exactly ``0.5 * x``, so each scaled column is expected to
    reproduce the column it was scaled against. For the two CDF-matching
    methods the calls must additionally emit a deprecation warning, hence
    they run inside ``pytest.deprecated_call``.
    """
    size = 1000
    frame = pd.DataFrame(
        {'x': np.arange(size, dtype=float), 'y': np.arange(size) * 0.5},
        columns=['x', 'y'])

    def scale_both_ways():
        # First call uses add_scaled's default labels; the second call
        # tests the scaling the other way round.
        forward = scaling.add_scaled(frame, method=method)
        backward = scaling.add_scaled(frame, method=method,
                                      label_in='y', label_scale='x')
        return forward, backward

    if method in ["lin_cdf_match", "cdf_match"]:
        with pytest.deprecated_call():
            df_scaled, df_scaled2 = scale_both_ways()
    else:
        df_scaled, df_scaled2 = scale_both_ways()

    expected_forward = df_scaled['y'].values
    nptest.assert_almost_equal(expected_forward,
                               df_scaled['x_scaled_' + method].values)

    expected_backward = df_scaled2['x'].values
    nptest.assert_almost_equal(expected_backward,
                               df_scaled2['y_scaled_' + method].values)
Exemple #2
0
def test_add_scale(method):
    """Check ``scaling.add_scaled`` in both directions for ``method``.

    ``y`` is exactly ``0.5 * x``; scaling one column against the other is
    expected to reproduce the target column (to ``assert_almost_equal``
    precision).
    """
    n = 1000
    # Builtin ``float`` instead of ``np.float``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    x = np.arange(n, dtype=float)
    y = np.arange(n) * 0.5

    df = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])
    df_scaled = scaling.add_scaled(df, method=method)
    nptest.assert_almost_equal(df_scaled['y'].values,
                               df_scaled['x_scaled_' + method].values)

    # test the scaling the other way round
    df_scaled = scaling.add_scaled(df, method=method,
                                   label_in='y',
                                   label_scale='x')
    nptest.assert_almost_equal(df_scaled['x'].values,
                               df_scaled['y_scaled_' + method].values)
Exemple #3
0
def test_add_scale(method):
    """Round-trip check of ``scaling.add_scaled`` for the given ``method``.

    The frame holds ``x`` and ``y = 0.5 * x``. Scaling ``x`` against ``y``
    must give back ``y``, and scaling ``y`` against ``x`` must give back
    ``x``.
    """
    count = 1000
    columns = {'x': np.arange(count, dtype=float),
               'y': np.arange(count) * 0.5}
    frame = pd.DataFrame(columns, columns=['x', 'y'])

    # x scaled onto y (add_scaled's default labels)
    forward = scaling.add_scaled(frame, method=method)
    target_y = forward['y'].values
    scaled_x = forward['x_scaled_' + method].values
    nptest.assert_almost_equal(target_y, scaled_x)

    # and the other way round: y scaled onto x
    backward = scaling.add_scaled(frame, method=method,
                                  label_in='y', label_scale='x')
    target_x = backward['x'].values
    scaled_y = backward['y_scaled_' + method].values
    nptest.assert_almost_equal(target_x, scaled_y)
Exemple #4
0
def test_add_scale_error(method):
    """``scaling.add_scaled`` must raise ``KeyError`` for ``method``."""
    n = 1000
    # Builtin ``float`` instead of ``np.float``: the alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    x = np.arange(n, dtype=float)
    y = np.arange(n) * 0.5

    df = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])
    # Only the call under test belongs inside the context manager. The old
    # follow-up assertion never ran when add_scaled raised, and its lookup of
    # a missing 'x_scaled_<method>' column would itself have satisfied
    # pytest.raises(KeyError) if add_scaled had returned silently.
    with pytest.raises(KeyError):
        scaling.add_scaled(df, method=method)
Exemple #5
0
def test_add_scale_error(method):
    """Expect ``KeyError`` from ``scaling.add_scaled`` for ``method``."""
    n = 1000
    # ``np.float`` no longer exists (deprecated in NumPy 1.20, removed in
    # 1.24); the builtin ``float`` yields the same float64 dtype.
    x = np.arange(n, dtype=float)
    y = np.arange(n) * 0.5

    df = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])
    # Keep the raises-block down to the single call that is expected to fail.
    # Code placed after it is unreachable when the call raises, and a
    # KeyError from indexing a missing scaled column would otherwise let the
    # test pass even if add_scaled itself did not raise.
    with pytest.raises(KeyError):
        scaling.add_scaled(df, method=method)
Exemple #6
0
 # Excerpt of a larger example script: matched_ISMN_data, ascat_time_series,
 # label_ascat, label_insitu and ISMN_time_series are defined outside it.
 #
 # matched_ISMN_data is now a DataFrame with the same datetime index
 # as ascat_time_series.data and the nearest in situ observation
 
 # temporal matching also includes distance information,
 # but we are not interested in it right now, so let's drop it
 matched_ISMN_data = matched_ISMN_data.drop(['distance'],axis=1)
 
 # this joins the label_ascat column of the ASCAT data to the matched ISMN data
 matched_data = matched_ISMN_data.join(ascat_time_series.data[label_ascat])       
 
 # the plot shows that ISMN and ASCAT are observed in different units;
 # the ASCAT column is drawn against a secondary y axis
 matched_data.plot(secondary_y=[label_ascat])
 plt.show()
 
 # this takes the matched_data DataFrame and adds a column holding the
 # ASCAT data scaled against the in situ data with 'lin_cdf_match'
 scaled_data = scaling.add_scaled(matched_data, method='lin_cdf_match',
                                  label_in=label_ascat,label_scale=label_insitu)
 # the label of the scaled data is constructed as label_in+'_scaled_'+method
 scaled_ascat_label = label_ascat+'_scaled_'+'lin_cdf_match'
 # now the scaled ASCAT data and the in situ data are in the same space
 scaled_data.plot(secondary_y=[label_ascat])
 plt.show()
 
 # NOTE(review): the scaled column is read from matched_data, not from
 # scaled_data; this presumably relies on add_scaled adding the column to
 # the frame it was given -- confirm, or read from scaled_data instead.
 plt.scatter(matched_data[scaled_ascat_label].values,matched_data[label_insitu].values)
 plt.xlabel(scaled_ascat_label)
 plt.ylabel(label_insitu)
 plt.show()
 
 # inputs for the statistics (correlation coefficients, RMSD, bias,
 # Nash Sutcliffe); the calculation itself is not part of this excerpt
 x, y = matched_data[scaled_ascat_label].values, matched_data[label_insitu].values
 
 # NOTE: Python 2 print statement; this line is a SyntaxError on Python 3
 print "ISMN time series:",ISMN_time_series
Exemple #7
0
def _to_anomaly(frame, column, anomaly):
    """Return a NaN-free copy of ``frame`` with ``column`` as anomalies.

    ``anomaly == 'climatology'`` subtracts a climatology computed from the
    column itself; ``anomaly == 'average'`` uses ``calc_anomaly`` with its
    defaults. Any other value leaves the column untouched. Rows containing
    NaN are dropped in every case.
    """
    # Work on a copy so the caller's DataFrame is not modified.
    frame = frame.copy()
    if anomaly == 'climatology':
        climatology = anomaly_calc.calc_climatology(frame[column])
        frame[column] = anomaly_calc.calc_anomaly(
            frame[column], climatology=climatology).values
    elif anomaly == 'average':
        frame[column] = anomaly_calc.calc_anomaly(frame[column]).values
    return frame.dropna()


def compare_data(ismn_data, validation_data, scaling='linreg', anomaly=None):
    """
    Compare data from an ISMN station to the defined validation datasets.

    Parameters
    ----------
    ismn_data: pandas.Dataframe
        Data from the ISMN used as a reference. The column
        'soil moisture' is used.
    validation_data: dict
        Dictionary of pandas.DataFrames, One for each dataset to
        compare against. The column 'cci_sm' of each frame is used.
    scaling: string, optional
        Scaling method to use. 'noscale' compares the unscaled data, any
        other value is passed on as the ``method`` of ``scale.add_scaled``.
    anomaly: string
        If set then the validation is done for anomalies
        ('climatology' or 'average').

    Returns
    -------
    output_data: dict
        Dictionary with the keys 'validation_data', 'masking_data',
        'statistics' and 'settings'.
    status: int
        Always 1.

    Raises
    ------
    NotImplementedError
        If ``scaling`` is 'porosity', which is listed but not implemented.
    ValueError
        If ``validation_data`` is empty.
    """
    insitu_label = 'soil moisture'
    vdata_label = 'cci_sm'

    # Both cases previously fell through to an unbound ``scaled_data`` further
    # down; fail early with a message that names the actual problem.
    if scaling == 'porosity':
        raise NotImplementedError(
            "scaling method 'porosity' is not implemented")
    if not validation_data:
        raise ValueError("validation_data must contain at least one dataset")

    # The anomaly branch used to reference names that do not exist in this
    # function (ascat_masked, ascat_label, ISMN_data) and therefore crashed
    # for every non-None ``anomaly``. It now operates on the data that is
    # actually compared below.
    if anomaly is not None:
        ismn_data = _to_anomaly(ismn_data, insitu_label, anomaly)

    # NOTE(review): every dataset is processed, but only the result of the
    # last one survives the loop and is reported -- confirm this is intended.
    for dname in validation_data:
        vdata = validation_data[dname]
        if anomaly is not None:
            vdata = _to_anomaly(vdata, vdata_label, anomaly)

        matched_data = temp_match.matching(ismn_data, vdata, window=1)

        if scaling != 'noscale':
            scaled_data = scale.add_scaled(matched_data,
                                           label_in=vdata_label,
                                           label_scale=insitu_label,
                                           method=scaling)

            scaled_label = vdata_label + '_scaled_' + scaling

            scaled_data = scaled_data[[insitu_label, scaled_label]]
        else:
            scaled_data = matched_data[[insitu_label, vdata_label]]
            scaled_label = vdata_label

    labels, values = scaled_data.to_dygraph_format()

    ascat_insitu = {'labels': labels, 'data': values}

    x, y = scaled_data[insitu_label].values, scaled_data[scaled_label].values

    kendall, p_kendall = sc_stats.kendalltau(x.tolist(), y.tolist())
    spearman, p_spearman = sc_stats.spearmanr(x, y)
    pearson, p_pearson = sc_stats.pearsonr(x, y)
    # Argument order (y, x) is kept exactly as in the original call.
    bias = metrics.bias(y, x)
    mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)
    statistics = {
        'kendall': {
            'v': '%.2f' % kendall,
            'p': '%.4f' % p_kendall
        },
        'spearman': {
            'v': '%.2f' % spearman,
            'p': '%.4f' % p_spearman
        },
        'pearson': {
            'v': '%.2f' % pearson,
            'p': '%.4f' % p_pearson
        },
        'bias': '%.4f' % bias,
        # The RMSD figures are derived from the MSE decomposition.
        'rmsd': {
            'rmsd': '%.4f' % np.sqrt(mse),
            'rmsd_corr': '%.4f' % np.sqrt(mse_corr),
            'rmsd_bias': '%.4f' % np.sqrt(mse_bias),
            'rmsd_var': '%.4f' % np.sqrt(mse_var)
        },
        'mse': {
            'mse': '%.4f' % mse,
            'mse_corr': '%.4f' % mse_corr,
            'mse_bias': '%.4f' % mse_bias,
            'mse_var': '%.4f' % mse_var
        }
    }

    # Human-readable names for the supported ``scaling`` values.
    scaling_options = {
        'noscale': 'No scaling',
        'porosity': 'Scale using porosity',
        'linreg': 'Linear Regression',
        'mean_std': 'Mean - standard deviation',
        'min_max': 'Minimum,maximum',
        'lin_cdf_match': 'Piecewise <br> linear CDF matching',
        'cdf_match': 'CDF matching'
    }

    settings = {
        'scaling': scaling_options[scaling],
    }

    # No masking data is produced here; an empty placeholder is returned.
    era_data = {'labels': [], 'data': []}
    output_data = {
        'validation_data': ascat_insitu,
        'masking_data': era_data,
        'statistics': statistics,
        'settings': settings
    }

    return output_data, 1
Exemple #8
0
        #temporal matching also includes distance information
        #but we are not interested in it right now so let's drop it
        matched_ISMN_data = matched_ISMN_data.drop(['distance'], axis=1)

        #this joins the SSM column of the ASCAT data to the matched ISMN data
        matched_data = matched_ISMN_data.join(
            ascat_time_series.data[label_ascat])

        #the plot shows that ISMN and ASCAT are observed in different units
        matched_data.plot(secondary_y=[label_ascat])
        plt.show()

        #this takes the matched_data DataFrame and adds a column
        scaled_data = scaling.add_scaled(matched_data,
                                         method='lin_cdf_match',
                                         label_in=label_ascat,
                                         label_scale=label_insitu)
        #the label of the scaled data is construced as label_in+'_scaled_'+method
        scaled_ascat_label = label_ascat + '_scaled_' + 'lin_cdf_match'
        #now the scaled ascat data and insitu_sm are in the same space
        scaled_data.plot(secondary_y=[label_ascat])
        plt.show()

        plt.scatter(matched_data[scaled_ascat_label].values,
                    matched_data[label_insitu].values)
        plt.xlabel(scaled_ascat_label)
        plt.ylabel(label_insitu)
        plt.show()

        #calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
        x, y = matched_data[scaled_ascat_label].values, matched_data[