Example #1
def test_ubrmsd():
    """
    Test for ubrmsd
    """
    # example 1
    x = np.arange(10)
    y = np.arange(10) + 2

    ubrmsd_pred = 0
    ubrmsd_obs = met.ubrmsd(x, y)

    nptest.assert_equal(ubrmsd_obs, ubrmsd_pred)
    # also check consistency with direct formula
    ubrmsd_direct = np.sqrt(met.rmsd(x, y)**2 - met.bias(x, y)**2)
    nptest.assert_equal(ubrmsd_obs, ubrmsd_direct)

    # example 2, with outlier
    x = np.arange(10)
    y = np.arange(10) + 2
    y[-1] = 100.

    ubrmsd_pred = 26.7
    ubrmsd_obs = met.ubrmsd(x, y)

    nptest.assert_almost_equal(ubrmsd_obs, ubrmsd_pred, 6)
    # also check consistency with direct formula
    ubrmsd_direct = np.sqrt(met.rmsd(x, y)**2 - met.bias(x, y)**2)
    nptest.assert_almost_equal(ubrmsd_obs, ubrmsd_direct)
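
The identity exercised above, ubRMSD^2 = RMSD^2 - bias^2, holds because the
mean squared difference decomposes into the squared mean of the differences
plus their variance. A minimal standalone sketch of the same check, assuming
only NumPy (the ubrmsd_np helper below is hypothetical, not part of pytesmo):

import numpy as np

def ubrmsd_np(x, y):
    # unbiased RMSD: the standard deviation of the difference series
    d = y - x
    return np.sqrt(np.mean((d - d.mean()) ** 2))

x = np.arange(10, dtype=float)
y = x + 2
y[-1] = 100.
d = y - x
# mean(d**2) = mean(d)**2 + var(d), hence ubRMSD**2 = RMSD**2 - bias**2
assert np.isclose(ubrmsd_np(x, y),
                  np.sqrt(np.mean(d ** 2) - np.mean(d) ** 2))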
Example #2
def test_ubrmsd():
    """
    Test for ubrmsd
    """
    # example 1
    x = np.arange(10)
    y = np.arange(10) + 2

    ubrmsd_pred = 0
    ubrmsd_obs = met.ubrmsd(x, y)

    nptest.assert_equal(ubrmsd_obs, ubrmsd_pred)
    # also check consistency with direct formula
    ubrmsd_direct = np.sqrt(met.rmsd(x, y) ** 2 - met.bias(x, y)**2)
    nptest.assert_equal(ubrmsd_obs, ubrmsd_direct)

    # example 2, with outlier
    x = np.arange(10)
    y = np.arange(10) + 2
    y[-1] = 100.

    ubrmsd_pred = 26.7
    ubrmsd_obs = met.ubrmsd(x, y)

    nptest.assert_almost_equal(ubrmsd_obs, ubrmsd_pred, 6)
    # also check consistency with direct formula
    ubrmsd_direct = np.sqrt(met.rmsd(x, y) ** 2 - met.bias(x, y)**2)
    nptest.assert_almost_equal(ubrmsd_obs, ubrmsd_direct)
Example #3
def test_RollingMetrics():
    """
    Test RollingMetrics.
    """
    df = make_some_data()
    df['ref'] += np.random.rand(len(df))
    df['k1'] += np.random.rand(len(df))
    data = df[['ref', 'k1']]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # many warnings due to test data

        metriccalc = RollingMetrics(other_name='k1')
        dataset = metriccalc.calc_metrics(data,
                                          gpi_info=(0, 0, 0),
                                          center=False)

    # test pearson r
    ref_array = df['ref'].rolling('30d').corr(df['k1'])
    np.testing.assert_almost_equal(dataset['R'][0], ref_array.values)

    # test rmsd
    indexer = np.arange(30)[None, :] + np.arange(len(df) - 30)[:, None]
    rmsd_arr = []
    for i in range(indexer.shape[0]):
        rmsd_arr.append(
            metrics.rmsd(df['ref'][indexer[i, :]], df['k1'][indexer[i, :]]))

    rmsd_arr = np.array(rmsd_arr)
    np.testing.assert_almost_equal(dataset['RMSD'][0][29:-1], rmsd_arr)
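
The indexer built above exploits NumPy broadcasting: a (1, 30) row of window
offsets added to an (n, 1) column of start positions yields an (n, 30) matrix
whose row i contains the indices i .. i+29 of the i-th rolling window. A tiny
sketch of the same trick with a window of 3:

import numpy as np

window, n = 3, 6
idx = np.arange(window)[None, :] + np.arange(n - window)[:, None]
print(idx)
# [[0 1 2]
#  [1 2 3]
#  [2 3 4]]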
Example #4
def test_RollingMetrics():
    """
    Test RollingMetrics.
    """
    df = make_some_data()
    df["ref"] += np.random.rand(len(df))
    df["k1"] += np.random.rand(len(df))
    data = df[["ref", "k1"]]

    metriccalc = RollingMetrics(other_name="k1")
    dataset = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0), center=False)

    # test pearson r
    ref_array = df["ref"].rolling("30d").corr(df["k1"])
    np.testing.assert_almost_equal(dataset["R"][0], ref_array.values)

    # test rmsd
    indexer = np.arange(30)[None, :] + np.arange(len(df) - 30)[:, None]
    rmsd_arr = []
    for i in range(indexer.shape[0]):
        rmsd_arr.append(
            metrics.rmsd(df["ref"][indexer[i, :]].values,
                         df["k1"][indexer[i, :]].values))

    rmsd_arr = np.array(rmsd_arr)
    np.testing.assert_almost_equal(dataset["RMSD"][0][29:-1], rmsd_arr)
Example #5
def test_rmsd_mse():
    """
    Test for rmsd and mse
    """
    # example 1
    x = np.random.randn(1000)
    y = np.random.randn(1000)

    rmsd_pred = met.rmsd(x, y)
    mse_pred, _, _, _ = met.mse(x, y)

    nptest.assert_almost_equal(rmsd_pred ** 2, mse_pred, 6)
Example #6
def test_rmsd_mse():
    """
    Test for rmsd and mse
    """
    # example 1
    x = np.random.randn(1000)
    y = np.random.randn(1000)

    rmsd_pred = met.rmsd(x, y)
    mse_pred, _, _, _ = met.mse(x, y)

    nptest.assert_almost_equal(rmsd_pred**2, mse_pred, 6)
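
RMSD^2 == MSE holds by definition, since RMSD is the square root of the mean
squared difference. pytesmo's met.mse additionally returns the correlation,
bias, and variance components of the decomposition
MSE = MSE_corr + MSE_bias + MSE_var, which can be checked the same way; a
short sketch, assuming that four-tuple return:

import numpy as np
import pytesmo.metrics as met

x = np.random.randn(1000)
y = np.random.randn(1000)
mse, mse_corr, mse_bias, mse_var = met.mse(x, y)
np.testing.assert_almost_equal(mse, mse_corr + mse_bias + mse_var)
np.testing.assert_almost_equal(met.rmsd(x, y) ** 2, mse)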
Example #7
def test_rmsd():
    """
    Test for rmsd
    """
    # example 1
    x = np.arange(10)
    y = np.arange(10) + 2

    rmsd_pred = 2.
    rmsd_obs = met.rmsd(x, y)

    nptest.assert_equal(rmsd_obs, rmsd_pred)

    # example 2, with outlier
    x = np.arange(10)
    y = np.arange(10) + 2
    y[-1] = 100.

    rmsd_pred = np.sqrt(831.7)
    rmsd_obs = met.rmsd(x, y)

    nptest.assert_almost_equal(rmsd_obs, rmsd_pred, 6)
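
The expected value np.sqrt(831.7) can be verified by hand: the differences
y - x are 2 for the first nine points and 100 - 9 = 91 for the outlier, so
the mean squared difference is (9 * 2**2 + 91**2) / 10 = 8317 / 10 = 831.7.
The same arithmetic gives the 26.7 in the ubrmsd tests above:
sqrt(831.7 - 10.9**2) = sqrt(712.89) = 26.7, with 10.9 the mean difference.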
Example #8
def test_rmsd():
    """
    Test for rmsd
    """
    # example 1
    x = np.arange(10)
    y = np.arange(10) + 2

    rmsd_pred = 2.
    rmsd_obs = met.rmsd(x, y)

    nptest.assert_equal(rmsd_obs, rmsd_pred)

    # example 2, with outlier
    x = np.arange(10)
    y = np.arange(10) + 2
    y[-1] = 100.

    rmsd_pred = np.sqrt(831.7)
    rmsd_obs = met.rmsd(x, y)

    nptest.assert_almost_equal(rmsd_obs, rmsd_pred, 6)
Example #9
    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            with 2 columns: the first column is the reference dataset,
            named 'ref'; the second column is the dataset to compare
            against, named 'other'
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow, which is problematic for
        global comparisons
        """
        dataset = copy.deepcopy(self.result_template)

        dataset["n_obs"][0] = len(data)
        dataset["gpi"][0] = gpi_info[0]
        dataset["lon"][0] = gpi_info[1]
        dataset["lat"][0] = gpi_info[2]

        if len(data) < 10:
            return dataset

        x, y = data["ref"].values, data[self.other_name].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset["R"][0], dataset["p_R"][0] = R, p_R
        dataset["rho"][0], dataset["p_rho"][0] = rho, p_rho
        dataset["RMSD"][0] = RMSD
        dataset["BIAS"][0] = BIAS

        if self.calc_tau:
            tau, p_tau = metrics.kendalltau(x, y)
            dataset["tau"][0], dataset["p_tau"][0] = tau, p_tau

        return dataset
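
A minimal sketch of how calc_metrics might be driven, assuming a
result_template of one-element NumPy arrays keyed by metric name (the
template layout and the commented call below are illustrative assumptions,
not the class's actual definition):

import numpy as np
import pandas as pd

# hypothetical template; the [0] indexing above implies one-element arrays
result_template = {k: np.full(1, np.nan)
                   for k in ('n_obs', 'gpi', 'lon', 'lat', 'R', 'p_R',
                             'rho', 'p_rho', 'RMSD', 'BIAS')}

idx = pd.date_range('2020-01-01', periods=20, freq='D')
data = pd.DataFrame({'ref': np.random.rand(20),
                     'other': np.random.rand(20)}, index=idx)
# dataset = calculator.calc_metrics(data, gpi_info=(123, 16.37, 48.21))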
Example #10
    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            with 2 columns: the first column is the reference dataset,
            named 'ref'; the second column is the dataset to compare
            against, named 'other'
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow, which is problematic for
        global comparisons
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        if len(data) < 10:
            return dataset

        x, y = data['ref'].values, data[self.other_name].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset['R'][0], dataset['p_R'][0] = R, p_R
        dataset['rho'][0], dataset['p_rho'][0] = rho, p_rho
        dataset['RMSD'][0] = RMSD
        dataset['BIAS'][0] = BIAS

        if self.calc_tau:
            tau, p_tau = metrics.kendalltau(x, y)
            dataset['tau'][0], dataset['p_tau'][0] = tau, p_tau

        return dataset
Example #11
    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            with 2 columns: the first column is the reference dataset,
            named 'ref'; the second column is the dataset to compare
            against, named 'other'
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau is not calculated at the moment
        because the scipy implementation is very slow, which is problematic for
        global comparisons
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        if len(data) < 10:
            return dataset

        x, y = data['ref'].values, data['other'].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        # tau, p_tau = metrics.kendalltau(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset['R'][0], dataset['p_R'][0] = R, p_R
        dataset['rho'][0], dataset['p_rho'][0] = rho, p_rho
        # dataset['tau'][0], dataset['p_tau'][0] = tau, p_tau
        dataset['RMSD'][0] = RMSD
        dataset['BIAS'][0] = BIAS

        return dataset
Example #12
        plt.scatter(matched_data[scaled_ascat_label].values,
                    matched_data[label_insitu].values)
        plt.xlabel(scaled_ascat_label)
        plt.ylabel(label_insitu)
        plt.show()
        
        # calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
        x, y = matched_data[scaled_ascat_label].values, matched_data[label_insitu].values
        
        print "ISMN time series:",ISMN_time_series
        print "compared to"
        print ascat_time_series
        print "Results:"
        print "Pearson's (R,p_value)", metrics.pearsonr(x, y)
        print "Spearman's (rho,p_value)", metrics.spearmanr(x, y)
        print "Kendalls's (tau,p_value)", metrics.kendalltau(x, y)
        print "RMSD", metrics.rmsd(x, y)
        print "Bias", metrics.bias(x, y)
        print "Nash Sutcliffe", metrics.nash_sutcliffe(x, y)
        
        
    i += 1
    
    # only show the first 2 stations, otherwise this program would run a long
    # time and produce a lot of plots
    if i >= 2:
        break
Example #13
def compare_data(ismn_data, validation_data, scaling='linreg', anomaly=None):
    """
    Compare data from an ISMN station to the defined validation datasets.

    Parameters
    ----------
    ismn_data: pandas.DataFrame
        Data from the ISMN used as a reference
    validation_data: dict
        Dictionary of pandas.DataFrames, one for each dataset to
        compare against
    scaling: string, optional
        Scaling method to use.
    anomaly: string, optional
        If set, the validation is performed on anomalies.
    """
    insitu_label = 'soil moisture'

    if anomaly is not None:

        if anomaly == 'climatology':
            ascat_clim = anomaly_calc.calc_climatology(
                ascat_masked[ascat_label])
            insitu_clim = anomaly_calc.calc_climatology(
                ismn_data['soil moisture'])

            ascat_anom = anomaly_calc.calc_anomaly(ascat_masked[ascat_label],
                                                   climatology=ascat_clim)
            ascat_masked[ascat_label] = ascat_anom.values

            insitu_anom = anomaly_calc.calc_anomaly(ismn_data['insitu'],
                                                    climatology=insitu_clim)
            ismn_data['insitu'] = insitu_anom.values

        if anomaly == 'average':
            ascat_anom = anomaly_calc.calc_anomaly(ascat_masked[ascat_label])
            ascat_masked[ascat_label] = ascat_anom.values

            insitu_anom = anomaly_calc.calc_anomaly(ismn_data['insitu'])
            ismn_data['insitu'] = insitu_anom.values

        ascat_masked = ascat_masked.dropna()
        ismn_data = ismn_data.dropna()

    for dname in validation_data:
        vdata = validation_data[dname]
        vdata_label = 'cci_sm'

        matched_data = temp_match.matching(ismn_data, vdata, window=1)

        if scaling != 'noscale' and scaling != 'porosity':

            scaled_data = scale.add_scaled(matched_data,
                                           label_in=vdata_label,
                                           label_scale=insitu_label,
                                           method=scaling)

            scaled_label = vdata_label + '_scaled_' + scaling

            scaled_data = scaled_data[[insitu_label, scaled_label]]

        elif scaling == 'noscale':
            scaled_data = matched_data[[insitu_label, vdata_label]]
            scaled_label = vdata_label

    # scaled_data.rename(columns={'insitu': ISMN_ts_name}, inplace=True)

    labels, values = scaled_data.to_dygraph_format()

    ascat_insitu = {'labels': labels, 'data': values}

    x, y = scaled_data[insitu_label].values, scaled_data[scaled_label].values

    kendall, p_kendall = sc_stats.kendalltau(x.tolist(), y.tolist())
    spearman, p_spearman = sc_stats.spearmanr(x, y)
    pearson, p_pearson = sc_stats.pearsonr(x, y)
    rmsd = metrics.rmsd(x, y)
    bias = metrics.bias(y, x)
    mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)
    statistics = {
        'kendall': {
            'v': '%.2f' % kendall,
            'p': '%.4f' % p_kendall
        },
        'spearman': {
            'v': '%.2f' % spearman,
            'p': '%.4f' % p_spearman
        },
        'pearson': {
            'v': '%.2f' % pearson,
            'p': '%.4f' % p_pearson
        },
        'bias': '%.4f' % bias,
        'rmsd': {
            'rmsd': '%.4f' % np.sqrt(mse),
            'rmsd_corr': '%.4f' % np.sqrt(mse_corr),
            'rmsd_bias': '%.4f' % np.sqrt(mse_bias),
            'rmsd_var': '%.4f' % np.sqrt(mse_var)
        },
        'mse': {
            'mse': '%.4f' % mse,
            'mse_corr': '%.4f' % mse_corr,
            'mse_bias': '%.4f' % mse_bias,
            'mse_var': '%.4f' % mse_var
        }
    }

    scaling_options = {
        'noscale': 'No scaling',
        'porosity': 'Scale using porosity',
        'linreg': 'Linear Regression',
        'mean_std': 'Mean - standard deviation',
        'min_max': 'Minimum,maximum',
        'lin_cdf_match': 'Piecewise <br> linear CDF matching',
        'cdf_match': 'CDF matching'
    }

    settings = {
        'scaling': scaling_options[scaling],
        # 'snow_depth': mask['snow_depth'],
        # 'surface_temp': mask['st_l1'],
        # 'air_temp': mask['air_temp']
    }

    era_data = {'labels': [], 'data': []}
    output_data = {
        'validation_data': ascat_insitu,
        'masking_data': era_data,
        'statistics': statistics,
        'settings': settings
    }

    return output_data, 1
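
Note that the 'rmsd' block above reports the square roots of the individual
MSE components. Because the decomposition is additive in MSE, not in RMSD,
those component values do not sum to the total RMSD; a short illustration,
assuming pytesmo's four-tuple return from metrics.mse:

import numpy as np
import pytesmo.metrics as metrics

x, y = np.random.rand(500), np.random.rand(500)
mse, mse_corr, mse_bias, mse_var = metrics.mse(x, y)
# sqrt of a sum is not the sum of sqrts, so these two differ in general
print(np.sqrt(mse))
print(np.sqrt(mse_corr) + np.sqrt(mse_bias) + np.sqrt(mse_var))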
Example #14
        scaled_data.plot(secondary_y=[label_ascat])
        plt.show()

        plt.scatter(matched_data[scaled_ascat_label].values,
                    matched_data[label_insitu].values)
        plt.xlabel(scaled_ascat_label)
        plt.ylabel(label_insitu)
        plt.show()

        # calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
        x, y = matched_data[scaled_ascat_label].values, matched_data[
            label_insitu].values

        print "ISMN time series:", ISMN_time_series
        print "compared to"
        print ascat_time_series
        print "Results:"
        print "Pearson's (R,p_value)", metrics.pearsonr(x, y)
        print "Spearman's (rho,p_value)", metrics.spearmanr(x, y)
        print "Kendalls's (tau,p_value)", metrics.kendalltau(x, y)
        print "RMSD", metrics.rmsd(x, y)
        print "Bias", metrics.bias(x, y)
        print "Nash Sutcliffe", metrics.nash_sutcliffe(x, y)

    i += 1

    # only show the first 2 stations, otherwise this program would run a long
    # time and produce a lot of plots
    if i >= 2:
        break
Example #15
    def _calc_validation_metrics(self):
        """
        Calculate vertical metrics between candidate and reference using pytesmo.

        Currently implemented:
            bias, mad, rmsd, nrmsd, PearsonR, SpearmanR

        Returns
        -------
        df_validation_metrics: pd.DataFrame
            DataFrame that contains the metrics between the candidate and
            reference for the two groups and the full frame
        """
        df_validation_metrics = pd.DataFrame()

        for group_no, subset_data in enumerate([self.set0, self.set1, self.setfull]):
            if group_no in [0, 1]:
                group = 'group%i' % group_no
            else:
                group = 'FRAME'
            if 'bias' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    bias = np.nan
                else:
                    bias = metrics.bias(subset_data[self.reference_name].values,
                                        subset_data[self.candidate_name].values)
                df_validation_metrics.at['bias', '%s' % group] = bias

            if 'mad' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    mad = np.nan
                else:
                    mad = metrics.mad(subset_data[self.reference_name].values,
                                      subset_data[self.candidate_name].values)
                df_validation_metrics.at['mad', '%s' % group] = mad

            if 'rmsd' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    rmsd = np.nan
                else:
                    rmsd = metrics.rmsd(subset_data[self.reference_name].values,
                                        subset_data[self.candidate_name].values)
                df_validation_metrics.at['rmsd', '%s' % group] = rmsd

            if 'nrmsd' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    nrmsd = np.nan
                else:
                    nrmsd = metrics.nrmsd(subset_data[self.reference_name].values,
                                          subset_data[self.candidate_name].values)
                df_validation_metrics.at['nrmsd', '%s' % group] = nrmsd

            if 'PearsonR' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    pr, pp = np.nan, np.nan
                else:
                    with warnings.catch_warnings():  # suppress scipy warnings
                        warnings.filterwarnings('ignore')
                        pr, pp = metrics.pearsonr(subset_data[self.reference_name].values,
                                                  subset_data[self.candidate_name].values)

                df_validation_metrics.at['PearsonR', '%s' % group] = pr
                df_validation_metrics.at['Pp', '%s' % group] = pp

            if 'SpearmanR' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    sr, sp = np.nan, np.nan
                else:
                    with warnings.catch_warnings():  # suppress scipy warnings
                        warnings.filterwarnings('ignore')
                        sr, sp = metrics.spearmanr(subset_data[self.reference_name].values,
                                                   subset_data[self.candidate_name].values)

                df_validation_metrics.at['SpearmanR', '%s' % group] = sr
                df_validation_metrics.at['Sp', '%s' % group] = sp

        return df_validation_metrics
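
The five metric branches above repeat the same "empty column -> NaN" guard.
One possible consolidation, sketched under the assumption that each metric
takes (reference, candidate) value arrays; the _safe_metric helper is
hypothetical, not part of the original class:

import numpy as np

def _safe_metric(func, subset_data, reference_name, candidate_name):
    # return NaN when either column is empty, otherwise apply the metric
    if subset_data[reference_name].empty or subset_data[candidate_name].empty:
        return np.nan
    return func(subset_data[reference_name].values,
                subset_data[candidate_name].values)

# e.g.:
# bias = _safe_metric(metrics.bias, subset_data,
#                     self.reference_name, self.candidate_name)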