Code example #1
    def calc_metrics(self, data, gpi_info):
        """
        Calculate the desired statistics.

        Parameters
        ----------
        data : pandas.DataFrame
            with 3 columns; the first column is the reference dataset,
            named 'ref', and the second and third columns are the datasets
            to compare against, named 'k1' and 'k2'
        gpi_info : tuple
            Grid point info (i.e. gpi, lon, lat)
        """
        dataset = super(HSAF_Metrics, self).calc_metrics(data, gpi_info)

        for season in self.seasons:

            if season != 'ALL':
                subset = self.month_to_season[data.index.month] == season
            else:
                subset = np.ones(len(data), dtype=bool)

            # number of observations
            n_obs = subset.sum()
            if n_obs < self.min_obs:
                continue
            dataset['{:}_n_obs'.format(season)][0] = n_obs

            # get single dataset metrics
            # calculate SNR
            x = data[self.df_columns[0]].values[subset]
            y = data[self.df_columns[1]].values[subset]
            z = data[self.df_columns[2]].values[subset]

            snr, err, beta = metrics.tcol_snr(x, y, z)

            for i, name in enumerate(self.ds_names):
                dataset['{:}_{:}_snr'.format(name, season)][0] = snr[i]
                dataset['{:}_{:}_err_var'.format(name, season)][0] = err[i]
                dataset['{:}_{:}_beta'.format(name, season)][0] = beta[i]

            # restrict the pairwise metrics to the current season so that
            # they match the season-keyed result fields below
            data_season = data[subset]

            # calculate Pearson correlation
            pearson_R, pearson_p = df_metrics.pearsonr(data_season)
            pearson_R = pearson_R._asdict()
            pearson_p = pearson_p._asdict()

            # calculate Spearman correlation
            spea_rho, spea_p = df_metrics.spearmanr(data_season)
            spea_rho = spea_rho._asdict()
            spea_p = spea_p._asdict()

            # scale data to reference in order to calculate absolute metrics
            data_scaled = scale(data_season, method='min_max')

            # calculate bias
            bias_nT = df_metrics.bias(data_scaled)
            bias_dict = bias_nT._asdict()

            # calculate ubRMSD
            ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
            ubRMSD_dict = ubRMSD_nT._asdict()

            for tds_name in self.tds_names:
                R = pearson_R[tds_name]
                p_R = pearson_p[tds_name]
                rho = spea_rho[tds_name]
                p_rho = spea_p[tds_name]
                bias = bias_dict[tds_name]
                ubRMSD = ubRMSD_dict[tds_name]

                split_tds_name = tds_name.split('_and_')
                tds_name_key = "{:}_{:}".format(
                    self.ds_names_lut[split_tds_name[0]],
                    self.ds_names_lut[split_tds_name[1]])

                dataset['{:}_{:}_R'.format(tds_name_key, season)][0] = R
                dataset['{:}_{:}_p_R'.format(tds_name_key, season)][0] = p_R
                dataset['{:}_{:}_rho'.format(tds_name_key, season)][0] = rho
                dataset['{:}_{:}_p_rho'.format(tds_name_key, season)][0] = \
                    p_rho
                dataset['{:}_{:}_bias'.format(tds_name_key, season)][0] = bias
                dataset['{:}_{:}_ubrmsd'.format(tds_name_key, season)][0] = \
                    ubRMSD

        return dataset
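
A minimal usage sketch for this calculator (a hypothetical setup: the HSAF_Metrics constructor call is an assumption; only the DataFrame layout and the gpi_info tuple follow the docstring above):

import numpy as np
import pandas as pd

# synthetic 3-column input in the layout the docstring requires
rng = np.random.default_rng(0)
index = pd.date_range('2009-01-01', periods=365, freq='D')
ref = rng.standard_normal(len(index))
data = pd.DataFrame({'ref': ref,
                     'k1': ref + 0.1 * rng.standard_normal(len(index)),
                     'k2': ref + 0.3 * rng.standard_normal(len(index))},
                    index=index)

calc = HSAF_Metrics()  # assumed default construction
result = calc.calc_metrics(data, gpi_info=(2011528, 15.0, 45.0))
print(result['ALL_n_obs'][0])  # assumes 'ALL' is among calc.seasons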
Code example #2
    def calc_metrics(self, data, gpi_info):
        """
        Calculate the desired statistics.

        Parameters
        ----------
        data : pd.DataFrame
            with >2 columns; the first column is the reference dataset,
            named 'ref', and the other columns are the datasets to compare
            against, named 'other_i'
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow, which is problematic
        for global comparisons
        """

        dataset = super(IntercomparisonMetrics,
                        self).calc_metrics(data, gpi_info)

        subset = np.ones(len(data), dtype=bool)

        n_obs = subset.sum()
        if n_obs < self.min_obs:
            return dataset

        dataset['n_obs'][0] = n_obs

        # calculate Pearson correlation
        pearson_R, pearson_p = df_metrics.pearsonr(data)
        pearson_R, pearson_p = pearson_R._asdict(), pearson_p._asdict()

        # calculate Spearman correlation
        spea_rho, spea_p = df_metrics.spearmanr(data)
        spea_rho, spea_p = spea_rho._asdict(), spea_p._asdict()

        # calculate bias
        bias_nT = df_metrics.bias(data)
        bias_dict = bias_nT._asdict()

        # calculate RMSD
        rmsd = df_metrics.rmsd(data)
        rmsd_dict = rmsd._asdict()

        # calculate MSE
        mse, mse_corr, mse_bias, mse_var = df_metrics.mse(data)
        mse_dict = mse._asdict()
        mse_corr_dict = mse_corr._asdict()
        mse_bias_dict = mse_bias._asdict()
        mse_var_dict = mse_var._asdict()

        # calculate RSS
        rss = df_metrics.RSS(data)
        rss_dict = rss._asdict()

        # calculate tau
        if self.calc_tau:
            tau, p_tau = df_metrics.kendalltau(data)
            tau_dict, p_tau_dict = tau._asdict(), p_tau._asdict()
        else:
            tau = p_tau = p_tau_dict = tau_dict = None

        # no extra scaling of the input data is performed beforehand, but
        # for the ubRMSD the data are always scaled with mean/std matching
        data_scaled = scale(data, method='mean_std')
        ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
        ubRMSD_dict = ubRMSD_nT._asdict()

        for tds_name in self.tds_names:
            R, p_R = pearson_R[tds_name], pearson_p[tds_name]
            rho, p_rho = spea_rho[tds_name], spea_p[tds_name]
            bias = bias_dict[tds_name]
            mse = mse_dict[tds_name]
            mse_corr = mse_corr_dict[tds_name]
            mse_bias = mse_bias_dict[tds_name]
            mse_var = mse_var_dict[tds_name]
            rmsd = rmsd_dict[tds_name]
            ubRMSD = ubRMSD_dict[tds_name]
            rss = rss_dict[tds_name]

            if tau_dict and p_tau_dict:
                tau = tau_dict[tds_name]
                p_tau = p_tau_dict[tds_name]

            split_tds_name = tds_name.split(self.ds_names_split)
            tds_name_key = self.ds_names_split.join([
                self.ds_names_lut[split_tds_name[0]],
                self.ds_names_lut[split_tds_name[1]]
            ])

            dataset[self.metric_ds_split.join(['R', tds_name_key])][0] = R
            dataset[self.metric_ds_split.join(['p_R', tds_name_key])][0] = p_R
            dataset[self.metric_ds_split.join(['rho', tds_name_key])][0] = rho
            dataset[self.metric_ds_split.join(['p_rho',
                                               tds_name_key])][0] = p_rho
            dataset[self.metric_ds_split.join(['BIAS',
                                               tds_name_key])][0] = bias
            dataset[self.metric_ds_split.join(['mse', tds_name_key])][0] = mse
            dataset[self.metric_ds_split.join(['mse_corr',
                                               tds_name_key])][0] = mse_corr
            dataset[self.metric_ds_split.join(['mse_bias',
                                               tds_name_key])][0] = mse_bias
            dataset[self.metric_ds_split.join(['mse_var',
                                               tds_name_key])][0] = mse_var
            dataset[self.metric_ds_split.join(['RMSD',
                                               tds_name_key])][0] = rmsd
            dataset[self.metric_ds_split.join(['urmsd',
                                               tds_name_key])][0] = ubRMSD
            dataset[self.metric_ds_split.join(['RSS', tds_name_key])][0] = rss

            if self.calc_tau:
                dataset[self.metric_ds_split.join(['tau',
                                                   tds_name_key])][0] = tau
                dataset[self.metric_ds_split.join(['p_tau',
                                                   tds_name_key])][0] = p_tau

        return dataset
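
The result keys in this variant are assembled from two separator attributes rather than hard-coded format strings. A standalone sketch of that naming logic (the separator values and the lookup table below are illustrative assumptions; the instances carry them as ds_names_split, metric_ds_split and ds_names_lut):

# illustrative values; the real attributes are set on the calculator instance
ds_names_split = '_and_'        # joins dataset names within a pair key
metric_ds_split = '_between_'   # joins the metric name to the pair key
ds_names_lut = {'ref': 'ASCAT', 'k1': 'ERA'}

tds_name = 'ref_and_k1'
parts = tds_name.split(ds_names_split)
tds_name_key = ds_names_split.join([ds_names_lut[parts[0]],
                                    ds_names_lut[parts[1]]])
print(metric_ds_split.join(['R', tds_name_key]))
# -> 'R_between_ASCAT_and_ERA'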
Code example #3
    def calc_metrics(self, data, gpi_info):
        """
        Calculate the desired statistics.

        Parameters
        ----------
        data : pandas.DataFrame
            with >2 columns; the first column is the reference dataset,
            named 'ref', and the other columns are the datasets to compare
            against, named 'other_i'
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow, which is problematic
        for global comparisons
        """

        dataset = copy.deepcopy(self.result_template)

        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        # number of observations
        subset = np.ones(len(data), dtype=bool)

        n_obs = subset.sum()
        if n_obs < 10:
            return dataset

        dataset['n_obs'][0] = n_obs

        # calculate Pearson correlation
        pearson_R, pearson_p = df_metrics.pearsonr(data)
        pearson_R = pearson_R._asdict()
        pearson_p = pearson_p._asdict()

        # calculate Spearman correlation
        spea_rho, spea_p = df_metrics.spearmanr(data)
        spea_rho = spea_rho._asdict()
        spea_p = spea_p._asdict()

        # calculate bias
        bias_nT = df_metrics.bias(data)
        bias_dict = bias_nT._asdict()

        # calculate RMSD
        rmsd = df_metrics.rmsd(data)
        rmsd_dict = rmsd._asdict()

        # calculate MSE
        mse, mse_corr, mse_bias, mse_var = df_metrics.mse(data)
        mse_dict = mse._asdict()
        mse_corr_dict = mse_corr._asdict()
        mse_bias_dict = mse_bias._asdict()
        mse_var_dict = mse_var._asdict()

        # calculate tau
        if self.calc_tau:
            tau, p_tau = df_metrics.kendalltau(data)
            tau_dict = tau._asdict()
            p_tau_dict = p_tau._asdict()
        else:
            tau = p_tau = p_tau_dict = tau_dict = None

        # calculate ubRMSD (no scaling is applied beforehand in this variant)
        ubRMSD_nT = df_metrics.ubrmsd(data)
        ubRMSD_dict = ubRMSD_nT._asdict()

        # get single dataset metrics
        # calculate SNR
        x = data[self.df_columns[0]].values[subset]
        y = data[self.df_columns[1]].values[subset]
        z = data[self.df_columns[2]].values[subset]

        snr, err, beta = metrics.tcol_snr(x, y, z)

        for i, name in enumerate(self.ds_names):
            dataset['{:}_snr'.format(name)][0] = snr[i]
            dataset['{:}_err_var'.format(name)][0] = err[i]
            dataset['{:}_beta'.format(name)][0] = beta[i]

        for tds_name in self.tds_names:
            R = pearson_R[tds_name]
            p_R = pearson_p[tds_name]
            rho = spea_rho[tds_name]
            p_rho = spea_p[tds_name]
            bias = bias_dict[tds_name]
            mse = mse_dict[tds_name]
            mse_corr = mse_corr_dict[tds_name]
            mse_bias = mse_bias_dict[tds_name]
            mse_var = mse_var_dict[tds_name]
            rmsd = rmsd_dict[tds_name]
            ubRMSD = ubRMSD_dict[tds_name]
            if tau_dict and p_tau_dict:
                tau = tau_dict[tds_name]
                p_tau = p_tau_dict[tds_name]

            split_tds_name = tds_name.split('_and_')
            tds_name_key = "{:}_{:}".format(
                self.ds_names_lut[split_tds_name[0]],
                self.ds_names_lut[split_tds_name[1]])

            dataset['R_between_{:}'.format(tds_name_key)][0] = R
            dataset['p_R_between_{:}'.format(tds_name_key)][0] = p_R
            dataset['rho_between_{:}'.format(tds_name_key)][0] = rho
            dataset['p_rho_between_{:}'.format(tds_name_key)][0] = p_rho
            dataset['bias_between_{:}'.format(tds_name_key)][0] = bias
            dataset['mse_between_{:}'.format(tds_name_key)][0] = mse
            dataset['mse_corr_between_{:}'.format(tds_name_key)][0] = mse_corr
            dataset['mse_bias_between_{:}'.format(tds_name_key)][0] = mse_bias
            dataset['mse_var_between_{:}'.format(tds_name_key)][0] = mse_var
            dataset['rmsd_between_{:}'.format(tds_name_key)][0] = rmsd
            dataset['ubRMSD_between_{:}'.format(tds_name_key)][0] = ubRMSD

            if self.calc_tau:
                dataset['tau_between_{:}'.format(tds_name_key)][0] = tau
                dataset['p_tau_between_{:}'.format(tds_name_key)][0] = p_tau

        return dataset
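
The single-dataset block above relies on metrics.tcol_snr, which takes three collocated series and returns one SNR, error, and scaling estimate per input. A minimal sketch on synthetic data (assuming a pytesmo version that provides metrics.tcol_snr with the three-argument call used above):

import numpy as np
from pytesmo import metrics

# three noisy, differently scaled observations of one underlying signal
rng = np.random.default_rng(1)
truth = rng.standard_normal(10000)
x = truth + 0.1 * rng.standard_normal(10000)
y = 0.8 * truth + 0.2 * rng.standard_normal(10000)
z = 1.2 * truth + 0.3 * rng.standard_normal(10000)

snr, err, beta = metrics.tcol_snr(x, y, z)
# one value per dataset: SNR (in dB), error estimate (stored under the
# *_err_var keys above) and scaling factor relative to the first input
print(snr, err, beta)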
Code example #4
    def calc_metrics(self, data, gpi_info):
        """
        Calculate Triple Collocation metrics

        Parameters
        ----------
        data : pd.DataFrame
            with >2 columns; the first column is the reference dataset,
            named 'ref', and the other columns are the datasets to compare
            against, named 'other_i'
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow, which is problematic
        for global comparisons
        """

        dataset = copy.deepcopy(self.result_template)

        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        if self.metadata_template is not None:
            for key, value in self.metadata_template.items():
                dataset[key][0] = gpi_info[3][key]

        # number of observations
        subset = np.ones(len(data), dtype=bool)

        n_obs = subset.sum()
        if n_obs < self.min_obs:
            return dataset

        dataset['n_obs'][0] = n_obs

        # calculate Pearson correlation
        pearson_R, pearson_p = df_metrics.pearsonr(data)
        pearson_R, pearson_p = pearson_R._asdict(), pearson_p._asdict()
        # calculate Spearman correlation
        spea_rho, spea_p = df_metrics.spearmanr(data)
        spea_rho, spea_p = spea_rho._asdict(), spea_p._asdict()
        # calculate bias
        bias_nT = df_metrics.bias(data)
        bias_dict = bias_nT._asdict()
        # calculate RMSD
        rmsd = df_metrics.rmsd(data)
        rmsd_dict = rmsd._asdict()
        # calculate MSE
        mse, mse_corr, mse_bias, mse_var = df_metrics.mse(data)
        mse_dict = mse._asdict()
        mse_corr_dict = mse_corr._asdict()
        mse_bias_dict = mse_bias._asdict()
        mse_var_dict = mse_var._asdict()
        # calculate RSS
        rss = df_metrics.RSS(data)
        rss_dict = rss._asdict()
        # calculate ubRMSD
        # todo: we could use the TC derived scaling parameters here?
        data_scaled = scale(data, method='mean_std')
        ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
        ubRMSD_dict = ubRMSD_nT._asdict()
        # calculate tau
        if self.calc_tau:
            tau, p_tau = df_metrics.kendalltau(data)
            tau_dict, p_tau_dict = tau._asdict(), p_tau._asdict()
        else:
            tau = p_tau = p_tau_dict = tau_dict = None
        # calculate TC metrics
        ref_ind = np.where(np.array(data.columns) == self.ref_name)[0][0]
        snrs, err_stds, betas = df_metrics.tcol_snr(data, ref_ind=ref_ind)
        snr_dict = self._tc_res_dict(snrs)
        err_std_dict = self._tc_res_dict(err_stds)
        beta_dict = self._tc_res_dict(betas)

        # store TC results
        for thds_name in self.thds_names:
            snr = snr_dict[thds_name]
            err_std = err_std_dict[thds_name]
            beta = beta_dict[thds_name]

            split_thds_name = thds_name.split(self.ds_names_split)
            thds_name_key = self.ds_names_split.join([
                self.ds_names_lut[split_thds_name[0]],
                self.ds_names_lut[split_thds_name[1]],
                self.ds_names_lut[split_thds_name[2]]
            ])

            for metr, res in dict(snr=snr, err_std=err_std, beta=beta).items():
                for ds, ds_res in res.items():
                    m_ds = "{}_{}".format(metr, self.ds_names_lut[ds])
                    n = '{}{}{}'.format(m_ds, self.metric_ds_split,
                                        thds_name_key)
                    if n in dataset.keys():
                        dataset[n][0] = ds_res

        # Store basic metrics results
        for tds_name in self.tds_names:
            R, p_R = pearson_R[tds_name], pearson_p[tds_name]
            rho, p_rho = spea_rho[tds_name], spea_p[tds_name]
            bias = bias_dict[tds_name]
            mse = mse_dict[tds_name]
            mse_corr = mse_corr_dict[tds_name]
            mse_bias = mse_bias_dict[tds_name]
            mse_var = mse_var_dict[tds_name]
            rmsd = rmsd_dict[tds_name]
            ubRMSD = ubRMSD_dict[tds_name]
            rss = rss_dict[tds_name]

            if tau_dict and p_tau_dict:
                tau = tau_dict[tds_name]
                p_tau = p_tau_dict[tds_name]

            split_tds_name = tds_name.split(self.ds_names_split)
            tds_name_key = self.ds_names_split.join([
                self.ds_names_lut[split_tds_name[0]],
                self.ds_names_lut[split_tds_name[1]]
            ])

            dataset[self.metric_ds_split.join(['R', tds_name_key])][0] = R
            dataset[self.metric_ds_split.join(['p_R', tds_name_key])][0] = p_R
            dataset[self.metric_ds_split.join(['rho', tds_name_key])][0] = rho
            dataset[self.metric_ds_split.join(['p_rho',
                                               tds_name_key])][0] = p_rho
            dataset[self.metric_ds_split.join(['BIAS',
                                               tds_name_key])][0] = bias
            dataset[self.metric_ds_split.join(['mse', tds_name_key])][0] = mse
            dataset[self.metric_ds_split.join(['mse_corr',
                                               tds_name_key])][0] = mse_corr
            dataset[self.metric_ds_split.join(['mse_bias',
                                               tds_name_key])][0] = mse_bias
            dataset[self.metric_ds_split.join(['mse_var',
                                               tds_name_key])][0] = mse_var
            dataset[self.metric_ds_split.join(['RMSD',
                                               tds_name_key])][0] = rmsd
            dataset[self.metric_ds_split.join(['urmsd',
                                               tds_name_key])][0] = ubRMSD
            dataset[self.metric_ds_split.join(['RSS', tds_name_key])][0] = rss

            if self.calc_tau:
                dataset[self.metric_ds_split.join(['tau',
                                                   tds_name_key])][0] = tau
                dataset[self.metric_ds_split.join(['p_tau',
                                                   tds_name_key])][0] = p_tau

        return dataset
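
Before the ubRMSD step, the data are matched to the reference with scale(data, method='mean_std'). A hand-rolled sketch of that transformation, assuming the usual definition of mean/std matching (the pytesmo implementation may differ in detail):

import pandas as pd

def mean_std_scale(df, ref='ref'):
    # shift and stretch every non-reference column so that its mean and
    # standard deviation equal those of the reference column
    scaled = df.copy()
    for col in df.columns:
        if col == ref:
            continue
        scaled[col] = ((df[col] - df[col].mean()) / df[col].std()
                       * df[ref].std() + df[ref].mean())
    return scaled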
Code example #5
def optimise(params,
             timespan=('2009-01', '2009-12'),
             gpi=None,
             rescaling=None):
    """
    This function optimises the parameters vegetation water content
    'm_veg' and soil moisture 'm_soil' and, if specified, a third optional
    parameter. The third optional parameter can either be sand 'sand',
    clay 'clay', fractional root mean square height 'f_rms',
    stem volume 's_vol' or temperature 'temp'.

    Parameters
    ----------
    params : list of dicts
        Model parameters. If an optional parameter has been selected, at
        least four of the following parameters need to be specified;
        otherwise all of them need to be specified: 'sand', 'clay',
        'f_rms', 'temp', 's_vol'
    gpi : int, optional
        Grid point index. If specified, it will read data from datapool.

    Returns
    -------
    df : pandas.DataFrame
        Optimised soil moisture, vegetation water content and, if specified,
        optional optimised parameter.
    """

    if gpi is None:
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0,
                               parse_dates=True)[timespan[0]:timespan[1]]
        gpi = 2011528
    else:
        ts_resam = read_resam(gpi)[timespan[0]:timespan[1]]

    m_veg_x0 = params.pop('m_veg_x0')
    m_soil_x0 = params.pop('m_soil_x0')
    columns = ['m_veg', 'm_soil']

    x0 = np.array([m_veg_x0, m_soil_x0])

    df = pd.DataFrame(index=ts_resam.index, columns=columns)
    df = df.fillna(np.nan)
    # optimise  m_soil and m_veg
    for index, row in ts_resam.iterrows():

        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = \
            db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        args = (ascat_inc, ascat_sig, params, '')
        res = minimize(sig_sqr_diff, x0, args=args, method='Nelder-Mead')

        if res['success']:
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    str_static_p = ', '.join("%s: %r" % t for t in locals().items())

    str_static_p += ",\nm_veg_x0 = {:.2f}, m_soil_x0 = {:.2f}".format(
        m_veg_x0, m_soil_x0)

    ismn_file = os.path.join(
        'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm'
    )
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})
    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True,
                        index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])
    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['kendall'] = df_metrics.kendalltau(scaled)
    metrics['ubrmsd'] = df_metrics.ubrmsd(scaled)
    metrics['var_ratio'] = df_var_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    axes = scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    # these are matplotlib.patch.Patch properties
    props = dict(facecolor='white', alpha=0)

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = [
        'bias', 'pearson R', 'kendall tau', 'unbiased RMSD', 'variance ratio'
    ]
    cell_text = []
    for metric in metrics:
        metric_values = metrics[metric]
        if isinstance(metric_values, tuple):
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append([
            "%.2f" % metric_values['ascat_and_insitu'],
            "%.2f" % metric_values['ascat_and_gldas'],
            "%.2f" % metric_values['insitu_and_gldas']
        ])

    table = plt.table(cellText=cell_text,
                      colLabels=columns,
                      colWidths=[0.1, 0.1, 0.1],
                      rowLabels=row_labels,
                      loc='bottom',
                      bbox=(0.2, -1.25, 0.5, 0.8))

    tcol_table = plt.table(cellText=[[
        "%.2f" % tcol_error['ascat'],
        "%.2f" % tcol_error['gldas'],
        "%.2f" % tcol_error['insitu']
    ]],
                           colLabels=('ascat', 'gldas', 'insitu'),
                           colWidths=[0.1, 0.1, 0.1],
                           rowLabels=['Triple collocation error'],
                           loc='bottom',
                           bbox=(0.2, -1.65, 0.5, 0.3))

    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for j, ax in enumerate(axes.flatten()):

            if np.remainder(j + 1, 3 + 1) != 1:
                min_x, max_x = ax.get_xlim()
                min_y, max_y = ax.get_ylim()
                # find minimum lower left coordinate and maximum upper right
                min_ll = min([min_x, min_y])
                max_ur = max([max_x, max_y])
                ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df
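
The per-timestep optimisation converts the ASCAT backscatter triplet from dB to linear units with db2lin, which is not defined in this example. A sketch under the standard decibel definition (the actual helper may differ):

import numpy as np

def db2lin(value_db):
    # standard dB -> linear conversion; sig_sqr_diff then compares
    # modelled and observed backscatter in linear units
    return 10.0 ** (np.asarray(value_db) / 10.0)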
Code example #6
def validate(params,
             timespan=('2009-01', '2009-12'), gpi=None, rescaling=None,
             y_axis_range=None):
    """
    This function optimises the parameters vegetation water content
    'm_veg' and soil moisture 'm_soil' and, if specified, a third optional
    parameter, and validates the result against in situ and GLDAS data.
    The third optional parameter can either be sand 'sand', clay 'clay',
    fractional root mean square height 'f_rms', stem volume 's_vol' or
    temperature 'temp'.

    Parameters
    ----------
    params : list of dicts
        Model parameters. If an optional parameter has been selected, at
        least four of the following parameters need to be specified;
        otherwise all of them need to be specified: 'sand', 'clay',
        'f_rms', 'temp', 's_vol'
    timespan : tuple, optional
        timespan to analyze
    gpi : int, optional
        Grid point index. If specified, it will read data from datapool.
    rescaling : string, optional
        Rescaling method, one of 'min_max', 'linreg', 'mean_std' and
        'lin_cdf_match'. Default: None.
        insitu is the reference to which the other datasets are scaled.
    y_axis_range : tuple, optional
        specify (min, max) of y axis


    Returns
    -------
    df : pandas.DataFrame
        Optimised soil moisture, vegetation water content and, if specified,
        optional optimised parameter.
    """

    unit_dict = {'freq': 'GHz',
                 'sand': '',
                 'clay': '',
                 'temp': r'$^\circ$C',
                 'eps': '',
                 'theta': r'$^\circ$',
                 'f_rms': '',
                 'sig_bare': 'dB',
                 'm_soil': '%',
                 'm_veg': '%',
                 'm_soil_x0': '%',
                 'm_veg_x0': '%',
                 's_vol': r'$m^3ha^{-1}$',
                 'sig_canopy': 'dB',
                 'sig_for': 'dB',
                 'sig_floor': 'dB',
                 'polarization': ''}

    param_should = ['sand', 'clay', 'temp',
                    's_vol', 'f_rms',
                    'm_veg_x0', 'm_soil_x0']

    for param in param_should:
        assert param in params.keys()

    if gpi is None:
        data_path = os.path.join(
            os.path.split(os.path.abspath(__file__))[0],
            'data', '2011528_2009.csv')
        ts_resam = pd.read_csv(data_path, index_col=0,
                               parse_dates=True)[timespan[0]:timespan[1]]
        gpi = 2011528
    else:
        ts_resam = read_resam(gpi)[timespan[0]:timespan[1]]

    m_veg_x0 = params.pop('m_veg_x0')
    m_soil_x0 = params.pop('m_soil_x0')
    columns = ['m_veg', 'm_soil']

    x0 = np.array([m_veg_x0, m_soil_x0])

    df = pd.DataFrame(index=ts_resam.index, columns=columns)
    df = df.fillna(np.nan)
    # optimise  m_soil and m_veg
    for index, row in ts_resam.iterrows():

        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = \
            db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        args = (ascat_inc, ascat_sig, params, '')
        res = minimize(sig_sqr_diff, x0, args=args, method='Nelder-Mead')

        if res['success']:
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    str_static_p = ', '.join("%s: %r" % t for t in locals().items())

    str_static_p += ",\nm_veg_x0 = {:.2f}, m_soil_x0 = {:.2f}".format(
        m_veg_x0, m_soil_x0)

    ismn_file = os.path.join(
        os.path.split(os.path.abspath(__file__))[0], 'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm'
    )
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})
    gldas = pd.read_csv(os.path.join(
        os.path.split(os.path.abspath(__file__))[0], 'data',
        'GLDAS_737602.csv'), parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas']) / 100.0
    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['spearman'] = df_metrics.spearmanr(scaled)
    metrics['ubrmsd'] = df_metrics.rmsd(scaled)
    metrics['std_ratio'] = df_std_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
        rmsd_title = 'unbiased RMSD'
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])
        rmsd_title = 'RMSD'


    axes = scaled.plot(title=ts_title, figsize=(18, 8))
    plt.legend()

    # these are matplotlib.patch.Patch properties
    props = dict(facecolor='white', alpha=0)

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'spearman rho', rmsd_title, 'stddev ratio']
    cell_text = []
    for metric in metrics:
        metric_values = metrics[metric]
        if isinstance(metric_values, tuple):
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])


    table = plt.table(
              cellText=cell_text,
              colLabels=columns,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=row_labels, loc='bottom',
              bbox=(0.2, -0.5, 0.5, 0.3))

    tcol_table = plt.table(
              cellText=[["%.2f" % tcol_error['ascat'],
                         "%.2f" % tcol_error['gldas'],
                         "%.2f" % tcol_error['insitu']]],
              colLabels=('ascat      ', 'gldas      ', 'insitu      '),
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'], loc='bottom',
              bbox=(0.2, -0.6, 0.5, 0.1))

    plt.subplots_adjust(left=0.08, bottom=0.35, right=0.85)
    plt.draw()
    if y_axis_range is not None:
        axes.set_ylim(y_axis_range)

    params['m_veg_x0'] = m_veg_x0
    params['m_soil_x0'] = m_soil_x0

    infotext = []
    for label in sorted(param_should):
        infotext.append('%s = %s %s' % (label, params[label], unit_dict[label]))

    infotext = '\n'.join(infotext)

    # place a text box in upper left in axes coords
    axes.text(1.03, 1, infotext, transform=axes.transAxes, fontsize=12,
            verticalalignment='top', bbox=props)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # draw a 1:1 line in the off-diagonal scatter plots
    for j, ax in enumerate(axes.flatten()):
        if y_axis_range is not None:
            ax.set_xlim(y_axis_range)

        if np.remainder(j + 1, 3 + 1) != 1:
            if y_axis_range is not None:
                ax.set_ylim(y_axis_range)
            min_x, max_x = ax.get_xlim()
            min_y, max_y = ax.get_ylim()
            # find minimum lower left coordinate and maximum upper right
            min_ll = min([min_x, min_y])
            max_ur = max([max_x, max_y])
            ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df
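
The table row labelled 'unbiased RMSD' (shown when rescaling is applied) corresponds to the conventional bias-free RMSD of two series: the RMSD of the mean-removed series, so that ubRMSD**2 == RMSD**2 - bias**2. A pairwise sketch of that definition:

import numpy as np

def ubrmsd(x, y):
    # RMSD after removing each series' mean, i.e. the bias-free RMSD
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    return np.sqrt(np.mean(((x - x.mean()) - (y - y.mean())) ** 2))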
Code example #7
def optimise(params,
             timespan=('2009-01', '2009-12'), gpi=None, rescaling=None):
    """
    This function optimises the parameters vegetation water content
    'm_veg' and soil moisture 'm_soil' and, if specified, a third optional
    parameter. The third optional parameter can either be sand 'sand',
    clay 'clay', fractional root mean square height 'f_rms',
    stem volume 's_vol' or temperature 'temp'.

    Parameters
    ----------
    params : list of dicts
        Model parameters. If an optional parameter has been selected, at
        least four of the following parameters need to be specified;
        otherwise all of them need to be specified: 'sand', 'clay',
        'f_rms', 'temp', 's_vol'
    gpi : int, optional
        Grid point index. If specified, it will read data from datapool.

    Returns
    -------
    df : pandas.DataFrame
        Optimised soil moisture, vegetation water content and, if specified,
        optional optimised parameter.
    """

    if gpi is None:
        ts_resam = pd.read_csv(os.path.join("data", "2011528_2009.csv"),
                               index_col=0,
                               parse_dates=True)[timespan[0]:timespan[1]]
        gpi = 2011528
    else:
        ts_resam = read_resam(gpi)[timespan[0]:timespan[1]]

    m_veg_x0 = params.pop('m_veg_x0')
    m_soil_x0 = params.pop('m_soil_x0')
    columns = ['m_veg', 'm_soil']

    x0 = np.array([m_veg_x0, m_soil_x0])

    df = pd.DataFrame(index=ts_resam.index, columns=columns)
    df = df.fillna(np.nan)
    # optimise  m_soil and m_veg
    for index, row in ts_resam.iterrows():

        ascat_inc = np.array(row[['incf', 'incm', 'inca']].tolist())
        ascat_sig = \
            db2lin(np.array(row[['sigf', 'sigm', 'siga']].tolist()))

        args = (ascat_inc, ascat_sig, params, '')
        res = minimize(sig_sqr_diff, x0, args=args, method='Nelder-Mead')

        if res['success']:
            df.loc[index, 'm_veg'] = res['x'][0]
            df.loc[index, 'm_soil'] = res['x'][1]

    str_static_p = ', '.join("%s: %r" % t for t in locals().items())

    str_static_p += ",\nm_veg_x0 = {:.2f}, m_soil_x0 = {:.2f}".format(
        m_veg_x0, m_soil_x0)

    ismn_file = os.path.join(
        'data',
        'ARM_ARM_Larned_sm_0.050000_0.050000_Water-Matric-Potential-Sensor-229L-W_20090101_20140527.stm'
    )
    ismn_data = ismn_readers.read_data(ismn_file)
    insitu = pd.DataFrame(ismn_data.data['soil moisture']).rename(
        columns={'soil moisture': 'insitu'})
    gldas = pd.read_csv(os.path.join('data', 'GLDAS_737602.csv'),
                        parse_dates=True, index_col=0)
    gldas.rename(columns={'086_L1': 'gldas'}, inplace=True)
    gldas = pd.DataFrame(gldas['gldas'])
    ascat = pd.DataFrame(df['m_soil']).rename(columns={'m_soil': 'ascat'})

    matched = temp_match.matching(ascat, insitu, gldas)

    if rescaling is not None:
        scaled = scaling.scale(matched, rescaling, reference_index=1)
    else:
        scaled = matched

    metrics = OrderedDict()
    metrics['bias'] = df_metrics.bias(scaled)
    metrics['pearson'] = df_metrics.pearsonr(scaled)
    metrics['kendall'] = df_metrics.kendalltau(scaled)
    metrics['ubrmsd'] = df_metrics.ubrmsd(scaled)
    metrics['var_ratio'] = df_var_ratio(scaled)
    tcol_error = df_metrics.tcol_error(scaled)._asdict()

    ts_title = "Soil moisture. "
    if rescaling is not None:
        ts_title = ' '.join([ts_title, 'Rescaling: %s.' % rescaling])
    else:
        ts_title = ' '.join([ts_title, 'No rescaling.'])

    axes = scaled.plot(subplots=True, title=ts_title, figsize=(18, 8))

    # these are matplotlib.patch.Patch properties
    props = dict(facecolor='white', alpha=0)

    columns = ('ascat-insitu', 'ascat-gldas', 'insitu-gldas')
    row_labels = ['bias', 'pearson R', 'kendall tau', 'unbiased RMSD', 'variance ratio']
    cell_text = []
    for metric in metrics:
        metric_values = metrics[metric]
        if isinstance(metric_values, tuple):
            metric_values = metric_values[0]
        metric_values = metric_values._asdict()
        cell_text.append(["%.2f" % metric_values['ascat_and_insitu'],
                          "%.2f" % metric_values['ascat_and_gldas'],
                          "%.2f" % metric_values['insitu_and_gldas']])

    table = plt.table(
              cellText=cell_text,
              colLabels=columns,
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=row_labels, loc='bottom',
              bbox=(0.2, -1.25, 0.5, 0.8))

    tcol_table = plt.table(
              cellText=[["%.2f" % tcol_error['ascat'],
                         "%.2f" % tcol_error['gldas'],
                         "%.2f" % tcol_error['insitu']]],
              colLabels=('ascat', 'gldas', 'insitu'),
              colWidths=[0.1, 0.1, 0.1],
              rowLabels=['Triple collocation error'], loc='bottom',
              bbox=(0.2, -1.65, 0.5, 0.3))

    plt.subplots_adjust(left=0.08, bottom=0.35)

    axes = scatter_matrix(scaled)
    axes.flat[0].figure.suptitle(ts_title)

    # only draw 1:1 line if scaling was applied
    if rescaling is not None:
        for j, ax in enumerate(axes.flatten()):

            if np.remainder(j + 1, 3 + 1) != 1:
                min_x, max_x = ax.get_xlim()
                min_y, max_y = ax.get_ylim()
                # find minimum lower left coordinate and maximum upper right
                min_ll = min([min_x, min_y])
                max_ur = max([max_x, max_y])
                ax.plot([min_ll, max_ur], [min_ll, max_ur], '--', c='0.6')

    return df
Code example #8
File: metric_calculators.py Project: TUW-GEO/pytesmo
    def calc_metrics(self, data, gpi_info):
        """
        Calculate the desired statistics.

        Parameters
        ----------
        data : pandas.DataFrame
            with >2 columns; the first column is the reference dataset,
            named 'ref', and the other columns are the datasets to compare
            against, named 'other_i'
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment
        because the scipy implementation is very slow, which is problematic
        for global comparisons
        """

        dataset = copy.deepcopy(self.result_template)

        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        # number of observations
        subset = np.ones(len(data), dtype=bool)

        n_obs = subset.sum()
        if n_obs < 10:
            return dataset

        dataset['n_obs'][0] = n_obs


        # calculate Pearson correlation
        pearson_R, pearson_p = df_metrics.pearsonr(data)
        pearson_R = pearson_R._asdict()
        pearson_p = pearson_p._asdict()

        # calculate Spearman correlation
        spea_rho, spea_p = df_metrics.spearmanr(data)
        spea_rho = spea_rho._asdict()
        spea_p = spea_p._asdict()

        # calculate bias
        bias_nT = df_metrics.bias(data)
        bias_dict = bias_nT._asdict()

        # calculate RMSD
        rmsd = df_metrics.rmsd(data)
        rmsd_dict = rmsd._asdict()

        # calculate MSE
        mse, mse_corr, mse_bias, mse_var = df_metrics.mse(data)
        mse_dict = mse._asdict()
        mse_corr_dict = mse_corr._asdict()
        mse_bias_dict = mse_bias._asdict()
        mse_var_dict = mse_var._asdict()

        # calculate tau
        if self.calc_tau:
            tau, p_tau = df_metrics.kendalltau(data)
            tau_dict = tau._asdict()
            p_tau_dict = p_tau._asdict()
        else:
            tau = p_tau = p_tau_dict = tau_dict = None

        # calculate ubRMSD (no scaling is applied beforehand in this variant)
        ubRMSD_nT = df_metrics.ubrmsd(data)
        ubRMSD_dict = ubRMSD_nT._asdict()

        # get single dataset metrics
        # calculate SNR
        x = data[self.df_columns[0]].values[subset]
        y = data[self.df_columns[1]].values[subset]
        z = data[self.df_columns[2]].values[subset]

        snr, err, beta = metrics.tcol_snr(x, y, z)

        for i, name in enumerate(self.ds_names):
            dataset['{:}_snr'.format(name)][0] = snr[i]
            dataset['{:}_err_var'.format(name)][0] = err[i]
            dataset['{:}_beta'.format(name)][0] = beta[i]


        for tds_name in self.tds_names:
            R = pearson_R[tds_name]
            p_R = pearson_p[tds_name]
            rho = spea_rho[tds_name]
            p_rho = spea_p[tds_name]
            bias = bias_dict[tds_name]
            mse = mse_dict[tds_name]
            mse_corr = mse_corr_dict[tds_name]
            mse_bias = mse_bias_dict[tds_name]
            mse_var = mse_var_dict[tds_name]
            rmsd = rmsd_dict[tds_name]
            ubRMSD = ubRMSD_dict[tds_name]
            if tau_dict and p_tau_dict:
                tau = tau_dict[tds_name]
                p_tau = p_tau_dict[tds_name]


            split_tds_name = tds_name.split('_and_')
            tds_name_key = "{:}_{:}".format(
                self.ds_names_lut[split_tds_name[0]],
                self.ds_names_lut[split_tds_name[1]])

            dataset['R_between_{:}'.format(tds_name_key)][0] = R
            dataset['p_R_between_{:}'.format(tds_name_key)][0] = p_R
            dataset['rho_between_{:}'.format(tds_name_key)][0] = rho
            dataset['p_rho_between_{:}'.format(tds_name_key)][0] = p_rho
            dataset['bias_between_{:}'.format(tds_name_key)][0] = bias
            dataset['mse_between_{:}'.format(tds_name_key)][0] = mse
            dataset['mse_corr_between_{:}'.format(tds_name_key)][0] = mse_corr
            dataset['mse_bias_between_{:}'.format(tds_name_key)][0] = mse_bias
            dataset['mse_var_between_{:}'.format(tds_name_key)][0] = mse_var
            dataset['rmsd_between_{:}'.format(tds_name_key)][0] = rmsd
            dataset['ubRMSD_between_{:}'.format(tds_name_key)][0] = ubRMSD

            if self.calc_tau:
                dataset['tau_between_{:}'.format(tds_name_key)][0] = tau
                dataset['p_tau_between_{:}'.format(tds_name_key)][0] = p_tau

        return dataset
Code example #9
File: metric_calculators.py Project: TUW-GEO/pytesmo
    def calc_metrics(self, data, gpi_info):
        """
        Calculate the desired statistics.

        Parameters
        ----------
        data : pandas.DataFrame
            with 3 columns; the first column is the reference dataset,
            named 'ref', and the second and third columns are the datasets
            to compare against, named 'k1' and 'k2'
        gpi_info : tuple
            Grid point info (i.e. gpi, lon, lat)
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        for season in self.seasons:

            if season != 'ALL':
                subset = self.month_to_season[data.index.month] == season
            else:
                subset = np.ones(len(data), dtype=bool)

            # number of observations
            n_obs = subset.sum()
            if n_obs < 10:
                continue
            dataset['{:}_n_obs'.format(season)][0] = n_obs

            # get single dataset metrics
            # calculate SNR
            x = data[self.df_columns[0]].values[subset]
            y = data[self.df_columns[1]].values[subset]
            z = data[self.df_columns[2]].values[subset]

            snr, err, beta = metrics.tcol_snr(x, y, z)

            for i, name in enumerate(self.ds_names):
                dataset['{:}_{:}_snr'.format(name, season)][0] = snr[i]
                dataset['{:}_{:}_err_var'.format(name, season)][0] = err[i]
                dataset['{:}_{:}_beta'.format(name, season)][0] = beta[i]

            # restrict the pairwise metrics to the current season so that
            # they match the season-keyed result fields below
            data_season = data[subset]

            # calculate Pearson correlation
            pearson_R, pearson_p = df_metrics.pearsonr(data_season)
            pearson_R = pearson_R._asdict()
            pearson_p = pearson_p._asdict()

            # calculate Spearman correlation
            spea_rho, spea_p = df_metrics.spearmanr(data_season)
            spea_rho = spea_rho._asdict()
            spea_p = spea_p._asdict()

            # scale data to reference in order to calculate absolute metrics
            data_scaled = scale(data_season, method='min_max')

            # calculate bias
            bias_nT = df_metrics.bias(data_scaled)
            bias_dict = bias_nT._asdict()

            # calculate ubRMSD
            ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
            ubRMSD_dict = ubRMSD_nT._asdict()

            for tds_name in self.tds_names:
                R = pearson_R[tds_name]
                p_R = pearson_p[tds_name]
                rho = spea_rho[tds_name]
                p_rho = spea_p[tds_name]
                bias = bias_dict[tds_name]
                ubRMSD = ubRMSD_dict[tds_name]

                split_tds_name = tds_name.split('_and_')
                tds_name_key = "{:}_{:}".format(
                    self.ds_names_lut[split_tds_name[0]],
                    self.ds_names_lut[split_tds_name[1]])

                dataset['{:}_{:}_R'.format(tds_name_key, season)][0] = R
                dataset['{:}_{:}_p_R'.format(tds_name_key, season)][0] = p_R
                dataset['{:}_{:}_rho'.format(tds_name_key, season)][0] = rho
                dataset['{:}_{:}_p_rho'.format(tds_name_key, season)][0] = \
                    p_rho
                dataset['{:}_{:}_bias'.format(tds_name_key, season)][0] = bias
                dataset['{:}_{:}_ubrmsd'.format(tds_name_key, season)][0] = \
                    ubRMSD

        return dataset