Example #1
def test_pearson_recursive():

    x = np.random.rand(100)
    y = np.random.rand(100)

    r, p = met.pearsonr(x, y)
    r_rec, _ = met.pearsonr_recursive(x, y)
    nptest.assert_almost_equal(r, r_rec)

    args = []
    for xi, yi in zip(x, y):

        r_rec, args = met.pearsonr_recursive(np.array([xi]),
                                             np.array([yi]), *args)

    nptest.assert_almost_equal(r, r_rec)
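
The test above drives pytesmo's pearsonr_recursive one sample at a time and checks that it matches the batch result. A minimal sketch of the underlying idea, assuming the recursion state is simply the running sums (n, Σx, Σy, Σx², Σy², Σxy); this is a hypothetical helper to illustrate the technique, not pytesmo's implementation:

import numpy as np

def pearson_incremental(x_new, y_new, n=0, sx=0.0, sy=0.0,
                        sxx=0.0, syy=0.0, sxy=0.0):
    # hypothetical helper: fold the new samples into the running sums,
    # then evaluate Pearson r from the sums (not pytesmo code)
    n += len(x_new)
    sx += np.sum(x_new)
    sy += np.sum(y_new)
    sxx += np.sum(x_new ** 2)
    syy += np.sum(y_new ** 2)
    sxy += np.sum(x_new * y_new)
    var_x = sxx - sx ** 2 / n
    var_y = syy - sy ** 2 / n
    if var_x <= 0 or var_y <= 0:
        r = np.nan  # correlation undefined for constant or single-sample input
    else:
        r = (sxy - sx * sy / n) / np.sqrt(var_x * var_y)
    return r, (n, sx, sy, sxx, syy, sxy)

# incremental use, mirroring the loop in the test above
state = ()
for xi, yi in zip(np.random.rand(100), np.random.rand(100)):
    r, state = pearson_incremental(np.array([xi]), np.array([yi]), *state)
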
Example #2

def test_pearson_recursive():

    x = np.random.rand(100)
    y = np.random.rand(100)

    r, p = met.pearsonr(x, y)
    r_rec, _ = met.pearsonr_recursive(x, y)
    nptest.assert_almost_equal(r, r_rec)

    args = []
    for xi, yi in zip(x, y):

        r_rec, args = met.pearsonr_recursive(np.array([xi]), np.array([yi]),
                                             *args)

    nptest.assert_almost_equal(r, r_rec)
Example #3
    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            DataFrame with 2 columns: the first column is the reference
            dataset, named 'ref'; the second column is the dataset to
            compare against, named 'other'.
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment because the scipy
        implementation is very slow, which is problematic for global
        comparisons.
        """
        dataset = copy.deepcopy(self.result_template)

        dataset["n_obs"][0] = len(data)
        dataset["gpi"][0] = gpi_info[0]
        dataset["lon"][0] = gpi_info[1]
        dataset["lat"][0] = gpi_info[2]

        if len(data) < 10:
            return dataset

        x, y = data["ref"].values, data[self.other_name].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset["R"][0], dataset["p_R"][0] = R, p_R
        dataset["rho"][0], dataset["p_rho"][0] = rho, p_rho
        dataset["RMSD"][0] = RMSD
        dataset["BIAS"][0] = BIAS

        if self.calc_tau:
            tau, p_tau = metrics.kendalltau(x, y)
            dataset["tau"][0], dataset["p_tau"][0] = tau, p_tau

        return dataset
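
Each calc_metrics example writes into position 0 of arrays held in self.result_template, which is deep-copied for every grid point. A minimal sketch of what such a template could look like, assuming one-element numpy arrays keyed by result name (the exact dtypes and field list are assumptions, not taken from the example):

import numpy as np

# hypothetical result template: one slot per grid point and result field
result_template = {
    "n_obs": np.array([0], dtype=np.int32),
    "gpi": np.array([-1], dtype=np.int32),
    "lon": np.array([np.nan]),
    "lat": np.array([np.nan]),
    "R": np.array([np.nan]), "p_R": np.array([np.nan]),
    "rho": np.array([np.nan]), "p_rho": np.array([np.nan]),
    "RMSD": np.array([np.nan]),
    "BIAS": np.array([np.nan]),
    "tau": np.array([np.nan]), "p_tau": np.array([np.nan]),
}
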
Example #4
    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            DataFrame with 2 columns: the first column is the reference
            dataset, named 'ref'; the second column is the dataset to
            compare against, named 'other'.
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau calculation is optional at the moment because the scipy
        implementation is very slow, which is problematic for global
        comparisons.
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        if len(data) < 10:
            return dataset

        x, y = data['ref'].values, data[self.other_name].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset['R'][0], dataset['p_R'][0] = R, p_R
        dataset['rho'][0], dataset['p_rho'][0] = rho, p_rho
        dataset['RMSD'][0] = RMSD
        dataset['BIAS'][0] = BIAS

        if self.calc_tau:
            tau, p_tau = metrics.kendalltau(x, y)
            dataset['tau'][0], dataset['p_tau'][0] = tau, p_tau

        return dataset
Example #5

    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            DataFrame with 2 columns: the first column is the reference
            dataset, named 'ref'; the second column is the dataset to
            compare against, named 'other'.
        gpi_info : tuple
            of (gpi, lon, lat)

        Notes
        -----
        Kendall tau is not calculated at the moment because the scipy
        implementation is very slow, which is problematic for global
        comparisons.
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['n_obs'][0] = len(data)
        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        if len(data) < 10:
            return dataset

        x, y = data['ref'].values, data['other'].values
        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)
        # tau, p_tau = metrics.kendalltau(x, y)
        RMSD = metrics.rmsd(x, y)
        BIAS = metrics.bias(x, y)

        dataset['R'][0], dataset['p_R'][0] = R, p_R
        dataset['rho'][0], dataset['p_rho'][0] = rho, p_rho
        # dataset['tau'][0], dataset['p_tau'][0] = tau, p_tau
        dataset['RMSD'][0] = RMSD
        dataset['BIAS'][0] = BIAS

        return dataset
Example #6
    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            DataFrame with 2 columns: the first column is the reference
            dataset, named 'ref'; the second column is the dataset to
            compare against, named 'other'.
        gpi_info : tuple
            Grid point info (i.e. gpi, lon, lat)
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        for season in self.seasons:

            if season != 'ALL':
                subset = self.month_to_season[data.index.month] == season
            else:
                subset = np.ones(len(data), dtype=bool)

            if subset.sum() < 10:
                continue

            x = data['ref'].values[subset]
            y = data[self.other_name].values[subset]
            R, p_R = metrics.pearsonr(x, y)
            rho, p_rho = metrics.spearmanr(x, y)

            dataset['{:}_n_obs'.format(season)][0] = subset.sum()
            dataset['{:}_R'.format(season)][0] = R
            dataset['{:}_p_R'.format(season)][0] = p_R
            dataset['{:}_rho'.format(season)][0] = rho
            dataset['{:}_p_rho'.format(season)][0] = p_rho

        return dataset
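
The seasonal variant above indexes self.month_to_season with data.index.month to build a per-season boolean mask. One way such a lookup table can be built, assuming standard meteorological seasons (the mapping below is an assumption, not shown in the example):

import numpy as np

# entry 0 is a placeholder so month numbers 1-12 index the array directly
month_to_season = np.array(['', 'DJF', 'DJF', 'MAM', 'MAM', 'MAM',
                            'JJA', 'JJA', 'JJA', 'SON', 'SON', 'SON', 'DJF'])

months = np.array([1, 4, 7, 10, 12])        # stand-in for data.index.month
subset = month_to_season[months] == 'JJA'   # boolean mask for one season
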
Example #7
    def calc_metrics(self, data, gpi_info):
        """
        calculates the desired statistics

        Parameters
        ----------
        data : pandas.DataFrame
            DataFrame with 2 columns: the first column is the reference
            dataset, named 'ref'; the second column is the dataset to
            compare against, named 'other'.
        gpi_info : tuple
            Grid point info (i.e. gpi, lon, lat)
        """
        dataset = copy.deepcopy(self.result_template)

        dataset['gpi'][0] = gpi_info[0]
        dataset['lon'][0] = gpi_info[1]
        dataset['lat'][0] = gpi_info[2]

        for season in self.seasons:

            if season != 'ALL':
                subset = self.month_to_season[data.index.month] == season
            else:
                subset = np.ones(len(data), dtype=bool)

            if subset.sum() < 10:
                continue

            x = data['ref'].values[subset]
            y = data[self.other_name].values[subset]
            R, p_R = metrics.pearsonr(x, y)
            rho, p_rho = metrics.spearmanr(x, y)

            dataset['{:}_n_obs'.format(season)][0] = subset.sum()
            dataset['{:}_R'.format(season)][0] = R
            dataset['{:}_p_R'.format(season)][0] = p_R
            dataset['{:}_rho'.format(season)][0] = rho
            dataset['{:}_p_rho'.format(season)][0] = p_rho

        return dataset
Example #8
        scaled_data.plot(secondary_y=[label_ascat])
        plt.show()
        
        plt.scatter(matched_data[scaled_ascat_label].values,
                    matched_data[label_insitu].values)
        plt.xlabel(scaled_ascat_label)
        plt.ylabel(label_insitu)
        plt.show()
        
        # calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
        x = matched_data[scaled_ascat_label].values
        y = matched_data[label_insitu].values

        print("ISMN time series:", ISMN_time_series)
        print("compared to")
        print(ascat_time_series)
        print("Results:")
        print("Pearson's (R, p_value)", metrics.pearsonr(x, y))
        print("Spearman's (rho, p_value)", metrics.spearmanr(x, y))
        print("Kendall's (tau, p_value)", metrics.kendalltau(x, y))
        print("RMSD", metrics.rmsd(x, y))
        print("Bias", metrics.bias(x, y))
        print("Nash Sutcliffe", metrics.nash_sutcliffe(x, y))
        
        
    i += 1
    
    # only show the first 2 stations, otherwise this program would run a long
    # time and produce a lot of plots
    if i >= 2:
        break    
    
Example #9
        scaled_data.plot(secondary_y=[label_ascat])
        plt.show()

        plt.scatter(matched_data[scaled_ascat_label].values,
                    matched_data[label_insitu].values)
        plt.xlabel(scaled_ascat_label)
        plt.ylabel(label_insitu)
        plt.show()

        # calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
        x = matched_data[scaled_ascat_label].values
        y = matched_data[label_insitu].values

        print("ISMN time series:", ISMN_time_series)
        print("compared to")
        print(ascat_time_series)
        print("Results:")
        print("Pearson's (R, p_value)", metrics.pearsonr(x, y))
        print("Spearman's (rho, p_value)", metrics.spearmanr(x, y))
        print("Kendall's (tau, p_value)", metrics.kendalltau(x, y))
        print("RMSD", metrics.rmsd(x, y))
        print("Bias", metrics.bias(x, y))
        print("Nash Sutcliffe", metrics.nash_sutcliffe(x, y))

    i += 1

    # only show the first 2 stations, otherwise this program would run a long
    # time and produce a lot of plots
    if i >= 2:
        break
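
To try the same metric calls without the ASCAT/ISMN readers, they can be run on synthetic arrays; only the pytesmo.metrics functions already used above are assumed:

import numpy as np
import pytesmo.metrics as metrics

np.random.seed(0)
x = np.random.rand(365)
y = x + 0.1 * np.random.rand(365)   # noisy copy of x

print("Pearson's (R, p_value)", metrics.pearsonr(x, y))
print("Spearman's (rho, p_value)", metrics.spearmanr(x, y))
print("Kendall's (tau, p_value)", metrics.kendalltau(x, y))
print("RMSD", metrics.rmsd(x, y))
print("Bias", metrics.bias(x, y))
print("Nash Sutcliffe", metrics.nash_sutcliffe(x, y))
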
Example #10
    def _calc_validation_metrics(self):
        """
        Calculate vertical metrics between candidate and reference using pytesmo.

        Currently implemented:
            bias, mad, rmsd, nrmsd, PearsonR, SpearmanR

        Returns
        -------
        df_validation_metrics : pd.DataFrame
            DataFrame that contains the metrics between the candidate and
            reference for the two subset groups and the full frame.
        """
        df_validation_metrics = pd.DataFrame()

        for group_no, subset_data in enumerate([self.set0, self.set1, self.setfull]):
            if group_no in [0, 1]:
                group = 'group%i' % group_no
            else:
                group = 'FRAME'
            if 'bias' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    bias = np.nan
                else:
                    bias = metrics.bias(subset_data[self.reference_name].values,
                                        subset_data[self.candidate_name].values)
                df_validation_metrics.at['bias', '%s' % group] = bias

            if 'mad' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    mad = np.nan
                else:
                    mad = metrics.mad(subset_data[self.reference_name].values,
                                      subset_data[self.candidate_name].values)
                df_validation_metrics.at['mad', '%s' % group] = mad

            if 'rmsd' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    rmsd = np.nan
                else:
                    rmsd = metrics.rmsd(subset_data[self.reference_name].values,
                                        subset_data[self.candidate_name].values)
                df_validation_metrics.at['rmsd', '%s' % group] = rmsd

            if 'nrmsd' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    nrmsd = np.nan
                else:
                    nrmsd = metrics.nrmsd(subset_data[self.reference_name].values,
                                          subset_data[self.candidate_name].values)
                df_validation_metrics.at['nrmsd', '%s' % group] = nrmsd

            if 'PearsonR' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    pr, pp = np.nan, np.nan
                else:
                    with warnings.catch_warnings():  # suppress scipy warnings
                        warnings.filterwarnings('ignore')
                        pr, pp = metrics.pearsonr(subset_data[self.reference_name].values,
                                                  subset_data[self.candidate_name].values)

                df_validation_metrics.at['PearsonR', '%s' % group] = pr
                df_validation_metrics.at['Pp', '%s' % group] = pp

            if 'SpearmanR' in self.metrics:
                if any([subset_data[col].empty for col in [self.candidate_name, self.reference_name]]):
                    sr, sp = np.nan, np.nan
                else:
                    with warnings.catch_warnings():  # suppress scipy warnings
                        warnings.filterwarnings('ignore')
                        sr, sp = metrics.spearmanr(subset_data[self.reference_name].values,
                                                   subset_data[self.candidate_name].values)

                df_validation_metrics.at['SpearmanR', '%s' % group] = sr
                df_validation_metrics.at['Sp', '%s' % group] = sp

        return df_validation_metrics
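
Each metric in the example above repeats the same empty-check/compute/assign pattern. One possible consolidation, sketched under the assumption that every metric takes (reference, candidate) arrays and that the two-valued ones return (value, p_value); this is a refactoring idea, not code from the original class:

import numpy as np
import pytesmo.metrics as metrics

# hypothetical table: metric name -> (function, result column names)
METRIC_FUNCS = {
    'bias': (metrics.bias, ['bias']),
    'mad': (metrics.mad, ['mad']),
    'rmsd': (metrics.rmsd, ['rmsd']),
    'nrmsd': (metrics.nrmsd, ['nrmsd']),
    'PearsonR': (metrics.pearsonr, ['PearsonR', 'Pp']),
    'SpearmanR': (metrics.spearmanr, ['SpearmanR', 'Sp']),
}

def calc_group_metrics(ref, cand, wanted):
    # return {column: value} for the requested metrics, NaN when a series is empty
    results = {}
    for name in wanted:
        func, columns = METRIC_FUNCS[name]
        if len(ref) == 0 or len(cand) == 0:
            values = [np.nan] * len(columns)
        elif len(columns) == 2:
            values = list(func(ref, cand))   # e.g. (r, p_value)
        else:
            values = [func(ref, cand)]
        results.update(zip(columns, values))
    return results

With such a table, the per-group body of _calc_validation_metrics collapses to one call per subset followed by a dictionary update of the result frame.
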