import numpy as np
import numpy.testing as nptest

import pytesmo.metrics as met


def test_pearson_recursive():
    x = np.random.rand(100)
    y = np.random.rand(100)

    # reference value from the one-shot implementation
    r, p = met.pearsonr(x, y)
    r_rec, _ = met.pearsonr_recursive(x, y)
    nptest.assert_almost_equal(r, r_rec)

    # feed the data in one observation at a time, carrying the state in args
    args = []
    for xi, yi in zip(x, y):
        r_rec, args = met.pearsonr_recursive(np.array([xi]),
                                             np.array([yi]), *args)
    nptest.assert_almost_equal(r, r_rec)

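# The test above exercises an incremental Pearson correlation. As a minimal
# sketch (an assumed scheme, not pytesmo's actual internals), such an
# implementation can carry running sums between calls and recompute r from
# them each time a new chunk arrives:

import numpy as np

def pearsonr_running(x, y, state=None):
    """Update running sums with a new data chunk; return (r, new_state)."""
    n, sx, sy, sxy, sx2, sy2 = state if state is not None else (0,) * 6
    n += len(x)
    sx += x.sum()
    sy += y.sum()
    sxy += (x * y).sum()
    sx2 += (x ** 2).sum()
    sy2 += (y ** 2).sum()
    # one-pass Pearson formula evaluated from the accumulated sums
    r = (sxy - sx * sy / n) / np.sqrt((sx2 - sx ** 2 / n) *
                                      (sy2 - sy ** 2 / n))
    return r, (n, sx, sy, sxy, sx2, sy2)
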
def calc_metrics(self, data, gpi_info):
    """
    Calculates the desired statistics.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame with 2 columns: the first column is the reference
        dataset, named 'ref'; the second column is the dataset to compare
        against, named 'other'.
    gpi_info : tuple
        (gpi, lon, lat)

    Notes
    -----
    Kendall tau calculation is optional at the moment because the scipy
    implementation is very slow, which is problematic for global
    comparisons.
    """
    dataset = copy.deepcopy(self.result_template)

    dataset["n_obs"][0] = len(data)
    dataset["gpi"][0] = gpi_info[0]
    dataset["lon"][0] = gpi_info[1]
    dataset["lat"][0] = gpi_info[2]

    if len(data) < 10:
        return dataset

    x, y = data["ref"].values, data[self.other_name].values

    R, p_R = metrics.pearsonr(x, y)
    rho, p_rho = metrics.spearmanr(x, y)
    RMSD = metrics.rmsd(x, y)
    BIAS = metrics.bias(x, y)

    dataset["R"][0], dataset["p_R"][0] = R, p_R
    dataset["rho"][0], dataset["p_rho"][0] = rho, p_rho
    dataset["RMSD"][0] = RMSD
    dataset["BIAS"][0] = BIAS

    if self.calc_tau:
        tau, p_tau = metrics.kendalltau(x, y)
        dataset["tau"][0], dataset["p_tau"][0] = tau, p_tau

    return dataset

def calc_metrics(self, data, gpi_info):
    """
    Calculates the desired statistics.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame with 2 columns: the first column is the reference
        dataset, named 'ref'; the second column is the dataset to compare
        against, named 'other'.
    gpi_info : tuple
        (gpi, lon, lat)

    Notes
    -----
    Kendall tau is not calculated at the moment because the scipy
    implementation is very slow, which is problematic for global
    comparisons.
    """
    dataset = copy.deepcopy(self.result_template)

    dataset['n_obs'][0] = len(data)
    dataset['gpi'][0] = gpi_info[0]
    dataset['lon'][0] = gpi_info[1]
    dataset['lat'][0] = gpi_info[2]

    if len(data) < 10:
        return dataset

    x, y = data['ref'].values, data['other'].values

    R, p_R = metrics.pearsonr(x, y)
    rho, p_rho = metrics.spearmanr(x, y)
    # tau, p_tau = metrics.kendalltau(x, y)
    RMSD = metrics.rmsd(x, y)
    BIAS = metrics.bias(x, y)

    dataset['R'][0], dataset['p_R'][0] = R, p_R
    dataset['rho'][0], dataset['p_rho'][0] = rho, p_rho
    # dataset['tau'][0], dataset['p_tau'][0] = tau, p_tau
    dataset['RMSD'][0] = RMSD
    dataset['BIAS'][0] = BIAS

    return dataset

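# For orientation, a standalone sketch of the same metric calls on synthetic
# data; `metrics` is assumed to be pytesmo.metrics, matching the surrounding
# code, and the column stand-ins below are made up for the example.

import numpy as np
from pytesmo import metrics

rng = np.random.default_rng(0)
x = rng.random(100)                      # stand-in for the 'ref' column
y = x + 0.1 * rng.standard_normal(100)   # stand-in for the 'other' column

R, p_R = metrics.pearsonr(x, y)
rho, p_rho = metrics.spearmanr(x, y)
tau, p_tau = metrics.kendalltau(x, y)    # optional: slow in global runs
print("R=%.3f (p=%.2g)  rho=%.3f (p=%.2g)  tau=%.3f (p=%.2g)"
      % (R, p_R, rho, p_rho, tau, p_tau))
print("RMSD=%.3f  BIAS=%.3f" % (metrics.rmsd(x, y), metrics.bias(x, y)))
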
def calc_metrics(self, data, gpi_info):
    """
    Calculates the desired statistics per season.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame with 2 columns: the first column is the reference
        dataset, named 'ref'; the second column is the dataset to compare
        against, named 'other'.
    gpi_info : tuple
        Grid point info (i.e. gpi, lon, lat).
    """
    dataset = copy.deepcopy(self.result_template)

    dataset['gpi'][0] = gpi_info[0]
    dataset['lon'][0] = gpi_info[1]
    dataset['lat'][0] = gpi_info[2]

    for season in self.seasons:
        if season != 'ALL':
            subset = self.month_to_season[data.index.month] == season
        else:
            subset = np.ones(len(data), dtype=bool)

        if subset.sum() < 10:
            continue

        x = data['ref'].values[subset]
        y = data[self.other_name].values[subset]

        R, p_R = metrics.pearsonr(x, y)
        rho, p_rho = metrics.spearmanr(x, y)

        dataset['{:}_n_obs'.format(season)][0] = subset.sum()
        dataset['{:}_R'.format(season)][0] = R
        dataset['{:}_p_R'.format(season)][0] = p_R
        dataset['{:}_rho'.format(season)][0] = rho
        dataset['{:}_p_rho'.format(season)][0] = p_rho

    return dataset

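# The seasonal variant above indexes self.month_to_season with
# data.index.month. A possible construction (hypothetical; the actual
# attribute is built elsewhere in the class) is a 1-based lookup array so
# month numbers can index it directly:

import numpy as np

month_to_season = np.array(['', 'DJF', 'DJF', 'MAM', 'MAM', 'MAM',
                            'JJA', 'JJA', 'JJA', 'SON', 'SON', 'SON',
                            'DJF'])
# index 0 is a placeholder because pandas month numbers start at 1
print(month_to_season[np.array([1, 4, 7, 10])])  # ['DJF' 'MAM' 'JJA' 'SON']
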
scaled_data.plot(secondary_y=[label_ascat])
plt.show()

plt.scatter(matched_data[scaled_ascat_label].values,
            matched_data[label_insitu].values)
plt.xlabel(scaled_ascat_label)
plt.ylabel(label_insitu)
plt.show()

# calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
x, y = matched_data[scaled_ascat_label].values, \
    matched_data[label_insitu].values

print("ISMN time series:", ISMN_time_series)
print("compared to")
print(ascat_time_series)
print("Results:")
print("Pearson's (R, p_value)", metrics.pearsonr(x, y))
print("Spearman's (rho, p_value)", metrics.spearmanr(x, y))
print("Kendall's (tau, p_value)", metrics.kendalltau(x, y))
print("RMSD", metrics.rmsd(x, y))
print("Bias", metrics.bias(x, y))
print("Nash Sutcliffe", metrics.nash_sutcliffe(x, y))

i += 1

# only show the first 2 stations, otherwise this program would run a
# long time and produce a lot of plots
if i >= 2:
    break

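# The Nash-Sutcliffe value printed above, written out explicitly as a sketch
# of the standard definition; the (observed, simulated) argument order is an
# assumption about pytesmo's convention.

import numpy as np

def nash_sutcliffe_sketch(obs, sim):
    """NSE = 1 - sum((obs - sim)^2) / sum((obs - mean(obs))^2)."""
    return 1.0 - np.sum((obs - sim) ** 2) / np.sum((obs - np.mean(obs)) ** 2)
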
def _calc_validation_metrics(self):
    """
    Calculate vertical metrics between candidate and reference using
    pytesmo. Currently implemented: bias, mad, rmsd, nrmsd, Pearson R,
    Spearman R.

    Returns
    -------
    df_validation_metrics : pd.DataFrame
        Data frame that contains the metrics between the candidate and
        reference for the two groups and the full frame.
    """
    df_validation_metrics = pd.DataFrame()

    for group_no, subset_data in enumerate([self.set0, self.set1,
                                            self.setfull]):
        if group_no in [0, 1]:
            group = 'group%i' % group_no
        else:
            group = 'FRAME'

        if 'bias' in self.metrics:
            if any([subset_data[col].empty for col in
                    [self.candidate_name, self.reference_name]]):
                bias = np.nan
            else:
                bias = metrics.bias(subset_data[self.reference_name].values,
                                    subset_data[self.candidate_name].values)
            df_validation_metrics.at['bias', '%s' % group] = bias

        if 'mad' in self.metrics:
            if any([subset_data[col].empty for col in
                    [self.candidate_name, self.reference_name]]):
                mad = np.nan
            else:
                mad = metrics.mad(subset_data[self.reference_name].values,
                                  subset_data[self.candidate_name].values)
            df_validation_metrics.at['mad', '%s' % group] = mad

        if 'rmsd' in self.metrics:
            if any([subset_data[col].empty for col in
                    [self.candidate_name, self.reference_name]]):
                rmsd = np.nan
            else:
                rmsd = metrics.rmsd(subset_data[self.reference_name].values,
                                    subset_data[self.candidate_name].values)
            df_validation_metrics.at['rmsd', '%s' % group] = rmsd

        if 'nrmsd' in self.metrics:
            if any([subset_data[col].empty for col in
                    [self.candidate_name, self.reference_name]]):
                nrmsd = np.nan
            else:
                nrmsd = metrics.nrmsd(
                    subset_data[self.reference_name].values,
                    subset_data[self.candidate_name].values)
            df_validation_metrics.at['nrmsd', '%s' % group] = nrmsd

        if 'PearsonR' in self.metrics:
            if any([subset_data[col].empty for col in
                    [self.candidate_name, self.reference_name]]):
                pr, pp = np.nan, np.nan
            else:
                with warnings.catch_warnings():
                    # suppress scipy warnings
                    warnings.filterwarnings('ignore')
                    pr, pp = metrics.pearsonr(
                        subset_data[self.reference_name].values,
                        subset_data[self.candidate_name].values)
            df_validation_metrics.at['PearsonR', '%s' % group] = pr
            df_validation_metrics.at['Pp', '%s' % group] = pp

        if 'SpearmanR' in self.metrics:
            if any([subset_data[col].empty for col in
                    [self.candidate_name, self.reference_name]]):
                sr, sp = np.nan, np.nan
            else:
                with warnings.catch_warnings():
                    # suppress scipy warnings
                    warnings.filterwarnings('ignore')
                    sr, sp = metrics.spearmanr(
                        subset_data[self.reference_name].values,
                        subset_data[self.candidate_name].values)
            df_validation_metrics.at['SpearmanR', '%s' % group] = sr
            df_validation_metrics.at['Sp', '%s' % group] = sp

    return df_validation_metrics

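# The empty-column guard in _calc_validation_metrics is repeated for every
# metric; a hypothetical helper (not part of the original class) could fold
# the pattern into one place:

import numpy as np

def safe_metric(func, subset_data, ref_name, cand_name, n_out=1):
    """Apply a metric, returning NaN(s) when either input column is empty."""
    if subset_data[ref_name].empty or subset_data[cand_name].empty:
        return np.nan if n_out == 1 else (np.nan,) * n_out
    return func(subset_data[ref_name].values, subset_data[cand_name].values)

# usage sketch:
# bias = safe_metric(metrics.bias, subset_data,
#                    self.reference_name, self.candidate_name)
# pr, pp = safe_metric(metrics.pearsonr, subset_data,
#                      self.reference_name, self.candidate_name, n_out=2)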