def test_missing_data():
    n_datasets = 5
    npoints = 5
    nsamples = 100
    datasets = create_datasets(n_datasets, npoints, nsamples, missing=True)
    metric_calculator = PairwiseIntercomparisonMetrics()
    val = Validation(
        datasets,
        spatial_ref="0-ERA5",
        metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics},
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "H")),
    )
    gpis = list(range(npoints))
    val.calc(gpis, gpis, gpis, rename_cols=False, only_with_temporal_ref=True)
def test_PairwiseIntercomparisonMetrics_confidence_intervals():
    # tests if the correct confidence intervals are returned
    datasets, _ = testdata_random()
    matcher = make_combined_temporal_matcher(pd.Timedelta(6, "H"))
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=matcher,
        metrics_calculators={
            (4, 2): (PairwiseIntercomparisonMetrics(
                calc_spearman=True,
                calc_kendall=True,
                analytical_cis=True,
                bootstrap_cis=True,
            ).calc_metrics)
        })
    results_pw = val.calc([1], [1], [1], rename_cols=False)

    metrics_with_ci = {
        "BIAS": "bias",
        "R": "pearson_r",
        "rho": "spearman_r",
        "tau": "kendall_tau",
        "RMSD": "rmsd",
        "urmsd": "ubrmsd",
        "mse": "msd",
        "mse_bias": "mse_bias",
    }
    metrics_with_bs_ci = {
        "mse_corr": "mse_corr",
        "mse_var": "mse_var",
    }

    # reconstruct dataframe
    frames = []
    for key in datasets:
        frames.append(datasets[key]["class"].data)
    data = pd.concat(frames, axis=1)
    data.dropna(how="any", inplace=True)

    for key in results_pw:
        othername = key[0][0]
        other_col = othername.split("_")[0]
        other = data[other_col].values
        refname = key[1][0]
        ref_col = refname.split("_")[0]
        ref = data[ref_col].values

        for metric_key in metrics_with_ci:
            lower = results_pw[key][f"{metric_key}_ci_lower"]
            upper = results_pw[key][f"{metric_key}_ci_upper"]

            # calculate manually from data
            metric_func = getattr(pairwise, metrics_with_ci[metric_key])
            m, lb, ub = with_analytical_ci(metric_func, other, ref)
            # difference due to float32 vs. float64
            assert_almost_equal(upper, ub, 6)
            assert_almost_equal(lower, lb, 6)

        for metric_key in metrics_with_bs_ci:
            lower = results_pw[key][f"{metric_key}_ci_lower"]
            upper = results_pw[key][f"{metric_key}_ci_upper"]

            # calculate manually from data
            metric_func = getattr(pairwise, metrics_with_bs_ci[metric_key])
            m, lb, ub = with_bootstrapped_ci(metric_func, other, ref)
            assert_allclose(upper, ub, rtol=1e-1, atol=1e-4)
            assert_allclose(lower, lb, rtol=1e-1, atol=1e-4)
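
# The manual reconstruction in the test above uses pytesmo's confidence
# interval helpers directly. The sketch below is a minimal, illustrative
# example of how those helpers are called outside of the Validation
# framework; the random arrays are placeholder data and not part of the
# test suite.
def _example_ci_usage():
    import numpy as np
    from pytesmo.metrics import pairwise, with_analytical_ci, with_bootstrapped_ci

    rng = np.random.default_rng(42)
    x = rng.normal(size=100)
    y = x + rng.normal(scale=0.1, size=100)

    # analytical CI: returns the metric value and its lower/upper bounds
    r, r_lower, r_upper = with_analytical_ci(pairwise.pearson_r, x, y)

    # bootstrapped CI for metrics without an analytical formula
    mse_corr, lb, ub = with_bootstrapped_ci(pairwise.mse_corr, x, y)
    return (r, r_lower, r_upper), (mse_corr, lb, ub)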
def test_PairwiseIntercomparisonMetrics(testdata_generator):
    # This test first compares the PairwiseIntercomparisonMetrics to known
    # results and then confirms that it agrees with IntercomparisonMetrics
    # as expected
    datasets, expected = testdata_generator()
    # for the pairwise intercomparison metrics it's important that we use
    # make_combined_temporal_matcher
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(6, "H")),
        metrics_calculators={
            (4, 2): (PairwiseIntercomparisonMetrics(
                calc_spearman=True, analytical_cis=False).calc_metrics)
        })
    results_pw = val.calc([1], [1], [1], rename_cols=False)

    # in results_pw, there are four entries with keys like
    # (("c1name", "c1"), ("refname", "ref")), and so on.
    # Each value is a single dictionary with the values of the metrics
    expected_metrics = [
        "R", "p_R", "BIAS", "RMSD", "mse", "RSS", "mse_corr", "mse_bias",
        "urmsd", "mse_var", "n_obs", "gpi", "lat", "lon", "rho", "p_rho",
        "tau", "p_tau"
    ]
    for key in results_pw:
        assert isinstance(key, tuple)
        assert len(key) == 2
        assert all(map(lambda x: isinstance(x, tuple), key))
        assert isinstance(results_pw[key], dict)
        assert sorted(expected_metrics) == sorted(results_pw[key].keys())
        for m in expected_metrics:
            if m in expected[key]:
                assert_equal(results_pw[key][m], expected[key][m])

    # preparation of IntercomparisonMetrics run for comparison
    ds_names = list(datasets.keys())
    metrics = IntercomparisonMetrics(
        dataset_names=ds_names,
        # passing the names here explicitly, see GH issue #220
        refname="reference_name",
        other_names=ds_names[1:],
        calc_tau=True,
    )
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,
        temporal_matcher=None,  # use default here
        metrics_calculators={(4, 4): metrics.calc_metrics})
    results = val.calc(1, 1, 1, rename_cols=False)

    # results is a dictionary with one entry and key
    # (('c1name', 'c1'), ('c2name', 'c2'), ('c3name', 'c3'), ('refname',
    # 'ref')); the value is a dictionary with all the results, where each
    # metric name is joined with "_between_" to the combination of datasets,
    # which is joined with "_and_", e.g. for R between ``refname`` and
    # ``c1name`` the key is "R_between_refname_and_c1name"
    common_metrics = ["n_obs", "gpi", "lat", "lon"]
    pw_metrics = list(set(expected_metrics) - set(common_metrics))
    # there's some sorting done at some point in pytesmo
    oldkey = tuple(sorted([(name, name.split("_")[0]) for name in ds_names]))
    res_old = results[oldkey]

    for key in results_pw:
        res = results_pw[key]
        # handle the full dataset metrics
        for m in common_metrics:
            assert_equal(res[m], res_old[m])
        # now get the metrics and compare to the right combination
        for m in pw_metrics:
            othername = key[0][0]
            refname = key[1][0]
            if othername == "reference_name":
                # sorting might be different, see GH #220
                othername = key[1][0]
                refname = key[0][0]
            old_m_key = f"{m}_between_{refname}_and_{othername}"
            if m == "BIAS":
                # PairwiseIntercomparisonMetrics has the result as (other,
                # ref), and therefore "bias between other and ref", compared
                # to "bias between ref and other" in IntercomparisonMetrics;
                # this is related to issue #220
                assert_equal(np.abs(res[m]), np.abs(res_old[old_m_key]))
            elif m == "urmsd":
                # the old implementation differs from the new implementation
                pass
            else:
                assert_equal(res[m], res_old[old_m_key])
def create_pytesmo_validation(validation_run):
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        reader = setup_filtering(
            reader, list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset, dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        if validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone
            # information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        if (validation_run.reference_configuration and
                (dataset_config.id == validation_run.reference_configuration.id)):
            # reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num,
                                          dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        if (validation_run.reference_configuration and
                (dataset_config.id == validation_run.reference_configuration.id)):
            ref_name = dataset_name
            ref_short_name = validation_run.reference_configuration.dataset.short_name

        if (validation_run.scaling_ref and
                (dataset_config.id == validation_run.scaling_ref.id)):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # while pytesmo can't deal with timezones, normalise the validation
        # period to UTC; can be removed once pytesmo can do timezones
        startdate = validation_run.interval_from.astimezone(UTC).replace(
            tzinfo=None)
        enddate = validation_run.interval_to.astimezone(UTC).replace(
            tzinfo=None)
        period = [startdate, enddate]

    upscale_parms = None
    if validation_run.upscaling_method != "none":
        __logger.debug("Upscaling option is active")
        upscale_parms = {
            "upscaling_method": validation_run.upscaling_method,
            "temporal_stability": validation_run.temporal_stability,
        }
        upscaling_lut = create_upscaling_lut(
            validation_run=validation_run,
            datasets=datasets,
            ref_name=ref_name,
        )
        upscale_parms["upscaling_lut"] = upscaling_lut
        __logger.debug("Lookup table for non-reference datasets " +
                       ", ".join(upscaling_lut.keys()) + " created")
        __logger.debug("{}".format(upscaling_lut))

    datamanager = DataManager(
        datasets,
        ref_name=ref_name,
        period=period,
        read_ts_names='read',
        upscale_parms=upscale_parms,
    )
    ds_names = get_dataset_names(
        datamanager.reference_name, datamanager.datasets, n=ds_num)

    # set value of the metadata template according to which reference
    # dataset is used
    if ref_short_name == 'ISMN':
        metadata_template = METADATA_TEMPLATE['ismn_ref']
    else:
        metadata_template = METADATA_TEMPLATE['other_ref']

    pairwise_metrics = PairwiseIntercomparisonMetrics(
        metadata_template=metadata_template,
        calc_kendall=False,
    )
    metric_calculators = {(ds_num, 2): pairwise_metrics.calc_metrics}

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        tcol_metrics = TripleCollocationMetrics(
            ref_name,
            metadata_template=metadata_template,
        )
        metric_calculators.update({(ds_num, 3): tcol_metrics.calc_metrics})

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(
        datasets=datamanager,
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "H")),
        spatial_ref=ref_name,
        scaling=scaling_method,
        scaling_ref=scaling_ref_name,
        metrics_calculators=metric_calculators,
        period=period)

    return val
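
# For context, a minimal sketch of how the Validation object returned above
# is typically driven. The gpi/lon/lat lists below are purely hypothetical
# placeholders; the real qa4sm job submission wraps this call with its own
# gridding, error handling and result writing.
def _example_run_validation(validation_run):
    val = create_pytesmo_validation(validation_run)
    # hypothetical grid point indices and their coordinates
    gpis = [1, 2, 3]
    lons = [11.5, 12.0, 12.5]
    lats = [47.0, 47.5, 48.0]
    results = val.calc(gpis, lons, lats, rename_cols=False,
                       only_with_temporal_ref=True)
    return results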
def test_temporal_matching_ascat_ismn():
    """
    This test uses a CSV file of ASCAT and ISMN data to test if the temporal
    matching within the validation works as expected in a "real" setup.
    This only tests whether the number of observations matches, because this
    is the main thing the temporal matching influences.
    """
    # test with ASCAT and ISMN data
    here = Path(__file__).resolve().parent
    ascat = pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True)
    ismn = pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True)
    dfs = {"ASCAT": ascat, "ISMN": ismn}
    columns = {"ASCAT": "sm", "ISMN": "soil_moisture"}
    refname = "ISMN"
    window = pd.Timedelta(12, "H")

    old_matcher = BasicTemporalMatching().combinatory_matcher
    new_matcher = make_combined_temporal_matcher(window)

    datasets = {}
    for key in ["ISMN", "ASCAT"]:
        all_columns = list(dfs[key].columns)
        ds = {"columns": [columns[key]],
              "class": DummyReader(dfs[key], all_columns)}
        datasets[key] = ds

    new_val = Validation(
        datasets,
        refname,
        scaling=None,
        temporal_matcher=new_matcher,
        metrics_calculators={
            (2, 2): PairwiseIntercomparisonMetrics().calc_metrics
        }
    )
    new_results = new_val.calc(
        1, 1, 1, rename_cols=False, only_with_temporal_ref=True)

    # old setup
    ds_names = list(datasets.keys())
    metrics = IntercomparisonMetrics(
        dataset_names=ds_names,
        # passing the names here explicitly, see GH issue #220
        refname=refname,
        other_names=ds_names[1:],
        calc_tau=True,
    )
    old_val = Validation(
        datasets,
        refname,
        scaling=None,
        temporal_matcher=old_matcher,
        metrics_calculators={
            (2, 2): metrics.calc_metrics
        }
    )
    old_results = old_val.calc(1, 1, 1, rename_cols=False)

    old_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture'))
    new_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture'))

    assert old_results[old_key]["n_obs"] == new_results[new_key]["n_obs"]