def test_TC_metrics_calculator_metadata():
    """Check that metadata from ``gpi_info`` is propagated into TC results."""
    frame = make_some_data()
    triple = frame[['ref', 'k1', 'k2']]

    # the template declares which metadata fields exist and their dtypes
    template = {'network': np.array(['None'], dtype='U256')}
    calc = TCMetrics(
        other_names=('k1', 'k2'),
        calc_tau=True,
        dataset_names=['ref', 'k1', 'k2'],
        metadata_template=template,
    )

    gpi = (0, 0, 0, {'network': 'SOILSCAPE'})
    res = calc.calc_metrics(triple, gpi_info=gpi)

    assert res['network'] == np.array(['SOILSCAPE'], dtype='U256')
def test_TC_metrics_calculator_metadata():
    """Test TC metrics with metadata.

    The network name passed via ``gpi_info`` must show up in the result
    dict under the key declared in the metadata template.
    """
    data = make_some_data()[["ref", "k1", "k2"]]

    meta_template = {"network": np.array(["None"], dtype="U256")}
    metriccalc = TCMetrics(other_names=("k1", "k2"),
                           calc_tau=True,
                           dataset_names=["ref", "k1", "k2"],
                           metadata_template=meta_template)

    res = metriccalc.calc_metrics(
        data, gpi_info=(0, 0, 0, {"network": "SOILSCAPE"}))

    assert res["network"] == np.array(["SOILSCAPE"], dtype="U256")
def test_TC_metrics_calculator():
    """Test TC metrics.

    This calculator uses a reference data set that is part of ALL triples.
    """
    df = make_some_data()
    data = df[['ref', 'k1', 'k2', 'k3']]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # many warnings due to test data
        metriccalc = TCMetrics(other_names=('k1', 'k2', 'k3'),
                               calc_tau=True,
                               dataset_names=('ref', 'k1', 'k2', 'k3'))
        res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0))

    assert res['n_obs'] == np.array([366])
    # correlations are undefined for the constant test data
    assert np.isnan(res['R_between_ref_and_k1'])
    assert np.isnan(res['R_between_ref_and_k2'])
    assert np.isnan(res['rho_between_ref_and_k1'])
    assert np.isnan(res['rho_between_ref_and_k2'])
    assert np.isnan(res['mse_between_ref_and_k1'])
    assert np.isnan(res['mse_between_ref_and_k2'])
    assert np.isnan(res['mse_corr_between_ref_and_k1'])
    assert np.isnan(res['mse_corr_between_ref_and_k2'])

    # BUG FIX: the original used ``assert expr, np.array(...)`` — the
    # ``assert statement, message`` form — so the expected array was only an
    # assertion *message* and just truthiness of ``expr`` was checked.
    # Compare the values properly instead (0.04 == BIAS of 0.2, squared).
    np.testing.assert_almost_equal(res['mse_bias_between_ref_and_k1'],
                                   np.array([0.04], dtype='float32'))
    np.testing.assert_almost_equal(res['mse_bias_between_ref_and_k2'],
                                   np.array([0.04], dtype='float32'))

    # scipy 1.3.0 is not built for python 2.7 so we allow both for now
    assert (np.isnan(res['p_R_between_ref_and_k1'])
            or res['p_R_between_ref_and_k1'] == 1.0)
    assert (np.isnan(res['p_R_between_ref_and_k2'])
            or res['p_R_between_ref_and_k2'] == 1.0)

    assert res['RMSD_between_ref_and_k1'] == np.array([0.2], dtype='float32')
    assert res['RMSD_between_ref_and_k2'] == np.array([0.2], dtype='float32')
    assert res['BIAS_between_ref_and_k1'] == np.array([-0.2], dtype='float32')
    assert res['BIAS_between_ref_and_k2'] == np.array([0.2], dtype='float32')
    np.testing.assert_almost_equal(res['urmsd_between_ref_and_k1'],
                                   np.array([0.], dtype='float32'))
    np.testing.assert_almost_equal(res['urmsd_between_ref_and_k2'],
                                   np.array([0.], dtype='float32'))
    assert 'RSS_between_ref_and_k1' in res.keys()
    assert 'RSS_between_ref_and_k2' in res.keys()

    # each non-ref dataset has a snr, err and beta
    assert np.isnan(res['snr_k1_between_ref_and_k1_and_k2'])
    assert np.isnan(res['snr_k2_between_ref_and_k1_and_k2'])
    assert np.isnan(res['snr_k2_between_ref_and_k2_and_k3'])
    assert np.isnan(res['err_std_k1_between_ref_and_k1_and_k2'])
    np.testing.assert_almost_equal(
        res['beta_k1_between_ref_and_k1_and_k2'][0], 0.)
    np.testing.assert_almost_equal(
        res['beta_k2_between_ref_and_k1_and_k2'][0], 0.)
    np.testing.assert_almost_equal(
        res['beta_k3_between_ref_and_k1_and_k3'][0], 0.)
def test_TC_metrics_calculator():
    """Test TC metrics.

    This calculator uses a reference data set that is part of ALL triples.
    """
    df = make_some_data()
    data = df[["ref", "k1", "k2", "k3"]]
    metriccalc = TCMetrics(
        other_names=("k1", "k2", "k3"),
        calc_tau=True,
        dataset_names=("ref", "k1", "k2", "k3"),
    )
    res = metriccalc.calc_metrics(data, gpi_info=(0, 0, 0))

    assert res["n_obs"] == np.array([366])

    # correlations are undefined for the constant test data
    for key in ("R_between_ref_and_k1", "R_between_ref_and_k2",
                "rho_between_ref_and_k1", "rho_between_ref_and_k2"):
        assert np.isnan(res[key])

    # mse decomposition: all error is bias for this test data
    expected_mse = {
        "mse_between_ref_and_k1": 0.04,
        "mse_between_ref_and_k2": 0.04,
        "mse_corr_between_ref_and_k1": 0,
        "mse_corr_between_ref_and_k2": 0,
        "mse_bias_between_ref_and_k1": 0.04,
        "mse_bias_between_ref_and_k2": 0.04,
    }
    for key, value in expected_mse.items():
        np.testing.assert_almost_equal(
            res[key], np.array([value], dtype=np.float32))

    # scipy 1.3.0 is not built for python 2.7 so we allow both for now
    for key in ("p_R_between_ref_and_k1", "p_R_between_ref_and_k2"):
        assert np.isnan(res[key]) or res[key] == 1.0

    assert res["RMSD_between_ref_and_k1"] == np.array([0.2], dtype="float32")
    assert res["RMSD_between_ref_and_k2"] == np.array([0.2], dtype="float32")
    assert res["BIAS_between_ref_and_k1"] == np.array([-0.2], dtype="float32")
    assert res["BIAS_between_ref_and_k2"] == np.array([0.2], dtype="float32")

    np.testing.assert_almost_equal(res["urmsd_between_ref_and_k1"],
                                   np.array([0.0], dtype="float32"))
    np.testing.assert_almost_equal(res["urmsd_between_ref_and_k2"],
                                   np.array([0.0], dtype="float32"))

    assert "RSS_between_ref_and_k1" in res.keys()
    assert "RSS_between_ref_and_k2" in res.keys()

    # each non-ref dataset has a snr, err and beta
    for key in ("snr_k1_between_ref_and_k1_and_k2",
                "snr_k2_between_ref_and_k1_and_k2",
                "snr_k2_between_ref_and_k2_and_k3",
                "err_std_k1_between_ref_and_k1_and_k2"):
        assert np.isnan(res[key])
    for key in ("beta_k1_between_ref_and_k1_and_k2",
                "beta_k2_between_ref_and_k1_and_k2",
                "beta_k3_between_ref_and_k1_and_k3"):
        np.testing.assert_almost_equal(res[key][0], 0.0)
def create_pytesmo_validation(validation_run):
    """Build a pytesmo ``Validation`` object from a ``validation_run`` model.

    Walks the run's dataset configurations, wraps each dataset's reader with
    filtering and (optionally) anomaly adapters, names the datasets
    ("<index>-<short_name>", reference always "0-..."), and wires everything
    into a ``DataManager`` plus a metrics calculator (TC metrics when three
    or more datasets are available and TC is requested, otherwise plain
    intercomparison metrics).

    Parameters
    ----------
    validation_run :
        Django-style model instance; this function reads its
        ``dataset_configurations``, ``anomalies`` (+ baseline bounds),
        ``reference_configuration``, ``scaling_ref``, ``interval_from/to``,
        ``tcol`` and ``scaling_method`` attributes.

    Returns
    -------
    Validation
        Configured pytesmo ``Validation`` instance.
    """
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    # ds_num doubles as the running index used in non-reference dataset
    # names; the reference dataset does not consume an index.
    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        # apply both plain and parametrised filters to the raw reader
        reader = setup_filtering(
            reader, list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset, dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        if validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone
            # information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        if ((validation_run.reference_configuration) and
                (dataset_config.id == validation_run.reference_configuration.id)):
            # reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num,
                                          dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        # remember the generated names for the reference and scaling datasets
        if ((validation_run.reference_configuration) and
                (dataset_config.id == validation_run.reference_configuration.id)):
            ref_name = dataset_name
        if ((validation_run.scaling_ref) and
                (dataset_config.id == validation_run.scaling_ref.id)):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    # from here on, ds_num means the total number of datasets
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # while pytesmo can't deal with timezones, normalise the validation
        # period to utc; can be removed once pytesmo can do timezones
        startdate = validation_run.interval_from.astimezone(UTC).replace(
            tzinfo=None)
        enddate = validation_run.interval_to.astimezone(UTC).replace(
            tzinfo=None)
        period = [startdate, enddate]

    datamanager = DataManager(datasets,
                              ref_name=ref_name,
                              period=period,
                              read_ts_names='read')
    ds_names = get_dataset_names(datamanager.reference_name,
                                 datamanager.datasets,
                                 n=ds_num)

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        # if there are 3 or more dataset, do TC, exclude ref metrics
        metrics = TCMetrics(
            dataset_names=ds_names,
            tc_metrics_for_ref=False,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])
    else:
        metrics = IntercomparisonMetrics(
            dataset_names=ds_names,
            other_names=['k{}'.format(i + 1) for i in range(ds_num - 1)])

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None  # pytesmo expects None, not a sentinel string
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(datasets=datamanager,
                     spatial_ref=ref_name,
                     temporal_window=0.5,
                     scaling=scaling_method,
                     scaling_ref=scaling_ref_name,
                     metrics_calculators={
                         (ds_num, ds_num): metrics.calc_metrics
                     },
                     period=period)

    return val