def test_missing_data():
    n_datasets = 5
    npoints = 5
    nsamples = 100
    datasets = create_datasets(n_datasets, npoints, nsamples, missing=True)
    metric_calculator = PairwiseIntercomparisonMetrics()
    val = Validation(
        datasets,
        spatial_ref="0-ERA5",
        metrics_calculators={(n_datasets, 2): metric_calculator.calc_metrics},
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "H")),
    )
    gpis = list(range(npoints))
    val.calc(gpis, gpis, gpis, rename_cols=False, only_with_temporal_ref=True)
def test_PairwiseIntercomparisonMetrics_confidence_intervals():
    # tests if the correct confidence intervals are returned
    datasets, _ = testdata_random()
    matcher = make_combined_temporal_matcher(pd.Timedelta(6, "H"))
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=matcher,
        metrics_calculators={
            (4, 2): (PairwiseIntercomparisonMetrics(
                calc_spearman=True,
                calc_kendall=True,
                analytical_cis=True,
                bootstrap_cis=True,
            ).calc_metrics)
        })
    results_pw = val.calc([1], [1], [1], rename_cols=False)

    metrics_with_ci = {
        "BIAS": "bias",
        "R": "pearson_r",
        "rho": "spearman_r",
        "tau": "kendall_tau",
        "RMSD": "rmsd",
        "urmsd": "ubrmsd",
        "mse": "msd",
        "mse_bias": "mse_bias",
    }
    metrics_with_bs_ci = {
        "mse_corr": "mse_corr",
        "mse_var": "mse_var",
    }

    # reconstruct dataframe
    frames = []
    for key in datasets:
        frames.append(datasets[key]["class"].data)
    data = pd.concat(frames, axis=1)
    data.dropna(how="any", inplace=True)

    for key in results_pw:
        othername = key[0][0]
        other_col = othername.split("_")[0]
        other = data[other_col].values
        refname = key[1][0]
        ref_col = refname.split("_")[0]
        ref = data[ref_col].values

        for metric_key in metrics_with_ci:
            lower = results_pw[key][f"{metric_key}_ci_lower"]
            upper = results_pw[key][f"{metric_key}_ci_upper"]

            # calculate manually from data
            metric_func = getattr(pairwise, metrics_with_ci[metric_key])
            m, lb, ub = with_analytical_ci(metric_func, other, ref)
            # difference due to float32 vs. float64
            assert_almost_equal(upper, ub, 6)
            assert_almost_equal(lower, lb, 6)

        for metric_key in metrics_with_bs_ci:
            lower = results_pw[key][f"{metric_key}_ci_lower"]
            upper = results_pw[key][f"{metric_key}_ci_upper"]

            # calculate manually from data
            metric_func = getattr(pairwise, metrics_with_bs_ci[metric_key])
            m, lb, ub = with_bootstrapped_ci(metric_func, other, ref)
            assert_allclose(upper, ub, rtol=1e-1, atol=1e-4)
            assert_allclose(lower, lb, rtol=1e-1, atol=1e-4)
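
# The manual reconstruction in the test above uses pytesmo's confidence
# interval helpers directly. The sketch below is a minimal, illustrative
# example of how those helpers are called outside of the Validation
# framework; the random arrays are placeholder data and not part of the
# test suite.
def _example_ci_usage():
    import numpy as np
    from pytesmo.metrics import pairwise, with_analytical_ci, with_bootstrapped_ci

    rng = np.random.default_rng(42)
    x = rng.normal(size=100)
    y = x + rng.normal(scale=0.1, size=100)

    # analytical CI: returns the metric value and its lower/upper bounds
    r, r_lower, r_upper = with_analytical_ci(pairwise.pearson_r, x, y)

    # bootstrapped CI for metrics without an analytical formula
    mse_corr, lb, ub = with_bootstrapped_ci(pairwise.mse_corr, x, y)
    return (r, r_lower, r_upper), (mse_corr, lb, ub)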
def test_PairwiseIntercomparisonMetrics(testdata_generator):
    # This test first compares the PairwiseIntercomparisonMetrics to known
    # results and then confirms that it agrees with IntercomparisonMetrics
    # as expected
    datasets, expected = testdata_generator()
    # for the pairwise intercomparison metrics it's important that we use
    # make_combined_temporal_matcher
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,  # doesn't work with the constant test data
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(6, "H")),
        metrics_calculators={
            (4, 2): (PairwiseIntercomparisonMetrics(
                calc_spearman=True, analytical_cis=False).calc_metrics)
        })
    results_pw = val.calc([1], [1], [1], rename_cols=False)

    # in results_pw, there are four entries with keys like
    # (("c1name", "c1"), ("refname", "ref")), and so on.
    # Each value is a single dictionary with the values of the metrics
    expected_metrics = [
        "R", "p_R", "BIAS", "RMSD", "mse", "RSS", "mse_corr", "mse_bias",
        "urmsd", "mse_var", "n_obs", "gpi", "lat", "lon", "rho", "p_rho",
        "tau", "p_tau"
    ]
    for key in results_pw:
        assert isinstance(key, tuple)
        assert len(key) == 2
        assert all(map(lambda x: isinstance(x, tuple), key))
        assert isinstance(results_pw[key], dict)
        assert sorted(expected_metrics) == sorted(results_pw[key].keys())
        for m in expected_metrics:
            if m in expected[key]:
                assert_equal(results_pw[key][m], expected[key][m])

    # preparation of IntercomparisonMetrics run for comparison
    ds_names = list(datasets.keys())
    metrics = IntercomparisonMetrics(
        dataset_names=ds_names,
        # passing the names here explicitly, see GH issue #220
        refname="reference_name",
        other_names=ds_names[1:],
        calc_tau=True,
    )
    val = Validation(
        datasets,
        "reference_name",
        scaling=None,
        temporal_matcher=None,  # use default here
        metrics_calculators={(4, 4): metrics.calc_metrics})
    results = val.calc(1, 1, 1, rename_cols=False)

    # results is a dictionary with one entry and key
    # (('c1name', 'c1'), ('c2name', 'c2'), ('c3name', 'c3'), ('refname',
    # 'ref')); the value is a dictionary with all the results, where each
    # metric name is joined with "_between_" to the combination of datasets,
    # which is joined with "_and_", e.g. for R between ``refname`` and
    # ``c1name`` the key is "R_between_refname_and_c1name"
    common_metrics = ["n_obs", "gpi", "lat", "lon"]
    pw_metrics = list(set(expected_metrics) - set(common_metrics))
    # there's some sorting done at some point in pytesmo
    oldkey = tuple(sorted([(name, name.split("_")[0]) for name in ds_names]))
    res_old = results[oldkey]

    for key in results_pw:
        res = results_pw[key]
        # handle the full dataset metrics
        for m in common_metrics:
            assert_equal(res[m], res_old[m])
        # now get the metrics and compare to the right combination
        for m in pw_metrics:
            othername = key[0][0]
            refname = key[1][0]
            if othername == "reference_name":
                # sorting might be different, see GH #220
                othername = key[1][0]
                refname = key[0][0]
            old_m_key = f"{m}_between_{refname}_and_{othername}"
            if m == "BIAS":
                # PairwiseIntercomparisonMetrics has the result as (other,
                # ref), and therefore "bias between other and ref", compared
                # to "bias between ref and other" in IntercomparisonMetrics;
                # this is related to issue #220
                assert_equal(np.abs(res[m]), np.abs(res_old[old_m_key]))
            elif m == "urmsd":
                # the old implementation differs from the new implementation
                pass
            else:
                assert_equal(res[m], res_old[old_m_key])
def create_pytesmo_validation(validation_run):
    ds_list = []
    ref_name = None
    scaling_ref_name = None

    ds_num = 1
    for dataset_config in validation_run.dataset_configurations.all():
        reader = create_reader(dataset_config.dataset, dataset_config.version)
        reader = setup_filtering(
            reader, list(dataset_config.filters.all()),
            list(dataset_config.parametrisedfilter_set.all()),
            dataset_config.dataset, dataset_config.variable)

        if validation_run.anomalies == ValidationRun.MOVING_AVG_35_D:
            reader = AnomalyAdapter(
                reader,
                window_size=35,
                columns=[dataset_config.variable.pretty_name])
        if validation_run.anomalies == ValidationRun.CLIMATOLOGY:
            # make sure our baseline period is in UTC and without timezone
            # information
            anomalies_baseline = [
                validation_run.anomalies_from.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None),
                validation_run.anomalies_to.astimezone(tz=pytz.UTC).replace(
                    tzinfo=None)
            ]
            reader = AnomalyClimAdapter(
                reader,
                columns=[dataset_config.variable.pretty_name],
                timespan=anomalies_baseline)

        if (validation_run.reference_configuration and
                (dataset_config.id == validation_run.reference_configuration.id)):
            # reference is always named "0-..."
            dataset_name = '{}-{}'.format(0, dataset_config.dataset.short_name)
        else:
            dataset_name = '{}-{}'.format(ds_num,
                                          dataset_config.dataset.short_name)
            ds_num += 1

        ds_list.append((dataset_name, {
            'class': reader,
            'columns': [dataset_config.variable.pretty_name]
        }))

        if (validation_run.reference_configuration and
                (dataset_config.id == validation_run.reference_configuration.id)):
            ref_name = dataset_name
            ref_short_name = validation_run.reference_configuration.dataset.short_name

        if (validation_run.scaling_ref and
                (dataset_config.id == validation_run.scaling_ref.id)):
            scaling_ref_name = dataset_name

    datasets = dict(ds_list)
    ds_num = len(ds_list)

    period = None
    if validation_run.interval_from is not None and validation_run.interval_to is not None:
        # while pytesmo can't deal with timezones, normalise the validation
        # period to UTC; can be removed once pytesmo can do timezones
        startdate = validation_run.interval_from.astimezone(UTC).replace(
            tzinfo=None)
        enddate = validation_run.interval_to.astimezone(UTC).replace(
            tzinfo=None)
        period = [startdate, enddate]

    upscale_parms = None
    if validation_run.upscaling_method != "none":
        __logger.debug("Upscaling option is active")
        upscale_parms = {
            "upscaling_method": validation_run.upscaling_method,
            "temporal_stability": validation_run.temporal_stability,
        }
        upscaling_lut = create_upscaling_lut(
            validation_run=validation_run,
            datasets=datasets,
            ref_name=ref_name,
        )
        upscale_parms["upscaling_lut"] = upscaling_lut
        __logger.debug("Lookup table for non-reference datasets " +
                       ", ".join(upscaling_lut.keys()) + " created")
        __logger.debug("{}".format(upscaling_lut))

    datamanager = DataManager(
        datasets,
        ref_name=ref_name,
        period=period,
        read_ts_names='read',
        upscale_parms=upscale_parms,
    )
    ds_names = get_dataset_names(
        datamanager.reference_name, datamanager.datasets, n=ds_num)

    # set value of the metadata template according to which reference
    # dataset is used
    if ref_short_name == 'ISMN':
        metadata_template = METADATA_TEMPLATE['ismn_ref']
    else:
        metadata_template = METADATA_TEMPLATE['other_ref']

    pairwise_metrics = PairwiseIntercomparisonMetrics(
        metadata_template=metadata_template,
        calc_kendall=False,
    )
    metric_calculators = {(ds_num, 2): pairwise_metrics.calc_metrics}

    if (len(ds_names) >= 3) and (validation_run.tcol is True):
        tcol_metrics = TripleCollocationMetrics(
            ref_name,
            metadata_template=metadata_template,
        )
        metric_calculators.update({(ds_num, 3): tcol_metrics.calc_metrics})

    if validation_run.scaling_method == validation_run.NO_SCALING:
        scaling_method = None
    else:
        scaling_method = validation_run.scaling_method

    __logger.debug(f"Scaling method: {scaling_method}")
    __logger.debug(f"Scaling dataset: {scaling_ref_name}")

    val = Validation(
        datasets=datamanager,
        temporal_matcher=make_combined_temporal_matcher(pd.Timedelta(12, "H")),
        spatial_ref=ref_name,
        scaling=scaling_method,
        scaling_ref=scaling_ref_name,
        metrics_calculators=metric_calculators,
        period=period)

    return val
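
# For context, a minimal sketch of how the Validation object returned above
# is typically driven. The gpi/lon/lat lists below are purely hypothetical
# placeholders; the real qa4sm job submission wraps this call with its own
# gridding, error handling and result writing.
def _example_run_validation(validation_run):
    val = create_pytesmo_validation(validation_run)
    # hypothetical grid point indices and their coordinates
    gpis = [1, 2, 3]
    lons = [11.5, 12.0, 12.5]
    lats = [47.0, 47.5, 48.0]
    results = val.calc(gpis, lons, lats, rename_cols=False,
                       only_with_temporal_ref=True)
    return results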
def test_temporal_matching_ascat_ismn():
    """
    This test uses a CSV file of ASCAT and ISMN data to test if the temporal
    matching within the validation works as expected in a "real" setup.
    This only tests whether the number of observations matches, because this
    is the main thing the temporal matching influences.
    """
    # test with ASCAT and ISMN data
    here = Path(__file__).resolve().parent
    ascat = pd.read_csv(here / "ASCAT.csv", index_col=0, parse_dates=True)
    ismn = pd.read_csv(here / "ISMN.csv", index_col=0, parse_dates=True)
    dfs = {"ASCAT": ascat, "ISMN": ismn}
    columns = {"ASCAT": "sm", "ISMN": "soil_moisture"}
    refname = "ISMN"
    window = pd.Timedelta(12, "H")

    old_matcher = BasicTemporalMatching().combinatory_matcher
    new_matcher = make_combined_temporal_matcher(window)

    datasets = {}
    for key in ["ISMN", "ASCAT"]:
        all_columns = list(dfs[key].columns)
        ds = {"columns": [columns[key]],
              "class": DummyReader(dfs[key], all_columns)}
        datasets[key] = ds

    new_val = Validation(
        datasets,
        refname,
        scaling=None,
        temporal_matcher=new_matcher,
        metrics_calculators={
            (2, 2): PairwiseIntercomparisonMetrics().calc_metrics
        }
    )
    new_results = new_val.calc(
        1, 1, 1, rename_cols=False, only_with_temporal_ref=True)

    # old setup
    ds_names = list(datasets.keys())
    metrics = IntercomparisonMetrics(
        dataset_names=ds_names,
        # passing the names here explicitly, see GH issue #220
        refname=refname,
        other_names=ds_names[1:],
        calc_tau=True,
    )
    old_val = Validation(
        datasets,
        refname,
        scaling=None,
        temporal_matcher=old_matcher,
        metrics_calculators={
            (2, 2): metrics.calc_metrics
        }
    )
    old_results = old_val.calc(1, 1, 1, rename_cols=False)

    old_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture'))
    new_key = (('ASCAT', 'sm'), ('ISMN', 'soil_moisture'))

    assert old_results[old_key]["n_obs"] == new_results[new_key]["n_obs"]