Example #1
def test_validation_n3_k2_temporal_matching_no_matches():

    tst_results = {}

    datasets = setup_two_without_overlap()

    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #2
def test_validation_error_n2_k2():

    datasets = setup_TestDatasets()

    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    # n less than the number of datasets is no longer allowed
    with pytest.raises(ValueError):
        Validation(
            dm,
            "DS1",
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0
            ).combinatory_matcher,
            scaling="lin_cdf_match",
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(
                    other_name="k1"
                ).calc_metrics
            },
        )
Example #3
def test_validation_n3_k2_temporal_matching_no_matches2():

    tst_results = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([1000], dtype=np.int32),
            "tau": np.array([np.nan], dtype=np.float32),
            "gpi": np.array([4], dtype=np.int32),
            "RMSD": np.array([0.0], dtype=np.float32),
            "lon": np.array([4.0]),
            "p_tau": np.array([np.nan], dtype=np.float32),
            "BIAS": np.array([0.0], dtype=np.float32),
            "p_rho": np.array([0.0], dtype=np.float32),
            "rho": np.array([1.0], dtype=np.float32),
            "lat": np.array([4.0]),
            "R": np.array([1.0], dtype=np.float32),
            "p_R": np.array([0.0], dtype=np.float32),
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([1000], dtype=np.int32),
            "tau": np.array([np.nan], dtype=np.float32),
            "gpi": np.array([4], dtype=np.int32),
            "RMSD": np.array([0.0], dtype=np.float32),
            "lon": np.array([4.0]),
            "p_tau": np.array([np.nan], dtype=np.float32),
            "BIAS": np.array([0.0], dtype=np.float32),
            "p_rho": np.array([0.0], dtype=np.float32),
            "rho": np.array([1.0], dtype=np.float32),
            "lat": np.array([4.0]),
            "R": np.array([1.0], dtype=np.float32),
            "p_R": np.array([0.0], dtype=np.float32),
        },
    }

    datasets = setup_three_with_two_overlapping()
    dm = DataManager(
        datasets,
        "DS1",
        read_ts_names={d: "read" for d in ["DS1", "DS2", "DS3"]},
    )

    process = Validation(
        dm,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
    )

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #4
def test_validation_n3_k2():

    tst_results = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([1000], dtype=np.int32),
            'tau': np.array([np.nan], dtype=np.float32),
            'gpi': np.array([4], dtype=np.int32),
            'RMSD': np.array([0.], dtype=np.float32),
            'lon': np.array([4.]),
            'p_tau': np.array([np.nan], dtype=np.float32),
            'BIAS': np.array([0.], dtype=np.float32),
            'p_rho': np.array([0.], dtype=np.float32),
            'rho': np.array([1.], dtype=np.float32),
            'lat': np.array([4.]),
            'R': np.array([1.], dtype=np.float32),
            'p_R': np.array([0.], dtype=np.float32)}}

    datasets = setup_TestDatasets()

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #5
def test_validation_n3_k2_masking_no_data_remains():

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 1000},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            results = process.calc(*job)
        tst = []
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results),
                                sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
Example #6
def test_validation_error_n2_k2():

    datasets = setup_TestDatasets()

    dm = DataManager(datasets, 'DS1', read_ts_names={d: 'read' for d in ['DS1', 'DS2', 'DS3']})

    # n less than the number of datasets is no longer allowed
    with pytest.raises(ValueError):
        Validation(
            dm, 'DS1',
            temporal_matcher=temporal_matchers.BasicTemporalMatching(
                window=1 / 24.0).combinatory_matcher,
            scaling='lin_cdf_match',
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})
Example #7
def test_validation_n2_k2_temporal_matching_no_matches():

    tst_results = {}

    datasets = setup_two_without_overlap()

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics})

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        assert sorted(list(results)) == sorted(list(tst_results))
Example #8
# (end of the `datasets` dictionary definition; the beginning of this
# excerpt is truncated)

# The datasets dictionary contains all the information about the datasets to read. The `class` is the dataset class
# to use which we have already initialized. The `columns` key describes which columns of the dataset interest us for
# validation. This is a mandatory field telling the framework which other columns to ignore. In this case the columns
# `soil moisture_flag` and `soil moisture_orig_flag` will be ignored by the ISMN reader. We can also specify
# additional keywords that should be given to the `read_ts` method of the dataset reader. In this case we want the
# ASCAT reader to mask the ASCAT soil moisture using the included frozen and snow probabilities as well as the SSF.
# There are also other keys that can be used here. Please see the documentation for explanations.
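
# As a reference, here is a minimal sketch of such a `datasets` dictionary,
# modeled on the ISMN/ASCAT examples elsewhere in this document. The reader
# objects (`ismn_reader`, `ascat_reader`) are assumed to have been
# initialized already, as in the cells above.

example_datasets = {
    'ISMN': {
        'class': ismn_reader,          # an already-initialized reader object
        'columns': ['soil moisture'],  # columns to use for validation
    },
    'ASCAT': {
        'class': ascat_reader,
        'columns': ['sm'],
        # keyword arguments passed on to the reader's read_ts method
        'kwargs': {'mask_frozen_prob': 80,
                   'mask_snow_prob': 80,
                   'mask_ssf': True},
    },
}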

# In[8]:

period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]
basic_metrics = metrics_calculators.BasicMetrics(other_name='k1')

process = Validation(datasets,
                     'ISMN',
                     temporal_ref='ASCAT',
                     scaling='lin_cdf_match',
                     scaling_ref='ASCAT',
                     metrics_calculators={(2, 2): basic_metrics.calc_metrics},
                     period=period)

# During the initialization of the Validation class we can also pass the other settings it needs. In this
# case it uses the datasets we specified earlier. The spatial reference is the `'ISMN'` dataset, given as the
# second argument. The `metrics_calculators` argument looks a little strange, so let's look at it in more detail.
#
# It is a dictionary with a tuple as the key and a function as the value. The key tuple `(n, k)` has the following
# meaning: `n` datasets are temporally matched together and then given in sets of `k` columns to the metric
# calculator.
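
# For instance, the `(2, 2)` key above means: temporally match 2 datasets,
# then hand the matched data to `calc_metrics` in pairs of columns. A
# three-dataset setup, as in the n3_k2 tests in this document, would use a
# `(3, 2)` key (a sketch; `metrics_calculators` here names the pytesmo
# module imported in the other examples):

example_metrics_calculators = {
    (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
}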
Example #9
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..', 'test-data',
                                     'sat', 'ascat', 'netcdf', '55R22')

    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..', 'test-data',
                                     'sat', 'ascat', 'netcdf', 'grid')

    static_layers_folder = os.path.join(os.path.dirname(__file__),
                                        '..', 'test-data', 'sat',
                                        'h_saf', 'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder, ascat_grid_folder,
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader

    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..', 'test-data',
                                    'ismn', 'multinetwork', 'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture',
        min_depth=0,
        max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {'mask_frozen_prob': 80,
                       'mask_snow_prob': 80,
                       'mask_ssf': True}
        }}

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets, 'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau',
                   u'BIAS', u'p_rho', u'rho', u'lat', u'R', u'p_R']
    n_obs_should = [384,  357,  482,  141,  251, 1927, 1887, 1652]
    rho_should = np.array([0.70022893, 0.53934574,
                           0.69356072, 0.84189808,
                           0.74206454, 0.30299741,
                           0.53143877, 0.62204134], dtype=np.float32)

    rmsd_should = np.array([7.72966719, 11.58347607,
                            14.57700157, 13.06224251,
                            12.90389824, 14.24668026,
                            21.19682884, 17.3883934], dtype=np.float32)
    with nc.Dataset(results_fname) as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == sorted(
            n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
Example #10
def test_validation_n3_k2_masking():

    # test result for one gpi in a cell
    tst_results_one = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250], dtype=np.int32)}}

    # test result for two gpis in a cell
    tst_results_two = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250, 250], dtype=np.int32)}}

    # cell 4 in this example has two gpis, so it returns different results.
    tst_results = {1: tst_results_one,
                   2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 750},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read_ts(1)
    new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results),
                                sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
Example #11
def test_ascat_ismn_validation(ascat_reader, ismn_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    jobs = []

    ids = ismn_reader.get_dataset_ids(variable="soil moisture",
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata["longitude"], metadata["latitude"]))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"]
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           "ISMN",
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name="k1").calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")
    # targets
    target_vars = {
        "n_obs": [357, 384, 1646, 1875, 1915, 467, 141, 251],
        "rho":
        np.array([
            0.53934574, 0.7002289, 0.62200236, 0.53647155, 0.30413666,
            0.6740655, 0.8418981, 0.74206454
        ],
                 dtype=np.float32),
        "RMSD":
        np.array([
            11.583476, 7.729667, 17.441547, 21.125721, 14.31557, 14.187225,
            13.0622425, 12.903898
        ],
                 dtype=np.float32)
    }

    check_results(
        filename=results_fname,
        target_vars=target_vars,
    )
Example #12
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')

    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    ascat_reader = AscatH25_SSM(ascat_data_folder, ascat_grid_folder)
    ascat_reader.read_bulk = True
    ascat_reader._load_grid_info()

    # Initialize ISMN reader

    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS', u'p_rho',
        u'rho', u'lat', u'R', u'p_R'
    ]
    n_obs_should = [360, 385, 1644, 1881, 1927, 479, 140, 251]
    rho_should = np.array([
        0.546187, 0.717398, 0.620892, 0.532465, 0.302997, 0.694713, 0.840592,
        0.742065
    ],
                          dtype=np.float32)

    rmsd_should = np.array([
        11.536263, 7.545650, 17.451935, 21.193714, 14.246680, 14.494674,
        13.173215, 12.903898
    ],
                           dtype=np.float32)
    with nc.Dataset(results_fname) as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(
            results.variables['n_obs'][:].tolist()) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
Example #13
def test_validation_n3_k2_masking():

    # test result for one gpi in a cell
    tst_results_one = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
    }

    # test result for two gpis in a cell
    tst_results_two = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
    }

    # cell 4 in this example has two gpis, so it returns different results.
    tst_results = {1: tst_results_one, 2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 750},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.simplefilter(
            "ignore", category=DeprecationWarning
        )  # read_ts is hard-coded when using masking datasets
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:

        with warnings.catch_warnings():
            # most warnings here are caused by the read_ts function that cannot
            # be changed when using a masking data set
            warnings.simplefilter("ignore", category=DeprecationWarning)
            results = process.calc(*job)

        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[tst_key]["n_obs"]
            )
Example #14
def test_validation_n3_k2_masking_no_data_remains():

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 1000},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            results = process.calc(*job)
        tst = []
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[tst_key]["n_obs"]
            )
Example #15
def test_ascat_ismn_validation_metadata(ascat_reader, ismn_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    jobs = []

    ids = ismn_reader.get_dataset_ids(variable="soil moisture",
                                      min_depth=0,
                                      max_depth=0.1)

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [{
            "network": metadata["network"],
            "station": metadata["station"],
            "landcover": metadata["landcover_2010"],
            "climate": metadata["climate"],
        }]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           "ISMN",
                           period,
                           read_ts_names=read_ts_names)
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(
                other_name="k1",
                metadata_template=metadata_dict_template).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")
    target_vars = {
        "n_obs": [357, 384, 1646, 1875, 1915, 467, 141, 251],
        "rho":
        np.array([
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
                 dtype=np.float32),
        "RMSD":
        np.array([
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
                 dtype=np.float32),
        "network":
        np.array(
            [
                "MAQU",
                "MAQU",
                "SCAN",
                "SCAN",
                "SCAN",
                "SOILSCAPE",
                "SOILSCAPE",
                "SOILSCAPE",
            ],
            dtype="U256",
        )
    }
    vars_should = [
        'BIAS', 'R', 'RMSD', '_row_size', 'climate', 'gpi', 'idx', 'landcover',
        'lat', 'lon', 'n_obs', 'network', 'p_R', 'p_rho', 'p_tau', 'rho',
        'station', 'tau', 'time'
    ]

    check_results(filename=results_fname,
                  target_vars=target_vars,
                  variables=vars_should)
Example #16
def test_validation_with_averager(ascat_reader, ismn_reader):
    """
    Test processing framework with the averaging module. ASCAT and ISMN data
    are used here with no geographical considerations (the lookup table is
    provided further upstream and already contains this information).
    """
    while hasattr(ascat_reader, 'cls'):
        ascat_reader = ascat_reader.cls
    # lookup table between the ascat and ismn points - not geographically correct
    upscaling_lut = {
        "ISMN": {
            1814367: [(0, 102.1333, 33.8833), (1, 102.1333, 33.6666)],
            1803695: [(2, -86.55, 34.783), (3, -97.083, 37.133),
                      (4, -105.417, 34.25)],
            1856312: [(5, -120.9675, 38.43003), (6, -120.78559, 38.14956),
                      (7, -120.80639, 38.17353)]
        }
    }
    gpis = (1814367, 1803695, 1856312)
    lons, lats = [], []
    for gpi in gpis:
        lon, lat = ascat_reader.grid.gpi2lonlat(gpi)
        lons.append(lon)
        lats.append(lat)

    jobs = [(gpis, lons, lats)]

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            }
        },
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets,
        "ASCAT",
        period,
        read_ts_names=read_ts_names,
        upscale_parms={
            "upscaling_method": "average",
            "temporal_stability": True,
            "upscaling_lut": upscaling_lut,
        },
    )
    process = Validation(
        datasets,
        "ASCAT",
        temporal_ref="ISMN",
        scaling="lin_cdf_match",
        scaling_ref="ISMN",
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name="k1").calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 "ASCAT.sm_with_ISMN.soil moisture.nc")

    target_vars = {
        "n_obs": [764, 2392, 904],
        "rho": np.array([-0.012487, 0.255156, 0.635517], dtype=np.float32),
        "RMSD": np.array([0.056428, 0.056508, 0.116294], dtype=np.float32),
        "R": np.array([-0.012335, 0.257671, 0.657239], dtype=np.float32)
    }

    check_results(
        filename=results_fname,
        target_vars=target_vars,
    )
Example #17
def test_ascat_ismn_validation_metadata(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader

    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []

    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!

    save_path = tempfile.mkdtemp()

    # Create the validation object.

    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"n_obs",
        u"tau",
        u"gpi",
        u"RMSD",
        u"lon",
        u"p_tau",
        u"BIAS",
        u"p_rho",
        u"rho",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    vars_should.extend(metadata_dict_template.keys())

    n_obs_should = [357, 384, 1646, 1875, 1915, 467, 141, 251]
    rho_should = np.array(
        [
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
        dtype=np.float32,
    )
    rmsd_should = np.array(
        [
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
        dtype=np.float32,
    )

    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    with nc.Dataset(results_fname, mode="r") as results:
        vars = results.variables.keys()
        n_obs = results.variables["n_obs"][:].tolist()
        rho = results.variables["rho"][:]
        rmsd = results.variables["RMSD"][:]
        network = results.variables["network"][:]

    assert sorted(vars) == sorted(vars_should)
    assert sorted(n_obs) == sorted(n_obs_should)
    nptest.assert_allclose(sorted(rho), sorted(rho_should), rtol=1e-4)
    nptest.assert_allclose(sorted(rmsd), sorted(rmsd_should), rtol=1e-4)
    nptest.assert_equal(sorted(network), sorted(network_should))