Beispiel #1
0
def test_gridded_base_spatial_subset():
    """
    Check the spatial subsetting of GriddedBase for every kind of selector.

    A subset is requested by gpis, by cells, by bounding box and by grid;
    each time the gpis of the returned dataset's grid are compared with the
    expected part of the original gpis.
    """
    n_points = 4
    lons = np.arange(n_points)
    lats = np.arange(n_points)
    cells = np.array([4, 4, 2, 1])
    gpis = np.arange(n_points)

    full_grid = grids.CellGrid(lons, lats, cells, gpis=gpis)
    ds = GriddedBase("", full_grid, TestDataset)

    # selection by grid point indices
    by_gpis = ds.get_spatial_subset(gpis=[1, 2, 3])
    np.testing.assert_array_equal(by_gpis.grid.gpis, gpis[1:])

    # selection by cell number, expected to keep the first two gpis
    by_cells = ds.get_spatial_subset(cells=[4])
    np.testing.assert_array_equal(by_cells.grid.gpis, gpis[:2])

    # selection by bounding box, expected to keep the first three gpis
    by_bbox = ds.get_spatial_subset(ll_bbox=(0, 2, 0, 2))
    np.testing.assert_array_equal(by_bbox.grid.gpis, gpis[:3])

    # selection by a grid that is built from the last two points only
    tail = slice(2, None)
    tail_grid = grids.CellGrid(lons[tail], lats[tail],
                               cells[tail], gpis=gpis[tail])
    by_grid = ds.get_spatial_subset(grid=tail_grid)
    np.testing.assert_array_equal(by_grid.grid.gpis, tail_grid.gpis)
Beispiel #2
0
    def _load_grid_info(self):
        """
        Reads the grid info for all land points from the netCDF file provided
        by TU Wien.

        The file ``self.grid_info_filename`` inside ``self.grid_path`` is
        opened read-only. Only the points whose ``land_flag`` equals 1 are
        used to build ``self.grid``; ``self.grid_info_loaded`` is set to
        True afterwards.
        """
        grid_info_filepath = os.path.join(
            self.grid_path, self.grid_info_filename)

        # the context manager closes the file even if reading a variable
        # raises, which the former explicit close() call did not guarantee
        with netCDF4.Dataset(grid_info_filepath, 'r') as grid_info:
            land = grid_info.variables['land_flag'][:]

            # read whole grid information because this is faster than reading
            # only the valid points
            lon = grid_info.variables['lon'][:]
            lat = grid_info.variables['lat'][:]
            gpis = grid_info.variables['gpi'][:]
            cells = grid_info.variables['cell'][:]

        # all variables were read into memory above, so the file is no
        # longer needed for the selection of the land points
        valid_points = np.where(land == 1)[0]

        self.grid = grids.CellGrid(lon[valid_points], lat[valid_points],
                                   cells[valid_points],
                                   gpis=gpis[valid_points])
        self.grid_info_loaded = True
Beispiel #3
0
def setup_TestDatasets() -> dict:
    """
    Build the dataset configuration dictionary used by the tests.

    Returns
    -------
    datasets : dict
        Entries 'DS1', 'DS2' and 'DS3'. Each one holds its own GriddedTsBase
        instance (key 'class') on a common four point CellGrid together
        with 'columns', 'args' and 'kwargs'. 'DS2' and 'DS3' additionally
        set 'use_lut' and 'grids_compatible'.
    """
    lons = np.array([1, 2, 3, 4])
    lats = np.array([1, 2, 3, 4])
    cells = np.array([4, 4, 2, 1])
    gpis = np.array([1, 2, 3, 4])
    grid = grids.CellGrid(lons, lats, cells, gpis=gpis)

    # one independent reader per dataset, all on the same grid
    reader1, reader2, reader3 = [
        GriddedTsBase("", grid, TestDataset) for _ in range(3)]

    datasets = {}
    datasets['DS1'] = {
        'class': reader1,
        'columns': ['x'],
        'args': [],
        'kwargs': {}
    }
    datasets['DS2'] = {
        'class': reader2,
        'columns': ['y'],
        'args': [],
        'kwargs': {},
        'use_lut': False,
        'grids_compatible': True
    }
    datasets['DS3'] = {
        'class': reader3,
        'columns': ['x', 'y'],
        'args': [],
        'kwargs': {},
        'use_lut': False,
        'grids_compatible': True
    }
    return datasets
Beispiel #4
0
def load_grid(grid_filename):
    """
    Build a cell grid from the land points of a netCDF grid file.

    Parameters
    ----------
    grid_filename : str
        Grid filename.

    Returns
    -------
    grid : pygeogrids.CellGrid
        Grid made of the points whose 'land_flag' equals 1.
    """
    with netCDF4.Dataset(grid_filename) as nc:
        land_flag = nc.variables['land_flag'][:]
        # the complete variables are read while the file is still open
        lons, lats, gpis, cells = [
            nc.variables[name][:] for name in ('lon', 'lat', 'gpi', 'cell')]

    # indices of the land points
    land_idx = np.where(land_flag == 1)[0]

    return grids.CellGrid(lons[land_idx], lats[land_idx], cells[land_idx],
                          gpis[land_idx])
Beispiel #5
0
def setup_three_with_two_overlapping():
    """
    Build a configuration of three datasets on one common CellGrid.

    'DS2' is the only entry with non-empty reader kwargs
    ({'start': '1990-01-01'}).

    Returns
    -------
    datasets : dict
        Entries 'DS1', 'DS2' and 'DS3', each with its own GriddedTsBase
        instance under the key 'class'.
    """
    lons = np.array([1, 2, 3, 4])
    lats = np.array([1, 2, 3, 4])
    cells = np.array([4, 4, 2, 1])
    gpis = np.array([1, 2, 3, 4])
    grid = grids.CellGrid(lons, lats, cells, gpis=gpis)

    # one independent reader per dataset
    reader1 = GriddedTsBase("", grid, TestDataset)
    reader2 = GriddedTsBase("", grid, TestDataset)
    reader3 = GriddedTsBase("", grid, TestDataset)

    ds1_config = {
        'class': reader1,
        'columns': ['x'],
        'args': [],
        'kwargs': {}
    }
    ds2_config = {
        'class': reader2,
        'columns': ['y'],
        'args': [],
        'kwargs': {'start': '1990-01-01'},
        'use_lut': False,
        'grids_compatible': True
    }
    ds3_config = {
        'class': reader3,
        'columns': ['x', 'y'],
        'args': [],
        'kwargs': {},
        'use_lut': False,
        'grids_compatible': True
    }

    return {'DS1': ds1_config, 'DS2': ds2_config, 'DS3': ds3_config}
Beispiel #6
0
    def _load_grid_info(self):
        """
        Load the grid info and build ``self.grid`` from it.

        If the numpy file ``self.grid_info_np_filename`` exists in
        ``self.grid_path`` it is loaded directly. Otherwise the comma
        separated text file ``self.grid_info_filename`` is parsed (skipping
        its first row) and the result is saved as numpy file to speed up
        future data access.
        """
        np_filepath = os.path.join(self.grid_path,
                                   self.grid_info_np_filename)

        if not os.path.exists(np_filepath):
            # first access: parse the text file and store the numpy version
            txt_filepath = os.path.join(self.grid_path,
                                        self.grid_info_filename)
            grid_info = np.loadtxt(txt_filepath, delimiter=',', skiprows=1)
            np.save(np_filepath, grid_info)
        else:
            grid_info = np.load(np_filepath)

        # column 0 holds the gpis, column 3 the cells; columns 2 and 1 are
        # passed as first and second CellGrid argument
        gpis = grid_info[:, 0].astype(np.int32)
        cells = grid_info[:, 3].astype(np.int16)
        self.grid = grids.CellGrid(grid_info[:, 2], grid_info[:, 1],
                                   cells, gpis=gpis)

        self.grid_info_loaded = True
Beispiel #7
0
def setup_TestDataManager():
    """
    Create a DataManager whose datasets use TestDatasetRuntimeError readers.

    Returns
    -------
    dm : DataManager
        Manager over the datasets 'DS1', 'DS2' and 'DS3', created with
        'DS1' as second constructor argument. The reader of 'DS3' is
        built with ioclass_kws={'message': 'Other RuntimeError'}.
    """
    lons = np.array([1, 2, 3, 4])
    lats = np.array([1, 2, 3, 4])
    cells = np.array([4, 4, 2, 1])
    gpis = np.array([1, 2, 3, 4])
    grid = grids.CellGrid(lons, lats, cells, gpis=gpis)

    # only the third reader gets extra keyword arguments
    reader_kws = [{}, {}, {'ioclass_kws': {'message': 'Other RuntimeError'}}]
    reader1, reader2, reader3 = [
        GriddedTsBase("", grid, TestDatasetRuntimeError, **kws)
        for kws in reader_kws]

    datasets = {}
    datasets['DS1'] = {
        'class': reader1,
        'columns': ['soil moisture'],
        'args': [],
        'kwargs': {}
    }
    datasets['DS2'] = {
        'class': reader2,
        'columns': ['sm'],
        'args': [],
        'kwargs': {},
        'grids_compatible': True
    }
    datasets['DS3'] = {
        'class': reader3,
        'columns': ['sm', 'sm2'],
        'args': [],
        'kwargs': {},
        'grids_compatible': True
    }

    return DataManager(datasets, 'DS1')
Beispiel #8
0
def test_validation_n3_k2_masking_no_data_remains():
    """
    Validation with masking datasets after which no observation is left.

    The masked reference data frame has to be empty and every processing
    job has to return an empty result.
    """
    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 1000},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    # np.arange(1000, 1000) is an empty array
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            results = process.calc(*job)
        # No result keys are expected. The former loop that compared
        # 'n_obs' against an empty list of expected results could never
        # execute and was therefore removed.
        assert list(results) == []
Beispiel #9
0
def test_gridded_ts_base_read_append():
    """
    Reading has to be allowed when GriddedTsBase is used in append mode.
    """
    lons = np.array([1, 2, 3, 4])
    lats = np.array([1, 2, 3, 4])
    cells = np.array([4, 4, 2, 1])
    gpis = np.array([1, 2, 3, 4])
    grid = grids.CellGrid(lons, lats, cells, gpis=gpis)

    reader = GriddedTsBase("", grid, Dataset, mode='a')
    # reading gpi 1 is expected to give back the value 1
    assert reader.read(1) == 1
Beispiel #10
0
def test_gridded_ts_base_iter_ts():
    """
    Test iteration over time series in GriddedTsBase.

    The gpis have to be yielded in the expected order and all of them
    have to be visited.
    """
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    ds = GriddedTsBase("", grid, TestDataset)
    # during iteration the gpis are traversed based on cells for a cell grid
    gpi_should = [4, 3, 1, 2]
    for ts, gpi in ds.iter_ts():
        assert gpi == gpi_should.pop(0)
    # without this check the test would also pass if the iterator yielded
    # fewer grid points than expected, or none at all
    assert not gpi_should
Beispiel #11
0
    def setUp(self):
        """
        Prepare a temporary directory and a small cell grid for the tests.
        """
        tmp_dir = mkdtemp()
        self.testdatapath = tmp_dir
        self.testfilenames = [os.path.join(tmp_dir, '0001.nc')]

        # four grid points which all belong to cell 1
        self.gpis = [1, 10, 11, 12]
        self.lons = [0, 0, 1, 1]
        self.lats = [1, 1, 0, 0]
        self.cells = [1] * 4
        self.grid = grids.CellGrid(
            self.lons, self.lats, self.cells, self.gpis)
Beispiel #12
0
def test_gridded_ts_base_iter_gp_IOError_None_yield():
    """
    Test iteration over grid points in GriddedTsBase.
    Should yield None if IOError is raised.

    The gpis have to be yielded in the expected order and all of them
    have to be visited.
    """
    grid = grids.CellGrid(np.array([1, 2, 3, 4]),
                          np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 1234]))

    ds = GriddedTsBase("", grid, Dataset)
    # during iteration the gpis are traversed based on cells for a cell grid
    gpi_should = [1234, 3, 1, 2]
    for ts, gpi in ds.iter_gp():
        assert gpi == gpi_should.pop(0)
        # NOTE(review): gpi 1234 is presumably the one for which the reader
        # raises IOError - confirm against the Dataset test class
        if gpi == 1234:
            assert ts is None
    # without this check the test would also pass if the iterator yielded
    # fewer grid points than expected, or none at all
    assert not gpi_should
Beispiel #13
0
def test_DataManager_default_add():
    """
    DataManager has to add default values for dataset options that were
    not given: 'args', 'kwargs', 'use_lut', 'lut_max_dist' and
    'grids_compatible'.
    """
    lons = np.array([1, 2, 3, 4])
    lats = np.array([1, 2, 3, 4])
    cells = np.array([4, 4, 2, 1])
    gpis = np.array([1, 2, 3, 4])
    grid = grids.CellGrid(lons, lats, cells, gpis=gpis)

    ds1 = GriddedTsBase("", grid, TestDataset)
    names = ('DS1', 'DS2')

    # both datasets share the reader and only define the mandatory keys
    datasets = {
        name: {'class': ds1, 'columns': ['soil moisture']}
        for name in names
    }

    dm = DataManager(datasets, 'DS1')

    expected = {
        name: {
            'class': ds1,
            'columns': ['soil moisture'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'lut_max_dist': None,
            'grids_compatible': False
        }
        for name in names
    }
    assert dm.datasets == expected
Beispiel #14
0
def test_validation_n3_k2_masking():
    """
    Validation of three datasets in combinations of two with masking.

    After masking, 250 observations of the reference dataset have to be
    left, and every processing job has to report 250 observations per
    grid point for each dataset combination.
    """
    # test result for one gpi in a cell
    tst_results_one = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250], dtype=np.int32)}}

    # test result for two gpis in a cell
    tst_results_two = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250, 250], dtype=np.int32)}}

    # cell 4 in this example has two gpis so it returns different results.
    # The expected results are looked up by the number of gpis of a job
    # (the key 1 used to be listed twice in this dictionary).
    tst_results = {1: tst_results_one,
                   2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]), gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 750},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read_ts(1)
    new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        # the keys of both dictionaries are equal at this point, so one
        # key is sufficient to address the result and its expectation
        for key in sorted(tst):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[key]['n_obs'])
Beispiel #15
0
def test_validation_n3_k2_masking():
    """
    Validation of three datasets in combinations of two with masking.

    After masking, 250 observations of the reference dataset have to be
    left, and every processing job has to report 250 observations per
    grid point for each dataset combination.
    """
    # test result for one gpi in a cell
    tst_results_one = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
    }

    # test result for two gpis in a cell
    tst_results_two = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
    }

    # cell 4 in this example has two gpis so it returns different results.
    # The expected results are looked up by the number of gpis of a job
    # (the key 1 used to be listed twice in this dictionary).
    tst_results = {1: tst_results_one, 2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 750},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.simplefilter(
            "ignore", category=DeprecationWarning
        )  # read_ts is hard coded when using mask_data
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:

        with warnings.catch_warnings():
            # most warnings here are caused by the read_ts function that cannot
            # be changed when using a masking data set
            warnings.simplefilter("ignore", category=DeprecationWarning)
            results = process.calc(*job)

        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        # the keys of both dictionaries are equal at this point, so one
        # key is sufficient to address the result and its expectation
        for key in sorted(tst):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[key]["n_obs"]
            )
Beispiel #16
0
def test_validation_n3_k2_masking_no_data_remains():
    """
    Validation with masking datasets after which no observation is left.

    The masked reference data frame has to be empty and every processing
    job has to return an empty result.
    """
    datasets = setup_TestDatasets()

    # setup masking datasets

    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 1000},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 0
    # np.arange(1000, 1000) is an empty array
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))
    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            results = process.calc(*job)
        # No result keys are expected. The former loop that compared
        # "n_obs" against an empty list of expected results could never
        # execute and was therefore removed.
        assert list(results) == []