def test_gridded_base_spatial_subset():
    """
    Test selection of spatial subset.
    """
    lons = np.arange(4)
    lats = np.arange(4)
    cells = np.array([4, 4, 2, 1])
    gpis = np.arange(4)
    grid = grids.CellGrid(lons, lats, cells, gpis=gpis)

    ds = GriddedBase("", grid, TestDataset)

    # gpi subset
    new_ds = ds.get_spatial_subset(gpis=[1, 2, 3])
    np.testing.assert_array_equal(new_ds.grid.gpis, gpis[1:])

    # cell subset
    new_ds = ds.get_spatial_subset(cells=[4])
    np.testing.assert_array_equal(new_ds.grid.gpis, gpis[:2])

    # ll_bbox subset
    ll_bbox = (0, 2, 0, 2)
    new_ds = ds.get_spatial_subset(ll_bbox=ll_bbox)
    np.testing.assert_array_equal(new_ds.grid.gpis, gpis[:3])

    # grid subset
    new_grid = grids.CellGrid(lons[2:], lats[2:], cells[2:], gpis=gpis[2:])
    new_ds = ds.get_spatial_subset(grid=new_grid)
    np.testing.assert_array_equal(new_ds.grid.gpis, new_grid.gpis)
def _load_grid_info(self):
    """
    Reads the grid info for all land points from the netCDF file
    provided by TU Wien.
    """
    grid_info_filepath = os.path.join(self.grid_path,
                                      self.grid_info_filename)
    grid_info = netCDF4.Dataset(grid_info_filepath, 'r')
    land = grid_info.variables['land_flag'][:]
    valid_points = np.where(land == 1)[0]

    # read whole grid information because this is faster than reading
    # only the valid points
    lon = grid_info.variables['lon'][:]
    lat = grid_info.variables['lat'][:]
    gpis = grid_info.variables['gpi'][:]
    cells = grid_info.variables['cell'][:]

    self.grid = grids.CellGrid(lon[valid_points], lat[valid_points],
                               cells[valid_points],
                               gpis=gpis[valid_points])
    self.grid_info_loaded = True
    grid_info.close()
def setup_TestDatasets() -> dict:
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    ds1 = GriddedTsBase("", grid, TestDataset)
    ds2 = GriddedTsBase("", grid, TestDataset)
    ds3 = GriddedTsBase("", grid, TestDataset)

    datasets = {
        'DS1': {
            'class': ds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {}
        },
        'DS2': {
            'class': ds2,
            'columns': ['y'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'grids_compatible': True
        },
        'DS3': {
            'class': ds3,
            'columns': ['x', 'y'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'grids_compatible': True
        }
    }
    return datasets
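# A minimal sketch (not a test) of how the configuration dict built above is
# consumed: 'DS1' is passed as the reference dataset name, matching the
# DataManager usage elsewhere in this module. The function name is
# illustrative only.
def _example_setup_TestDatasets_usage():
    datasets = setup_TestDatasets()
    dm = DataManager(datasets, 'DS1')
    return dm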
def load_grid(grid_filename):
    """
    Load grid file.

    Parameters
    ----------
    grid_filename : str
        Grid filename.

    Returns
    -------
    grid : pygeogrids.CellGrid
        Grid.
    """
    with netCDF4.Dataset(grid_filename) as grid_nc:
        land_gp = np.where(grid_nc.variables['land_flag'][:] == 1)[0]
        lons = grid_nc.variables['lon'][:]
        lats = grid_nc.variables['lat'][:]
        gpis = grid_nc.variables['gpi'][:]
        cells = grid_nc.variables['cell'][:]

    grid = grids.CellGrid(lons[land_gp], lats[land_gp], cells[land_gp],
                          gpis[land_gp])

    return grid
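# A hedged usage sketch for load_grid: the filename below is an assumption
# (any netCDF grid file carrying land_flag/lon/lat/gpi/cell variables), not
# a file shipped with this code. find_nearest_gpi is standard
# pygeogrids.CellGrid API.
def _example_load_grid_usage():
    grid = load_grid("TUW_WARP5_grid_info.nc")  # hypothetical filename
    gpi, distance = grid.find_nearest_gpi(16.37, 48.21)
    return gpi, distance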
def setup_three_with_two_overlapping():
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    ds1 = GriddedTsBase("", grid, TestDataset)
    ds2 = GriddedTsBase("", grid, TestDataset)
    ds3 = GriddedTsBase("", grid, TestDataset)

    datasets = {
        'DS1': {
            'class': ds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {}
        },
        'DS2': {
            'class': ds2,
            'columns': ['y'],
            'args': [],
            'kwargs': {'start': '1990-01-01'},
            'use_lut': False,
            'grids_compatible': True
        },
        'DS3': {
            'class': ds3,
            'columns': ['x', 'y'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'grids_compatible': True
        }
    }
    return datasets
def _load_grid_info(self):
    """
    Reads the grid info for all land points from the txt file provided
    by TU Wien. The first time the actual txt file is parsed and saved
    as a numpy array to speed up future data access.
    """
    grid_info_np_filepath = os.path.join(self.grid_path,
                                         self.grid_info_np_filename)

    if os.path.exists(grid_info_np_filepath):
        grid_info = np.load(grid_info_np_filepath)
    else:
        grid_info_filepath = os.path.join(self.grid_path,
                                          self.grid_info_filename)
        grid_info = np.loadtxt(grid_info_filepath, delimiter=',',
                               skiprows=1)
        np.save(os.path.join(self.grid_path, self.grid_info_np_filename),
                grid_info)

    # column layout of the grid info array: 0 = gpi, 1 = lat, 2 = lon,
    # 3 = cell
    self.grid = grids.CellGrid(grid_info[:, 2], grid_info[:, 1],
                               grid_info[:, 3].astype(np.int16),
                               gpis=grid_info[:, 0].astype(np.int32))
    self.grid_info_loaded = True
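# A minimal standalone sketch of the caching pattern used above, assuming
# only the numpy and os imports this module already relies on: parse the
# csv-style txt file once, then reuse the faster binary .npy copy on every
# later call. Function name and signature are illustrative.
def _cached_loadtxt(txt_path, npy_path):
    if os.path.exists(npy_path):
        return np.load(npy_path)
    data = np.loadtxt(txt_path, delimiter=',', skiprows=1)
    np.save(npy_path, data)
    return data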
def setup_TestDataManager():
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    ds1 = GriddedTsBase("", grid, TestDatasetRuntimeError)
    ds2 = GriddedTsBase("", grid, TestDatasetRuntimeError)
    ds3 = GriddedTsBase("", grid, TestDatasetRuntimeError,
                        ioclass_kws={'message': 'Other RuntimeError'})

    datasets = {
        'DS1': {
            'class': ds1,
            'columns': ['soil moisture'],
            'args': [],
            'kwargs': {}
        },
        'DS2': {
            'class': ds2,
            'columns': ['sm'],
            'args': [],
            'kwargs': {},
            'grids_compatible': True
        },
        'DS3': {
            'class': ds3,
            'columns': ['sm', 'sm2'],
            'args': [],
            'kwargs': {},
            'grids_compatible': True
        }
    }

    dm = DataManager(datasets, 'DS1')
    return dm
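# Sketch of what the RuntimeError-raising datasets above are for: checking
# that reads routed through the DataManager fail gracefully. read_ds is the
# pytesmo DataManager accessor; the exact outcome of a failing read (a None
# return vs. a logged warning) depends on the pytesmo version, so this is
# an assumption, not a guaranteed contract.
def _example_data_manager_error_handling():
    dm = setup_TestDataManager()
    return dm.read_ds('DS1', 1)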
def test_validation_n3_k2_masking_no_data_remains():
    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 1000},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read(1)
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)

    # masking with limit 1000 removes every observation
    assert len(new_ref_df) == 0
    nptest.assert_allclose(new_ref_df.x.values, np.arange(1000, 1000))

    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            results = process.calc(*job)
        # no data remains, so no results are expected
        tst = []
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
def test_gridded_ts_base_read_append():
    """
    Test reading in append mode in GriddedTs. Should be allowed.
    """
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    ds = GriddedTsBase("", grid, Dataset, mode='a')
    # reading must work even though the dataset was opened for appending
    assert ds.read(1) == 1
def test_gridded_ts_base_iter_ts():
    """
    Test iteration over time series in GriddedTsBase.
    """
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    ds = GriddedTsBase("", grid, TestDataset)

    # during iteration the gpis are traversed based on cells for a cell
    # grid: cell 1 holds gpi 4, cell 2 holds gpi 3 and cell 4 holds
    # gpis 1 and 2, so the cell-ordered traversal is 4, 3, 1, 2
    gpi_should = [4, 3, 1, 2]
    for ts, gpi in ds.iter_ts():
        assert gpi == gpi_should.pop(0)
def setUp(self):
    """
    Create grid and temporary location for files.
    """
    self.testdatapath = mkdtemp()
    self.testfilenames = [os.path.join(self.testdatapath, '0001.nc')]

    self.gpis = [1, 10, 11, 12]
    self.lons = [0, 0, 1, 1]
    self.lats = [1, 1, 0, 0]
    self.cells = [1, 1, 1, 1]
    self.grid = grids.CellGrid(self.lons, self.lats, self.cells,
                               self.gpis)
def test_gridded_ts_base_iter_gp_IOError_None_yield():
    """
    Test iteration over time series in GriddedTsBase.
    Should yield None if IOError is raised.
    """
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 1234]))

    ds = GriddedTsBase("", grid, Dataset)

    # during iteration the gpis are traversed based on cells for a cell grid
    gpi_should = [1234, 3, 1, 2]
    for ts, gpi in ds.iter_gp():
        assert gpi == gpi_should.pop(0)
        if gpi == 1234:
            assert ts is None
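# A minimal sketch (an illustration, not part of the tested API surface) of
# the consumer pattern the None-yield behaviour above enables: grid points
# whose read failed are skipped instead of aborting the whole iteration.
def _example_iter_gp_consumer(ds):
    for ts, gpi in ds.iter_gp():
        if ts is None:
            continue  # read failed for this gpi; skip it
        yield gpi, ts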
def test_DataManager_default_add():
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    ds1 = GriddedTsBase("", grid, TestDataset)

    datasets = {
        'DS1': {
            'class': ds1,
            'columns': ['soil moisture'],
        },
        'DS2': {
            'class': ds1,
            'columns': ['soil moisture'],
        }
    }

    dm = DataManager(datasets, 'DS1')
    assert dm.datasets == {
        'DS1': {
            'class': ds1,
            'columns': ['soil moisture'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'lut_max_dist': None,
            'grids_compatible': False
        },
        'DS2': {
            'class': ds1,
            'columns': ['soil moisture'],
            'args': [],
            'kwargs': {},
            'use_lut': False,
            'lut_max_dist': None,
            'grids_compatible': False
        }
    }
def test_validation_n3_k2_masking():
    # test result for one gpi in a cell
    tst_results_one = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250], dtype=np.int32)}}

    # test result for two gpis in a cell
    tst_results_two = {
        (('DS1', 'x'), ('DS3', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS2', 'y')): {
            'n_obs': np.array([250, 250], dtype=np.int32)},
        (('DS1', 'x'), ('DS3', 'x')): {
            'n_obs': np.array([250, 250], dtype=np.int32)}}

    # cell 4 in this example has two gpis, so jobs covering it return
    # different results; the expected outcome is keyed by the number of
    # gpis in the job
    tst_results = {1: tst_results_one,
                   2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(np.array([1, 2, 3, 4]), np.array([1, 2, 3, 4]),
                          np.array([4, 4, 2, 1]),
                          gpis=np.array([1, 2, 3, 4]))

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        'masking1': {
            'class': mds1,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 500},
            'use_lut': False,
            'grids_compatible': True},
        'masking2': {
            'class': mds2,
            'columns': ['x'],
            'args': [],
            'kwargs': {'limit': 750},
            'use_lut': False,
            'grids_compatible': True}
    }

    process = Validation(
        datasets, 'DS1',
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0).combinatory_matcher,
        scaling='lin_cdf_match',
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name='k1').calc_metrics},
        masking_datasets=mds)

    gpi_info = (1, 1, 1)
    ref_df = datasets['DS1']['class'].read_ts(1)
    new_ref_df = process.mask_dataset(ref_df, gpi_info)

    # the limit-750 mask leaves the last 250 observations
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))

    jobs = process.get_processing_jobs()
    for job in jobs:
        results = process.calc(*job)
        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(results[key]['n_obs'],
                                       tst[tst_key]['n_obs'])
def test_validation_n3_k2_masking():
    # test result for one gpi in a cell
    tst_results_one = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250], dtype=np.int32)
        },
    }

    # test result for two gpis in a cell
    tst_results_two = {
        (("DS1", "x"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS2", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS1", "x"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "x")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
        (("DS2", "y"), ("DS3", "y")): {
            "n_obs": np.array([250, 250], dtype=np.int32)
        },
    }

    # cell 4 in this example has two gpis, so jobs covering it return
    # different results; the expected outcome is keyed by the number of
    # gpis in the job
    tst_results = {1: tst_results_one, 2: tst_results_two}

    datasets = setup_TestDatasets()

    # setup masking datasets
    grid = grids.CellGrid(
        np.array([1, 2, 3, 4]),
        np.array([1, 2, 3, 4]),
        np.array([4, 4, 2, 1]),
        gpis=np.array([1, 2, 3, 4]),
    )

    mds1 = GriddedTsBase("", grid, MaskingTestDataset)
    mds2 = GriddedTsBase("", grid, MaskingTestDataset)

    mds = {
        "masking1": {
            "class": mds1,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 500},
            "use_lut": False,
            "grids_compatible": True,
        },
        "masking2": {
            "class": mds2,
            "columns": ["x"],
            "args": [],
            "kwargs": {"limit": 750},
            "use_lut": False,
            "grids_compatible": True,
        },
    }

    process = Validation(
        datasets,
        "DS1",
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0
        ).combinatory_matcher,
        scaling="lin_cdf_match",
        metrics_calculators={
            (3, 2): metrics_calculators.BasicMetrics(
                other_name="k1"
            ).calc_metrics
        },
        masking_datasets=mds,
    )

    gpi_info = (1, 1, 1)
    ref_df = datasets["DS1"]["class"].read(1)
    with warnings.catch_warnings():
        # read_ts is hard coded when using mask_dataset
        warnings.simplefilter("ignore", category=DeprecationWarning)
        new_ref_df = process.mask_dataset(ref_df, gpi_info)
    assert len(new_ref_df) == 250
    nptest.assert_allclose(new_ref_df.x.values, np.arange(750, 1000))

    jobs = process.get_processing_jobs()
    for job in jobs:
        with warnings.catch_warnings():
            # most warnings here are caused by the read_ts function that
            # cannot be changed when using a masking data set
            warnings.simplefilter("ignore", category=DeprecationWarning)
            results = process.calc(*job)
        tst = tst_results[len(job[0])]
        assert sorted(list(results)) == sorted(list(tst))
        for key, tst_key in zip(sorted(results), sorted(tst)):
            nptest.assert_almost_equal(
                results[key]["n_obs"], tst[tst_key]["n_obs"]
            )