def generate_station_list(): """ This routine generates a list of available ISMN stations and the EASEv2 grid point they are located in. """ paths = Paths() io = ISMN_Interface(paths.ismn_raw) # get metadata indices of all stations that measure soil moisture within the first 10 cm idx = io.get_dataset_ids('soil moisture', min_depth=0.0, max_depth=0.1) df = pd.DataFrame({'network': io.metadata[idx]['network'], 'station': io.metadata[idx]['station'], 'lat': io.metadata[idx]['latitude'], 'lon': io.metadata[idx]['longitude'], 'ease2_gpi': np.zeros(len(idx)).astype('int')}, index=idx) # merge indices for stations that have multiple sensors within the first 10 cm duplicate_idx = df.groupby(df.columns.tolist()).apply(lambda x: '-'.join(['%i'% i for i in x.index])).values df.drop_duplicates(inplace=True) df.index = duplicate_idx # create EASEv2 grid domain grid = EASE2() lons, lats = np.meshgrid(grid.ease_lons, grid.ease_lats) lons = lons.flatten() lats = lats.flatten() # find EASEv2 grid points in which the individual stations are located for i, (idx, data) in enumerate(df.iterrows()): print('%i / %i' % (i, len(df))) r = (lons - data.lon) ** 2 + (lats - data.lat) ** 2 df.loc[idx, 'ease2_gpi'] = np.where((r - r.min()) < 0.0001)[0][0] df.to_csv(paths.ismn / 'station_list.csv')
def generate_station_list():
    paths = Paths()
    io = ISMN_Interface(paths.ismn / 'downloaded' / 'CONUS_20100101_20190101')

    # get metadata indices of all stations that measure soil moisture within the first 10 cm
    idx = io.get_dataset_ids('soil moisture', min_depth=0.0, max_depth=0.1)
    df = pd.DataFrame(
        {
            'network': io.metadata[idx]['network'],
            'station': io.metadata[idx]['station'],
            'lat': io.metadata[idx]['latitude'],
            'lon': io.metadata[idx]['longitude'],
            'ease2_gpi': np.zeros(len(idx)).astype('int')
        },
        index=idx)

    # merge indices for stations that have multiple sensors within the first 10 cm
    # (sort=False keeps the group order aligned with drop_duplicates below)
    duplicate_idx = df.groupby(df.columns.tolist(), sort=False).apply(
        lambda x: '-'.join(['%i' % i for i in x.index])).values
    df.drop_duplicates(inplace=True)
    df.index = duplicate_idx

    # create the EASEv2 grid domain
    grid = EASE2()
    lons, lats = np.meshgrid(grid.ease_lons, grid.ease_lats)
    lons = lons.flatten()
    lats = lats.flatten()

    # find the EASEv2 grid point in which each station is located
    for i, (idx, data) in enumerate(df.iterrows()):
        print('%i / %i' % (i, len(df)))
        r = (lons - data.lon)**2 + (lats - data.lat)**2
        df.loc[idx, 'ease2_gpi'] = np.where((r - r.min()) < 0.0001)[0][0]

    df.to_csv(paths.ismn / 'station_list.csv')
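# The tolerance-based np.where lookup above returns the flattened index of the
# grid cell whose center is closest to the station; np.argmin expresses the
# same nearest-neighbour search more directly. A small sketch on a made-up
# 1-degree grid (not the real EASEv2 coordinates):
import numpy as np

grid_lons, grid_lats = np.meshgrid(np.arange(-180., 180.), np.arange(-90., 90.))
lons, lats = grid_lons.flatten(), grid_lats.flatten()

station_lon, station_lat = -105.27, 40.02

# squared euclidean distance in degree space, as in the routine above
r = (lons - station_lon) ** 2 + (lats - station_lat) ** 2
gpi = np.argmin(r)  # flattened index of the closest grid point

print(gpi, lons[gpi], lats[gpi])  # closest cell center: (-105.0, 40.0)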
def test_metadata_dataframe():
    # make sure that metadata.index represents the same values as get_dataset_ids
    testdata = os.path.join(testdata_root,
                            "Data_seperate_files_20170810_20180809")
    metadata_path = os.path.join(testdata, "python_metadata")

    cleanup(metadata_path)

    ds_one = ISMN_Interface(testdata, meta_path=metadata_path,
                            network='FR_Aqui')
    assert np.all(ds_one.metadata.index.values ==
                  ds_one.get_dataset_ids(None, -np.inf, np.inf))

    ids = ds_one.get_dataset_ids('soil_moisture')
    assert np.all(ids == ds_one.metadata.index.values)
    assert ds_one.metadata.loc[ids[0], 'variable']['val'] == 'soil_moisture'
    assert ds_one.metadata.loc[ids[0], 'network']['val'] == 'FR_Aqui'

    ds_one.close_files()
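# The metadata attribute used above is a pandas DataFrame with two-level
# columns, so 'chained' and tuple-based lookups are equivalent. A toy sketch
# of that column layout (made-up values, not real ISMN metadata):
import pandas as pd

cols = pd.MultiIndex.from_tuples([('variable', 'val'), ('network', 'val')])
meta = pd.DataFrame([['soil_moisture', 'FR_Aqui']], index=[0], columns=cols)

assert meta.loc[0, 'variable']['val'] == meta.loc[0, ('variable', 'val')]
print(meta.loc[0, ('variable', 'val')])  # soil_moisture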
ismn_reader = ISMN_Interface(ismn_data_folder)

# The validation is run based on jobs. A job consists of at least three lists or numpy arrays specifying the grid
# point index, its latitude and longitude. In the case of the ISMN we can use the `dataset_ids` that identify every
# time series in the downloaded ISMN data as our grid point index. We can then get longitude and latitude from the
# metadata of the dataset.
#
# **DO NOT CHANGE** the name ***jobs*** because it will be searched during the parallel processing!

# In[5]:

jobs = []

ids = ismn_reader.get_dataset_ids(variable='soil moisture', min_depth=0, max_depth=0.1)
for idx in ids:
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))

print("Jobs (gpi, lon, lat):")
print(jobs)

# For this small test dataset it is only one job.
#
# It is important here that the ISMN reader has a read_ts function that works by just using the `dataset_id`. In this
# way the validation framework can go through the jobs and read the correct time series.

# In[6]:
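# As a quick sanity check we can read a single time series directly via its
# dataset id, which is exactly the read_ts behaviour the framework relies on
# (shown here for the first job; purely illustrative):

gpi, lon, lat = jobs[0]
ts = ismn_reader.read_ts(gpi)
print(ts.head())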
def test_ascat_ismn_validation_metadata_rolling(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader
    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ISMN": {"class": ismn_reader, "columns": ["soil moisture"]},
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.RollingMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(
            results, save_path, ts_vars=["R", "p_R", "RMSD"]
        )

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"gpi",
        u"lon",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df["network"].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert reader.read_ts(0).index.size == 357
    assert np.all(
        reader.read_ts(1).columns.values == np.array(["R", "p_R", "RMSD"])
    )
def test_ascat_ismn_validation_metadata(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader
    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"n_obs",
        u"tau",
        u"gpi",
        u"RMSD",
        u"lon",
        u"p_tau",
        u"BIAS",
        u"p_rho",
        u"rho",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    n_obs_should = [357, 384, 1646, 1875, 1915, 467, 141, 251]
    rho_should = np.array(
        [
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
        dtype=np.float32,
    )
    rmsd_should = np.array(
        [
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
        dtype=np.float32,
    )
    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    with nc.Dataset(results_fname, mode="r") as results:
        vars = results.variables.keys()
        n_obs = results.variables["n_obs"][:].tolist()
        rho = results.variables["rho"][:]
        rmsd = results.variables["RMSD"][:]
        network = results.variables["network"][:]

        assert sorted(vars) == sorted(vars_should)
        assert sorted(n_obs) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho), sorted(rho_should), rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd), sorted(rmsd_should), rtol=1e-4)
        nptest.assert_equal(sorted(network), sorted(network_should))
def test_ascat_ismn_validation_metadata_rolling():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)

    metadata_dict_template = {
        'network': np.array(['None'], dtype='U256'),
        'station': np.array(['None'], dtype='U256'),
        'landcover': np.float32([np.nan]),
        'climate': np.array(['None'], dtype='U4')
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [{
            'network': metadata['network'],
            'station': metadata['station'],
            'landcover': metadata['landcover_2010'],
            'climate': metadata['climate']
        }]
        jobs.append(
            (idx, metadata['longitude'], metadata['latitude'], metadata_dict))

    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.RollingMetrics(
                other_name='k1',
                metadata_template=metadata_dict_template).calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path,
                               ts_vars=['R', 'p_R', 'RMSD'])

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'gpi', u'lon', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array([
        'MAQU', 'MAQU', 'SCAN', 'SCAN', 'SCAN', 'SOILSCAPE', 'SOILSCAPE',
        'SOILSCAPE'
    ], dtype='U256')

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df['network'].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert reader.read_ts(0).index.size == 357
    assert np.all(
        reader.read_ts(1).columns.values == np.array(['R', 'p_R', 'RMSD']))
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS',
        u'p_rho', u'rho', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]
    n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
    rho_should = np.array([
        0.70022893, 0.53934574, 0.69356072, 0.84189808, 0.74206454,
        0.30299741, 0.53143877, 0.62204134
    ], dtype=np.float32)
    rmsd_should = np.array([
        7.72966719, 11.58347607, 14.57700157, 13.06224251, 12.90389824,
        14.24668026, 21.19682884, 17.3883934
    ], dtype=np.float32)

    with nc.Dataset(results_fname, mode='r') as results:
        assert sorted(list(results.variables.keys())) == sorted(vars_should)
        assert sorted(
            results.variables['n_obs'][:].tolist()) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
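# The validation tests above scale ISMN against ASCAT with
# scaling='lin_cdf_match'. Conceptually, this maps the candidate onto the
# reference by matching their empirical CDFs at a few percentiles and
# interpolating linearly in between. A hedged, self-contained sketch of the
# idea (not pytesmo's actual implementation; the percentile choice is an
# assumption):
import numpy as np


def lin_cdf_match_sketch(src, ref,
                         percentiles=(0, 5, 10, 30, 50, 70, 90, 95, 100)):
    p = np.asarray(percentiles)
    src_perc = np.percentile(src, p)  # candidate CDF support points
    ref_perc = np.percentile(ref, p)  # reference CDF support points
    # piecewise-linear map from candidate space into reference space
    return np.interp(src, src_perc, ref_perc)


rng = np.random.default_rng(0)
ref = rng.normal(25, 8, 1000)      # e.g. ASCAT degree of saturation [%]
src = rng.normal(0.2, 0.05, 1000)  # e.g. ISMN volumetric soil moisture
scaled = lin_cdf_match_sketch(src, ref)
print(scaled.mean().round(2), ref.mean().round(2))  # now on a similar scale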
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                "Data_seperate_files_20170810_20180809")
        metadata_path = os.path.join(testdata, "python_metadata")

        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[], parallel=True)
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=["COSMOS"])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        with pytest.deprecated_call():
            assert len(self.ds.list_networks()) == 1
            assert len(self.ds.list_stations()) == len(
                self.ds.list_stations("COSMOS")) == 2
            assert len(self.ds.list_sensors()) == 2
            assert len(self.ds.list_sensors(station="Barrow-ARM")) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
        assert self.ds.network_for_station("ARM-1") == "COSMOS"

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure("soil_moisture"):
            assert s.name in ["ARM-1", "Barrow-ARM"]

        for s in self.ds.stations_that_measure("nonexisting"):
            raise AssertionError("Found var that doesn't exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids("soil_moisture",
                                      max_depth=100,
                                      groupby="network")
        assert list(ids.keys()) == ["COSMOS"]
        assert ids["COSMOS"] == [0, 1]

        ids = self.ds.get_dataset_ids("soil_moisture", max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids(
            ["soil_moisture"],
            max_depth=99,
            filter_meta_dict={
                "lc_2010": 210,
                "network": "COSMOS",
                "station": "Barrow-ARM",
            },
        )
        assert ids == [1]

        ids = self.ds.get_dataset_ids("novar")
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids(["soil_moisture", "shouldhavenoeffect"],
                                      0.0, 0.19)
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("soil_moisture", 0.0, 1.0)
        # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids("soil_moisture", 0.0, 1.0,
                                      filter_meta_dict={"lc_2010": 210})
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("nonexisting")
        # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert not data2.empty

    def test_read_metadata(self):
        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert all(meta == self.ds.read_metadata(1, format="pandas"))
        assert self.ds.read_metadata(1, format="dict") is not None
        assert self.ds.read_metadata([1], format="obj") is not None

        assert not self.ds.metadata.empty
        assert self.ds.metadata.loc[1]['station']['val'] \
            == self.ds.read_metadata([0, 1]).loc[1, ('station', 'val')]

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, "plot.png")
            self.ds.plot_station_locations(["soil_moisture", 'precipitation'],
                                           markersize=5,
                                           filename=outpath)
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps("soil_moisture",
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks["COSMOS"].stations["ARM-1"]
        tmin, tmax = station.get_min_max_obs_timestamp("soil_moisture",
                                                       0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals("soil_moisture", max_depth=0.19)
        assert vals == {130: "Grassland"}

        vals = self.ds.get_landcover_types("soil_moisture", max_depth=100)
        assert len(vals) == 2
        assert vals[130] == "Grassland"
        assert vals[210] == "Water"
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types("soil_moisture",
                                         max_depth=100,
                                         climate="climate_KG")
        assert len(vals) == 2
        assert vals["ET"] == "Polar Tundra"
        assert vals["Cfa"] == "Temperate Without Dry Season, Hot Summer"
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ["soil_moisture"]

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={"network": "COSMOS"}):
            for se in station.iter_sensors():
                data = se.read_data()
                # network is COSMOS, station is either ARM-1 or Barrow-ARM
                assert not data.empty
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks["COSMOS"].stations[
                "Barrow-ARM"].iter_sensors():
            data = se.read_data()
            assert not data.empty
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={"station": ["Barrow-ARM", "ARM-1"]},
        ):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable="nonexisting"):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(
            should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat

        gpi, dist = self.ds.collection.grid.find_nearest_gpi(
            int(should_lon), int(should_lat))
        assert dist != 0

        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0,
                                                     return_distance=True,
                                                     max_dist=100)
        assert station is None and dist is None

    def test_citation(self):
        with TemporaryDirectory() as out_dir:
            out_file = os.path.join(out_dir, 'citation.txt')
            refs = self.ds.collection.export_citations(out_file=out_file)
            assert all([
                net in refs.keys()
                for net in list(self.ds.collection.networks.keys())
            ])
            assert os.path.exists(out_file)
            with open(out_file, mode='r') as f:
                lines = f.readlines()
                assert len(lines) > 0
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                'Data_seperate_files_20170810_20180809')
        metadata_path = os.path.join(testdata, 'python_metadata')

        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[])
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=['COSMOS'])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        assert len(self.ds.list_networks()) == 1
        assert len(self.ds.list_stations()) == \
            len(self.ds.list_stations('COSMOS')) == 2
        assert len(self.ds.list_sensors()) == 2
        assert len(self.ds.list_sensors(station='Barrow-ARM')) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station('Barrow-ARM') == 'COSMOS'
        assert self.ds.network_for_station('ARM-1') == 'COSMOS'

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure('soil_moisture'):
            assert s.name in ['ARM-1', 'Barrow-ARM']

        for s in self.ds.stations_that_measure('nonexisting'):
            raise AssertionError("Found var that doesn't exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids('soil_moisture',
                                      max_depth=100,
                                      groupby='network')
        assert list(ids.keys()) == ['COSMOS']
        assert ids['COSMOS'] == [0, 1]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids('soil_moisture',
                                      max_depth=99,
                                      filter_meta_dict={
                                          'lc_2010': 210,
                                          'network': 'COSMOS',
                                          'station': 'Barrow-ARM'
                                      })
        assert ids == [1]

        ids = self.ds.get_dataset_ids('novar')
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 0.19)
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.)
        # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.,
                                      filter_meta_dict={'lc_2010': 210})
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('nonexisting')
        # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2 = self.ds.read_ts(1)
        assert not data2.empty
        # make sure the two sensors do not return the same series
        assert len(data1.index) != len(data2.index)

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, 'plot.png')
            self.ds.plot_station_locations('soil_moisture',
                                           markersize=5,
                                           filename=outpath)
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps('soil_moisture',
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks['COSMOS'].stations['ARM-1']
        tmin, tmax = station.get_min_max_obs_timestamp('soil_moisture',
                                                       0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals('soil_moisture', max_depth=0.19)
        assert vals == {130: 'Grassland'}

        vals = self.ds.get_landcover_types('soil_moisture', max_depth=100)
        assert len(vals) == 2
        assert vals[130] == 'Grassland'
        assert vals[210] == 'Water'
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types('soil_moisture',
                                         max_depth=100,
                                         climate='climate_KG')
        assert len(vals) == 2
        assert vals['ET'] == 'Polar Tundra'
        assert vals['Cfa'] == 'Temperate Without Dry Season, Hot Summer'
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ['soil_moisture']

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={'network': 'COSMOS'}):
            for se in station.iter_sensors():
                data = se.read_data()
                # network is COSMOS, station is either ARM-1 or Barrow-ARM
                assert not data.empty
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks['COSMOS'].stations[
                'Barrow-ARM'].iter_sensors():
            data = se.read_data()
            assert not data.empty
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={'station': ['Barrow-ARM', 'ARM-1']}):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable='nonexisting'):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(
            should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat

        gpi, dist = self.ds.collection.grid.find_nearest_gpi(
            int(should_lon), int(should_lat))
        assert dist != 0

        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0,
                                                     return_distance=True,
                                                     max_dist=100)
        assert station is None and dist is None
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture', min_depth=0, max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS',
        u'p_rho', u'rho', u'lat', u'R', u'p_R'
    ]
    n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
    rho_should = np.array([
        0.70022893, 0.53934574, 0.69356072, 0.84189808, 0.74206454,
        0.30299741, 0.53143877, 0.62204134
    ], dtype=np.float32)
    rmsd_should = np.array([
        7.72966719, 11.58347607, 14.57700157, 13.06224251, 12.90389824,
        14.24668026, 21.19682884, 17.3883934
    ], dtype=np.float32)

    with nc.Dataset(results_fname, mode='r') as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == sorted(
            n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)