import numpy as np
import pandas as pd
from ismn.interface import ISMN_Interface


def resample_timeseries():
    paths = Paths()  # project-specific path helper
    io = ISMN_Interface(paths.ismn / 'downloaded' / 'CONUS_20100101_20190101')

    # Get all stations / sensors for each grid cell.
    lut = pd.read_csv(paths.ismn / 'station_list.csv', index_col=0)
    lut = lut.groupby('ease2_gpi').apply(lambda x: '-'.join([i for i in x.index]))

    dir_out = paths.ismn / 'timeseries'

    for cnt, (gpi, indices) in enumerate(lut.items()):
        print('%i / %i' % (cnt, len(lut)))

        fname = dir_out / ('%i.csv' % gpi)
        idx = indices.split('-')

        # Only one station within the grid cell
        if len(idx) == 1:
            try:
                ts = io.read_ts(int(idx[0]))
                ts = ts[ts['soil moisture_flag'] == 'G']['soil moisture']
                ts.tz_convert(None).to_csv(fname, float_format='%.4f')
            except Exception:
                print('Corrupt file: ' + io.metadata[int(idx[0])]['filename'])

        # Multiple stations within the grid cell
        else:
            df = []
            for i in idx:
                try:
                    ts = io.read_ts(int(i))
                    df += [ts[ts['soil moisture_flag'] == 'G']['soil moisture']]
                except Exception:
                    print('Corrupt file: ' + io.metadata[int(i)]['filename'])
            if len(df) == 0:
                continue
            df = pd.concat(df, axis=1)
            df.columns = np.arange(len(df.columns))

            # Match the temporal mean and standard deviation to those of the
            # station with the maximum temporal coverage.
            n = np.array([len(df[i].dropna()) for i in df])
            ref = np.where(n == n.max())[0][0]
            for col in df:
                if col != ref:
                    df[col] = ((df[col] - df[col].mean()) / df[col].std()
                               * df[ref].std() + df[ref].mean())

            # Average the measurements of all stations.
            df.mean(axis='columns').tz_convert(None).to_csv(fname, float_format='%.4f')
def test_timezone_adapter(self):
    c3s_data_folder = path.join(
        Dataset.objects.get(short_name='C3S').storage_path,
        'C3S_V201706/TCDR/063_images_to_ts/combined-daily')
    c3s_reader = c3s_read(c3s_data_folder)
    timezone_reader = TimezoneAdapter(c3s_reader)

    orig_data = c3s_reader.read_ts(-155.42, 19.78)
    data = timezone_reader.read_ts(-155.42, 19.78)
    self.assertTrue(
        np.array_equal(orig_data.index.values, data.index.values))
    self.assertTrue(not hasattr(data.index, 'tz') or data.index.tz is None)

    orig_data = c3s_reader.read(-155.42, 19.78)
    data = timezone_reader.read(-155.42, 19.78)
    self.assertTrue(
        np.array_equal(orig_data.index.values, data.index.values))
    self.assertTrue((not hasattr(data.index, 'tz'))
                    or (data.index.tz is None))

    ismn_data_folder = path.join(
        Dataset.objects.get(short_name='ISMN').storage_path,
        'ISMN_V20191211')
    ismn_reader = ISMN_Interface(ismn_data_folder)
    timezone_reader2 = TimezoneAdapter(ismn_reader)

    orig_data = ismn_reader.read_ts(0)
    data = timezone_reader2.read_ts(0)
    self.assertTrue(
        np.array_equal(orig_data.index.values, data.index.values))
    self.assertTrue((not hasattr(data.index, 'tz'))
                    or (data.index.tz is None))
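# The behaviour exercised by the test above can be pictured with a minimal
# sketch: a wrapper that forwards read calls to the underlying reader and
# drops timezone information from the returned index, so the values stay
# identical but the index becomes tz-naive. This is an illustration only,
# assuming the real TimezoneAdapter behaves similarly; the class and method
# names below are hypothetical.

import pandas as pd


class NaiveTimezoneAdapter:
    """Hypothetical wrapper that strips tz info from a reader's output."""

    def __init__(self, reader):
        self.reader = reader

    def _strip_tz(self, data: pd.DataFrame) -> pd.DataFrame:
        # Only convert if the index is timezone-aware; tz_convert(None)
        # converts to UTC and removes the tz attribute.
        if hasattr(data.index, 'tz') and data.index.tz is not None:
            data.index = data.index.tz_convert(None)
        return data

    def read_ts(self, *args, **kwargs):
        return self._strip_tz(self.reader.read_ts(*args, **kwargs))

    def read(self, *args, **kwargs):
        return self._strip_tz(self.reader.read(*args, **kwargs))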
ids = ismn_reader.get_dataset_ids(
    variable='soil moisture', min_depth=0, max_depth=0.1)

jobs = []
for idx in ids:
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))

print("Jobs (gpi, lon, lat):")
print(jobs)

# For this small test dataset it is only one job.
#
# It is important here that the ISMN reader has a read_ts function that works
# by just using the `dataset_id`. In this way the validation framework can go
# through the jobs and read the correct time series.

# In[6]:

data = ismn_reader.read_ts(ids[0])
print('ISMN data example:')
print(data.head())

# ## Initialize the Validation class
#
# The Validation class is the heart of the validation framework. It contains
# the information about which datasets to read, using which arguments or
# keywords, and whether they are spatially compatible. It also contains the
# settings about which metric calculators to use and how to perform the
# scaling into the reference data space. It is initialized in the following
# way:

# In[7]:

datasets = {
    'ISMN': {
        'class': ismn_reader,
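# The `datasets` dictionary above is cut off here. For orientation, a hedged
# sketch of how such a configuration is typically completed (key names follow
# the pytesmo DataManager convention; `ascat_reader` and its keyword
# arguments are placeholders, not part of the original text):
#
# datasets = {
#     'ISMN': {
#         'class': ismn_reader,          # reference reader initialized above
#         'columns': ['soil moisture'],  # column(s) taken from read_ts()
#     },
#     'ASCAT': {
#         'class': ascat_reader,         # hypothetical satellite data reader
#         'columns': ['sm'],
#         'kwargs': {'mask_frozen_prob': 80},  # forwarded to its read_ts()
#     },
# }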
def resample_ismn():
    """
    Resample ISMN data onto the EASE2 grid and store the data for each grid
    cell in a .csv file. If a single grid cell contains multiple stations,
    they are averaged. A grid look-up table needs to be created first
    (method: ancillary.grid.create_lut).
    """
    paths = Paths()
    io = ISMN_Interface(paths.ismn_raw)

    # Get all stations / sensors for each grid cell.
    lut = pd.read_csv(paths.ismn / 'station_list.csv', index_col=0)
    lut = lut.groupby('ease2_gpi').apply(lambda x: '-'.join([i for i in x.index]))

    dir_out = paths.ismn / 'timeseries'
    if not dir_out.exists():
        dir_out.mkdir()

    for cnt, (gpi, indices) in enumerate(lut.items()):
        print('%i / %i' % (cnt, len(lut)))

        fname = dir_out / ('%i.csv' % gpi)
        idx = indices.split('-')

        # Only one station within the grid cell
        if len(idx) == 1:
            try:
                ts = io.read_ts(int(idx[0]))
                # Keep only "good" data based on the ISMN quality flags.
                ts = ts[ts['soil moisture_flag'] == 'G']['soil moisture']
                ts.tz_convert(None).to_csv(fname, float_format='%.4f')
            except Exception:
                print('Corrupt file: ' + io.metadata[int(idx[0])]['filename'])

        # Multiple stations within the grid cell
        else:
            df = []
            for i in idx:
                try:
                    ts = io.read_ts(int(i))
                    # Keep only "good" data based on the ISMN quality flags.
                    df += [ts[ts['soil moisture_flag'] == 'G']['soil moisture']]
                except Exception:
                    print('Corrupt file: ' + io.metadata[int(i)]['filename'])
            if len(df) == 0:
                continue
            df = pd.concat(df, axis=1)
            df.columns = np.arange(len(df.columns))

            # Match the temporal mean and standard deviation to those of the
            # station with the maximum temporal coverage.
            n = np.array([len(df[i].dropna()) for i in df])
            ref = np.where(n == n.max())[0][0]
            for col in df:
                if col != ref:
                    df[col] = ((df[col] - df[col].mean()) / df[col].std()
                               * df[ref].std() + df[ref].mean())

            # Average the measurements of all stations.
            df.mean(axis='columns').tz_convert(None).to_csv(fname, float_format='%.4f')
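# The inner loop above is a simple mean/std matching: each station series is
# standardized and rescaled to the statistics of the reference station before
# averaging. A minimal, self-contained demonstration on synthetic data (all
# values and variable names below are illustrative only):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
t = pd.date_range('2010-01-01', periods=365, freq='D')
ref = pd.Series(0.25 + 0.05 * rng.standard_normal(365), index=t)    # reference station
other = pd.Series(0.35 + 0.10 * rng.standard_normal(365), index=t)  # second station

# Same transformation as in the loop above.
rescaled = (other - other.mean()) / other.std() * ref.std() + ref.mean()

print(round(rescaled.mean(), 3), round(ref.mean(), 3))  # means now match
print(round(rescaled.std(), 3), round(ref.std(), 3))    # stds now match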
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                "Data_seperate_files_20170810_20180809")
        metadata_path = os.path.join(testdata, "python_metadata")

        cleanup(metadata_path)

        ds = ISMN_Interface(testdata, network=[], parallel=True)
        assert ds.networks == OrderedDict()

        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=["COSMOS"])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        with pytest.deprecated_call():
            assert len(self.ds.list_networks()) == 1
            assert len(self.ds.list_stations()) == len(
                self.ds.list_stations("COSMOS")) == 2
            assert len(self.ds.list_sensors()) == 2
            assert len(self.ds.list_sensors(station="Barrow-ARM")) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
        assert self.ds.network_for_station("ARM-1") == "COSMOS"

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure("soil_moisture"):
            assert s.name in ["ARM-1", "Barrow-ARM"]

        for s in self.ds.stations_that_measure("nonexisting"):
            raise AssertionError("Found var that doesn't exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids("soil_moisture",
                                      max_depth=100,
                                      groupby="network")
        assert list(ids.keys()) == ["COSMOS"]
        assert ids["COSMOS"] == [0, 1]

        ids = self.ds.get_dataset_ids("soil_moisture", max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids(
            ["soil_moisture"],
            max_depth=99,
            filter_meta_dict={
                "lc_2010": 210,
                "network": "COSMOS",
                "station": "Barrow-ARM",
            },
        )
        assert ids == [1]

        ids = self.ds.get_dataset_ids("novar")
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids(["soil_moisture", "shouldhavenoeffect"],
                                      0.0, 0.19)  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("soil_moisture", 0.0, 1.0)  # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids(
            "soil_moisture", 0.0, 1.0,
            filter_meta_dict={"lc_2010": 210})  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("nonexisting")  # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert not data2.empty

    def test_read_metadata(self):
        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert all(meta == self.ds.read_metadata(1, format="pandas"))
        assert self.ds.read_metadata(1, format="dict") is not None
        assert self.ds.read_metadata([1], format="obj") is not None

        assert not self.ds.metadata.empty
        assert self.ds.metadata.loc[1]['station']['val'] \
            == self.ds.read_metadata([0, 1]).loc[1, ('station', 'val')]

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, "plot.png")
            self.ds.plot_station_locations(["soil_moisture", "precipitation"],
                                           markersize=5,
                                           filename=outpath)

            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps("soil_moisture",
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks["COSMOS"].stations["ARM-1"]
        tmin, tmax = station.get_min_max_obs_timestamp("soil_moisture", 0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals("soil_moisture", max_depth=0.19)
        assert vals == {130: "Grassland"}

        vals = self.ds.get_landcover_types("soil_moisture", max_depth=100)
        assert len(vals) == 2
        assert vals[130] == "Grassland"
        assert vals[210] == "Water"
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types("soil_moisture",
                                         max_depth=100,
                                         climate="climate_KG")
        assert len(vals) == 2
        assert vals["ET"] == "Polar Tundra"
        assert vals["Cfa"] == "Temperate Without Dry Season, Hot Summer"
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ["soil_moisture"]

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={"network": "COSMOS"}):
            for se in station.iter_sensors():
                data = se.read_data()
                # check that the network is COSMOS or station in [ARM, Barrow-ARM]
                assert not data.empty
                # check something for that one station
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks["COSMOS"].stations[
                "Barrow-ARM"].iter_sensors():
            data = se.read_data()
            assert not data.empty
            # check something for that one station
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={"station": ["Barrow-ARM", "ARM-1"]},
        ):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable="nonexisting"):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(
            should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat

        gpi, dist = self.ds.collection.grid.find_nearest_gpi(
            int(should_lon), int(should_lat))
        assert dist != 0
        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0,
                                                     return_distance=True,
                                                     max_dist=100)
        assert station == dist == None

    def test_citation(self):
        with TemporaryDirectory() as out_dir:
            out_file = os.path.join(out_dir, 'citation.txt')
            refs = self.ds.collection.export_citations(out_file=out_file)
            assert all([
                net in refs.keys()
                for net in list(self.ds.collection.networks.keys())
            ])
            assert os.path.exists(out_file)
            with open(out_file, mode='r') as f:
                lines = f.readlines()
                assert len(lines) > 0
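# Pieced together from the calls exercised in the tests above, a typical
# interactive session with the interface might look as follows (the data
# path is a placeholder):

from ismn.interface import ISMN_Interface

ds = ISMN_Interface("/path/to/ismn_data", network=["COSMOS"])
ids = ds.get_dataset_ids("soil_moisture", max_depth=0.19)
for i in ids:
    data, meta = ds.read_ts(i, return_meta=True)  # time series + metadata
    print(meta)
ds.close_files()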
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                'Data_seperate_files_20170810_20180809')
        metadata_path = os.path.join(testdata, 'python_metadata')

        cleanup(metadata_path)

        ds = ISMN_Interface(testdata, network=[])
        assert ds.networks == OrderedDict()

        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=['COSMOS'])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        assert len(self.ds.list_networks()) == 1
        assert len(self.ds.list_stations()) == \
            len(self.ds.list_stations('COSMOS')) == 2
        assert len(self.ds.list_sensors()) == 2
        assert len(self.ds.list_sensors(station='Barrow-ARM')) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station('Barrow-ARM') == 'COSMOS'
        assert self.ds.network_for_station('ARM-1') == 'COSMOS'

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure('soil_moisture'):
            assert s.name in ['ARM-1', 'Barrow-ARM']

        for s in self.ds.stations_that_measure('nonexisting'):
            raise AssertionError("Found var that doesnt exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=100,
                                      groupby='network')
        assert list(ids.keys()) == ['COSMOS']
        assert ids['COSMOS'] == [0, 1]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=99,
                                      filter_meta_dict={'lc_2010': 210,
                                                        'network': 'COSMOS',
                                                        'station': 'Barrow-ARM'})
        assert ids == [1]

        ids = self.ds.get_dataset_ids('novar')
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 0.19)  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.)  # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.,
                                      filter_meta_dict={'lc_2010': 210})  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('nonexisting')  # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2 = self.ds.read_ts(1)
        assert not data2.empty

        # make sure they are not the same
        assert len(data1.index) != len(data2.index)

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, 'plot.png')
            self.ds.plot_station_locations('soil_moisture', markersize=5,
                                           filename=outpath)

            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps('soil_moisture',
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks['COSMOS'].stations['ARM-1']
        tmin, tmax = station.get_min_max_obs_timestamp('soil_moisture', 0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals('soil_moisture', max_depth=0.19)
        assert vals == {130: 'Grassland'}

        vals = self.ds.get_landcover_types('soil_moisture', max_depth=100)
        assert len(vals) == 2
        assert vals[130] == 'Grassland'
        assert vals[210] == 'Water'
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types('soil_moisture', max_depth=100,
                                         climate='climate_KG')
        assert len(vals) == 2
        assert vals['ET'] == 'Polar Tundra'
        assert vals['Cfa'] == 'Temperate Without Dry Season, Hot Summer'
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ['soil_moisture']

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={'network': 'COSMOS'}):
            for se in station.iter_sensors():
                data = se.read_data()
                # check if the network is COSMOS or station in [ARM, Barrow-ARM]
                assert not data.empty
                # check something for that one station
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks['COSMOS'].stations['Barrow-ARM'].iter_sensors():
            data = se.read_data()
            assert not data.empty
            # check something for that one station
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={'station': ['Barrow-ARM', 'ARM-1']}):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable='nonexisting'):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(should_lon,
                                                               should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat

        gpi, dist = self.ds.collection.grid.find_nearest_gpi(int(should_lon),
                                                             int(should_lat))
        assert dist != 0
        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0,
                                                     return_distance=True,
                                                     max_dist=100)
        assert station == dist == None