def resample_timeseries():
    """
    Resample ISMN time series onto the EASE2 grid and write one .csv per
    grid cell.

    If a grid cell holds a single station, its quality-flagged ('G') soil
    moisture series is written directly. If it holds multiple stations, each
    series is rescaled to the mean/std of the station with the longest record
    and the series are averaged.

    Requires a station look-up table (paths.ismn / 'station_list.csv')
    mapping sensor indices to EASE2 grid cells.
    """

    paths = Paths()

    io = ISMN_Interface(paths.ismn / 'downloaded' / 'CONUS_20100101_20190101')

    # get all stations / sensors for each grid cell.
    lut = pd.read_csv(paths.ismn / 'station_list.csv', index_col=0)
    lut = lut.groupby('ease2_gpi').apply(
        lambda x: '-'.join([i for i in x.index]))

    dir_out = paths.ismn / 'timeseries'
    # Create the output directory if missing (was absent here although the
    # sibling resample_ismn() does it; writing would fail otherwise).
    if not dir_out.exists():
        dir_out.mkdir()

    # Series.iteritems() was removed in pandas 2.0; .items() is equivalent.
    for cnt, (gpi, indices) in enumerate(lut.items()):
        print('%i / %i' % (cnt, len(lut)))

        fname = dir_out / ('%i.csv' % gpi)

        idx = indices.split('-')

        # Only one station within grid cell
        if len(idx) == 1:
            try:
                ts = io.read_ts(int(idx[0]))
                # Keep only "good" data based on the ISMN quality flag.
                ts = ts[ts['soil moisture_flag'] == 'G']['soil moisture']
                ts.tz_convert(None).to_csv(fname, float_format='%.4f')
            except Exception:
                # Narrowed from a bare except: so KeyboardInterrupt/SystemExit
                # are not swallowed; corrupt input files are reported.
                print('Corrupt file: ' + io.metadata[int(idx[0])]['filename'])

        # Multiple stations within grid cell
        else:
            df = []
            for i in idx:
                try:
                    ts = io.read_ts(int(i))
                    # Keep only "good" data based on the ISMN quality flag.
                    df += [
                        ts[ts['soil moisture_flag'] == 'G']['soil moisture']
                    ]
                except Exception:
                    print('Corrupt file: ' + io.metadata[int(i)]['filename'])
            if len(df) == 0:
                continue

            df = pd.concat(df, axis=1)
            df.columns = np.arange(len(df.columns))

            # match temporal mean and standard deviation to those of the
            # station with the maximum temporal coverage
            n = np.array([len(df[i].dropna()) for i in df])
            ref = np.where(n == n.max())[0][0]
            for col in df:
                if col != ref:
                    df[col] = (df[col] - df[col].mean()) / df[col].std(
                    ) * df[ref].std() + df[ref].mean()

            # Average measurements of all stations
            df.mean(axis='columns').tz_convert(None).to_csv(
                fname, float_format='%.4f')
Esempio n. 2
0
    def test_timezone_adapter(self):
        """TimezoneAdapter must keep index values intact and drop tz info."""

        def check(original, adapted):
            # Index values unchanged, and the adapted index is tz-naive.
            self.assertTrue(
                np.array_equal(original.index.values, adapted.index.values))
            self.assertTrue((not hasattr(adapted.index, 'tz'))
                            or (adapted.index.tz is None))

        c3s_data_folder = path.join(
            Dataset.objects.get(short_name='C3S').storage_path,
            'C3S_V201706/TCDR/063_images_to_ts/combined-daily')
        c3s_reader = c3s_read(c3s_data_folder)
        timezone_reader = TimezoneAdapter(c3s_reader)

        # Exercise both reader entry points (read_ts and read).
        check(c3s_reader.read_ts(-155.42, 19.78),
              timezone_reader.read_ts(-155.42, 19.78))
        check(c3s_reader.read(-155.42, 19.78),
              timezone_reader.read(-155.42, 19.78))

        ismn_data_folder = path.join(
            Dataset.objects.get(short_name='ISMN').storage_path,
            'ISMN_V20191211')
        ismn_reader = ISMN_Interface(ismn_data_folder)
        timezone_reader2 = TimezoneAdapter(ismn_reader)

        check(ismn_reader.read_ts(0), timezone_reader2.read_ts(0))
Esempio n. 3
0
                                  max_depth=0.1)
# Build one validation job per dataset id: (gpi, lon, lat).
# NOTE(review): `ids`, `ismn_reader`, and `jobs` are defined earlier in the
# notebook this script was exported from — not visible here.
for idx in ids:
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))

print("Jobs (gpi, lon, lat):")
print(jobs)

# For this small test dataset it is only one job
#
# It is important here that the ISMN reader has a read_ts function that works by just using the `dataset_id`. In this
#  way the validation framework can go through the jobs and read the correct time series.

# In[6]:

# Read and preview the time series for the first job's dataset id.
data = ismn_reader.read_ts(ids[0])
print('ISMN data example:')
print(data.head())

# ## Initialize the Validation class
#
# The Validation class is the heart of the validation framework. It contains the information about which datasets to
# read using which arguments or keywords and if they are spatially compatible. It also contains the settings about
# which metric calculators to use and how to perform the scaling into the reference data space. It is initialized in
# the following way:

# In[7]:

datasets = {
    'ISMN': {
        'class': ismn_reader,
def resample_ismn():
    """
    This resamples ISMN data onto the EASE2 grid and stores data for each grid cell into .csv files.
    If single grid cells contain multiple stations, they are averaged.

    A grid look-up table needs to be created first (method: ancillary.grid.create_lut).

    """

    paths = Paths()

    io = ISMN_Interface(paths.ismn_raw)

    # get all stations / sensors for each grid cell.
    lut = pd.read_csv(paths.ismn / 'station_list.csv', index_col=0)
    lut = lut.groupby('ease2_gpi').apply(lambda x: '-'.join([i for i in x.index]))

    dir_out = paths.ismn / 'timeseries'
    if not dir_out.exists():
        dir_out.mkdir()

    # Series.iteritems() was removed in pandas 2.0; .items() is equivalent.
    for cnt, (gpi, indices) in enumerate(lut.items()):
        print('%i / %i' % (cnt, len(lut)))

        fname = dir_out / ('%i.csv' % gpi)

        idx = indices.split('-')

        # Only one station within grid cell
        if len(idx) == 1:
            try:
                ts = io.read_ts(int(idx[0]))
                ts = ts[ts['soil moisture_flag'] == 'G']['soil moisture'] # Get only "good" data based on ISMN QC
                ts.tz_convert(None).to_csv(fname, float_format='%.4f')
            except Exception:
                # Narrowed from a bare except: so KeyboardInterrupt/SystemExit
                # are not swallowed; corrupt input files are reported.
                print('Corrupt file: ' + io.metadata[int(idx[0])]['filename'])

        # Multiple stations within grid cell
        else:
            df = []
            for i in idx:
                try:
                    ts = io.read_ts(int(i))
                    df += [ts[ts['soil moisture_flag'] == 'G']['soil moisture']] # Get only "good" data based on ISMN QC
                except Exception:
                    print('Corrupt file: ' + io.metadata[int(i)]['filename'])
            if len(df) == 0:
                continue

            df = pd.concat(df, axis=1)
            df.columns = np.arange(len(df.columns))

            # match temporal mean and standard deviation to those of the station with the maximum temporal coverage
            n = np.array([len(df[i].dropna()) for i in df])
            ref = np.where(n == n.max())[0][0]
            for col in df:
                if col != ref:
                    df[col] = (df[col] - df[col].mean()) / df[col].std() * df[ref].std() + df[ref].mean()

            # Average measurements of all stations
            df.mean(axis='columns').tz_convert(None).to_csv(fname, float_format='%.4f')
Esempio n. 5
0
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):
    """
    Integration tests for ``ISMN_Interface`` on an unzipped CEOP-formatted
    test dataset containing the "COSMOS" network with two stations
    ("ARM-1" and "Barrow-ARM").
    """

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                "Data_seperate_files_20170810_20180809")
        metadata_path = os.path.join(testdata, "python_metadata")

        # Remove any cached metadata so it is rebuilt for this run.
        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[], parallel=True)
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        # Fresh reader per test, restricted to the COSMOS network.
        self.ds = ISMN_Interface(self.testdata, network=["COSMOS"])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        # The list_* API is deprecated but must still work.
        with pytest.deprecated_call():
            assert len(self.ds.list_networks()) == 1
            assert len(self.ds.list_stations()) == len(
                self.ds.list_stations("COSMOS")) == 2
            assert len(self.ds.list_sensors()) == 2
            assert len(self.ds.list_sensors(station="Barrow-ARM")) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
        assert self.ds.network_for_station("ARM-1") == "COSMOS"

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure("soil_moisture"):
            assert s.name in ["ARM-1", "Barrow-ARM"]

        for s in self.ds.stations_that_measure("nonexisting"):
            raise AssertionError("Found var that doesnt exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids("soil_moisture",
                                      max_depth=100,
                                      groupby="network")
        assert list(ids.keys()) == ["COSMOS"]
        assert ids["COSMOS"] == [0, 1]

        ids = self.ds.get_dataset_ids("soil_moisture", max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids(
            ["soil_moisture"],
            max_depth=99,
            filter_meta_dict={
                "lc_2010": 210,
                "network": "COSMOS",
                "station": "Barrow-ARM",
            },
        )
        assert ids == [1]

        ids = self.ds.get_dataset_ids("novar")
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids(["soil_moisture", "shouldhavenoeffect"],
                                      0.0, 0.19)  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("soil_moisture", 0.0,
                                      1.0)  # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids("soil_moisture",
                                      0.0,
                                      1.0,
                                      filter_meta_dict={"lc_2010":
                                                        210})  # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("nonexisting")  # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert not data2.empty

    def test_read_metadata(self):
        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert all(meta == self.ds.read_metadata(1, format="pandas"))
        assert self.ds.read_metadata(1, format="dict") is not None
        assert self.ds.read_metadata([1], format="obj") is not None

        assert not self.ds.metadata.empty
        assert self.ds.metadata.loc[1]['station']['val'] \
               == self.ds.read_metadata([0,1]).loc[1, ('station', 'val')]

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, "plot.png")
            self.ds.plot_station_locations(["soil_moisture", 'precipitation'],
                                           markersize=5,
                                           filename=outpath)
            # Exactly one file (the plot) is created.
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps("soil_moisture",
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks["COSMOS"].stations["ARM-1"]
        tmin, tmax = station.get_min_max_obs_timestamp("soil_moisture", 0,
                                                       0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals("soil_moisture", max_depth=0.19)
        assert vals == {130: "Grassland"}

        vals = self.ds.get_landcover_types("soil_moisture", max_depth=100)
        assert len(vals) == 2
        assert vals[130] == "Grassland"
        assert vals[210] == "Water"
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types("soil_moisture",
                                         max_depth=100,
                                         climate="climate_KG")
        assert len(vals) == 2
        assert vals["ET"] == "Polar Tundra"
        assert vals["Cfa"] == "Temperate Without Dry Season, Hot Summer"
        self.ds.print_climate_dict()

    def test_get_var(self):
        # Local renamed from `vars`, which shadowed the builtin.
        variables = self.ds.get_variables()
        assert variables == ["soil_moisture"]

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={"network": "COSMOS"}):
            for se in station.iter_sensors():
                data = se.read_data()
                # check if the networks is COSMOS or station in [ARM, Barrow-ARM]
                assert not data.empty
                # check something for that one station
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks["COSMOS"].stations[
                "Barrow-ARM"].iter_sensors():
            data = se.read_data()
            assert not data.empty
            # check something for that one station
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={"station": ["Barrow-ARM", "ARM-1"]},
        ):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable="nonexisting"):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(
            should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat
        gpi, dist = self.ds.collection.grid.find_nearest_gpi(
            int(should_lon), int(should_lat))
        assert dist != 0
        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0,
                                                     0,
                                                     return_distance=True,
                                                     max_dist=100)
        # No station within max_dist: both values are None. Identity check is
        # the correct idiom (was: `station == dist == None`, flake8 E711).
        assert station is None and dist is None

    def test_citation(self):
        with TemporaryDirectory() as out_dir:
            out_file = os.path.join(out_dir, 'citation.txt')
            refs = self.ds.collection.export_citations(out_file=out_file)
            # Every network in the collection must have a citation entry.
            assert all([
                net in refs.keys()
                for net in list(self.ds.collection.networks.keys())
            ])
            assert os.path.exists(out_file)
            with open(out_file, mode='r') as f:
                lines = f.readlines()
                assert len(lines) > 0
Esempio n. 6
0
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):
    """
    Integration tests for ``ISMN_Interface`` on an unzipped CEOP-formatted
    test dataset containing the "COSMOS" network with two stations
    ("ARM-1" and "Barrow-ARM").
    """

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
            'Data_seperate_files_20170810_20180809')
        metadata_path = os.path.join(testdata, 'python_metadata')

        # Remove any cached metadata so it is rebuilt for this run.
        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[])
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        # Fresh reader per test, restricted to the COSMOS network.
        self.ds = ISMN_Interface(self.testdata, network=['COSMOS'])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        assert len(self.ds.list_networks()) == 1
        assert len(self.ds.list_stations()) == len(self.ds.list_stations('COSMOS')) == 2
        assert len(self.ds.list_sensors()) == 2
        assert len(self.ds.list_sensors(station='Barrow-ARM')) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station('Barrow-ARM') == 'COSMOS'
        assert self.ds.network_for_station('ARM-1') == 'COSMOS'

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure('soil_moisture'):
            assert s.name in ['ARM-1', 'Barrow-ARM']

        for s in self.ds.stations_that_measure('nonexisting'):
            raise AssertionError("Found var that doesnt exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=100, groupby='network')
        assert list(ids.keys()) == ['COSMOS']
        assert ids['COSMOS'] == [0,1]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=99,
                                      filter_meta_dict={'lc_2010': 210,
                                                        'network': 'COSMOS',
                                                        'station': 'Barrow-ARM'})
        assert ids == [1]

        ids = self.ds.get_dataset_ids('novar')
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 0.19) # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.) # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.,
                                      filter_meta_dict={'lc_2010': 210}) # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('nonexisting') # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2 = self.ds.read_ts(1)
        assert not data2.empty

        assert len(data1.index) != len(data2.index) # make sure they are not same

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, 'plot.png')
            self.ds.plot_station_locations('soil_moisture', markersize=5,
                                           filename=outpath)
            # Exactly one file (the plot) is created.
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps('soil_moisture', max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks['COSMOS'].stations['ARM-1']
        tmin, tmax = station.get_min_max_obs_timestamp('soil_moisture', 0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals('soil_moisture', max_depth=0.19)
        assert vals == {130: 'Grassland'}

        vals = self.ds.get_landcover_types('soil_moisture', max_depth=100)
        assert len(vals) == 2
        assert vals[130] == 'Grassland'
        assert vals[210] == 'Water'
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types('soil_moisture', max_depth=100,
                                         climate='climate_KG')
        assert len(vals) == 2
        assert vals['ET'] == 'Polar Tundra'
        assert vals['Cfa'] == 'Temperate Without Dry Season, Hot Summer'
        self.ds.print_climate_dict()

    def test_get_var(self):
        # Local renamed from `vars`, which shadowed the builtin.
        variables = self.ds.get_variables()
        assert variables == ['soil_moisture']

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={'network': 'COSMOS'}):
            for se in station.iter_sensors():
                data = se.read_data()
                # check if the networks is COSMOS or station in [ARM, Barrow-ARM]
                assert not data.empty
                # check something for that one station
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks['COSMOS'].stations['Barrow-ARM'].iter_sensors():
            data = se.read_data()
            assert not data.empty
            # check something for that one station
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0,1),
                filter_meta_dict={'station': ['Barrow-ARM', 'ARM-1']}):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable='nonexisting'):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat
        gpi, dist = self.ds.collection.grid.find_nearest_gpi(int(should_lon),int(should_lat))
        assert dist != 0
        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0, return_distance=True,
                                                     max_dist=100)
        # No station within max_dist: both values are None. Identity check is
        # the correct idiom (was: `station == dist == None`, flake8 E711).
        assert station is None and dist is None
Esempio n. 7
0
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))

print("Jobs (gpi, lon, lat):")
print(jobs)


# For this small test dataset it is only one job
# 
# It is important here that the ISMN reader has a read_ts function that works by just using the `dataset_id`. In this
#  way the validation framework can go through the jobs and read the correct time series.

# In[6]:


data = ismn_reader.read_ts(ids[0])
print('ISMN data example:')
print(data.head())


# ## Initialize the Validation class
# 
# The Validation class is the heart of the validation framework. It contains the information about which datasets to
# read using which arguments or keywords and if they are spatially compatible. It also contains the settings about
# which metric calculators to use and how to perform the scaling into the reference data space. It is initialized in
# the following way:

# In[7]:

datasets = {
    'ISMN': {