def generate_station_list(): """ This routine generates a list of available ISMN stations and the EASEv2 grid point they are located in. """ paths = Paths() io = ISMN_Interface(paths.ismn_raw) # get metadata indices of all stations that measure soil moisture within the first 10 cm idx = io.get_dataset_ids('soil moisture', min_depth=0.0, max_depth=0.1) df = pd.DataFrame({'network': io.metadata[idx]['network'], 'station': io.metadata[idx]['station'], 'lat': io.metadata[idx]['latitude'], 'lon': io.metadata[idx]['longitude'], 'ease2_gpi': np.zeros(len(idx)).astype('int')}, index=idx) # merge indices for stations that have multiple sensors within the first 10 cm duplicate_idx = df.groupby(df.columns.tolist()).apply(lambda x: '-'.join(['%i'% i for i in x.index])).values df.drop_duplicates(inplace=True) df.index = duplicate_idx # create EASEv2 grid domain grid = EASE2() lons, lats = np.meshgrid(grid.ease_lons, grid.ease_lats) lons = lons.flatten() lats = lats.flatten() # find EASEv2 grid points in which the individual stations are located for i, (idx, data) in enumerate(df.iterrows()): print('%i / %i' % (i, len(df))) r = (lons - data.lon) ** 2 + (lats - data.lat) ** 2 df.loc[idx, 'ease2_gpi'] = np.where((r - r.min()) < 0.0001)[0][0] df.to_csv(paths.ismn / 'station_list.csv')
def generate_station_list():
    paths = Paths()
    io = ISMN_Interface(paths.ismn / 'downloaded' / 'CONUS_20100101_20190101')

    # get metadata indices of all stations that measure soil moisture within the first 10 cm
    idx = io.get_dataset_ids('soil moisture', min_depth=0.0, max_depth=0.1)
    df = pd.DataFrame(
        {
            'network': io.metadata[idx]['network'],
            'station': io.metadata[idx]['station'],
            'lat': io.metadata[idx]['latitude'],
            'lon': io.metadata[idx]['longitude'],
            'ease2_gpi': np.zeros(len(idx)).astype('int')
        },
        index=idx)

    # merge indices for stations that have multiple sensors within the first 10 cm
    # (sort=False keeps the group order aligned with drop_duplicates below)
    duplicate_idx = df.groupby(df.columns.tolist(), sort=False).apply(
        lambda x: '-'.join(['%i' % i for i in x.index])).values
    df.drop_duplicates(inplace=True)
    df.index = duplicate_idx

    # create the EASEv2 grid domain
    grid = EASE2()
    lons, lats = np.meshgrid(grid.ease_lons, grid.ease_lats)
    lons = lons.flatten()
    lats = lats.flatten()

    # find the EASEv2 grid point in which each station is located
    for i, (idx, data) in enumerate(df.iterrows()):
        print('%i / %i' % (i, len(df)))
        r = (lons - data.lon)**2 + (lats - data.lat)**2
        df.loc[idx, 'ease2_gpi'] = np.where((r - r.min()) < 0.0001)[0][0]

    df.to_csv(paths.ismn / 'station_list.csv')
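# The tolerance-based np.where lookup above returns the flattened index of the
# grid cell whose center is closest to the station; np.argmin expresses the
# same nearest-neighbour search more directly. A small sketch on a made-up
# 1-degree grid (not the real EASEv2 coordinates):
import numpy as np

grid_lons, grid_lats = np.meshgrid(np.arange(-180., 180.), np.arange(-90., 90.))
lons, lats = grid_lons.flatten(), grid_lats.flatten()

station_lon, station_lat = -105.27, 40.02

# squared euclidean distance in degree space, as in the routine above
r = (lons - station_lon) ** 2 + (lats - station_lat) ** 2
gpi = np.argmin(r)  # flattened index of the closest grid point

print(gpi, lons[gpi], lats[gpi])  # closest cell center: (-105.0, 40.0)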
def test_metadata_dataframe():
    # make sure that metadata.index represents the same values as get_dataset_ids
    testdata = os.path.join(testdata_root,
                            "Data_seperate_files_20170810_20180809")
    metadata_path = os.path.join(testdata, "python_metadata")

    cleanup(metadata_path)

    ds_one = ISMN_Interface(testdata, meta_path=metadata_path,
                            network='FR_Aqui')
    assert np.all(ds_one.metadata.index.values ==
                  ds_one.get_dataset_ids(None, -np.inf, np.inf))

    ids = ds_one.get_dataset_ids('soil_moisture')
    assert np.all(ids == ds_one.metadata.index.values)
    assert ds_one.metadata.loc[ids[0], 'variable']['val'] == 'soil_moisture'
    assert ds_one.metadata.loc[ids[0], 'network']['val'] == 'FR_Aqui'

    ds_one.close_files()
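# The metadata attribute used above is a pandas DataFrame with two-level
# columns, so 'chained' and tuple-based lookups are equivalent. A toy sketch
# of that column layout (made-up values, not real ISMN metadata):
import pandas as pd

cols = pd.MultiIndex.from_tuples([('variable', 'val'), ('network', 'val')])
meta = pd.DataFrame([['soil_moisture', 'FR_Aqui']], index=[0], columns=cols)

assert meta.loc[0, 'variable']['val'] == meta.loc[0, ('variable', 'val')]
print(meta.loc[0, ('variable', 'val')])  # soil_moisture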
ismn_reader = ISMN_Interface(ismn_data_folder)

# The validation is run based on jobs. A job consists of at least three lists or numpy arrays specifying the grid
# point index, its latitude and longitude. In the case of the ISMN we can use the `dataset_ids` that identify every
# time series in the downloaded ISMN data as our grid point index. We can then get longitude and latitude from the
# metadata of the dataset.
#
# **DO NOT CHANGE** the name ***jobs*** because it will be searched during the parallel processing!

# In[5]:

jobs = []

ids = ismn_reader.get_dataset_ids(variable='soil moisture', min_depth=0, max_depth=0.1)
for idx in ids:
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))

print("Jobs (gpi, lon, lat):")
print(jobs)

# For this small test dataset it is only one job.
#
# It is important here that the ISMN reader has a read_ts function that works by just using the `dataset_id`. In this
# way the validation framework can go through the jobs and read the correct time series.

# In[6]:
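# As a quick sanity check we can read a single time series directly via its
# dataset id, which is exactly the read_ts behaviour the framework relies on
# (shown here for the first job; purely illustrative):

gpi, lon, lat = jobs[0]
ts = ismn_reader.read_ts(gpi)
print(ts.head())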
def test_ascat_ismn_validation_metadata_rolling(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader
    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ISMN": {"class": ismn_reader, "columns": ["soil moisture"]},
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.RollingMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(
            results, save_path, ts_vars=["R", "p_R", "RMSD"]
        )

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"gpi",
        u"lon",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df["network"].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert reader.read_ts(0).index.size == 357
    assert np.all(
        reader.read_ts(1).columns.values == np.array(["R", "p_R", "RMSD"])
    )
def test_ascat_ismn_validation_metadata(ascat_reader):
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    # Initialize ISMN reader
    ismn_data_folder = os.path.join(
        os.path.dirname(__file__),
        "..",
        "test-data",
        "ismn",
        "multinetwork",
        "header_values",
    )
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable="soil moisture", min_depth=0, max_depth=0.1
    )

    metadata_dict_template = {
        "network": np.array(["None"], dtype="U256"),
        "station": np.array(["None"], dtype="U256"),
        "landcover": np.float32([np.nan]),
        "climate": np.array(["None"], dtype="U4"),
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [
            {
                "network": metadata["network"],
                "station": metadata["station"],
                "landcover": metadata["landcover_2010"],
                "climate": metadata["climate"],
            }
        ]
        jobs.append(
            (idx, metadata["longitude"], metadata["latitude"], metadata_dict)
        )

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        "ISMN": {
            "class": ismn_reader,
            "columns": ["soil moisture"],
        },
        "ASCAT": {
            "class": ascat_reader,
            "columns": ["sm"],
            "kwargs": {
                "mask_frozen_prob": 80,
                "mask_snow_prob": 80,
                "mask_ssf": True,
            },
        },
    }

    read_ts_names = {"ASCAT": "read", "ISMN": "read_ts"}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(
        datasets, "ISMN", period, read_ts_names=read_ts_names
    )
    process = Validation(
        datasets,
        "ISMN",
        temporal_ref="ASCAT",
        scaling="lin_cdf_match",
        scaling_ref="ASCAT",
        metrics_calculators={
            (2, 2): metrics_calculators.BasicMetrics(
                other_name="k1", metadata_template=metadata_dict_template
            ).calc_metrics
        },
        period=period,
    )

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(
        save_path, "ASCAT.sm_with_ISMN.soil moisture.nc"
    )

    vars_should = [
        u"n_obs",
        u"tau",
        u"gpi",
        u"RMSD",
        u"lon",
        u"p_tau",
        u"BIAS",
        u"p_rho",
        u"rho",
        u"lat",
        u"R",
        u"p_R",
        u"time",
        u"idx",
        u"_row_size",
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    n_obs_should = [357, 384, 1646, 1875, 1915, 467, 141, 251]
    rho_should = np.array(
        [
            0.53934574,
            0.7002289,
            0.62200236,
            0.53647155,
            0.30413666,
            0.6740655,
            0.8418981,
            0.74206454,
        ],
        dtype=np.float32,
    )
    rmsd_should = np.array(
        [
            11.583476,
            7.729667,
            17.441547,
            21.125721,
            14.31557,
            14.187225,
            13.0622425,
            12.903898,
        ],
        dtype=np.float32,
    )
    network_should = np.array(
        [
            "MAQU",
            "MAQU",
            "SCAN",
            "SCAN",
            "SCAN",
            "SOILSCAPE",
            "SOILSCAPE",
            "SOILSCAPE",
        ],
        dtype="U256",
    )

    with nc.Dataset(results_fname, mode="r") as results:
        vars = results.variables.keys()
        n_obs = results.variables["n_obs"][:].tolist()
        rho = results.variables["rho"][:]
        rmsd = results.variables["RMSD"][:]
        network = results.variables["network"][:]

        assert sorted(vars) == sorted(vars_should)
        assert sorted(n_obs) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho), sorted(rho_should), rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd), sorted(rmsd_should), rtol=1e-4)
        nptest.assert_equal(sorted(network), sorted(network_should))
def test_ascat_ismn_validation_metadata_rolling():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)

    metadata_dict_template = {
        'network': np.array(['None'], dtype='U256'),
        'station': np.array(['None'], dtype='U256'),
        'landcover': np.float32([np.nan]),
        'climate': np.array(['None'], dtype='U4')
    }

    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        metadata_dict = [{
            'network': metadata['network'],
            'station': metadata['station'],
            'landcover': metadata['landcover_2010'],
            'climate': metadata['climate']
        }]
        jobs.append(
            (idx, metadata['longitude'], metadata['latitude'], metadata_dict))

    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.RollingMetrics(
                other_name='k1',
                metadata_template=metadata_dict_template).calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path,
                               ts_vars=['R', 'p_R', 'RMSD'])

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'gpi', u'lon', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]
    for key, value in metadata_dict_template.items():
        vars_should.append(key)

    network_should = np.array([
        'MAQU', 'MAQU', 'SCAN', 'SCAN', 'SCAN', 'SOILSCAPE', 'SOILSCAPE',
        'SOILSCAPE'
    ], dtype='U256')

    reader = PointDataResults(results_fname, read_only=True)
    df = reader.read_loc(None)
    nptest.assert_equal(sorted(network_should), sorted(df['network'].values))
    assert np.all(df.gpi.values == np.arange(8))
    assert reader.read_ts(0).index.size == 357
    assert np.all(
        reader.read_ts(1).columns.values == np.array(['R', 'p_R', 'RMSD']))
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0,
                                      max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    read_ts_names = {'ASCAT': 'read', 'ISMN': 'read_ts'}
    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    datasets = DataManager(datasets,
                           'ISMN',
                           period,
                           read_ts_names=read_ts_names)

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS',
        u'p_rho', u'rho', u'lat', u'R', u'p_R', u'time', u'idx', u'_row_size'
    ]
    n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
    rho_should = np.array([
        0.70022893, 0.53934574, 0.69356072, 0.84189808, 0.74206454,
        0.30299741, 0.53143877, 0.62204134
    ], dtype=np.float32)
    rmsd_should = np.array([
        7.72966719, 11.58347607, 14.57700157, 13.06224251, 12.90389824,
        14.24668026, 21.19682884, 17.3883934
    ], dtype=np.float32)

    with nc.Dataset(results_fname, mode='r') as results:
        assert sorted(list(results.variables.keys())) == sorted(vars_should)
        assert sorted(
            results.variables['n_obs'][:].tolist()) == sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
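# The validation tests above scale ISMN against ASCAT with
# scaling='lin_cdf_match'. Conceptually, this maps the candidate onto the
# reference by matching their empirical CDFs at a few percentiles and
# interpolating linearly in between. A hedged, self-contained sketch of the
# idea (not pytesmo's actual implementation; the percentile choice is an
# assumption):
import numpy as np


def lin_cdf_match_sketch(src, ref,
                         percentiles=(0, 5, 10, 30, 50, 70, 90, 95, 100)):
    p = np.asarray(percentiles)
    src_perc = np.percentile(src, p)  # candidate CDF support points
    ref_perc = np.percentile(ref, p)  # reference CDF support points
    # piecewise-linear map from candidate space into reference space
    return np.interp(src, src_perc, ref_perc)


rng = np.random.default_rng(0)
ref = rng.normal(25, 8, 1000)      # e.g. ASCAT degree of saturation [%]
src = rng.normal(0.2, 0.05, 1000)  # e.g. ISMN volumetric soil moisture
scaled = lin_cdf_match_sketch(src, ref)
print(scaled.mean().round(2), ref.mean().round(2))  # now on a similar scale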
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                "Data_seperate_files_20170810_20180809")
        metadata_path = os.path.join(testdata, "python_metadata")

        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[], parallel=True)
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=["COSMOS"])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        with pytest.deprecated_call():
            assert len(self.ds.list_networks()) == 1
            assert len(self.ds.list_stations()) == len(
                self.ds.list_stations("COSMOS")) == 2
            assert len(self.ds.list_sensors()) == 2
            assert len(self.ds.list_sensors(station="Barrow-ARM")) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station("Barrow-ARM") == "COSMOS"
        assert self.ds.network_for_station("ARM-1") == "COSMOS"

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure("soil_moisture"):
            assert s.name in ["ARM-1", "Barrow-ARM"]

        for s in self.ds.stations_that_measure("nonexisting"):
            raise AssertionError("Found var that doesn't exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids("soil_moisture",
                                      max_depth=100,
                                      groupby="network")
        assert list(ids.keys()) == ["COSMOS"]
        assert ids["COSMOS"] == [0, 1]

        ids = self.ds.get_dataset_ids("soil_moisture", max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids(
            ["soil_moisture"],
            max_depth=99,
            filter_meta_dict={
                "lc_2010": 210,
                "network": "COSMOS",
                "station": "Barrow-ARM",
            },
        )
        assert ids == [1]

        ids = self.ds.get_dataset_ids("novar")
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids(["soil_moisture", "shouldhavenoeffect"],
                                      0.0, 0.19)
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("soil_moisture", 0.0, 1.0)
        # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids("soil_moisture", 0.0, 1.0,
                                      filter_meta_dict={"lc_2010": 210})
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids("nonexisting")
        # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert not data2.empty

    def test_read_metadata(self):
        data2, meta = self.ds.read_ts(1, return_meta=True)
        assert all(meta == self.ds.read_metadata(1, format="pandas"))
        assert self.ds.read_metadata(1, format="dict") is not None
        assert self.ds.read_metadata([1], format="obj") is not None

        assert not self.ds.metadata.empty
        assert self.ds.metadata.loc[1]['station']['val'] \
            == self.ds.read_metadata([0, 1]).loc[1, ('station', 'val')]

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, "plot.png")
            self.ds.plot_station_locations(["soil_moisture", 'precipitation'],
                                           markersize=5,
                                           filename=outpath)
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps("soil_moisture",
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks["COSMOS"].stations["ARM-1"]
        tmin, tmax = station.get_min_max_obs_timestamp("soil_moisture",
                                                       0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals("soil_moisture", max_depth=0.19)
        assert vals == {130: "Grassland"}

        vals = self.ds.get_landcover_types("soil_moisture", max_depth=100)
        assert len(vals) == 2
        assert vals[130] == "Grassland"
        assert vals[210] == "Water"
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types("soil_moisture",
                                         max_depth=100,
                                         climate="climate_KG")
        assert len(vals) == 2
        assert vals["ET"] == "Polar Tundra"
        assert vals["Cfa"] == "Temperate Without Dry Season, Hot Summer"
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ["soil_moisture"]

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={"network": "COSMOS"}):
            for se in station.iter_sensors():
                data = se.read_data()
                # network is COSMOS, station is either ARM-1 or Barrow-ARM
                assert not data.empty
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks["COSMOS"].stations[
                "Barrow-ARM"].iter_sensors():
            data = se.read_data()
            assert not data.empty
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={"station": ["Barrow-ARM", "ARM-1"]},
        ):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable="nonexisting"):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(
            should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat

        gpi, dist = self.ds.collection.grid.find_nearest_gpi(
            int(should_lon), int(should_lat))
        assert dist != 0

        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0,
                                                     return_distance=True,
                                                     max_dist=100)
        assert station is None and dist is None

    def test_citation(self):
        with TemporaryDirectory() as out_dir:
            out_file = os.path.join(out_dir, 'citation.txt')
            refs = self.ds.collection.export_citations(out_file=out_file)
            assert all([
                net in refs.keys()
                for net in list(self.ds.collection.networks.keys())
            ])
            assert os.path.exists(out_file)
            with open(out_file, mode='r') as f:
                lines = f.readlines()
                assert len(lines) > 0
class Test_ISMN_Interface_CeopUnzipped(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(Test_ISMN_Interface_CeopUnzipped, cls).setUpClass()

        testdata = os.path.join(testdata_root,
                                'Data_seperate_files_20170810_20180809')
        metadata_path = os.path.join(testdata, 'python_metadata')

        cleanup(metadata_path)
        ds = ISMN_Interface(testdata, network=[])
        assert ds.networks == OrderedDict()
        cls.testdata = testdata

    def setUp(self) -> None:
        self.ds = ISMN_Interface(self.testdata, network=['COSMOS'])

    def tearDown(self) -> None:
        self.ds.close_files()
        logging.shutdown()

    def test_list(self):
        assert len(self.ds.list_networks()) == 1
        assert len(self.ds.list_stations()) == \
            len(self.ds.list_stations('COSMOS')) == 2
        assert len(self.ds.list_sensors()) == 2
        assert len(self.ds.list_sensors(station='Barrow-ARM')) == 1

    def test_network_for_station(self):
        assert self.ds.network_for_station('Barrow-ARM') == 'COSMOS'
        assert self.ds.network_for_station('ARM-1') == 'COSMOS'

    def test_stations_that_measure(self):
        for s in self.ds.stations_that_measure('soil_moisture'):
            assert s.name in ['ARM-1', 'Barrow-ARM']

        for s in self.ds.stations_that_measure('nonexisting'):
            raise AssertionError("Found var that doesn't exist")

    def test_get_dataset_ids(self):
        ids = self.ds.get_dataset_ids('soil_moisture',
                                      max_depth=100,
                                      groupby='network')
        assert list(ids.keys()) == ['COSMOS']
        assert ids['COSMOS'] == [0, 1]

        ids = self.ds.get_dataset_ids('soil_moisture', max_depth=0.19)
        assert ids == [0]

        ids = self.ds.get_dataset_ids('soil_moisture',
                                      max_depth=99,
                                      filter_meta_dict={
                                          'lc_2010': 210,
                                          'network': 'COSMOS',
                                          'station': 'Barrow-ARM'
                                      })
        assert ids == [1]

        ids = self.ds.get_dataset_ids('novar')
        assert len(ids) == 0

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 0.19)
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.)
        # should get 2
        assert len(ids) == 2

        ids = self.ds.get_dataset_ids('soil_moisture', 0., 1.,
                                      filter_meta_dict={'lc_2010': 210})
        # should get 1
        assert len(ids) == 1

        ids = self.ds.get_dataset_ids('nonexisting')
        # should get 0
        assert len(ids) == 0

    def test_read_ts(self):
        data1 = self.ds.read(0)
        assert not data1.empty

        data2 = self.ds.read_ts(1)
        assert not data2.empty
        # make sure the two sensors do not return the same series
        assert len(data1.index) != len(data2.index)

    def test_find_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station = self.ds.find_nearest_station(should_lon, should_lat)

        assert station.lon == should_lon
        assert station.lat == should_lat

    def test_plot_station_locations(self):
        with TemporaryDirectory() as out_dir:
            outpath = os.path.join(out_dir, 'plot.png')
            self.ds.plot_station_locations('soil_moisture',
                                           markersize=5,
                                           filename=outpath)
            assert len(os.listdir(out_dir)) == 1

    def test_get_min_max_obs_timestamps(self):
        tmin, tmax = self.ds.get_min_max_obs_timestamps('soil_moisture',
                                                        max_depth=0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_min_max_obs_timestamps_for_station(self):
        station = self.ds.collection.networks['COSMOS'].stations['ARM-1']
        tmin, tmax = station.get_min_max_obs_timestamp('soil_moisture',
                                                       0, 0.19)
        assert tmin == datetime(2017, 8, 10, 0)
        assert tmax == datetime(2018, 8, 9, 23)

    def test_get_static_var_val(self):
        vals = self.ds.get_static_var_vals('soil_moisture', max_depth=0.19)
        assert vals == {130: 'Grassland'}

        vals = self.ds.get_landcover_types('soil_moisture', max_depth=100)
        assert len(vals) == 2
        assert vals[130] == 'Grassland'
        assert vals[210] == 'Water'
        self.ds.print_landcover_dict()

        vals = self.ds.get_climate_types('soil_moisture',
                                         max_depth=100,
                                         climate='climate_KG')
        assert len(vals) == 2
        assert vals['ET'] == 'Polar Tundra'
        assert vals['Cfa'] == 'Temperate Without Dry Season, Hot Summer'
        self.ds.print_climate_dict()

    def test_get_var(self):
        vars = self.ds.get_variables()
        assert vars == ['soil_moisture']

    def test_get_sensors(self):
        i = 0
        for nw, station in self.ds.collection.iter_stations(
                filter_meta_dict={'network': 'COSMOS'}):
            for se in station.iter_sensors():
                data = se.read_data()
                # network is COSMOS, station is either ARM-1 or Barrow-ARM
                assert not data.empty
                i += 1
        assert i == 2

        i = 0
        for se in self.ds.networks['COSMOS'].stations[
                'Barrow-ARM'].iter_sensors():
            data = se.read_data()
            assert not data.empty
            i += 1
        assert i == 1

        i = 0
        for net, stat, sens in self.ds.collection.iter_sensors(
                depth=Depth(0, 1),
                filter_meta_dict={'station': ['Barrow-ARM', 'ARM-1']}):
            data = sens.read_data()
            assert not data.empty
            i += 1
        assert i == 2

        for nw, station in self.ds.collection.iter_stations():
            for se in station.iter_sensors(variable='nonexisting'):
                raise ValueError("Found sensor, although none should exist")

    def test_get_nearest_station(self):
        should_lon, should_lat = -156.62870, 71.32980

        station, dist = self.ds.collection.get_nearest_station(
            should_lon, should_lat)
        assert dist == 0
        assert station.lon == should_lon
        assert station.lat == should_lat

        gpi, dist = self.ds.collection.grid.find_nearest_gpi(
            int(should_lon), int(should_lat))
        assert dist != 0

        for net in self.ds.collection.iter_networks():
            if station.name in net.stations.keys():
                assert net.stations[station.name].lon == should_lon
                assert net.stations[station.name].lat == should_lat

        station, dist = self.ds.find_nearest_station(0, 0,
                                                     return_distance=True,
                                                     max_dist=100)
        assert station is None and dist is None
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder,
                               ascat_grid_folder,
                               grid_filename='TUW_WARP5_grid_info_2_1.nc',
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture', min_depth=0, max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS',
        u'p_rho', u'rho', u'lat', u'R', u'p_R'
    ]
    n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
    rho_should = np.array([
        0.70022893, 0.53934574, 0.69356072, 0.84189808, 0.74206454,
        0.30299741, 0.53143877, 0.62204134
    ], dtype=np.float32)
    rmsd_should = np.array([
        7.72966719, 11.58347607, 14.57700157, 13.06224251, 12.90389824,
        14.24668026, 21.19682884, 17.3883934
    ], dtype=np.float32)

    with nc.Dataset(results_fname, mode='r') as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == sorted(
            n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)