ismn_reader = ISMN_Interface(ismn_data_folder) # The validation is run based on jobs. A job consists of at least three lists or numpy arrays specifing the grid # point index, its latitude and longitude. In the case of the ISMN we can use the `dataset_ids` that identify every # time series in the downloaded ISMN data as our grid point index. We can then get longitude and latitude from the # metadata of the dataset. # # **DO NOT CHANGE** the name ***jobs*** because it will be searched during the parallel processing! # In[5]: jobs = [] ids = ismn_reader.get_dataset_ids(variable='soil moisture', min_depth=0, max_depth=0.1) for idx in ids: metadata = ismn_reader.metadata[idx] jobs.append((idx, metadata['longitude'], metadata['latitude'])) print("Jobs (gpi, lon, lat):") print(jobs) # For this small test dataset it is only one job # # It is important here that the ISMN reader has a read_ts function that works by just using the `dataset_id`. In this # way the validation framework can go through the jobs and read the correct time series. # In[6]:
def test_ascat_ismn_validation():
    """
    Test the validation framework with some ISMN and ASCAT sample data.

    Runs a full validation (temporal matching to ASCAT, lin_cdf_match
    scaling, basic metrics) of ASCAT SSM against ISMN soil moisture and
    compares the netCDF results against reference values.

    Fix over the previous revision: the directory created by
    ``tempfile.mkdtemp()`` is now removed in a ``finally`` block, so
    repeated test runs no longer leak temporary directories.
    """
    import shutil

    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')
    static_layers_folder = os.path.join(os.path.dirname(__file__), '..',
                                        'test-data', 'sat', 'h_saf',
                                        'static_layer')

    ascat_reader = AscatSsmCdr(ascat_data_folder, ascat_grid_folder,
                               static_layer_path=static_layers_folder)
    ascat_reader.read_bulk = True

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    # Build the job list: one (gpi, lon, lat) tuple per ISMN time series.
    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture', min_depth=0, max_depth=0.1)
    for idx in ids:
        metadata = ismn_reader.metadata[idx]
        jobs.append((idx, metadata['longitude'], metadata['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    try:
        # Create the validation object.
        datasets = {
            'ISMN': {
                'class': ismn_reader,
                'columns': ['soil moisture']
            },
            'ASCAT': {
                'class': ascat_reader,
                'columns': ['sm'],
                'kwargs': {'mask_frozen_prob': 80,
                           'mask_snow_prob': 80,
                           'mask_ssf': True}
            }
        }

        period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

        process = Validation(
            datasets, 'ISMN', temporal_ref='ASCAT',
            scaling='lin_cdf_match',
            scaling_ref='ASCAT',
            metrics_calculators={
                (2, 2): metrics_calculators.BasicMetrics(
                    other_name='k1').calc_metrics},
            period=period)

        for job in jobs:
            results = process.calc(*job)
            netcdf_results_manager(results, save_path)

        results_fname = os.path.join(
            save_path, 'ASCAT.sm_with_ISMN.soil moisture.nc')

        # Reference values for this sample dataset.
        vars_should = [u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau',
                       u'BIAS', u'p_rho', u'rho', u'lat', u'R', u'p_R']
        n_obs_should = [384, 357, 482, 141, 251, 1927, 1887, 1652]
        rho_should = np.array([0.70022893, 0.53934574, 0.69356072,
                               0.84189808, 0.74206454, 0.30299741,
                               0.53143877, 0.62204134], dtype=np.float32)
        rmsd_should = np.array([7.72966719, 11.58347607, 14.57700157,
                                13.06224251, 12.90389824, 14.24668026,
                                21.19682884, 17.3883934], dtype=np.float32)

        with nc.Dataset(results_fname) as results:
            assert sorted(results.variables.keys()) == sorted(vars_should)
            assert sorted(results.variables['n_obs'][:].tolist()) == sorted(
                n_obs_should)
            nptest.assert_allclose(sorted(rho_should),
                                   sorted(results.variables['rho'][:]),
                                   rtol=1e-4)
            nptest.assert_allclose(sorted(rmsd_should),
                                   sorted(results.variables['RMSD'][:]),
                                   rtol=1e-4)
    finally:
        # mkdtemp() makes the caller responsible for removal; clean up so
        # repeated runs do not accumulate temp directories.
        shutil.rmtree(save_path, ignore_errors=True)
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    test_dir = os.path.dirname(__file__)

    ascat_data_folder = os.path.join(test_dir, '..', 'test-data', 'sat',
                                     'ascat', 'netcdf', '55R22')
    ascat_grid_folder = os.path.join(test_dir, '..', 'test-data', 'sat',
                                     'ascat', 'netcdf', 'grid')

    ascat_reader = AscatH25_SSM(ascat_data_folder, ascat_grid_folder)
    ascat_reader.read_bulk = True
    ascat_reader._load_grid_info()

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(test_dir, '..', 'test-data', 'ismn',
                                    'multinetwork', 'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    # One job per ISMN time series: (gpi, lon, lat).
    ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                      min_depth=0, max_depth=0.1)
    jobs = [(idx,
             ismn_reader.metadata[idx]['longitude'],
             ismn_reader.metadata[idx]['latitude'])
            for idx in ids]

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object.
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture']
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'kwargs': {
                'mask_frozen_prob': 80,
                'mask_snow_prob': 80,
                'mask_ssf': True
            }
        }
    }

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets,
        'ISMN',
        temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={
            (2, 2):
            metrics_calculators.BasicMetrics(other_name='k1').calc_metrics
        },
        period=period)

    for job in jobs:
        netcdf_results_manager(process.calc(*job), save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    # Expected contents of the results file for this sample dataset.
    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS',
        u'p_rho', u'rho', u'lat', u'R', u'p_R'
    ]
    n_obs_should = [360, 385, 1644, 1881, 1927, 479, 140, 251]
    rho_should = np.array([
        0.546187, 0.717398, 0.620892, 0.532465, 0.302997, 0.694713,
        0.840592, 0.742065
    ], dtype=np.float32)
    rmsd_should = np.array([
        11.536263, 7.545650, 17.451935, 21.193714, 14.246680, 14.494674,
        13.173215, 12.903898
    ], dtype=np.float32)

    with nc.Dataset(results_fname) as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == \
            sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)
# In[3]:

# NOTE(review): hard-coded absolute path to a developer workspace — this
# notebook cell will only run on that machine. Consider a relative path.
ismn_data_folder = '/data/Development/python/workspace/pytesmo/tests/test-data/ismn/format_header_values/'
ismn_reader = ISMN_Interface(ismn_data_folder)

# The validation is run based on jobs. A job consists of at least three
# lists or numpy arrays specifying the grid point index, its latitude and
# longitude. In the case of the ISMN we can use the `dataset_ids` that
# identify every time series in the downloaded ISMN data as our grid point
# index. We can then get longitude and latitude from the metadata of the
# dataset.
#
# **DO NOT CHANGE** the name ***jobs*** because it will be searched during
# the parallel processing!

# In[4]:

jobs = []

ids = ismn_reader.get_dataset_ids(variable='soil moisture',
                                  min_depth=0,
                                  max_depth=0.1)
for idx in ids:
    metadata = ismn_reader.metadata[idx]
    jobs.append((idx, metadata['longitude'], metadata['latitude']))

# Fixed: was a Python 2 print statement (`print jobs`), a SyntaxError under
# Python 3; the rest of the file uses the print() function.
print(jobs)

# For this small test dataset it is only one job
#
# It is important here that the ISMN reader has a read_ts function that
# works by just using the `dataset_id`. In this way the validation framework
# can go through the jobs and read the correct time series.

# In[5]:

data = ismn_reader.read_ts(ids[0])
# Fixed: was `print data.head()` (Python 2 print statement).
print(data.head())
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    here = os.path.dirname(__file__)

    ascat_data_folder = os.path.join(here, 'test-data', 'sat', 'ascat',
                                     'netcdf', '55R22')
    ascat_grid_folder = os.path.join(here, 'test-data', 'sat', 'ascat',
                                     'netcdf', 'grid')

    ascat_reader = AscatH25_SSM(ascat_data_folder, ascat_grid_folder)
    ascat_reader.read_bulk = True
    ascat_reader._load_grid_info()

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(here, 'test-data', 'ismn',
                                    'multinetwork', 'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    # One job per ISMN time series: (gpi, lon, lat).
    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture', min_depth=0, max_depth=0.1)
    jobs = [(idx,
             ismn_reader.metadata[idx]['longitude'],
             ismn_reader.metadata[idx]['latitude'])
            for idx in ids]

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object (legacy configuration-dict API).
    datasets = {
        'ISMN': {
            'class': ismn_reader,
            'columns': ['soil moisture'],
            'type': 'reference',
            'args': [],
            'kwargs': {}
        },
        'ASCAT': {
            'class': ascat_reader,
            'columns': ['sm'],
            'type': 'other',
            'args': [],
            'kwargs': {},
            'grids_compatible': False,
            'use_lut': False,
            'lut_max_dist': 30000
        }
    }

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    process = Validation(
        datasets=datasets,
        data_prep=DataPreparation(),
        temporal_matcher=temporal_matchers.BasicTemporalMatching(
            window=1 / 24.0, reverse=True),
        scaling='lin_cdf_match',
        scale_to_other=True,
        metrics_calculator=metrics_calculators.BasicMetrics(),
        period=period,
        cell_based_jobs=False)

    # Legacy API: calc() takes the whole job tuple, not unpacked arguments.
    for job in jobs:
        netcdf_results_manager(process.calc(job), save_path)

    results_fname = os.path.join(save_path,
                                 'ISMN.soil moisture_with_ASCAT.sm.nc')

    # Expected contents of the results file for this sample dataset.
    vars_should = [
        u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau', u'BIAS',
        u'p_rho', u'rho', u'lat', u'R', u'p_R'
    ]
    n_obs_should = [360, 385, 1644, 1881, 1927, 479, 140, 251]
    rho_should = np.array([
        0.54618734, 0.71739876, 0.62089276, 0.53246528, 0.30299741,
        0.69647062, 0.840593, 0.73913699
    ], dtype=np.float32)
    rmsd_should = np.array([
        11.53626347, 7.54565048, 17.45193481, 21.19371414, 14.24668026,
        14.27493, 13.173215, 12.59192371
    ], dtype=np.float32)

    with nc.Dataset(results_fname) as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == \
            sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]))
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]))
def test_ascat_ismn_validation():
    """
    Test processing framework with some ISMN and ASCAT sample data
    """
    ascat_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     '55R22')
    ascat_grid_folder = os.path.join(os.path.dirname(__file__), '..',
                                     'test-data', 'sat', 'ascat', 'netcdf',
                                     'grid')

    ascat_reader = AscatH25_SSM(ascat_data_folder, ascat_grid_folder)
    ascat_reader.read_bulk = True
    ascat_reader._load_grid_info()

    # Initialize ISMN reader
    ismn_data_folder = os.path.join(os.path.dirname(__file__), '..',
                                    'test-data', 'ismn', 'multinetwork',
                                    'header_values')
    ismn_reader = ISMN_Interface(ismn_data_folder)

    # One job per ISMN time series: (gpi, lon, lat) from the metadata.
    jobs = []
    ids = ismn_reader.get_dataset_ids(
        variable='soil moisture', min_depth=0, max_depth=0.1)
    for ds_id in ids:
        meta = ismn_reader.metadata[ds_id]
        jobs.append((ds_id, meta['longitude'], meta['latitude']))

    # Create the variable ***save_path*** which is a string representing the
    # path where the results will be saved. **DO NOT CHANGE** the name
    # ***save_path*** because it will be searched during the parallel
    # processing!
    save_path = tempfile.mkdtemp()

    # Create the validation object from per-dataset configuration dicts.
    ismn_cfg = {
        'class': ismn_reader,
        'columns': ['soil moisture']
    }
    ascat_cfg = {
        'class': ascat_reader,
        'columns': ['sm'],
        'kwargs': {'mask_frozen_prob': 80,
                   'mask_snow_prob': 80,
                   'mask_ssf': True}
    }
    datasets = {'ISMN': ismn_cfg, 'ASCAT': ascat_cfg}

    period = [datetime(2007, 1, 1), datetime(2014, 12, 31)]

    basic_metrics = metrics_calculators.BasicMetrics(other_name='k1')
    process = Validation(
        datasets, 'ISMN', temporal_ref='ASCAT',
        scaling='lin_cdf_match',
        scaling_ref='ASCAT',
        metrics_calculators={(2, 2): basic_metrics.calc_metrics},
        period=period)

    for job in jobs:
        results = process.calc(*job)
        netcdf_results_manager(results, save_path)

    results_fname = os.path.join(save_path,
                                 'ASCAT.sm_with_ISMN.soil moisture.nc')

    # Expected contents of the results file for this sample dataset.
    vars_should = [u'n_obs', u'tau', u'gpi', u'RMSD', u'lon', u'p_tau',
                   u'BIAS', u'p_rho', u'rho', u'lat', u'R', u'p_R']
    n_obs_should = [360, 385, 1644, 1881, 1927, 479, 140, 251]
    rho_should = np.array([0.546187, 0.717398, 0.620892, 0.532465,
                           0.302997, 0.694713, 0.840592, 0.742065],
                          dtype=np.float32)
    rmsd_should = np.array([11.536263, 7.545650, 17.451935, 21.193714,
                            14.246680, 14.494674, 13.173215, 12.903898],
                           dtype=np.float32)

    with nc.Dataset(results_fname) as results:
        assert sorted(results.variables.keys()) == sorted(vars_should)
        assert sorted(results.variables['n_obs'][:].tolist()) == \
            sorted(n_obs_should)
        nptest.assert_allclose(sorted(rho_should),
                               sorted(results.variables['rho'][:]),
                               rtol=1e-4)
        nptest.assert_allclose(sorted(rmsd_should),
                               sorted(results.variables['RMSD'][:]),
                               rtol=1e-4)