def test_subset_collection_as_none(tmpdir):
    """A ``None`` collection argument must raise ``MissingParameterValue``."""
    kwargs = {
        "time": ("2085-01-16", "2120-12-16"),
        "output_dir": tmpdir,
        "file_namer": "simple",
    }
    with pytest.raises(MissingParameterValue):
        subset(None, **kwargs)
def test_subset_t_with_invalid_date(tmpdir):
    """A time range outside the dataset's coverage raises, and the message
    names the offending dataset.

    Bug fix: the original asserted ``exc.value == "..."`` — comparing the
    exception *object* to a string, which is always False. Compare
    ``str(exc.value)`` instead so the assertion actually checks the message.
    """
    with pytest.raises(Exception) as exc:
        subset(
            CMIP5_IDS[1],
            time=("1985-01-16", "2002-12-16"),
            area=("0", "-10", "120", "40"),
            output_dir=tmpdir,
            file_namer="simple",
        )
    assert str(exc.value) == (
        "No files found in given time range for "
        "cmip5.output1.MOHC.HadGEM2-ES.rcp85.mon.atmos.Amon.r1i1p1.latest.tas"
    )
def test_time_is_none(tmpdir, load_esgf_test_data):
    """With ``time=None`` the output spans the full time axis of the source."""
    result = subset(
        CMIP5_IDS[1],
        time=None,
        area=("0", "-10", "120", "40"),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    source_glob = os.path.join(
        CONFIG["project:cmip5"]["base_dir"],
        "output1/MOHC/HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc",
    )
    ds_full = xr.open_mfdataset(source_glob, use_cftime=True)
    ds_out = xr.open_dataset(result.file_uris[0], use_cftime=True)

    # Compare the first and last timesteps of output vs. source at day precision.
    fmt = "%Y-%m-%d"
    assert ds_out.time.values.min().strftime(fmt) == ds_full.time.values.min().strftime(fmt)
    assert ds_out.time.values.max().strftime(fmt) == ds_full.time.values.max().strftime(fmt)
def _handler(self, request, response):
    """Subset the requested dataset/variable and attach the resulting
    NetCDF files to the WPS response as a metalink document."""
    version = parse_wps_input(request.inputs, 'dataset_version', must_exist=True)
    var_name = parse_wps_input(request.inputs, 'variable', must_exist=True)
    collection = f'{version}.{var_name}'

    subset_args = dict(
        collection=collection,
        time=parse_wps_input(request.inputs, 'time', default=None),
        area=parse_wps_input(request.inputs, 'area', default=None),
        apply_fixes=False,
        output_dir=self.workdir,
        file_namer="simple",
        output_type="netcdf",
    )

    uris = subset(**subset_args).file_uris
    ml4 = build_metalink(
        "subset-cru_ts-result",
        "Subsetting result as NetCDF files.",
        self.workdir,
        uris,
    )
    populate_response(response, "subset", self.workdir, subset_args, collection, ml4)
    return response
def test_subset_t_y_x(tmpdir, load_esgf_test_data):
    """Time + lat/lon-box subsetting collapses the 2x2 grid to one cell."""
    source_glob = (
        f"{MINI_ESGF_MASTER_DIR}/"
        "test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES/rcp85/mon/"
        "atmos/Amon/r1i1p1/latest/tas/*.nc"
    )
    ds_full = xr.open_mfdataset(source_glob, use_cftime=True, combine="by_coords")
    assert ds_full.tas.shape == (3530, 2, 2)

    result = subset(
        CMIP5_IDS[1],
        time=("2085-01-16", "2120-12-16"),
        area=(0, -10, 120, 40),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    ds_out = xr.open_dataset(result.file_uris[0], use_cftime=True)
    assert ds_out.tas.shape == (433, 1, 1)
def test_subset_t(tmpdir):
    """A plain time-range subset yields 433 monthly timesteps."""
    result = subset(
        CMIP5_IDS[1],
        time=("2085-01-16", "2120-12-16"),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)
    ds_out = xr.open_dataset(result.file_paths[0], use_cftime=True)
    assert ds_out.time.shape == (433,)
def run_subset(args):
    """Run a subset after resolving a file-list collection to a directory;
    return the output file URIs."""
    # TODO: handle lazy load of daops
    from daops.ops.subset import subset

    # Work on a copy so the caller's args dict is never mutated.
    kwargs = deepcopy(args)
    kwargs["collection"] = resolve_collection_if_files(args.get("collection"))
    return subset(**kwargs).file_uris
def test_subset_zostoga_with_fix(tmpdir):
    """The zostoga fix strips the spurious ``lev`` dimension from the output."""
    result = subset(
        CMIP5_IDS[0],
        time=("2085-01-16", "2120-12-16"),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)
    fixed_ds = xr.open_dataset(result.file_paths[0], use_cftime=True)
    assert fixed_ds.time.shape == (192,)
    assert "lev" not in fixed_ds.dims
def test_subset_with_catalog_time_invariant(tmpdir, load_esgf_test_data):
    """Subset a time-invariant (fx) c3s-cmip6 dataset resolved via the catalog.

    Fix: removed the redundant ``f`` prefix from a string literal with no
    placeholders (ruff F541).
    """
    # c3s-cmip6 dataset so will use catalog in consolidate
    result = subset(
        "c3s-cmip6.ScenarioMIP.MPI-M.MPI-ESM1-2-LR.ssp370.r1i1p1f1.fx.mrsofc.gn.v20190815",
        output_dir=tmpdir,
        output_type="nc",
        file_namer="standard",
    )
    _check_output_nc(result, fname="mrsofc_fx_MPI-ESM1-2-LR_ssp370_r1i1p1f1_gn.nc")
def test_parameter_classes_as_args(tmpdir, load_esgf_test_data):
    """Parameter wrapper objects are accepted in place of raw tuples/strings."""
    coll = collection_parameter.CollectionParameter(CMIP5_IDS[1])
    t_range = time_parameter.TimeParameter(("2085-01-16", "2120-12-16"))
    bbox = area_parameter.AreaParameter((0, -10, 120, 40))

    result = subset(
        coll, time=t_range, area=bbox, output_dir=tmpdir, file_namer="simple"
    )
    _check_output_nc(result)

    ds_out = xr.open_dataset(result.file_uris[0], use_cftime=True)
    assert ds_out.tas.shape == (433, 1, 1)
def test_subset_with_fix_and_multiple_ids(zostoga_id, tmpdir):
    """Each parametrized zostoga dataset subsets cleanly and has ``lev`` fixed away."""
    result = subset(
        zostoga_id,
        time=("2008-01-16", "2028-12-16"),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    fixed_ds = xr.open_dataset(result.file_paths[0], use_cftime=True)
    assert fixed_ds.time.shape in [(251,), (252,)]
    # checking that lev has been removed by fix
    assert "lev" not in fixed_ds.dims
    fixed_ds.close()
def test_time_invariant_subset_standard_name(tmpdir, load_esgf_test_data):
    """An area-only subset of an fx dataset is named by the standard namer."""
    collection = "CMIP6.ScenarioMIP.IPSL.IPSL-CM6A-LR.ssp119.r1i1p1f1.fx.mrsofc.gr.v20190410"
    result = subset(
        collection,
        area=(5.0, 10.0, 300.0, 80.0),
        output_dir=tmpdir,
        output_type="nc",
        file_namer="standard",
    )
    expected_name = "mrsofc_fx_IPSL-CM6A-LR_ssp119_r1i1p1f1_gr.nc"
    assert expected_name in result.file_uris[0]
def call(self, args):
    """Run a subset into a fresh temp directory under the configured output dir
    and return the resulting file paths."""
    # TODO: handle lazy load of daops
    from daops.ops.subset import subset

    kwargs = parameterise.parameterise(
        collection=args.get('collection'),
        time=args.get('time'),
        level=args.get('level'),
        area=args.get('area'),
    )
    # Operator config takes precedence over the parameterised values.
    kwargs.update(self.config)
    kwargs['output_dir'] = tempfile.mkdtemp(
        dir=self.config['output_dir'], prefix='subset_'
    )
    return subset(**kwargs).file_paths
def test_subset_with_catalog(tmpdir, load_esgf_test_data):
    """Subset a c3s-cmip6 dataset over a time range via the catalog."""
    # c3s-cmip6 dataset so will use catalog in consolidate
    collection = "c3s-cmip6.ScenarioMIP.INM.INM-CM5-0.ssp245.r1i1p1f1.Amon.rlds.gr1.v20190619"
    result = subset(
        collection,
        time=("2028-01-16", "2050-12-16"),
        output_dir=tmpdir,
        output_type="nc",
        file_namer="standard",
    )
    _check_output_nc(
        result, fname="rlds_Amon_INM-CM5-0_ssp245_r1i1p1f1_gr1_20280116-20501116.nc"
    )
def test_subset_t_z_y_x(tmpdir, load_esgf_test_data):
    """Time + level + area subsetting of a 4-D (time, plev, lat, lon) field."""
    source = (
        f"{MINI_ESGF_MASTER_DIR}/"
        "test_data/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/"
        "GFDL-ESM4/historical/r1i1p1f1/Amon/o3/gr1/v20190726/"
        "o3_Amon_GFDL-ESM4_historical_r1i1p1f1_gr1_185001-194912.nc"
    )
    ds_full = xr.open_mfdataset(source, use_cftime=True, combine="by_coords")
    assert ds_full.o3.shape == (1200, 19, 2, 3)

    expected_plevs = [
        100000.0, 92500.0, 85000.0, 70000.0, 60000.0, 50000.0, 40000.0,
        30000.0, 25000.0, 20000.0, 15000.0, 10000.0, 7000.0, 5000.0,
        3000.0, 2000.0, 1000.0, 500.0, 100.0,
    ]
    assert list(ds_full.o3.coords["plev"].values) == expected_plevs

    result = subset(
        CMIP6_IDS[0],
        time=("1890-01-16", "1901-12-16"),
        area=(0, -10, 120, 40),
        level=(10000, 850.0),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    ds_out = xr.open_dataset(result.file_uris[0], use_cftime=True)
    assert ds_out.o3.shape == (143, 6, 1, 1)
def test_subset_zostoga_with_apply_fixes_false(tmpdir, load_esgf_test_data):
    """With ``apply_fixes=False`` the spurious ``lev`` dimension survives."""
    result = subset(
        CMIP5_IDS[0],
        time=("2085-01-16", "2120-12-16"),
        output_dir=tmpdir,
        file_namer="simple",
        apply_fixes=False,
    )
    _check_output_nc(result)

    unfixed_ds = xr.open_dataset(result.file_uris[0], use_cftime=True)
    assert unfixed_ds.time.shape == (192,)
    # lev should still be in ds.dims because fix hasn't been applied
    assert "lev" in unfixed_ds.dims
def test_subset_t_y_x(tmpdir):
    """Time + area subsetting collapses the 2x2 tas grid to a single cell."""
    source_glob = (
        "tests/mini-esgf-data/test_data/badc/cmip5/data/cmip5/output1/MOHC/"
        "HadGEM2-ES/rcp85/mon/atmos/Amon/r1i1p1/latest/tas/*.nc"
    )
    ds_full = xr.open_mfdataset(source_glob, use_cftime=True, combine='by_coords')
    assert ds_full.tas.shape == (3530, 2, 2)

    result = subset(
        CMIP5_IDS[1],
        time=("2085-01-16", "2120-12-16"),
        area=(0, -10, 120, 40),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    ds_out = xr.open_dataset(result.file_paths[0], use_cftime=True)
    assert ds_out.tas.shape == (433, 1, 1)
def test_end_time_is_none(tmpdir):
    """An open-ended range ("start/") subsets from start to the data's end."""
    result = subset(
        CMIP5_IDS[2],
        time="1940-10-14/",
        area=("0", "-10", "120", "40"),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    source_glob = os.path.join(
        CONFIG["project:cmip5"]["base_dir"],
        "cmip5/output1/MOHC/HadGEM2-ES/historical/mon/land/Lmon/r1i1p1/latest/rh/*.nc",
    )
    ds_full = xr.open_mfdataset(source_glob, use_cftime=True)
    ds_out = xr.open_dataset(result.file_paths[0], use_cftime=True)

    fmt = "%Y-%m-%d"
    # First timestep at or after the requested start date.
    assert ds_out.time.values.min().strftime(fmt) == "1940-10-16"
    # Last timestep matches the end of the source data.
    assert ds_out.time.values.max().strftime(fmt) == ds_full.time.values.max().strftime(fmt)
def test_subset_with_file_mapper(tmpdir, load_esgf_test_data):
    """A FileMapper over explicit file paths subsets like a dataset id."""
    base = (
        f"{MINI_ESGF_MASTER_DIR}/test_data/badc/cmip5/data/cmip5/output1/MOHC/HadGEM2-ES"
        "/rcp85/mon/atmos/Amon/r1i1p1/latest/tas"
    )
    file_paths = [
        f"{base}/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_200512-203011.nc",
        f"{base}/tas_Amon_HadGEM2-ES_rcp85_r1i1p1_203012-205511.nc",
    ]

    result = subset(
        FileMapper(file_paths),
        time=("2008-01-16", "2028-12-16"),
        output_dir=tmpdir,
        output_type="nc",
        file_namer="standard",
    )
    assert "tas_mon_HadGEM2-ES_rcp85_r1i1p1_20080116-20281216.nc" in result.file_uris[0]
def test_subset_t_z_y_x(tmpdir):
    """Time + level + area subsetting of the 4-D o3 field."""
    source = (
        "tests/mini-esgf-data/test_data/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/"
        "GFDL-ESM4/historical/r1i1p1f1/Amon/o3/gr1/v20190726/"
        "o3_Amon_GFDL-ESM4_historical_r1i1p1f1_gr1_185001-194912.nc"
    )
    ds_full = xr.open_mfdataset(source, use_cftime=True, combine='by_coords')
    assert ds_full.o3.shape == (1200, 19, 2, 3)

    expected_plevs = [
        100000.0, 92500.0, 85000.0, 70000.0, 60000.0, 50000.0, 40000.0,
        30000.0, 25000.0, 20000.0, 15000.0, 10000.0, 7000.0, 5000.0,
        3000.0, 2000.0, 1000.0, 500.0, 100.0,
    ]
    assert list(ds_full.o3.coords['plev'].values) == expected_plevs

    result = subset(
        CMIP6_IDS[0],
        time=("1890-01-16", "1901-12-16"),
        area=(0, -10, 120, 40),
        level=(10000, 850.0),
        output_dir=tmpdir,
        file_namer="simple",
    )
    _check_output_nc(result)

    ds_out = xr.open_dataset(result.file_paths[0], use_cftime=True)
    assert ds_out.o3.shape == (143, 6, 1, 1)
def run_subset(args):
    """Invoke the daops subset operation with ``args`` as keyword arguments
    and return the output file URIs."""
    # TODO: handle lazy load of daops
    from daops.ops.subset import subset

    return subset(**args).file_uris
def test_subset_no_collection(tmpdir):
    """Omitting the positional collection argument raises ``TypeError``."""
    kwargs = {
        "time": ("2085-01-16", "2120-12-16"),
        "output_dir": tmpdir,
        "file_namer": "simple",
    }
    with pytest.raises(TypeError):
        subset(**kwargs)