def _draw_time_series_plot(evaluation, plot_config):
    """Draw a time series plot for the given evaluation and plot settings."""
    time_range_info = plot_config['time_range']
    ref_ds = evaluation.ref_dataset
    target_ds = evaluation.target_datasets

    if time_range_info == 'monthly':
        ref_ds.values, ref_ds.times = utils.calc_climatology_monthly(ref_ds)

        for t in target_ds:
            t.values, t.times = utils.calc_climatology_monthly(t)
    else:
        logger.error('Invalid time range provided. Only monthly is supported '
                     'at the moment')
        return

    if evaluation.subregions:
        for bound_count, bound in enumerate(evaluation.subregions):
            results = []
            labels = []

            subset = dsp.subset(bound, ref_ds,
                                subregion_name="R{}_{}".format(bound_count,
                                                               ref_ds.name))
            results.append(utils.calc_time_series(subset))
            labels.append(subset.name)

            for t in target_ds:
                subset = dsp.subset(bound, t,
                                    subregion_name="R{}_{}".format(bound_count,
                                                                   t.name))
                results.append(utils.calc_time_series(subset))
                labels.append(subset.name)

            plots.draw_time_series(np.array(results), ref_ds.times, labels,
                                   'R{}'.format(bound_count),
                                   **plot_config.get('optional_args', {}))
    else:
        results = []
        labels = []

        results.append(utils.calc_time_series(ref_ds))
        labels.append(ref_ds.name)

        for t in target_ds:
            results.append(utils.calc_time_series(t))
            labels.append(t.name)

        plots.draw_time_series(np.array(results), ref_ds.times, labels,
                               'time_series',
                               **plot_config.get('optional_args', {}))
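# A minimal, illustrative plot_config for the function above, limited to the
# keys it actually reads; the 'optional_args' entries are hypothetical keyword
# arguments forwarded verbatim to plots.draw_time_series:
example_plot_config = {
    'time_range': 'monthly',
    'optional_args': {},  # e.g. plotter keyword arguments, if any
}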
def _run_subregion_evaluation(self):
    results = []
    for target in self.target_datasets:
        results.append([])
        for metric in self.metrics:
            results[-1].append([])
            for subregion in self.subregions:
                # Subset the reference and target dataset with the
                # subregion information.
                new_ref = DSP.subset(subregion, self.ref_dataset)
                new_tar = DSP.subset(subregion, target)

                run_result = metric.run(new_ref, new_tar)
                results[-1][-1].append(run_result)
    return results
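# Shape note for _run_subregion_evaluation's return value: the nested lists
# built above are indexed as
#     results[target_index][metric_index][subregion_index]
# e.g. results[1][0][2] is the first metric's result for the second target
# dataset over the third subregion.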
def test_subset(self):
    subset = dp.subset(self.target_dataset, self.subregion)

    self.assertEqual(subset.lats.shape[0], 82)
    self.assertSequenceEqual(list(np.array(range(-81, 82, 2))),
                             list(subset.lats))
    self.assertEqual(subset.lons.shape[0], 162)
    self.assertEqual(subset.times.shape[0], 37)
    self.assertEqual(subset.values.shape, (37, 82, 162))
def test_subset(self):
    subset = dp.subset(self.subregion, self.target_dataset)

    self.assertEqual(subset.lats.shape[0], 82)
    self.assertSequenceEqual(list(np.array(range(-81, 82, 2))),
                             list(subset.lats))
    self.assertEqual(subset.lons.shape[0], 162)
    self.assertEqual(subset.times.shape[0], 37)
    self.assertEqual(subset.values.shape, (37, 82, 162))
def _run_subregion_evaluation(self):
    results = []
    new_refs = [DSP.subset(s, self.ref_dataset) for s in self.subregions]

    for target in self.target_datasets:
        results.append([])
        new_targets = [DSP.subset(s, target) for s in self.subregions]

        for metric in self.metrics:
            results[-1].append([])

            for i in range(len(self.subregions)):
                new_ref = new_refs[i]
                new_tar = new_targets[i]

                run_result = metric.run(new_ref, new_tar)
                results[-1][-1].append(run_result)

    return convert_evaluation_result(results, subregion=True)
def _run_subregion_unary_evaluation(self):
    unary_results = []
    if self.ref_dataset:
        new_refs = [DSP.subset(s, self.ref_dataset) for s in self.subregions]

    new_targets = [
        [DSP.subset(s, t) for s in self.subregions]
        for t in self.target_datasets
    ]

    for metric in self.unary_metrics:
        unary_results.append([])

        for i in range(len(self.subregions)):
            unary_results[-1].append([])

            if self.ref_dataset:
                unary_results[-1][-1].append(metric.run(new_refs[i]))

            for t in range(len(self.target_datasets)):
                unary_results[-1][-1].append(metric.run(new_targets[t][i]))

    return convert_unary_evaluation_result(unary_results, subregion=True)
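# Shape note for _run_subregion_unary_evaluation: the nested lists are
# indexed as
#     unary_results[metric_index][subregion_index]
# and each innermost list holds the reference dataset's result first (when a
# ref_dataset is set) followed by one result per target dataset.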
def test_subset_without_start_index(self):
    self.subregion = ds.Bounds(
        lat_min=-81, lat_max=81,
        lon_min=-161, lon_max=161,
    )
    subset = dp.subset(self.target_dataset, self.subregion)
    times = np.array([datetime.datetime(year, month, 1)
                      for year in range(2000, 2010)
                      for month in range(1, 13)])

    self.assertEqual(subset.lats.shape[0], 82)
    self.assertSequenceEqual(list(np.array(range(-81, 82, 2))),
                             list(subset.lats))
    self.assertEqual(subset.lons.shape[0], 162)
    self.assertEqual(subset.values.shape, (120, 82, 162))
    self.assertEqual(subset.times.shape[0], 120)
    np.testing.assert_array_equal(subset.times, times)
def test_subset_without_start_index(self):
    self.subregion = ds.Bounds(
        -81, 81,
        -161, 161,
    )
    subset = dp.subset(self.target_dataset, self.subregion)
    times = np.array([datetime.datetime(year, month, 1)
                      for year in range(2000, 2010)
                      for month in range(1, 13)])

    self.assertEqual(subset.lats.shape[0], 82)
    self.assertSequenceEqual(list(np.array(range(-81, 82, 2))),
                             list(subset.lats))
    self.assertEqual(subset.lons.shape[0], 162)
    self.assertEqual(subset.values.shape, (120, 82, 162))
    self.assertEqual(subset.times.shape[0], 120)
    np.testing.assert_array_equal(subset.times, times)
if time_info['maximum_overlap_period']:
    start_time, end_time = utils.get_temporal_overlap(
        [ref_dataset] + model_datasets)
    print('Maximum overlap period')
    print('start_time:', start_time)
    print('end_time:', end_time)

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

if ref_data_info['data_source'] == 'rcmed':
    min_lat = np.max([min_lat, ref_dataset.lats.min()])
    max_lat = np.min([max_lat, ref_dataset.lats.max()])
    min_lon = np.max([min_lon, ref_dataset.lons.min()])
    max_lon = np.min([max_lon, ref_dataset.lons.max()])

bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

ref_dataset = dsp.subset(bounds, ref_dataset)
if ref_dataset.temporal_resolution() != temporal_resolution:
    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)

for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.subset(bounds, dataset)
    if dataset.temporal_resolution() != temporal_resolution:
        # Rebin the spatially subsetted dataset to the target resolution.
        model_datasets[idata] = dsp.temporal_rebin(model_datasets[idata],
                                                   temporal_resolution)

# Temporally subset both observation and model datasets
# for the user-specified season.
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

ref_dataset = dsp.temporal_subset(month_start, month_end, ref_dataset,
                                  average_each_year)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.temporal_subset(month_start, month_end,
                                                dataset, average_each_year)
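# For reference, a minimal illustrative time_info mapping with the keys the
# block above reads (the values shown are hypothetical; the real dictionary
# is parsed from the run configuration file):
example_time_info = {
    'maximum_overlap_period': True,
    'month_start': 6,
    'month_end': 8,
    'average_each_year': False,
}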
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)

""" Step 3: Resample Datasets so they are the same shape """
print("Resampling datasets ...")
print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)

print("... temporal")
CRU31 = dsp.temporal_rebin(CRU31, temporal_resolution='monthly')

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.temporal_rebin(
        target_datasets[member], temporal_resolution='monthly')
    target_datasets[member] = dsp.subset(EVAL_BOUNDS, target_datasets[member])

# Regrid
print("... regrid")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.spatial_regrid(target_datasets[member],
                                                 new_lats, new_lons)

# Find the mean values. A related helper also exists in utils as
# calc_climatology_year(dataset).
CRU31.values = utils.calc_temporal_mean(CRU31)
""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """ print("Working with the rcmed interface to get CRU3.1 Daily Precipitation") # the dataset_id and the parameter id were determined from # https://rcmes.jpl.nasa.gov/content/data-rcmes-database CRU31 = rcmed.parameter_dataset( 10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END) """ Step 3: Processing datasets so they are the same shape ... """ print("Processing datasets so they are the same shape") CRU31 = dsp.water_flux_unit_conversion(CRU31) CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly') for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS) target_datasets[member] = dsp.water_flux_unit_conversion(target_datasets[ member]) target_datasets[member] = dsp.normalize_dataset_datetimes( target_datasets[member], 'monthly') print("... spatial regridding") new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.spatial_regrid( target_datasets[member], new_lats, new_lons)
def test_out_of_dataset_bounds_start(self):
    self.subregion.start = datetime.datetime(1999, 1, 1)
    with self.assertRaises(ValueError):
        dp.subset(self.subregion, self.target_dataset)
def test_out_of_dataset_bounds_lat_min(self):
    self.subregion.lat_min = -90
    with self.assertRaises(ValueError):
        dp.subset(self.target_dataset, self.subregion)
LON_MAX = 55
START = datetime.datetime(1999, 1, 1)
END = datetime.datetime(2000, 12, 1)

SEASON_MONTH_START = 1
SEASON_MONTH_END = 12

EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset,
                                    datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)

# Load the metrics for the evaluation.
mean_bias = metrics.MeanBias()
# These versions of the metrics require seasonal bounds prior to running.
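# A minimal sketch (not part of the original script) of how the prepared
# datasets and metric above are typically wired together in OCW, ignoring
# the seasonal-bounds detail noted in the comment just above:
import ocw.evaluation as evaluation  # assumed import for this sketch

bias_evaluation = evaluation.Evaluation(ref_dataset,       # reference
                                        [target_dataset],  # targets
                                        [mean_bias])       # metrics
bias_evaluation.run()
results = bias_evaluation.results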
if time_info['maximum_overlap_period']:
    start_time, end_time = utils.get_temporal_overlap(
        [ref_dataset] + model_datasets)
    print('Maximum overlap period')
    print('start_time:', start_time)
    print('end_time:', end_time)

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

if ref_data_info['data_source'] == 'rcmed':
    min_lat = np.max([min_lat, ref_dataset.lats.min()])
    max_lat = np.min([max_lat, ref_dataset.lats.max()])
    min_lon = np.max([min_lon, ref_dataset.lons.min()])
    max_lon = np.min([max_lon, ref_dataset.lons.max()])

bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

ref_dataset = dsp.subset(ref_dataset, bounds)
if ref_dataset.temporal_resolution() != temporal_resolution:
    ref_dataset = dsp.temporal_rebin(ref_dataset, temporal_resolution)

for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Rebin the spatially subsetted dataset to the target resolution.
        model_datasets[idata] = dsp.temporal_rebin(model_datasets[idata],
                                                   temporal_resolution)

# Temporally subset both observation and model datasets
# for the user-specified season.
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

ref_dataset = dsp.temporal_subset(ref_dataset, month_start, month_end,
                                  average_each_year)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.temporal_subset(dataset, month_start,
                                                month_end, average_each_year)
    max_lon = np.min([max_lon, dataset.lons.max()])

if 'boundary_type' not in space_info:
    bounds = Bounds(lat_min=min_lat, lat_max=max_lat,
                    lon_min=min_lon, lon_max=max_lon,
                    start=start_time, end=end_time)
else:
    bounds = Bounds(boundary_type=space_info['boundary_type'],
                    start=start_time, end=end_time)

for i, dataset in enumerate(datasets):
    datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Rebin the spatially subsetted dataset to the target resolution.
        datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# For now we will treat the first listed dataset as the reference dataset for
# evaluation purposes.
for i, dataset in enumerate(datasets):
    datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                      average_each_year)
import ocw.dataset as ds
import ocw.data_source.local as local
import ocw.dataset_processor as dsp
import ocw.plotter as plotter

import numpy as np
import numpy.ma as ma

''' data source: https://dx.doi.org/10.6084/m9.figshare.3753321.v1
    AOD_monthly_2000-Mar_2016-FEB_from_MISR_L3_JOINT.nc is publicly available.'''
dataset = local.load_file(
    'AOD_monthly_2000-MAR_2016-FEB_from_MISR_L3_JOINT.nc',
    'nonabsorbing_ave')

''' Subset the data for East Asia. '''
east_asia_bounds = ds.Bounds(lat_min=20, lat_max=57.7, lon_min=90, lon_max=150)
dataset = dsp.subset(dataset, east_asia_bounds)

'''The original dataset includes nonabsorbing AOD values between March 2000
and February 2016. dsp.temporal_subset will extract data in
September-October-November.'''
dataset_SON = dsp.temporal_subset(
    dataset, month_start=9, month_end=11, average_each_year=True)

ny, nx = dataset_SON.values.shape[1:]

# Multi-year mean AOD.
clim_aod = ma.zeros([3, ny, nx])

clim_aod[0, :] = ma.mean(dataset_SON.values, axis=0)  # 16-year mean
clim_aod[1, :] = ma.mean(dataset_SON.values[-5:, :], axis=0)  # last 5-year mean
clim_aod[2, :] = dataset_SON.values[-1, :]  # the last year's value
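# One way (a sketch, mirroring the plotter usage elsewhere in these examples;
# the fname value is illustrative) to visualize the multi-year SON mean
# computed above:
plotter.draw_contour_map(clim_aod[0], dataset_SON.lats, dataset_SON.lons,
                         fname='MISR_nonabsorbing_AOD_SON_mean')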
    parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time)

""" Step 3: Resample Datasets so they are the same shape """
print("CRU31_Dataset.values shape: (times, lats, lons) - %s" %
      (cru31_dataset.values.shape,))
print("KNMI_Dataset.values shape: (times, lats, lons) - %s" %
      (knmi_dataset.values.shape,))
print("Our two datasets have a mismatch in time. We will subset on time to %s years\n"
      % YEARS)

# Create a Bounds object to use for subsetting
new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)
knmi_dataset = dsp.subset(new_bounds, knmi_dataset)

print("CRU31_Dataset.values shape: (times, lats, lons) - %s" %
      (cru31_dataset.values.shape,))
print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" %
      (knmi_dataset.values.shape,))

print("Temporally Rebinning the Datasets to a Single Timestep")
# To run FULL temporal Rebinning use a timedelta > 366 days. I used 999 in this example
knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=999))
cru31_dataset = dsp.temporal_rebin(cru31_dataset, datetime.timedelta(days=999))

print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape,))
print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape,))

""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on a 0.5 degree step
new_lons = np.arange(min_lon, max_lon, 0.5)
""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """ print("Working with the rcmed interface to get CRU3.1 Daily Precipitation") # the dataset_id and the parameter id were determined from # https://rcmes.jpl.nasa.gov/content/data-rcmes-database CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END) """ Step 3: Processing datasets so they are the same shape ... """ print("Processing datasets so they are the same shape") CRU31 = dsp.water_flux_unit_conversion(CRU31) CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly') for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.subset(EVAL_BOUNDS, target_datasets[member]) target_datasets[member] = dsp.water_flux_unit_conversion(target_datasets[member]) target_datasets[member] = dsp.normalize_dataset_datetimes(target_datasets[member], 'monthly') print("... spatial regridding") new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.spatial_regrid(target_datasets[member], new_lats, new_lons) #find climatology monthly for obs and models CRU31.values, CRU31.times = utils.calc_climatology_monthly(CRU31)
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print("Working with the rcmed interface to get CRU3.1 Daily Precipitation")
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)

""" Step 3: Resample Datasets so they are the same shape """
print("Resampling datasets")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
CRU31 = dsp.temporal_rebin(CRU31, datetime.timedelta(days=30))

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(EVAL_BOUNDS, target_datasets[member])
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.temporal_rebin(target_datasets[member],
                                                 datetime.timedelta(days=30))

""" Spatially Regrid the Dataset Objects to a user defined grid """
# Using the bounds we will create a new set of lats and lons
print("Regridding datasets")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.spatial_regrid(target_datasets[member],
                                                 new_lats, new_lons)

# Make the model ensemble
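# A sketch of the ensemble step the trailing comment above introduces, in the
# style of the other OCW multi-model examples (dsp.ensemble builds the mean
# of the member datasets; appending it makes the ensemble one more target):
target_datasets.append(dsp.ensemble(target_datasets))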
""" Step 3: Resample Datasets so they are the same shape """ print("Temporally Rebinning the Datasets to an Annual Timestep") # To run annual temporal Rebinning use a timedelta of 360 days. knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=360)) wrf311_dataset = dsp.temporal_rebin(wrf311_dataset, datetime.timedelta(days=360)) cru31_dataset = dsp.temporal_rebin(cru31_dataset, datetime.timedelta(days=360)) # Running Temporal Rebin early helps negate the issue of datasets being on different # days of the month (1st vs. 15th) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) # Subset our model datasets so they are the same size knmi_dataset = dsp.subset(new_bounds, knmi_dataset) wrf311_dataset = dsp.subset(new_bounds, wrf311_dataset) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 1/2 degree step new_lons = np.arange(min_lon, max_lon, 0.5) new_lats = np.arange(min_lat, max_lat, 0.5) # Spatially regrid datasets using the new_lats, new_lons numpy arrays knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) wrf311_dataset = dsp.spatial_regrid(wrf311_dataset, new_lats, new_lons) cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons) # Generate an ensemble dataset from knmi and wrf models ensemble_dataset = dsp.ensemble([knmi_dataset, wrf311_dataset])
]
model_dataset_season = [
    dsp.temporal_subset(dataset, month_start, month_end,
                        average_each_year=True)
    for dataset in model_dataset_subset
]

""" Spatial subset of obs_dataset and generate time series """
# region index 0-6: NW, SW, NGP, SGP, MW, NE, SE
obs_timeseries = np.zeros([nyear, n_region])
model_timeseries = np.zeros([nmodel, nyear, n_region])

for iregion in np.arange(n_region):
    obs_timeseries[:, iregion] = utils.calc_time_series(
        dsp.subset(obs_dataset_season, regional_bounds[iregion]))

    for imodel in np.arange(nmodel):
        model_timeseries[imodel, :, iregion] = utils.calc_time_series(
            dsp.subset(model_dataset_season[imodel], regional_bounds[iregion]))

year = np.arange(nyear)

regional_trends_obs = np.zeros(n_region)
regional_trends_obs_error = np.zeros(n_region)
regional_trends_model = np.zeros([nmodel, n_region])
regional_trends_model_error = np.zeros([nmodel, n_region])
regional_trends_ens = np.zeros(n_region)
regional_trends_ens_error = np.zeros(n_region)

for iregion in np.arange(n_region):
    regional_trends_obs[iregion], regional_trends_obs_error[
    model_datasets)
print('Maximum overlap period')
print('start_time:', start_time)
print('end_time:', end_time)

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

if ref_data_info['data_source'] == 'rcmed':
    min_lat = np.max([min_lat, ref_dataset.lats.min()])
    max_lat = np.min([max_lat, ref_dataset.lats.max()])
    min_lon = np.max([min_lon, ref_dataset.lons.min()])
    max_lon = np.min([max_lon, ref_dataset.lons.max()])

bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

if ref_dataset.lats.ndim != 2 and ref_dataset.lons.ndim != 2:
    ref_dataset = dsp.subset(bounds, ref_dataset)

for idata, dataset in enumerate(model_datasets):
    if dataset.lats.ndim != 2 and dataset.lons.ndim != 2:
        model_datasets[idata] = dsp.subset(bounds, dataset)

# Temporally subset both observation and model datasets
# for the user-specified season.
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

ref_dataset = dsp.temporal_subset(month_start, month_end, ref_dataset,
                                  average_each_year)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.temporal_subset(month_start, month_end,
                                                dataset, average_each_year)
def test_subset_name_propagation(self):
    subset_name = 'foo_subset_name'
    subset = dp.subset(self.target_dataset, self.subregion, subset_name)
    self.assertEqual(subset.name, subset_name)
SEASON_MONTH_START = 1
SEASON_MONTH_END = 12

EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, 'monthly')
target_datasets = [
    dsp.normalize_dataset_datetimes(target, 'monthly')
    for target in target_datasets
]

# Subset down the evaluation datasets to our selected evaluation bounds.
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)
target_datasets = [
    dsp.subset(EVAL_BOUNDS, target)
    for target in target_datasets
]

# Do a monthly temporal rebin of the evaluation datasets.
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))
target_datasets = [
    dsp.temporal_rebin(target, datetime.timedelta(days=30))
    for target in target_datasets
]

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)
target_datasets.append(local.load_file(FILE_3, varName, name='UCT'))

# Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module
print('Working with the rcmed interface to get CRU3.1 Daily Precipitation')
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(
    10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Step 3: Processing datasets so they are the same shape
print('Processing datasets so they are the same shape')
CRU31 = dsp.water_flux_unit_conversion(CRU31)
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print('... spatial regridding')
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = \
        dsp.spatial_regrid(target_datasets[member], new_lats, new_lons)

# Find climatology monthly for obs and models.
CRU31.values, CRU31.times = utils.calc_climatology_monthly(CRU31)
target_datasets.append(local.load_file(FILE_1, varName, name="KNMI"))
target_datasets.append(local.load_file(FILE_2, varName, name="REGCM"))
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print("Working with the rcmed interface to get CRU3.1 Daily Precipitation")
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)

""" Step 3: Processing datasets so they are the same shape ... """
print("Processing datasets so they are the same shape")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(EVAL_BOUNDS, target_datasets[member])
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.spatial_regrid(target_datasets[member],
                                                 new_lats, new_lons)

# Find climatology monthly for obs and models.
def test_out_of_dataset_bounds_lon_max(self):
    self.subregion.lon_max = 180
    with self.assertRaises(ValueError):
        dp.subset(self.subregion, self.target_dataset)
import ssl

if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

# rectangular boundary
min_lat = 15.75
max_lat = 55.75
min_lon = -125.75
max_lon = -66.75

start_time = datetime(1998, 1, 1)
end_time = datetime(1998, 12, 31)

TRMM_dataset = rcmed.parameter_dataset(3, 36, min_lat, max_lat, min_lon,
                                       max_lon, start_time, end_time)

Cuba_and_Bahamas_bounds = Bounds(boundary_type='countries',
                                 countries=['Cuba', 'Bahamas'])
# Mask out the data over Cuba and the Bahamas.
TRMM_dataset2 = dsp.subset(TRMM_dataset, Cuba_and_Bahamas_bounds,
                           extract=False)
plotter.draw_contour_map(ma.mean(TRMM_dataset2.values, axis=0),
                         TRMM_dataset2.lats, TRMM_dataset2.lons,
                         fname='TRMM_without_Cuba_and_Bahamas')

NCA_SW_bounds = Bounds(boundary_type='us_states',
                       us_states=['CA', 'NV', 'UT', 'AZ', 'NM', 'CO'])
# Keep only the data over the southwestern US states (NCA SW region).
TRMM_dataset3 = dsp.subset(TRMM_dataset2, NCA_SW_bounds, extract=True)
plotter.draw_contour_map(ma.mean(TRMM_dataset3.values, axis=0),
                         TRMM_dataset3.lats, TRMM_dataset3.lons,
                         fname='TRMM_NCA_SW')
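# Semantics of the extract flag as used above (consistent with the output
# filenames): extract=False masks out the data *inside* the boundary, while
# extract=True keeps only the data inside it. For example:
#     without_region = dsp.subset(TRMM_dataset, Cuba_and_Bahamas_bounds,
#                                 extract=False)
#     region_only = dsp.subset(TRMM_dataset2, NCA_SW_bounds, extract=True)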
def test_out_of_dataset_bounds_end(self):
    self.subregion.end = datetime.datetime(2011, 1, 1)
    with self.assertRaises(ValueError):
        dp.subset(self.subregion, self.target_dataset)
min_lat = 15.75
max_lat = 55.75
min_lon = -125.75
max_lon = -66.75

start_time = datetime(1998, 1, 1)
end_time = datetime(1998, 12, 31)

TRMM_dataset = rcmed.parameter_dataset(3, 36, min_lat, max_lat, min_lon,
                                       max_lon, start_time, end_time)

Cuba_and_Bahamas_bounds = Bounds(boundary_type='countries',
                                 countries=['Cuba', 'Bahamas'])
# Mask out the data over Cuba and the Bahamas.
TRMM_dataset2 = dsp.subset(TRMM_dataset, Cuba_and_Bahamas_bounds,
                           extract=False)
plotter.draw_contour_map(ma.mean(TRMM_dataset2.values, axis=0),
                         TRMM_dataset2.lats, TRMM_dataset2.lons,
                         fname='TRMM_without_Cuba_and_Bahamas')

NCA_SW_bounds = Bounds(boundary_type='us_states',
                       us_states=['CA', 'NV', 'UT', 'AZ', 'NM', 'CO'])
# Keep only the data over the southwestern US states (NCA SW region).
TRMM_dataset3 = dsp.subset(TRMM_dataset2, NCA_SW_bounds, extract=True)
plotter.draw_contour_map(ma.mean(TRMM_dataset3.values, axis=0),
                         TRMM_dataset3.lats, TRMM_dataset3.lons,
knmi_dataset.name = "knmi" wrf_dataset.name = "wrf" # Date values from loaded datasets might not always fall on reasonable days. # With monthly data, we could have data falling on the 1st, 15th, or some other # day of the month. Let's fix that real quick. ########################################################################## knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly') wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly') # We're only going to run this evaluation over a years worth of data. We'll # make a Bounds object and use it to subset our datasets. ########################################################################## subset = Bounds(lat_min=-45, lat_max=42, lon_min=-24, lon_max=60, start=datetime.datetime(1989, 1, 1), end=datetime.datetime(1989, 12, 1)) knmi_dataset = dsp.subset(knmi_dataset, subset) wrf_dataset = dsp.subset(wrf_dataset, subset) # Temporally re-bin the data into a monthly timestep. ########################################################################## knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='monthly') wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='monthly') # Spatially regrid the datasets onto a 1 degree grid. ########################################################################## # Get the bounds of the reference dataset and use it to create a new # set of lat/lon values on a 1 degree step # Using the bounds we will create a new set of lats and lons on 1 degree step min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries() new_lons = numpy.arange(min_lon, max_lon, 1) new_lats = numpy.arange(min_lat, max_lat, 1)
print("Fetching data from RCMED...") cru31_dataset = rcmed.parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time) """ Step 3: Resample Datasets so they are the same shape """ print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape, )) print("KNMI_Dataset.values shape: (times, lats, lons) - %s" % (knmi_dataset.values.shape, )) print( "Our two datasets have a mis-match in time. We will subset on time to %s years\n" % YEARS) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) knmi_dataset = dsp.subset(new_bounds, knmi_dataset) print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape, )) print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape, )) print("Temporally Rebinning the Datasets to a Single Timestep") # To run FULL temporal Rebinning use a timedelta > 366 days. I used 999 in this example knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=999)) cru31_dataset = dsp.temporal_rebin(cru31_dataset, datetime.timedelta(days=999)) print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, )) print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape, )) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 1 degree step
""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """ print( "Working with the rcmed interface to get CRU3.1 Monthly Mean Precipitation" ) # the dataset_id and the parameter id were determined from # https://rcmes.jpl.nasa.gov/content/data-rcmes-database CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END) """ Step 3: Processing Datasets so they are the same shape """ print("Processing datasets ...") CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly') print("... on units") CRU31 = dsp.water_flux_unit_conversion(CRU31) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS) target_datasets[member] = dsp.water_flux_unit_conversion( target_datasets[member]) target_datasets[member] = dsp.normalize_dataset_datetimes( target_datasets[member], 'monthly') print("... spatial regridding") new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.spatial_regrid(target_datasets[member], new_lats, new_lons) # find the total annual mean. Note the function exists in util.py as def
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))

""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print("Working with the rcmed interface to get CRU3.1 Monthly Mean Precipitation")
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)

""" Step 3: Processing Datasets so they are the same shape """
print("Processing datasets ...")
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')

print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.spatial_regrid(target_datasets[member],
                                                 new_lats, new_lons)

# Find the total annual mean. Note the function exists in util.py as
# calc_climatology_year(dataset).
_, CRU31.values = utils.calc_climatology_year(CRU31)

for member, each_target_dataset in enumerate(target_datasets):
knmi_dataset.name = "knmi" wrf_dataset.name = "wrf" # Date values from loaded datasets might not always fall on reasonable days. # With monthly data, we could have data falling on the 1st, 15th, or some other # day of the month. Let's fix that real quick. ################################################################################ knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly') wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly') # We're only going to run this evaluation over a years worth of data. We'll # make a Bounds object and use it to subset our datasets. ################################################################################ subset = Bounds(-45, 42, -24, 60, datetime.datetime(1989, 1, 1), datetime.datetime(1989, 12, 1)) knmi_dataset = dsp.subset(subset, knmi_dataset) wrf_dataset = dsp.subset(subset, wrf_dataset) # Temporally re-bin the data into a monthly timestep. ################################################################################ knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=30)) wrf_dataset = dsp.temporal_rebin(wrf_dataset, datetime.timedelta(days=30)) # Spatially regrid the datasets onto a 1 degree grid. ################################################################################ # Get the bounds of the reference dataset and use it to create a new # set of lat/lon values on a 1 degree step # Using the bounds we will create a new set of lats and lons on 1 degree step min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries() new_lons = numpy.arange(min_lon, max_lon, 1) new_lats = numpy.arange(min_lat, max_lat, 1)
# under the License.

import ocw.dataset as ds
import ocw.data_source.local as local
import ocw.dataset_processor as dsp
import ocw.plotter as plotter

import numpy as np
import numpy.ma as ma

''' data source: https://dx.doi.org/10.6084/m9.figshare.3753321.v1
    AOD_monthly_2000-Mar_2016-FEB_from_MISR_L3_JOINT.nc is publicly available.'''
dataset = local.load_file(
    'AOD_monthly_2000-MAR_2016-FEB_from_MISR_L3_JOINT.nc',
    'nonabsorbing_ave')

''' Subset the data for East Asia. '''
east_asia_bounds = ds.Bounds(lat_min=20, lat_max=57.7, lon_min=90, lon_max=150)
dataset = dsp.subset(dataset, east_asia_bounds)

'''The original dataset includes nonabsorbing AOD values between March 2000
and February 2016. dsp.temporal_subset will extract data in
September-October-November.'''
dataset_SON = dsp.temporal_subset(dataset, month_start=9, month_end=11,
                                  average_each_year=True)

ny, nx = dataset_SON.values.shape[1:]

# Multi-year mean AOD.
clim_aod = ma.zeros([3, ny, nx])

clim_aod[0, :] = ma.mean(dataset_SON.values, axis=0)  # 16-year mean
clim_aod[1, :] = ma.mean(dataset_SON.values[-5:, :], axis=0)  # last 5-year mean
ssl._create_default_https_context = ssl._create_unverified_context

# rectangular boundary
min_lat = 15.75
max_lat = 55.75
min_lon = -125.75
max_lon = -66.75

start_time = datetime(1998, 1, 1)
end_time = datetime(1998, 12, 31)

TRMM_dataset = rcmed.parameter_dataset(3, 36, min_lat, max_lat, min_lon,
                                       max_lon, start_time, end_time)

Cuba_and_Bahamas_bounds = Bounds(
    boundary_type='countries', countries=['Cuba', 'Bahamas'])
# Mask out the data over Cuba and the Bahamas.
TRMM_dataset2 = dsp.subset(
    TRMM_dataset, Cuba_and_Bahamas_bounds, extract=False)
plotter.draw_contour_map(ma.mean(TRMM_dataset2.values, axis=0),
                         TRMM_dataset2.lats, TRMM_dataset2.lons,
                         fname='TRMM_without_Cuba_and_Bahamas')

NCA_SW_bounds = Bounds(boundary_type='us_states',
                       us_states=['CA', 'NV', 'UT', 'AZ', 'NM', 'CO'])
# Keep only the data over the southwestern US states (NCA SW region).
TRMM_dataset3 = dsp.subset(TRMM_dataset2, NCA_SW_bounds, extract=True)
plotter.draw_contour_map(ma.mean(TRMM_dataset3.values, axis=0),
                         TRMM_dataset3.lats, TRMM_dataset3.lons,
                         fname='TRMM_NCA_SW')
    max_lon = np.min([max_lon, dataset.lons.max()])

if 'boundary_type' not in space_info:
    bounds = Bounds(lat_min=min_lat, lat_max=max_lat,
                    lon_min=min_lon, lon_max=max_lon,
                    start=start_time, end=end_time)
else:
    bounds = Bounds(boundary_type=space_info['boundary_type'],
                    start=start_time, end=end_time)

for i, dataset in enumerate(datasets):
    datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# For now we will treat the first listed dataset as the reference dataset for
# evaluation purposes.
for i, dataset in enumerate(datasets):
    datasets[i] = dsp.temporal_subset(dataset, month_start, month_end,
                                      average_each_year)
def test_out_of_dataset_bounds_start(self):
    self.subregion.start = datetime.datetime(1999, 1, 1)
    with self.assertRaises(ValueError):
        dp.subset(self.target_dataset, self.subregion)
def test_subset_name(self):
    subset = dp.subset(self.target_dataset, self.subregion)
    self.assertEqual(subset.name, self.name)
    max_lon = np.min([max_lon, dataset.lons.max()])

if 'boundary_type' not in space_info:
    bounds = Bounds(lat_min=min_lat, lat_max=max_lat,
                    lon_min=min_lon, lon_max=max_lon,
                    start=start_time, end=end_time)
else:
    bounds = Bounds(boundary_type=space_info['boundary_type'],
                    start=start_time, end=end_time)

for i, dataset in enumerate(obs_datasets):
    obs_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        # Rebin the spatially subsetted dataset to the target resolution.
        obs_datasets[i] = dsp.temporal_rebin(obs_datasets[i],
                                             temporal_resolution)

for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.subset(dataset, bounds)
    if dataset.temporal_resolution() != temporal_resolution:
        model_datasets[i] = dsp.temporal_rebin(model_datasets[i],
                                               temporal_resolution)

# Temporally subset both observation and model datasets
# for the user specified season
month_start = time_info['month_start']
month_end = time_info['month_end']
average_each_year = time_info['average_each_year']

# TODO: Fully support multiple observation / reference datasets.
    parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time)

""" Step 3: Resample Datasets so they are the same shape """
print("CRU31_Dataset.values shape: (times, lats, lons) - %s" %
      (cru31_dataset.values.shape,))
print("KNMI_Dataset.values shape: (times, lats, lons) - %s" %
      (knmi_dataset.values.shape,))
print("Our two datasets have a mismatch in time. We will subset on time to %s years\n"
      % YEARS)

# Create a Bounds object to use for subsetting
new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)
knmi_dataset = dsp.subset(knmi_dataset, new_bounds)

print("CRU31_Dataset.values shape: (times, lats, lons) - %s" %
      (cru31_dataset.values.shape,))
print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" %
      (knmi_dataset.values.shape,))

print("Temporally Rebinning the Datasets to a Single Timestep")
# To run FULL temporal Rebinning
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='full')
cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='full')

print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape,))
print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape,))

""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on a 0.5 degree step
new_lons = np.arange(min_lon, max_lon, 0.5)
# With monthly data, we could have data falling on the 1st, 15th, or some
# other day of the month. Let's fix that real quick.
##########################################################################
knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly')
wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly')

# We're only going to run this evaluation over a year's worth of data. We'll
# make a Bounds object and use it to subset our datasets.
##########################################################################
subset = Bounds(lat_min=-45, lat_max=42, lon_min=-24, lon_max=60,
                start=datetime.datetime(1989, 1, 1),
                end=datetime.datetime(1989, 12, 1))
knmi_dataset = dsp.subset(knmi_dataset, subset)
wrf_dataset = dsp.subset(wrf_dataset, subset)

# Temporally re-bin the data into a monthly timestep.
##########################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='monthly')
wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='monthly')

# Spatially regrid the datasets onto a 1 degree grid.
##########################################################################
# Get the bounds of the reference dataset and use them to create a new
# set of lat/lon values on a 1 degree step.
min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries()
new_lons = numpy.arange(min_lon, max_lon, 1)
new_lats = numpy.arange(min_lat, max_lat, 1)
def test_out_of_dataset_bounds_lon_max(self):
    self.subregion.lon_max = 180
    with self.assertRaises(ValueError):
        dp.subset(self.target_dataset, self.subregion)
end_time = min([end_time, dataset_end])

cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='annual')
dataset_start, dataset_end = cru31_dataset.temporal_boundaries()
start_time = max([start_time, dataset_start])
end_time = min([end_time, dataset_end])

print("Time Range is: %s to %s" % (start_time.strftime("%Y-%m-%d"),
                                   end_time.strftime("%Y-%m-%d")))

# Create a Bounds object to use for subsetting
new_bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon,
                    lon_max=max_lon, start=start_time, end=end_time)

# Subset our model datasets so they are the same size
knmi_dataset = dsp.subset(knmi_dataset, new_bounds)
wrf311_dataset = dsp.subset(wrf311_dataset, new_bounds)

# Spatially Regrid the Dataset Objects to a 1/2 degree grid.
# Using the bounds we will create a new set of lats and lons on a 1/2 degree step
new_lons = np.arange(min_lon, max_lon, 0.5)
new_lats = np.arange(min_lat, max_lat, 0.5)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
wrf311_dataset = dsp.spatial_regrid(wrf311_dataset, new_lats, new_lons)
cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons)

# Generate an ensemble dataset from knmi and wrf models
ensemble_dataset = dsp.ensemble([knmi_dataset, wrf311_dataset])
def test_out_of_dataset_bounds_end(self):
    self.subregion.end = datetime.datetime(2011, 1, 1)
    with self.assertRaises(ValueError):
        dp.subset(self.target_dataset, self.subregion)
def test_subset_name(self):
    subset = dp.subset(self.subregion, self.target_dataset)
    self.assertEqual(subset.name, self.name)
print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape, )) print("KNMI_Dataset.values shape: (times, lats, lons) - %s" % (knmi_dataset.values.shape, )) print( "Our two datasets have a mis-match in time. We will subset on time to %s years\n" % YEARS) # Create a Bounds object to use for subsetting new_bounds = Bounds(lat_min=min_lat, lat_max=max_lat, lon_min=min_lon, lon_max=max_lon, start=start_time, end=end_time) knmi_dataset = dsp.subset(knmi_dataset, new_bounds) print("CRU31_Dataset.values shape: (times, lats, lons) - %s" % (cru31_dataset.values.shape, )) print("KNMI_Dataset.values shape: (times, lats, lons) - %s \n" % (knmi_dataset.values.shape, )) print("Temporally Rebinning the Datasets to a Single Timestep") # To run FULL temporal Rebinning knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='full') cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='full') print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, )) print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape, )) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 0.5 degree step
                                  average_each_year=True)

""" Temporal subset of model_dataset """
model_dataset_subset = [dsp.temporal_slice(dataset, start_time=start_date,
                                           end_time=end_date)
                        for dataset in model_dataset]
model_dataset_season = [dsp.temporal_subset(dataset, month_start, month_end,
                                            average_each_year=True)
                        for dataset in model_dataset_subset]

""" Spatial subset of obs_dataset and generate time series """
# region index 0-6: NW, SW, NGP, SGP, MW, NE, SE
obs_timeseries = np.zeros([nyear, n_region])
model_timeseries = np.zeros([nmodel, nyear, n_region])

for iregion in np.arange(n_region):
    obs_timeseries[:, iregion] = utils.calc_time_series(
        dsp.subset(obs_dataset_season, regional_bounds[iregion]))

    for imodel in np.arange(nmodel):
        model_timeseries[imodel, :, iregion] = utils.calc_time_series(
            dsp.subset(model_dataset_season[imodel], regional_bounds[iregion]))

year = np.arange(nyear)

regional_trends_obs = np.zeros(n_region)
regional_trends_obs_error = np.zeros(n_region)
regional_trends_model = np.zeros([nmodel, n_region])
regional_trends_model_error = np.zeros([nmodel, n_region])
regional_trends_ens = np.zeros(n_region)
regional_trends_ens_error = np.zeros(n_region)

for iregion in np.arange(n_region):
    regional_trends_obs[iregion], regional_trends_obs_error[iregion] = \
        utils.calculate_temporal_trend_of_time_series(
""" Step 3: Resample Datasets so they are the same shape """ print("Temporally Rebinning the Datasets to an Annual Timestep") # To run annual temporal Rebinning, knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution = 'annual') wrf311_dataset = dsp.temporal_rebin(wrf311_dataset, temporal_resolution = 'annual') cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution = 'annual') # Running Temporal Rebin early helps negate the issue of datasets being on different # days of the month (1st vs. 15th) # Create a Bounds object to use for subsetting new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time) # Subset our model datasets so they are the same size knmi_dataset = dsp.subset(knmi_dataset, new_bounds) wrf311_dataset = dsp.subset(wrf311_dataset, new_bounds) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 1/2 degree step new_lons = np.arange(min_lon, max_lon, 0.5) new_lats = np.arange(min_lat, max_lat, 0.5) # Spatially regrid datasets using the new_lats, new_lons numpy arrays knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) wrf311_dataset = dsp.spatial_regrid(wrf311_dataset, new_lats, new_lons) cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons) # Generate an ensemble dataset from knmi and wrf models ensemble_dataset = dsp.ensemble([knmi_dataset, wrf311_dataset])
def test_out_of_dataset_bounds_lat_min(self):
    self.subregion.lat_min = -90
    with self.assertRaises(ValueError):
        dp.subset(self.subregion, self.target_dataset)
def test_subset_name_propagation(self):
    subset_name = 'foo_subset_name'
    subset = dp.subset(self.subregion, self.target_dataset, subset_name)
    self.assertEqual(subset.name, subset_name)
    file_path='./data/WRF24_2010_summer/',
    filename_pattern=['wrf2dout*'])

# Step 2: Load the spatial filter (Bukovsky region mask).
Bukovsky_mask = Bounds(
    boundary_type='user',
    user_mask_file='Bukovsky_regions.nc',
    mask_variable_name='Bukovsky',
    longitude_name='lon',
    latitude_name='lat')

# Step 3: Spatially subset the WRF data (for the Northern Great Plains,
# user_mask_values=[10]).
WRF_dataset_filtered = \
    dsp.subset(WRF_dataset, Bukovsky_mask, user_mask_values=[10])

# Step 4: Analyze the wet spells.
duration1, peak1, total1 = \
    metrics.wet_spell_analysis(GPM_dataset_filtered, threshold=0.1,
                               nyear=1, dt=0.5)
duration2, peak2, total2 = \
    metrics.wet_spell_analysis(WRF_dataset_filtered.values, threshold=0.1,
                               nyear=1, dt=0.5)

# Step 5: Calculate the joint PDF (JPDF) of spell duration and peak rainfall.
histo2d_GPM = \
    metrics.calc_joint_histogram(data_array1=duration1, data_array2=peak1,
                                 bins_for_data1=np.append(np.arange(25) + 0.5,
                                                          [48.5, 120.5]),
                                 bins_for_data2=[0.1, 0.2, 0.5, 1.0, 2.0,
                                                 5.0, 10, 20, 30])
histo2d_GPM = histo2d_GPM / np.sum(histo2d_GPM) * 100.