def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Prepare the reference and target datasets for an evaluation run."""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with
    # off-by-one errors in the resulting dataset shape post-temporal/spatial
    # adjustments that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets
    ]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)
        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)
        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)
        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
def setUp(self):
    self.input_dataset = ten_year_monthly_dataset()
    self.new_lats = np.array(range(-89, 90, 4))
    self.new_lons = np.array(range(-179, 180, 4))
    self.regridded_dataset = dp.spatial_regrid(
        self.input_dataset, self.new_lats, self.new_lons)
def test_two_dimensional_lats_lons(self):
    self.input_dataset.lats = np.array(range(-89, 90, 2))
    self.input_dataset.lons = np.array(range(-179, 180, 4))
    self.input_dataset.lats = self.input_dataset.lats.reshape(2, 45)
    self.input_dataset.lons = self.input_dataset.lons.reshape(2, 45)
    new_dataset = dp.spatial_regrid(
        self.input_dataset, self.new_lats, self.new_lons)
    np.testing.assert_array_equal(new_dataset.lats, self.new_lats)
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Prepare the reference and target datasets for an evaluation run."""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)
        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)
        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)
        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
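# Illustrative only: a minimal 'evaluation' config block that exercises every
# branch of _prepare_datasets_for_evaluation above. The exact schema is an
# assumption inferred from how the keys are read here, with the subset list
# ordered as the Bounds(lat_min, lat_max, lon_min, lon_max, start, end) call
# expects; the concrete values are placeholders.
example_config_data = {
    'evaluation': {
        # lat_min, lat_max, lon_min, lon_max, start, end
        'subset': [-45.0, 42.0, -24.0, 60.0, '1989-01-01', '2008-12-31'],
        # temporal rebin resolution in days (1 == daily, 30 ~= monthly)
        'temporal_time_delta': 30,
        # [start, stop, step] passed straight to np.arange
        'spatial_regrid_lats': [-45.0, 42.0, 1.0],
        'spatial_regrid_lons': [-24.0, 60.0, 1.0],
    }
}

# reference, targets = _prepare_datasets_for_evaluation(reference, targets,
#                                                       example_config_data)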
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin.
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format in which this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid
    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'],
                                          eval_bounds)
    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of the
    # month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'], eval_bounds['lat_max'],
                    eval_bounds['lon_min'], eval_bounds['lon_max'],
                    start, end)

    ref_dataset = dsp.safe_subset(subset, ref_dataset)
    target_datasets = [dsp.safe_subset(subset, ds) for ds in target_datasets]

    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do spatial re-bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step, lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
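# Illustrative only: a payload in the format the run_evaluation docstring above
# describes. The file path, variable names, RCMED ids, and bounding values are
# hypothetical placeholders, not values taken from the original code.
example_payload = {
    'reference_dataset': {
        'data_source_id': 1,  # 1 == local file on the server
        'dataset_info': {
            'id': '/path/to/reference.nc',
            'var_name': 'tasmax',
            'lat_name': 'lat',
            'lon_name': 'lon',
            'time_name': 'time',
            'name': 'Reference'
        }
    },
    'target_datasets': [{
        'data_source_id': 2,  # 2 == RCMED
        'dataset_info': {
            'dataset_id': 10,
            'parameter_id': 34,
            'name': 'Target'
        }
    }],
    'spatial_rebin_lat_step': 1,
    'spatial_rebin_lon_step': 1,
    'temporal_resolution': 30,          # ~monthly re-bin
    'metrics': ['Bias'],                # class names as strings
    'start_time': '1989-01-01 00:00:00',
    'end_time': '1999-01-01 00:00:00',
    'lat_min': -45.0,
    'lat_max': 42.0,
    'lon_min': -24.0,
    'lon_max': 60.0,
}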
nlon = (max_lon - min_lon) / delta_lon + 1
new_lat = np.linspace(min_lat, max_lat, nlat)
new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print 'Dataset loading completed'
print 'Observation data:', ref_name
print 'Number of model datasets:', nmodel
for model_name in model_names:
    print model_name

""" Step 4: Spatial regriding of the reference datasets """
print 'Regridding datasets: ', config['regrid']
if not config['regrid']['regrid_on_reference']:
    ref_dataset = dsp.spatial_regrid(ref_dataset, new_lat, new_lon)
    print 'Reference dataset has been regridded'
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.spatial_regrid(
        dataset, new_lat, new_lon, boundary_check=boundary_check_model)
    print model_names[idata] + ' has been regridded'

print 'Propagating missing data information'
ref_dataset = dsp.mask_missing_data([ref_dataset] + model_datasets)[0]
model_datasets = dsp.mask_missing_data([ref_dataset] + model_datasets)[1:]

""" Step 5: Checking and converting variable units """
print 'Checking and converting variable units'
ref_dataset = dsp.variable_unit_conversion(ref_dataset)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.variable_unit_conversion(dataset)
# Temporally re-bin the data into a monthly timestep.
################################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=30))
wrf_dataset = dsp.temporal_rebin(wrf_dataset, datetime.timedelta(days=30))

# Spatially regrid the datasets onto a 1 degree grid.
################################################################################
# Get the bounds of the reference dataset and use them to create a new
# set of lat/lon values on a 1 degree step.
min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries()
new_lons = numpy.arange(min_lon, max_lon, 1)
new_lats = numpy.arange(min_lat, max_lat, 1)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)

# Load the metrics that we want to use for the evaluation.
################################################################################
taylor_diagram = metrics.SpatialPatternTaylorDiagram()

# Create our new evaluation object. The knmi dataset is the evaluation's
# reference dataset. We then provide a list of 1 or more target datasets
# to use for the evaluation. In this case, we only want to use the wrf dataset.
# Then we pass a list of all the metrics that we want to use in the evaluation.
################################################################################
test_evaluation = evaluation.Evaluation(knmi_dataset,
                                        [wrf_dataset],
                                        [taylor_diagram])
test_evaluation.run()

# Pull out the evaluation results and prepare them for drawing a Taylor diagram.
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset, datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)

# Load the metrics for the evaluation.
mean_bias = metrics.MeanBias()
# These versions of the metrics require seasonal bounds prior to running
# the metrics. You should set these values above in the evaluation
# configuration section.
spatial_std_dev_ratio = metrics.SeasonalSpatialStdDevRatio(
    month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)
pattern_correlation = metrics.SeasonalPatternCorrelation(
    month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)

# Create our example evaluation.
example_eval = evaluation.Evaluation(
    ref_dataset,       # Reference dataset for the evaluation
    # 1 or more target datasets for the evaluation
    [target_dataset],
    # 1 or more metrics to use in the evaluation
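# The excerpt above cuts off inside the Evaluation constructor. A hedged
# completion, following the construct/run/results pattern used by the other
# evaluations in this section, would look roughly like this; the exact
# original call is not shown here, so treat this as a sketch.
example_eval = evaluation.Evaluation(
    ref_dataset,                                              # reference dataset
    [target_dataset],                                         # target datasets
    [mean_bias, spatial_std_dev_ratio, pattern_correlation])  # metrics
example_eval.run()

# Results are indexed by [target][metric], as in the curses example near the
# end of this section.
mean_bias_result = example_eval.results[0][0]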
print("Processing datasets ...") CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly') print("... on units") CRU31 = dsp.water_flux_unit_conversion(CRU31) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS) target_datasets[member] = dsp.water_flux_unit_conversion( target_datasets[member]) target_datasets[member] = dsp.normalize_dataset_datetimes( target_datasets[member], 'monthly') print("... spatial regridding") new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.spatial_regrid(target_datasets[member], new_lats, new_lons) # find the total annual mean. Note the function exists in util.py as def # calc_climatology_year(dataset): _, CRU31.values = utils.calc_climatology_year(CRU31) for member, each_target_dataset in enumerate(target_datasets): _, target_datasets[member].values = utils.calc_climatology_year( target_datasets[member]) # make the model ensemble target_datasets_ensemble = dsp.ensemble(target_datasets)
print("Temporally Rebinning the Datasets to a Single Timestep") # To run FULL temporal Rebinning knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='full') cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='full') print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, )) print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape, )) """ Spatially Regrid the Dataset Objects to a 1/2 degree grid """ # Using the bounds we will create a new set of lats and lons on 0.5 degree step new_lons = np.arange(min_lon, max_lon, 0.5) new_lats = np.arange(min_lat, max_lat, 0.5) # Spatially regrid datasets using the new_lats, new_lons numpy arrays print("Spatially Regridding the KNMI_Dataset...") knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons) print("Spatially Regridding the CRU31_Dataset...") cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons) print("Final shape of the KNMI_Dataset:%s" % (knmi_dataset.values.shape, )) print("Final shape of the CRU31_Dataset:%s" % (cru31_dataset.values.shape, )) """ Step 4: Build a Metric to use for Evaluation - Bias for this example """ # You can build your own metrics, but OCW also ships with some common metrics print("Setting up a Bias metric to use for evaluation") bias = metrics.Bias() """ Step 5: Create an Evaluation Object using Datasets and our Metric """ # The Evaluation Class Signature is: # Evaluation(reference, targets, metrics, subregions=None) # Evaluation can take in multiple targets and metrics, so we need to convert # our examples into Python lists. Evaluation will iterate over the lists print("Making the Evaluation definition") bias_evaluation = evaluation.Evaluation(knmi_dataset, [cru31_dataset], [bias])
def spatialRegrid(dataset, lats, lons):
    '''Spatially regrid dataset variable to a new grid with specified
    resolution, where lats & lons are the new coordinate vectors.
    '''
    return dsp.spatial_regrid(dataset, lats, lons)
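# Illustrative usage of the wrapper above: build the new coordinate vectors
# with np.arange, as the other snippets in this section do. The bounds, the
# 1-degree step, and some_dataset are placeholder values, not from the
# original code.
new_lats = np.arange(-45.0, 42.0, 1.0)
new_lons = np.arange(-24.0, 60.0, 1.0)
regridded = spatialRegrid(some_dataset, new_lats, new_lons)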
# since the main intent of this program is to evaluate RCMs. However, it can be
# used for GCMs in which case it should be set to False to save time.
boundary_check = config['regrid'].get('boundary_check', True)

# number of target datasets (usually models, but can also be obs / reanalysis)
ntarget = len(target_datasets)
print('Dataset loading completed')
print('Reference data: {}'.format(reference_name))
print('Number of target datasets: {}'.format(ntarget))
for target_name in target_names:
    print(target_name)

""" Step 3: Spatial regriding of the datasets """
print('Regridding datasets: {}'.format(config['regrid']))
if not config['regrid']['regrid_on_reference']:
    reference_dataset = dsp.spatial_regrid(reference_dataset, new_lat, new_lon)
    print('Reference dataset has been regridded')
for i, dataset in enumerate(target_datasets):
    target_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon,
                                            boundary_check=boundary_check)
    print('{} has been regridded'.format(target_names[i]))

print('Propagating missing data information')
datasets = dsp.mask_missing_data([reference_dataset] + target_datasets)
reference_dataset = datasets[0]
target_datasets = datasets[1:]

""" Step 4: Checking and converting variable units """
print('Checking and converting variable units')
reference_dataset = dsp.variable_unit_conversion(reference_dataset)
for i, dataset in enumerate(target_datasets):
    target_datasets[i] = dsp.variable_unit_conversion(dataset)
nlat = (max_lat - min_lat) / delta_lat + 1
nlon = (max_lon - min_lon) / delta_lon + 1
new_lat = np.linspace(min_lat, max_lat, nlat)
new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print 'Dataset loading completed'
print 'Observation data:', ref_name
print 'Number of model datasets:', nmodel
for model_name in model_names:
    print model_name

""" Step 4: Spatial regriding of the reference datasets """
print 'Regridding datasets: ', config['regrid']
if not config['regrid']['regrid_on_reference']:
    ref_dataset = dsp.spatial_regrid(ref_dataset, new_lat, new_lon)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.spatial_regrid(dataset, new_lat, new_lon)

print 'Propagating missing data information'
ref_dataset = dsp.mask_missing_data([ref_dataset] + model_datasets)[0]
model_datasets = dsp.mask_missing_data([ref_dataset] + model_datasets)[1:]

""" Step 5: Checking and converting variable units """
print 'Checking and converting variable units'
ref_dataset = dsp.variable_unit_conversion(ref_dataset)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.variable_unit_conversion(dataset)

print 'Generating multi-model ensemble'
if len(model_datasets) >= 2.:
    model_datasets.append(dsp.ensemble(model_datasets))
nlat = (max_lat - min_lat) / delta_lat + 1
nlon = (max_lon - min_lon) / delta_lon + 1
new_lat = np.linspace(min_lat, max_lat, nlat)
new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print 'Dataset loading completed'
print 'Observation data:', obs_name
print 'Number of model datasets:', nmodel
for model_name in model_names:
    print model_name

""" Step 4: Spatial regriding of the reference datasets """
print 'Regridding datasets: ', config['regrid']
if not config['regrid']['regrid_on_reference']:
    obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon)
    print 'Reference dataset has been regridded'
for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon,
                                           boundary_check=boundary_check)
    print model_names[i] + ' has been regridded'

print 'Propagating missing data information'
obs_dataset = dsp.mask_missing_data([obs_dataset] + model_datasets)[0]
model_datasets = dsp.mask_missing_data([obs_dataset] + model_datasets)[1:]

""" Step 5: Checking and converting variable units """
print 'Checking and converting variable units'
obs_dataset = dsp.variable_unit_conversion(obs_dataset)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.variable_unit_conversion(dataset)
datasets = local.load_multiple_files(paths, 'clt')

with time_block(ocw_results, 'Domain Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.subset(ds, bnds)

with time_block(ocw_results, 'Seasonal Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_subset(ds, 9, 11)

with time_block(ocw_results, 'Resampling'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_rebin(ds, 'annual')

with time_block(ocw_results, 'Regridding'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.spatial_regrid(ds, new_lats, new_lons)

print(f'OCW Results: {ocw_results}')

# Plot results
matplotlib.style.use('ggplot')
df = pd.DataFrame({'OCW': ocw_results, 'BCDP': bcdp_results})
ax = df.plot.bar(logy=True, rot=12)
for p in ax.patches:
    val = np.round(p.get_height(), decimals=2)
    ax.annotate(str(val), (p.get_x() + .02, p.get_height()), size=9.5)
plt.ylabel('Running Time [s]')
plt.savefig('bcdp_ocw_benchmarks.png')
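# time_block is used above but its definition is not part of this excerpt.
# A minimal sketch of what such a timing context manager could look like,
# given how it is called here (an assumption, not the original implementation):
import time
from contextlib import contextmanager

@contextmanager
def time_block(results, name):
    """Record the elapsed wall-clock time of the enclosed block in results[name]."""
    start = time.time()
    try:
        yield
    finally:
        results[name] = time.time() - start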
# Running Temporal Rebin early helps negate the issue of datasets being on
# different days of the month (1st vs. 15th)

# Create a Bounds object to use for subsetting
new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

# Subset our model datasets so they are the same size
knmi_dataset = dsp.subset(knmi_dataset, new_bounds)
wrf311_dataset = dsp.subset(wrf311_dataset, new_bounds)

""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on 1/2 degree step
new_lons = np.arange(min_lon, max_lon, 0.5)
new_lats = np.arange(min_lat, max_lat, 0.5)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
wrf311_dataset = dsp.spatial_regrid(wrf311_dataset, new_lats, new_lons)
cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons)

# Generate an ensemble dataset from knmi and wrf models
ensemble_dataset = dsp.ensemble([knmi_dataset, wrf311_dataset])

""" Step 4: Build a Metric to use for Evaluation - Bias for this example """
print("Setting up a Bias metric to use for evaluation")
bias = metrics.Bias()

""" Step 5: Create an Evaluation Object using Datasets and our Metric """
# The Evaluation Class Signature is:
# Evaluation(reference, targets, metrics, subregions=None)
# Evaluation can take in multiple targets and metrics, so we need to convert
# our examples into Python lists. Evaluation will iterate over the lists
""" Step 3: Processing datasets so they are the same shape ... """ print("Processing datasets so they are the same shape") CRU31 = dsp.water_flux_unit_conversion(CRU31) CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly') for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS) target_datasets[member] = dsp.water_flux_unit_conversion(target_datasets[ member]) target_datasets[member] = dsp.normalize_dataset_datetimes( target_datasets[member], 'monthly') print("... spatial regridding") new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep) new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep) CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member] = dsp.spatial_regrid( target_datasets[member], new_lats, new_lons) # find climatology monthly for obs and models CRU31.values, CRU31.times = utils.calc_climatology_monthly(CRU31) for member, each_target_dataset in enumerate(target_datasets): target_datasets[member].values, target_datasets[ member].times = utils.calc_climatology_monthly(target_datasets[member]) # make the model ensemble target_datasets_ensemble = dsp.ensemble(target_datasets)
# Temporally re-bin the data into a monthly timestep.
##########################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='monthly')
wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='monthly')

# Spatially regrid the datasets onto a 1 degree grid.
##########################################################################
# Get the bounds of the reference dataset and use them to create a new
# set of lat/lon values on a 1 degree step.
min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries()
new_lons = numpy.arange(min_lon, max_lon, 1)
new_lats = numpy.arange(min_lat, max_lat, 1)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)

# Load the metrics that we want to use for the evaluation.
##########################################################################
taylor_diagram = metrics.SpatialPatternTaylorDiagram()

# Create our new evaluation object. The knmi dataset is the evaluation's
# reference dataset. We then provide a list of 1 or more target datasets
# to use for the evaluation. In this case, we only want to use the wrf dataset.
# Then we pass a list of all the metrics that we want to use in the evaluation.
##########################################################################
test_evaluation = evaluation.Evaluation(knmi_dataset,
                                        [wrf_dataset],
                                        [taylor_diagram])
test_evaluation.run()
def run_screen(model_datasets, models_info, observations_info,
               overlap_start_time, overlap_end_time,
               overlap_min_lat, overlap_max_lat,
               overlap_min_lon, overlap_max_lon,
               temp_grid_setting, spatial_grid_setting,
               working_directory, plot_title):
    '''Generates screen to show running evaluation process.

    :param model_datasets: list of model dataset objects
    :type model_datasets: list
    :param models_info: list of dictionaries that contain information for each model
    :type models_info: list
    :param observations_info: list of dictionaries that contain information for each observation
    :type observations_info: list
    :param overlap_start_time: overlap start time between model and obs start time
    :type overlap_start_time: datetime
    :param overlap_end_time: overlap end time between model and obs end time
    :type overlap_end_time: float
    :param overlap_min_lat: overlap minimum lat between model and obs minimum lat
    :type overlap_min_lat: float
    :param overlap_max_lat: overlap maximum lat between model and obs maximum lat
    :type overlap_max_lat: float
    :param overlap_min_lon: overlap minimum lon between model and obs minimum lon
    :type overlap_min_lon: float
    :param overlap_max_lon: overlap maximum lon between model and obs maximum lon
    :type overlap_max_lon: float
    :param temp_grid_setting: temporal grid option such as hourly, daily, monthly and annually
    :type temp_grid_setting: string
    :param spatial_grid_setting:
    :type spatial_grid_setting: string
    :param working_directory: path to a directory for storing outputs
    :type working_directory: string
    :param plot_title: Title for plot
    :type plot_title: string
    '''
    option = None
    if option != "0":
        ready_screen("manage_obs_screen")
        y = screen.getmaxyx()[0]
        screen.addstr(2, 2, "Evaluation started....")
        screen.refresh()
        OUTPUT_PLOT = "plot"
        # just accepts one dataset at this time
        dataset_id = int(observations_info[0]['dataset_id'])
        # just accepts one dataset at this time
        parameter_id = int(observations_info[0]['parameter_id'])

        new_bounds = Bounds(overlap_min_lat, overlap_max_lat,
                            overlap_min_lon, overlap_max_lon,
                            overlap_start_time, overlap_end_time)
        # just accepts one model at this time
        model_dataset = dsp.subset(new_bounds, model_datasets[0])

        # Getting bound info of the subsetted model file to retrieve obs data
        # with the same bounds as the subsetted model
        new_model_spatial_bounds = model_dataset.spatial_boundaries()
        new_model_temp_bounds = model_dataset.time_range()
        new_min_lat = new_model_spatial_bounds[0]
        new_max_lat = new_model_spatial_bounds[1]
        new_min_lon = new_model_spatial_bounds[2]
        new_max_lon = new_model_spatial_bounds[3]
        new_start_time = new_model_temp_bounds[0]
        new_end_time = new_model_temp_bounds[1]

        screen.addstr(4, 4, "Retrieving data...")
        screen.refresh()

        # Retrieve obs data
        obs_dataset = rcmed.parameter_dataset(
            dataset_id, parameter_id,
            new_min_lat, new_max_lat,
            new_min_lon, new_max_lon,
            new_start_time, new_end_time)
        screen.addstr(4, 4, "--> Data retrieved.")
        screen.refresh()

        screen.addstr(5, 4, "Temporally regridding...")
        screen.refresh()
        if temp_grid_setting.lower() == 'hourly':
            days = 0.5
        elif temp_grid_setting.lower() == 'daily':
            days = 1
        elif temp_grid_setting.lower() == 'monthly':
            days = 31
        else:
            days = 365
        model_dataset = dsp.temporal_rebin(model_dataset, timedelta(days))
        obs_dataset = dsp.temporal_rebin(obs_dataset, timedelta(days))
        screen.addstr(5, 4, "--> Temporally regridded.")
        screen.refresh()

        new_lats = np.arange(new_min_lat, new_max_lat, spatial_grid_setting)
        new_lons = np.arange(new_min_lon, new_max_lon, spatial_grid_setting)
        screen.addstr(6, 4, "Spatially regridding...")
        screen.refresh()
        spatial_gridded_model = dsp.spatial_regrid(model_dataset,
                                                   new_lats, new_lons)
        spatial_gridded_obs = dsp.spatial_regrid(obs_dataset,
                                                 new_lats, new_lons)
        screen.addstr(6, 4, "--> Spatially regridded.")
        screen.refresh()

        screen.addstr(7, 4, "Setting up metrics...")
        screen.refresh()
        bias = metrics.Bias()
        bias_evaluation = evaluation.Evaluation(spatial_gridded_model,
                                                [spatial_gridded_obs], [bias])
        screen.addstr(7, 4, "--> Metrics setting done.")
        screen.refresh()

        screen.addstr(8, 4, "Running evaluation.....")
        screen.refresh()
        bias_evaluation.run()
        results = bias_evaluation.results[0][0]
        screen.addstr(8, 4, "--> Evaluation Finished.")
        screen.refresh()

        screen.addstr(9, 4, "Generating plots....")
        screen.refresh()
        lats = new_lats
        lons = new_lons
        gridshape = (1, 1)
        sub_titles = [""]  # No subtitle set for now
        if not os.path.exists(working_directory):
            os.makedirs(working_directory)
        for i in range(len(results)):
            fname = working_directory + OUTPUT_PLOT + str(i)
            plotter.draw_contour_map(results[i], lats, lons, fname,
                                     gridshape=gridshape, ptitle=plot_title,
                                     subtitles=sub_titles)
        screen.addstr(9, 4, "--> Plots generated.")
        screen.refresh()
        screen.addstr(y - 2, 1, "Press 'enter' to Exit: ")
        option = screen.getstr()