def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Normalize, subset, temporally rebin, and spatially regrid the reference
    and target datasets according to the 'evaluation' section of config_data."""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we normalize
    # the data as daily data, which means we adjust the start times for each
    # bucket of data to be consistent. By default we normalize the data as
    # monthly. Note that this will not break yearly data, so it's safer to do
    # this no matter what. This keeps us from ending up with off-by-one errors
    # in the resulting dataset shape after the temporal/spatial adjustments,
    # which would break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets
    ]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Normalize, subset, temporally rebin, and spatially regrid the reference
    and target datasets according to the 'evaluation' section of config_data."""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we normalize
    # the data as daily data, which means we adjust the start times for each
    # bucket of data to be consistent. By default we normalize the data as
    # monthly. Note that this will not break yearly data, so it's safer to do
    # this no matter what. This keeps us from ending up with off-by-one errors
    # in the resulting dataset shape after the temporal/spatial adjustments,
    # which would break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [dsp.normalize_dataset_datetimes(t, string_time_delta)
               for t in targets]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
def test_partial_spatial_overlap(self):
    '''Ensure that safe_subset can handle out of bounds spatial values'''
    ds = dp.safe_subset(self.target_dataset, self.spatial_out_of_bounds)
    spatial_bounds = ds.spatial_boundaries()
    self.assertEquals(spatial_bounds[0], -60)
    self.assertEquals(spatial_bounds[1], 60)
    self.assertEquals(spatial_bounds[2], -170)
    self.assertEquals(spatial_bounds[3], 170)
def test_partial_spatial_overlap(self):
    '''Ensure that safe_subset can handle out of bounds spatial values'''
    ds = dp.safe_subset(self.spatial_out_of_bounds, self.target_dataset)
    spatial_bounds = ds.spatial_boundaries()
    self.assertEquals(spatial_bounds[0], -60)
    self.assertEquals(spatial_bounds[1], 60)
    self.assertEquals(spatial_bounds[2], -170)
    self.assertEquals(spatial_bounds[3], 170)
def test_partial_temporal_overlap(self):
    '''Ensure that safe_subset can handle out of bounds temporal values'''
    ds = dp.safe_subset(self.target_dataset, self.temporal_out_of_bounds)
    temporal_bounds = ds.time_range()
    start = datetime.datetime(2000, 1, 1)
    end = datetime.datetime(2009, 12, 1)
    self.assertEquals(temporal_bounds[0], start)
    self.assertEquals(temporal_bounds[1], end)
def test_partial_temporal_overlap(self):
    '''Ensure that safe_subset can handle out of bounds temporal values'''
    ds = dp.safe_subset(self.temporal_out_of_bounds, self.target_dataset)
    temporal_bounds = ds.time_range()
    start = datetime.datetime(2000, 1, 1)
    end = datetime.datetime(2009, 12, 1)
    self.assertEquals(temporal_bounds[0], start)
    self.assertEquals(temporal_bounds[1], end)
def test_entire_bounds_overlap(self):
    '''Ensure that safe_subset can handle bounds that exceed the dataset in
    both space and time'''
    ds = dp.safe_subset(self.target_dataset, self.everything_out_of_bounds)
    spatial_bounds = ds.spatial_boundaries()
    temporal_bounds = ds.temporal_boundaries()
    start = datetime.datetime(2000, 1, 1)
    end = datetime.datetime(2009, 12, 1)
    self.assertEquals(spatial_bounds[0], -60)
    self.assertEquals(spatial_bounds[1], 60)
    self.assertEquals(spatial_bounds[2], -170)
    self.assertEquals(spatial_bounds[3], 170)
    self.assertEquals(temporal_bounds[0], start)
    self.assertEquals(temporal_bounds[1], end)
def test_entire_bounds_overlap(self):
    '''Ensure that safe_subset can handle bounds that exceed the dataset in
    both space and time'''
    ds = dp.safe_subset(self.everything_out_of_bounds, self.target_dataset)
    spatial_bounds = ds.spatial_boundaries()
    temporal_bounds = ds.time_range()
    start = datetime.datetime(2000, 1, 1)
    end = datetime.datetime(2009, 12, 1)
    self.assertEquals(spatial_bounds[0], -60)
    self.assertEquals(spatial_bounds[1], 60)
    self.assertEquals(spatial_bounds[2], -170)
    self.assertEquals(spatial_bounds[3], 170)
    self.assertEquals(temporal_bounds[0], start)
    self.assertEquals(temporal_bounds[1], end)
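# The tests above rely on fixtures (self.target_dataset,
# self.spatial_out_of_bounds, self.temporal_out_of_bounds,
# self.everything_out_of_bounds) built in a setUp() that is not shown here.
# Below is a minimal sketch of such a setUp(), with grid, time, and bound
# values inferred from the assertions above (lats -60..60, lons -170..170,
# times 2000-01-01..2009-12-01); the exact values and the Dataset/Bounds
# constructor usage are assumptions, not the project's actual fixtures.
import datetime

import numpy as np

from ocw.dataset import Bounds, Dataset


def setUp(self):
    lats = np.arange(-60, 61, 5)
    lons = np.arange(-170, 171, 10)
    times = np.array([datetime.datetime(2000 + y, m, 1)
                      for y in range(10) for m in range(1, 13)])
    values = np.zeros((len(times), len(lats), len(lons)))
    self.target_dataset = Dataset(lats, lons, times, values, variable='example')

    # Bounds that spill past the dataset spatially, temporally, or both,
    # while staying inside valid lat/lon and chronological limits.
    self.spatial_out_of_bounds = Bounds(-80, 80, -180, 180,
                                        datetime.datetime(2001, 1, 1),
                                        datetime.datetime(2004, 1, 1))
    self.temporal_out_of_bounds = Bounds(-40, 40, -160, 160,
                                         datetime.datetime(1999, 1, 15),
                                         datetime.datetime(2222, 2, 15))
    self.everything_out_of_bounds = Bounds(-80, 80, -180, 180,
                                           datetime.datetime(1999, 1, 15),
                                           datetime.datetime(2222, 2, 15))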
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """Subset, temporally rebin, and spatially regrid the reference and
    target datasets according to the 'evaluation' section of config_data."""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
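# A minimal sketch of the config_data structure that
# _prepare_datasets_for_evaluation reads. The key names mirror the .get()
# calls in the function above; the concrete values (bounds, step sizes,
# dates) are illustrative assumptions, and the subset ordering
# [lat_min, lat_max, lon_min, lon_max, start, end] is inferred from the
# Bounds() call inside the function.
example_config_data = {
    'evaluation': {
        # Spatial/temporal subset: [lat_min, lat_max, lon_min, lon_max, start, end]
        'subset': [-45.0, 45.0, -24.0, 60.0, '1998-01-01', '2008-12-31'],
        # Temporal bucket size in days (1 == daily; other values are
        # normalized as monthly before rebinning)
        'temporal_time_delta': 30,
        # [start, stop, step] triples passed to np.arange() to build the
        # target regridding grid
        'spatial_regrid_lats': [-45.0, 45.5, 0.5],
        'spatial_regrid_lons': [-24.0, 60.5, 0.5],
    }
}

# Example call (reference_dataset and target_datasets are OCW Dataset objects):
# reference, targets = _prepare_datasets_for_evaluation(
#     reference_dataset, target_datasets, example_config_data)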
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name.
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin.
            // This is a timedelta of days to use, so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be
            // able to load a file that they have locally. That would change
            // the format in which this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid
    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)

    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of
    # the month or times of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(subset, ref_dataset)
    target_datasets = [dsp.safe_subset(subset, ds)
                       for ds in target_datasets]

    # Do a temporal re-bin based on the passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do a spatial re-bin based on the reference dataset and lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name.
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin.
            // This is a timedelta of days to use, so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be
            // able to load a file that they have locally. That would change
            // the format in which this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid
    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)

    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of
    # the month or times of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(ref_dataset, subset)
    target_datasets = [dsp.safe_subset(ds, subset)
                       for ds in target_datasets]

    # Do a temporal re-bin based on the passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do a spatial re-bin based on the reference dataset and lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
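# A hedged example of the JSON body that run_evaluation expects to be POSTed,
# assembled from the docstring above. The file path, RCMED ids, variable
# names, metric list entries, and the endpoint URL in the usage note are all
# hypothetical placeholders, not real resources.
example_request = {
    'reference_dataset': {
        'data_source_id': 1,  # 1 == local file on the server
        'dataset_info': {
            'id': '/path/to/local/reference_file.nc',  # placeholder path
            'var_name': 'tasmax',
            'lat_name': 'lat',
            'lon_name': 'lon',
            'time_name': 'time',
            'name': 'Example reference dataset'
        }
    },
    'target_datasets': [{
        'data_source_id': 2,  # 2 == rcmed
        'dataset_info': {
            'dataset_id': 4,      # placeholder RCMED dataset id
            'parameter_id': 32,   # placeholder RCMED parameter id
            'name': 'Example target dataset'
        }
    }],
    'spatial_rebin_lat_step': 1,
    'spatial_rebin_lon_step': 1,
    'temporal_resolution': 30,  # days per bucket, so roughly monthly
    # Assumed to be metric class names passed as strings
    'metrics': ['Bias'],
    'start_time': '2000-01-01 00:00:00',
    'end_time': '2009-12-01 00:00:00',
    'lat_min': -45.0,
    'lat_max': 45.0,
    'lon_min': -24.0,
    'lon_max': 60.0
    # 'subregion_information' is omitted here; per the docstring, subregion
    # support is currently minimal.
}

# Usage sketch with the requests library (the host, port, and route prefix
# are assumptions about how the Flask service is mounted):
# import requests
# requests.post('http://localhost:8082/evaluation/run_evaluation',
#               json=example_request)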