def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """"""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with 1-off
    # errors in the resulting dataset shape post-temporal/spatial adjustments
    # that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets
    ]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
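
A minimal usage sketch for the helper above. The values are hypothetical;
the subset list order [lat_min, lat_max, lon_min, lon_max, start, end] is
inferred from the Bounds(...) call in the function body.

config_data = {
    'evaluation': {
        'subset': [-45.0, 45.0, -90.0, 90.0, '2000-01-01', '2010-12-31'],
        'temporal_time_delta': 30,  # rebin to roughly monthly buckets
        'spatial_regrid_lats': (-45.0, 45.0, 1.0),  # (start, stop, step)
        'spatial_regrid_lons': (-90.0, 90.0, 1.0),
    }
}
# reference and targets are assumed to be already-loaded OCW Datasets.
reference, targets = _prepare_datasets_for_evaluation(reference, targets,
                                                      config_data)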
Example #3
 def setUp(self):
     self.input_dataset = ten_year_monthly_dataset()
     self.new_lats = np.array(range(-89, 90, 4))
     self.new_lons = np.array(range(-179, 180, 4))
     self.regridded_dataset = dp.spatial_regrid(self.input_dataset,
                                                self.new_lats,
                                                self.new_lons)
Example #4
 def test_two_dimensional_lats_lons(self):
     self.input_dataset.lats = np.array(range(-89, 90, 2))
     self.input_dataset.lons = np.array(range(-179, 180, 4))
     self.input_dataset.lats = self.input_dataset.lats.reshape(2, 45)
     self.input_dataset.lons = self.input_dataset.lons.reshape(2, 45)
     new_dataset = dp.spatial_regrid(
         self.input_dataset, self.new_lats, self.new_lons)
     np.testing.assert_array_equal(new_dataset.lats, self.new_lats)
Example #6
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """"""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
Example #7
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid

    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)

    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of the
    # month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(subset, ref_dataset)
    target_datasets = [dsp.safe_subset(subset, ds)
                       for ds
                       in target_datasets]
    
    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do spatial re-bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
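
A hedged client-side sketch of calling this endpoint. The route and port are
hypothetical; the payload keys follow the docstring above.

import requests

payload = {
    'reference_dataset': {
        'data_source_id': 1,  # local file, per the docstring
        'dataset_info': {'id': '/path/to/dataset.nc', 'var_name': 'tas',
                         'lat_name': 'lat', 'lon_name': 'lon',
                         'time_name': 'time'},
    },
    'target_datasets': [],
    'spatial_rebin_lat_step': 1,
    'spatial_rebin_lon_step': 1,
    'temporal_resolution': 30,
    'metrics': ['Bias'],
    'start_time': '2000-01-01 00:00:00',
    'end_time': '2010-12-31 00:00:00',
    'lat_min': -45.0, 'lat_max': 45.0,
    'lon_min': -90.0, 'lon_max': 90.0,
}
# Hypothetical URL; check the web service's routing for the real path.
response = requests.post('http://localhost:8082/evaluation/run', json=payload)
print(response.json()['eval_work_dir'])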
Example #8
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset,
                                    datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)

# Load the datasets for the evaluation.
mean_bias = metrics.MeanBias()
# These versions of the metrics require seasonal bounds prior to running
# the metrics. You should set these values above in the evaluation
# configuration section.
spatial_std_dev_ratio = metrics.SeasonalSpatialStdDevRatio(
    month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)
pattern_correlation = metrics.SeasonalPatternCorrelation(
    month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)

# Create our example evaluation.
example_eval = evaluation.Evaluation(
    ref_dataset,  # Reference dataset for the evaluation
    # 1 or more target datasets for the evaluation
    [target_dataset],
    # 1 or more metrics to use in the evaluation
    [mean_bias, spatial_std_dev_ratio, pattern_correlation])
Example #9
 def setUp(self):
     self.input_dataset = ten_year_monthly_dataset()
     self.new_lats = np.array(range(-89, 90, 4))
     self.new_lons = np.array(range(-179, 180, 4))
     self.regridded_dataset = dp.spatial_regrid(
         self.input_dataset, self.new_lats, self.new_lons)
Example #10
    nlat = (max_lat - min_lat) / delta_lat + 1
    nlon = (max_lon - min_lon) / delta_lon + 1
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print('Dataset loading completed')
print('Observation data:', ref_name)
print('Number of model datasets:', nmodel)
for model_name in model_names:
    print(model_name)

""" Step 4: Spatial regridding of the reference datasets """
print('Regridding datasets:', config['regrid'])
if not config['regrid']['regrid_on_reference']:
    ref_dataset = dsp.spatial_regrid(ref_dataset, new_lat, new_lon)
    print('Reference dataset has been regridded')
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.spatial_regrid(dataset, new_lat, new_lon,
                                               boundary_check=boundary_check_model)
    print(model_names[idata] + ' has been regridded')
print('Propagating missing data information')
datasets = dsp.mask_missing_data([ref_dataset] + model_datasets)
ref_dataset = datasets[0]
model_datasets = datasets[1:]

""" Step 5: Checking and converting variable units """
print('Checking and converting variable units')
ref_dataset = dsp.variable_unit_conversion(ref_dataset)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.variable_unit_conversion(dataset)
Example #11
# Temporally re-bin the data into a monthly timestep.
################################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=30))
wrf_dataset = dsp.temporal_rebin(wrf_dataset, datetime.timedelta(days=30))

# Spatially regrid the datasets onto a 1 degree grid.
################################################################################
# Use the bounds of the reference dataset to create a new set of
# lat/lon values on a 1 degree step.
min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries()
new_lons = numpy.arange(min_lon, max_lon, 1)
new_lats = numpy.arange(min_lat, max_lat, 1)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)

# Load the metrics that we want to use for the evaluation.
################################################################################
taylor_diagram = metrics.SpatialPatternTaylorDiagram()

# Create our new evaluation object. The knmi dataset is the evaluations
# reference dataset. We then provide a list of 1 or more target datasets
# to use for the evaluation. In this case, we only want to use the wrf dataset.
# Then we pass a list of all the metrics that we want to use in the evaluation.
################################################################################
test_evaluation = evaluation.Evaluation(knmi_dataset, [wrf_dataset], [taylor_diagram])
test_evaluation.run()

# Pull out the evaluation results and prepare them for drawing a Taylor diagram.
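
A hedged sketch of that extraction. Evaluation.results is indexed
[target][metric], so with one target (wrf) and one metric, the Taylor
statistics (a stddev-ratio/correlation pair) sit at [0][0];
ocw.plotter.draw_taylor_diagram can render them, as in the OCW examples.

taylor_data = test_evaluation.results[0][0]
print('stddev ratio, pattern correlation:', taylor_data)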
Example #12
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset, datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)

# Load the datasets for the evaluation.
mean_bias = metrics.MeanBias()
# These versions of the metrics require seasonal bounds prior to running
# the metrics. You should set these values above in the evaluation
# configuration section.
spatial_std_dev_ratio = metrics.SeasonalSpatialStdDevRatio(month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)
pattern_correlation = metrics.SeasonalPatternCorrelation(month_start=SEASON_MONTH_START, month_end=SEASON_MONTH_END)

# Create our example evaluation.
example_eval = evaluation.Evaluation(ref_dataset,  # Reference dataset for the evaluation
                                     # 1 or more target datasets for the evaluation
                                     [target_dataset],
                                     # 1 or more metrics to use in the evaluation
                                     [mean_bias, spatial_std_dev_ratio, pattern_correlation])
Example #13
print("Processing datasets ...")
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')
print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.spatial_regrid(target_datasets[member],
                                                 new_lats, new_lons)

# Find the total annual mean. The helper lives in utils.py as
# calc_climatology_year(dataset).
_, CRU31.values = utils.calc_climatology_year(CRU31)

for member, each_target_dataset in enumerate(target_datasets):
    _, target_datasets[member].values = utils.calc_climatology_year(
        target_datasets[member])

# make the model ensemble
target_datasets_ensemble = dsp.ensemble(target_datasets)
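
For intuition, dsp.ensemble builds a mean-of-members dataset. A minimal
NumPy sketch of the same idea (illustrative only, not OCW's implementation;
assumes every member shares one grid and time axis):

import numpy as np

def naive_ensemble_mean(datasets):
    # Stack the member value arrays and average across the member axis,
    # honoring masks so missing cells don't drag the mean down.
    stacked = np.ma.stack([ds.values for ds in datasets])
    return stacked.mean(axis=0)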
Example #14
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid

    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)

    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of the
    # month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(ref_dataset, subset)
    target_datasets = [dsp.safe_subset(ds, subset)
                       for ds
                       in target_datasets]
    
    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do spatial re-bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
print("Temporally Rebinning the Datasets to a Single Timestep")
# To run FULL temporal Rebinning
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='full')
cru31_dataset = dsp.temporal_rebin(cru31_dataset, temporal_resolution='full')

print("KNMI_Dataset.values shape: %s" % (knmi_dataset.values.shape, ))
print("CRU31_Dataset.values shape: %s \n\n" % (cru31_dataset.values.shape, ))
""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on 0.5 degree step
new_lons = np.arange(min_lon, max_lon, 0.5)
new_lats = np.arange(min_lat, max_lat, 0.5)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
print("Spatially Regridding the KNMI_Dataset...")
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
print("Spatially Regridding the CRU31_Dataset...")
cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons)
print("Final shape of the KNMI_Dataset:%s" % (knmi_dataset.values.shape, ))
print("Final shape of the CRU31_Dataset:%s" % (cru31_dataset.values.shape, ))
""" Step 4:  Build a Metric to use for Evaluation - Bias for this example """
# You can build your own metrics, but OCW also ships with some common metrics
print("Setting up a Bias metric to use for evaluation")
bias = metrics.Bias()
""" Step 5: Create an Evaluation Object using Datasets and our Metric """
# The Evaluation Class Signature is:
# Evaluation(reference, targets, metrics, subregions=None)
# Evaluation can take in multiple targets and metrics, so we need to convert
# our examples into Python lists.  Evaluation will iterate over the lists
print("Making the Evaluation definition")
bias_evaluation = evaluation.Evaluation(knmi_dataset, [cru31_dataset], [bias])
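
Running it and pulling the result out follows the pattern used elsewhere on
this page: results are indexed [target][metric], so the bias array for the
single target/metric pair sits at [0][0].

bias_evaluation.run()
bias_results = bias_evaluation.results[0][0]
print("Bias results shape: %s" % (bias_results.shape, ))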
Example #16
def spatialRegrid(dataset, lats, lons):
    '''Spatially regrid the dataset variable to a new grid with the specified
    resolution, where lats and lons are the new coordinate vectors.
    '''
    return dsp.spatial_regrid(dataset, lats, lons)
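
A hedged call sketch for the wrapper above; the 1-degree grid is
hypothetical and 'dataset' is assumed to be an already-loaded OCW Dataset.

import numpy as np

new_lats = np.arange(-45.0, 45.0, 1.0)
new_lons = np.arange(-90.0, 90.0, 1.0)
regridded = spatialRegrid(dataset, new_lats, new_lons)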
Example #17
# Get flag for boundary checking for regridding. By default, this is set to True
# since the main intent of this program is to evaluate RCMs. However, it can be
# used for GCMs in which case it should be set to False to save time.
boundary_check = config['regrid'].get('boundary_check', True)

# number of target datasets (usually models, but can also be obs / reanalysis)
ntarget = len(target_datasets)
print('Dataset loading completed')
print('Reference data: {}'.format(reference_name))
print('Number of target datasets: {}'.format(ntarget))
for target_name in target_names:
    print(target_name)

""" Step 3: Spatial regriding of the datasets """
print('Regridding datasets: {}'.format(config['regrid']))
if not config['regrid']['regrid_on_reference']:
    reference_dataset = dsp.spatial_regrid(reference_dataset, new_lat, new_lon)
    print('Reference dataset has been regridded')
for i, dataset in enumerate(target_datasets):
    target_datasets[i] = dsp.spatial_regrid(dataset, new_lat, new_lon,
                                            boundary_check=boundary_check)
    print('{} has been regridded'.format(target_names[i]))
print('Propagating missing data information')
datasets = dsp.mask_missing_data([reference_dataset]+target_datasets)
reference_dataset = datasets[0]
target_datasets = datasets[1:]

""" Step 4: Checking and converting variable units """
print('Checking and converting variable units')
reference_dataset = dsp.variable_unit_conversion(reference_dataset)
for i, dataset in enumerate(target_datasets):
    target_datasets[i] = dsp.variable_unit_conversion(dataset)
Example #18
    nlat = (max_lat - min_lat) / delta_lat + 1
    nlon = (max_lon - min_lon) / delta_lon + 1
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print('Dataset loading completed')
print('Observation data:', ref_name)
print('Number of model datasets:', nmodel)
for model_name in model_names:
    print(model_name)
""" Step 4: Spatial regridding of the reference datasets """
print('Regridding datasets:', config['regrid'])
if not config['regrid']['regrid_on_reference']:
    ref_dataset = dsp.spatial_regrid(ref_dataset, new_lat, new_lon)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.spatial_regrid(dataset, new_lat, new_lon)

print('Propagating missing data information')
datasets = dsp.mask_missing_data([ref_dataset] + model_datasets)
ref_dataset = datasets[0]
model_datasets = datasets[1:]
""" Step 5: Checking and converting variable units """
print('Checking and converting variable units')
ref_dataset = dsp.variable_unit_conversion(ref_dataset)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.variable_unit_conversion(dataset)

print('Generating multi-model ensemble')
if len(model_datasets) >= 2:
    model_datasets.append(dsp.ensemble(model_datasets))
Example #19
    nlat = (max_lat - min_lat) / delta_lat + 1
    nlon = (max_lon - min_lon) / delta_lon + 1
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print('Dataset loading completed')
print('Observation data:', ref_name)
print('Number of model datasets:', nmodel)
for model_name in model_names:
    print(model_name)

""" Step 4: Spatial regridding of the reference datasets """
print('Regridding datasets:', config['regrid'])
if not config['regrid']['regrid_on_reference']:
    ref_dataset = dsp.spatial_regrid(ref_dataset, new_lat, new_lon)
    print('Reference dataset has been regridded')
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.spatial_regrid(dataset, new_lat, new_lon)
    print(model_names[idata] + ' has been regridded')

print('Propagating missing data information')
datasets = dsp.mask_missing_data([ref_dataset] + model_datasets)
ref_dataset = datasets[0]
model_datasets = datasets[1:]

""" Step 5: Checking and converting variable units """
print('Checking and converting variable units')
ref_dataset = dsp.variable_unit_conversion(ref_dataset)
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.variable_unit_conversion(dataset)
Example #20
# Get flag for boundary checking for regridding. By default, this is set to True
# since the main intent of this program is to evaluate RCMs. However, it can be
# used for GCMs in which case it should be set to False to save time.
boundary_check = config['regrid'].get('boundary_check', True)

# number of target datasets (usually models, but can also be obs / reanalysis)
ntarget = len(target_datasets)
print('Dataset loading completed')
print('Reference data: {}'.format(reference_name))
print('Number of target datasets: {}'.format(ntarget))
for target_name in target_names:
    print(target_name)
""" Step 3: Spatial regriding of the datasets """
print('Regridding datasets: {}'.format(config['regrid']))
if not config['regrid']['regrid_on_reference']:
    reference_dataset = dsp.spatial_regrid(reference_dataset, new_lat, new_lon)
    print('Reference dataset has been regridded')
for i, dataset in enumerate(target_datasets):
    target_datasets[i] = dsp.spatial_regrid(dataset,
                                            new_lat,
                                            new_lon,
                                            boundary_check=boundary_check)
    print('{} has been regridded'.format(target_names[i]))
print('Propagating missing data information')
datasets = dsp.mask_missing_data([reference_dataset] + target_datasets)
reference_dataset = datasets[0]
target_datasets = datasets[1:]
""" Step 4: Checking and converting variable units """
print('Checking and converting variable units')
reference_dataset = dsp.variable_unit_conversion(reference_dataset)
for i, dataset in enumerate(target_datasets):
    target_datasets[i] = dsp.variable_unit_conversion(dataset)
Example #23
    nlat = (max_lat - min_lat) / delta_lat + 1
    nlon = (max_lon - min_lon) / delta_lon + 1
    new_lat = np.linspace(min_lat, max_lat, nlat)
    new_lon = np.linspace(min_lon, max_lon, nlon)

# number of models
nmodel = len(model_datasets)
print('Dataset loading completed')
print('Observation data:', obs_name)
print('Number of model datasets:', nmodel)
for model_name in model_names:
    print(model_name)
""" Step 4: Spatial regridding of the reference datasets """
print('Regridding datasets:', config['regrid'])
if not config['regrid']['regrid_on_reference']:
    obs_dataset = dsp.spatial_regrid(obs_dataset, new_lat, new_lon)
    print('Reference dataset has been regridded')
for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.spatial_regrid(dataset,
                                           new_lat,
                                           new_lon,
                                           boundary_check=boundary_check)
    print(model_names[i] + ' has been regridded')
print('Propagating missing data information')
datasets = dsp.mask_missing_data([obs_dataset] + model_datasets)
obs_dataset = datasets[0]
model_datasets = datasets[1:]
""" Step 5: Checking and converting variable units """
print('Checking and converting variable units')
obs_dataset = dsp.variable_unit_conversion(obs_dataset)
for i, dataset in enumerate(model_datasets):
    model_datasets[i] = dsp.variable_unit_conversion(dataset)
Example #24
    datasets = local.load_multiple_files(paths, 'clt')

with time_block(ocw_results, 'Domain Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.subset(ds, bnds)

with time_block(ocw_results, 'Seasonal Subsetting'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_subset(ds, 9, 11)

with time_block(ocw_results, 'Resampling'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.temporal_rebin(ds, 'annual')

with time_block(ocw_results, 'Regridding'):
    for i, ds in enumerate(datasets):
        datasets[i] = dsp.spatial_regrid(ds, new_lats, new_lons)

print(f'OCW Results: {ocw_results}')

# Plot results
matplotlib.style.use('ggplot')
df = pd.DataFrame({'OCW': ocw_results, 'BCDP': bcdp_results})
ax = df.plot.bar(logy=True, rot=12)
for p in ax.patches:
    val = np.round(p.get_height(), decimals=2)
    ax.annotate(str(val), (p.get_x() + .02, p.get_height()), size=9.5)

plt.ylabel('Running Time [s]')
plt.savefig('bcdp_ocw_benchmarks.png')
Example #25
# Running Temporal Rebin early helps negate the issue of datasets being on
# different days of the month (1st vs. 15th)
# Create a Bounds object to use for subsetting
new_bounds = Bounds(min_lat, max_lat, min_lon, max_lon, start_time, end_time)

# Subset our model datasets so they are the same size
knmi_dataset = dsp.subset(knmi_dataset, new_bounds)
wrf311_dataset = dsp.subset(wrf311_dataset, new_bounds)

""" Spatially Regrid the Dataset Objects to a 1/2 degree grid """
# Using the bounds we will create a new set of lats and lons on 1/2 degree step
new_lons = np.arange(min_lon, max_lon, 0.5)
new_lats = np.arange(min_lat, max_lat, 0.5)
 
# Spatially regrid datasets using the new_lats, new_lons numpy arrays
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
wrf311_dataset = dsp.spatial_regrid(wrf311_dataset, new_lats, new_lons)
cru31_dataset = dsp.spatial_regrid(cru31_dataset, new_lats, new_lons)

# Generate an ensemble dataset from knmi and wrf models
ensemble_dataset = dsp.ensemble([knmi_dataset, wrf311_dataset])

""" Step 4:  Build a Metric to use for Evaluation - Bias for this example """
print("Setting up a Bias metric to use for evaluation")
bias = metrics.Bias()

""" Step 5: Create an Evaluation Object using Datasets and our Metric """
# The Evaluation Class Signature is:
# Evaluation(reference, targets, metrics, subregions=None)
# Evaluation can take in multiple targets and metrics, so we need to convert
# our examples into Python lists.  Evaluation will iterate over the lists
""" Step 3: Processing datasets so they are the same shape ... """
print("Processing datasets so they are the same shape")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)


for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.spatial_regrid(
        target_datasets[member], new_lats, new_lons)

# find climatology monthly for obs and models
CRU31.values, CRU31.times = utils.calc_climatology_monthly(CRU31)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member].values, target_datasets[member].times = \
        utils.calc_climatology_monthly(target_datasets[member])

# make the model ensemble
target_datasets_ensemble = dsp.ensemble(target_datasets)
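
For intuition, the monthly climatology above collapses a multi-year monthly
series to 12 values per grid cell. A minimal NumPy sketch of the idea
(illustrative only, not the utils implementation; assumes whole years of
monthly data):

import numpy as np

def naive_monthly_climatology(values):
    # values: (n_months, nlat, nlon) masked array; fold time into
    # (n_years, 12, ...) and average over the years axis.
    n_years = values.shape[0] // 12
    folded = values[:n_years * 12].reshape(n_years, 12, *values.shape[1:])
    return folded.mean(axis=0)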
Example #27
# Temporally re-bin the data into a monthly timestep.
##########################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, temporal_resolution='monthly')
wrf_dataset = dsp.temporal_rebin(wrf_dataset, temporal_resolution='monthly')

# Spatially regrid the datasets onto a 1 degree grid.
##########################################################################
# Use the bounds of the reference dataset to create a new set of
# lat/lon values on a 1 degree step.
min_lat, max_lat, min_lon, max_lon = knmi_dataset.spatial_boundaries()
new_lons = numpy.arange(min_lon, max_lon, 1)
new_lats = numpy.arange(min_lat, max_lat, 1)

# Spatially regrid datasets using the new_lats, new_lons numpy arrays
knmi_dataset = dsp.spatial_regrid(knmi_dataset, new_lats, new_lons)
wrf_dataset = dsp.spatial_regrid(wrf_dataset, new_lats, new_lons)

# Load the metrics that we want to use for the evaluation.
##########################################################################
taylor_diagram = metrics.SpatialPatternTaylorDiagram()

# Create our new evaluation object. The knmi dataset is the evaluations
# reference dataset. We then provide a list of 1 or more target datasets
# to use for the evaluation. In this case, we only want to use the wrf dataset.
# Then we pass a list of all the metrics that we want to use in the evaluation.
##########################################################################
test_evaluation = evaluation.Evaluation(knmi_dataset, [wrf_dataset],
                                        [taylor_diagram])
test_evaluation.run()
Example #28
def run_screen(model_datasets, models_info, observations_info,
               overlap_start_time, overlap_end_time, overlap_min_lat,
               overlap_max_lat, overlap_min_lon, overlap_max_lon,
               temp_grid_setting, spatial_grid_setting, working_directory, plot_title):
     '''Generates a screen to show the running evaluation process.

     :param model_datasets: list of model dataset objects
     :type model_datasets: list
     :param models_info: list of dictionaries that contain information for each model
     :type models_info: list
     :param observations_info: list of dictionaries that contain information for each observation
     :type observations_info: list
     :param overlap_start_time: overlap start time between model and obs start time
     :type overlap_start_time: datetime
     :param overlap_end_time: overlap end time between model and obs end time
     :type overlap_end_time: datetime
     :param overlap_min_lat: overlap minimum lat between model and obs minimum lat
     :type overlap_min_lat: float
     :param overlap_max_lat: overlap maximum lat between model and obs maximum lat
     :type overlap_max_lat: float
     :param overlap_min_lon: overlap minimum lon between model and obs minimum lon
     :type overlap_min_lon: float
     :param overlap_max_lon: overlap maximum lon between model and obs maximum lon
     :type overlap_max_lon: float
     :param temp_grid_setting: temporal grid option such as hourly, daily, monthly and annually
     :type temp_grid_setting: string
     :param spatial_grid_setting: spatial grid step in degrees
     :type spatial_grid_setting: string
     :param working_directory: path to a directory for storing outputs
     :type working_directory: string
     :param plot_title: Title for plot
     :type plot_title: string
     '''

     option = None
     if option != "0":
          ready_screen("manage_obs_screen")
          y = screen.getmaxyx()[0]
          screen.addstr(2, 2, "Evaluation started....")
          screen.refresh()

          OUTPUT_PLOT = "plot"

          dataset_id = int(observations_info[0]['dataset_id'])       #just accepts one dataset at this time
          parameter_id = int(observations_info[0]['parameter_id'])  #just accepts one dataset at this time

          new_bounds = Bounds(overlap_min_lat, overlap_max_lat, overlap_min_lon, overlap_max_lon, overlap_start_time, overlap_end_time)
          model_dataset = dsp.subset(new_bounds, model_datasets[0])   #just accepts one model at this time

          # Getting bound info of the subsetted model file to retrieve obs data with the same bounds as the subsetted model
          new_model_spatial_bounds = model_dataset.spatial_boundaries()
          new_model_temp_bounds = model_dataset.time_range()
          new_min_lat = new_model_spatial_bounds[0]
          new_max_lat = new_model_spatial_bounds[1]
          new_min_lon = new_model_spatial_bounds[2]
          new_max_lon = new_model_spatial_bounds[3]
          new_start_time = new_model_temp_bounds[0]
          new_end_time = new_model_temp_bounds[1]

          screen.addstr(4, 4, "Retrieving data...")
          screen.refresh()

          #Retrieve obs data
          obs_dataset = rcmed.parameter_dataset(
                                        dataset_id,
                                        parameter_id,
                                        new_min_lat,
                                        new_max_lat,
                                        new_min_lon,
                                        new_max_lon,
                                        new_start_time,
                                        new_end_time)
          screen.addstr(4, 4, "--> Data retrieved.")
          screen.refresh()

          screen.addstr(5, 4, "Temporally regridding...")
          screen.refresh()
          if temp_grid_setting.lower() == 'hourly':
               days = 0.5
          elif temp_grid_setting.lower() == 'daily':
               days = 1
          elif temp_grid_setting.lower() == 'monthly':
               days = 31
          else:
               days = 365
          model_dataset = dsp.temporal_rebin(model_dataset, timedelta(days))
          obs_dataset = dsp.temporal_rebin(obs_dataset, timedelta(days))
          screen.addstr(5, 4, "--> Temporally regridded.")
          screen.refresh()

          new_lats = np.arange(new_min_lat, new_max_lat, spatial_grid_setting)
          new_lons = np.arange(new_min_lon, new_max_lon, spatial_grid_setting)

          screen.addstr(6, 4, "Spatially regridding...")
          screen.refresh()
          spatial_gridded_model = dsp.spatial_regrid(model_dataset, new_lats, new_lons)
          spatial_gridded_obs = dsp.spatial_regrid(obs_dataset, new_lats, new_lons)
          screen.addstr(6, 4, "--> Spatially regridded.")
          screen.refresh()

          screen.addstr(7, 4, "Setting up metrics...")
          screen.refresh()
          bias = metrics.Bias()
          bias_evaluation = evaluation.Evaluation(spatial_gridded_model, [spatial_gridded_obs], [bias])
          screen.addstr(7, 4, "--> Metrics setting done.")
          screen.refresh()

          screen.addstr(8, 4, "Running evaluation.....")
          screen.refresh()
          bias_evaluation.run()
          results = bias_evaluation.results[0][0]
          screen.addstr(8, 4, "--> Evaluation Finished.")
          screen.refresh()

          screen.addstr(9, 4, "Generating plots....")
          screen.refresh()
          lats = new_lats
          lons = new_lons

          gridshape = (1, 1)
          sub_titles = [""]   #No subtitle set for now

          if not os.path.exists(working_directory):
               os.makedirs(working_directory)

          for i in range(len(results)):
               fname = working_directory + OUTPUT_PLOT + str(i)
               plotter.draw_contour_map(results[i], lats, lons, fname,
                               gridshape=gridshape, ptitle=plot_title,
                               subtitles=sub_titles)
          screen.addstr(9, 4, "--> Plots generated.")
          screen.refresh()
          screen.addstr(y-2, 1, "Press 'enter' to Exit: ")
          option = screen.getstr()