Example No. 1
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """"""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If the temporal time delta is daily (i.e., 1) we normalize the data as
    # daily, meaning the start time of each bucket of data is adjusted to be
    # consistent. By default we normalize as monthly. Monthly normalization
    # does not break yearly data, so it is safe to apply regardless, and it
    # keeps us from ending up with off-by-one errors in the resulting dataset
    # shape after the temporal/spatial adjustments below, which would break
    # evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets
    ]

    if subset:
        # subset layout: [lat_min, lat_max, lon_min, lon_max, start, end]
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(days=temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
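For reference, here is a minimal sketch of the config_data layout this
function reads. The key names come from the .get() lookups above; every
concrete value below is a made-up placeholder, not taken from the source.

# Hypothetical evaluation config for _prepare_datasets_for_evaluation.
# Key names match the lookups above; all values are illustrative only.
config_data = {
    'evaluation': {
        # subset layout: [lat_min, lat_max, lon_min, lon_max, start, end]
        'subset': [-45.0, 42.0, -24.0, 60.0, '1989-01-01', '1989-12-01'],
        # Size of each temporal bin in days (1 == daily).
        'temporal_time_delta': 30,
        # (start, stop, step) arguments passed to np.arange.
        'spatial_regrid_lats': (-45.0, 42.0, 1.0),
        'spatial_regrid_lons': (-24.0, 60.0, 1.0),
    }
}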
Example No. 2
    def test_daily_time(self):
        # Test daily normalization when time.hour != 0
        self.monthly_dataset.times = np.array([
            datetime.datetime(year, month, 15, 5)
            for year in range(2000, 2010) for month in range(1, 13)
        ])
        new_ds = dp.normalize_dataset_datetimes(self.monthly_dataset, 'daily')
        # Check that all the hours have been reset to the start of the day
        self.assertTrue(all(x.hour == 0 for x in new_ds.times))
Example No. 3
    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
                                          ref_data_info['parameter_id'],
                                          min_lat, max_lat, min_lon, max_lon,
                                          start_time, end_time)
elif ref_data_info['data_source'] == 'ESGF':
    username = raw_input('Enter your ESGF OpenID:\n')
    password = raw_input('Enter your ESGF password:\n')
    ds = esgf.load_dataset(dataset_id=ref_data_info['dataset_id'],
                           variable=ref_data_info['variable'],
                           esgf_username=username,
                           esgf_password=password)
    ref_dataset = ds[0]
else:
    print ' '
if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset,
                                                  temporal_resolution)
if 'multiplying_factor' in ref_data_info.keys():
    ref_dataset.values = ref_dataset.values * ref_data_info[
        'multiplying_factor']

""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
model_data_info = config['datasets']['targets']
model_lat_name = None
model_lon_name = None
if 'latitude_name' in model_data_info.keys():
    model_lat_name = model_data_info['latitude_name']
if 'longitude_name' in model_data_info.keys():
    model_lon_name = model_data_info['longitude_name']
boundary_check_model = True
if 'GCM_data' in model_data_info.keys():
    if model_data_info['GCM_data']:
        boundary_check_model = False
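For orientation, a hedged sketch of the config entries this fragment reads.
The keys are taken from the lookups above; the ids, names, and values are
invented placeholders.

# Hypothetical ref_data_info / model_data_info entries; the keys come from
# the code above, the values are made up for illustration.
ref_data_info = {
    'data_source': 'rcmed',       # or 'local' / 'ESGF'
    'dataset_id': 10,             # RCMED dataset id (placeholder)
    'parameter_id': 37,           # RCMED parameter id (placeholder)
    'multiplying_factor': 86400,  # optional rescaling of values (placeholder)
}
model_data_info = {
    'latitude_name': 'lat',       # optional coordinate variable names
    'longitude_name': 'lon',
    'GCM_data': True,             # True skips the model boundary check
}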
Example No. 4
    urllib.urlretrieve(FILE_LEADER + FILE_3, FILE_3)
""" Step 1: Load Local NetCDF File into OCW Dataset Objects and store in list"""
target_datasets.append(local.load_file(FILE_1, varName, name="KNMI"))
target_datasets.append(local.load_file(FILE_2, varName, name="REGCM"))
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))
""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print(
    "Working with the rcmed interface to get CRU3.1 Monthly Mean Precipitation"
)
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX,
                                START, END)
""" Step 3: Processing Datasets so they are the same shape """
print("Processing datasets ...")
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')
print("... on units")
CRU31 = dsp.water_flux_unit_conversion(CRU31)

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)
Example No. 5
target_datasets.append(local.load_file(FILE_2, varName, name="REGCM"))
target_datasets.append(local.load_file(FILE_3, varName, name="UCT"))


""" Step 2: Fetch an OCW Dataset Object from the data_source.rcmed module """
print("Working with the rcmed interface to get CRU3.1 Daily Precipitation")
# the dataset_id and the parameter id were determined from
# https://rcmes.jpl.nasa.gov/content/data-rcmes-database
CRU31 = rcmed.parameter_dataset(
    10, 37, LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)


""" Step 3: Processing datasets so they are the same shape ... """
print("Processing datasets so they are the same shape")
CRU31 = dsp.water_flux_unit_conversion(CRU31)
CRU31 = dsp.normalize_dataset_datetimes(CRU31, 'monthly')

for member, each_target_dataset in enumerate(target_datasets):
    target_datasets[member] = dsp.subset(target_datasets[member], EVAL_BOUNDS)
    target_datasets[member] = dsp.water_flux_unit_conversion(
        target_datasets[member])
    target_datasets[member] = dsp.normalize_dataset_datetimes(
        target_datasets[member], 'monthly')

print("... spatial regridding")
new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
CRU31 = dsp.spatial_regrid(CRU31, new_lats, new_lons)


for member, each_target_dataset in enumerate(target_datasets):
Example No. 6
cordex_af_pr.name = "cordex_af_pr"
eu_cordex_tas = local.load_file(EU_CORDEX_TAS, "tas")
eu_cordex_tas.name = "eu_cordex_tas"
eu_cordex_pr = local.load_file(EU_CORDEX_PR, "pr")
eu_cordex_pr.name = "eu_cordex_pr"
cru_31_pr = local.load_file(CRU_31_PR, "pr")
cru_31_pr.name = "cru_31_pr"
cru_31_tas = local.load_file(CRU_31_TAS, "tas")
cru_31_tas.name = "cru_31_tas"
trmm_pr = local.load_file(TRMM_PR, "pcp")
trmm_pr.name = "trmm_pr"

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
cordex_af_tas = dsp.normalize_dataset_datetimes(cordex_af_tas, "monthly")
cordex_af_pr = dsp.normalize_dataset_datetimes(cordex_af_pr, "monthly")
eu_cordex_pr = dsp.normalize_dataset_datetimes(eu_cordex_pr, "monthly")
eu_cordex_tas = dsp.normalize_dataset_datetimes(eu_cordex_tas, "monthly")
cru_31_pr = dsp.normalize_dataset_datetimes(cru_31_pr, "monthly")
cru_31_tas = dsp.normalize_dataset_datetimes(cru_31_tas, "monthly")
trmm_pr = dsp.normalize_dataset_datetimes(trmm_pr, "monthly")


# Configure your evaluation here. The evaluation bounds are determined by
# the lat/lon/time values that are set here. If you set the lat/lon values
# outside of the range of the datasets' values you will get an error. Your
# start/end time values should be in 12 month intervals due to the metrics
# being used. If you want to change the datasets being used in the evaluation
# you should set the ref/target dataset values here to the corresponding
# loaded datasets from above.
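A minimal sketch of the configuration the comment above describes. The bounds
and the dataset choices below are hypothetical, picked from the datasets
loaded in this example; a 24-month window satisfies the 12-month-interval
rule.

# Hypothetical evaluation setup following the comment above; the values are
# illustrative only.
EVAL_BOUNDS = Bounds(-40.0, 40.0, -20.0, 55.0,
                     datetime.datetime(1999, 1, 1),
                     datetime.datetime(2000, 12, 1))
ref_dataset = cru_31_tas
target_datasets = [cordex_af_tas, eu_cordex_tas]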
Example No. 7
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -90 to 90 and lon values from -180 to 180.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid

    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)

    target_datasets = [_process_dataset_object(obj, eval_bounds)
                       for obj in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of the
    # month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This ensures that a valid subregion
    # isn't considered out of bounds due to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(subset, ref_dataset)
    target_datasets = [dsp.safe_subset(subset, ds)
                       for ds in target_datasets]

    # Do a temporal re-bin based on the passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
                       for ds in target_datasets]

    # Do a spatial re-bin based on the reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
                                                    lat_step,
                                                    lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets = [dsp.spatial_regrid(ds, lat_bins, lon_bins)
                       for ds in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
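For concreteness, a sketch of a client request matching the documented payload
format. The endpoint URL, file path, ids, and metric choice are assumptions
made for illustration, not part of the source.

# Hypothetical client for run_evaluation; the URL, ids, and paths below are
# placeholders.
import requests

payload = {
    'reference_dataset': {
        'data_source_id': 2,  # rcmed
        'dataset_info': {'dataset_id': 10, 'parameter_id': 37, 'name': 'CRU'},
    },
    'target_datasets': [{
        'data_source_id': 1,  # local
        'dataset_info': {'id': '/data/model.nc', 'var_name': 'pr',
                         'lat_name': 'lat', 'lon_name': 'lon',
                         'time_name': 'time', 'name': 'model'},
    }],
    'spatial_rebin_lat_step': 1,
    'spatial_rebin_lon_step': 1,
    'temporal_resolution': 30,  # (1, 31] days == monthly re-binning
    'metrics': ['Bias'],
    'start_time': '1999-01-01 00:00:00',
    'end_time': '2000-12-01 00:00:00',
    'lat_min': -40.0, 'lat_max': 40.0,
    'lon_min': -20.0, 'lon_max': 55.0,
}
resp = requests.post('http://localhost:8082/run_evaluation', json=payload)
print(resp.json()['eval_work_dir'])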
Example No. 8
    def test_monthly(self):
        new_ds = dp.normalize_dataset_datetimes(
            self.monthly_dataset, 'monthly')

        # Check that all the days have been shifted to the first of the month
        self.assertTrue(all(x.day == 1 for x in new_ds.times))
Example No. 9
print 'Loading observation dataset:\n', ref_data_info
ref_name = ref_data_info['data_name']
if ref_data_info['data_source'] == 'local':
    ref_dataset = local.load_file(ref_data_info['path'],
                                  ref_data_info['variable'],
                                  name=ref_name)
elif ref_data_info['data_source'] == 'rcmed':
    ref_dataset = rcmed.parameter_dataset(ref_data_info['dataset_id'],
                                          ref_data_info['parameter_id'],
                                          min_lat, max_lat, min_lon, max_lon,
                                          start_time, end_time)
else:
    print ' '
    # TO DO: support ESGF

ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, temporal_resolution)
if 'multiplying_factor' in ref_data_info.keys():
    ref_dataset.values = ref_dataset.values * ref_data_info[
        'multiplying_factor']
""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
model_data_info = config['datasets']['targets']
print 'Loading model datasets:\n', model_data_info
if model_data_info['data_source'] == 'local':
    model_datasets, model_names = local.load_multiple_files(
        file_path=model_data_info['path'],
        variable_name=model_data_info['variable'])
else:
    print ' '
    # TO DO: support RCMED and ESGF
for idata, dataset in enumerate(model_datasets):
    model_datasets[idata] = dsp.normalize_dataset_datetimes(
        dataset, temporal_resolution)
Example No. 10
# Extract info we don't want to put into the loader config.
# Multiplying factor to scale the obs by. Currently only supported for the
# reference (first) dataset. We should instead make this a parameter of each
# loader and Dataset object.
fact = data_info[0].pop('multiplying_factor', 1)
""" Step 1: Load the datasets """
print('Loading datasets:\n{}'.format(data_info))
datasets = load_datasets_from_config(extra_opts, *data_info)
multiplying_factor = np.ones(len(datasets))
multiplying_factor[0] = fact
names = [dataset.name for dataset in datasets]
for i, dataset in enumerate(datasets):
    res = dataset.temporal_resolution()
    if res == 'daily' or res == 'monthly':
        datasets[i] = dsp.normalize_dataset_datetimes(dataset, res)
        if multiplying_factor[i] != 1:
            datasets[i].values *= multiplying_factor[i]
""" Step 2: Subset the data for temporal and spatial domain """
# Create a Bounds object to use for subsetting
if maximum_overlap_period:
    start_time, end_time = utils.get_temporal_overlap(datasets)
    print('Maximum overlap period')
    print('start_time: {}'.format(start_time))
    print('end_time: {}'.format(end_time))

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

for i, dataset in enumerate(datasets):
    min_lat = np.max([min_lat, dataset.lats.min()])
Example No. 11
target_dataset.name = "cru_31_tas"
LAT_MIN = -40
LAT_MAX = 40
LON_MIN = -20
LON_MAX = 55
START = datetime.datetime(1999, 1, 1)
END = datetime.datetime(2000, 12, 1)
SEASON_MONTH_START = 1
SEASON_MONTH_END = 12

EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, "monthly")
target_dataset = dsp.normalize_dataset_datetimes(target_dataset, "monthly")

# Subset down the evaluation datasets to our selected evaluation bounds.
target_dataset = dsp.subset(EVAL_BOUNDS, target_dataset)
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)

# Do a monthly temporal rebin of the evaluation datasets.
target_dataset = dsp.temporal_rebin(target_dataset,
                                    datetime.timedelta(days=30))
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
new_lons = np.arange(LON_MIN, LON_MAX, 1.0)
target_dataset = dsp.spatial_regrid(target_dataset, new_lats, new_lons)
ref_dataset = dsp.spatial_regrid(ref_dataset, new_lats, new_lons)
Example No. 12
target_datasets = [target_dataset, target_dataset2, target_dataset3, target_dataset4]
LAT_MIN = 22
LAT_MAX = 71
LON_MIN = -43
LON_MAX = 64
START = datetime.datetime(1999, 1, 1)
END = datetime.datetime(2000, 12, 1)
SEASON_MONTH_START = 1
SEASON_MONTH_END = 12

EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, 'monthly')
target_datasets = [dsp.normalize_dataset_datetimes(target, 'monthly')
                   for target in target_datasets]

# Subset down the evaluation datasets to our selected evaluation bounds.
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)
target_datasets = [dsp.subset(EVAL_BOUNDS, target)
                   for target in target_datasets]

# Do a monthly temporal rebin of the evaluation datasets.
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))
target_datasets = [dsp.temporal_rebin(target, datetime.timedelta(days=30))
                   for target in target_datasets]

# Spatially regrid onto a 1 degree lat/lon grid within our evaluation bounds.
new_lats = np.arange(LAT_MIN, LAT_MAX, 1.0)
Example No. 13
# Load the example datasets into OCW Dataset objects. We want to load
# the 'tasmax' variable values. We'll also name the datasets for use
# when plotting.
################################################################################
knmi_dataset = local.load_file(FILE_1, "tasmax")
wrf_dataset = local.load_file(FILE_2, "tasmax")

knmi_dataset.name = "knmi"
wrf_dataset.name = "wrf"

# Date values from loaded datasets might not always fall on reasonable days.
# With monthly data, we could have data falling on the 1st, 15th, or some other
# day of the month. Let's fix that real quick.
################################################################################
knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly')
wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly')

# We're only going to run this evaluation over a year's worth of data. We'll
# make a Bounds object and use it to subset our datasets.
################################################################################
subset = Bounds(-45, 42, -24, 60,
                datetime.datetime(1989, 1, 1), datetime.datetime(1989, 12, 1))
knmi_dataset = dsp.subset(subset, knmi_dataset)
wrf_dataset = dsp.subset(subset, wrf_dataset)

# Temporally re-bin the data into a monthly timestep.
################################################################################
knmi_dataset = dsp.temporal_rebin(knmi_dataset, datetime.timedelta(days=30))
wrf_dataset = dsp.temporal_rebin(wrf_dataset, datetime.timedelta(days=30))

# Spatially regrid the datasets onto a 1 degree grid.
Example No. 14
]
LAT_MIN = 22
LAT_MAX = 71
LON_MIN = -43
LON_MAX = 64
START = datetime.datetime(1999, 1, 1)
END = datetime.datetime(2000, 12, 1)
SEASON_MONTH_START = 1
SEASON_MONTH_END = 12

EVAL_BOUNDS = Bounds(LAT_MIN, LAT_MAX, LON_MIN, LON_MAX, START, END)

# Normalize the time values of our datasets so they fall on expected days
# of the month. For example, monthly data will be normalized so that:
# 15 Jan 2014, 15 Feb 2014 => 1 Jan 2014, 1 Feb 2014
ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, 'monthly')
target_datasets = [
    dsp.normalize_dataset_datetimes(target, 'monthly')
    for target in target_datasets
]

# Subset down the evaluation datasets to our selected evaluation bounds.
ref_dataset = dsp.subset(EVAL_BOUNDS, ref_dataset)
target_datasets = [
    dsp.subset(EVAL_BOUNDS, target) for target in target_datasets
]

# Do a monthly temporal rebin of the evaluation datasets.
ref_dataset = dsp.temporal_rebin(ref_dataset, datetime.timedelta(days=30))
target_datasets = [
    dsp.temporal_rebin(target, datetime.timedelta(days=30))
    for target in target_datasets
]
Example No. 15
    if 'multiplying_factor' in info:
        multiplying_factor[i] = info.pop('multiplying_factor')

# If models are GCMs we can skip boundary check. Probably need to find a more
# elegant way to express this in the config file API.
boundary_check = True
for i, info in enumerate(model_data_info):
    if 'boundary_check' in info:
        boundary_check = info.pop('boundary_check')
""" Step 1: Load the observation data """
print 'Loading observation datasets:\n', obs_data_info
obs_datasets = load_datasets_from_config(extra_opts, *obs_data_info)
obs_names = [dataset.name for dataset in obs_datasets]
for i, dataset in enumerate(obs_datasets):
    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
        obs_datasets[i] = dsp.normalize_dataset_datetimes(
            dataset, temporal_resolution)

    if multiplying_factor[i] != 1:
        obs_datasets[i].values *= multiplying_factor[i]
""" Step 2: Load model NetCDF Files into OCW Dataset Objects """
model_datasets = load_datasets_from_config(extra_opts, *model_data_info)
model_names = [dataset.name for dataset in model_datasets]
if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
    for i, dataset in enumerate(model_datasets):
        model_datasets[i] = dsp.normalize_dataset_datetimes(
            dataset, temporal_resolution)
""" Step 3: Subset the data for temporal and spatial domain """
# Create a Bounds object to use for subsetting
if time_info['maximum_overlap_period']:
    start_time, end_time = utils.get_temporal_overlap(obs_datasets +
                                                      model_datasets)
Example No. 16
# Load the example datasets into OCW Dataset objects. We want to load
# the 'tasmax' variable values. We'll also name the datasets for use
# when plotting.
##########################################################################
knmi_dataset = local.load_file(FILE_1, "tasmax")
wrf_dataset = local.load_file(FILE_2, "tasmax")

knmi_dataset.name = "knmi"
wrf_dataset.name = "wrf"

# Date values from loaded datasets might not always fall on reasonable days.
# With monthly data, we could have data falling on the 1st, 15th, or some other
# day of the month. Let's fix that real quick.
##########################################################################
knmi_dataset = dsp.normalize_dataset_datetimes(knmi_dataset, 'monthly')
wrf_dataset = dsp.normalize_dataset_datetimes(wrf_dataset, 'monthly')

# We're only going to run this evaluation over a year's worth of data. We'll
# make a Bounds object and use it to subset our datasets.
##########################################################################
subset = Bounds(lat_min=-45,
                lat_max=42,
                lon_min=-24,
                lon_max=60,
                start=datetime.datetime(1989, 1, 1),
                end=datetime.datetime(1989, 12, 1))
knmi_dataset = dsp.subset(knmi_dataset, subset)
wrf_dataset = dsp.subset(wrf_dataset, subset)

# Temporally re-bin the data into a monthly timestep.
Example No. 17
# Extract info we don't want to put into the loader config.
# Multiplying factor to scale the obs by. Currently only supported for the
# reference (first) dataset. We should instead make this a parameter of each
# loader and Dataset object.
fact = data_info[0].pop('multiplying_factor', 1)

""" Step 1: Load the datasets """
print('Loading datasets:\n{}'.format(data_info))
datasets = load_datasets_from_config(extra_opts, *data_info)
multiplying_factor = np.ones(len(datasets))
multiplying_factor[0] = fact
names = [dataset.name for dataset in datasets]
for i, dataset in enumerate(datasets):
    if temporal_resolution == 'daily' or temporal_resolution == 'monthly':
        datasets[i] = dsp.normalize_dataset_datetimes(dataset,
                                                      temporal_resolution)
        if multiplying_factor[i] != 1:
            datasets[i].values *= multiplying_factor[i]

""" Step 2: Subset the data for temporal and spatial domain """
# Create a Bounds object to use for subsetting
if time_info['maximum_overlap_period']:
    start_time, end_time = utils.get_temporal_overlap(datasets)
    print('Maximum overlap period')
    print('start_time: {}'.format(start_time))
    print('end_time: {}'.format(end_time))

if temporal_resolution == 'monthly' and end_time.day != 1:
    end_time = end_time.replace(day=1)

for i, dataset in enumerate(datasets):
    min_lat = np.max([min_lat, dataset.lats.min()])