예제 #1
0
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """"""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get(
        'temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get(
        'spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get(
        'spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with 1-off
    # errors in the resulting dataset shape post-temporal/spatial adjustments
    # that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [
        dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets
    ]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1],
                         spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1],
                         spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
예제 #2
0
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """"""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)

    # If we have a temporal time delta and it's daily (i.e., 1) we will
    # normalize the data as daily data (which means we adjust the start times
    # for each bucket of data to be consistent). By default we will normalize
    # the data as monthly. Note that this will not break yearly data so it's
    # safer to do this no matter what. This keeps us from ending up with 1-off
    # errors in the resulting dataset shape post-temporal/spatial adjustments
    # that break evaluations.
    string_time_delta = 'monthly'
    if temporal_time_delta and temporal_time_delta == 1:
        string_time_delta = 'daily'

    reference = dsp.normalize_dataset_datetimes(reference, string_time_delta)
    targets = [dsp.normalize_dataset_datetimes(t, string_time_delta) for t in targets]

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
예제 #3
0
 def test_partial_spatial_overlap(self):
     '''Ensure that safe_subset can handle out of bounds spatial values'''
     ds = dp.safe_subset(self.target_dataset, self.spatial_out_of_bounds)
     spatial_bounds = ds.spatial_boundaries()
     self.assertEquals(spatial_bounds[0], -60)
     self.assertEquals(spatial_bounds[1], 60)
     self.assertEquals(spatial_bounds[2], -170)
     self.assertEquals(spatial_bounds[3], 170)
예제 #4
0
 def test_partial_spatial_overlap(self):
     '''Ensure that safe_subset can handle out of bounds spatial values'''
     ds = dp.safe_subset(self.spatial_out_of_bounds, self.target_dataset)
     spatial_bounds = ds.spatial_boundaries()
     self.assertEquals(spatial_bounds[0], -60)
     self.assertEquals(spatial_bounds[1], 60)
     self.assertEquals(spatial_bounds[2], -170)
     self.assertEquals(spatial_bounds[3], 170)
예제 #5
0
    def test_partial_temporal_overlap(self):
        '''Ensure that safe_subset can handle out of bounds temporal values'''
        ds = dp.safe_subset(self.target_dataset, self.temporal_out_of_bounds)
        temporal_bounds = ds.time_range()
        start = datetime.datetime(2000, 1, 1)
        end = datetime.datetime(2009, 12, 1)

        self.assertEquals(temporal_bounds[0], start)
        self.assertEquals(temporal_bounds[1], end)
예제 #6
0
    def test_partial_temporal_overlap(self):
        '''Ensure that safe_subset can handle out of bounds temporal values'''
        ds = dp.safe_subset(self.temporal_out_of_bounds, self.target_dataset)
        temporal_bounds = ds.time_range()
        start = datetime.datetime(2000, 1, 1)
        end = datetime.datetime(2009, 12, 1)

        self.assertEquals(temporal_bounds[0], start)
        self.assertEquals(temporal_bounds[1], end)
예제 #7
0
    def test_entire_bounds_overlap(self):
        ds = dp.safe_subset(self.target_dataset, self.everything_out_of_bounds)
        spatial_bounds = ds.spatial_boundaries()
        temporal_bounds = ds.temporal_boundaries()
        start = datetime.datetime(2000, 1, 1)
        end = datetime.datetime(2009, 12, 1)

        self.assertEquals(spatial_bounds[0], -60)
        self.assertEquals(spatial_bounds[1], 60)
        self.assertEquals(spatial_bounds[2], -170)
        self.assertEquals(spatial_bounds[3], 170)
        self.assertEquals(temporal_bounds[0], start)
        self.assertEquals(temporal_bounds[1], end)
예제 #8
0
    def test_entire_bounds_overlap(self):
        ds = dp.safe_subset(self.everything_out_of_bounds, self.target_dataset)
        spatial_bounds = ds.spatial_boundaries()
        temporal_bounds = ds.time_range()
        start = datetime.datetime(2000, 1, 1)
        end = datetime.datetime(2009, 12, 1)

        self.assertEquals(spatial_bounds[0], -60)
        self.assertEquals(spatial_bounds[1], 60)
        self.assertEquals(spatial_bounds[2], -170)
        self.assertEquals(spatial_bounds[3], 170)
        self.assertEquals(temporal_bounds[0], start)
        self.assertEquals(temporal_bounds[1], end)
예제 #9
0
def _prepare_datasets_for_evaluation(reference, targets, config_data):
    """"""
    subset = config_data['evaluation'].get('subset', None)
    temporal_time_delta = config_data['evaluation'].get('temporal_time_delta', None)
    spatial_regrid_lats = config_data['evaluation'].get('spatial_regrid_lats', None)
    spatial_regrid_lons = config_data['evaluation'].get('spatial_regrid_lons', None)

    if subset:
        start = dateutil.parser.parse(subset[4])
        end = dateutil.parser.parse(subset[5])
        bounds = Bounds(subset[0], subset[1], subset[2], subset[3], start, end)

        if reference:
            reference = dsp.safe_subset(bounds, reference)

        if targets:
            targets = [dsp.safe_subset(bounds, t) for t in targets]

    if temporal_time_delta:
        resolution = timedelta(temporal_time_delta)

        if reference:
            reference = dsp.temporal_rebin(reference, resolution)

        if targets:
            targets = [dsp.temporal_rebin(t, resolution) for t in targets]

    if spatial_regrid_lats and spatial_regrid_lons:
        lats = np.arange(spatial_regrid_lats[0], spatial_regrid_lats[1], spatial_regrid_lats[2])
        lons = np.arange(spatial_regrid_lons[0], spatial_regrid_lons[1], spatial_regrid_lons[2])

        if reference:
            reference = dsp.spatial_regrid(reference, lats, lons)

        if targets:
            targets = [dsp.spatial_regrid(t, lats, lons) for t in targets]

    return reference, targets
예제 #10
0
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -180 to 180 and lon values from -90 to 90.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid

    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)

    target_datasets = [_process_dataset_object(obj, eval_bounds)
					   for obj
					   in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of the
    # month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds do to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(subset, ref_dataset)
    target_datasets = [dsp.safe_subset(subset, ds)
                       for ds
                       in target_datasets]
    
    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
					   for ds
					   in target_datasets]

    # Do spatial re=bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
													lat_step,
													lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets =  [dsp.spatial_regrid(ds, lat_bins, lon_bins)
						for ds
						in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})
예제 #11
0
def run_evaluation():
    ''' Run an OCW Evaluation.

    *run_evaluation* expects the Evaluation parameters to be POSTed in
    the following format.

    .. sourcecode:: javascript

        {
            reference_dataset: {
                // Id that tells us how we need to load this dataset.
                'data_source_id': 1 == local, 2 == rcmed,

                // Dict of data_source specific identifying information.
                //
                // if data_source_id == 1 == local:
                // {
                //     'id': The path to the local file on the server for loading.
                //     'var_name': The variable data to pull from the file.
                //     'lat_name': The latitude variable name.
                //     'lon_name': The longitude variable name.
                //     'time_name': The time variable name
                //     'name': Optional dataset name
                // }
                //
                // if data_source_id == 2 == rcmed:
                // {
                //     'dataset_id': The dataset id to grab from RCMED.
                //     'parameter_id': The variable id value used by RCMED.
                //     'name': Optional dataset name
                // }
                'dataset_info': {..}
            },

            // The list of target datasets to use in the Evaluation. The data
            // format for the dataset objects should be the same as the
            // reference_dataset above.
            'target_datasets': [{...}, {...}, ...],

            // All the datasets are re-binned to the reference dataset
            // before being added to an experiment. This step (in degrees)
            // is used when re-binning both the reference and target datasets.
            'spatial_rebin_lat_step': The lat degree step. Integer > 0,

            // Same as above, but for lon
            'spatial_rebin_lon_step': The lon degree step. Integer > 0,

            // The temporal resolution to use when doing a temporal re-bin
            // This is a timedelta of days to use so daily == 1, monthly is
            // (1, 31], annual/yearly is (31, 366], and full is anything > 366.
            'temporal_resolution': Integer in range(1, 999),

            // A list of the metric class names to use in the evaluation. The
            // names must match the class name exactly.
            'metrics': [Bias, TemporalStdDev, ...]

            // The bounding values used in the Evaluation. Note that lat values
            // should range from -180 to 180 and lon values from -90 to 90.
            'start_time': start time value in the format '%Y-%m-%d %H:%M:%S',
            'end_time': end time value in the format '%Y-%m-%d %H:%M:%S',
            'lat_min': The minimum latitude value,
            'lat_max': The maximum latitude value,
            'lon_min': The minimum longitude value,
            'lon_max': The maximum longitude value,

            // NOTE: At the moment, subregion support is fairly minimal. This
            // will be addressed in the future. Ideally, the user should be able
            // to load a file that they have locally. That would change the
            // format that this data is passed.
            'subregion_information': Path to a subregion file on the server.
        }
    '''
    # TODO: validate input parameters and return an error if not valid

    eval_time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    data = request.json

    eval_bounds = {
        'start_time': datetime.strptime(data['start_time'], '%Y-%m-%d %H:%M:%S'),
        'end_time': datetime.strptime(data['end_time'], '%Y-%m-%d %H:%M:%S'),
        'lat_min': float(data['lat_min']),
        'lat_max': float(data['lat_max']),
        'lon_min': float(data['lon_min']),
        'lon_max': float(data['lon_max'])
    }

    # Load all the datasets
    ref_dataset = _process_dataset_object(data['reference_dataset'], eval_bounds)

    target_datasets = [_process_dataset_object(obj, eval_bounds)
					   for obj
					   in data['target_datasets']]

    # Normalize the dataset time values so they break on consistent days of the
    # month or time of the day, depending on how they will be rebinned.
    resolution = data['temporal_resolution']
    time_delta = timedelta(days=resolution)

    time_step = 'daily' if resolution == 1 else 'monthly'
    ref_dataset = dsp.normalize_dataset_datetimes(ref_dataset, time_step)
    target_datasets = [dsp.normalize_dataset_datetimes(ds, time_step)
                       for ds in target_datasets]

    # Subset the datasets
    start = eval_bounds['start_time']
    end = eval_bounds['end_time']

    # Normalize all the values to the first of the month if we're not
    # dealing with daily data. This will ensure that a valid subregion
    # isn't considered out of bounds do to a dataset's time values
    # being shifted to the first of the month.
    if time_step != 'daily':
        if start.day != 1:
            day_offset = start.day - 1
            start -= timedelta(days=day_offset)

        if end.day != 1:
            day_offset = end.day - 1
            end -= timedelta(days=day_offset)

    subset = Bounds(eval_bounds['lat_min'],
                    eval_bounds['lat_max'],
                    eval_bounds['lon_min'],
                    eval_bounds['lon_max'],
                    start,
                    end)

    ref_dataset = dsp.safe_subset(ref_dataset, subset)
    target_datasets = [dsp.safe_subset(ds, subset)
                       for ds
                       in target_datasets]
    
    # Do temporal re-bin based off of passed resolution
    ref_dataset = dsp.temporal_rebin(ref_dataset, time_delta)
    target_datasets = [dsp.temporal_rebin(ds, time_delta)
					   for ds
					   in target_datasets]

    # Do spatial re=bin based off of reference dataset + lat/lon steps
    lat_step = data['spatial_rebin_lat_step']
    lon_step = data['spatial_rebin_lon_step']
    lat_bins, lon_bins = _calculate_new_latlon_bins(eval_bounds,
													lat_step,
													lon_step)

    ref_dataset = dsp.spatial_regrid(ref_dataset, lat_bins, lon_bins)
    target_datasets =  [dsp.spatial_regrid(ds, lat_bins, lon_bins)
						for ds
						in target_datasets]

    # Load metrics
    loaded_metrics = _load_metrics(data['metrics'])

    # Prime evaluation object with data
    evaluation = Evaluation(ref_dataset, target_datasets, loaded_metrics)

    # Run evaluation
    evaluation.run()

    # Plot
    _generate_evaluation_plots(evaluation, lat_bins, lon_bins, eval_time_stamp)

    return json.dumps({'eval_work_dir': eval_time_stamp})