Esempio n. 1
0
def Taylor_diagram_spatial_pattern_of_multiyear_climatology(
        obs_dataset, obs_name, model_datasets, model_names, file_name):
    """Draw a Taylor diagram of multiyear climatological spatial patterns.

    Reduces the observation and every model dataset to its climatological
    (temporal) mean field, evaluates spatial standard deviation and pattern
    correlation of each model against the observation, and writes the
    resulting Taylor diagram to ``file_name``.
    """
    # Collapse each dataset to its multiyear climatological mean field.
    obs_clim_dataset = ds.Dataset(obs_dataset.lats, obs_dataset.lons,
                                  obs_dataset.times,
                                  utils.calc_temporal_mean(obs_dataset))
    model_clim_datasets = [
        ds.Dataset(model.lats, model.lons, model.times,
                   utils.calc_temporal_mean(model))
        for model in model_datasets
    ]

    # Metric: spatial standard deviation and pattern correlation.
    taylor_diagram = metrics.SpatialPatternTaylorDiagram()

    # Evaluate every model climatology against the reference climatology.
    taylor_evaluation = Evaluation(obs_clim_dataset,
                                   model_clim_datasets,
                                   [taylor_diagram])
    taylor_evaluation.run()

    taylor_data = taylor_evaluation.results[0]
    plotter.draw_taylor_diagram(taylor_data,
                                model_names,
                                obs_name,
                                file_name,
                                pos='upper right',
                                frameon=False)
Esempio n. 2
0
def temporal_rebin_with_time_index(target_dataset, nt_average):
    """ Rebin a Dataset to a new temporal resolution by block-averaging

    :param target_dataset: Dataset object that needs temporal rebinned
    :type target_dataset: :class:`dataset.Dataset`

    :param nt_average: Time resolution for the output datasets.
        It is the same as the number of time indices to be averaged:
        (length of time dimension in the rebinned dataset) =
        (original time dimension length / nt_average)
    :type nt_average: integer

    :returns: A new temporally rebinned Dataset
    :rtype: :class:`dataset.Dataset`
    """
    nt = target_dataset.times.size
    if nt % nt_average != 0:
        # Trailing time steps that do not fill a complete bin are dropped.
        # NOTE: was a Python 2 `print` statement, a syntax error on Py3.
        print('Warning: length of time dimension must be a multiple '
              'of nt_average')
    # nt2 is the length of the time dimension in the rebinned dataset.
    # Floor division keeps nt2 an int under Python 3; true division would
    # produce a float and break np.arange / array indexing below.
    nt2 = nt // nt_average
    # Each bin is labelled with the timestamp of its first member.
    binned_dates = target_dataset.times[np.arange(nt2) * nt_average]
    binned_values = ma.zeros(np.insert(target_dataset.values.shape[1:], 0,
                                       nt2))
    for it in np.arange(nt2):
        # Mask-aware average over each consecutive slice of nt_average steps.
        binned_values[it, :] = ma.average(
            target_dataset.values[nt_average * it:nt_average * it +
                                  nt_average, :],
            axis=0)
    new_dataset = ds.Dataset(target_dataset.lats,
                             target_dataset.lons,
                             binned_dates,
                             binned_values,
                             variable=target_dataset.variable,
                             units=target_dataset.units,
                             name=target_dataset.name,
                             origin=target_dataset.origin)
    return new_dataset
Esempio n. 3
0
def normalize_dataset_datetimes(dataset, timestep):
    ''' Normalize Dataset datetime values.

    Force daily to an hour time value of 00:00:00.
    Force monthly data to the first of the month at midnight.

    :param dataset: The Dataset which will have its time value normalized.
    :type dataset: :class:`dataset.Dataset`

    :param timestep: The timestep of the Dataset's values. Either 'daily' or
        'monthly'.
    :type timestep: :mod:`string`

    :returns: A new Dataset with normalized datetime values.
    :rtype: :class:`dataset.Dataset`
    '''
    normalized_times = _rcmes_normalize_datetimes(dataset.times, timestep)
    # Return a fresh Dataset carrying the original data and metadata but
    # with the normalized time axis.
    return ds.Dataset(dataset.lats,
                      dataset.lons,
                      np.array(normalized_times),
                      dataset.values,
                      variable=dataset.variable,
                      units=dataset.units,
                      name=dataset.name,
                      origin=dataset.origin)
Esempio n. 4
0
    def setUp(self):
        """Build a shared target Dataset plus out-of-range Bounds fixtures."""
        lats = np.arange(-60, 61)
        lons = np.arange(-170, 171)
        # Ten years of monthly timestamps: 2000-01 through 2009-12.
        times = np.array([
            datetime.datetime(yr, mon, 1)
            for yr in range(2000, 2010)
            for mon in range(1, 13)
        ])
        values = np.ones([len(times), len(lats), len(lons)])
        self.target_dataset = ds.Dataset(lats,
                                         lons,
                                         times,
                                         values,
                                         variable="test variable name",
                                         units='test variable units',
                                         name='foo')

        # Spatial extent exceeds the dataset's lat/lon coverage.
        self.spatial_out_of_bounds = ds.Bounds(-165, 165, -180, 180,
                                               datetime.datetime(2001, 1, 1),
                                               datetime.datetime(2004, 1, 1))

        # Temporal extent exceeds the dataset's time coverage.
        self.temporal_out_of_bounds = ds.Bounds(-40, 40, -160.25, 160.5,
                                                datetime.datetime(1999, 1, 15),
                                                datetime.datetime(2222, 2, 15))

        # Both the spatial and the temporal extents are out of range.
        self.everything_out_of_bounds = ds.Bounds(
            -165, 165, -180, 180,
            datetime.datetime(1999, 1, 15),
            datetime.datetime(2222, 2, 15))
Esempio n. 5
0
def two_year_daily_2hr_dataset():
    """Return 730 days of all-ones data, each timestamp offset by +2 hours."""
    lats = np.arange(-89, 90, 2)
    lons = np.arange(-179, 180, 2)
    start = datetime.datetime(2001, 1, 1)
    times = np.array([start + datetime.timedelta(days=day, hours=2)
                      for day in range(730)])
    values = np.ones([len(times), len(lats), len(lons)])
    return ds.Dataset(lats, lons, times, values, variable='random data')
Esempio n. 6
0
def temporal_rebin(target_dataset, temporal_resolution):
    """ Rebin a Dataset to a new temporal resolution

    :param target_dataset: Dataset object that needs temporal regridding
    :type target_dataset: Open Climate Workbench Dataset Object
    :param temporal_resolution: The new temporal bin size
    :type temporal_resolution: Python datetime.timedelta object

    :returns: A new temporally rebinned Dataset
    :rtype: Open Climate Workbench Dataset Object
    """
    # Decode the temporal resolution into a string format that
    # _rcmes_calc_average_on_new_time_unit_K() can understand.
    day_count = temporal_resolution.days
    if day_count == 1:
        time_unit = 'daily'
    elif 1 < day_count <= 31:
        time_unit = 'monthly'
    elif 31 < day_count <= 366:
        time_unit = 'annual'
    else:
        time_unit = 'full'

    # View as a masked array so missing values are honoured by the average.
    masked_values = target_dataset.values.view(ma.MaskedArray)
    binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit_K(
        masked_values, target_dataset.times, time_unit)
    binned_dates = np.array(binned_dates)
    # Pass variable/name as keywords: positionally the sixth argument binds
    # to Dataset's `units` parameter (see the keyword-based ds.Dataset calls
    # elsewhere in this module), so the dataset name would silently land in
    # the units slot.
    new_dataset = ds.Dataset(target_dataset.lats, target_dataset.lons,
                             binned_dates, binned_values,
                             variable=target_dataset.variable,
                             name=target_dataset.name)

    return new_dataset
Esempio n. 7
0
def ensemble(datasets):
    """
    Generate a single dataset which is the mean of the input datasets

    An ensemble dataset combines input datasets that are assumed to share
    shape, dimensions, and units.

    :param datasets: Datasets to be used to compose the ensemble dataset from.
        All Datasets must be the same shape.
    :type datasets: :class:`list` of :class:`dataset.Dataset`

    :returns: New Dataset with a name of 'Dataset Ensemble'
    :rtype: :class:`dataset.Dataset`
    """
    _check_dataset_shapes(datasets)
    # Mask-aware mean across the ensemble members.
    member_values = [member.values for member in datasets]
    mean_values = ma.mean(member_values, axis=0)

    # Borrow the coordinate axes and units from the first member.
    template = datasets[0]
    return ds.Dataset(template.lats,
                      template.lons,
                      template.times,
                      mean_values,
                      units=template.units,
                      name="Dataset Ensemble")
Esempio n. 8
0
def ten_year_monthly_15th_dataset():
    """Return ten years of monthly all-ones data stamped on the 15th."""
    lats = np.arange(-89, 90, 2)
    lons = np.arange(-179, 180, 2)
    # 120 monthly timestamps: 2000-01-15 through 2009-12-15.
    times = np.array([datetime.datetime(yr, mon, 15)
                      for yr in range(2000, 2010)
                      for mon in range(1, 13)])
    values = np.ones([len(times), len(lats), len(lons)])
    return ds.Dataset(lats, lons, times, values,
                      variable="test variable name")
Esempio n. 9
0
def build_ten_cube_dataset(value):
    """Return a coarse annual Dataset (2000-2009) uniformly filled with `value`."""
    lats = np.arange(-89, 90, 18)
    lons = np.arange(-179, 180, 36)
    times = np.array([datetime.datetime(yr, 1, 1)
                      for yr in range(2000, 2010)])
    values = np.ones([len(times), len(lats), len(lons)]) * value
    return ds.Dataset(lats, lons, times, values)
Esempio n. 10
0
def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
    """ Regrid a Dataset using the new latitudes and longitudes

    :param target_dataset: Dataset object that needs temporal regridding applied
    :type target_dataset: Open Climate Workbench Dataset Object
    :param new_latitudes: Array of latitudes
    :type new_latitudes: 1d Numpy Array
    :param new_longitudes: Array of longitudes
    :type new_longitudes: 1d Numpy Array

    :returns: A new spatially regridded Dataset
    :rtype: Open Climate Workbench Dataset Object
    """
    # Make masked array of shape (times, new_latitudes, new_longitudes)
    new_values = ma.zeros(
        [len(target_dataset.times),
         len(new_latitudes),
         len(new_longitudes)])

    # Create grids of the given lats and lons for the underlying API
    # NOTE: np.meshgrid() requires inputs (x, y) and returns data
    #       of shape(y|lat|rows, x|lon|columns).  So we pass in lons, lats
    #       and get back data.shape(lats, lons)
    lons, lats = np.meshgrid(target_dataset.lons, target_dataset.lats)
    new_lons, new_lats = np.meshgrid(new_longitudes, new_latitudes)
    # Convert all lats and lons into Numpy Masked Arrays
    lats = ma.array(lats)
    lons = ma.array(lons)
    new_lats = ma.array(new_lats)
    new_lons = ma.array(new_lons)
    target_values = ma.array(target_dataset.values)

    # Call _rcmes_spatial_regrid on each time slice
    for i in range(len(target_dataset.times)):
        new_values[i] = _rcmes_spatial_regrid(target_values[i], lats, lons,
                                              new_lats, new_lons)

    # Create a new Dataset Object to return using new data.
    # Pass variable/name as keywords: positionally the sixth argument binds
    # to Dataset's `units` parameter (see the keyword-based ds.Dataset calls
    # elsewhere in this module), so the dataset name would silently land in
    # the units slot.
    regridded_dataset = ds.Dataset(new_latitudes, new_longitudes,
                                   target_dataset.times, new_values,
                                   variable=target_dataset.variable,
                                   name=target_dataset.name)
    return regridded_dataset
Esempio n. 11
0
def normalize_dataset_datetimes(dataset, timestep):
    ''' Normalize Dataset datetime values.

    Force daily to an hour time value of 00:00:00.
    Force monthly data to the first of the month at midnight.

    :param dataset: The Dataset which will have its' time value normalized.
    :type dataset: Dataset
    :param timestep: The timestep of the Dataset's values. Either 'daily' or
        'monthly'.
    :type timestep: String

    :returns: A new Dataset with normalized datetimes.
    '''
    new_times = _rcmes_normalize_datetimes(dataset.times, timestep)
    # Pass variable/name as keywords: positionally the sixth argument binds
    # to Dataset's `units` parameter (see the keyword-based ds.Dataset calls
    # elsewhere in this module), so the dataset name would silently land in
    # the units slot.
    return ds.Dataset(dataset.lats, dataset.lons, np.array(new_times),
                      dataset.values,
                      variable=dataset.variable,
                      name=dataset.name)
Esempio n. 12
0
def subset(subregion, target_dataset):
    '''Subset given dataset(s) with subregion information

    :param subregion: The Bounds with which to subset the target Dataset. 
    :type subregion: Bounds
    :param target_dataset: The Dataset object to subset.
    :type target_dataset: Dataset

    :returns: The subset-ed Dataset object
    :rtype: Dataset

    :raises: ValueError
    '''

    # Ensure that the subregion information is well formed
    if not _are_bounds_contained_by_dataset(subregion, target_dataset):
        error = ("dataset_processor.subset received a subregion that is not "
                 "completely within the bounds of the target dataset.")
        logger.error(error)
        raise ValueError(error)

    # Get subregion indices into subregion data
    dataset_slices = _get_subregion_slice_indices(subregion, target_dataset)

    # Build new dataset with subset information.
    # The slice bounds are inclusive, hence the +1 on every end index.
    # variable/name are passed as keywords: positionally the sixth argument
    # binds to Dataset's `units` parameter (see the keyword-based ds.Dataset
    # calls elsewhere in this module), so the dataset name would silently
    # land in the units slot.
    return ds.Dataset(
        # Slice the lats array with our calculated slice indices
        target_dataset.
        lats[dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1],
        # Slice the lons array with our calculated slice indices
        target_dataset.
        lons[dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1],
        # Slice the times array with our calculated slice indices
        target_dataset.
        times[dataset_slices["time_start"]:dataset_slices["time_end"] + 1],
        # Slice the values array with our calculated slice indices
        target_dataset.values[
            dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
            dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
            dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1],
        variable=target_dataset.variable,
        name=target_dataset.name)
Esempio n. 13
0
def ten_year_monthly_dataset(latlon2d=False):
    """Return a ten-year monthly all-ones test Dataset.

    When ``latlon2d`` is True the coordinates are supplied as 2-D meshgrid
    arrays instead of 1-D axes (for testing 2-D lat/lon grids).
    """
    lats = np.arange(-89, 90, 2)
    lons = np.arange(-179, 180, 2)
    # Dataset only makes shallow copies of its coordinates, so keep the
    # arrays handed to it in separate variables.
    grid_lats, grid_lons = lats, lons
    if latlon2d:
        mesh_lons, mesh_lats = np.meshgrid(lons, lats)
        grid_lats, grid_lons = mesh_lats, mesh_lons
    # 120 monthly timestamps: 2000-01 through 2009-12.
    times = np.array([datetime.datetime(yr, mon, 1)
                      for yr in range(2000, 2010)
                      for mon in range(1, 13)])
    values = np.ones([len(times), len(lats), len(lons)])
    return ds.Dataset(grid_lats,
                      grid_lons,
                      times,
                      values,
                      variable="test variable name",
                      units='test variable units',
                      name='foo')
Esempio n. 14
0
def ensemble(datasets):
    """
    Generate a single dataset which is the mean of the input datasets

    :param datasets: Datasets to be used to compose the ensemble dataset from.
    Note - All Datasets must be the same shape
    :type datasets: List of OCW Dataset Objects

    :returns: New Dataset with a name of 'Dataset Ensemble'
    :rtype: OCW Dataset Object
    """
    _check_dataset_shapes(datasets)
    dataset_values = [dataset.values for dataset in datasets]
    # Use the masked-array mean (consistent with the other ensemble
    # implementation in this module) so masked/missing grid points in any
    # member are respected rather than averaged in as ordinary numbers.
    ensemble_values = ma.mean(dataset_values, axis=0)

    # Build new dataset object from the input datasets and the ensemble
    # values and return it
    ensemble_dataset = ds.Dataset(datasets[0].lats,
                                  datasets[0].lons,
                                  datasets[0].times,
                                  ensemble_values,
                                  name="Dataset Ensemble")

    return ensemble_dataset
Esempio n. 15
0
def temporal_subset(month_start,
                    month_end,
                    target_dataset,
                    average_each_year=False):
    """ Temporally subset data given month_index.

    :param month_start: An integer for beginning month (Jan=1)
    :type month_start: :class:`int`

    :param month_end: An integer for ending month (Jan=1)
    :type month_end: :class:`int`

    :param target_dataset: Dataset object that needs temporal subsetting
    :type target_dataset: Open Climate Workbench Dataset Object

    :param average_each_year: If True, output dataset is averaged for each year
    :type average_each_year: :class:'boolean'

    :returns: A temporal subset OCW Dataset
    :rtype: Open Climate Workbench Dataset Object
    """
    # A wrapped season (e.g. DJF: month_start=12, month_end=2) crosses the
    # year boundary.  Materialize as a list: Python 3 range objects have no
    # .extend(), so the original `range(...).extend(...)` raised there.
    if month_start > month_end:
        month_index = list(range(month_start, 13))
        month_index.extend(range(1, month_end + 1))
    else:
        month_index = list(range(month_start, month_end + 1))

    dates = target_dataset.times
    months = np.array([d.month for d in dates])
    time_index = []
    for m_value in month_index:
        time_index = np.append(time_index, np.where(months == m_value)[0])
        if m_value == month_index[0]:
            time_index_first = np.min(np.where(months == m_value)[0])
        if m_value == month_index[-1]:
            time_index_last = np.max(np.where(months == m_value)[0])

    time_index = np.sort(time_index)

    # Keep only complete seasons: drop leading/trailing partial ones.
    time_index = time_index[np.where((time_index >= time_index_first)
                                     & (time_index <= time_index_last))]

    # np.append on a plain list produced a float array; cast back to ints so
    # the values are legal array indices (NumPy rejects float fancy indices).
    time_index = [int(i) for i in time_index]

    new_dataset = ds.Dataset(target_dataset.lats,
                             target_dataset.lons,
                             target_dataset.times[time_index],
                             target_dataset.values[time_index, :],
                             variable=target_dataset.variable,
                             units=target_dataset.units,
                             name=target_dataset.name)

    if average_each_year:
        nmonth = len(month_index)
        ntime = new_dataset.times.size
        # Floor division keeps nyear an int under Python 3; true division
        # would yield a float and break ma.zeros / np.arange below.
        nyear = ntime // nmonth
        averaged_time = []
        ny, nx = target_dataset.values.shape[1:]
        averaged_values = ma.zeros([nyear, ny, nx])
        for iyear in np.arange(nyear):
            # Centered time index of the season between month_start and
            # month_end in each year.
            center_index = int(nmonth / 2) + iyear * nmonth
            if nmonth == 1:
                center_index = iyear
            averaged_time.append(new_dataset.times[center_index])
            averaged_values[iyear, :] = ma.average(
                new_dataset.values[nmonth * iyear:nmonth * iyear + nmonth, :],
                axis=0)
        new_dataset = ds.Dataset(target_dataset.lats,
                                 target_dataset.lons,
                                 np.array(averaged_time),
                                 averaged_values,
                                 variable=target_dataset.variable,
                                 units=target_dataset.units,
                                 name=target_dataset.name)

    return new_dataset
Esempio n. 16
0
def subset(subregion, target_dataset, subregion_name=None):
    '''Subset given dataset(s) with subregion information

    :param subregion: The Bounds with which to subset the target Dataset. 
    :type subregion: :class:`dataset.Bounds`

    :param target_dataset: The Dataset object to subset.
    :type target_dataset: :class:`dataset.Dataset`

    :param subregion_name: The subset-ed Dataset name
    :type subregion_name: :mod:`string`

    :returns: The subset-ed Dataset object
    :rtype: :class:`dataset.Dataset`

    :raises: ValueError
    '''

    # Default the temporal bounds to the dataset's full time range.
    if not subregion.start:
        subregion.start = target_dataset.times[0]
        subregion.end = target_dataset.times[-1]

    # Ensure that the subregion information is well formed
    _are_bounds_contained_by_dataset(subregion, target_dataset)

    # Get subregion indices into subregion data
    dataset_slices = _get_subregion_slice_indices(subregion, target_dataset)

    if not subregion_name:
        subregion_name = target_dataset.name

    # Slice the values array with our calculated slice indices.  The slice
    # bounds are inclusive, hence the +1 on each end index.  (The throwaway
    # ma.zeros pre-allocations that were immediately overwritten have been
    # removed.)
    if target_dataset.values.ndim == 2:
        subset_values = target_dataset.values[
            dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
            dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
    elif target_dataset.values.ndim == 3:
        subset_values = target_dataset.values[
            dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
            dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
            dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]

    # Build new dataset with subset information
    return ds.Dataset(
        # Slice the lats array with our calculated slice indices
        target_dataset.
        lats[dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1],
        # Slice the lons array with our calculated slice indices
        target_dataset.
        lons[dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1],
        # Slice the times array with our calculated slice indices
        target_dataset.
        times[dataset_slices["time_start"]:dataset_slices["time_end"] + 1],
        # Slice the values array with our calculated slice indices
        subset_values,
        variable=target_dataset.variable,
        units=target_dataset.units,
        name=subregion_name,
        origin=target_dataset.origin)
Esempio n. 17
0
def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
    """ Regrid a Dataset using the new latitudes and longitudes

    :param target_dataset: Dataset object that needs spatially regridded
    :type target_dataset: :class:`dataset.Dataset`

    :param new_latitudes: Array of latitudes
    :type new_latitudes: :class:`numpy.ndarray`

    :param new_longitudes: Array of longitudes
    :type new_longitudes: :class:`numpy.ndarray`

    :returns: A new spatially regridded Dataset
    :rtype: :class:`dataset.Dataset`
    """
    # Expand 1-D coordinate axes into 2-D grids for the underlying API.
    # NOTE: np.meshgrid() takes inputs (x, y) and returns arrays shaped
    # (y|lat|rows, x|lon|columns), hence the (lons, lats) argument order.
    # Coordinates that are already 2-D are used as-is.
    if target_dataset.lons.ndim == 1 and target_dataset.lats.ndim == 1:
        old_lons, old_lats = np.meshgrid(target_dataset.lons,
                                         target_dataset.lats)
    else:
        old_lons = target_dataset.lons
        old_lats = target_dataset.lats
    if new_longitudes.ndim == 1 and new_latitudes.ndim == 1:
        grid_lons, grid_lats = np.meshgrid(new_longitudes, new_latitudes)
    else:
        grid_lons = new_longitudes
        grid_lats = new_latitudes

    # Output buffer shaped (times, new latitudes, new longitudes).
    num_times = len(target_dataset.times)
    new_values = ma.zeros([num_times, grid_lats.shape[0],
                           grid_lons.shape[1]])

    # Hand masked arrays to the underlying regridding routine.
    old_lats = ma.array(old_lats)
    old_lons = ma.array(old_lons)
    grid_lats = ma.array(grid_lats)
    grid_lons = ma.array(grid_lons)
    source_values = ma.array(target_dataset.values)

    # Regrid one time slice at a time.
    for t in range(num_times):
        new_values[t] = _rcmes_spatial_regrid(source_values[t],
                                              old_lats, old_lons,
                                              grid_lats, grid_lons)

    # Package the regridded values in a new Dataset, keeping the original
    # metadata.
    return ds.Dataset(new_latitudes,
                      new_longitudes,
                      target_dataset.times,
                      new_values,
                      variable=target_dataset.variable,
                      units=target_dataset.units,
                      name=target_dataset.name,
                      origin=target_dataset.origin)