def Taylor_diagram_spatial_pattern_of_multiyear_climatology(
        obs_dataset, obs_name, model_datasets, model_names, file_name):
    """Draw a Taylor diagram comparing multiyear climatologies.

    Computes the temporal (climatological) mean field of the reference
    dataset and of every model dataset, evaluates the spatial-pattern
    Taylor statistics (spatial standard deviation and pattern
    correlation), and writes the diagram to *file_name*.
    """
    # Climatological mean of the reference dataset.
    obs_clim_dataset = ds.Dataset(obs_dataset.lats,
                                  obs_dataset.lons,
                                  obs_dataset.times,
                                  utils.calc_temporal_mean(obs_dataset))
    # Climatological mean of each model dataset.
    model_clim_datasets = [
        ds.Dataset(model.lats, model.lons, model.times,
                   utils.calc_temporal_mean(model))
        for model in model_datasets]

    # Metric producing spatial standard deviation and pattern correlation.
    taylor_diagram = metrics.SpatialPatternTaylorDiagram()

    # Evaluate every model climatology against the reference climatology.
    taylor_evaluation = Evaluation(obs_clim_dataset,
                                   model_clim_datasets,
                                   [taylor_diagram])
    taylor_evaluation.run()

    taylor_data = taylor_evaluation.results[0]
    plotter.draw_taylor_diagram(taylor_data, model_names, obs_name,
                                file_name, pos='upper right', frameon=False)
def temporal_rebin_with_time_index(target_dataset, nt_average):
    """ Rebin a Dataset to a new temporal resolution

    :param target_dataset: Dataset object that needs temporal rebinned
    :type target_dataset: :class:`dataset.Dataset`
    :param nt_average: Time resolution for the output datasets. It is the
        same as the number of time indicies to be averaged. (length of time
        dimension in the rebinned dataset) = (original time dimension
        length/nt_average)
    :type nt_average: integer

    :returns: A new temporally rebinned Dataset
    :rtype: :class:`dataset.Dataset`
    """
    nt = target_dataset.times.size
    if nt % nt_average != 0:
        # Trailing time slices that do not fill a whole bin are dropped.
        # print() form is valid on both Python 2 and 3 (was a Py2-only
        # print statement).
        print('Warning: length of time dimension must be a multiple of nt_average')
    # nt2 is the length of time dimension in the rebinned dataset.
    # Floor division keeps nt2 an int under both Python 2 and 3.
    nt2 = nt // nt_average
    # Use the first date in each bin as the representative date.
    binned_dates = target_dataset.times[np.arange(nt2) * nt_average]
    binned_values = ma.zeros(np.insert(target_dataset.values.shape[1:], 0, nt2))
    for it in np.arange(nt2):
        # Masked average so missing values do not poison the bin mean.
        binned_values[it, :] = ma.average(
            target_dataset.values[nt_average * it:nt_average * it + nt_average, :],
            axis=0)
    new_dataset = ds.Dataset(target_dataset.lats,
                             target_dataset.lons,
                             binned_dates,
                             binned_values,
                             variable=target_dataset.variable,
                             units=target_dataset.units,
                             name=target_dataset.name,
                             origin=target_dataset.origin)
    return new_dataset
def normalize_dataset_datetimes(dataset, timestep):
    ''' Normalize Dataset datetime values.

    Force daily to an hour time value of 00:00:00.
    Force monthly data to the first of the month at midnight.

    :param dataset: The Dataset which will have its time value normalized.
    :type dataset: :class:`dataset.Dataset`
    :param timestep: The timestep of the Dataset's values. Either 'daily' or
        'monthly'.
    :type timestep: :mod:`string`

    :returns: A new Dataset with normalized datetime values.
    :rtype: :class:`dataset.Dataset`
    '''
    normalized = np.array(_rcmes_normalize_datetimes(dataset.times, timestep))
    # Rebuild the dataset with the normalized time axis; everything else is
    # carried over unchanged.
    return ds.Dataset(dataset.lats,
                      dataset.lons,
                      normalized,
                      dataset.values,
                      variable=dataset.variable,
                      units=dataset.units,
                      name=dataset.name,
                      origin=dataset.origin)
def setUp(self):
    """Build a reference dataset plus Bounds fixtures that fall outside it."""
    # 1-degree grid spanning 60S..60N and 170W..170E.
    lats = np.arange(-60, 61)
    lons = np.arange(-170, 171)
    # Ten years (2000-2009) of monthly timestamps on the 1st.
    times = np.array([datetime.datetime(y, m, 1)
                      for y in range(2000, 2010)
                      for m in range(1, 13)])
    values = np.ones([len(times), len(lats), len(lons)])
    self.target_dataset = ds.Dataset(lats,
                                     lons,
                                     times,
                                     values,
                                     variable="test variable name",
                                     units='test variable units',
                                     name='foo')
    # Spatially outside the target grid, temporally inside it.
    self.spatial_out_of_bounds = ds.Bounds(
        -165, 165, -180, 180,
        datetime.datetime(2001, 1, 1), datetime.datetime(2004, 1, 1))
    # Spatially inside the target grid, temporally outside it.
    self.temporal_out_of_bounds = ds.Bounds(
        -40, 40, -160.25, 160.5,
        datetime.datetime(1999, 1, 15), datetime.datetime(2222, 2, 15))
    # Outside the target grid both spatially and temporally.
    self.everything_out_of_bounds = ds.Bounds(
        -165, 165, -180, 180,
        datetime.datetime(1999, 1, 15), datetime.datetime(2222, 2, 15))
def two_year_daily_2hr_dataset():
    """Build 730 days of all-ones data, each timestamp offset by 2 hours."""
    lats = np.arange(-89, 90, 2)
    lons = np.arange(-179, 180, 2)
    start = datetime.datetime(2001, 1, 1)
    times = np.array([start + datetime.timedelta(days=d, hours=2)
                      for d in range(730)])
    values = np.ones([len(times), len(lats), len(lons)])
    return ds.Dataset(lats, lons, times, values, variable='random data')
def temporal_rebin(target_dataset, temporal_resolution):
    """ Rebin a Dataset to a new temporal resolution

    :param target_dataset: Dataset object that needs temporal regridding
    :type target_dataset: Open Climate Workbench Dataset Object
    :param temporal_resolution: The new temporal bin size
    :type temporal_resolution: Python datetime.timedelta object

    :returns: A new temporally rebinned Dataset
    :rtype: Open Climate Workbench Dataset Object
    """
    # Translate the timedelta into the string granularity that
    # _rcmes_calc_average_on_new_time_unit_K() understands.
    day_count = temporal_resolution.days
    if day_count == 1:
        time_unit = 'daily'
    elif 1 < day_count <= 31:
        time_unit = 'monthly'
    elif 31 < day_count <= 366:
        time_unit = 'annual'
    else:
        time_unit = 'full'

    # View as a masked array so masked points are honored while averaging.
    masked_values = target_dataset.values.view(ma.MaskedArray)
    binned_values, binned_dates = _rcmes_calc_average_on_new_time_unit_K(
        masked_values, target_dataset.times, time_unit)

    return ds.Dataset(target_dataset.lats,
                      target_dataset.lons,
                      np.array(binned_dates),
                      binned_values,
                      target_dataset.variable,
                      target_dataset.name)
def ensemble(datasets):
    """ Generate a single dataset which is the mean of the input datasets

    An ensemble datasets combines input datasets assuming the all have
    similar shape, dimensions, and units.

    :param datasets: Datasets to be used to compose the ensemble dataset from.
        All Datasets must be the same shape.
    :type datasets: :class:`list` of :class:`dataset.Dataset`

    :returns: New Dataset with a name of 'Dataset Ensemble'
    :rtype: :class:`dataset.Dataset`
    """
    # Every member must share one (time, lat, lon) shape.
    _check_dataset_shapes(datasets)
    # Masked mean across the member axis so masked cells are skipped.
    ensemble_values = ma.mean([member.values for member in datasets], axis=0)
    # Coordinates and times come from the first member; shapes are identical.
    first = datasets[0]
    return ds.Dataset(first.lats,
                      first.lons,
                      first.times,
                      ensemble_values,
                      units=first.units,
                      name="Dataset Ensemble")
def ten_year_monthly_15th_dataset():
    """Ten years (2000-2009) of monthly all-ones data stamped on the 15th."""
    lats = np.arange(-89, 90, 2)
    lons = np.arange(-179, 180, 2)
    times = np.array([datetime.datetime(y, m, 15)
                      for y in range(2000, 2010)
                      for m in range(1, 13)])
    values = np.ones([len(times), len(lats), len(lons)])
    return ds.Dataset(lats, lons, times, values,
                      variable="test variable name")
def build_ten_cube_dataset(value):
    """Build a small 10-year annual dataset uniformly filled with *value*."""
    lats = np.arange(-89, 90, 18)
    lons = np.arange(-179, 180, 36)
    times = np.array([datetime.datetime(y, 1, 1) for y in range(2000, 2010)])
    values = np.ones([len(times), len(lats), len(lons)]) * value
    return ds.Dataset(lats, lons, times, values)
def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
    """ Regrid a Dataset using the new latitudes and longitudes

    :param target_dataset: Dataset object that needs temporal regridding applied
    :type target_dataset: Open Climate Workbench Dataset Object
    :param new_latitudes: Array of latitudes
    :type new_latitudes: 1d Numpy Array
    :param new_longitudes: Array of longitudes
    :type new_longitudes: 1d Numpy Array

    :returns: A new spatially regridded Dataset
    :rtype: Open Climate Workbench Dataset Object
    """
    # Output buffer shaped (times, new_latitudes, new_longitudes).
    new_values = ma.zeros([len(target_dataset.times),
                           len(new_latitudes),
                           len(new_longitudes)])

    # NOTE: np.meshgrid() takes (x, y) and returns data shaped
    # (y|lat|rows, x|lon|columns), hence the (lons, lats) argument order.
    lons, lats = np.meshgrid(target_dataset.lons, target_dataset.lats)
    new_lons, new_lats = np.meshgrid(new_longitudes, new_latitudes)

    # Wrap all inputs as masked arrays for the underlying regrid routine.
    lats = ma.array(lats)
    lons = ma.array(lons)
    new_lats = ma.array(new_lats)
    new_lons = ma.array(new_lons)
    target_values = ma.array(target_dataset.values)

    # Regrid one time slice at a time.
    for t in range(len(target_dataset.times)):
        new_values[t] = _rcmes_spatial_regrid(target_values[t], lats, lons,
                                              new_lats, new_lons)
    # TODO: This will call down to the _congrid() function and the lat and
    # lon axis will be adjusted with the time axis being held constant.

    return ds.Dataset(new_latitudes,
                      new_longitudes,
                      target_dataset.times,
                      new_values,
                      target_dataset.variable,
                      target_dataset.name)
def normalize_dataset_datetimes(dataset, timestep):
    ''' Normalize Dataset datetime values.

    Force daily to an hour time value of 00:00:00.
    Force monthly data to the first of the month at midnight.

    :param dataset: The Dataset which will have its' time value normalized.
    :type dataset: Dataset
    :param timestep: The timestep of the Dataset's values. Either 'daily' or
        'monthly'.
    :type timestep: String

    :returns: A new Dataset with normalized datetimes.
    '''
    normalized = np.array(_rcmes_normalize_datetimes(dataset.times, timestep))
    # Rebuild the dataset with the normalized time axis.
    return ds.Dataset(dataset.lats,
                      dataset.lons,
                      normalized,
                      dataset.values,
                      dataset.variable,
                      dataset.name)
def subset(subregion, target_dataset):
    '''Subset given dataset(s) with subregion information

    :param subregion: The Bounds with which to subset the target Dataset.
    :type subregion: Bounds
    :param target_dataset: The Dataset object to subset.
    :type target_dataset: Dataset

    :returns: The subset-ed Dataset object
    :rtype: Dataset

    :raises: ValueError
    '''
    # Ensure that the subregion information is well formed.
    if not _are_bounds_contained_by_dataset(subregion, target_dataset):
        error = ("dataset_processor.subset received a subregion that is not "
                 "completely within the bounds of the target dataset.")
        logger.error(error)
        raise ValueError(error)

    # Index ranges of the subregion within the target dataset's grid.
    indices = _get_subregion_slice_indices(subregion, target_dataset)
    lat_slice = slice(indices["lat_start"], indices["lat_end"] + 1)
    lon_slice = slice(indices["lon_start"], indices["lon_end"] + 1)
    time_slice = slice(indices["time_start"], indices["time_end"] + 1)

    # Build the new dataset from the sliced coordinate and value arrays.
    return ds.Dataset(target_dataset.lats[lat_slice],
                      target_dataset.lons[lon_slice],
                      target_dataset.times[time_slice],
                      target_dataset.values[time_slice, lat_slice, lon_slice],
                      target_dataset.variable,
                      target_dataset.name)
def ten_year_monthly_dataset(latlon2d=False):
    """Ten years (2000-2009) of monthly all-ones data on a 2-degree grid.

    :param latlon2d: When True, pass 2D meshgrid lat/lon arrays to the
        Dataset to exercise 2D-coordinate handling.
    """
    lats = np.array(range(-89, 90, 2))
    lons = np.array(range(-179, 180, 2))
    # Dataset only makes shallow copies of the coordinate arrays, so keep
    # separate variables for what is actually handed in.
    ilats, ilons = lats, lons
    if latlon2d:
        # For testing 2D lat lon grids.
        lons2, lats2 = np.meshgrid(lons, lats)
        ilats, ilons = lats2, lons2
    times = np.array([datetime.datetime(y, m, 1)
                      for y in range(2000, 2010)
                      for m in range(1, 13)])
    values = np.ones([len(times), len(lats), len(lons)])
    return ds.Dataset(ilats, ilons, times, values,
                      variable="test variable name",
                      units='test variable units',
                      name='foo')
def ensemble(datasets):
    """ Generate a single dataset which is the mean of the input datasets

    :param datasets: Datasets to be used to compose the ensemble dataset from.
        Note - All Datasets must be the same shape
    :type datasets: List of OCW Dataset Objects

    :returns: New Dataset with a name of 'Dataset Ensemble'
    :rtype: OCW Dataset Object
    """
    # Every member must share one (time, lat, lon) shape.
    _check_dataset_shapes(datasets)
    # Plain (unmasked) mean across the member axis.
    ensemble_values = np.mean([member.values for member in datasets], axis=0)
    # Coordinates and times come from the first member; shapes are identical.
    first = datasets[0]
    return ds.Dataset(first.lats,
                      first.lons,
                      first.times,
                      ensemble_values,
                      name="Dataset Ensemble")
def temporal_subset(month_start, month_end, target_dataset,
                    average_each_year=False):
    """ Temporally subset data given month_index.

    :param month_start: An integer for beginning month (Jan=1)
    :type month_start: :class:`int`
    :param month_end: An integer for ending month (Jan=1)
    :type month_end: :class:`int`
    :param target_dataset: Dataset object that needs temporal subsetting
    :type target_dataset: Open Climate Workbench Dataset Object
    :param average_each_year: If True, output dataset is averaged for each year
    :type average_each_year: :class:'boolean'

    :returns: A temporal subset OCW Dataset
    :rtype: Open Climate Workbench Dataset Object
    """
    # Build the list of wanted calendar months. A start month greater than
    # the end month means the season wraps the year boundary (e.g. DJF).
    # list(range(...)) is required so .extend() works on Python 3 too.
    if month_start > month_end:
        month_index = list(range(month_start, 13))
        month_index.extend(range(1, month_end + 1))
    else:
        month_index = list(range(month_start, month_end + 1))

    dates = target_dataset.times
    months = np.array([d.month for d in dates])

    time_index = []
    for m_value in month_index:
        time_index = np.append(time_index, np.where(months == m_value)[0])
        if m_value == month_index[0]:
            time_index_first = np.min(np.where(months == m_value)[0])
        if m_value == month_index[-1]:
            time_index_last = np.max(np.where(months == m_value)[0])

    # Keep only complete seasons between the first start-month occurrence and
    # the last end-month occurrence. np.append on the initial [] produced a
    # float array, so cast back to int before using it as a fancy index
    # (float indices are an error in modern numpy).
    time_index = np.sort(time_index)
    time_index = time_index[np.where((time_index >= time_index_first) &
                                     (time_index <= time_index_last))]
    time_index = list(time_index.astype(int))

    new_dataset = ds.Dataset(target_dataset.lats,
                             target_dataset.lons,
                             target_dataset.times[time_index],
                             target_dataset.values[time_index, :],
                             variable=target_dataset.variable,
                             units=target_dataset.units,
                             name=target_dataset.name)

    if average_each_year:
        nmonth = len(month_index)
        ntime = new_dataset.times.size
        # Floor division keeps nyear an int on both Python 2 and 3.
        nyear = ntime // nmonth
        averaged_time = []
        ny, nx = target_dataset.values.shape[1:]
        averaged_values = ma.zeros([nyear, ny, nx])
        for iyear in np.arange(nyear):
            # Centered time index of the season between month_start and
            # month_end in each year.
            center_index = int(nmonth / 2) + iyear * nmonth
            if nmonth == 1:
                center_index = iyear
            averaged_time.append(new_dataset.times[center_index])
            # Masked average over each year's months.
            averaged_values[iyear, :] = ma.average(
                new_dataset.values[nmonth * iyear:nmonth * iyear + nmonth, :],
                axis=0)
        new_dataset = ds.Dataset(target_dataset.lats,
                                 target_dataset.lons,
                                 np.array(averaged_time),
                                 averaged_values,
                                 variable=target_dataset.variable,
                                 units=target_dataset.units,
                                 name=target_dataset.name)

    return new_dataset
def subset(subregion, target_dataset, subregion_name=None):
    '''Subset given dataset(s) with subregion information

    :param subregion: The Bounds with which to subset the target Dataset.
    :type subregion: :class:`dataset.Bounds`
    :param target_dataset: The Dataset object to subset.
    :type target_dataset: :class:`dataset.Dataset`
    :param subregion_name: The subset-ed Dataset name
    :type subregion_name: :mod:`string`

    :returns: The subset-ed Dataset object
    :rtype: :class:`dataset.Dataset`

    :raises: ValueError
    '''
    # Default to the dataset's full temporal range when the caller supplied
    # no time bounds. NOTE(review): this mutates the caller's Bounds object
    # in place — confirm callers do not reuse it with the old expectation.
    if not subregion.start:
        subregion.start = target_dataset.times[0]
        subregion.end = target_dataset.times[-1]

    # Ensure that the subregion information is well formed.
    _are_bounds_contained_by_dataset(subregion, target_dataset)

    # Get subregion indices into subregion data.
    dataset_slices = _get_subregion_slice_indices(subregion, target_dataset)
    if not subregion_name:
        subregion_name = target_dataset.name

    lat_slice = slice(dataset_slices["lat_start"],
                      dataset_slices["lat_end"] + 1)
    lon_slice = slice(dataset_slices["lon_start"],
                      dataset_slices["lon_end"] + 1)
    time_slice = slice(dataset_slices["time_start"],
                       dataset_slices["time_end"] + 1)

    # Slice the values array with our calculated slice indices. (A previous
    # version preallocated throwaway ma.zeros buffers here that were
    # immediately overwritten; those dead allocations have been removed.)
    if target_dataset.values.ndim == 2:
        # 2D values: (lat, lon) only.
        subset_values = target_dataset.values[lat_slice, lon_slice]
    elif target_dataset.values.ndim == 3:
        # 3D values: (time, lat, lon).
        subset_values = target_dataset.values[time_slice, lat_slice, lon_slice]

    # Build new dataset with subset information.
    return ds.Dataset(target_dataset.lats[lat_slice],
                      target_dataset.lons[lon_slice],
                      target_dataset.times[time_slice],
                      subset_values,
                      variable=target_dataset.variable,
                      units=target_dataset.units,
                      name=subregion_name,
                      origin=target_dataset.origin)
def spatial_regrid(target_dataset, new_latitudes, new_longitudes):
    """ Regrid a Dataset using the new latitudes and longitudes

    :param target_dataset: Dataset object that needs spatially regridded
    :type target_dataset: :class:`dataset.Dataset`
    :param new_latitudes: Array of latitudes
    :type new_latitudes: :class:`numpy.ndarray`
    :param new_longitudes: Array of longitudes
    :type new_longitudes: :class:`numpy.ndarray`

    :returns: A new spatially regridded Dataset
    :rtype: :class:`dataset.Dataset`
    """
    # NOTE: np.meshgrid() takes (x, y) and returns arrays shaped
    # (y|lat|rows, x|lon|columns), hence the (lons, lats) argument order.
    # Coordinate arrays that are already 2D pass through unchanged.
    if target_dataset.lons.ndim == 1 and target_dataset.lats.ndim == 1:
        lons, lats = np.meshgrid(target_dataset.lons, target_dataset.lats)
    else:
        lons = target_dataset.lons
        lats = target_dataset.lats
    if new_longitudes.ndim == 1 and new_latitudes.ndim == 1:
        new_lons, new_lats = np.meshgrid(new_longitudes, new_latitudes)
    else:
        new_lons = new_longitudes
        new_lats = new_latitudes

    # Output buffer shaped (times, new_latitudes, new_longitudes).
    new_values = ma.zeros([len(target_dataset.times),
                           new_lats.shape[0],
                           new_lons.shape[1]])

    # Wrap every coordinate/value array as a masked array for the regridder.
    lats = ma.array(lats)
    lons = ma.array(lons)
    new_lats = ma.array(new_lats)
    new_lons = ma.array(new_lons)
    target_values = ma.array(target_dataset.values)

    # Regrid one time slice at a time.
    for t in range(len(target_dataset.times)):
        new_values[t] = _rcmes_spatial_regrid(target_values[t], lats, lons,
                                              new_lats, new_lons)
    # TODO: This will call down to the _congrid() function and the lat and
    # lon axis will be adjusted with the time axis being held constant.

    return ds.Dataset(new_latitudes,
                      new_longitudes,
                      target_dataset.times,
                      new_values,
                      variable=target_dataset.variable,
                      units=target_dataset.units,
                      name=target_dataset.name,
                      origin=target_dataset.origin)