def test_percentile_is_dimension_coordinate_multiple_timesteps(self):
    """
    Test that the data has been reshaped correctly when multiple
    timesteps are in the cube. The array contents is also checked.
    """
    # Expected result: shape (1, 2, 2, 2) — leading percentile axis of
    # length plen=1, then time, y, x.
    expected = np.array([[
        [[4.0, 4.71428571], [5.42857143, 6.14285714]],
        [[6.85714286, 7.57142857], [8.28571429, 9.0]],
    ]])
    # Build (percentile, time, y, x) input data: three percentile slabs
    # offset from a common linspace ramp.
    data = np.tile(np.linspace(5, 10, 8), 3).reshape(3, 2, 2, 2)
    data[0] -= 1
    data[1] += 1
    data[2] += 3
    cubelist = CubeList([])
    # One percentile cube per validity time (07Z and 08Z).
    for i, hour in enumerate([7, 8]):
        cubelist.append(
            set_up_percentile_cube(
                data[:, i, :, :].astype(np.float32),
                np.array([10, 50, 90], dtype=np.float32),
                units="degC",
                time=datetime(2015, 11, 23, hour),
                frt=datetime(2015, 11, 23, 6),
            ))
    percentile_cube = cubelist.merge_cube()
    # Reorder so percentile leads: (percentile, time, y, x).
    percentile_cube.transpose([1, 0, 2, 3])
    plen = 1
    # Restore dimensions for a single-percentile slice of the cube.
    reshaped_array = restore_non_probabilistic_dimensions(
        percentile_cube[0].data, percentile_cube, "percentile", plen)
    self.assertArrayAlmostEqual(reshaped_array, expected)
def test_multiple_timesteps(self):
    """
    Test that the data has been reshaped correctly when there are
    multiple timesteps. The array contents are also checked.
    The output cube has only a single percentile, which is therefore
    demoted to a scalar coordinate.
    """
    # Expected result: (time, y, x) with the percentile axis removed.
    expected = np.array([
        [[4.0, 4.71428571], [5.42857143, 6.14285714]],
        [[6.85714286, 7.57142857], [8.28571429, 9.0]],
    ])
    cubelist = CubeList([])
    # One single-percentile (50th) cube per validity time.
    for i, hour in enumerate([7, 8]):
        cubelist.append(
            set_up_percentile_cube(
                np.array([expected[i, :, :]], dtype=np.float32),
                np.array([50], dtype=np.float32),
                units="degC",
                time=datetime(2015, 11, 23, hour),
                frt=datetime(2015, 11, 23, 6),
            ))
    percentile_cube = cubelist.merge_cube()
    # Flattened data plus a template slice with percentile demoted to a
    # scalar coordinate; n_percentiles=1.
    reshaped_array = restore_non_percentile_dimensions(
        percentile_cube.data.flatten(),
        next(percentile_cube.slices_over("percentile")),
        1,
    )
    self.assertArrayAlmostEqual(reshaped_array, expected)
def process(self, cube):
    """
    Apply the maximum_within_vicinity method to every 2d x-y slice of
    the input cube and merge the results back into a single cube.

    Args:
        cube (iris.cube.Cube):
            Thresholded cube.

    Returns:
        Iris.cube.Cube
            Cube containing the occurrences within a vicinity for each
            xy 2d slice, which have been merged back together.
    """
    yx_coords = [cube.coord(axis='y'), cube.coord(axis='x')]
    vicinity_slices = CubeList(
        [self.maximum_within_vicinity(xy_slice)
         for xy_slice in cube.slices(yx_coords)])
    merged = vicinity_slices.merge_cube()
    # Restore any dimensions that slicing demoted to scalar coordinates.
    return check_cube_coordinates(cube, merged)
def process(self, cube: Cube) -> Cube:
    """
    Ensure that the cube passed to the maximum_within_vicinity method
    is 2d and subsequently merged back together.

    Args:
        cube:
            Thresholded cube.

    Returns:
        Cube containing the occurrences within a vicinity for each
        xy 2d slice, which have been merged back together.

    Raises:
        ValueError: If a land mask is set and its spatial coordinates
            do not match those of the input cube.
    """
    if self.land_mask_cube and not spatial_coords_match(
            [cube, self.land_mask_cube]):
        # Fixed garbled error message: previously read "Supplied cube do
        # not have the same spatial coordinates and land mask".
        raise ValueError(
            "Supplied cube and land mask do not have the same spatial "
            "coordinates"
        )
    max_cubes = CubeList([])
    for cube_slice in cube.slices(
            [cube.coord(axis="y"), cube.coord(axis="x")]):
        max_cubes.append(self.maximum_within_vicinity(cube_slice))
    result_cube = max_cubes.merge_cube()
    # Put dimensions back if they were there before.
    result_cube = check_cube_coordinates(cube, result_cube)
    return result_cube
def get_cube(url, name_list=None, bbox=None, callback=None,
             time=None, units=None, constraint=None):
    """Load a cube from *url*, optionally filtering and post-processing.

    Args:
        url: Dataset location passed to iris.load_raw.
        name_list: Optional collection of standard_names to keep.
        bbox: Optional bounding box passed to intersection().
        callback: Optional iris load callback.
        time: datetime, or (start, stop) tuple, passed to time_slice().
        units: Optional target units for conversion.
        constraint: Optional iris Constraint applied before filtering.

    Returns:
        A single merged (and optionally subset/converted) iris Cube.

    Raises:
        ValueError: If no cube matches name_list, or *time* has an
            unsupported type.
    """
    cubes = iris.load_raw(url, callback=callback)
    if constraint:
        cubes = cubes.extract(constraint)
    if name_list:
        # Inline the membership test instead of assigning a lambda (PEP 8
        # E731).
        cubes = CubeList([cube for cube in cubes
                          if cube.standard_name in name_list])
    # Merge (or fail) unconditionally so `cube` is always bound, even when
    # name_list is not supplied.
    if not cubes:
        raise ValueError('Cube does not contain {!r}'.format(name_list))
    cube = cubes.merge_cube()
    if bbox:
        cube = intersection(cube, bbox)
    if time:
        if isinstance(time, datetime):
            start, stop = time, None
        elif isinstance(time, tuple):
            start, stop = time[0], time[1]
        else:
            raise ValueError('Time must be start or (start, stop).'
                             ' Got {!r}'.format(time))
        cube = time_slice(cube, start, stop)
    if units:
        # Prefer `!=` over `not ... ==`.
        if cube.units != units:
            cube.convert_units(units)
    return cube
def setUp(self):
    """Set-up cubes for testing."""
    # Three forecast reference times, one day apart.
    frts = [
        datetime.datetime(2017, 11, 10, 1, 0),
        datetime.datetime(2017, 11, 11, 1, 0),
        datetime.datetime(2017, 11, 12, 1, 0),
    ]
    forecast_cubes = CubeList()
    for frt in frts:
        # Each forecast is valid 3 hours after its reference time.
        forecast_cubes.append(
            set_up_variable_cube(
                np.ones((2, 3, 3), dtype=np.float32),
                frt=frt,
                time=frt + datetime.timedelta(hours=3),
            ))
    self.forecast = forecast_cubes.merge_cube()
    # Reorder so realization leads time: (realization, time, y, x).
    self.forecast.transpose([1, 0, 2, 3])
    # Static ancillary field without any temporal coordinates.
    self.altitude = set_up_variable_cube(np.ones((3, 3), dtype=np.float32),
                                         name="surface_altitude",
                                         units="m")
    for coord in ["time", "forecast_reference_time", "forecast_period"]:
        self.altitude.remove_coord(coord)
    self.expected_forecast = self.forecast.data.shape
    # Altitude broadcast over the forecast's time dimension.
    self.expected_altitude = (len(
        self.forecast.coord("time").points), ) + self.altitude.shape
def get_cube(url, name_list, bbox=None, time=None,
             units=None, callback=None, constraint=None):
    """Only `url` and `name_list` are mandatory.  The kw args are:
    `bbox`, `callback`, `time`, `units`, `constraint`.

    Returns a single merged iris Cube filtered to the requested
    standard names, then optionally constrained, spatially subset,
    time-sliced and unit-converted.

    Raises:
        ValueError: If no cube matches name_list, the constraint or
            bbox selects nothing, or *time* has an unsupported type.
    """
    cubes = iris.load_raw(url, callback=callback)
    # Inline the membership test instead of assigning a lambda (PEP 8 E731).
    cubes = CubeList([cube for cube in cubes
                      if cube.standard_name in name_list])
    if not cubes:
        raise ValueError('Cube does not contain {!r}'.format(name_list))
    cube = cubes.merge_cube()
    if constraint:
        cube = cube.extract(constraint)
        if not cube:
            raise ValueError('No cube using {!r}'.format(constraint))
    if bbox:
        cube = subset(cube, bbox)
        if not cube:
            raise ValueError('No cube using {!r}'.format(bbox))
    if time:
        if isinstance(time, datetime):
            start, stop = time, None
        elif isinstance(time, tuple):
            start, stop = time[0], time[1]
        else:
            raise ValueError('Time must be start or (start, stop).'
                             ' Got {!r}'.format(time))
        cube = time_slice(cube, start, stop)
    if units:
        if cube.units != units:
            cube.convert_units(units)
    return cube
def process(self, cube):
    """
    Apply the maximum_within_vicinity method to every 2d slice of the
    input, iterating over realization and then time, and merge the
    results back into a single cube.

    Args:
        cube : Iris.cube.Cube
            Thresholded cube.

    Returns:
        Iris.cube.Cube
            Cube containing the occurrences within a vicinity for each
            xy 2d slice, which have been merged back together.
    """
    vicinity_cubes = CubeList([])
    for realization_cube in self.find_slices_over_coordinate(
            cube, "realization"):
        # Within each realization, process every time step separately.
        for temporal_cube in self.find_slices_over_coordinate(
                realization_cube, "time"):
            vicinity_cubes.append(self.maximum_within_vicinity(temporal_cube))
    return vicinity_cubes.merge_cube()
def _add_levels(cube, levels=13):
    """Replicate *cube* over a new model_level_number dimension.

    Each copy is tagged with a scalar model_level_number coordinate
    (0..levels-1) so the merge yields a cube with a leading level axis.
    """
    replicas = CubeList()
    for index in range(levels):
        replica = cube.copy()
        replica.add_aux_coord(
            DimCoord(index, standard_name='model_level_number'))
        replicas.append(replica)
    return replicas.merge_cube()
def segmentation(features, field, dxy, threshold=3e-3, target='maximum', level=None, method='watershed', max_distance=None, vertical_coord='auto'):
    """
    Function using watershedding or random walker to determine cloud volumes
    associated with tracked updrafts

    Parameters:
    features:         pandas.DataFrame
                      output from trackpy/maketrack
    field:            iris.cube.Cube
                      containing the field to perform the watershedding on
    dxy:              float
                      grid spacing, forwarded to segmentation_timestep
    threshold:        float
                      threshold for the watershedding field to be used for the mask
    target:           string
                      Switch to determine if algorithm looks strating from maxima
                      or minima in input field (maximum: starting from maxima
                      (default), minimum: starting from minima)
    level             slice
                      levels at which to seed the cells for the watershedding
                      algorithm
    method:           str ('method')
                      flag determining the algorithm to use (currently
                      watershedding implemented)
    max_distance:     float
                      Maximum distance from a marker allowed to be classified as
                      belonging to that cell
    vertical_coord:   str
                      forwarded to segmentation_timestep ('auto' by default)

    Output:
    segmentation_out: iris.cube.Cube
                      Cloud mask, 0 outside and integer numbers according to
                      track inside the cloud
    features_out:     pandas.DataFrame
                      concatenated per-timestep feature information
    """
    import pandas as pd
    from iris.cube import CubeList

    logging.info('Start watershedding 3D')

    # check input for right dimensions:
    if not (field.ndim == 3 or field.ndim == 4):
        raise ValueError('input to segmentation step must be 3D or 4D including a time dimension')
    if 'time' not in [coord.name() for coord in field.coords()]:
        raise ValueError("input to segmentation step must include a dimension named 'time'")

    # CubeList and list to store individual segmentation masks and feature
    # DataFrames with information about segmentation
    segmentation_out_list = CubeList()
    features_out_list = []

    # loop over individual input timesteps for segmentation:
    field_time = field.slices_over('time')
    for i, field_i in enumerate(field_time):
        # Convert the numeric time point to a datetime so features for this
        # timestep can be selected from the DataFrame.
        time_i = field_i.coord('time').units.num2date(field_i.coord('time').points[0])
        features_i = features.loc[features['time'] == time_i]
        # Segment this single timestep with the same parameters throughout.
        segmentation_out_i, features_out_i = segmentation_timestep(field_i, features_i, dxy, threshold=threshold, target=target, level=level, method=method, max_distance=max_distance, vertical_coord=vertical_coord)
        segmentation_out_list.append(segmentation_out_i)
        features_out_list.append(features_out_i)
        logging.debug('Finished segmentation for ' + time_i.strftime('%Y-%m-%d_%H:%M:%S'))

    # Merge output from individual timesteps:
    segmentation_out = segmentation_out_list.merge_cube()
    features_out = pd.concat(features_out_list)

    logging.debug('Finished segmentation')
    return segmentation_out, features_out
def __call__(self, src):
    """
    Regrid the supplied :class:`~iris.cube.Cube` on to the target grid of
    this :class:`_CurvilinearRegridder`.

    The given cube must be defined with the same grid as the source
    grid used to create this :class:`_CurvilinearRegridder`.

    If the source cube has lazy data, it will be realized before
    regridding and the returned cube will also have realized data.

    Args:

    * src:
        A :class:`~iris.cube.Cube` to be regridded.

    Returns:
        A cube defined with the horizontal dimensions of the target
        and the other dimensions from this cube. The data values of
        this cube will be converted to values on the new grid using
        point-in-cell regridding.

    Raises:
        TypeError: If *src* is not a Cube.
        ValueError: If *src* is not on the regridder's source grid.
    """
    from iris.cube import Cube, CubeList

    # Validity checks.
    if not isinstance(src, Cube):
        raise TypeError("'src' must be a Cube")
    # Compare the copied source-grid coords against those of *src*;
    # coordinate equality covers points, bounds and metadata.
    gx = self._get_horizontal_coord(self._src_cube, "x")
    gy = self._get_horizontal_coord(self._src_cube, "y")
    src_grid = (gx.copy(), gy.copy())
    sx = self._get_horizontal_coord(src, "x")
    sy = self._get_horizontal_coord(src, "y")
    if (sx, sy) != src_grid:
        raise ValueError("The given cube is not defined on the same "
                         "source grid as this regridder.")

    # Call the regridder function.
    # This includes repeating over any non-XY dimensions, because the
    # underlying routine does not support this.
    # FOR NOW: we will use cube.slices and merge to achieve this,
    # though that is not a terribly efficient method ...
    # TODO: create a template result cube and paste data slices into it,
    # which would be more efficient.
    result_slices = CubeList([])
    for slice_cube in src.slices(sx):
        if self._regrid_info is None:
            # Calculate the basic regrid info just once (cached on self
            # for reuse across calls).
            self._regrid_info = (
                _regrid_weighted_curvilinear_to_rectilinear__prepare(
                    slice_cube, self.weights, self._target_cube))
        slice_result = (
            _regrid_weighted_curvilinear_to_rectilinear__perform(
                slice_cube, self._regrid_info))
        result_slices.append(slice_result)
    result = result_slices.merge_cube()
    return result
def _create_cube(self, filenames, variable):
    """Build a single cube for *variable* from a list of MODIS HDF4 files.

    Each file yields one (lat, lon) cube with a scalar time coordinate
    spanning the file's start/end dates; the cubes are then merged so
    time becomes a dimension.
    """
    import numpy as np
    from cis.data_io.hdf import _read_hdf4
    from iris.cube import Cube, CubeList
    from iris.coords import DimCoord, AuxCoord
    from cis.time_util import calculate_mid_time, cis_standard_time_unit
    from cis.data_io.hdf_sd import get_metadata
    from cf_units import Unit

    variables = ['XDim', 'YDim', variable]
    logging.info("Listing coordinates: " + str(variables))

    cube_list = CubeList()
    # Read each file individually, let Iris do the merging at the end.
    for f in filenames:
        sdata, vdata = _read_hdf4(f, variables)

        # Spatial coordinates come straight from the file's X/Y dims.
        lat_coord = DimCoord(_get_MODIS_SDS_data(sdata['YDim']),
                             standard_name='latitude', units='degrees')
        lon_coord = DimCoord(_get_MODIS_SDS_data(sdata['XDim']),
                             standard_name='longitude', units='degrees')

        # create time coordinate using the midpoint of the time delta between
        # the start date and the end date
        start_datetime = self._get_start_date(f)
        end_datetime = self._get_end_date(f)
        mid_datetime = calculate_mid_time(start_datetime, end_datetime)
        logging.debug("Using {} as datetime for file {}".format(
            mid_datetime, f))
        time_coord = AuxCoord(mid_datetime, standard_name='time',
                              units=cis_standard_time_unit,
                              bounds=[start_datetime, end_datetime])

        var = sdata[variable]
        metadata = get_metadata(var)

        # Fall back to unit-less data if the file's unit string is invalid.
        try:
            units = Unit(metadata.units)
        except ValueError:
            logging.warning(
                "Unable to parse units '{}' in {} for {}.".format(
                    metadata.units, f, variable))
            units = None

        cube = Cube(_get_MODIS_SDS_data(sdata[variable]),
                    dim_coords_and_dims=[(lon_coord, 1), (lat_coord, 0)],
                    aux_coords_and_dims=[(time_coord, None)],
                    var_name=metadata._name, long_name=metadata.long_name,
                    units=units)

        cube_list.append(cube)

    # Merge the cube list across the scalar time coordinates before returning
    # a single cube.
    return cube_list.merge_cube()
def truth_dataframe_to_cube(
    df: DataFrame,
    training_dates: DatetimeIndex,
) -> Cube:
    """Convert a truth DataFrame into an iris Cube.

    Args:
        df:
            DataFrame expected to contain the following columns: ob_value,
            time, wmo_id, diagnostic, latitude, longitude, altitude, cf_name,
            height, period and units. Any other columns are ignored.
        training_dates:
            Datetimes spanning the training period.

    Returns:
        Cube containing the truths from the training period.
        NOTE(review): implicitly returns None when no training date yields
        any data, despite the ``Cube`` annotation — confirm callers handle
        a None result.
    """
    cubelist = CubeList()
    for adate in training_dates:
        time_df = df.loc[(df["time"] == adate)]
        time_df = _preprocess_temporal_columns(time_df)

        # Skip dates with no observations rather than building empty cubes.
        if time_df.empty:
            continue

        # The following columns are expected to contain one unique value
        # per column.
        _unique_check(time_df, "diagnostic")

        # Period observations get time bounds spanning [adate - period, adate].
        if time_df["period"].isna().all():
            time_bounds = None
        else:
            period = time_df["period"].values[0]
            time_bounds = [adate - period, adate]

        time_coord = _define_time_coord(adate, time_bounds)
        height_coord = _define_height_coord(time_df["height"].values[0])

        cube = build_spotdata_cube(
            time_df["ob_value"].astype(np.float32),
            time_df["cf_name"].values[0],
            time_df["units"].values[0],
            time_df["altitude"].astype(np.float32),
            time_df["latitude"].astype(np.float32),
            time_df["longitude"].astype(np.float32),
            time_df["wmo_id"].values.astype("U5"),
            scalar_coords=[time_coord, height_coord],
        )
        cubelist.append(cube)

    if not cubelist:
        return
    return cubelist.merge_cube()
def process(self, cube: Cube, mask_cube: Optional[Cube] = None) -> Cube:
    """
    Call the methods required to apply a neighbourhood processing to a cube.

    Applies neighbourhood processing to each 2D x-y-slice of the input cube.

    If the input cube is masked the neighbourhood sum is calculated from
    the total of the unmasked data in the neighbourhood around each grid
    point. The neighbourhood mean is then calculated by dividing the
    neighbourhood sum at each grid point by the total number of valid grid
    points that contributed to that sum. If a mask_cube is provided then
    this is used to mask each x-y-slice prior to the neighburhood sum
    or mean being calculated.

    Args:
        cube:
            Cube containing the array to which the neighbourhood processing
            will be applied. Usually thresholded data.
        mask_cube:
            Cube containing the array to be used as a mask. Zero values in
            this array are taken as points to be masked.

    Returns:
        Cube containing the smoothed field after the
        neighbourhood method has been applied.
    """
    super().process(cube)
    check_if_grid_is_equal_area(cube)

    # If the data is masked, the mask will be processed as well as the
    # original_data * mask array.
    check_radius_against_distance(cube, self.radius)
    # Convert the physical radius into a number of grid cells.
    grid_cells = distance_to_number_of_grid_cells(cube, self.radius)
    if self.neighbourhood_method == "circular":
        self.kernel = circular_kernel(grid_cells, self.weighted_mode)
    elif self.neighbourhood_method == "square":
        # Square neighbourhood window is (2r + 1) cells on a side.
        self.nb_size = 2 * grid_cells + 1

    # EAFP: a missing mask_cube simply means no masking is applied.
    try:
        mask_cube_data = mask_cube.data
    except AttributeError:
        mask_cube_data = None

    result_slices = CubeList()
    # Process each 2D y-x slice independently, then merge.
    for cube_slice in cube.slices(
            [cube.coord(axis="y"), cube.coord(axis="x")]):
        cube_slice.data = self._calculate_neighbourhood(
            cube_slice.data, mask_cube_data)
        result_slices.append(cube_slice)
    neighbourhood_averaged_cube = result_slices.merge_cube()

    return neighbourhood_averaged_cube
def loadramscube_mult(filenames, variable, constraint=None, add_coordinates=None):
    """Load *variable* from several RAMS files and merge into one cube.

    Args:
        filenames: Sequence of file paths, each loaded with
            loadramscube_single.
        variable: Name of the variable to load.
        constraint: Optional iris Constraint applied after merging.
        add_coordinates: Forwarded to loadramscube_single.

    Returns:
        The merged (and optionally extracted) iris Cube.
    """
    from iris.cube import CubeList
    # Iterate the filenames directly rather than via range(len(...)).
    cube_list = [
        loadramscube_single(filename, variable, add_coordinates=add_coordinates)
        for filename in filenames
    ]
    # Strip per-file attributes that would otherwise prevent merging.
    for member in cube_list:
        member.attributes = {}
    variable_cube = CubeList(cube_list).merge_cube()
    variable_cube = variable_cube.extract(constraint)
    return variable_cube
def merge_nc_files(cubes: CubeList, filename: str):
    """Merge *cubes* into one cube and save it as a NetCDF file.

    Args:
        cubes: Cubes to merge; their attributes are equalised first.
        filename: Input to create_output_file, which determines the
            output path.

    Returns:
        The output file path produced by create_output_file.
        (The original ``-> None`` annotation contradicted the
        ``return output_nc_file`` below and has been removed.)

    Raises:
        MergeError: If equalising, merging or saving fails.
    """
    try:
        logger.info('Merging files...')
        equalise_attributes(cubes)
        new_cube = cubes.merge_cube()
        logger.info(new_cube)
        output_nc_file = create_output_file(filename)
        logger.info(f'Saving {output_nc_file}...')
        iris.save(new_cube, output_nc_file)
    except Exception as e:
        # Chain the original exception so the full traceback is preserved.
        raise MergeError(e) from e
    return output_nc_file
def concat_dim(cls, datasets, dim, vdims):
    """
    Concatenates datasets along one dimension
    """
    labelled = CubeList()
    for key, cube in datasets.items():
        # Tag a copy of each cube with its key so the merge can build the
        # new dimension from the scalar coordinates.
        tagged = cube.copy()
        tagged.add_aux_coord(DimCoord([key], var_name=dim.name))
        labelled.append(tagged)
    equalise_attributes(labelled)
    return labelled.merge_cube()
def _create_cube(self, filenames, variable):
    """Build a single cube for *variable* from global-grid MODIS HDF4 files.

    Unlike the plain XDim/YDim reader, the lat/lon points are generated as
    uniform linspaces sized from the file's GlobalGrid dimensions. Each
    file yields one cube with a scalar time coordinate; the cubes are then
    merged so time becomes a dimension.
    """
    import numpy as np
    from cis.data_io.hdf import _read_hdf4
    from cis.data_io import hdf_vd
    from iris.cube import Cube, CubeList
    from iris.coords import DimCoord, AuxCoord
    from cis.time_util import calculate_mid_time, cis_standard_time_unit
    from cis.data_io.hdf_sd import get_metadata
    from cf_units import Unit

    variables = ['XDim:GlobalGrid', 'YDim:GlobalGrid', variable]
    logging.info("Listing coordinates: " + str(variables))

    cube_list = CubeList()
    # Read each file individually, let Iris do the merging at the end.
    for f in filenames:
        sdata, vdata = _read_hdf4(f, variables)

        # Evenly spaced global grid; the point count comes from the file.
        lat_points = np.linspace(-90., 90., hdf_vd.get_data(vdata['YDim:GlobalGrid']))
        lon_points = np.linspace(-180., 180., hdf_vd.get_data(vdata['XDim:GlobalGrid']))

        lat_coord = DimCoord(lat_points, standard_name='latitude', units='degrees')
        lon_coord = DimCoord(lon_points, standard_name='longitude', units='degrees')

        # create time coordinate using the midpoint of the time delta between
        # the start date and the end date
        start_datetime = self._get_start_date(f)
        end_datetime = self._get_end_date(f)
        mid_datetime = calculate_mid_time(start_datetime, end_datetime)
        logging.debug("Using {} as datetime for file {}".format(mid_datetime, f))
        time_coord = AuxCoord(mid_datetime, standard_name='time',
                              units=cis_standard_time_unit,
                              bounds=[start_datetime, end_datetime])

        var = sdata[variable]
        metadata = get_metadata(var)

        # Fall back to unit-less data if the file's unit string is invalid.
        try:
            units = Unit(metadata.units)
        except ValueError:
            logging.warning("Unable to parse units '{}' in {} for {}.".format(metadata.units, f, variable))
            units = None

        cube = Cube(_get_MODIS_SDS_data(sdata[variable]),
                    dim_coords_and_dims=[(lon_coord, 1), (lat_coord, 0)],
                    aux_coords_and_dims=[(time_coord, None)],
                    var_name=metadata._name, long_name=metadata.long_name,
                    units=units)
        cube_list.append(cube)

    # Merge the cube list across the scalar time coordinates before returning
    # a single cube.
    return cube_list.merge_cube()
def _fill_months(cube):
    """Pad *cube* so its time dimension covers all 12 months.

    Months already present are kept; missing months are filled with
    NaN cubes modelled on the first time slice.
    """
    # Already complete: nothing to do.
    if cube.coord('time').shape[0] == 12:
        return cube

    cubes = CubeList(cube.slices_over('time'))
    # Template for generating NaN placeholder months.
    model_cube = cubes[0].copy()
    for month in range(1, 13):
        # The lambda captures `month` late, but the constraint is consumed
        # by cubes.extract() within the same iteration, so this is safe.
        month_constraint = iris.Constraint(
            # pylint: disable=cell-var-from-loop
            time=lambda cell: cell.point.month == month)
        if cubes.extract(month_constraint):
            continue
        cubes.append(
            OSICmorizer._create_nan_cube(model_cube, month, month=True))
    cube = cubes.merge_cube()
    return cube
def realization_cubes_fixture() -> CubeList:
    """Set up a single realization cube in parameter space"""
    realizations = [0, 1, 2, 3]
    grid = np.ones((len(realizations), 2, 2), dtype=np.float32)
    reference_time = datetime(2017, 11, 10, 1)
    # One multi-realization cube per validity time (04, 05, 06 UTC).
    per_time = CubeList(
        [
            set_up_variable_cube(
                grid,
                realizations=realizations,
                spatial_grid="equalarea",
                time=datetime(2017, 11, 10, hour),
                frt=reference_time,
            )
            for hour in (4, 5, 6)
        ]
    )
    merged = per_time.merge_cube()
    # Return one cube per realization, each spanning all times.
    return CubeList(merged.slices_over("realization"))
def test_ice_large_with_fc(self):
    """Test that large VII probs do increase zero lightning risk when
    forecast lead time is non-zero (three forecast_period points)"""
    # Maximum ice probability at the centre point; zero first-guess
    # lightning probability at the same point.
    self.ice_cube.data[:, 1, 1] = 1.0
    self.fg_cube.data[1, 1] = 0.0
    frt_point = self.fg_cube.coord("forecast_reference_time").points[0]
    fg_cube_input = CubeList([])
    # Build a three-lead-time first-guess cube (1h, 2.5h, 3h).
    for fc_time in np.array([1, 2.5, 3]) * 3600:  # seconds
        fg_cube_next = self.fg_cube.copy()
        fg_cube_next.coord("time").points = [frt_point + fc_time]
        fg_cube_next.coord("forecast_period").points = [fc_time]
        fg_cube_input.append(squeeze(fg_cube_next))
    fg_cube_input = fg_cube_input.merge_cube()
    expected = fg_cube_input.copy()
    # expected.data contains all ones except:
    # the ice effect at the centre point decays with lead time.
    expected.data[0, 1, 1] = 0.54
    expected.data[1, 1, 1] = 0.0
    expected.data[2, 1, 1] = 0.0
    result = self.plugin.apply_ice(fg_cube_input, self.ice_cube)
    self.assertArrayAlmostEqual(result.data, expected.data)
def main():
    """Compare reduced-precision forecasts against each other.

    Loads a single field at fixed pressure level and lead time, computes
    the RMS difference of every precision against every other precision
    (each in turn treated as the "truth"), and plots the resulting
    precision vs reference_precision error matrix.
    """
    # Parameters to compare between forecasts
    path = datadir + 'deterministic/'
    filename = 'rp_physics.nc'
    name = 'Temperature [K]'
    pressure = 500
    lead_time = 7*24  # hours

    cs = iris.Constraint(
        name=name, pressure=pressure, forecast_period=lead_time)

    # Load full precision reference forecast
    cube = iris.load_cube(path + filename, cs)

    # Calculate the errors with each different precision used as the `truth`
    diffs = CubeList()
    for pseudo_truth in cube.slices_over('precision'):
        # Add the precision of the `truth` cube as another coordinate
        p = pseudo_truth.coord('precision').points[0]
        p = AuxCoord(p, long_name='reference_precision')

        # Calculate the errors
        diff = rms_diff(cube, pseudo_truth)
        diff.add_aux_coord(p)

        # Store the errors in the cubelist
        diffs.append(diff)

    # Combine all the errors into a single cube with dimensions of
    # precision vs reference_precision
    diffs = diffs.merge_cube()

    # Plot the errors
    qplt.pcolor(diffs, vmin=0, cmap='cubehelix_r')
    precisions = cube.coord('precision').points
    plt.xticks(precisions)
    plt.yticks(precisions)
    plt.show()

    return
def realization_cubes_fixture() -> CubeList:
    """Set up a single realization cube in parameter space.

    Returns one cube per realization, each tagged with a distinct
    "history" attribute.
    """
    realizations = [0, 1, 2, 3]
    data = np.ones((len(realizations), 2, 2), dtype=np.float32)
    times = [datetime(2017, 11, 10, hour) for hour in [4, 5, 6]]
    cubes = CubeList()
    for time in times:
        cubes.append(
            set_up_variable_cube(
                data,
                realizations=realizations,
                spatial_grid="equalarea",
                time=time,
                frt=datetime(2017, 11, 10, 1),
            ))
    cube = cubes.merge_cube()
    sliced_cubes = CubeList(cube.slices_over("realization"))
    # Plain loop: the original used a list comprehension purely for its
    # side effects, building a throwaway list of Nones.
    for i, s in enumerate(sliced_cubes):
        s.attributes.update({"history": f"20171110T{i:02d}00Z"})
    return sliced_cubes
def truth_spot(truth_grid):
    """Build a spot-data truth cube from the first slice of a gridded truth.

    Produces one spot cube per day (5th and 6th), then merges them so
    time becomes a dimension.
    """
    # Flatten the first gridded slice into (threshold, site) spot values.
    truth_data_spot = truth_grid[0, ...].data.reshape((2, 9))
    truths_spot_list = CubeList()
    for day in range(5, 7):
        # Scalar time coordinates for 04:00 UTC on this day (no forecast
        # period for truths).
        time_coords = construct_scalar_time_coords(
            datetime(2017, 11, day, 4, 0),
            None,
            datetime(2017, 11, day, 4, 0),
        )
        # construct_scalar_time_coords returns (coord, dims) pairs; keep
        # just the coords.
        time_coords = [t[0] for t in time_coords]
        truths_spot_list.append(
            build_spotdata_cube(
                truth_data_spot,
                name="probability_of_air_temperature_above_threshold",
                units="1",
                altitude=_dummy_point_locations,
                latitude=_dummy_point_locations,
                longitude=_dummy_point_locations,
                wmo_id=_dummy_string_ids,
                additional_dims=[_threshold_coord],
                scalar_coords=time_coords,
            )
        )
    truths_spot = truths_spot_list.merge_cube()
    return truths_spot
def subset_data(
    cube: Cube,
    grid_spec: Optional[Dict[str, Dict[str, int]]] = None,
    site_list: Optional[List] = None,
) -> Cube:
    """Extract a spatial cutout or subset of sites from data to generate
    suite reference outputs.

    Args:
        cube:
            Input dataset
        grid_spec:
            Dictionary containing bounding grid points and an integer
            "thinning factor" for each of UK and global grid, to create
            cutouts.  Eg a "thinning factor" of 10 would mean every 10th
            point being taken for the cutout.  The expected dictionary has
            keys that are spatial coordinate names, with values that are
            dictionaries with "min", "max" and "thin" keys.
        site_list:
            List of WMO site IDs to extract.  These IDs must match the
            type and format of the "wmo_id" coordinate on the input spot
            cube.

    Returns:
        Subset of input cube as specified by input constraints

    Raises:
        ValueError:
            If site_list is not provided for a spot data cube
        ValueError:
            If the spot data cube does not contain any of the required sites
        ValueError:
            If grid_spec is not provided for a gridded cube
        ValueError:
            If grid_spec does not contain entries for the spatial
            coordinates on the input gridded data
        ValueError:
            If the grid_spec provided does not overlap with the cube domain
    """
    # Spot cubes are identified by the presence of a spot_index coordinate.
    if cube.coords("spot_index"):
        if site_list is None:
            raise ValueError("site_list required to extract from spot data")

        constraint = Constraint(
            coord_values={"wmo_id": lambda x: x in site_list})
        result = cube.extract(constraint)
        if result is None:
            raise ValueError(
                f"Cube does not contain any of the required sites: {site_list}"
            )

    else:
        if grid_spec is None:
            raise ValueError("grid_spec required to extract from gridded data")

        x_coord = cube.coord(axis="x").name()
        y_coord = cube.coord(axis="y").name()

        for coord in [y_coord, x_coord]:
            if coord not in grid_spec:
                raise ValueError(
                    f"Cube coordinates {y_coord}, {x_coord} are not present within "
                    f"{grid_spec.keys()}")

        def _create_cutout(cube, grid_spec):
            """Given a gridded data cube and boundary limits for cutout
            dimensions, create cutout.  Expects cube on either lat-lon or
            equal area grid.  Returns None when the requested bounds do not
            overlap the cube domain.
            """
            x_coord = cube.coord(axis="x").name()
            y_coord = cube.coord(axis="y").name()

            xmin = grid_spec[x_coord]["min"]
            xmax = grid_spec[x_coord]["max"]
            ymin = grid_spec[y_coord]["min"]
            ymax = grid_spec[y_coord]["max"]

            # need to use cube intersection for circular coordinates (longitude)
            if x_coord == "longitude":
                lat_constraint = Constraint(
                    latitude=lambda y: ymin <= y.point <= ymax)
                cutout = cube.extract(lat_constraint)
                if cutout is None:
                    return cutout

                cutout = cutout.intersection(longitude=(xmin, xmax),
                                             ignore_bounds=True)

                # intersection creates a new coordinate with default datatype - we
                # therefore need to re-cast to meet the IMPROVER standard
                cutout.coord("longitude").points = cutout.coord(
                    "longitude").points.astype(FLOAT_DTYPE)
                if cutout.coord("longitude").bounds is not None:
                    cutout.coord("longitude").bounds = cutout.coord(
                        "longitude").bounds.astype(FLOAT_DTYPE)

            else:
                x_constraint = Constraint(
                    projection_x_coordinate=lambda x: xmin <= x.point <= xmax)
                y_constraint = Constraint(
                    projection_y_coordinate=lambda y: ymin <= y.point <= ymax)
                cutout = cube.extract(x_constraint & y_constraint)

            return cutout

        cutout = _create_cutout(cube, grid_spec)

        if cutout is None:
            raise ValueError(
                "Cube domain does not overlap with cutout specified:\n"
                f"{x_coord}: {grid_spec[x_coord]}, {y_coord}: {grid_spec[y_coord]}"
            )

        # Thin the cutout by taking every Nth point in each direction.
        original_coords = get_dim_coord_names(cutout)
        thin_x = grid_spec[x_coord]["thin"]
        thin_y = grid_spec[y_coord]["thin"]
        result_list = CubeList()
        try:
            for subcube in cutout.slices([y_coord, x_coord]):
                result_list.append(subcube[::thin_y, ::thin_x])
        except ValueError as cause:
            # error is raised if X or Y coordinate are single-valued (non-dimensional)
            if "iterator" in str(cause) and "dimension" in str(cause):
                raise ValueError(
                    "Function does not support single point extraction")
            else:
                raise

        result = result_list.merge_cube()
        # Merging can reorder dimensions; restore the original ordering.
        enforce_coordinate_ordering(result, original_coords)

    return result
def forecast_dataframe_to_cube(
    df: DataFrame,
    training_dates: DatetimeIndex,
    forecast_period: int,
) -> Cube:
    """Convert a forecast DataFrame into an iris Cube. The percentiles
    within the forecast DataFrame are rebadged as realizations.

    Args:
        df:
            DataFrame expected to contain the following columns: forecast,
            blend_time, forecast_period, forecast_reference_time, time,
            wmo_id, percentile, diagnostic, latitude, longitude, period,
            height, cf_name, units. Any other columns are ignored.
        training_dates:
            Datetimes spanning the training period.
        forecast_period:
            Forecast period in seconds as an integer.

    Returns:
        Cube containing the forecasts from the training period.
        NOTE(review): implicitly returns None when no training date yields
        any data, despite the ``Cube`` annotation — confirm callers handle
        a None result.
    """
    fp_point = pd.Timedelta(int(forecast_period), unit="seconds")

    cubelist = CubeList()
    for adate in training_dates:
        # Select rows at this validity time and the requested lead time.
        time_df = df.loc[(df["time"] == adate)
                         & (df["forecast_period"] == fp_point)]

        time_df = _preprocess_temporal_columns(time_df)
        if time_df.empty:
            continue

        # The following columns are expected to contain one unique value
        # per column.
        for col in ["period", "height", "cf_name", "units", "diagnostic"]:
            _unique_check(time_df, col)

        # Period diagnostics get bounds on both time and forecast_period.
        if time_df["period"].isna().all():
            time_bounds = None
            fp_bounds = None
        else:
            period = time_df["period"].values[0]
            time_bounds = [adate - period, adate]
            fp_bounds = [fp_point - period, fp_point]

        time_coord = _define_time_coord(adate, time_bounds)
        height_coord = _define_height_coord(time_df["height"].values[0])

        fp_coord = AuxCoord(
            np.array(fp_point.total_seconds(),
                     dtype=TIME_COORDS["forecast_period"].dtype),
            "forecast_period",
            bounds=fp_bounds if fp_bounds is None else [
                np.array(f.total_seconds(),
                         dtype=TIME_COORDS["forecast_period"].dtype)
                for f in fp_bounds
            ],
            units=TIME_COORDS["forecast_period"].units,
        )
        frt_coord = AuxCoord(
            np.array(
                time_df["forecast_reference_time"].values[0].timestamp(),
                dtype=TIME_COORDS["forecast_reference_time"].dtype,
            ),
            "forecast_reference_time",
            units=TIME_COORDS["forecast_reference_time"].units,
        )

        # One spot cube per percentile; merging later stacks them into a
        # percentile dimension.
        for percentile in sorted(df["percentile"].unique()):
            perc_coord = DimCoord(np.float32(percentile),
                                  long_name="percentile",
                                  units="%")
            perc_df = time_df.loc[time_df["percentile"] == percentile]

            cube = build_spotdata_cube(
                perc_df["forecast"].astype(np.float32),
                perc_df["cf_name"].values[0],
                perc_df["units"].values[0],
                perc_df["altitude"].astype(np.float32),
                perc_df["latitude"].astype(np.float32),
                perc_df["longitude"].astype(np.float32),
                perc_df["wmo_id"].values.astype("U5"),
                scalar_coords=[
                    time_coord,
                    frt_coord,
                    fp_coord,
                    perc_coord,
                    height_coord,
                ],
            )
            cubelist.append(cube)

    if not cubelist:
        return
    cube = cubelist.merge_cube()

    # Rebadge the percentile coordinate as realizations for downstream use.
    return RebadgePercentilesAsRealizations()(cube)
def create_data_object(self, filenames, variable):
    """Create a CIS GriddedData object for *variable* from NetCDF files.

    Each file contributes one (lat, lon) cube with a scalar time
    coordinate derived from the filename's date prefix; the cubes are
    merged into a single cube with a time dimension.
    """
    from netCDF4 import Dataset
    from biggus import OrthoArrayAdapter
    from iris.cube import Cube, CubeList
    from iris.coords import DimCoord
    from iris.fileformats.netcdf import NetCDFDataProxy
    from datetime import datetime
    from os.path import basename
    from cis.time_util import cis_standard_time_unit
    from cis.data_io.gridded_data import make_from_cube
    import numpy as np

    cubes = CubeList()

    for f in filenames:
        # Open the file
        ds = Dataset(f)
        # E.g. 'NO2.COLUMN.VERTICAL.TROPOSPHERIC.CS30_BACKSCATTER.SOLAR'
        v = ds.variables[variable]
        # Get the coords
        lat = ds.variables['LATITUDE']
        lon = ds.variables['LONGITUDE']

        # Create a biggus adaptor over the data
        scale_factor = getattr(v, 'scale_factor', None)
        add_offset = getattr(v, 'add_offset', None)
        # Determine the effective dtype after any packing transform.
        if scale_factor is None and add_offset is None:
            v_dtype = v.datatype
        elif scale_factor is not None:
            v_dtype = scale_factor.dtype
        else:
            v_dtype = add_offset.dtype
        # proxy = NetCDFDataProxy(v.shape, v_dtype, f, variable, float(v.VAR_FILL_VALUE))
        # a = OrthoArrayAdapter(proxy)
        # Mask out all invalid values (NaN, Inf, etc)
        a = np.ma.masked_invalid(v[:])
        # Set everything negative to NaN
        a = np.ma.masked_less(a, 0.0)

        # Just read the lat and lon in directly
        lat_coord = DimCoord(lat[:], standard_name='latitude',
                             units='degrees', long_name=lat.VAR_DESCRIPTION)
        lon_coord = DimCoord(lon[:], standard_name='longitude',
                             units='degrees', long_name=lon.VAR_DESCRIPTION)

        # Pull the date out of the filename
        fname = basename(f)
        dt = datetime.strptime(fname[:10], "%Y_%m_%d")
        t_coord = DimCoord(cis_standard_time_unit.date2num(dt),
                           standard_name='time',
                           units=cis_standard_time_unit)

        c = Cube(a, long_name=getattr(v, "VAR_DESCRIPTION", None),
                 units=getattr(v, "VAR_UNITS", None),
                 dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)])

        c.add_aux_coord(t_coord)

        # Close the file
        ds.close()

        cubes.append(c)

    # We have a scalar time coord and no conflicting metadata so this should
    # just create one cube...
    merged = cubes.merge_cube()

    # Return as a CIS GriddedData object
    return make_from_cube(merged)
# Notebook-exported script chunk: load sea temperature from a THREDDS
# endpoint, subset by bounding box and time window, and print the cube.
# NOTE(review): uses a Python 2 print statement — this chunk will not run
# under Python 3 as written.

# Target units and the standard_names accepted as "sea temperature".
units = iris.unit.Unit('celsius')
name_list = ['sea_water_temperature',
             'sea_surface_temperature',
             'sea_water_potential_temperature',
             'equivalent_potential_temperature',
             'sea_water_conservative_temperature',
             'pseudo_equivalent_potential_temperature']

url = "http://crow.marine.usf.edu:8080/thredds/dodsC/FVCOM-Nowcast-Agg.nc"
cubes = iris.load_raw(url)

# Keep only cubes whose standard_name is in the accepted list.
in_list = lambda cube: cube.standard_name in name_list
cubes = CubeList([cube for cube in cubes if in_list(cube)])
cube = cubes.merge_cube()

# Spatial subset: bbox is (lon_min, lat_min, lon_max, lat_max) —
# presumably defined earlier in the notebook; verify before running.
lat = iris.Constraint(latitude=lambda cell: bbox[1] <= cell < bbox[3])
lon = iris.Constraint(longitude=lambda cell: bbox[0] <= cell <= bbox[2])
cube = cube.extract(lon & lat)

# Temporal subset between the indices nearest `start` and `stop`.
istart = time_near(cube, start)
istop = time_near(cube, stop)
cube = cube[istart:istop, ...]

# <codecell>

print cube

# <codecell>
def process(self, cube: Cube) -> Cube:
    """
    Produces the vicinity processed data. The input data is sliced to
    yield y-x slices to which the maximum_within_vicinity method is applied.
    The different vicinity radii (if multiple) are looped over and a
    coordinate recording the radius used is added to each resulting cube.
    A single cube is returned with the leading coordinates of the input cube
    preserved. If a single vicinity radius is provided, a new scalar
    radius_of_vicinity coordinate will be found on the returned cube. If
    multiple radii are provided, this coordinate will be a dimension
    coordinate following any probabilistic / realization coordinates.

    Args:
        cube:
            Thresholded cube.

    Returns:
        Cube containing the occurrences within a vicinity for each radius,
        calculated for each yx slice, which have been merged to yield a
        single cube.

    Raises:
        ValueError: Cube and land mask have differing spatial coordinates.
    """
    if self.land_mask_cube and not spatial_coords_match(
            [cube, self.land_mask_cube]):
        raise ValueError(
            "Supplied cube do not have the same spatial coordinates and land mask"
        )

    # Convert physical radii to grid-cell counts unless the radii were
    # already given in grid points.
    if not self.native_grid_point_radius:
        grid_point_radii = [
            distance_to_number_of_grid_cells(cube, radius)
            for radius in self.radii
        ]
    else:
        grid_point_radii = self.radii

    radii_cubes = CubeList()

    # List of non-spatial dimensions to restore as leading on the output.
    leading_dimensions = [
        crd.name() for crd in cube.coords(dim_coords=True)
        if not crd.coord_system
    ]

    for radius, grid_point_radius in zip(self.radii, grid_point_radii):
        max_cubes = CubeList([])
        # Apply the vicinity processing to each 2D y-x slice.
        for cube_slice in cube.slices(
                [cube.coord(axis="y"), cube.coord(axis="x")]):
            max_cubes.append(
                self.maximum_within_vicinity(cube_slice, grid_point_radius))
        result_cube = max_cubes.merge_cube()

        # Put dimensions back if they were there before.
        result_cube = check_cube_coordinates(cube, result_cube)

        # Add a coordinate recording the vicinity radius applied to the data.
        self._add_vicinity_coordinate(result_cube, radius)

        radii_cubes.append(result_cube)

    # Merge cubes produced for each vicinity radius.
    result_cube = radii_cubes.merge_cube()

    # Enforce order of leading dimensions on the output to match the input.
    enforce_coordinate_ordering(result_cube, leading_dimensions)

    # Rename to reflect the vicinity processing; probability cubes use the
    # dedicated name formatter.
    if is_probability(result_cube):
        result_cube.rename(in_vicinity_name_format(result_cube.name()))
    else:
        result_cube.rename(f"{result_cube.name()}_in_vicinity")

    return result_cube