def test_between_thresholds(self):
    """Check the diagnostic name is recovered from a "between thresholds"
    probability name"""
    probability_name = "probability_of_visibility_in_air_between_thresholds"
    self.assertEqual(
        extract_diagnostic_name(probability_name), "visibility_in_air")
def test_basic(self):
    """Check the diagnostic name is recovered from a standard
    "above threshold" probability name"""
    probability_name = "probability_of_air_temperature_above_threshold"
    self.assertEqual(
        extract_diagnostic_name(probability_name), "air_temperature")
def _calculate_shower_condition(self, shape):
    """
    Calculate deterministic "precipitation is showery" field

    For every cube in self.cubes the slice at the diagnostic threshold
    configured in self.tree is compared against the configured probability
    threshold; the per-cube results (ones and zeros) are multiplied
    together, so a point stays 1 only if every cube's condition is met.

    Args:
        shape (tuple):
            Shape of the initial array of ones.

    Returns:
        numpy.ndarray:
            Array of ones and zeros, cast to FLOAT_DTYPE.

    Raises:
        ValueError: If a cube does not contain the required threshold.
    """
    showery_points = np.ones(shape, dtype=FLOAT_DTYPE)

    for cube in self.cubes:
        name = extract_diagnostic_name(cube.name())
        node = self.tree[name]

        # The constraint is evaluated straight away by cube.extract, so
        # closing over the loop variable "node" is safe here.
        threshold_slice = cube.extract(
            iris.Constraint(coord_values={
                name: lambda cell: np.isclose(
                    cell.point, node["diagnostic_threshold"])
            })
        )
        if threshold_slice is None:
            raise ValueError(
                "Cube {} does not contain required threshold {}".format(
                    cube.name(), node["diagnostic_threshold"]))

        prob = node["probability_threshold"]
        if node["operator"] == "above":
            is_met = threshold_slice.data >= prob
        else:
            is_met = threshold_slice.data < prob

        showery_points = showery_points * np.where(is_met, 1, 0)

    return showery_points.astype(FLOAT_DTYPE)
def test_in_vicinity(self):
    """Check the diagnostic name is recovered from an "in vicinity"
    probability name. The name "cloud_height" is chosen to illustrate why
    the suffix cannot be removed with "rstrip"."""
    expected = 'cloud_height'
    probability_name = (
        'probability_of_{}_in_vicinity_above_threshold'.format(expected))
    self.assertEqual(extract_diagnostic_name(probability_name), expected)
def process(
    self, cube_list, new_diagnostic_name, broadcast_to_threshold=False,
):
    """
    Combine a list of cubes into one cube by multiplying their data.

    Metadata for the combined cube is taken from the first cube in the
    input list.

    Args:
        cube_list (iris.cube.CubeList or list):
            Cubes to combine; at least two are required.
        new_diagnostic_name (str):
            Name for the combined diagnostic.
        broadcast_to_threshold (bool):
            Set True when the first cube carries a threshold coordinate
            that the remaining cube(s) must be broadcast to before the
            data are combined. In that case the output keeps the
            probabilistic form of the first cube's name, with the
            diagnostic part replaced by new_diagnostic_name.

    Returns:
        iris.cube.Cube:
            Cube holding the combined data.

    Raises:
        ValueError: If fewer than two cubes are supplied.
        TypeError: If combining data results in float64 data (raised by
            the combining helper rather than in this method).
    """
    if len(cube_list) < 2:
        raise ValueError("Expecting 2 or more cubes in cube_list")

    if broadcast_to_threshold:
        cube_list = self._setup_coords_for_broadcast(cube_list)

    self._check_dimensions_match(
        cube_list, comparators=[eq, self._coords_are_broadcastable])

    result = self._combine_cube_data(cube_list)

    output_name = new_diagnostic_name
    if broadcast_to_threshold:
        source_name = cube_list[0].name()
        source_diagnostic = extract_diagnostic_name(source_name)

        # The threshold coordinate takes the name of the new diagnostic.
        # var_name is assigned again after the rename so the coordinate
        # can still be found via var_name="threshold".
        result.coord(var_name="threshold").rename(new_diagnostic_name)
        result.coord(new_diagnostic_name).var_name = "threshold"

        output_name = source_name.replace(
            source_diagnostic, new_diagnostic_name)

    result.rename(output_name)
    return result
def set_up_probability_threshold_cube(
        data, phenomenon_standard_name, phenomenon_units,
        forecast_thresholds=np.array([8, 10, 12]), timesteps=1,
        y_dimension_length=3, x_dimension_length=3,
        spp__relative_to_threshold='above'):
    """
    Build a probability cube with threshold, time, latitude and longitude
    dimension coordinates (dimensions 0, 1, 2 and 3 respectively).

    The cube's long name is
    "probability_of_<phenomenon>_<relative_to_threshold>_threshold" and the
    threshold coordinate is named after the diagnostic extracted from it.

    Args:
        data (numpy.ndarray):
            Data for the cube.
        phenomenon_standard_name (str):
            Name of the underlying diagnostic.
        phenomenon_units (str):
            Units of the threshold coordinate.
        forecast_thresholds (numpy.ndarray):
            Points for the threshold coordinate.
        timesteps (int):
            Number of points on the time coordinate.
        y_dimension_length (int):
            Number of points on the latitude coordinate.
        x_dimension_length (int):
            Number of points on the longitude coordinate.
        spp__relative_to_threshold (str):
            Value used in the cube name and stored in the threshold
            coordinate's "spp__relative_to_threshold" attribute.

    Returns:
        iris.cube.Cube:
            The constructed probability cube.
    """
    probability_name = "probability_of_{}_{}_threshold".format(
        phenomenon_standard_name, spp__relative_to_threshold)
    cube = Cube(data, long_name=probability_name, units=1)

    diagnostic_name = extract_diagnostic_name(probability_name)
    shared_kwargs = {"units": phenomenon_units, "var_name": "threshold"}
    try:
        # First attempt passes the diagnostic name positionally.
        cube.add_dim_coord(
            DimCoord(forecast_thresholds, diagnostic_name, **shared_kwargs),
            0)
    except ValueError:
        # Fall back to supplying the name as a long_name instead.
        cube.add_dim_coord(
            DimCoord(forecast_thresholds, long_name=diagnostic_name,
                     **shared_kwargs),
            0)

    time_origin = "hours since 1970-01-01 00:00:00"
    calendar = "gregorian"
    time_points = np.linspace(
        412227.0, 412327.0, timesteps, dtype=np.float32)
    cube.add_dim_coord(
        DimCoord(time_points, "time", units=Unit(time_origin, calendar)), 1)

    latitude_points = np.linspace(
        -45.0, 45.0, y_dimension_length, dtype=np.float32)
    cube.add_dim_coord(
        DimCoord(latitude_points, 'latitude', units='degrees'), 2)

    longitude_points = np.linspace(
        120, 180, x_dimension_length, dtype=np.float32)
    cube.add_dim_coord(
        DimCoord(longitude_points, 'longitude', units='degrees'), 3)

    threshold_coord = cube.coord(var_name="threshold")
    threshold_coord.attributes["spp__relative_to_threshold"] = (
        spp__relative_to_threshold)
    return cube
def _update_metadata(self, output_cube, original_units):
    """
    Rename the output cube and update its threshold coordinate in place.

    Args:
        output_cube (iris.cube.Cube):
            Cube containing new "between_thresholds" probabilities
        original_units (str):
            Units the threshold-type coordinate is converted to
    """
    diagnostic = extract_diagnostic_name(self.cube.name())
    output_cube.rename(
        "probability_of_{}_between_thresholds".format(diagnostic))

    threshold_coord = output_cube.coord(self.thresh_coord.name())
    threshold_coord.convert_units(original_units)
    threshold_coord.attributes["spp__relative_to_threshold"] = (
        "between_thresholds")
def test_error_not_probability(self):
    """Check a ValueError is raised when the name passed in is not a
    probability cube name"""
    self.assertRaises(
        ValueError, extract_diagnostic_name, "lwe_precipitation_rate")
def test_below_threshold(self):
    """Check the diagnostic name is recovered from a "below threshold"
    probability name"""
    probability_name = "probability_of_air_temperature_below_threshold"
    self.assertEqual(
        extract_diagnostic_name(probability_name), "air_temperature")
def check_input_cubes(self, cubes):
    """
    Verify that every diagnostic and threshold needed by the decision tree
    is available in the input cubes.

    Side effects on the instance:
      - self.coord_named_threshold is set to True when a threshold-type
        coordinate is called "threshold" (rather than being named after
        the diagnostic), for backward compatibility.
      - self.threshold_coord_names records, per diagnostic, any threshold
        coordinate name that differs from the name extracted from the
        diagnostic.
      - each query threshold is converted in place to the units of the
        matching cube's threshold coordinate.

    Args:
        cubes (iris.cube.CubeList):
            A CubeList containing the input diagnostic cubes.

    Raises:
        IOError:
            Raises an IOError if any of the required input data is missing.
            The error includes details of which fields are missing.
    """
    missing_data = []
    for query in self.queries.values():
        required = zip(
            expand_nested_lists(query, 'diagnostic_fields'),
            expand_nested_lists(query, 'diagnostic_thresholds'),
            expand_nested_lists(query, 'diagnostic_conditions'))

        for diagnostic, threshold, condition in required:
            # Look for the diagnostic by name first.
            matched_cube = cubes.extract(iris.Constraint(name=diagnostic))
            if not matched_cube:
                missing_data.append([diagnostic, threshold, condition])
                continue

            # Express the required threshold in the cube's own units.
            cube_threshold_coord = find_threshold_coordinate(matched_cube[0])
            threshold.convert_units(cube_threshold_coord.units)
            threshold_value = threshold.points.item()
            threshold_name = cube_threshold_coord.name()

            # Record coordinate names that break the expected convention.
            if extract_diagnostic_name(diagnostic) != threshold_name:
                self.threshold_coord_names[diagnostic] = threshold_name

            # Flag use of the old threshold coordinate name.
            if (threshold_name == "threshold"
                    and not self.coord_named_threshold):
                self.coord_named_threshold = True

            # The constraint is used immediately below, so the closures
            # over loop variables are evaluated with the current values.
            lower = threshold_value * (1. - self.float_tolerance)
            upper = threshold_value * (1. + self.float_tolerance)
            threshold_constraint = iris.Constraint(
                coord_values={
                    threshold_name: lambda cell: lower < cell < upper},
                cube_func=lambda cube: (
                    find_threshold_coordinate(cube).attributes[
                        'spp__relative_to_threshold'] == condition))

            if not matched_cube.extract(threshold_constraint):
                missing_data.append(
                    [diagnostic, threshold_value, condition])

    if missing_data:
        line_template = ('name: {}, threshold: {}, '
                         'spp__relative_to_threshold: {}\n')
        header = ('Weather Symbols input cubes are missing'
                  ' the following required'
                  ' input fields:\n')
        raise IOError(
            header + "".join(
                line_template.format(*item) for item in missing_data))
def construct_extract_constraint(
        self, diagnostics, thresholds, coord_named_threshold):
    """
    Construct an iris constraint.

    The tolerance applied around each threshold value is read from
    self.float_tolerance, i.e. the same value this instance uses elsewhere,
    rather than from a freshly constructed plugin object.

    Args:
        diagnostics (str or list of str):
            The names of the diagnostics to be extracted from the CubeList.
        thresholds (iris.AuxCoord or list of iris.AuxCoord):
            All thresholds within the given diagnostic cubes that are
            needed, including units.  Note these are NOT coords from the
            original cubes, just constructs to associate units with values.
        coord_named_threshold (bool):
            If true, use old naming convention for threshold coordinates
            (coord.long_name=threshold).  Otherwise use the name recorded
            in self.threshold_coord_names for the diagnostic if there is
            one, else extract the threshold coordinate name from the
            diagnostic name.

    Returns:
        str or list of str:
            String, or list of strings, encoding iris cube constraints.
            A list is returned when "diagnostics" is a list.
    """
    float_min = 1. - self.float_tolerance
    float_max = 1. + self.float_tolerance

    def _threshold_coord_name(diagnostic):
        """Return the name of the threshold coordinate to constrain on."""
        if coord_named_threshold:
            return "threshold"
        if diagnostic in self.threshold_coord_names:
            return self.threshold_coord_names[diagnostic]
        return extract_diagnostic_name(diagnostic)

    def _constraint_string(diagnostic, threshold):
        """
        Return iris constraint as a string for deferred creation of the
        lambda functions.

        Args:
            diagnostic (str):
                Name of diagnostic
            threshold (iris.AuxCoord):
                Single-valued construct holding the required threshold

        Returns:
            (str)
        """
        # Resolve the coordinate name before reading the threshold value,
        # so an invalid diagnostic name is reported first.
        threshold_name = _threshold_coord_name(diagnostic)
        threshold_val = threshold.points.item()
        return ("iris.Constraint(name='{diagnostic}', {threshold_name}="
                "lambda cell: {threshold_val} * {float_min} < cell < "
                "{threshold_val} * {float_max})".format(
                    diagnostic=diagnostic, threshold_name=threshold_name,
                    threshold_val=threshold_val,
                    float_min=float_min, float_max=float_max))

    # if input is list, loop over and return a list of strings
    if isinstance(diagnostics, list):
        return [
            _constraint_string(diagnostic, threshold)
            for diagnostic, threshold in zip(diagnostics, thresholds)
        ]

    # otherwise, return a string
    return _constraint_string(diagnostics, thresholds)
def split_forecasts_and_truth(cubes, truth_attribute):
    """
    A common utility for splitting the various inputs cubes required for
    calibration CLIs. These are generally the forecast cubes, historic
    truths, and in some instances a land-sea mask is also required.

    Cubes are first grouped by diagnostic name (probability cube names are
    reduced to the underlying diagnostic name; other names are used as
    they are). With two groups, the group holding exactly one cube is
    taken to be the land-sea mask. The remaining cubes are split into
    truths and historical forecasts according to truth_attribute.

    Args:
        cubes (list):
            A list of input cubes which will be split into relevant groups.
            These include the historical forecasts, in the format supported
            by the calibration CLIs, and the truth cubes.
        truth_attribute (str):
            An attribute and its value in the format of "attribute=value",
            which must be present on truth cubes. Only the first "=" is
            treated as the separator, so the value may itself contain "=".

    Returns:
        (tuple): tuple containing:
            **forecast** (iris.cube.Cube):
                A cube containing all the historic forecasts.
            **truth** (iris.cube.Cube):
                A cube containing all the truth data.
            **land_sea_mask** (iris.cube.Cube or None):
                If found within the input cubes list a land-sea mask will
                be returned, else None is returned.

    Raises:
        ValueError:
            An unexpected number of distinct cube names were passed in.
        ValueError:
            truth_attribute is not in the format "attribute=value".
        IOError:
            More than one cube was identified as a land-sea mask.
        IOError:
            Missing truth or historical forecast in input cubes.
    """
    grouped_cubes = {}
    for cube in cubes:
        try:
            cube_name = extract_diagnostic_name(cube.name())
        except ValueError:
            # Not a probability cube name; group under the name as given.
            cube_name = cube.name()
        grouped_cubes.setdefault(cube_name, []).append(cube)

    if len(grouped_cubes) == 1:
        # Only one group - all forecast/truth cubes
        land_sea_mask = None
        diag_name = next(iter(grouped_cubes))
    elif len(grouped_cubes) == 2:
        # Two groups - the one with exactly one cube matching a name should
        # be the land_sea_mask, since we require more than 2 cubes in
        # the forecast/truth group. The sort is stable, so groups of equal
        # size keep their insertion order.
        landsea_name, diag_name = sorted(
            grouped_cubes, key=lambda name: len(grouped_cubes[name]))
        if len(grouped_cubes[landsea_name]) != 1:
            raise IOError('Expected one cube for land-sea mask.')
        land_sea_mask = grouped_cubes[landsea_name][0]
    else:
        raise ValueError('Must have cubes with 1 or 2 distinct names.')

    # split non-land_sea_mask cubes on forecast vs truth; partition keeps
    # any further "=" characters as part of the attribute value
    truth_key, separator, truth_value = truth_attribute.partition('=')
    if not separator:
        raise ValueError(
            'truth_attribute must be in the format "attribute=value", '
            'got "{}".'.format(truth_attribute))

    split_cubes = {'truth': [], 'historical forecast': []}
    for cube in grouped_cubes[diag_name]:
        if cube.attributes.get(truth_key) == truth_value:
            split_cubes['truth'].append(cube)
        else:
            split_cubes['historical forecast'].append(cube)

    missing_inputs = ' and '.join(
        kind for kind, members in split_cubes.items() if not members)
    if missing_inputs:
        raise IOError('Missing ' + missing_inputs + ' input.')

    truth = MergeCubes()(split_cubes['truth'])
    forecast = MergeCubes()(split_cubes['historical forecast'])
    return forecast, truth, land_sea_mask
def check_input_cubes(self, cubes):
    """
    Verify that every diagnostic and threshold needed by the decision tree
    is available in the input cubes.

    Side effects on the instance:
      - self.coord_named_threshold is set to True when a threshold-type
        coordinate is called "threshold" (rather than being named after
        the diagnostic), for backward compatibility.
      - self.threshold_coord_names records, per diagnostic, any threshold
        coordinate name that differs from the name extracted from the
        diagnostic.
      - each query threshold is converted in place to the units of the
        matching cube's threshold coordinate.

    Args:
        cubes (iris.cube.CubeList):
            A CubeList containing the input diagnostic cubes.

    Returns:
        dict or None:
            A dictionary of (keyword) nodes names where the diagnostic
            data is missing and (values) node associated with
            diagnostic_missing_action. None if there are no such nodes.

    Raises:
        IOError:
            Raises an IOError if any of the required input data is missing.
            The error includes details of which fields are missing.
    """
    optional_node_data_missing = {}
    missing_data = []

    for key, query in self.queries.items():
        required = zip(
            get_parameter_names(
                expand_nested_lists(query, "diagnostic_fields")),
            expand_nested_lists(query, "diagnostic_thresholds"),
            expand_nested_lists(query, "diagnostic_conditions"))

        for diagnostic, threshold, condition in required:
            # Look for the diagnostic by name first.
            matched_cube = cubes.extract(iris.Constraint(name=diagnostic))
            if not matched_cube:
                if "diagnostic_missing_action" in query:
                    # Node is optional: note which node to go to instead.
                    optional_node_data_missing[key] = (
                        query[query["diagnostic_missing_action"]])
                else:
                    missing_data.append([diagnostic, threshold, condition])
                continue

            # Express the required threshold in the cube's own units.
            cube_threshold_coord = find_threshold_coordinate(matched_cube[0])
            threshold.convert_units(cube_threshold_coord.units)
            threshold_value = threshold.points.item()
            threshold_name = cube_threshold_coord.name()

            # Record coordinate names that break the expected convention.
            if extract_diagnostic_name(diagnostic) != threshold_name:
                self.threshold_coord_names[diagnostic] = threshold_name

            # Flag use of the old threshold coordinate name.
            if (threshold_name == "threshold"
                    and not self.coord_named_threshold):
                self.coord_named_threshold = True

            # A threshold of (effectively) zero is matched with an
            # absolute tolerance; anything else with a relative one.
            if abs(threshold_value) < self.float_abs_tolerance:
                lower = -self.float_abs_tolerance
                upper = self.float_abs_tolerance
            else:
                lower = threshold_value * (1.0 - self.float_tolerance)
                upper = threshold_value * (1.0 + self.float_tolerance)

            # The constraint is used immediately below, so the closures
            # over loop variables are evaluated with the current values.
            threshold_constraint = iris.Constraint(
                coord_values={
                    threshold_name: lambda cell: lower < cell < upper},
                cube_func=lambda cube: (
                    find_threshold_coordinate(cube).attributes[
                        "spp__relative_to_threshold"] == condition),
            )
            if not matched_cube.extract(threshold_constraint):
                missing_data.append(
                    [diagnostic, threshold_value, condition])

    if missing_data:
        line_template = (
            "name: {}, threshold: {}, " "spp__relative_to_threshold: {}\n")
        header = ("Weather Symbols input cubes are missing"
                  " the following required"
                  " input fields:\n")
        raise IOError(
            header + "".join(
                line_template.format(*item) for item in missing_data))

    return optional_node_data_missing or None