def test_in_vicinity_name_format(self):
    """Check 'in_vicinity' is inserted ahead of the above/below threshold
    suffix.

    The first conversion uses whatever name self.cube carries on entry
    (presumably "probability_of_X_above_threshold" from the fixture set-up,
    which is not visible here - confirm). The cube is then renamed to the
    below-threshold form and converted a second time.
    """
    expected = {
        "above": "probability_of_X_in_vicinity_above_threshold",
        "below": "probability_of_X_in_vicinity_below_threshold",
    }

    # Convert both names before asserting so that a failure on the first
    # comparison does not prevent the second conversion from running.
    actual_above = in_vicinity_name_format(self.cube.name())
    self.cube.rename("probability_of_X_below_threshold")
    actual_below = in_vicinity_name_format(self.cube.name())

    self.assertEqual(actual_above, expected["above"])
    self.assertEqual(actual_below, expected["below"])
def test_no_above_below_threshold(self):
    """Check a name with no above/below_threshold suffix simply has
    '_in_vicinity' appended."""
    self.cube.rename("probability_of_X")
    self.assertEqual(
        in_vicinity_name_format(self.cube.name()),
        "probability_of_X_in_vicinity",
    )
def test_between_thresholds(self):
    """Check 'in_vicinity' is inserted ahead of a "between_thresholds"
    suffix."""
    source_name = 'probability_of_visibility_between_thresholds'
    expected_name = 'probability_of_visibility_in_vicinity_between_thresholds'

    self.cube.rename(source_name)
    converted_name = in_vicinity_name_format(self.cube.name())

    self.assertEqual(converted_name, expected_name)
def test_in_vicinity_already_exists(self):
    """Check a name that already ends in 'in_vicinity' is returned
    unchanged."""
    vicinity_name = "probability_of_X_in_vicinity"
    self.cube.rename(vicinity_name)
    self.assertEqual(in_vicinity_name_format(self.cube.name()), vicinity_name)
def process(cube: cli.inputcube,
            *,
            threshold_values: cli.comma_separated_list = None,
            threshold_config: cli.inputjson = None,
            threshold_units: str = None,
            comparison_operator='>',
            fuzzy_factor: float = None,
            collapse_coord: str = None,
            vicinity: float = None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
            If any threshold is given as "None", fuzzy bounds are disabled
            for every threshold and any bounds listed after it are ignored.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding

    Warns:
        UserWarning: If collapsing coordinates with a masked array
    """
    import warnings

    import numpy as np

    from improver.blending.calculate_weights_and_blend import WeightAndBlend
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both")
    if threshold_config and fuzzy_factor:
        raise ValueError(
            "--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            thresholds.append(np.float32(key))
            # Once any threshold has declared "None", fuzzy bounds are
            # disabled for the whole configuration and later bounds are
            # skipped. Without this guard a bounded entry following a "None"
            # entry would call .append on None and raise AttributeError.
            if fuzzy_bounds is None:
                continue
            if bounds == "None":
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        thresholds = [np.float32(x) for x in threshold_values]
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds, fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds, threshold_units=threshold_units,
        comparison_operator=comparison_operator)(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity)(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    if collapse_coord is None:
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across realizations with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)

    return plugin(result_no_collapse_coord)
def process(self, cube: Cube) -> Cube:
    """
    Apply the in-vicinity maximum to every y-x slice of a cube.

    For each configured radius, the input is sliced over its y and x
    coordinates, maximum_within_vicinity is applied to each slice, and the
    slices are merged back together. A coordinate recording the radius is
    added to each per-radius cube before the per-radius cubes are merged
    into the single cube that is returned.

    If a single vicinity radius is provided, a new scalar
    radius_of_vicinity coordinate will be found on the returned cube. If
    multiple radii are provided, this coordinate will be a dimension
    coordinate following any probabilistic / realization coordinates.

    The returned cube is renamed: probability cubes go through
    in_vicinity_name_format, anything else has "_in_vicinity" appended.

    Args:
        cube:
            Thresholded cube.

    Returns:
        Cube containing the occurrences within a vicinity for each radius,
        calculated for each yx slice, which have been merged to yield a
        single cube.

    Raises:
        ValueError: Cube and land mask have differing spatial coordinates.
    """
    land_mask = self.land_mask_cube
    if land_mask and not spatial_coords_match([cube, land_mask]):
        raise ValueError(
            "Supplied cube do not have the same spatial coordinates and land mask"
        )

    # Radii are used as given when they are already expressed in grid
    # points; otherwise each one is converted to a number of grid cells.
    if self.native_grid_point_radius:
        radii_in_cells = self.radii
    else:
        radii_in_cells = [
            distance_to_number_of_grid_cells(cube, radius)
            for radius in self.radii
        ]

    # Dimension coordinates with no coord_system (the non-spatial ones),
    # recorded so they can be restored as leading dimensions on the output.
    leading_dimensions = [
        coord.name()
        for coord in cube.coords(dim_coords=True)
        if not coord.coord_system
    ]

    per_radius_cubes = CubeList()
    for radius, cell_radius in zip(self.radii, radii_in_cells):
        yx_coords = [cube.coord(axis="y"), cube.coord(axis="x")]
        slice_maxima = CubeList(
            [
                self.maximum_within_vicinity(yx_slice, cell_radius)
                for yx_slice in cube.slices(yx_coords)
            ]
        )
        # Merge the slices, then put dimensions back if they were there
        # before.
        vicinity_cube = check_cube_coordinates(cube, slice_maxima.merge_cube())
        # Record the vicinity radius that was applied to this cube.
        self._add_vicinity_coordinate(vicinity_cube, radius)
        per_radius_cubes.append(vicinity_cube)

    # Combine the cubes produced for each vicinity radius and make the
    # leading dimension order match the input.
    result_cube = per_radius_cubes.merge_cube()
    enforce_coordinate_ordering(result_cube, leading_dimensions)

    current_name = result_cube.name()
    if is_probability(result_cube):
        new_name = in_vicinity_name_format(current_name)
    else:
        new_name = f"{current_name}_in_vicinity"
    result_cube.rename(new_name)

    return result_cube
def process(
    cube: cli.inputcube,
    land_sea_mask: cli.inputcube = None,
    *,
    threshold_values: cli.comma_separated_list = None,
    threshold_config: cli.inputjson = None,
    threshold_units: str = None,
    comparison_operator=">",
    fuzzy_factor: float = None,
    collapse_coord: str = None,
    vicinity: float = None,
):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
            If any threshold is given as "None", fuzzy bounds are disabled
            for every threshold and any bounds listed after it are ignored.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. Only "realization" is accepted; any other non-None value
            raises a ValueError.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence
        land_sea_mask (Cube):
            Binary land-sea mask data. True for land-points, False for sea.
            Restricts in-vicinity processing to only include points of a
            like mask value.

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding
        ValueError: If land_sea_mask is supplied without vicinity
        ValueError: If collapse_coord is set to anything but "realization"
    """
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.cube_manipulation import collapse_realizations
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both"
        )
    if threshold_config and fuzzy_factor:
        raise ValueError("--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            # Ensure thresholds are float64 to avoid rounding errors during
            # possible unit conversion.
            thresholds.append(float(key))
            # Once any threshold has declared "None", fuzzy bounds are
            # disabled for the whole configuration and later bounds are
            # skipped. Without this guard a bounded entry following a "None"
            # entry would call .append on None and raise AttributeError.
            if fuzzy_bounds is None:
                continue
            if bounds == "None":
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        # Ensure thresholds are float64 to avoid rounding errors during possible
        # unit conversion.
        thresholds = [float(x) for x in threshold_values]
        fuzzy_bounds = None

    # Functions handed to BasicThreshold via each_threshold_func.
    each_threshold_func_list = []

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        each_threshold_func_list.append(
            OccurrenceWithinVicinity(vicinity, land_mask_cube=land_sea_mask)
        )
    elif land_sea_mask:
        raise ValueError("Cannot apply land-mask cube without in-vicinity processing")

    if collapse_coord == "realization":
        # TODO change collapse_coord argument to boolean "collapse_realizations"
        # (requires suite change)
        each_threshold_func_list.append(collapse_realizations)
    elif collapse_coord is not None:
        raise ValueError("Cannot collapse over non-realization coordinate")

    result = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator,
        each_threshold_func=each_threshold_func_list,
    )(cube)

    if vicinity is not None:
        result.rename(in_vicinity_name_format(result.name()))

    return result
def process(cube, threshold_values=None, threshold_dict=None,
            threshold_units=None, comparison_operator='>',
            fuzzy_factor=None, collapse_coord="None", vicinity=None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. By default data are tested to be above the
    threshold ('>'); use 'comparison_operator' to test against the threshold
    in a different way. A fuzzy factor or fuzzy bounds may be provided to
    capture data that is close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270 300. Must be omitted if 'threshold_dict'
            is used.
            Default is None.
        threshold_dict (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            Repeated thresholds with different bounds are not
            handled well. Only the last duplicate will be used.
            If any threshold is given as "None", fuzzy bounds are disabled
            for every threshold and any bounds listed after it are ignored.
            Default is None.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Default is >. Valid choices: > >= < <= gt ge lt le.
        fuzzy_factor (float):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            N.B. A fuzzy factor cannot be used with a zero threshold or a
            threshold_dict.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. The default is the string "None", meaning no collapse.
        vicinity (float):
            If set, distance in metres used to define the vicinity within
            which to search for an occurrence. Default is None (no vicinity
            processing).

    Returns:
        iris.cube.Cube:
            processed Cube.

    Raises:
        RuntimeError:
            If threshold_dict and threshold_values are both used.
        ValueError:
            If a threshold_dict key cannot be interpreted as a float. The
            original error is attached as the cause.
        Exception:
            Any other error raised while reading threshold_dict is re-raised
            as the same type with the location appended to the message.

    Warns:
        warning:
            If collapsing coordinates with a masked array.

    """
    if threshold_dict and threshold_values:
        raise RuntimeError('threshold_dict cannot be used '
                           'with threshold_values')
    if threshold_dict:
        try:
            thresholds = []
            fuzzy_bounds = []
            is_fuzzy = True
            for key, bounds in threshold_dict.items():
                thresholds.append(float(key))
                if is_fuzzy:
                    # As soon as one threshold has no bounds, fuzzy_bounds
                    # is set to None and subsequent bounds checks are
                    # skipped.
                    if bounds == "None":
                        is_fuzzy = False
                        fuzzy_bounds = None
                    else:
                        fuzzy_bounds.append(tuple(bounds))
        except ValueError as err:
            # Extend error message with hint for common JSON error, keeping
            # the original exception as the explicit cause.
            raise type(err)(
                "{} in threshold dictionary file. \nHINT: Try adding a zero "
                "after the decimal point.".format(err)) from err
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)("{} in dictionary file.".format(err)) from err
    else:
        thresholds = threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds, fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds, threshold_units=threshold_units,
        comparison_operator=comparison_operator).process(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity).process(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    # The sentinel for "no collapse" is the string "None", not the None
    # object; this is part of the existing interface.
    if collapse_coord == "None":
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across realizations with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear",
                            y0val=1.0, ynval=1.0)
    return plugin.process(result_no_collapse_coord)