예제 #1
0
 def test_in_vicinity_name_format(self):
     """Check 'in_vicinity' is placed ahead of the threshold suffix.

     The fixture cube's current name is formatted first and compared with
     the above-threshold form (presumably the fixture is named
     'probability_of_X_above_threshold' -- confirm against setUp). The cube
     is then renamed to a below-threshold name and formatted again.
     """
     # Format the name the fixture cube starts with before renaming it.
     formatted_above = in_vicinity_name_format(self.cube.name())

     self.cube.rename("probability_of_X_below_threshold")
     formatted_below = in_vicinity_name_format(self.cube.name())

     self.assertEqual(
         formatted_above, "probability_of_X_in_vicinity_above_threshold")
     self.assertEqual(
         formatted_below, "probability_of_X_in_vicinity_below_threshold")
예제 #2
0
 def test_no_above_below_threshold(self):
     """Check a name lacking an above/below_threshold suffix simply has
     '_in_vicinity' appended to it."""
     self.cube.rename("probability_of_X")
     expected_name = "probability_of_X_in_vicinity"
     self.assertEqual(
         in_vicinity_name_format(self.cube.name()), expected_name)
예제 #3
0
 def test_between_thresholds(self):
     """Check 'in_vicinity' is placed ahead of a "between_thresholds"
     suffix."""
     self.cube.rename('probability_of_visibility_between_thresholds')
     formatted = in_vicinity_name_format(self.cube.name())
     expected_name = (
         'probability_of_visibility_in_vicinity_between_thresholds')
     self.assertEqual(formatted, expected_name)
예제 #4
0
 def test_in_vicinity_already_exists(self):
     """Check a name that already ends with 'in_vicinity' is returned
     unchanged."""
     existing_name = "probability_of_X_in_vicinity"
     self.cube.rename(existing_name)
     self.assertEqual(
         in_vicinity_name_format(self.cube.name()),
         "probability_of_X_in_vicinity")
예제 #5
0
def process(cube: cli.inputcube,
            *,
            threshold_values: cli.comma_separated_list = None,
            threshold_config: cli.inputjson = None,
            threshold_units: str = None,
            comparison_operator='>',
            fuzzy_factor: float = None,
            collapse_coord: str = None,
            vicinity: float = None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units' It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is given as "None", fuzzy bounds are disabled
            for every threshold in the configuration.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding

    Warns:
        UserWarning: If collapsing coordinates with a masked array

    """
    import warnings
    import numpy as np

    from improver.blending.calculate_weights_and_blend import WeightAndBlend
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both")
    if threshold_config and fuzzy_factor:
        raise ValueError(
            "--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            thresholds.append(np.float32(key))
            if fuzzy_bounds is None:
                # An earlier threshold had no bounds, so fuzzy thresholding
                # is disabled; skip the bounds of all remaining thresholds
                # rather than appending to None.
                continue
            if bounds == "None":
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        thresholds = [np.float32(x) for x in threshold_values]
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator)(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity)(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    if collapse_coord is None:
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across realizations with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)

    return plugin(result_no_collapse_coord)
예제 #6
0
    def process(self, cube: Cube) -> Cube:
        """
        Apply the vicinity processing to every y-x slice of the input cube.

        For each configured radius, maximum_within_vicinity is applied to
        each y-x slice and the slices are merged back into one cube, to
        which a coordinate recording the radius is added. The per-radius
        cubes are then merged into a single cube whose leading dimension
        order matches the input. With a single radius the returned cube
        carries a scalar radius_of_vicinity coordinate; with several radii
        it becomes a dimension coordinate following any probabilistic /
        realization coordinates.

        Args:
            cube:
                Thresholded cube.

        Returns:
            Cube containing the occurrences within a vicinity for each radius,
            calculated for each yx slice, which have been merged to yield a
            single cube. The cube is renamed to include "in_vicinity".

        Raises:
            ValueError: Cube and land mask have differing spatial coordinates.
        """
        if self.land_mask_cube:
            if not spatial_coords_match([cube, self.land_mask_cube]):
                raise ValueError(
                    "Supplied cube do not have the same spatial coordinates and land mask"
                )

        # Radii are either already expressed in grid points or must be
        # converted from distances for this cube's grid.
        if self.native_grid_point_radius:
            grid_point_radii = self.radii
        else:
            grid_point_radii = [
                distance_to_number_of_grid_cells(cube, distance)
                for distance in self.radii
            ]

        per_radius_cubes = CubeList()

        # Names of dimension coordinates without a coordinate system; these
        # are restored as the leading dimensions of the output.
        leading_dimensions = [
            dim_coord.name()
            for dim_coord in cube.coords(dim_coords=True)
            if not dim_coord.coord_system
        ]

        for radius, grid_point_radius in zip(self.radii, grid_point_radii):
            yx_coords = [cube.coord(axis="y"), cube.coord(axis="x")]
            slice_maxima = CubeList([
                self.maximum_within_vicinity(yx_slice, grid_point_radius)
                for yx_slice in cube.slices(yx_coords)
            ])
            merged = slice_maxima.merge_cube()

            # Reinstate any dimensions present on the input cube.
            merged = check_cube_coordinates(cube, merged)

            # Record the vicinity radius that was applied to these data.
            self._add_vicinity_coordinate(merged, radius)

            per_radius_cubes.append(merged)

        # Combine the cubes generated for the individual radii.
        result_cube = per_radius_cubes.merge_cube()

        # Make the leading dimension order of the output match the input.
        enforce_coordinate_ordering(result_cube, leading_dimensions)

        if is_probability(result_cube):
            vicinity_name = in_vicinity_name_format(result_cube.name())
        else:
            vicinity_name = f"{result_cube.name()}_in_vicinity"
        result_cube.rename(vicinity_name)

        return result_cube
예제 #7
0
def process(
    cube: cli.inputcube,
    land_sea_mask: cli.inputcube = None,
    *,
    threshold_values: cli.comma_separated_list = None,
    threshold_config: cli.inputjson = None,
    threshold_units: str = None,
    comparison_operator=">",
    fuzzy_factor: float = None,
    collapse_coord: str = None,
    vicinity: float = None,
):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units' It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is given as "None", fuzzy bounds are disabled
            for every threshold in the configuration.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. Only "realization" is accepted.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence
        land_sea_mask (Cube):
            Binary land-sea mask data. True for land-points, False for sea.
            Restricts in-vicinity processing to only include points of a
            like mask value.

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding
        ValueError: If land_sea_mask is supplied without vicinity
        ValueError: If collapse_coord is set to anything but "realization"
    """
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.cube_manipulation import collapse_realizations
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both"
        )
    if threshold_config and fuzzy_factor:
        raise ValueError("--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            # Ensure thresholds are float64 to avoid rounding errors during
            # possible unit conversion.
            thresholds.append(float(key))
            if fuzzy_bounds is None:
                # An earlier threshold had no bounds, so fuzzy thresholding
                # is disabled; skip the bounds of all remaining thresholds
                # rather than appending to None.
                continue
            if bounds == "None":
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        # Ensure thresholds are float64 to avoid rounding errors during possible
        # unit conversion.
        thresholds = [float(x) for x in threshold_values]
        fuzzy_bounds = None

    # Callables handed to BasicThreshold via each_threshold_func.
    each_threshold_func_list = []

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        each_threshold_func_list.append(
            OccurrenceWithinVicinity(vicinity, land_mask_cube=land_sea_mask)
        )
    elif land_sea_mask:
        raise ValueError("Cannot apply land-mask cube without in-vicinity processing")

    if collapse_coord == "realization":
        # TODO change collapse_coord argument to boolean "collapse_realizations"
        # (requires suite change)
        each_threshold_func_list.append(collapse_realizations)
    elif collapse_coord is not None:
        raise ValueError("Cannot collapse over non-realization coordinate")

    result = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator,
        each_threshold_func=each_threshold_func_list,
    )(cube)

    if vicinity is not None:
        result.rename(in_vicinity_name_format(result.name()))

    return result
예제 #8
0
def process(cube,
            threshold_values=None,
            threshold_dict=None,
            threshold_units=None,
            comparison_operator='>',
            fuzzy_factor=None,
            collapse_coord="None",
            vicinity=None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. By default data are tested to be above the
    threshold; use 'comparison_operator' to test below thresholds instead.
    A fuzzy factor or fuzzy bounds may be provided to capture data that is
    close to the threshold.

    Args:
        cube (iris.cube.Cube):
             A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270 300. Must be omitted if 'threshold_dict'
            is used.
            Default is None.
        threshold_dict (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units' It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure
            "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            Once any threshold is given as "None", fuzzy bounds are
            disabled for every threshold.
            Repeated thresholds with different bounds are not
            handled well. Only the last duplicate will be used.
            Default is None.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Default is >. Valid choices: > >= < <= gt ge lt le.
        fuzzy_factor (float):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            N.B. A fuzzy factor cannot be used with a zero threshold or a
            threshold_dict.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. The default is the string "None", meaning no collapse;
            the value None is treated in the same way.
        vicinity (float):
            If set, distance in metres used to define the vicinity within
            which to search for an occurrence.

    Returns:
        iris.cube.Cube:
            processed Cube.

    Raises:
        RuntimeError:
            If threshold_dict and threshold_values are both used.
        ValueError:
            If a threshold_dict key cannot be interpreted as a float.

    Warns:
        warning:
            If collapsing coordinates with a masked array.

    """
    if threshold_dict and threshold_values:
        raise RuntimeError('threshold_dict cannot be used '
                           'with threshold_values')
    if threshold_dict:
        try:
            thresholds = []
            fuzzy_bounds = []
            is_fuzzy = True
            for key, bounds in threshold_dict.items():
                thresholds.append(float(key))
                if is_fuzzy:
                    # If any threshold has no bounds, fuzzy_bounds is
                    # set to None and subsequent bounds checks are skipped
                    if bounds == "None":
                        is_fuzzy = False
                        fuzzy_bounds = None
                    else:
                        fuzzy_bounds.append(tuple(bounds))
        except ValueError as err:
            # Extend error message with hint for common JSON error, keeping
            # the original exception as the explicit cause.
            raise type(err)(
                "{} in threshold dictionary file. \nHINT: Try adding a zero "
                "after the decimal point.".format(err)) from err
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)("{} in dictionary file.".format(err)) from err
    else:
        thresholds = threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator).process(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity).process(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    # Accept a real None as well as the legacy "None" string so that callers
    # passing None do not attempt to blend over a missing coordinate.
    if collapse_coord is None or collapse_coord == "None":
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across realizations with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)
    return plugin.process(result_no_collapse_coord)