Exemple #1
0
def process(cube: cli.inputcube,
            *,
            threshold_values: cli.comma_separated_list = None,
            threshold_config: cli.inputjson = None,
            threshold_units: str = None,
            comparison_operator='>',
            fuzzy_factor: float = None,
            collapse_coord: str = None,
            vicinity: float = None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is configured with "None", fuzzy bounds are
            disabled for every threshold in the configuration.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding
        ValueError: If neither threshold_config nor threshold_values is set

    Warns:
        UserWarning: If collapsing coordinates with a masked array

    """
    import warnings
    import numpy as np

    from improver.blending.calculate_weights_and_blend import WeightAndBlend
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both")
    if threshold_config and fuzzy_factor:
        raise ValueError(
            "--threshold-config cannot be used for fuzzy thresholding")
    if not threshold_config and threshold_values is None:
        # Fail with a clear message instead of a TypeError from iterating
        # over None further down.
        raise ValueError(
            "One of --threshold-config or --threshold-values must be "
            "provided")

    if threshold_config:
        thresholds = [np.float32(key) for key in threshold_config]
        configured_bounds = list(threshold_config.values())
        # Fuzzy bounds must be supplied for all thresholds or none of them.
        # A single "None" entry therefore disables fuzzy bounds entirely,
        # rather than crashing when a later entry tries to append to None.
        if any(bounds == "None" for bounds in configured_bounds):
            fuzzy_bounds = None
        else:
            fuzzy_bounds = [tuple(bounds) for bounds in configured_bounds]
    else:
        thresholds = [np.float32(x) for x in threshold_values]
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator)(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity)(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    if collapse_coord is None:
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across collapse_coord with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)

    return plugin(result_no_collapse_coord)
Exemple #2
0
def process(
    mandatory_attributes_json: cli.inputjson,
    *,
    name="air_pressure_at_sea_level",
    units=None,
    spatial_grid="latlon",
    time_period: int = None,
    json_input: cli.inputjson = None,
    ensemble_members: int = 8,
    grid_spacing: float = None,
    domain_corner: cli.comma_separated_list_of_float = None,
    npoints: int = 71,
):
    """Generate a cube with metadata only.

    Args:
        mandatory_attributes_json (Dict):
            Specifies the values of the mandatory attributes, title, institution and
            source.
        name (Optional[str]):
            Output variable name, or if creating a probability cube the name of the
            underlying variable to which the probability field applies.
        units (Optional[str]):
            Output variable units, or if creating a probability cube the units of the
            underlying variable / threshold.
        spatial_grid (Optional[str]):
            What type of x/y coordinate values to use.  Permitted values are
            "latlon" or "equalarea".
        time_period (Optional[int]):
            The period in minutes between the time bounds. This is used to calculate
            the lower time bound. If unset the diagnostic will be instantaneous, i.e.
            without time bounds.
        json_input (Optional[Dict]):
            Dictionary containing values for one or more of: "name", "units", "time",
            "time_bounds", "frt", "spp__relative_to_threshold", "attributes"
            (dictionary of additional metadata attributes) and "coords" (dictionary).
            "coords" can contain "height_levels" (list of height/pressure level values),
            and one of "realizations", "percentiles" or "thresholds" (list of dimension
            values). Values given here override the matching keyword arguments.
            The dictionary passed in is not modified.
        ensemble_members (Optional[int]):
            Number of ensemble members. Default 8. Will not be used if "realizations",
            "percentiles" or "thresholds" provided in json_input.
        grid_spacing (Optional[float]):
            Resolution of grid (metres or degrees).
        domain_corner (Optional[Tuple[float, float]]):
            Bottom left corner of grid domain (y,x) (degrees for latlon or metres for
            equalarea).
        npoints (Optional[int]):
            Number of points along each of the y and x spatial axes.

    Returns:
        iris.cube.Cube:
            Output of generate_metadata()
    """
    from improver.synthetic_data.generate_metadata import generate_metadata
    from improver.synthetic_data.utilities import (
        get_height_levels,
        get_leading_dimension,
    )
    from improver.utilities.temporal import cycletime_to_datetime

    # Build the keyword arguments explicitly rather than from locals(): before
    # Python 3.13 the mapping returned by locals() can be refreshed from the
    # frame (e.g. when a debugger or coverage tracer is active), which would
    # re-add removed keys and leak unrelated local names into the call below.
    generate_metadata_args = {
        "name": name,
        "units": units,
        "spatial_grid": spatial_grid,
        "time_period": time_period,
        "ensemble_members": ensemble_members,
        "grid_spacing": grid_spacing,
        "domain_corner": domain_corner,
        "npoints": npoints,
    }

    if json_input is not None:
        # Work on a shallow copy so the caller's dictionary is left untouched
        # and can safely be reused for another call.
        json_input = dict(json_input)

        # Get leading dimension and height/pressure data from json_input
        if "coords" in json_input:
            coord_data = json_input.pop("coords")

            (
                json_input["leading_dimension"],
                json_input["cube_type"],
            ) = get_leading_dimension(coord_data)
            json_input["height_levels"], json_input["pressure"] = get_height_levels(
                coord_data
            )

        # Convert str time, frt and time_bounds to datetime
        for time_key in ("time", "frt"):
            if time_key in json_input:
                json_input[time_key] = cycletime_to_datetime(json_input[time_key])

        if "time_bounds" in json_input:
            json_input["time_bounds"] = [
                cycletime_to_datetime(bound) for bound in json_input["time_bounds"]
            ]

        # Values supplied through json_input take precedence over the
        # keyword arguments
        generate_metadata_args.update(json_input)
    return generate_metadata(mandatory_attributes_json, **generate_metadata_args)
Exemple #3
0
def process(
    cube: cli.inputcube,
    land_sea_mask: cli.inputcube = None,
    *,
    threshold_values: cli.comma_separated_list = None,
    threshold_config: cli.inputjson = None,
    threshold_units: str = None,
    comparison_operator=">",
    fuzzy_factor: float = None,
    collapse_coord: str = None,
    vicinity: float = None,
):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        land_sea_mask (Cube):
            Binary land-sea mask data. True for land-points, False for sea.
            Restricts in-vicinity processing to only include points of a
            like mask value. Can only be used together with 'vicinity'.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is configured with "None", fuzzy bounds are
            disabled for every threshold in the configuration.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. Only "realization" is supported.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding
        ValueError: If neither threshold_config nor threshold_values is set
        ValueError: If land_sea_mask is given without vicinity
        ValueError: If collapse_coord is anything other than "realization"
    """
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.cube_manipulation import collapse_realizations
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both"
        )
    if threshold_config and fuzzy_factor:
        raise ValueError("--threshold-config cannot be used for fuzzy thresholding")
    if not threshold_config and threshold_values is None:
        # Fail with a clear message instead of a TypeError from iterating
        # over None further down.
        raise ValueError(
            "One of --threshold-config or --threshold-values must be provided"
        )

    if threshold_config:
        # Ensure thresholds are float64 to avoid rounding errors during
        # possible unit conversion.
        thresholds = [float(key) for key in threshold_config]
        configured_bounds = list(threshold_config.values())
        # Fuzzy bounds must be supplied for all thresholds or none of them.
        # A single "None" entry therefore disables fuzzy bounds entirely,
        # rather than crashing when a later entry tries to append to None.
        if any(bounds == "None" for bounds in configured_bounds):
            fuzzy_bounds = None
        else:
            fuzzy_bounds = [tuple(bounds) for bounds in configured_bounds]
    else:
        # Ensure thresholds are float64 to avoid rounding errors during possible
        # unit conversion.
        thresholds = [float(x) for x in threshold_values]
        fuzzy_bounds = None

    each_threshold_func_list = []

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        each_threshold_func_list.append(
            OccurrenceWithinVicinity(vicinity, land_mask_cube=land_sea_mask)
        )
    elif land_sea_mask is not None:
        # Explicit None check: do not rely on the truthiness of a cube object
        raise ValueError("Cannot apply land-mask cube without in-vicinity processing")

    if collapse_coord == "realization":
        # TODO change collapse_coord argument to boolean "collapse_realizations"
        # (requires suite change)
        each_threshold_func_list.append(collapse_realizations)
    elif collapse_coord is not None:
        raise ValueError("Cannot collapse over non-realization coordinate")

    result = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator,
        each_threshold_func=each_threshold_func_list,
    )(cube)

    if vicinity is not None:
        result.rename(in_vicinity_name_format(result.name()))

    return result