예제 #1
0
def process(cube: cli.inputcube,
            *,
            threshold_values: cli.comma_separated_list = None,
            threshold_config: cli.inputjson = None,
            threshold_units: str = None,
            comparison_operator='>',
            fuzzy_factor: float = None,
            collapse_coord: str = None,
            vicinity: float = None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is given as "None", fuzzy bounds are disabled
            for every threshold in the configuration.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding

    Warns:
        UserWarning: If collapsing coordinates with a masked array

    """
    import warnings
    import numpy as np

    from improver.blending.calculate_weights_and_blend import WeightAndBlend
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both")
    if threshold_config and fuzzy_factor:
        raise ValueError(
            "--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            thresholds.append(np.float32(key))
            if fuzzy_bounds is None:
                # Fuzzy bounds were already disabled by an earlier "None"
                # entry; skip bounds handling for the remaining thresholds
                # instead of appending to None.
                continue
            if bounds == "None":
                # A threshold without bounds disables fuzzy bounds for the
                # whole configuration.
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        thresholds = [np.float32(x) for x in threshold_values]
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator)(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity)(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    if collapse_coord is None:
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across the collapse coordinate with equal
    # linear weights (y0val == ynval)
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)

    return plugin(result_no_collapse_coord)
예제 #2
0
def process(cube,
            threshold_values=None,
            threshold_dict=None,
            threshold_units=None,
            comparison_operator='>',
            fuzzy_factor=None,
            collapse_coord="None",
            vicinity=None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. The direction of the test is selected with
    'comparison_operator' (data above the threshold by default).
    A fuzzy factor or fuzzy bounds may be provided to capture data that is
    close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270 300. Must be omitted if 'threshold_dict'
            is used.
            Default is None.
        threshold_dict (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure
            "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            Once any threshold is given as "None", fuzzy bounds are
            disabled for every threshold.
            Repeated thresholds with different bounds are not
            handled well. Only the last duplicate will be used.
            Default is None.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Default is >. Valid choices: > >= < <= gt ge lt le.
        fuzzy_factor (float):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            N.B. A fuzzy factor cannot be used with a zero threshold or a
            threshold_dict.
        collapse_coord (str or None):
            An optional ability to set which coordinate we want to collapse
            over. The default is the string "None", which (like the None
            object) means no coordinate is collapsed.
        vicinity (float):
            If set, distance in metres used to define the vicinity within
            which to search for an occurrence.

    Returns:
        iris.cube.Cube:
            processed Cube.

    Raises:
        RuntimeError:
            If threshold_dict and threshold_values are both used.
        ValueError:
            If a key of threshold_dict cannot be converted to a float.

    Warns:
        warning:
            If collapsing coordinates with a masked array.

    """
    if threshold_dict and threshold_values:
        raise RuntimeError('threshold_dict cannot be used '
                           'with threshold_values')
    if threshold_dict:
        try:
            thresholds = []
            fuzzy_bounds = []
            for key, bounds in threshold_dict.items():
                thresholds.append(float(key))
                if fuzzy_bounds is None:
                    # Bounds were disabled by an earlier "None" entry, so
                    # the remaining bounds are skipped.
                    continue
                if bounds == "None":
                    fuzzy_bounds = None
                else:
                    fuzzy_bounds.append(tuple(bounds))
        except ValueError as err:
            # Extend error message with hint for common JSON error.
            raise type(err)(
                "{} in threshold dictionary file. \nHINT: Try adding a zero "
                "after the decimal point.".format(err)) from err
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)("{} in dictionary file.".format(err)) from err
    else:
        thresholds = threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator).process(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity).process(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    # Accept the None object as well as the legacy "None" string sentinel so
    # that callers passing None do not attempt a collapse over no coordinate.
    if collapse_coord is None or collapse_coord == "None":
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across the collapse coordinate with equal
    # linear weights (y0val == ynval)
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)
    return plugin.process(result_no_collapse_coord)
예제 #3
0
def process(
    cube: cli.inputcube,
    land_sea_mask: cli.inputcube = None,
    *,
    threshold_values: cli.comma_separated_list = None,
    threshold_config: cli.inputjson = None,
    threshold_units: str = None,
    comparison_operator=">",
    fuzzy_factor: float = None,
    collapse_coord: str = None,
    vicinity: float = None,
):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        land_sea_mask (Cube):
            Binary land-sea mask data. True for land-points, False for sea.
            Restricts in-vicinity processing to only include points of a
            like mask value. Only valid together with 'vicinity'.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is given as "None", fuzzy bounds are disabled
            for every threshold in the configuration.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold value(s)
            which should be treated as fuzzy. Data which fail a test against
            the hard threshold value may return a fractional truth value if
            they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. Only "realization" is supported.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding
        ValueError: If land_sea_mask is given without vicinity
        ValueError: If collapse_coord is set to anything but "realization"
    """
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.cube_manipulation import collapse_realizations
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both"
        )
    if threshold_config and fuzzy_factor:
        raise ValueError("--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            # Ensure thresholds are float64 to avoid rounding errors during
            # possible unit conversion.
            thresholds.append(float(key))
            if fuzzy_bounds is None:
                # Fuzzy bounds were already disabled by an earlier "None"
                # entry; skip bounds handling for the remaining thresholds
                # instead of appending to None.
                continue
            if bounds == "None":
                # A threshold without bounds disables fuzzy bounds for the
                # whole configuration.
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        # Ensure thresholds are float64 to avoid rounding errors during possible
        # unit conversion.
        thresholds = [float(x) for x in threshold_values]
        fuzzy_bounds = None

    # Callables handed to BasicThreshold via each_threshold_func.
    each_threshold_func_list = []

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        each_threshold_func_list.append(
            OccurrenceWithinVicinity(vicinity, land_mask_cube=land_sea_mask)
        )
    elif land_sea_mask:
        raise ValueError("Cannot apply land-mask cube without in-vicinity processing")

    if collapse_coord == "realization":
        # TODO change collapse_coord argument to boolean "collapse_realizations"
        # (requires suite change)
        each_threshold_func_list.append(collapse_realizations)
    elif collapse_coord is not None:
        raise ValueError("Cannot collapse over non-realization coordinate")

    result = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator,
        each_threshold_func=each_threshold_func_list,
    )(cube)

    if vicinity is not None:
        result.rename(in_vicinity_name_format(result.name()))

    return result
예제 #4
0
def main(argv=None):
    """Load in arguments and get going.

    Parses the command line, thresholds the cube loaded from the input
    file, optionally applies in-vicinity processing and a collapse over one
    coordinate, and saves the result to the output file.

    Args:
        argv (list of str or None):
            Command line arguments, passed to the parser's parse_args.

    Raises:
        ValueError:
            If the threshold configuration file contains a value that
            cannot be parsed; the message names the file.
        Exception:
            Any other error raised while reading the threshold
            configuration is re-raised as the same type with the file name
            appended to the message.

    Warns:
        UserWarning: If collapsing coordinates with a masked array.
    """
    parser = ArgParser(
        description="Calculate the threshold truth value of input data "
        "relative to the provided threshold value. By default data are "
        "tested to be above the thresholds, though the --below_threshold "
        "flag enables testing below thresholds. A fuzzy factor or fuzzy "
        "bounds may be provided to capture data that is close to the "
        "threshold.")
    parser.add_argument("input_filepath",
                        metavar="INPUT_FILE",
                        help="A path to an input NetCDF file to be processed")
    parser.add_argument("output_filepath",
                        metavar="OUTPUT_FILE",
                        help="The output path for the processed NetCDF")
    parser.add_argument("threshold_values",
                        metavar="THRESHOLD_VALUES",
                        nargs="*",
                        type=float,
                        help="Threshold value or values about which to "
                        "calculate the truth values; e.g. 270 300. "
                        "Must be omitted if --threshold_config is used.")
    parser.add_argument("--threshold_config",
                        metavar="THRESHOLD_CONFIG",
                        type=str,
                        help="Threshold configuration JSON file containing "
                        "thresholds and (optionally) fuzzy bounds. Best used "
                        "in combination  with --threshold_units. "
                        "It should contain a dictionary of strings that can "
                        "be interpreted as floats with the structure: "
                        " \"THRESHOLD_VALUE\": [LOWER_BOUND, UPPER_BOUND] "
                        "e.g: {\"280.0\": [278.0, 282.0], "
                        "\"290.0\": [288.0, 292.0]}, or with structure "
                        " \"THRESHOLD_VALUE\": \"None\" (no fuzzy bounds). "
                        "Repeated thresholds with different bounds are not "
                        "handled well. Only the last duplicate will be used.")
    parser.add_argument("--threshold_units",
                        metavar="THRESHOLD_UNITS",
                        default=None,
                        type=str,
                        help="Units of the threshold values. If not provided "
                        "the units are assumed to be the same as those of the "
                        "input dataset. Specifying the units here will allow "
                        "a suitable conversion to match the input units if "
                        "possible.")
    parser.add_argument("--below_threshold",
                        default=False,
                        action='store_true',
                        help="By default truth values of 1 are returned for "
                        "data ABOVE the threshold value(s). Using this flag "
                        "changes this behaviour to return 1 for data below "
                        "the threshold values.")
    parser.add_argument("--fuzzy_factor",
                        metavar="FUZZY_FACTOR",
                        default=None,
                        type=float,
                        help="A decimal fraction defining the factor about "
                        "the threshold value(s) which should be treated as "
                        "fuzzy. Data which fail a test against the hard "
                        "threshold value may return a fractional truth value "
                        "if they fall within this fuzzy factor region. Fuzzy "
                        "factor must be in the range 0-1, with higher values "
                        "indicating a narrower fuzzy factor region / sharper "
                        "threshold. NB A fuzzy factor cannot be used with a "
                        "zero threshold or a threshold_config file.")
    parser.add_argument("--collapse-coord",
                        type=str,
                        metavar="COLLAPSE-COORD",
                        default="None",
                        help="An optional ability to set which coordinate "
                        "we want to collapse over. The default is set "
                        "to None.")
    parser.add_argument("--vicinity",
                        type=float,
                        default=None,
                        help="If set,"
                        " distance in metres used to define the vicinity "
                        "within which to search for an occurrence.")

    args = parser.parse_args(args=argv)

    # Deal with mutual-exclusions that ArgumentParser can't handle:
    if args.threshold_values and args.threshold_config:
        raise parser.error("--threshold_config option is not compatible "
                           "with THRESHOLD_VALUES list.")
    if args.fuzzy_factor and args.threshold_config:
        raise parser.error("--threshold_config option is not compatible "
                           "with --fuzzy_factor option.")

    cube = load_cube(args.input_filepath)

    if args.threshold_config:
        try:
            # Read in threshold configuration from JSON file.
            with open(args.threshold_config, 'r') as input_file:
                thresholds_from_file = json.load(input_file)
            thresholds = []
            fuzzy_bounds = []
            is_fuzzy = True
            for key in thresholds_from_file.keys():
                thresholds.append(float(key))
                if is_fuzzy:
                    # If the first threshold has no bounds, fuzzy_bounds is
                    # set to None and subsequent bounds checks are skipped
                    if thresholds_from_file[key] == "None":
                        is_fuzzy = False
                        fuzzy_bounds = None
                    else:
                        fuzzy_bounds.append(tuple(thresholds_from_file[key]))
        except ValueError as err:
            # Extend error message with hint for common JSON error.
            # The message is built with str.format because an exception
            # cannot be concatenated with a string. A plain ValueError is
            # raised because subclasses such as json.JSONDecodeError cannot
            # be constructed from a single message argument.
            raise ValueError(
                "{} in JSON file {}. \nHINT: Try "
                "adding a zero after the decimal point.".format(
                    err, args.threshold_config)) from err
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)(
                "{} in JSON file {}".format(
                    err, args.threshold_config)) from err
    else:
        thresholds = args.threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=args.fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=args.threshold_units,
        below_thresh_ok=args.below_threshold).process(cube)

    if args.vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(
            args.vicinity).process(result_no_collapse_coord)

        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())

        result_no_collapse_coord.rename(new_cube_name)

    if args.collapse_coord == "None":
        # "None" is the string default of --collapse-coord: nothing to
        # collapse, so the thresholded cube is saved as-is.
        result = result_no_collapse_coord
    else:
        # Raise warning if result_no_collapse_coord is masked array
        if np.ma.isMaskedArray(result_no_collapse_coord.data):
            warnings.warn("Collapse-coord option not fully tested with "
                          "masked data.")
        # This is where we fix values for y0val, slope and weighting_mode.
        # In this case they are fixed to the values required for realization
        # collapse. This can be changed if other functionality needs to be
        # implemented.
        weights = ChooseDefaultWeightsLinear(y0val=1.0, slope=0.0).process(
            result_no_collapse_coord, args.collapse_coord)

        blending_plugin = WeightedBlendAcrossWholeDimension(
            args.collapse_coord, weighting_mode='weighted_mean')
        result = blending_plugin.process(result_no_collapse_coord, weights)

    save_netcdf(result, args.output_filepath)