def process(cube: cli.inputcube,
            *,
            threshold_values: cli.comma_separated_list = None,
            threshold_config: cli.inputjson = None,
            threshold_units: str = None,
            comparison_operator='>',
            fuzzy_factor: float = None,
            collapse_coord: str = None,
            vicinity: float = None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is given as "None", no fuzzy bounds are used
            for any of the thresholds.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold
            value(s) which should be treated as fuzzy. Data which fail a test
            against the hard threshold value may return a fractional truth
            value if they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding

    Warns:
        UserWarning: If collapsing coordinates with a masked array

    """
    import warnings

    import numpy as np

    from improver.blending.calculate_weights_and_blend import WeightAndBlend
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both")
    if threshold_config and fuzzy_factor:
        raise ValueError(
            "--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            thresholds.append(np.float32(key))
            # Once any threshold has been given without bounds, fuzzy
            # bounds are disabled for the whole configuration and later
            # entries must not be appended (fuzzy_bounds is None by then).
            if fuzzy_bounds is None:
                continue
            if bounds == "None":
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        thresholds = [np.float32(x) for x in threshold_values]
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator)(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity)(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    if collapse_coord is None:
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across realizations with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear", y0val=1.0, ynval=1.0)
    return plugin(result_no_collapse_coord)
def process(cube, threshold_values=None, threshold_dict=None,
            threshold_units=None, comparison_operator='>',
            fuzzy_factor=None, collapse_coord="None", vicinity=None):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. The direction of the test is selected with
    'comparison_operator' (data above the threshold by default). A fuzzy
    factor or fuzzy bounds may be provided to capture data that is close
    to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270 300. Must be omitted if 'threshold_dict'
            is used.
            Default is None.
        threshold_dict (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure
            "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is given as "None", no fuzzy bounds are used
            for any of the thresholds.
            Repeated thresholds with different bounds are not
            handled well. Only the last duplicate will be used.
            Default is None.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Default is >. Valid choices: > >= < <= gt ge lt le.
        fuzzy_factor (float):
            A decimal fraction defining the factor about the threshold
            value(s) which should be treated as fuzzy. Data which fail a test
            against the hard threshold value may return a fractional truth
            value if they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            N.B. A fuzzy factor cannot be used with a zero threshold or a
            threshold_dict.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. The default is the string "None", meaning no collapse.
        vicinity (float):
            If set, distance in metres used to define the vicinity within
            which to search for an occurrence.

    Returns:
        iris.cube.Cube:
            processed Cube.

    Raises:
        RuntimeError:
            If threshold_dict and threshold_values are both used.
        ValueError:
            If a key of threshold_dict cannot be converted to a float.
        Exception:
            Any other error raised while reading threshold_dict is re-raised
            as the same type with " in dictionary file." appended.

    Warns:
        warning:
            If collapsing coordinates with a masked array.

    """
    if threshold_dict and threshold_values:
        raise RuntimeError('threshold_dict cannot be used '
                           'with threshold_values')
    if threshold_dict:
        try:
            thresholds = []
            fuzzy_bounds = []
            for key, bounds in threshold_dict.items():
                thresholds.append(float(key))
                # Once any threshold has been given without bounds,
                # fuzzy_bounds stays None and later bounds are skipped.
                if fuzzy_bounds is None:
                    continue
                if bounds == "None":
                    fuzzy_bounds = None
                else:
                    fuzzy_bounds.append(tuple(bounds))
        except ValueError as err:
            # Extend error message with hint for common JSON error.
            raise type(err)(
                "{} in threshold dictionary file. \nHINT: Try adding a zero "
                "after the decimal point.".format(err)) from err
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)("{} in dictionary file.".format(err)) from err
    else:
        thresholds = threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds, fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds, threshold_units=threshold_units,
        comparison_operator=comparison_operator).process(cube)

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(vicinity).process(
            result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    # The string "None" (not the None object) is the "no collapse" sentinel.
    if collapse_coord == "None":
        return result_no_collapse_coord

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # Take a weighted mean across realizations with equal weights
    plugin = WeightAndBlend(collapse_coord, "linear",
                            y0val=1.0, ynval=1.0)
    return plugin.process(result_no_collapse_coord)
def process(
    cube: cli.inputcube,
    land_sea_mask: cli.inputcube = None,
    *,
    threshold_values: cli.comma_separated_list = None,
    threshold_config: cli.inputjson = None,
    threshold_units: str = None,
    comparison_operator=">",
    fuzzy_factor: float = None,
    collapse_coord: str = None,
    vicinity: float = None,
):
    """Module to apply thresholding to a parameter dataset.

    Calculate the threshold truth values of input data relative to the
    provided threshold value. A fuzzy factor or fuzzy bounds may be provided
    to smooth probabilities where values are close to the threshold.

    Args:
        cube (iris.cube.Cube):
            A cube to be processed.
        threshold_values (list of float):
            Threshold value or values about which to calculate the truth
            values; e.g. 270,300. Must be omitted if 'threshold_config'
            is used.
        threshold_config (dict):
            Threshold configuration containing threshold values and
            (optionally) fuzzy bounds. Best used in combination with
            'threshold_units'. It should contain a dictionary of strings that
            can be interpreted as floats with the structure:
            "THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
            e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
            or with structure "THRESHOLD_VALUE": "None" (no fuzzy bounds).
            If any threshold is given as "None", no fuzzy bounds are used
            for any of the thresholds.
            Repeated thresholds with different bounds are ignored; only the
            last duplicate will be used.
        threshold_units (str):
            Units of the threshold values. If not provided the units are
            assumed to be the same as those of the input cube. Specifying
            the units here will allow a suitable conversion to match
            the input units if possible.
        comparison_operator (str):
            Indicates the comparison_operator to use with the threshold.
            e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
            evaluate data < threshold. When using fuzzy thresholds, there is
            no difference between < and <= or > and >=.
            Options: > >= < <= gt ge lt le.
        fuzzy_factor (float or None):
            A decimal fraction defining the factor about the threshold
            value(s) which should be treated as fuzzy. Data which fail a test
            against the hard threshold value may return a fractional truth
            value if they fall within this fuzzy factor region.
            Fuzzy factor must be in the range 0-1, with higher values
            indicating a narrower fuzzy factor region / sharper threshold.
            A fuzzy factor cannot be used with a zero threshold or a
            threshold_config file.
        collapse_coord (str):
            An optional ability to set which coordinate we want to collapse
            over. Only "realization" is accepted.
        vicinity (float):
            Distance in metres used to define the vicinity within which to
            search for an occurrence
        land_sea_mask (Cube):
            Binary land-sea mask data. True for land-points, False for sea.
            Restricts in-vicinity processing to only include points of a
            like mask value.

    Returns:
        iris.cube.Cube:
            Cube of probabilities relative to the given thresholds

    Raises:
        ValueError: If threshold_config and threshold_values are both set
        ValueError: If threshold_config is used for fuzzy thresholding
        ValueError: If land_sea_mask is given without vicinity
        ValueError: If collapse_coord is set to anything but "realization"
    """
    from improver.metadata.probabilistic import in_vicinity_name_format
    from improver.threshold import BasicThreshold
    from improver.utilities.cube_manipulation import collapse_realizations
    from improver.utilities.spatial import OccurrenceWithinVicinity

    if threshold_config and threshold_values:
        raise ValueError(
            "--threshold-config and --threshold-values are mutually exclusive "
            "- please set one or the other, not both"
        )
    if threshold_config and fuzzy_factor:
        raise ValueError("--threshold-config cannot be used for fuzzy thresholding")

    if threshold_config:
        thresholds = []
        fuzzy_bounds = []
        for key, bounds in threshold_config.items():
            # Ensure thresholds are float64 to avoid rounding errors during
            # possible unit conversion.
            thresholds.append(float(key))
            # Once any threshold has been given without bounds, fuzzy
            # bounds are disabled for the whole configuration and later
            # entries must not be appended (fuzzy_bounds is None by then).
            if fuzzy_bounds is None:
                continue
            if bounds == "None":
                fuzzy_bounds = None
            else:
                fuzzy_bounds.append(tuple(bounds))
    else:
        # Ensure thresholds are float64 to avoid rounding errors during possible
        # unit conversion.
        thresholds = [float(x) for x in threshold_values]
        fuzzy_bounds = None

    # Callables handed to BasicThreshold via each_threshold_func.
    each_threshold_func_list = []

    if vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        each_threshold_func_list.append(
            OccurrenceWithinVicinity(vicinity, land_mask_cube=land_sea_mask)
        )
    elif land_sea_mask:
        raise ValueError("Cannot apply land-mask cube without in-vicinity processing")

    if collapse_coord == "realization":
        # TODO change collapse_coord argument to boolean "collapse_realizations"
        # (requires suite change)
        each_threshold_func_list.append(collapse_realizations)
    elif collapse_coord is not None:
        raise ValueError("Cannot collapse over non-realization coordinate")

    result = BasicThreshold(
        thresholds,
        fuzzy_factor=fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds,
        threshold_units=threshold_units,
        comparison_operator=comparison_operator,
        each_threshold_func=each_threshold_func_list,
    )(cube)

    if vicinity is not None:
        result.rename(in_vicinity_name_format(result.name()))

    return result
def main(argv=None):
    """Load in arguments and get going.

    Parses the command line, loads the input cube, applies BasicThreshold
    with either the positional threshold values or the thresholds (and
    optional fuzzy bounds) read from a JSON configuration file, optionally
    applies in-vicinity processing and a blend over --collapse-coord, and
    saves the result to the output path.

    Args:
        argv (list of str or None):
            Arguments passed to the parser's parse_args; None is forwarded
            unchanged.

    Raises:
        ValueError:
            If the threshold configuration file is not valid JSON or a
            threshold key cannot be converted to a float.
        Exception:
            Any other error raised while reading the configuration is
            re-raised as the same type with the file name appended.

    Warns:
        warning:
            If collapsing coordinates with a masked array.
    """
    parser = ArgParser(
        description="Calculate the threshold truth value of input data "
        "relative to the provided threshold value. By default data are "
        "tested to be above the thresholds, though the --below_threshold "
        "flag enables testing below thresholds. A fuzzy factor or fuzzy "
        "bounds may be provided to capture data that is close to the "
        "threshold.")
    parser.add_argument("input_filepath", metavar="INPUT_FILE",
                        help="A path to an input NetCDF file to be processed")
    parser.add_argument("output_filepath", metavar="OUTPUT_FILE",
                        help="The output path for the processed NetCDF")
    parser.add_argument("threshold_values", metavar="THRESHOLD_VALUES",
                        nargs="*", type=float,
                        help="Threshold value or values about which to "
                        "calculate the truth values; e.g. 270 300. "
                        "Must be omitted if --threshold_config is used.")
    parser.add_argument("--threshold_config", metavar="THRESHOLD_CONFIG",
                        type=str,
                        help="Threshold configuration JSON file containing "
                        "thresholds and (optionally) fuzzy bounds. Best used "
                        "in combination with --threshold_units. "
                        "It should contain a dictionary of strings that can "
                        "be interpreted as floats with the structure: "
                        " \"THRESHOLD_VALUE\": [LOWER_BOUND, UPPER_BOUND] "
                        "e.g: {\"280.0\": [278.0, 282.0], "
                        "\"290.0\": [288.0, 292.0]}, or with structure "
                        " \"THRESHOLD_VALUE\": \"None\" (no fuzzy bounds). "
                        "Repeated thresholds with different bounds are not "
                        "handled well. Only the last duplicate will be used.")
    parser.add_argument("--threshold_units", metavar="THRESHOLD_UNITS",
                        default=None, type=str,
                        help="Units of the threshold values. If not provided "
                        "the units are assumed to be the same as those of the "
                        "input dataset. Specifying the units here will allow "
                        "a suitable conversion to match the input units if "
                        "possible.")
    parser.add_argument("--below_threshold", default=False,
                        action='store_true',
                        help="By default truth values of 1 are returned for "
                        "data ABOVE the threshold value(s). Using this flag "
                        "changes this behaviour to return 1 for data below "
                        "the threshold values.")
    parser.add_argument("--fuzzy_factor", metavar="FUZZY_FACTOR",
                        default=None, type=float,
                        help="A decimal fraction defining the factor about "
                        "the threshold value(s) which should be treated as "
                        "fuzzy. Data which fail a test against the hard "
                        "threshold value may return a fractional truth value "
                        "if they fall within this fuzzy factor region. Fuzzy "
                        "factor must be in the range 0-1, with higher values "
                        "indicating a narrower fuzzy factor region / sharper "
                        "threshold. NB A fuzzy factor cannot be used with a "
                        "zero threshold or a threshold_config file.")
    parser.add_argument("--collapse-coord", type=str,
                        metavar="COLLAPSE-COORD", default="None",
                        help="An optional ability to set which coordinate "
                        "we want to collapse over. The default is set "
                        "to None.")
    parser.add_argument("--vicinity", type=float, default=None,
                        help="If set,"
                        " distance in metres used to define the vicinity "
                        "within which to search for an occurrence.")

    args = parser.parse_args(args=argv)

    # Deal with mutual-exclusions that ArgumentParser can't handle:
    if args.threshold_values and args.threshold_config:
        raise parser.error("--threshold_config option is not compatible "
                           "with THRESHOLD_VALUES list.")
    if args.fuzzy_factor and args.threshold_config:
        raise parser.error("--threshold_config option is not compatible "
                           "with --fuzzy_factor option.")

    cube = load_cube(args.input_filepath)

    if args.threshold_config:
        try:
            # Read in threshold configuration from JSON file.
            with open(args.threshold_config, 'r') as input_file:
                thresholds_from_file = json.load(input_file)
            thresholds = []
            fuzzy_bounds = []
            for key, bounds in thresholds_from_file.items():
                thresholds.append(float(key))
                # Once any threshold has been given without bounds,
                # fuzzy_bounds stays None and later bounds are skipped.
                if fuzzy_bounds is None:
                    continue
                if bounds == "None":
                    fuzzy_bounds = None
                else:
                    fuzzy_bounds.append(tuple(bounds))
        except ValueError as err:
            # Extend error message with hint for common JSON error.
            # A plain ValueError is raised because json.JSONDecodeError (a
            # ValueError subclass) cannot be rebuilt from a message alone.
            raise ValueError(
                "{} in JSON file {}. \nHINT: Try "
                "adding a zero after the decimal point.".format(
                    err, args.threshold_config)) from err
        except Exception as err:
            # Extend any errors with message about WHERE this occurred.
            raise type(err)(
                "{} in JSON file {}".format(
                    err, args.threshold_config)) from err
    else:
        thresholds = args.threshold_values
        fuzzy_bounds = None

    result_no_collapse_coord = BasicThreshold(
        thresholds, fuzzy_factor=args.fuzzy_factor,
        fuzzy_bounds=fuzzy_bounds, threshold_units=args.threshold_units,
        below_thresh_ok=args.below_threshold).process(cube)

    if args.vicinity is not None:
        # smooth thresholded occurrences over local vicinity
        result_no_collapse_coord = OccurrenceWithinVicinity(
            args.vicinity).process(result_no_collapse_coord)
        new_cube_name = in_vicinity_name_format(
            result_no_collapse_coord.name())
        result_no_collapse_coord.rename(new_cube_name)

    # The string "None" (not the None object) is the "no collapse" sentinel.
    if args.collapse_coord == "None":
        save_netcdf(result_no_collapse_coord, args.output_filepath)
        return

    # Raise warning if result_no_collapse_coord is masked array
    if np.ma.isMaskedArray(result_no_collapse_coord.data):
        warnings.warn("Collapse-coord option not fully tested with "
                      "masked data.")
    # This is where we fix values for y0val, slope and weighting_mode.
    # In this case they are fixed to the values required for realization
    # collapse. This can be changed if other functionality needs to be
    # implemented.
    weights = ChooseDefaultWeightsLinear(y0val=1.0, slope=0.0).process(
        result_no_collapse_coord, args.collapse_coord)
    blending_plugin = WeightedBlendAcrossWholeDimension(
        args.collapse_coord, weighting_mode='weighted_mean')
    result_collapse_coord = blending_plugin.process(
        result_no_collapse_coord, weights)
    save_netcdf(result_collapse_coord, args.output_filepath)