Example #1
 def convert(self, unit, density=998.2, molar_mass=12.011):
     """Using cf_units (UDUNITS2) convert the unit in place.

     Besides ordinary conversions between commensurable units, two extra
     cases are handled by scaling the data first:

     - handles ( M L-2 T-1 ) <--> ( L T-1 ) by dividing / multiplying by
       ``density`` (kg m-3; the default assumes water)
     - handles ( mol ) <--> ( M ) by multiplying / dividing by
       ``molar_mass`` (g mol-1; the default assumes carbon)

     Args:
         unit (str): the target unit.
         density (float): mass density in kg m-3 used to bridge units
             that differ by a factor of ``kg m-3``.
         molar_mass (float): molar mass in g mol-1 used to bridge units
             that differ by a factor of ``g mol-1``.

     Returns:
         self, to allow call chaining.

     Raises:
         ValueError: if the variable has no ``units`` attribute.
     """
     if 'units' not in self.ds[self.varname].attrs:
         msg = "Cannot convert the units of the DataArray lacking the 'units' attribute"
         raise ValueError(msg)
     src_unit = Unit(self.ds[self.varname].units)
     tar_unit = Unit(unit)
     mass_density = Unit("kg m-3")
     molar_density = Unit("g mol-1")

     # Work out the density / molar-mass scalings once, so exactly the same
     # steps can be applied to the variable and to its bounds.
     scalings = []  # list of (divide, factor)
     if ((src_unit / tar_unit) / mass_density).is_dimensionless():
         scalings.append((True, density))
         src_unit /= mass_density
     elif ((tar_unit / src_unit) / mass_density).is_dimensionless():
         scalings.append((False, density))
         src_unit *= mass_density
     if ((src_unit / tar_unit) / molar_density).is_dimensionless():
         scalings.append((True, molar_mass))
         src_unit /= molar_density
     elif ((tar_unit / src_unit) / molar_density).is_dimensionless():
         scalings.append((False, molar_mass))
         src_unit *= molar_density

     names = [self.varname]
     # NOTE(review): the body of the original "bounds" branch was not
     # visible; converting the bounds variable the same way as the data is
     # a reconstruction -- confirm against the upstream implementation.
     bounds_name = self.ds[self.varname].attrs.get("bounds")
     if bounds_name is not None and bounds_name in self.ds:
         names.append(bounds_name)

     for name in names:
         for divide, factor in scalings:
             if divide:
                 self.ds[name] /= factor
             else:
                 self.ds[name] *= factor
         src_unit.convert(self.ds[name].data, tar_unit, inplace=True)
         self.ds[name].attrs['units'] = unit
     return self
Example #2
def unit_conversion_fac(from_unit, to_unit):
    """Returns multiplicative unit conversion factor for input units

    Input must be either instances of :class:`cf_units.Unit` class or string.
    The factor is obtained by converting the value ``1`` from ``from_unit``
    to ``to_unit``.

    Parameters
    ----------
    from_unit : :obj:`cf_units.Unit`, or :obj:`str`
        unit to be converted
    to_unit : :obj:`cf_units.Unit`, or :obj:`str`
        final unit

    Returns
    -------
    float
        multiplicative conversion factor

    Raises
    ------
    ValueError
        if units cannot be converted into each other using cf_units package
    """
    # NOTE(review): for units related by an offset (e.g. temperature scales)
    # the converted value of 1 is presumably not a pure multiplicative
    # factor -- confirm that callers only pass scale-related units.
    if isinstance(from_unit, str):
        from_unit = Unit(from_unit)

    return from_unit.convert(1, to_unit)
Example #3
    def convert(self, tar_unit):
        """Convert ``self.data`` from ``self.unit`` into ``tar_unit``.

        The converted values replace ``self.data`` and ``self.unit`` is
        updated to ``tar_unit``. Returns ``self`` so calls can be chained.
        """
        source, target = Unit(self.unit), Unit(tar_unit)
        self.data = source.convert(self.data, target)
        self.unit = tar_unit
        return self
Example #5
def unit_converter(data, inunit, outunit):
    """
    Unit converter. Takes an (numpy) array, valid udunits inunits and outunits
    as strings, and returns the array in outunits.

    Parameters
    ----------
    data : array_like
    inunit : string
             unit to convert from, must be UDUNITS-compatible string
    outunit : string
              unit to convert to, must be UDUNITS-compatible string

    Returns
    -------
    out : array_like
          a masked array carrying the input mask when ``data`` is a
          ``numpy.ma.MaskedArray``, otherwise a plain array

    Example
    -------
    >>> import numpy as np
    >>> out = unit_converter(np.array([1, 2]) * 1e12, "kg", "Gt")
    >>> out
    array([ 1.,  2.])
    """

    inunit = str(inunit)
    outunit = str(outunit)
    if isinstance(data, np.ma.MaskedArray):
        mask = data.mask
    else:
        mask = None
    data = np.array(data)
    if not (inunit == outunit):
        # Only *import* failures trigger the fallbacks below; a genuine
        # conversion error (e.g. incompatible units) now propagates instead
        # of being silently replaced by the hard-coded factor.
        # OSError is included because a missing shared library may surface
        # that way on import -- NOTE(review): confirm for the target setup.
        try:
            try:
                from cf_units import Unit
                in_unit = Unit(inunit)
                out_unit = Unit(outunit)
                outdata = in_unit.convert(data, out_unit)
            except (ImportError, OSError):
                from udunits2 import Converter, System, Unit
                sys = System()
                c = Converter((Unit(sys, inunit), Unit(sys, outunit)))
                outdata = c(data)
        except (ImportError, OSError):
            print(
                "Neither cf_units or udunits2 module found, you're on your own.")
            # Last-resort hard-coded factor kept from the original code.
            c = 1. / 1e3
            outdata = c * data
    else:
        outdata = data

    if mask is not None:
        return np.ma.array(outdata, mask=mask)
    else:
        return outdata
Example #6
    def _apply_minimum_precip_rate(self, precip_cube, cube):
        """Ensure that negative precipitation rates are capped at the defined
        minimum precipitation rate.

        Capping is only performed when ``self.operation`` is "subtract";
        for any other operation ``cube`` is returned untouched.

        Args:
            precip_cube (iris.cube.Cube):
                Cube containing a precipitation rate input field.
            cube (iris.cube.Cube):
                Cube containing the precipitation rate field after combining
                with orographic enhancement. Modified in place.

        Returns:
            iris.cube.Cube:
                Cube containing the precipitation rate field where any
                negative precipitation rates have been capped at the defined
                minimum precipitation rate.
        """
        if self.operation == "subtract":
            # self.min_precip_rate_mmh is expressed in mm/hr; express it in
            # the units of each cube before comparing.
            original_units = Unit("mm/hr")
            threshold_in_cube_units = original_units.convert(
                self.min_precip_rate_mmh, cube.units
            )
            threshold_in_precip_cube_units = original_units.convert(
                self.min_precip_rate_mmh, precip_cube.units
            )

            # Ignore invalid warnings generated if e.g. a NaN is encountered
            # within the comparisons.
            with np.errstate(invalid="ignore"):
                # Create a mask computed from where the input precipitation
                # cube is greater or equal to the threshold and the result
                # of combining the precipitation rate input cube with the
                # orographic enhancement has generated a cube with
                # precipitation rates at or below the threshold.
                mask = (precip_cube.data >= threshold_in_precip_cube_units) & (
                    cube.data <= threshold_in_cube_units
                )

                # Set any values lower than the threshold to be equal to
                # the minimum precipitation rate.
                cube.data[mask] = threshold_in_cube_units
        return cube
Example #7
    def get_vertical_extent(self, ds, data_dict):
        """Add the dataset's vertical extent, in meters, to ``data_dict``.

        Reads the ``geospatial_vertical_*`` global attributes from
        ``ds.metadata_mapping["NC_GLOBAL"]``, converts the min and max values
        to meters (sign-flipped when the positive direction is "up") and
        appends them to ``data_dict["extras"]`` under the keys
        ``vertical_min`` / ``vertical_max`` unless those keys are already
        present. Missing or unparsable attributes are logged, not raised.
        """
        global_atts = ds.metadata_mapping["NC_GLOBAL"]
        try:
            positive = global_atts["geospatial_vertical_positive"]
            if positive == "down":
                sign = 1
            elif positive == "up":
                sign = -1
            else:
                # Previously `sign` was left unbound here, which escaped the
                # handler below as an UnboundLocalError.
                raise ValueError(
                    "Unrecognised geospatial_vertical_positive value: "
                    "{!r}".format(positive))

            # convert to meters
            m = Unit("meters")
            orig_units = Unit(global_atts["geospatial_vertical_units"]) * sign

            extra_keys = {kvp["key"] for kvp in data_dict["extras"]}
            for key, attr_name in (
                ("vertical_min", "geospatial_vertical_min"),
                ("vertical_max", "geospatial_vertical_max"),
            ):
                if key in extra_keys:
                    continue
                converted = orig_units.convert(float(global_atts[attr_name]), m)
                if not isnan(converted):
                    data_dict["extras"].append({
                        "key": key,
                        "value": str(converted)
                    })
                else:
                    log.warning("%s was NaN, skipping", key)
        except (AttributeError, ValueError, KeyError):
            # NOTE(review): the original logging call name was not visible;
            # log.exception is used so the traceback is recorded.
            log.exception(
                "Encountered attribute error when attempting to get vertical bounds of OPeNDAP dataset"
            )
    def _getGridInformation(self):
        """Looks in the model output for cell areas as well as land fractions.

        Sets ``self.cell_areas`` (read from an ``areacella`` / ``area``
        variable and converted to m2, or computed from ``lat_bnds`` /
        ``lon_bnds``), ``self.land_fraction`` (when a ``sftlf`` /
        ``landfrac`` variable exists), ``self.land_areas`` and
        ``self.land_area``. Returns early, setting nothing, when neither
        cell areas nor both bounds variables are available.

        Raises:
            ValueError: if the cell areas and the land fraction have
                different shapes.
        """
        def _shiftLon(lon):
            # Map longitudes onto [-180, 180].
            return (lon <=
                    180) * lon + (lon > 180) * (lon - 360) + (lon < -180) * 360

        # Are there cell areas associated with this model? "areacella" takes
        # precedence over "area" when both are present.
        area_name = next(
            (name for name in ("areacella", "area") if name in self.variables),
            None)
        if area_name is not None:
            with Dataset(self.variables[area_name][0]) as f:
                A = f.variables[area_name]
                unit = Unit(A.units) if "units" in A.ncattrs() else Unit("m2")
                self.cell_areas = unit.convert(A[...], "m2", inplace=True)
        else:
            # NOTE(review): the statement under this condition was not
            # visible; returning early is a reconstruction.
            if not ("lat_bnds" in self.variables
                    and "lon_bnds" in self.variables):
                return
            with Dataset(self.variables["lat_bnds"][0]) as f:
                x = f.variables["lat_bnds"][...]
            with Dataset(self.variables["lon_bnds"][0]) as f:
                y = f.variables["lon_bnds"][...]
                # Roll so the cell with the smallest mean longitude is first,
                # then pin the outermost edges to +/-180.
                s = y.mean(axis=1).argmin()
                y = np.roll(_shiftLon(y), -s, axis=0)
                if y[0, 0] > y[0, 1]:
                    y[0, 0] = -180.
                if y[-1, 0] > y[-1, 1]:
                    y[-1, 1] = +180.
            self.cell_areas = il.CellAreas(None, None, lat_bnds=x, lon_bnds=y)

        # Now we do the same for land fractions; "sftlf" takes precedence
        # over "landfrac" when both are present.
        frac_name = next(
            (name for name in ("sftlf", "landfrac") if name in self.variables),
            None)
        if frac_name is None:
            self.land_areas = self.cell_areas
        else:
            with Dataset(self.variables[frac_name][0]) as f:
                self.land_fraction = f.variables[frac_name][...]
            # some models represent the fraction as a percent
            if np.ma.max(self.land_fraction) > 10:
                self.land_fraction *= 0.01
            # Shapes are tuples of ints: compare them directly (np.allclose
            # fails to broadcast when the ranks differ).
            if self.cell_areas.shape != self.land_fraction.shape:
                msg = "The model %s has areacella %s which is a different shape than sftlf %s" % (
                    self.name, str(self.cell_areas.shape),
                    str(self.land_fraction.shape))
                raise ValueError(msg)
            with np.errstate(over='ignore', under='ignore'):
                self.land_areas = self.cell_areas * self.land_fraction
        self.land_area = np.ma.sum(self.land_areas)
Example #9
 def test_gregorian_calendar_conversion_setup(self):
     # Build the inputs for the gregorian-calendar conversion checks.
     #
     # The calendar is passed as a distinct string object (self.MyStr) that
     # is not the same object as the cf_units constant, reproducing the
     # situation where an identity (`is` / `is not`) comparison against the
     # literal 'gregorian' gives the wrong answer.
     gregorian = cf_units.CALENDAR_GREGORIAN
     wrapped = self.MyStr(gregorian)
     self.assertIsNot(wrapped, gregorian)
     source = Unit('hours since 1970-01-01 00:00:00', calendar=wrapped)
     target = Unit('hours since 1969-11-30 00:00:00', calendar=wrapped)
     point = np.array([8.], dtype=np.float32)
     expected = np.array([776.], dtype=np.float32)
     return expected, source.convert(point, target)
Example #11
 def setUp(self):
     """Build the cubes shared by the tests.

     Creates a precipitation cube and an orographic enhancement cube, plus
     their sum ('added_cube') and difference ('subtracted_cube'); the
     latter contains some negative precipitation values that should be set
     to a minimum precipitation rate threshold."""
     precip_cube = set_up_precipitation_rate_cubelist()[0]
     enhancement = set_up_orographic_enhancement_cube()[0]
     # Zero the orographic enhancement wherever the precipitation rate is
     # below the minimum rate (expressed in the precipitation cube's units).
     min_rate = Unit("mm/hr").convert(MIN_PRECIP_RATE_MMH, precip_cube.units)
     below_minimum = precip_cube.data < min_rate
     enhancement.data[below_minimum] = 0.
     self.precip_cube = precip_cube
     self.oe_cube = enhancement
     self.added_cube = precip_cube + enhancement
     self.subtracted_cube = precip_cube - enhancement
Example #12
    def _apply_orographic_enhancement(self, precip_cube, oe_cube):
        """Combine the precipitation rate cube and the orographic enhancement
        cube.

        Args:
            precip_cube (iris.cube.Cube):
                Cube containing the input precipitation field.
            oe_cube (iris.cube.Cube):
                Cube containing the orographic enhancement field matching
                the validity time of the precipitation cube. Its units and
                data are modified in place.

        Returns:
            iris.cube.Cube:
                Cube containing the precipitation rate field modified by the
                orographic enhancement cube.

        Raises:
            ValueError: if ``self.operation`` is neither "add" nor
                "subtract".
        """
        # Convert orographic enhancement into the units of the precipitation
        # rate cube.
        # NOTE(review): this statement was missing under its comment and has
        # been restored from the comment's description -- confirm.
        oe_cube.convert_units(precip_cube.units)

        # Set orographic enhancement to be zero for points with a
        # precipitation rate below the minimum rate (self.min_precip_rate_mmh,
        # given in mm/hr).
        original_units = Unit("mm/hr")
        threshold_in_cube_units = original_units.convert(
            self.min_precip_rate_mmh, precip_cube.units
        )

        # Ignore invalid warnings generated if e.g. a NaN is encountered
        # within the less than (<) comparison.
        with np.errstate(invalid="ignore"):
            oe_cube.data[precip_cube.data < threshold_in_cube_units] = 0.0

        # Add / subtract orographic enhancement where data is not masked
        cube = precip_cube.copy()
        if self.operation == "add":
            cube.data = cube.data + oe_cube.data
        elif self.operation == "subtract":
            cube.data = cube.data - oe_cube.data
        else:
            msg = (
                "Operation '{}' not supported for combining "
                "precipitation rate and "
                "orographic enhancement.".format(self.operation)
            )
            raise ValueError(msg)

        return cube
Example #13
    def test_non_gregorian_calendar_conversion_dtype(self):
        # Converting between two 360_day time references should keep
        # floating point dtypes unchanged and yield int64 for integer input.
        u1 = Unit("hours since 2000-01-01 00:00:00", calendar="360_day")
        u2 = Unit("hours since 2000-01-02 00:00:00", calendar="360_day")
        for start_dtype, exp_convert in (
            (np.float32, True),
            (np.float64, True),
            (np.int32, False),
            (np.int64, False),
            # `np.int` was only an alias of the builtin and has been removed
            # from NumPy; use the builtin directly.
            (int, False),
        ):
            with self.subTest(start_dtype=start_dtype):
                data = np.arange(4, dtype=start_dtype)
                result = u1.convert(data, u2)

                if exp_convert:
                    self.assertEqual(result.dtype, start_dtype)
                else:
                    self.assertEqual(result.dtype, np.int64)
Example #14
    def _apply_orographic_enhancement(self, precip_cube, oe_cube):
        """Combine the precipitation rate cube and the orographic enhancement
        cube.

        Args:
            precip_cube (iris.cube.Cube):
                Cube containing the input precipitation field.
            oe_cube (iris.cube.Cube):
                Cube containing the orographic enhancement field matching
                the validity time of the precipitation cube. A copy is
                passed to check_cube_coordinates before any modification.

        Returns:
            cube (iris.cube.Cube):
                Cube containing the precipitation rate field modified by the
                orographic enhancement cube.
        """
        # Ensure the orographic enhancement cube matches the
        # dimensions of the precip_cube.
        oe_cube = check_cube_coordinates(precip_cube, oe_cube.copy())

        # Ensure that orographic enhancement is in the units of the
        # precipitation rate cube.
        # NOTE(review): this statement was missing under its comment and has
        # been restored from the comment's description -- confirm.
        oe_cube.convert_units(precip_cube.units)

        # Set orographic enhancement to be zero for points with a
        # precipitation rate below the minimum rate (self.min_precip_rate_mmh,
        # given in mm/hr).
        original_units = Unit("mm/hr")
        threshold_in_cube_units = (original_units.convert(
            self.min_precip_rate_mmh, precip_cube.units))

        # Ignore invalid warnings generated if e.g. a NaN is encountered
        # within the less than (<) comparison.
        with np.errstate(invalid='ignore'):
            oe_cube.data[precip_cube.data < threshold_in_cube_units] = 0.

        # Use CubeCombiner to combine the cubes.
        temp_cubelist = iris.cube.CubeList([precip_cube, oe_cube])
        # NOTE(review): the remaining argument(s) of process() were not
        # visible; passing the precipitation cube's name as the name of the
        # combined diagnostic is a reconstruction -- confirm against the
        # CubeCombiner.process signature.
        cube = CubeCombiner(self.operation).process(temp_cubelist,
                                                    precip_cube.name())
        return cube
Example #15
    def parse_date(datestr):
        """
        Parse the time query param

        Accepts either a relative expression of the form
        ``now-<integer><units>`` (e.g. ``now-3 hours``), where ``<units>`` is
        any unit cf_units can convert to hours, or any date string understood
        by ``dateparse``.

        Returns the parsed datetime (timezone-aware UTC for the relative
        form), or ``None`` if the value cannot be parsed.
        """
        try:
            if datestr.startswith('now-'):
                match = re.match(
                    r'^now-(?P<val>\d+)\s*(?P<units>\w+)$', datestr)
                if match is None:
                    # Malformed relative expression.
                    return None
                val = int(match.group('val'))
                units = match.group('units')
                # If not valid units, exception will throw
                unknown_unit = Unit(units)
                hrs = Unit('hours')
                # convert to hours
                num_hrs = unknown_unit.convert(val, hrs)
                dt_now = datetime.now(tz=timezone.utc)
                return dt_now - timedelta(hours=num_hrs)

            return dateparse(datestr)
        except Exception:
            # Best-effort parsing of a user-supplied query parameter: any
            # failure (bad type, unknown unit, unparsable date) means "no
            # usable date".
            return None
Example #16
def convert_unit(value, unit, new_unit):
    """
    One-line unit conversion

    Parameters
    ----------
    value: ``float``
        The starting value for the conversion.

    unit: ``str``
        The starting unit for the conversion.

    new_unit: ``str``
        The desired unit for the conversion

    Returns
    -------
    new_value: ``float``
        The starting value, but converted to the new unit.
    """
    start_unit = Unit(unit)
    return start_unit.convert(value, new_unit)
Example #17
from cf_units import Unit
import numpy as np
import matplotlib.pyplot as plt
from sympy import Symbol

# Quick check of a temperature conversion; the result is not used.
c, k = Unit("deg_c"), Unit("deg_k")
c.convert(0, k)

# Now we test the actual use case
# for a plot.

second, minute, meter = (Unit(name) for name in ("second", "minute", "meter"))
# xs: 1..9 seconds as units; ys: meters per each of those durations.
xs = list(map(lambda count: count * second, range(1, 10)))
ys = list(map(lambda duration: meter / duration, xs))

def plot_with_units(ax, xt, yt):
    """Plot unit-carrying values on ``ax`` after converting them to numbers.

    ``xt`` and ``yt`` are ``(values, target_unit)`` pairs. Each value must
    provide ``convert(1, target_unit)``, whose result is the number plotted.
    The converted lists are printed before plotting.
    """
    x_values, x_unit = xt
    y_values, y_unit = yt
    xnums = []
    for quantity in x_values:
        xnums.append(quantity.convert(1, x_unit))
    ynums = []
    for quantity in y_values:
        ynums.append(quantity.convert(1, y_unit))
    print(xnums, ynums)
    ax.plot(xnums, ynums)

def auto_plot_with_units(ax, xt, yt):
Example #18
class BasicThreshold(object):
    """Apply a threshold truth criterion to a cube.

    Calculate the threshold truth values based on a linear membership function
    around the threshold values provided. A cube will be returned with a new
    threshold dimension coordinate.

    Can operate on multiple time sequences within a cube.
    """

    def __init__(self, thresholds, fuzzy_factor=None,
                 fuzzy_bounds=None, threshold_units=None,
                 below_thresh_ok=False):
        """
        Set up for processing an in-or-out of threshold field, including the
        generation of fuzzy_bounds which are required to threshold an input
        cube (through self.process(cube)).  If fuzzy_factor is not None, fuzzy
        bounds are calculated using the threshold value in the units in which
        it is provided.

        The usage of fuzzy_factor is exemplified as follows:

        For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
        around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
        would be generated. The probabilities of exceeding values within this
        range are scaled linearly, so that 4.5 mm/hr yields a thresholded value
        of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore, in this
        case, the thresholded exceedance probabilities between 4.5 mm/hr and
        7.5 mm/hr would follow the pattern:

        ::

            Data value | Probability
                4.5     |   0
                5.0     |   0.167
                5.5     |   0.333
                6.0     |   0.5
                6.5     |   0.667
                7.0     |   0.833
                7.5     |   1.0

        Args:
            thresholds (list of floats or float):
                The threshold points for 'significant' datapoints.

        Keyword Args:
            fuzzy_factor (float):
                Specifies lower bound for fuzzy membership value when
                multiplied by each threshold. Upper bound is equivalent linear
                distance above threshold. If None, no fuzzy_factor is applied.
            fuzzy_bounds (list of tuples):
                Lower and upper bounds for fuzziness.
                List should be of same length as thresholds.
                Each entry in list should be a tuple of two floats
                representing the lower and upper bounds respectively.
                If None, no fuzzy_bounds are applied.
            threshold_units (string):
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            below_thresh_ok (boolean):
                True to count points as significant if *below* the threshold,
                False to count points as significant if *above* the threshold.

        Raises:
            ValueError: If a threshold of 0.0 is requested when using a fuzzy
                        factor.
            ValueError: If the fuzzy_factor is not greater than 0 and less
                        than 1.
            ValueError: If both fuzzy_factor and fuzzy_bounds are set
                        as this is ambiguous.
            AssertionError: If an entry of fuzzy_bounds does not have exactly
                        two values, or does not enclose its threshold.
        """
        # ensure threshold is a list, even if only a single value is provided
        self.thresholds = thresholds
        if np.isscalar(thresholds):
            self.thresholds = [thresholds]

        # if necessary, set threshold units
        if threshold_units is None:
            self.threshold_units = None
        else:
            self.threshold_units = Unit(threshold_units)

        # initialise threshold coordinate name as None
        self.threshold_coord_name = None

        # read fuzzy factor or set (default) to 1 (no smoothing)
        fuzzy_factor_loc = 1.
        if fuzzy_factor is not None:
            if fuzzy_bounds is not None:
                raise ValueError(
                    "Invalid combination of keywords. Cannot specify "
                    "fuzzy_factor and fuzzy_bounds together")
            if not 0 < fuzzy_factor < 1:
                raise ValueError(
                    "Invalid fuzzy_factor: must be >0 and <1: {}".format(
                        fuzzy_factor))
            if 0 in self.thresholds:
                raise ValueError(
                    "Invalid threshold with fuzzy factor: cannot use a "
                    "multiplicative fuzzy factor with threshold == 0")
            fuzzy_factor_loc = fuzzy_factor

        # Set fuzzy-bounds.  If neither fuzzy_factor nor fuzzy_bounds is set,
        # both lower_thr and upper_thr default to the threshold value.  A test
        # of this equality is used later to determine whether to process with
        # a sharp threshold or fuzzy bounds.
        if fuzzy_bounds is None:
            self.fuzzy_bounds = []
            for thr in self.thresholds:
                lower_thr = thr * fuzzy_factor_loc
                upper_thr = thr * (2. - fuzzy_factor_loc)
                # for negative thresholds the products come out reversed
                if thr < 0:
                    lower_thr, upper_thr = upper_thr, lower_thr
                self.fuzzy_bounds.append((lower_thr, upper_thr))
        else:
            self.fuzzy_bounds = fuzzy_bounds

        # ensure fuzzy_bounds is a list of tuples
        if isinstance(fuzzy_bounds, tuple):
            self.fuzzy_bounds = [fuzzy_bounds]

        # check that thresholds and fuzzy_bounds are self-consistent
        # (kept as assertions so the exception type seen by callers is
        # unchanged)
        for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
            assert len(bounds) == 2, ("Invalid bounds for one threshold: {}. "
                                      "Expected 2 floats.".format(bounds))
            bounds_msg = ("Threshold must be within bounds: "
                          "!( {} <= {} <= {} )".format(bounds[0], thr,
                                                       bounds[1]))
            assert bounds[0] <= thr, bounds_msg
            assert bounds[1] >= thr, bounds_msg

        self.below_thresh_ok = below_thresh_ok

    def __repr__(self):
        """Represent the configured plugin instance as a string."""
        return ('<BasicThreshold: thresholds {}, ' + 'fuzzy_bounds {}, ' +
                'below_thresh_ok: {}>').format(self.thresholds,
                                               self.fuzzy_bounds,
                                               self.below_thresh_ok)

    def _add_threshold_coord(self, cube, threshold):
        """
        Add a scalar threshold-type coordinate to a cube containing
        thresholded data and promote the new coordinate to be the
        leading dimension of the cube.

        Args:
            cube (iris.cube.Cube):
                Cube containing thresholded data (1s and 0s)
            threshold (np.float32):
                Value at which the data has been thresholded

        Returns:
            iris.cube.Cube:
                With new "threshold" axis
        """
        # NOTE(review): the keyword arguments of both DimCoord calls were not
        # visible. The name/units keywords follow from the handler below and
        # the process() docstring; var_name="threshold" is a reconstruction --
        # confirm.
        points = np.array([threshold], dtype=np.float32)
        try:
            coord = iris.coords.DimCoord(
                points,
                standard_name=self.threshold_coord_name,
                var_name="threshold",
                units=cube.units)
        except ValueError as cause:
            # fall back to a long_name when the diagnostic name is not a
            # recognised standard name
            if 'is not a valid standard_name' in str(cause):
                coord = iris.coords.DimCoord(
                    points,
                    long_name=self.threshold_coord_name,
                    var_name="threshold",
                    units=cube.units)
            else:
                raise

        # the scalar coordinate must be on the cube before it can be
        # promoted to a dimension
        cube.add_aux_coord(coord)
        return iris.util.new_axis(cube, coord)

    def process(self, input_cube):
        """Convert each point to a truth value based on provided threshold
        values. The truth value may or may not be fuzzy depending upon if
        fuzzy_bounds are supplied.  If the plugin has a "threshold_units"
        member, this is used to convert both thresholds and fuzzy bounds into
        the units of the input cube.

        Args:
            input_cube (iris.cube.Cube):
                Cube to threshold. The code is dimension-agnostic.

        Returns:
            cube (iris.cube.Cube):
                Cube after a threshold has been applied. The data within this
                cube will contain values between 0 and 1 to indicate whether
                a given threshold has been exceeded or not.

                The cube meta-data will contain:
                * Input_cube name prepended with
                probability_of_X_above(or below)_threshold (where X is
                the diagnostic under consideration)
                * Threshold dimension coordinate with same units as input_cube
                * Threshold attribute (above or below threshold)
                * Cube units set to (1).

        Raises:
            ValueError: if a np.nan value is detected within the input cube.
        """
        # Record input cube data type to ensure consistent output, though
        # integer data must become float to enable fuzzy thresholding.
        input_cube_dtype = input_cube.dtype
        if input_cube.dtype.kind == 'i':
            input_cube_dtype = np.float32

        thresholded_cubes = iris.cube.CubeList()
        if np.isnan(input_cube.data).any():
            raise ValueError("Error: NaN detected in input cube data")

        # if necessary, convert thresholds and fuzzy bounds into cube units.
        # The converted values are kept local: writing them back onto the
        # plugin would convert them again on every later call to process().
        thresholds = self.thresholds
        fuzzy_bounds = self.fuzzy_bounds
        if self.threshold_units is not None:
            thresholds = [
                self.threshold_units.convert(threshold, input_cube.units)
                for threshold in thresholds
            ]
            fuzzy_bounds = [
                tuple([
                    self.threshold_units.convert(threshold, input_cube.units)
                    for threshold in bounds
                ]) for bounds in fuzzy_bounds
            ]

        # set name of threshold coordinate to match input diagnostic
        self.threshold_coord_name = input_cube.name()

        # apply fuzzy thresholding
        for threshold, bounds in zip(thresholds, fuzzy_bounds):
            cube = input_cube.copy()
            # if upper and lower bounds are equal, set a deterministic 0/1
            # probability based on exceedance of the threshold
            if bounds[0] == bounds[1]:
                truth_value = cube.data > threshold
            # otherwise, scale exceedance probabilities linearly between 0/1
            # at the min/max fuzzy bounds and 0.5 at the threshold value
            else:
                # NOTE(review): the name of the scaling helper and any
                # further arguments were not visible; `rescale(...,
                # clip=True)` is a reconstruction -- confirm.
                truth_value = np.where(
                    cube.data < threshold,
                    rescale(cube.data,
                            data_range=(bounds[0], threshold),
                            scale_range=(0., 0.5),
                            clip=True),
                    rescale(cube.data,
                            data_range=(threshold, bounds[1]),
                            scale_range=(0.5, 1.),
                            clip=True),
                )
            truth_value = truth_value.astype(input_cube_dtype)
            # if requirement is for probabilities below threshold (rather than
            # above), invert the exceedance probability
            if self.below_thresh_ok:
                truth_value = 1. - truth_value

            cube.data = truth_value
            # Overwrite masked values that have been thresholded
            # with the un-thresholded values from the input cube.
            if np.ma.is_masked(cube.data):
                cube.data[input_cube.data.mask] = (
                    input_cube.data[input_cube.data.mask])
            cube = self._add_threshold_coord(cube, threshold)
            thresholded_cubes.append(cube)

        cube, = thresholded_cubes.concatenate()
        # TODO: Correct when formal cf-standards exists
        # Force the metadata to temporary conventions
        # NOTE(review): the renaming statements were not visible; they are
        # reconstructed from the naming convention stated in the docstring --
        # confirm.
        if self.below_thresh_ok:
            cube.attributes.update({'relative_to_threshold': 'below'})
            cube.rename(
                "probability_of_{}_below_threshold".format(cube.name()))
        else:
            cube.attributes.update({'relative_to_threshold': 'above'})
            cube.rename(
                "probability_of_{}_above_threshold".format(cube.name()))
        cube.units = Unit(1)

        cube = enforce_coordinate_ordering(cube,
                                           ["realization", "percentile_over"])

        return cube
Example #19
class BasicThreshold(BasePlugin):

    """Apply a threshold truth criterion to a cube.

    Calculate the threshold truth values based on a linear membership function
    around the threshold values provided. A cube will be returned with a new
    threshold dimension coordinate.

    Can operate on multiple time sequences within a cube.
    """

    # NOTE(review): the default for ``comparison_operator`` was not visible in
    # this copy of the file; '>' is assumed here -- confirm against callers.
    def __init__(self, thresholds, fuzzy_factor=None,
                 fuzzy_bounds=None, threshold_units=None,
                 comparison_operator='>'):
        """
        Set up for processing an in-or-out of threshold field, including the
        generation of fuzzy_bounds which are required to threshold an input
        cube (through self.process(cube)).  If fuzzy_factor is not None, fuzzy
        bounds are calculated using the threshold value in the units in which
        it is provided.

        The usage of fuzzy_factor is exemplified as follows:

        For a 6 mm/hr threshold with a 0.75 fuzzy factor, a range of 25%
        around this threshold (between (6*0.75=) 4.5 and (6*(2-0.75)=) 7.5)
        would be generated. The probabilities of exceeding values within this
        range are scaled linearly, so that 4.5 mm/hr yields a thresholded value
        of 0 and 7.5 mm/hr yields a thresholded value of 1. Therefore, in this
        case, the thresholded exceedance probabilities between 4.5 mm/hr and
        7.5 mm/hr would follow the pattern:

        ::

            Data value | Probability
                4.5     |   0
                5.0     |   0.167
                5.5     |   0.333
                6.0     |   0.5
                6.5     |   0.667
                7.0     |   0.833
                7.5     |   1.0

        Args:
            thresholds (list of float or float):
                The threshold points for 'significant' datapoints.
            fuzzy_factor (float):
                Specifies lower bound for fuzzy membership value when
                multiplied by each threshold. Upper bound is equivalent linear
                distance above threshold. If None, no fuzzy_factor is applied.
            fuzzy_bounds (list of tuple):
                Lower and upper bounds for fuzziness.
                List should be of same length as thresholds.
                Each entry in list should be a tuple of two floats
                representing the lower and upper bounds respectively.
                If None, no fuzzy_bounds are applied.
            threshold_units (str):
                Units of the threshold values. If not provided the units are
                assumed to be the same as those of the input cube.
            comparison_operator (str):
                Indicates the comparison_operator to use with the threshold.
                e.g. 'ge' or '>=' to evaluate data >= threshold or '<' to
                evaluate data < threshold. When using fuzzy thresholds, there
                is no difference between < and <= or > and >=.
                Valid choices: > >= < <= gt ge lt le.

        Raises:
            ValueError: If a threshold of 0.0 is requested when using a fuzzy
                        factor.
            ValueError: If the fuzzy_factor is not greater than 0 and less
                        than 1.
            ValueError: If both fuzzy_factor and fuzzy_bounds are set
                        as this is ambiguous.
            ValueError: If an entry of fuzzy_bounds does not hold exactly two
                        values, or does not enclose its threshold.
            ValueError: If comparison_operator is not a known choice.
        """
        # ensure threshold is a list, even if only a single value is provided
        self.thresholds = thresholds
        if np.isscalar(thresholds):
            self.thresholds = [thresholds]

        # if necessary, set threshold units
        if threshold_units is None:
            self.threshold_units = None
        else:
            self.threshold_units = Unit(threshold_units)

        # initialise threshold coordinate name as None
        self.threshold_coord_name = None

        # read fuzzy factor or set (default) to 1 (no smoothing)
        fuzzy_factor_loc = 1.
        if fuzzy_factor is not None:
            if fuzzy_bounds is not None:
                raise ValueError(
                    "Invalid combination of keywords. Cannot specify "
                    "fuzzy_factor and fuzzy_bounds together")
            if not 0 < fuzzy_factor < 1:
                raise ValueError(
                    "Invalid fuzzy_factor: must be >0 and <1: {}".format(
                        fuzzy_factor))
            if 0 in self.thresholds:
                raise ValueError(
                    "Invalid threshold with fuzzy factor: cannot use a "
                    "multiplicative fuzzy factor with threshold == 0")
            fuzzy_factor_loc = fuzzy_factor

        # Set fuzzy-bounds.  If neither fuzzy_factor nor fuzzy_bounds is set,
        # both lower_thr and upper_thr default to the threshold value.  A test
        # of this equality is used later to determine whether to process with
        # a sharp threshold or fuzzy bounds.
        if fuzzy_bounds is None:
            self.fuzzy_bounds = []
            for thr in self.thresholds:
                lower_thr = thr * fuzzy_factor_loc
                upper_thr = thr * (2. - fuzzy_factor_loc)
                # for negative thresholds the multiplication flips the order
                if thr < 0:
                    lower_thr, upper_thr = upper_thr, lower_thr
                self.fuzzy_bounds.append((lower_thr, upper_thr))
        else:
            self.fuzzy_bounds = fuzzy_bounds

        # ensure fuzzy_bounds is a list of tuples
        if isinstance(fuzzy_bounds, tuple):
            self.fuzzy_bounds = [fuzzy_bounds]

        # check that thresholds and fuzzy_bounds are self-consistent
        for thr, bounds in zip(self.thresholds, self.fuzzy_bounds):
            if len(bounds) != 2:
                raise ValueError("Invalid bounds for one threshold: {}."
                                 " Expected 2 floats.".format(bounds))
            if bounds[0] > thr or bounds[1] < thr:
                bounds_msg = ("Threshold must be within bounds: "
                              "!( {} <= {} <= {} )".format(bounds[0],
                                                           thr, bounds[1]))
                raise ValueError(bounds_msg)

        # Dict of known logical comparisons. Each key contains a dict of
        # {'function': The operator function for this comparison_operator,
        #  'spp_string': Comparison_Operator string for use in CF-convention
        #                meta-data}
        self.comparison_operator_dict = {}
        self.comparison_operator_dict.update(dict.fromkeys(
            ['ge', 'GE', '>='], {'function': operator.ge,
                                 'spp_string': 'above'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['gt', 'GT', '>'], {'function': operator.gt,
                                'spp_string': 'above'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['le', 'LE', '<='], {'function': operator.le,
                                 'spp_string': 'below'}))
        self.comparison_operator_dict.update(dict.fromkeys(
            ['lt', 'LT', '<'], {'function': operator.lt,
                                'spp_string': 'below'}))
        self.comparison_operator_string = comparison_operator
        # Resolve the operator now so that an invalid choice fails at
        # construction and self.comparison_operator exists for process().
        self._decode_comparison_operator_string()

    def __repr__(self):
        """Represent the configured plugin instance as a string."""
        return (
            '<BasicThreshold: thresholds {}, ' +
            'fuzzy_bounds {}, ' +
            'method: data {} threshold>'
        ).format(self.thresholds, self.fuzzy_bounds,
                 self.comparison_operator_string)

    def _add_threshold_coord(self, cube, threshold):
        """
        Add a scalar threshold-type coordinate to a cube containing
        thresholded data and promote the new coordinate to be the
        leading dimension of the cube.

        Args:
            cube (iris.cube.Cube):
                Cube containing thresholded data (1s and 0s)
            threshold (float):
                Value at which the data has been thresholded

        Returns:
            iris.cube.Cube:
                With new "threshold" axis
        """
        coord = iris.coords.DimCoord(np.array([threshold], dtype=np.float32),
                                     units=cube.units)
        # rename() before setting var_name: renaming resets var_name
        coord.rename(self.threshold_coord_name)
        coord.var_name = "threshold"

        # Use an spp__relative_to_threshold attribute, as an extension to the
        # CF-conventions.
        coord.attributes.update({
            'spp__relative_to_threshold':
                self.comparison_operator['spp_string']})

        # new_axis promotes a scalar coordinate that is already on the cube
        cube.add_aux_coord(coord)
        return iris.util.new_axis(cube, coord)

    def _decode_comparison_operator_string(self):
        """Sets self.comparison_operator based on
        self.comparison_operator_string. This is a dict containing the keys
        'function' and 'spp_string'.
        Raises errors if invalid options are found.

        Raises:
            ValueError: If self.comparison_operator_string does not match a
                        defined method.
        """
        try:
            self.comparison_operator = self.comparison_operator_dict[
                self.comparison_operator_string]
        except KeyError:
            msg = (f'String "{self.comparison_operator_string}" '
                   'does not match any known comparison_operator method')
            raise ValueError(msg)

    @staticmethod
    def _rescale_clipped(data, data_range, scale_range):
        """Map data linearly from data_range onto scale_range, clipping the
        result to scale_range.

        Args:
            data (numpy.ndarray):
                Values to rescale.
            data_range (tuple of float):
                (lower, upper) data values mapped onto scale_range.
            scale_range (tuple of float):
                (lower, upper) output values.

        Returns:
            numpy.ndarray:
                Rescaled values within scale_range.
        """
        data_lo, data_hi = data_range
        scale_lo, scale_hi = scale_range
        if data_hi == data_lo:
            # Zero-width range (a fuzzy bound equal to the threshold): use a
            # step instead of dividing by zero.
            return np.where(data < data_lo, scale_lo, scale_hi)
        fraction = (data - data_lo) / (data_hi - data_lo)
        return np.clip(fraction * (scale_hi - scale_lo) + scale_lo,
                       scale_lo, scale_hi)

    def process(self, input_cube):
        """Convert each point to a truth value based on provided threshold
        values. The truth value may or may not be fuzzy depending upon if
        fuzzy_bounds are supplied.  If the plugin has a "threshold_units"
        member, this is used to convert both thresholds and fuzzy bounds into
        the units of the input cube.  The converted values are held locally,
        so the plugin can safely be applied to several cubes in turn.

        Args:
            input_cube (iris.cube.Cube):
                Cube to threshold. The code is dimension-agnostic.

        Returns:
            iris.cube.Cube:
                Cube after a threshold has been applied. The data within this
                cube will contain values between 0 and 1 to indicate whether
                a given threshold has been exceeded or not.

                The cube meta-data will contain:
                * Input_cube name prepended with
                probability_of_X_above(or below)_threshold (where X is
                the diagnostic under consideration)
                * Threshold dimension coordinate with same units as input_cube
                * Threshold attribute (above or below threshold)
                * Cube units set to (1).

        Raises:
            ValueError: if a np.nan value is detected within the input cube.
        """
        # Record input cube data type to ensure consistent output, though
        # integer data must become float to enable fuzzy thresholding.
        input_cube_dtype = input_cube.dtype
        if input_cube.dtype.kind in ('i', 'u'):
            input_cube_dtype = np.float32

        thresholded_cubes = iris.cube.CubeList()
        if np.isnan(input_cube.data).any():
            raise ValueError("Error: NaN detected in input cube data")

        # if necessary, convert thresholds and fuzzy bounds into cube units.
        # Work on local copies: overwriting the instance attributes would
        # re-convert already converted values on a second call.
        thresholds = self.thresholds
        fuzzy_bounds = self.fuzzy_bounds
        if self.threshold_units is not None:
            thresholds = [
                self.threshold_units.convert(threshold, input_cube.units)
                for threshold in self.thresholds]
            fuzzy_bounds = [
                tuple(self.threshold_units.convert(bound, input_cube.units)
                      for bound in bounds)
                for bounds in self.fuzzy_bounds]

        # set name of threshold coordinate to match input diagnostic
        self.threshold_coord_name = input_cube.name()

        # apply fuzzy thresholding
        for threshold, bounds in zip(thresholds, fuzzy_bounds):
            cube = input_cube.copy()
            # if upper and lower bounds are equal, set a deterministic 0/1
            # probability based on exceedance of the threshold
            if bounds[0] == bounds[1]:
                truth_value = self.comparison_operator['function'](
                    cube.data, threshold)
            # otherwise, scale exceedance probabilities linearly between 0/1
            # at the min/max fuzzy bounds and 0.5 at the threshold value
            else:
                truth_value = np.where(
                    cube.data < threshold,
                    self._rescale_clipped(
                        cube.data,
                        data_range=(bounds[0], threshold),
                        scale_range=(0., 0.5)),
                    self._rescale_clipped(
                        cube.data,
                        data_range=(threshold, bounds[1]),
                        scale_range=(0.5, 1.)))
                # if requirement is for probabilities below threshold (rather
                # than above), invert the exceedance probability
                if 'below' in self.comparison_operator['spp_string']:
                    truth_value = 1. - truth_value
            truth_value = truth_value.astype(input_cube_dtype)

            cube.data = truth_value
            # Overwrite masked values that have been thresholded
            # with the un-thresholded values from the input cube.
            if np.ma.is_masked(cube.data):
                cube.data[input_cube.data.mask] = (
                    input_cube.data[input_cube.data.mask])
            cube = self._add_threshold_coord(cube, threshold)
            thresholded_cubes.append(cube)

        cube, = thresholded_cubes.concatenate()

        # name the result as documented: probability_of_X_above/below_threshold
        cube.rename(
            "probability_of_{}_{}_threshold".format(
                cube.name(), self.comparison_operator['spp_string']))
        cube.units = Unit(1)

        cube = enforce_coordinate_ordering(
            cube, ["realization", "percentile"])

        return cube
Example #20
    def create_data_object(self, filenames, variable):
        """Read ``variable`` and its coordinates from ``filenames`` and return
        them as gridded data.

        The time axis is built per file: each file's ``ER2_IMU/gps_time``
        values are interpreted as hours since that file's ``header/date`` and
        converted to ``cis_standard_time_unit``.

        :param filenames: Files to read, passed on to
            ``read_many_files_individually``.
        :param variable: Name of the data variable to read.
        :return: The result of ``GriddedData.make_from_cube`` for a cube with
            time on dimension 0 and altitude on dimension 1.
        """
        logging.debug("Creating data object for variable " + variable)

        variables = [("ER2_IMU/Longitude", "x"), ("ER2_IMU/Latitude", "y"),
                     ("ER2_IMU/gps_time", "t"), ("State/Pressure", "p"),
                     ("DataProducts/Altitude", "z"), ("header/date", ""),
                     (variable, '')]

        logging.info("Listing coordinates: " + str(variables))

        var_data = read_many_files_individually(filenames,
                                                [v[0] for v in variables])

        date_times = []
        for times, date in zip(var_data['ER2_IMU/gps_time'],
                               var_data['header/date']):
            # Date is stored as an array (of length 92??) of floats with format: yyyymmdd
            date_str = str(int(date[0]))
            t_unit = Unit('hours since {}-{}-{} 00:00:00'.format(
                date_str[0:4], date_str[4:6], date_str[6:8]))
            date_times.append(
                t_unit.convert(get_data(times), cis_standard_time_unit))

        # Flatten the data by taking the 0th column of the transpose
        time_coord = DimCoord(utils.concatenate(date_times).T[0],
                              standard_name='time',
                              units=cis_standard_time_unit)

        # TODO This won't work for multiple files since the altitude bins are different for each flight...
        alt_data = utils.concatenate(
            [get_data(i) for i in var_data["DataProducts/Altitude"]])
        alt_coord = DimCoord(alt_data[0], standard_name='altitude', units='m')

        pres_data = utils.concatenate(
            [get_data(i) for i in var_data["State/Pressure"]])
        # NOTE(review): the remaining AuxCoord arguments (the pressure units)
        # and the statement that followed the "Fix the air-pressure units"
        # comment are missing from this copy of the file -- restore them from
        # the upstream source; the coordinate is unit-less until then.
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure')

        lat_data = utils.concatenate(
            [get_data(i) for i in var_data['ER2_IMU/Latitude']])
        lat_coord = AuxCoord(lat_data.T[0], standard_name='latitude')

        lon_data = utils.concatenate(
            [get_data(i) for i in var_data['ER2_IMU/Longitude']])
        lon_coord = AuxCoord(lon_data.T[0], standard_name='longitude')

        data = utils.concatenate([get_data(i) for i in var_data[variable]])
        # NOTE(review): metadata is not used below in this copy; presumably it
        # fed name/units arguments of Cube() that were lost -- confirm.
        metadata = get_metadata(var_data[variable][0])

        cube = Cube(np.ma.masked_invalid(data),
                    dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)],
                    aux_coords_and_dims=[(lat_coord, (0, )),
                                         (lon_coord, (0, )),
                                         (pres_coord, (0, 1))])
        gd = GriddedData.make_from_cube(cube)
        return gd
Example #21
 def test_non_gregorian_calendar_conversion_dtype(self):
     """Converting float32 data between two 360-day calendar time units
     must give a float32 result."""
     source_unit = Unit('hours since 2000-01-01 00:00:00',
                        calendar='360_day')
     target_unit = Unit('hours since 2000-01-02 00:00:00',
                        calendar='360_day')
     hours = np.arange(4, dtype=np.float32)
     converted = source_unit.convert(hours, target_unit)
     self.assertEqual(converted.dtype, np.float32)
Example #23
def extract_vars(nc_file,
    # NOTE(review): the rest of the signature is missing from this copy. The
    # body uses var_name, time_idx, target_units, time_variable and
    # vertical_variable; their order and defaults cannot be determined here.
    # The docstring delimiters around the following text are missing too.
    Return a :class:`xarray.DataArray` object for the desired variable in a single NetCDF file object.

    Adapted from wrf.util

    :param ncfile: (:class:`netCDF4.Dataset`, :class:`Nio.NioFile`): An open netCDF file
    :param var_name: (:obj:`str`) The variable name.
    :param time_idx: (:obj:`int` or :data:`wrf.ALL_TIMES`, optional): The desired time index. This value can be a positive integer, negative integer,
            or None to return all times in the file or sequence. The default is None (return all idxs).
    :param  target_units: (:obj:`str`) If not None, attempt to convert units to this format using cf_units.
    :returns: :class:`xarray.DataArray`:  An array object that contains metadata.

    # A single time index selects one record; otherwise the whole time axis
    # is taken with slice(None).
    multitime = is_multi_time(time_idx)
    time_idx_or_slice = time_idx if not multitime else slice(None)
    # NOTE(review): a `try:` line appears to be missing before this lookup.
            var = nc_file.variables[var_name]
    except KeyError:
        raise ValueError('No variable named {} available in {}'.format(
            var_name, nc_file.filepath()))  # TODO: refactor to ValidationError
    if len(var.shape) > 1:
        data = var[time_idx_or_slice, :]
    # NOTE(review): an `else:` line appears to be missing here.
        data = var[time_idx_or_slice]

    # Optional unit conversion; a ValueError from it is logged, not raised.
    if target_units is not None and hasattr(var, 'units'):
    # NOTE(review): a `try:` line appears to be missing here.
            u = Unit(var.units)
            data = u.convert(data, target_units)
        except ValueError:
            logger.warning('Could not parse units "{}" for variable {}'.format(
                var.units, var_name))

    # Want to preserve the time dimension
    if not multitime:
        if len(var.shape) > 1:
            data = data[np.newaxis, :]
        # NOTE(review): an `else:` line appears to be missing here.
            data = data[np.newaxis]

    # Copy the variable's attributes, skipping the internal fields below.
    attrs = OrderedDict()
    for dkey, val in var.__dict__.items():
        # scipy.io adds these but don't want them
        if dkey in ("data", "_shape", "_size", "_typecode", "_attributes",
                    "maskandscale", "dimensions"):
        # NOTE(review): the body of this `if` (presumably `continue`) is
        # missing.

        _dkey = dkey if isinstance(dkey, str) else dkey.decode()
        attrs[_dkey] = val

    # Keep only the names of the trailing dimensions that remain in data.
    dimnames = var.dimensions[-data.ndim:]

    coords = OrderedDict()

    if dimnames[
            0] == time_variable:  # TODO needs to work around this step for ozone climatology
        t = extract_times(nc_file, time_idx, time_variable)
        if not multitime:
            t = [t]
        coords[dimnames[0]] = t

    if len(dimnames) == 2 and dimnames[1] == vertical_variable:
        # NOTE(review): the remaining arguments of this recursive call are
        # missing from this copy.
        t = extract_vars(nc_file,
        coords['height'] = t

    # NOTE(review): the remaining DataArray arguments are missing here;
    # presumably the name, dims, coords and attrs built above -- confirm.
    data_array = DataArray(data,

    return data_array