def process(self, cubes, coordinates=None):
    """
    Aggregate the input reliability calibration table cubes and return
    the result.

    Args:
        cubes (list or iris.cube.CubeList):
            The cube or cubes containing the reliability calibration
            tables to aggregate.
        coordinates (list or None):
            A list of coordinates over which to aggregate the reliability
            calibration table using summation. If the argument is None
            and a single cube is provided, this cube will be returned
            unchanged. The object passed in is never modified.

    Returns:
        The single input cube, unchanged, when one cube is provided and
        there are no coordinates to aggregate over. Otherwise the table
        summed over the requested coordinates (and over
        forecast_reference_time when several cubes are provided), with
        its forecast_reference_time coordinate replaced by the one built
        by create_unified_frt_coord.
    """
    # Work on a private copy: "forecast_reference_time" may be appended
    # below, and that addition must not leak into the caller's list.
    if coordinates is None:
        coordinates = []
    elif isinstance(coordinates, str):
        # A bare coordinate name; list() would split it into characters.
        coordinates = [coordinates]
    else:
        coordinates = list(coordinates)

    try:
        (cube,) = cubes
    except ValueError:
        # More (or fewer) than one cube: merge them and additionally
        # aggregate over the forecast reference time.
        cubes = iris.cube.CubeList(cubes)
        self._check_frt_coord(cubes)
        cube = cubes.merge_cube()
        coordinates.append("forecast_reference_time")
    else:
        if not coordinates:
            return cube

    result = collapsed(cube, coordinates, iris.analysis.SUM)
    frt = create_unified_frt_coord(cube.coord("forecast_reference_time"))
    result.replace_coord(frt)
    return result
def _calculate_location_parameter_from_mean(self, optimised_coeffs):
    """
    Calculate the location parameter when the ensemble mean at each grid
    point is used as the predictor.

    Further information is available in the
    :mod:`module level docstring <improver.calibration.ensemble_calibration>`.

    Args:
        optimised_coeffs (dict):
            A dictionary containing the calibration coefficient names as
            keys with their corresponding values. The "alpha" and "beta"
            entries are read here.

    Returns:
        numpy.ndarray:
            Location parameter calculated using the ensemble mean as the
            predictor, cast to float32.
    """
    ensemble_mean = collapsed(
        self.current_forecast, "realization", iris.analysis.MEAN
    )
    alpha = optimised_coeffs["alpha"]
    beta = optimised_coeffs["beta"]

    # location parameter = a + b*X, where X is the raw ensemble mean and
    # b is the "beta" coefficient.
    unscaled = alpha + beta * ensemble_mean.data
    return unscaled.astype(np.float32)
def collapse_mask_coord(self, cube: Cube) -> Cube:
    """
    Collapse self.coord_for_masking using the stored collapse weights.

    NaNs produced by the neighbourhood processing are masked first so
    that they are ignored by the weighted mean; the weights are then
    effectively renormalised so that probability is not lost.

    Args:
        cube:
            Cube containing the array to which the square neighbourhood
            with a mask has been applied. Dimensions
            self.coord_for_masking, y and x. Its data is replaced in
            place by a masked version.

    Returns:
        Cube containing the weighted mean from neighbourhood after
        collapsing the chosen coordinate.
    """
    # Hide NaNs behind a mask so the weighted mean skips them.
    cube.data = ma.masked_invalid(cube.data, copy=False)

    # The renormalisation of the weights happens inside the numpy
    # routine that the Iris aggregation delegates to.
    mean_weights = self.collapse_weights.data
    blended = collapsed(
        cube, self.coord_for_masking, iris.analysis.MEAN, weights=mean_weights
    )

    # Points that are still masked after the collapse go back to NaN.
    if np.ma.is_masked(blended.data):
        blended.data.data[blended.data.mask] = np.nan

    # The collapsed coordinate should not appear on the output.
    blended.remove_coord(self.coord_for_masking)
    return blended
def weighted_mean(self, cube, weights):
    """
    Blend data over self.blend_coord with a weighted mean.

    Args:
        cube (iris.cube.Cube):
            The cube which is being blended over self.blend_coord.
        weights (iris.cube.Cube or None):
            Cube of blending weights or None.

    Returns:
        iris.cube.Cube:
            The cube with values blended over self.blend_coord, with
            suitable weightings applied and data cast to float32.
    """
    blend_weights = self.non_percentile_weights(cube, weights)

    # Weighted average over the blend coordinate.
    blended = collapsed(
        cube, self.blend_coord, iris.analysis.MEAN, weights=blend_weights
    )
    blended.data = blended.data.astype(np.float32)
    return blended
def percentile_weighted_mean(self, cube, weights):
    """
    Blend percentile data over self.blend_coord using the given weights.

    Args:
        cube (iris.cube.Cube):
            The cube which is being blended over self.blend_coord.
            Assumes self.blend_coord and percentile are leading
            coordinates (enforced in process).
        weights (iris.cube.Cube):
            Cube of blending weights.

    Returns:
        iris.cube.Cube:
            The cube with percentile values blended over
            self.blend_coord, with suitable weightings applied.
    """
    # The weights are built from a single slice along the percentile
    # coordinate, then normalised.
    single_percentile = next(cube.slices_over(PERC_COORD))
    raw_weights = self.get_weights_array(single_percentile, weights)
    normalised_weights = self._normalise_weights(raw_weights)

    # "mean" is used as the aggregator name to give a CF-compliant
    # cell method.
    blend_aggregator = Aggregator("mean", PercentileBlendingAggregator.aggregate)

    return collapsed(
        cube,
        self.blend_coord,
        blend_aggregator,
        percentiles=cube.coord(PERC_COORD).points,
        arr_weights=normalised_weights,
    )
def test_two_methods(self):
    """Test that a pre-existing cell method survives the collapse and
    that no further cell method is added."""
    input_cube = self.cube
    existing_method = iris.coords.CellMethod("test")
    input_cube.add_cell_method(existing_method)

    result = collapsed(input_cube, "realization", iris.analysis.MEAN)

    self.assertTupleEqual(result.cell_methods, (existing_method,))
    data_matches = (result.data == self.expected_data).all()
    self.assertTrue(data_matches)
def _calculate_time_average(wind_cubes, time_coord):
    """Average the merged input cubelist over time.

    Args:
        wind_cubes: Cube list that is merged into a single cube.
        time_coord: Coordinate whose points and units are copied onto
            the "time" coordinate of the averaged cube.

    Returns:
        The time-averaged cube, or the merged cube unchanged if the
        collapse raises CoordinateCollapseError.
    """
    merged = wind_cubes.merge_cube()
    try:
        averaged = collapsed(merged, "time", iris.analysis.MEAN)
    except CoordinateCollapseError:
        # The collapse fails when there is only one time point, so there
        # is nothing to average.
        return merged
    else:
        averaged_time = averaged.coord("time")
        averaged_time.points = time_coord.points
        averaged_time.units = time_coord.units
        return averaged
def test_two_coords(self):
    """Test collapsing over two coordinates at once, and that the
    precision of a float coordinate (latitude) is not escalated."""
    coord_names = ("realization", "latitude")

    reference = self.cube.collapsed(list(coord_names), iris.analysis.MEAN)
    expected_data = reference.data

    result = collapsed(self.cube, list(coord_names), iris.analysis.MEAN)

    data_matches = (result.data == expected_data).all()
    self.assertTrue(data_matches)
    self.assertEqual(
        result.coord("latitude").dtype, self.cube.coord("latitude").dtype
    )
def percentile_weighted_mean(self, cube, weights, perc_coord):
    """
    Blend percentile data over self.blend_coord using the given weights.

    Args:
        cube (iris.cube.Cube):
            The cube which is being blended over self.blend_coord.
        weights (iris.cube.Cube):
            Cube of blending weights.
        perc_coord (iris.coords.DimCoord):
            The percentile coordinate for this cube.

    Returns:
        iris.cube.Cube:
            The cube with percentile values blended over
            self.blend_coord, with suitable weightings applied. Data and
            any float64 coordinate points are cast to float32.
    """
    percentile_values = np.array(perc_coord.points, dtype=np.float32)
    (perc_dim,) = cube.coord_dims(perc_coord.name())

    # During initialisation the iris.analysis.Aggregator moves the
    # coordinate being collapsed to index -1, before the aggregation
    # method is called. Every coordinate that sat after the collapsing
    # coordinate therefore shifts down by one. The percentile dimension
    # index is needed later, so the record is corrected here.
    blend_dim = cube.coord_dims(self.blend_coord)[0]
    if blend_dim < perc_dim:
        perc_dim = perc_dim - 1

    blend_weights = self.percentile_weights(cube, weights, perc_coord)

    # "mean" is used as the aggregator name to give a CF-compliant
    # cell method.
    blend_aggregator = Aggregator("mean", PercentileBlendingAggregator.aggregate)

    blended = collapsed(
        cube,
        self.blend_coord,
        blend_aggregator,
        arr_percent=percentile_values,
        arr_weights=blend_weights,
        perc_dim=perc_dim,
    )
    blended.data = blended.data.astype(np.float32)

    # Stop collapsed coordinates from promoting themselves to float64.
    for crd in blended.coords():
        if crd.points.dtype != np.float64:
            continue
        crd.points = crd.points.astype(np.float32)

    return blended
def process(self, cube):
    """
    Collapse self.coord_masked with the available weights.

    NaNs produced by the neighbourhood processing are masked so they are
    ignored, and the weights are renormalised to account for them so
    that probability is not lost.

    Args:
        cube (iris.cube.Cube):
            Cube containing the array to which the square neighbourhood
            with a mask has been applied. Its data is replaced in place
            by a masked version.

    Returns:
        iris.cube.Cube:
            Cube containing the weighted mean from neighbourhood after
            collapsing the chosen coordinate.
    """
    # Hide NaNs behind a mask so Iris skips them in the weighted mean.
    cube.data = ma.masked_invalid(cube.data, copy=False)

    y_name = cube.coord(axis="y").name()
    x_name = cube.coord(axis="x").name()

    # When the weights already have the full cube shape they are
    # renormalised once against the whole cube; otherwise they are
    # renormalised once against the first 3D slice and then reused.
    weights_pending = True
    if self.weights.shape == cube.shape:
        weights = self.renormalize_weights(cube)
        weights_pending = False

    # Loop over any extra dimensions.
    collapsed_slices = iris.cube.CubeList([])
    slice_coords = [self.coord_masked, y_name, x_name]
    for masked_slice in cube.slices(slice_coords):
        if weights_pending:
            weights = self.renormalize_weights(masked_slice)
            weights_pending = False
        collapsed_slices.append(
            collapsed(
                masked_slice,
                self.coord_masked,
                iris.analysis.MEAN,
                weights=weights,
            )
        )
    merged = collapsed_slices.merge_cube()

    # Scalar coordinates that were dimensions on the input are promoted
    # back to dimensions. The template is a slice over the collapsed
    # coordinate, as that coordinate is not expected on the output.
    template = next(cube.slices_over([self.coord_masked]))
    merged = check_cube_coordinates(template, merged)

    # Remove references to self.coord_masked in the result cube.
    merged.remove_coord(self.coord_masked)
    return merged
def _group_timezones(self, timezone_mask):
    """
    Group per-offset timezone masks to match coarser data intervals.

    If the ancillary will be used with data that is not available at
    hourly intervals, the masks can be grouped to match the intervals of
    the data. For example, 3-hourly interval data might group UTC
    offsets:

        {12: [-12, -11], 9: [-10, -9, -8], 6: [-7, -6, -5], etc.}

    Each key of self.groupings is the UTC offset to be used for the
    group; the list holds the UTC offsets that are grouped together.

    The cubes in a group are collapsed over the UTC_offset coordinate
    using iris.analysis.MIN. All unmasked (0) points in each cube are
    therefore preserved, enlarging the unmasked region to include all
    unmasked points from all the cubes.

    Args:
        timezone_mask (iris.cube.CubeList):
            A cube list containing a mask cube for each UTC offset that
            has been found necessary.

    Returns:
        iris.cube.CubeList:
            A cube list containing cubes created by blending together
            different UTC offset cubes to create larger masked regions.
    """
    grouped_masks = iris.cube.CubeList()
    for key, members in self.groupings.items():
        # Keys read from a json file arrive as strings.
        target_offset = int(key)

        # Select every cube whose offset lies within the group's range.
        in_group = iris.Constraint(
            UTC_offset=lambda cell: members[0] <= cell <= members[-1]
        )
        matches = timezone_mask.extract(in_group)
        if not matches:
            continue

        merged = matches.merge_cube()
        if merged.coord("UTC_offset").shape[0] > 1:
            merged = collapsed(merged, "UTC_offset", iris.analysis.MIN)
            merged.coord("UTC_offset").points = [target_offset]
        else:
            # Single offset in this group: relabel it and record bounds
            # spanning the original and the group offset.
            (original_point,) = merged.coord("UTC_offset").points
            merged.coord("UTC_offset").points = [target_offset]
            lower = min(original_point, target_offset)
            upper = max(original_point, target_offset)
            merged.coord("UTC_offset").bounds = [lower, upper]

        grouped_masks.append(merged)

    return grouped_masks
def process(self, cube):
    """
    Create a cube containing the percentiles as a new dimension.

    What's generated by default is:
        * 15 percentiles - (0%, 5%, 10%, 20%, 25%, 30%, 40%, 50%, 60%,
          70%, 75%, 80%, 90%, 95%, 100%)

    Args:
        cube (iris.cube.Cube):
            Given the collapse coordinate, convert the set of values
            along that coordinate into a PDF and extract percentiles.

    Returns:
        iris.cube.Cube:
            A single merged cube of all the cubes produced by each
            percentile collapse, with the input data type and a
            coordinate named "percentile" in units of "%".

    Raises:
        CoordinateNotFoundError:
            If the number of collapse coordinates matched on the cube
            differs from the number requested.
    """
    # Remember the input data type so it can be restored on return.
    original_dtype = cube.dtype

    # Count how many (cube coordinate, collapse coordinate) name pairs
    # match; every requested collapse coordinate must be present.
    n_requested = len(self.collapse_coord)
    cube_coord_names = [crd.name() for crd in cube.coords()]
    n_matched = sum(
        wanted == present
        for present in cube_coord_names
        for wanted in self.collapse_coord
    )

    if n_matched != n_requested:
        raise CoordinateNotFoundError(
            "Coordinate '{}' not found in cube passed to {}.".format(
                self.collapse_coord, self.__class__.__name__
            )
        )

    percentile_cube = collapsed(
        cube,
        self.collapse_coord,
        iris.analysis.PERCENTILE,
        percent=self.percentiles,
        fast_percentile_method=self.fast_percentile_method,
    )
    percentile_cube.data = percentile_cube.data.astype(original_dtype)

    for name in self.collapse_coord:
        percentile_cube.remove_coord(name)

    # Rename the percentile coordinate to "percentile" and make sure
    # its unit is %.
    percentile_coord = find_percentile_coordinate(percentile_cube)
    percentile_cube.coord(percentile_coord).rename("percentile")
    percentile_cube.coord(percentile_coord).units = "%"
    return percentile_cube
def normalised_masked_weights(weights_cube, blend_coord):
    """
    Normalise spatial weights along the dimension associated with the
    blend_coord.

    If for a given point the sum of the weights along the blend_coord is
    zero then the returned normalised weight for that point will also be
    zero. This corresponds to the case where there is missing data for
    that point for all slices along the blend_coord.

    Args:
        weights_cube (iris.cube.Cube):
            A cube with spatial weights and any other leading dimension.
            This cube must have a coordinate matching the name given by
            blend_coord, which corresponds to the dimension along which
            the normalisation is needed.
        blend_coord (str):
            The name of the coordinate along which the normalisation is
            needed.

    Returns:
        iris.cube.Cube:
            A cube with the same dimensions as the input cube, but with
            the weights normalised along the blend_coord dimension. The
            blend_coord will be the leading dimension on the output
            cube.
    """
    weight_totals = collapsed(weights_cube, blend_coord, iris.analysis.SUM)

    def _normalise(layer):
        """Divide one blend_coord slice by the summed weights."""
        # Division only happens where the summed weights are positive;
        # everywhere else the zero-filled `out` array supplies the value.
        scaled = np.divide(
            layer.data,
            weight_totals.data,
            out=np.zeros_like(layer.data),
            where=(weight_totals.data > 0),
        )
        return layer.copy(data=scaled)

    normalised = iris.cube.CubeList()
    # Slicing over blend_coord makes each slice match the summed shape.
    for layer in weights_cube.slices_over(blend_coord):
        normalised.append(_normalise(layer))
    return normalised.merge_cube()
def weighted_mean(self, cube, weights):
    """
    Blend data over self.blend_coord with a weighted mean.

    Cubes with more than three dimensions are blended slice by slice
    along a dimension other than the blend dimension, and the blended
    slices are merged back together.

    Args:
        cube (iris.cube.Cube):
            The cube which is being blended over self.blend_coord.
        weights (iris.cube.Cube or None):
            Cube of blending weights or None.

    Returns:
        iris.cube.Cube:
            The cube with values blended over self.blend_coord, with
            suitable weightings applied.
    """
    weights_array = self.non_percentile_weights(cube, weights)

    # Slice over the first dimension that is not the blend dimension.
    (blend_dim,) = cube.coord_dims(self.blend_coord)
    slice_dim = 1 if blend_dim == 0 else 0

    sliced = cube.ndim > 3
    if sliced:
        cube_parts = cube.slices_over(slice_dim)
        # Bring the sliced axis to the front so that iterating over the
        # weights yields one array per cube slice.
        weight_parts = np.moveaxis(weights_array, slice_dim, 0)
    else:
        cube_parts = [cube]
        weight_parts = [weights_array]

    blended_parts = iris.cube.CubeList(
        collapsed(
            cube_part, self.blend_coord, iris.analysis.MEAN, weights=weight_part
        )
        for cube_part, weight_part in zip(cube_parts, weight_parts)
    )

    if sliced:
        return blended_parts.merge_cube()
    return blended_parts[0]
def weighted_mean(self, cube: Cube, weights: Optional[Cube]) -> Cube:
    """
    Blend data over self.blend_coord with a weighted mean.

    Cubes with more than three dimensions are blended slice by slice
    over dimension 1 and the blended slices are merged back together.

    Args:
        cube:
            The cube which is being blended over self.blend_coord.
            Assumes leading blend dimension (enforced in process).
        weights:
            Cube of blending weights or None.

    Returns:
        The cube with values blended over self.blend_coord, with
        suitable weightings applied.
    """
    weights_array = self.get_weights_array(cube, weights)

    # The blend dimension leads, so dimension 1 is used for slicing.
    slice_dim = 1
    sliced = cube.ndim > 3

    cube_parts = cube.slices_over(slice_dim) if sliced else [cube]
    if sliced:
        # Bring the sliced axis to the front so that iterating over the
        # weights yields one array per cube slice.
        weight_parts = np.moveaxis(weights_array, slice_dim, 0)
    else:
        weight_parts = [weights_array]

    blended_parts = iris.cube.CubeList(
        collapsed(
            cube_part, self.blend_coord, iris.analysis.MEAN, weights=weight_part
        )
        for cube_part, weight_part in zip(cube_parts, weight_parts)
    )

    if not sliced:
        return blended_parts[0]
    return blended_parts.merge_cube()
def process(self, historic_forecast, truth, landsea_mask=None):
    """
    Using Nonhomogeneous Gaussian Regression/Ensemble Model Output
    Statistics, estimate the required coefficients from historical
    forecasts.

    The main steps of this method are:

    1. Check that the predictor is valid.
    2. Filter the historic forecasts and truth to ensure that these
       inputs match in validity time.
    3. Apply unit conversion to ensure that the historic forecasts and
       truth have the desired units for calibration.
    4. Calculate the variance of the historic forecasts. If the chosen
       predictor is the mean, also calculate the mean of the historic
       forecasts.
    5. If a land-sea mask is provided then mask out sea points in the
       truth and predictor from the historic forecasts.
    6. Calculate initial guess at coefficient values by performing a
       linear regression, if requested, otherwise default values are
       used.
    7. Perform minimisation.

    Args:
        historic_forecast (iris.cube.Cube):
            The cube containing the historical forecasts used for
            calibration.
        truth (iris.cube.Cube):
            The cube containing the truth used for calibration.
        landsea_mask (iris.cube.Cube):
            The optional cube containing a land-sea mask. If provided,
            only land points are used to calculate the coefficients.
            Within the land-sea mask cube land points should be
            specified as ones, and sea points as zeros.

    Returns:
        iris.cube.Cube:
            Cube containing the coefficients estimated using EMOS. The
            cube contains a coefficient_index dimension coordinate and a
            coefficient_name auxiliary coordinate.

    Raises:
        ValueError: If either the historic_forecast or truth cubes were
            not passed in.
        ValueError: If the units of the historic and truth cubes do not
            match.
    """
    if not (historic_forecast and truth):
        raise ValueError("historic_forecast and truth cubes must be provided.")

    # Ensure predictor is valid.
    check_predictor(self.predictor)

    historic_forecast, truth = filter_non_matching_cubes(historic_forecast, truth)

    # Make sure inputs have the same units.
    if self.desired_units:
        historic_forecast.convert_units(self.desired_units)
        truth.convert_units(self.desired_units)

    if historic_forecast.units != truth.units:
        msg = (
            "The historic forecast units of {} do not match "
            "the truth units {}. These units must match, so that "
            "the coefficients can be estimated."
        )
        # Fill in the placeholders so the error names the offending units
        # instead of showing literal "{}".
        raise ValueError(msg.format(historic_forecast.units, truth.units))

    if self.predictor.lower() == "mean":
        no_of_realizations = None
        forecast_predictor = collapsed(
            historic_forecast, "realization", iris.analysis.MEAN
        )
    elif self.predictor.lower() == "realizations":
        no_of_realizations = len(historic_forecast.coord("realization").points)
        forecast_predictor = historic_forecast

    forecast_var = collapsed(
        historic_forecast, "realization", iris.analysis.VARIANCE
    )

    # If a landsea_mask is provided mask out the sea points.
    if landsea_mask:
        self.mask_cube(forecast_predictor, landsea_mask)
        self.mask_cube(forecast_var, landsea_mask)
        self.mask_cube(truth, landsea_mask)

    # Computing initial guess for EMOS coefficients.
    initial_guess = self.compute_initial_guess(
        truth,
        forecast_predictor,
        self.predictor,
        self.ESTIMATE_COEFFICIENTS_FROM_LINEAR_MODEL_FLAG,
        no_of_realizations=no_of_realizations,
    )

    # Only minimise when the initial guess is free of NaNs; otherwise
    # the initial guess itself is used as the coefficients.
    if np.any(np.isnan(initial_guess)):
        optimised_coeffs = initial_guess
    else:
        optimised_coeffs = self.minimiser(
            initial_guess,
            forecast_predictor,
            truth,
            forecast_var,
            self.predictor,
            self.distribution.lower(),
        )

    coefficients_cube = self.create_coefficients_cube(
        optimised_coeffs, historic_forecast
    )
    return coefficients_cube
def test_single_method(self):
    """Test that collapsing returns a cube to which no cell method has
    been added."""
    result = collapsed(self.cube, "realization", iris.analysis.MEAN)

    self.assertTupleEqual(result.cell_methods, ())
    data_matches = (result.data == self.expected_data).all()
    self.assertTrue(data_matches)