def test_site_list_with_unique_id_coordinate_missing_name(self):
    """Check a ValueError is raised when a unique site ID is supplied
    without a key naming the resulting coordinate."""
    spot_data = np.array([1.6, 1.3, 1.4, 1.1])
    expected_msg = "A unique_site_id_key must be provided"
    with self.assertRaisesRegex(ValueError, expected_msg):
        build_spotdata_cube(
            spot_data,
            *self.args,
            unique_site_id=self.unique_site_id,
        )
def test_renaming_to_set_standard_name(self):
    """Check that a CF standard name is stored as the cube's
    standard_name, whilst a non-standard name is stored as the
    long_name instead."""
    cf_cube = build_spotdata_cube(
        1.6, "air_temperature", "degC", 10., 59.5, 1.3, "03854")
    non_cf_cube = build_spotdata_cube(
        1.6, "toast_temperature", "degC", 10., 59.5, 1.3, "03854")
    self.assertEqual(cf_cube.standard_name, "air_temperature")
    self.assertEqual(cf_cube.long_name, None)
    self.assertEqual(non_cf_cube.standard_name, None)
    self.assertEqual(non_cf_cube.long_name, "toast_temperature")
def test_3d_spot_cube_with_unequal_length_coordinates(self):
    """Check an error is raised when the coordinate lengths are
    inconsistent with the data dimensions."""
    mismatched_data = np.ones((4, 2, 2), dtype=np.float32)
    expected_msg = "Unequal lengths"
    with self.assertRaisesRegex(ValueError, expected_msg):
        build_spotdata_cube(
            mismatched_data,
            'air_temperature',
            'degC',
            self.altitude,
            self.latitude,
            self.longitude,
            self.wmo_id,
            neighbour_methods=self.neighbour_methods,
            grid_attributes=self.grid_attributes,
        )
def test_3d_spot_cube_with_unequal_length_coordinates(self):
    """Check an error is raised when the coordinate lengths are
    inconsistent with the data dimensions."""
    mismatched_data = np.ones((4, 2, 2), dtype=np.float32)
    expected_msg = "Unequal lengths"
    with self.assertRaisesRegex(ValueError, expected_msg):
        build_spotdata_cube(
            mismatched_data,
            *self.args,
            neighbour_methods=self.neighbour_methods,
            grid_attributes=self.grid_attributes,
        )
def build_diagnostic_cube(neighbour_cube, diagnostic_cube, spot_values):
    """
    Builds a spot data cube containing the extracted diagnostic values.

    Args:
        neighbour_cube (iris.cube.Cube):
            Source of the spot site metadata (altitude, y/x coordinates
            and WMO identifier) to be included in the spot diagnostic
            cube.
        diagnostic_cube (iris.cube.Cube):
            Provides the name and units of the diagnostic that is being
            processed.
        spot_values (numpy.ndarray):
            An array containing the diagnostic values extracted for the
            required spot sites.

    Returns:
        iris.cube.Cube:
            A spot data cube containing the extracted diagnostic data.
    """
    # Use a distinct name for the result rather than reusing the
    # neighbour_cube parameter name.
    spot_cube = build_spotdata_cube(
        spot_values,
        diagnostic_cube.name(),
        diagnostic_cube.units,
        neighbour_cube.coord("altitude").points,
        neighbour_cube.coord(axis="y").points,
        neighbour_cube.coord(axis="x").points,
        neighbour_cube.coord("wmo_id").points,
    )
    return spot_cube
def setUp(self):
    """Set-up truth cubes."""
    super().setUp()
    # Build the cube expected from the input dataframe, one validity
    # time at a time, then merge into a single cube.
    truth_cubes = iris.cube.CubeList([])
    for validity_time, chunk in zip(
            [self.time1, self.time2, self.time3],
            _chunker(self.truth_data, 3)):
        time_coord = iris.coords.DimCoord(
            np.array(validity_time.timestamp(),
                     dtype=TIME_COORDS["time"].dtype),
            "time",
            bounds=[
                np.array(t.timestamp(), dtype=TIME_COORDS["time"].dtype)
                for t in [validity_time - self.period, validity_time]
            ],
            units=TIME_COORDS["time"].units,
        )
        truth_cubes.append(
            build_spotdata_cube(
                chunk.astype(np.float32),
                self.cf_name,
                self.units,
                np.array(self.altitudes, dtype=np.float32),
                np.array(self.latitudes, dtype=np.float32),
                np.array(self.longitudes, dtype=np.float32),
                wmo_id=self.wmo_ids,
                scalar_coords=[time_coord, self.height_coord],
            ))
    self.expected_period_truth = truth_cubes.merge_cube()
    # The instantaneous variant is identical but without time bounds.
    self.expected_instantaneous_truth = self.expected_period_truth.copy()
    self.expected_instantaneous_truth.coord("time").bounds = None
def spot_fixture():
    """Spot temperature cube"""
    altitudes = np.array([15, 82, 0, 4, 15, 269], dtype=np.float32)
    latitudes = np.array(
        [60.75, 60.13, 58.95, 57.37, 58.22, 57.72], dtype=np.float32)
    longitudes = np.array(
        [-0.85, -1.18, -2.9, -7.40, -6.32, -4.90], dtype=np.float32)
    site_ids = np.array(["3002", "3005", "3017", "3023", "3026", "3031"])
    spot_cube = build_spotdata_cube(
        np.arange(6).astype(np.float32),
        "air_temperature",
        "degC",
        altitudes,
        latitudes,
        longitudes,
        site_ids,
    )
    # Scalar percentile coordinate and blended-product metadata.
    spot_cube.add_aux_coord(
        iris.coords.AuxCoord([50], long_name="percentile", units="%"))
    spot_cube.attributes = {
        "source": "IMPROVER",
        "institution": "Met Office",
        "title": "IMPROVER Post-Processed Multi-Model Blend UK Spot Values",
        "mosg__model_configuration": "uk_det uk_ens",
    }
    (time, _), (blend_time, _), (_, _) = construct_scalar_time_coords(
        time=datetime(2021, 2, 3, 14),
        time_bounds=None,
        frt=datetime(2021, 2, 3, 10))
    blend_time.rename("blend_time")
    spot_cube.add_aux_coord(time)
    spot_cube.add_aux_coord(blend_time)
    return spot_cube
def test_scalar_coords(self):
    """Check that supplied scalar coordinates are attached to the
    returned cube."""
    time_origin = "hours since 1970-01-01 00:00:00"
    calendar = "gregorian"
    time_unit = Unit(time_origin, calendar)
    time_coord = AuxCoord(419811., "time", units=time_unit)
    frt_coord = AuxCoord(419805., "forecast_reference_time", units=time_unit)
    fp_coord = AuxCoord(6, "forecast_period", units="hours")
    spot_data = np.ones((4, 2), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data, 'air_temperature', 'degC',
        self.altitude, self.latitude, self.longitude, self.wmo_id,
        scalar_coords=[time_coord, frt_coord, fp_coord],
        neighbour_methods=self.neighbour_methods)
    self.assertAlmostEqual(result.coord('time').points[0], 419811.)
    self.assertAlmostEqual(
        result.coord('forecast_reference_time').points[0], 419805.)
    self.assertEqual(result.coord('forecast_period').points[0], 6)
def setUp(self):
    """Set up cubes for use in testing."""
    grid_data = np.ones(9).reshape(3, 3).astype(np.float32)
    self.reference_cube = set_up_variable_cube(
        grid_data, spatial_grid="equalarea")
    self.cube1 = self.reference_cube.copy()
    self.cube2 = self.reference_cube.copy()
    # A cube on a different grid, used to test mismatch handling.
    self.unmatched_cube = set_up_variable_cube(
        grid_data, spatial_grid="latlon")
    self.diagnostic_cube_hash = create_coordinate_hash(self.reference_cube)
    # A single-site neighbour cube: one selection method, three grid
    # attributes, one spot site.
    neighbours = np.array([[[0.0], [0.0], [0.0]]])
    altitudes = np.array([0])
    latitudes = np.array([0])
    longitudes = np.array([0])
    wmo_ids = np.array([0])
    self.neighbour_cube = build_spotdata_cube(
        neighbours,
        "grid_neighbours",
        1,
        altitudes,
        latitudes,
        longitudes,
        wmo_ids,
        grid_attributes=["x_index", "y_index", "vertical_displacement"],
        neighbour_methods=["nearest"],
    )
    self.neighbour_cube.attributes[
        "model_grid_hash"] = self.diagnostic_cube_hash
def test_scalar_coords(self):
    """Check that supplied scalar time coordinates appear on the
    returned cube."""
    [(time_coord, _), (frt_coord, _), (fp_coord, _)] = (
        construct_scalar_time_coords(
            datetime(2015, 11, 23, 4, 30), None,
            datetime(2015, 11, 22, 22, 30)))
    spot_data = np.ones((2, 4), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data,
        "air_temperature",
        "degC",
        self.altitude,
        self.latitude,
        self.longitude,
        self.wmo_id,
        scalar_coords=[time_coord, frt_coord, fp_coord],
        neighbour_methods=self.neighbour_methods,
    )
    # pylint: disable=unsubscriptable-object
    self.assertEqual(result.coord("time").points[0], time_coord.points[0])
    self.assertEqual(
        result.coord("forecast_reference_time").points[0],
        frt_coord.points[0])
    self.assertEqual(
        result.coord("forecast_period").points[0], fp_coord.points[0])
def build_diagnostic_cube(
    neighbour_cube: Cube,
    diagnostic_cube: Cube,
    spot_values: ndarray,
    additional_dims: Optional[List[DimCoord]] = None,
    scalar_coords: Optional[List[AuxCoord]] = None,
    auxiliary_coords: Optional[List[AuxCoord]] = None,
    unique_site_id: Optional[Union[List[str], ndarray]] = None,
    unique_site_id_key: Optional[str] = None,
) -> Cube:
    """
    Builds a spot data cube containing the extracted diagnostic values.

    Args:
        neighbour_cube:
            Source of the spot site metadata (altitude, y/x coordinates
            and WMO identifier) to be included in the spot diagnostic
            cube.
        diagnostic_cube:
            Provides the name and units of the diagnostic that is being
            processed.
        spot_values:
            An array containing the diagnostic values extracted for the
            required spot sites.
        additional_dims:
            Optional list containing iris.coord.DimCoords with any
            leading dimensions required before spot data.
        scalar_coords:
            Optional list containing iris.coord.AuxCoords with all
            scalar coordinates relevant for the spot sites.
        auxiliary_coords:
            Optional list containing iris.coords.AuxCoords which are
            non-scalar.
        unique_site_id:
            Optional list of 8-digit unique site identifiers.
        unique_site_id_key:
            String to name the unique_site_id coordinate. Required if
            unique_site_id is in use.

    Returns:
        A spot data cube containing the extracted diagnostic data.
    """
    # Return the constructed cube directly rather than via a local.
    return build_spotdata_cube(
        spot_values,
        diagnostic_cube.name(),
        diagnostic_cube.units,
        neighbour_cube.coord("altitude").points,
        neighbour_cube.coord(axis="y").points,
        neighbour_cube.coord(axis="x").points,
        neighbour_cube.coord("wmo_id").points,
        unique_site_id=unique_site_id,
        unique_site_id_key=unique_site_id_key,
        scalar_coords=scalar_coords,
        auxiliary_coords=auxiliary_coords,
        additional_dims=additional_dims,
    )
def truth_dataframe_to_cube(
    df: DataFrame,
    training_dates: DatetimeIndex,
) -> Cube:
    """Convert a truth DataFrame into an iris Cube.

    Args:
        df:
            DataFrame expected to contain the following columns: ob_value,
            time, wmo_id, diagnostic, latitude, longitude, altitude,
            cf_name, height, period and units. Any other columns are
            ignored.
        training_dates:
            Datetimes spanning the training period.

    Returns:
        Cube containing the truths from the training period, or None if
        there are no truths for any of the training dates.
    """
    cubelist = CubeList()
    for adate in training_dates:
        time_df = df.loc[df["time"] == adate]
        time_df = _preprocess_temporal_columns(time_df)
        if time_df.empty:
            continue
        # The following columns are expected to contain one unique value
        # per column.
        _unique_check(time_df, "diagnostic")
        # A NaN period for every row means an instantaneous diagnostic,
        # so the time coordinate gets no bounds.
        if time_df["period"].isna().all():
            time_bounds = None
        else:
            period = time_df["period"].values[0]
            time_bounds = [adate - period, adate]
        time_coord = _define_time_coord(adate, time_bounds)
        height_coord = _define_height_coord(time_df["height"].values[0])
        cube = build_spotdata_cube(
            time_df["ob_value"].astype(np.float32),
            time_df["cf_name"].values[0],
            time_df["units"].values[0],
            time_df["altitude"].astype(np.float32),
            time_df["latitude"].astype(np.float32),
            time_df["longitude"].astype(np.float32),
            time_df["wmo_id"].values.astype("U5"),
            scalar_coords=[time_coord, height_coord],
        )
        cubelist.append(cube)
    if not cubelist:
        # Previously a bare "return"; make the None return explicit.
        # NOTE(review): the -> Cube annotation is inaccurate for this
        # path; callers must handle a None result.
        return None
    return cubelist.merge_cube()
def test_3d_spot_cube(self):
    """Check the output when two extra dimensions are requested via
    neighbour methods and grid attributes."""
    spot_data = np.ones((4, 2, 3), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data, 'air_temperature', 'degC',
        self.altitude, self.latitude, self.longitude, self.wmo_id,
        neighbour_methods=self.neighbour_methods,
        grid_attributes=self.grid_attributes)
    self.assertArrayAlmostEqual(result.data, spot_data)
    self.assertEqual(result.coord_dims('neighbour_selection_method')[0], 1)
    self.assertEqual(result.coord_dims('grid_attributes')[0], 2)
def test_3d_spot_cube(self):
    """Check the output when two extra leading dimensions are requested
    via neighbour methods and grid attributes."""
    spot_data = np.ones((2, 3, 4), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data,
        *self.args,
        neighbour_methods=self.neighbour_methods,
        grid_attributes=self.grid_attributes,
    )
    self.assertArrayAlmostEqual(result.data, spot_data)
    self.assertEqual(result.coord_dims("neighbour_selection_method")[0], 0)
    self.assertEqual(result.coord_dims("grid_attributes")[0], 1)
def test_site_list(self):
    """Check the cube built for a list of sites carries the data and all
    of the site metadata coordinates."""
    spot_data = np.array([1.6, 1.3, 1.4, 1.1])
    result = build_spotdata_cube(spot_data, *self.args)
    self.assertArrayAlmostEqual(result.data, spot_data)
    for coord_name, expected in (
        ("altitude", self.altitude),
        ("latitude", self.latitude),
        ("longitude", self.longitude),
    ):
        self.assertArrayAlmostEqual(result.coord(coord_name).points, expected)
    self.assertArrayEqual(result.coord("wmo_id").points, self.wmo_id)
def test_site_list(self):
    """Check the cube built for a list of sites carries the data and all
    of the site metadata coordinates."""
    spot_data = np.array([1.6, 1.3, 1.4, 1.1])
    result = build_spotdata_cube(
        spot_data, 'air_temperature', 'degC',
        self.altitude, self.latitude, self.longitude, self.wmo_id)
    self.assertArrayAlmostEqual(result.data, spot_data)
    for coord_name, expected in (
        ('altitude', self.altitude),
        ('latitude', self.latitude),
        ('longitude', self.longitude),
    ):
        self.assertArrayAlmostEqual(result.coord(coord_name).points, expected)
    self.assertArrayEqual(result.coord('wmo_id').points, self.wmo_id)
def setUp(self):
    """Set-up truth cubes."""
    super().setUp()
    # Build the cube expected from the input dataframe, one validity
    # time at a time, then merge into a single cube.
    truth_cubes = iris.cube.CubeList([])
    for validity_time, chunk in zip(
            [self.time1, self.time2, self.time3],
            _chunker(self.truth_data, 3)):
        time_coord = iris.coords.DimCoord(
            np.array(validity_time.timestamp(),
                     dtype=TIME_COORDS["time"].dtype),
            "time",
            bounds=[
                np.array(t.timestamp(), dtype=TIME_COORDS["time"].dtype)
                for t in [validity_time - self.period, validity_time]
            ],
            units=TIME_COORDS["time"].units,
        )
        truth_cubes.append(
            build_spotdata_cube(
                chunk.astype(np.float32),
                self.cf_name,
                self.units,
                np.array(self.altitudes, dtype=np.float32),
                np.array(self.latitudes, dtype=np.float32),
                np.array(self.longitudes, dtype=np.float32),
                wmo_id=self.wmo_ids,
                scalar_coords=[time_coord, self.height_coord],
            ))
    self.expected_period_truth = truth_cubes.merge_cube()
    # The instantaneous variant is identical but without time bounds.
    self.expected_instantaneous_truth = self.expected_period_truth.copy()
    self.expected_instantaneous_truth.coord("time").bounds = None
    # Single-site variant carrying a unique station identifier derived
    # from the second WMO ID.
    unique_id_coord = iris.coords.AuxCoord(
        [self.wmo_ids[1] + "0"],
        long_name="station_id",
        units="no_unit",
        attributes={"unique_site_identifier": "true"},
    )
    self.expected_truth_station_id = (
        self.expected_period_truth[:, [1]].copy())
    site_id_dim = self.expected_truth_station_id.coord_dims(
        "spot_index")[0]
    self.expected_truth_station_id.add_aux_coord(
        unique_id_coord, site_id_dim)
    self.expected_truth_station_id.coord("spot_index").points = np.array(
        [0], dtype=np.int32)
def wxcode_series_fixture(data, cube_type, offset_reference_times: bool) -> list:
    """Generate a time series of weather code cubes for combination to
    create a period representative code. When offset_reference_times is
    set, each successive cube will have a reference time one hour older.

    Args:
        data: Weather codes, one per cube in the returned series.
        cube_type: "gridded" to produce gridded weather code cubes; any
            other value produces spot cubes.
        offset_reference_times: Whether successive cubes have
            progressively older forecast reference times.

    Returns:
        A list of weather code cubes, one per entry in ``data``.
        (Annotation fixed: the function returns a list, not a Cube.)
    """
    time = TARGET_TIME
    ntimes = len(data)
    wxcubes = []
    for i in range(ntimes):
        wxtime = time - timedelta(hours=i)
        wxbounds = [wxtime - timedelta(hours=1), wxtime]
        # Reference time is 18 hours before the target, optionally aged
        # by one further hour per step back in the series.
        if offset_reference_times:
            wxfrt = time - timedelta(hours=18) - timedelta(hours=i)
        else:
            wxfrt = time - timedelta(hours=18)
        wxdata = np.ones((2, 2), dtype=np.int8)
        wxdata[0, 0] = data[i]
        if cube_type == "gridded":
            wxcubes.append(
                set_up_wxcube(
                    data=wxdata, time=wxtime, time_bounds=wxbounds,
                    frt=wxfrt))
        else:
            time_coords = construct_scalar_time_coords(
                wxtime, wxbounds, wxfrt)
            time_coords = [crd for crd, _ in time_coords]
            latitudes = np.array([50, 52, 54, 56])
            longitudes = np.array([-4, -2, 0, 2])
            altitudes = wmo_ids = unique_site_id = np.arange(4)
            unique_site_id_key = "met_office_site_id"
            wxcubes.append(
                build_spotdata_cube(
                    wxdata.flatten(),
                    "weather_code",
                    1,
                    altitudes,
                    latitudes,
                    longitudes,
                    wmo_ids,
                    unique_site_id=unique_site_id,
                    unique_site_id_key=unique_site_id_key,
                    scalar_coords=time_coords,
                ))
    return wxcubes
def test_neighbour_method(self):
    """Check the coordinates added when neighbour_methods is
    populated."""
    spot_data = np.array([[1.6, 1.7], [1.3, 1.5], [1.4, 1.4], [1.1, 1.3]])
    result = build_spotdata_cube(
        spot_data, 'air_temperature', 'degC',
        self.altitude, self.latitude, self.longitude, self.wmo_id,
        neighbour_methods=self.neighbour_methods)
    self.assertArrayAlmostEqual(result.data, spot_data)
    self.assertEqual(result.coord_dims('neighbour_selection_method')[0], 1)
    self.assertArrayEqual(
        result.coord('neighbour_selection_method').points, np.arange(2))
    self.assertArrayEqual(
        result.coord('neighbour_selection_method_name').points,
        self.neighbour_methods)
def test_grid_attributes(self):
    """Check the coordinates added when grid_attributes is populated on
    a chosen dimension."""
    spot_data = np.array([[1.6, 1.7, 1.8],
                          [1.3, 1.5, 1.5],
                          [1.4, 1.4, 1.5],
                          [1.1, 1.3, 1.4]])
    result = build_spotdata_cube(
        spot_data, 'air_temperature', 'degC',
        self.altitude, self.latitude, self.longitude, self.wmo_id,
        grid_attributes=self.grid_attributes,
        grid_attributes_dim=1)
    self.assertArrayAlmostEqual(result.data, spot_data)
    self.assertEqual(result.coord_dims('grid_attributes')[0], 1)
    self.assertArrayEqual(
        result.coord('grid_attributes').points, np.arange(3))
    self.assertArrayEqual(
        result.coord('grid_attributes_key').points, self.grid_attributes)
def test_neighbour_method(self):
    """Check the coordinates added when neighbour_methods is
    populated."""
    spot_data = np.array([[1.6, 1.3, 1.4, 1.1], [1.7, 1.5, 1.4, 1.3]])
    result = build_spotdata_cube(
        spot_data, *self.args, neighbour_methods=self.neighbour_methods)
    self.assertArrayAlmostEqual(result.data, spot_data)
    self.assertEqual(result.coord_dims("neighbour_selection_method")[0], 0)
    self.assertArrayEqual(
        result.coord("neighbour_selection_method").points, np.arange(2))
    self.assertArrayEqual(
        result.coord("neighbour_selection_method_name").points,
        self.neighbour_methods,
    )
def test_site_list_with_unique_id_coordinate(self):
    """Check that the unique ID coordinate is present and flagged as a
    unique site identifier when both the IDs and the key are supplied."""
    spot_data = np.array([1.6, 1.3, 1.4, 1.1])
    result = build_spotdata_cube(
        spot_data,
        *self.args,
        unique_site_id=self.unique_site_id,
        unique_site_id_key="met_office_site_id",
    )
    id_coord = result.coord("met_office_site_id")
    self.assertArrayEqual(id_coord.points, self.unique_site_id)
    self.assertEqual(
        id_coord.attributes["unique_site_identifier"], "true")
def test_grid_attributes(self):
    """Check the coordinates added when grid_attributes is populated."""
    spot_data = np.array([[1.6, 1.3, 1.4, 1.1],
                          [1.7, 1.5, 1.4, 1.3],
                          [1.8, 1.5, 1.5, 1.4]])
    result = build_spotdata_cube(
        spot_data, *self.args, grid_attributes=self.grid_attributes,
    )
    self.assertArrayAlmostEqual(result.data, spot_data)
    self.assertEqual(result.coord_dims("grid_attributes")[0], 0)
    self.assertArrayEqual(
        result.coord("grid_attributes").points, np.arange(3))
    self.assertArrayEqual(
        result.coord("grid_attributes_key").points, self.grid_attributes)
def test_non_scalar_coords(self):
    """Check that non-scalar auxiliary coordinates — e.g. a
    multi-dimensional time coordinate for local-timezone products that
    has been reshaped to 1 dimension — are associated with the
    spot-index dimension."""
    times = np.array([datetime(2015, 11, 23, i, 0) for i in range(0, 4)])
    time_coord = iris.coords.AuxCoord(times, "time")
    spot_data = np.ones((2, 4), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data,
        *self.args,
        auxiliary_coords=[time_coord],
        neighbour_methods=self.neighbour_methods,
    )
    self.assertArrayEqual(result.coord("time").points, times)
    self.assertEqual(
        result.coord_dims("time"), result.coord_dims("spot_index"))
def test_3d_spot_cube(self):
    """Check the output when two extra leading dimensions are requested
    via neighbour methods and grid attributes."""
    spot_data = np.ones((2, 3, 4), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data,
        "air_temperature",
        "degC",
        self.altitude,
        self.latitude,
        self.longitude,
        self.wmo_id,
        neighbour_methods=self.neighbour_methods,
        grid_attributes=self.grid_attributes,
    )
    self.assertArrayAlmostEqual(result.data, spot_data)
    self.assertEqual(result.coord_dims("neighbour_selection_method")[0], 0)
    self.assertEqual(result.coord_dims("grid_attributes")[0], 1)
def build_diagnostic_cube(
    neighbour_cube: Cube,
    diagnostic_cube: Cube,
    spot_values: ndarray,
    additional_dims: Optional[List[DimCoord]] = None,
    scalar_coords: Optional[List[AuxCoord]] = None,
) -> Cube:
    """
    Builds a spot data cube containing the extracted diagnostic values.

    Args:
        neighbour_cube:
            Source of the spot site metadata (altitude, y/x coordinates
            and WMO identifier) to be included in the spot diagnostic
            cube.
        diagnostic_cube:
            Provides the name and units of the diagnostic that is being
            processed.
        spot_values:
            An array containing the diagnostic values extracted for the
            required spot sites.
        additional_dims:
            Optional list containing iris.coord.DimCoords with any
            leading dimensions required before spot data.
        scalar_coords:
            Optional list containing iris.coord.AuxCoords with all
            scalar coordinates relevant for the spot sites.

    Returns:
        A spot data cube containing the extracted diagnostic data.
    """
    # Return directly rather than reusing the neighbour_cube name for
    # the result.
    return build_spotdata_cube(
        spot_values,
        diagnostic_cube.name(),
        diagnostic_cube.units,
        neighbour_cube.coord("altitude").points,
        neighbour_cube.coord(axis="y").points,
        neighbour_cube.coord(axis="x").points,
        neighbour_cube.coord("wmo_id").points,
        scalar_coords=scalar_coords,
        additional_dims=additional_dims,
    )
def spot_fixture():
    """Six-site spot air temperature cube with a scalar percentile (50%)
    auxiliary coordinate."""
    # Site metadata: altitudes (m), latitudes/longitudes (degrees) and
    # WMO station identifiers for six UK sites.
    alts = np.array([15, 82, 0, 4, 15, 269], dtype=np.float32)
    lats = np.array([60.75, 60.13, 58.95, 57.37, 58.22, 57.72], dtype=np.float32)
    lons = np.array([-0.85, -1.18, -2.9, -7.40, -6.32, -4.90], dtype=np.float32)
    wmo_ids = ["3002", "3005", "3017", "3023", "3026", "3031"]
    cube = build_spotdata_cube(
        np.arange(6).astype(np.float32),
        "air_temperature",
        "degC",
        alts,
        lats,
        lons,
        wmo_ids,
    )
    # Scalar 50th-percentile coordinate attached to the cube.
    cube.add_aux_coord(
        iris.coords.AuxCoord([50], long_name="percentile", units="%"))
    return cube
def test_scalar_coords(self):
    """Check that supplied scalar time coordinates appear on the
    returned cube."""
    [(time_coord, _), (frt_coord, _), (fp_coord, _)] = (
        construct_scalar_time_coords(
            datetime(2015, 11, 23, 4, 30), None,
            datetime(2015, 11, 22, 22, 30)))
    spot_data = np.ones((2, 4), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data,
        *self.args,
        scalar_coords=[time_coord, frt_coord, fp_coord],
        neighbour_methods=self.neighbour_methods,
    )
    self.assertEqual(result.coord("time").points[0], time_coord.points[0])
    self.assertEqual(
        result.coord("forecast_reference_time").points[0],
        frt_coord.points[0])
    self.assertEqual(
        result.coord("forecast_period").points[0], fp_coord.points[0])
def _build_spot_probability_cube(dummy_point_locations, dummy_string_ids):
    """Build a spot cube with an IMPROVER-style threshold coordinate and
    matching probability data."""
    threshold = DimCoord(
        ECC_TEMPERATURE_THRESHOLDS,
        standard_name="air_temperature",
        var_name="threshold",
        units="degC",
        attributes={"spp__relative_to_threshold": "above"},
    )
    spot_cube = build_spotdata_cube(
        ECC_SPOT_PROBABILITIES,
        name="probability_of_air_temperature_above_threshold",
        units="1",
        altitude=dummy_point_locations,
        latitude=dummy_point_locations,
        longitude=dummy_point_locations,
        wmo_id=dummy_string_ids,
        additional_dims=[threshold],
    )
    return spot_cube
def test_scalar_coords(self):
    """Check that supplied scalar time coordinates appear on the
    returned cube."""
    [(time_coord, _), (frt_coord, _), (fp_coord, _)] = (
        construct_scalar_time_coords(
            datetime(2015, 11, 23, 4, 30), None,
            datetime(2015, 11, 22, 22, 30)))
    spot_data = np.ones((4, 2), dtype=np.float32)
    result = build_spotdata_cube(
        spot_data, 'air_temperature', 'degC',
        self.altitude, self.latitude, self.longitude, self.wmo_id,
        scalar_coords=[time_coord, frt_coord, fp_coord],
        neighbour_methods=self.neighbour_methods)
    self.assertEqual(result.coord('time').points[0], time_coord.points[0])
    self.assertEqual(
        result.coord('forecast_reference_time').points[0],
        frt_coord.points[0])
    self.assertEqual(
        result.coord('forecast_period').points[0], fp_coord.points[0])