def test_statsmodels_mean(self, warning_list=None):
    """
    Test that the plugin raises no warnings if the statsmodels module
    is not found when the predictor is the ensemble mean.
    """
    import imp
    try:
        imp.find_module('statsmodels')
        statsmodels_found = True
    except ImportError:
        statsmodels_found = False

    cube = self.cube

    historic_forecasts = CubeList([])
    for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
        temp_cube = cube.copy()
        temp_cube.coord("time").points = (
            temp_cube.coord("time").points - index)
        historic_forecasts.append(temp_cube)
    historic_forecasts.concatenate_cube()

    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "mean"

    if not statsmodels_found:
        Plugin(distribution, desired_units,
               predictor_of_mean_flag=predictor_of_mean_flag)
        self.assertTrue(len(warning_list) == 0)

def test_statsmodels_mean(self, warning_list=None):
    """
    Test that the plugin raises no warnings if the statsmodels module
    is not found when the predictor is the ensemble mean.
    """
    import imp
    try:
        imp.find_module('statsmodels')
        statsmodels_found = True
    except ImportError:
        statsmodels_found = False

    cube = self.cube

    historic_forecasts = CubeList([])
    for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
        temp_cube = cube.copy()
        temp_cube.coord("time").points = (
            temp_cube.coord("time").points - index)
        historic_forecasts.append(temp_cube)
    historic_forecasts.concatenate_cube()

    current_forecast_predictor = cube
    truth = cube.collapsed("realization", iris.analysis.MAX)

    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "mean"
    no_of_realizations = 3
    estimate_coefficients_from_linear_model_flag = True

    if not statsmodels_found:
        plugin = Plugin(distribution, desired_units,
                        predictor_of_mean_flag=predictor_of_mean_flag)
        self.assertTrue(len(warning_list) == 0)

def load_Nexrad(filenames, variable):
    import iris
    from iris.cube import CubeList
    from iris import coords
    from datetime import datetime, timedelta
    from os.path import basename

    cube_list = []
    for filename in filenames:
        # e.g. variable = 'equivalent_reflectivity_factor'
        cube = iris.load_cube(filename, variable)
        # time = iris.load_cube(filename, 'time')
        timestring = basename(filename)[12:27]
        time_point = datetime.strptime(timestring, "%Y%m%d_%H%M%S")
        time_days = ((time_point - datetime(1970, 1, 1)).total_seconds()
                     / timedelta(1).total_seconds())
        x = iris.load_cube(filename, 'X-coordinate in Cartesian system')
        y = iris.load_cube(filename, 'Y-coordinate in Cartesian system')
        z = iris.load_cube(filename, 'Z-coordinate in Cartesian system')
        lat = iris.load_cube(filename, 'Latitude grid')
        lon = iris.load_cube(filename, 'Longitude grid')

        cube.remove_coord('time')
        cube.add_dim_coord(
            coords.DimCoord(time_days, standard_name=None, long_name='time',
                            var_name='time', units='days since 1970-01-01',
                            bounds=None, attributes=None, coord_system=None,
                            circular=False), 0)
        cube.add_dim_coord(
            coords.DimCoord(x.data, standard_name=None, long_name='x',
                            var_name='x', units='m', bounds=None,
                            attributes=None, coord_system=None,
                            circular=False), 2)
        cube.add_dim_coord(
            coords.DimCoord(y.data, standard_name=None, long_name='y',
                            var_name='y', units='m', bounds=None,
                            attributes=None, coord_system=None,
                            circular=False), 3)
        cube.add_dim_coord(
            coords.DimCoord(z.data, standard_name=None, long_name='z',
                            var_name='z', units='m', bounds=None,
                            attributes=None, coord_system=None,
                            circular=False), 1)
        cube.add_aux_coord(
            coords.AuxCoord(lat.data, standard_name='latitude',
                            long_name='latitude', var_name='latitude',
                            units='degrees', bounds=None, attributes=None,
                            coord_system=None), (2, 3))
        cube.add_aux_coord(
            coords.AuxCoord(lon.data, standard_name='longitude',
                            long_name='longitude', var_name='longitude',
                            units='degrees', bounds=None, attributes=None,
                            coord_system=None), (2, 3))
        cube_list.append(cube)

    # Drop per-file attributes so the cubes can be concatenated
    for member in cube_list:
        member.attributes = {}

    variable_cubes = CubeList(cube_list)
    variable_cube = variable_cubes.concatenate_cube()
    return variable_cube

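# A minimal usage sketch for load_Nexrad, not taken from a real dataset: the
# file pattern and variable name below are assumptions. Note that the function
# expects the timestamp to sit at characters 12-26 of each file name (e.g.
# "grid_nexrad_20170520_120000.nc") and that all files share grids so the
# per-file cubes concatenate along the new time dimension.
import glob

nexrad_files = sorted(glob.glob("grid_nexrad_*.nc"))  # hypothetical file names
reflectivity = load_Nexrad(nexrad_files, "equivalent_reflectivity_factor")
print(reflectivity.summary(shorten=True))
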
def test_statsmodels_members(self):
    """
    Test that the plugin raises the desired warning if the statsmodels
    module is not found when the predictor is the ensemble members.
    """
    warnings.simplefilter("always")
    import imp
    try:
        imp.find_module('statsmodels')
        statsmodels_found = True
    except ImportError:
        statsmodels_found = False

    cube = self.cube

    historic_forecasts = CubeList([])
    for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
        temp_cube = cube.copy()
        temp_cube.coord("time").points = (
            temp_cube.coord("time").points - index)
        historic_forecasts.append(temp_cube)
    historic_forecasts.concatenate_cube()

    current_forecast_predictor = cube
    truth = cube.collapsed("realization", iris.analysis.MAX)

    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "members"
    no_of_members = 3
    estimate_coefficients_from_linear_model_flag = True

    if not statsmodels_found:
        with warnings.catch_warnings(record=True) as warning_list:
            plugin = Plugin(distribution, desired_units,
                            predictor_of_mean_flag=predictor_of_mean_flag)
            self.assertTrue(len(warning_list) == 1)
            self.assertTrue(
                any(item.category == UserWarning for item in warning_list))
            self.assertTrue("The statsmodels can not be imported" in str(
                warning_list[0]))

def ensemble_collocate(ensemble, observations, member_dimension='job'):
    """
    Efficiently collocate (interpolate) many ensemble members on to a set of
    (un-gridded) observations

    Note
    ----
    This function requires both Iris and CIS to be installed

    Parameters
    ----------
    ensemble: ~cis.data_io.gridded_data.GriddedData
        The ensemble of (model) samples to interpolate on to the observations
    observations: ~cis.data_io.ungridded_data.UngriddedData
        The observations on to which the ensemble will be sampled
    member_dimension: str
        The name of the dimension which represents the ensemble members
        in `ensemble`

    Returns
    -------
    col_ensemble: iris.cube.Cube
        The ensemble values interpolated on to the observation locations,
        with the ensemble members along the leading dimension.
    """
    from iris.cube import Cube, CubeList
    from iris.coords import DimCoord, AuxCoord
    from cis.collocation.col_implementations import (
        GriddedUngriddedCollocator, DummyConstraint)
    from cis.data_io.gridded_data import make_from_cube

    col = GriddedUngriddedCollocator(missing_data_for_missing_sample=False)
    col_members = CubeList()

    for member in ensemble.slices_over(member_dimension):
        # Use CIS to collocate each ensemble member on to the observations
        # The interpolation weights are cached within col automatically
        collocated_job, = col.collocate(observations, make_from_cube(member),
                                        DummyConstraint(), 'lin')

        # Turn the interpolated data in to a flat cube for easy stacking
        new_c = Cube(collocated_job.data.reshape(1, -1),
                     long_name=collocated_job.name(), units='1',
                     dim_coords_and_dims=[
                         (DimCoord(np.arange(collocated_job.data.shape[0]),
                                   long_name="obs"), 1),
                         (DimCoord(member.coord(member_dimension).points,
                                   long_name=member_dimension), 0)],
                     aux_coords_and_dims=[
                         (AuxCoord(c.points, standard_name=c.standard_name), 1)
                         for c in collocated_job.coords()])
        col_members.append(new_c)

    col_ensemble = col_members.concatenate_cube()
    return col_ensemble

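# A minimal sketch of calling ensemble_collocate, assuming an ESEM-style
# workflow in which `ensemble` is gridded model data carrying a 'job'
# dimension over the ensemble members and `obs` is CIS ungridded data. The
# file names and variable names are hypothetical.
from cis import read_data

ensemble = read_data("ensemble_runs_*.nc", "od550aer")  # hypothetical files
obs = read_data("aeronet_aod.lev20", "AOT_500")         # hypothetical files
collocated = ensemble_collocate(ensemble, obs, member_dimension="job")
print(collocated.shape)  # (n_members, n_obs)
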
def test_predict_interface_multiple_samples(self):
    from iris.cube import CubeList

    # Get the actual test data
    # Use the class method `eval_fn` so 'self' doesn't get passed
    expected = CubeList([type(self).eval_fn(p, job_n=i)
                         for i, p in enumerate(self.test_params)])
    expected = expected.concatenate_cube()

    pred_m, pred_var = self.model.predict(self.test_params)

    assert_allclose(expected.data, pred_m.data, rtol=1e-3)
    assert pred_m.name() == 'Emulated ' + (expected.name() or 'data')
    assert pred_var.name() == 'Variance in emulated ' + (expected.name() or 'data')
    assert pred_m.units == expected.units
    assert pred_var.units == expected.units

def test_statsmodels_realizations(self, warning_list=None):
    """
    Test that the plugin raises the desired warning if the statsmodels
    module is not found when the predictor is the ensemble realizations.
    """
    import imp
    try:
        imp.find_module('statsmodels')
        statsmodels_found = True
    except ImportError:
        statsmodels_found = False

    cube = self.cube

    historic_forecasts = CubeList([])
    for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
        temp_cube = cube.copy()
        temp_cube.coord("time").points = (
            temp_cube.coord("time").points - index)
        historic_forecasts.append(temp_cube)
    historic_forecasts.concatenate_cube()

    distribution = "gaussian"
    desired_units = "degreesC"
    predictor_of_mean_flag = "realizations"

    if not statsmodels_found:
        Plugin(distribution, desired_units,
               predictor_of_mean_flag=predictor_of_mean_flag)
        warning_msg = "The statsmodels can not be imported"
        self.assertTrue(
            any(item.category == ImportWarning for item in warning_list))
        self.assertTrue(
            any(warning_msg in str(item) for item in warning_list))

def test_batch_stats(self):
    from iris.cube import CubeList
    from esem.utils import get_random_params

    # Test that the sample_mean function returns the mean of the sample
    sample_params = get_random_params(self.params.shape[1], 25)
    expected = CubeList([type(self).eval_fn(p, job_n=i)
                         for i, p in enumerate(sample_params)])
    expected_ensemble = expected.concatenate_cube()

    mean, std_dev = self.model.batch_stats(sample_params)

    assert_allclose(mean.data, expected_ensemble.data.mean(axis=0), rtol=0.5)
    # This is a really loose test but it needs to be because of the
    # stochastic nature of the model and the ensemble points
    assert_allclose(std_dev.data, expected_ensemble.data.std(axis=0), rtol=0.5)

def test_predict_interface_multiple_samples(self):
    from iris.cube import CubeList

    # Get the actual test data
    # Use the class method `eval_fn` so 'self' doesn't get passed
    expected = CubeList([type(self).eval_fn(p, job_n=i)
                         for i, p in enumerate(self.test_params)])
    expected = expected.concatenate_cube()

    pred_m, pred_var = self.model.predict(self.test_params)

    # For some reason the relative tolerance has to be
    # higher here than in the other tests
    assert_allclose(expected.data, pred_m.data, rtol=1e-1)
    assert pred_m.name() == 'Emulated ' + (expected.name() or 'data')

    assert_allclose(np.full_like(expected.data, np.NaN), pred_var.data,
                    equal_nan=True)
    assert pred_var.name() == 'Variance in emulated ' + expected.name()
    assert pred_m.units == expected.units

def get_1d_two_param_cube(params=None, n_samples=10):
    """
    Create an ensemble of 1d cubes perturbed over two idealised parameter
    spaces. One of params or n_samples must be provided.

    :param np.array params: A list of params to sample the ensemble over
    :param int n_samples: The number of params to sample (between 0. and 1.)
    :return:
    """
    from iris.cube import CubeList

    if params is None:
        params = np.linspace(np.zeros((2, )), np.ones((2, )), n_samples)

    cubes = CubeList([])
    for j, p in enumerate(params):
        c = make_dummy_1d_cube(j)
        # Perturb base data to represent some change in a parameter
        c.data *= simple_polynomial_fn_two_param(*p)
        cubes.append(c)

    ensemble = cubes.concatenate_cube()
    return ensemble

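# A small sketch of how get_1d_two_param_cube might be used (assuming
# make_dummy_1d_cube and simple_polynomial_fn_two_param from the same test
# utilities are in scope): either let it sample the unit square itself, or
# pass explicit parameter pairs.
import numpy as np

default_ensemble = get_1d_two_param_cube(n_samples=5)
custom_ensemble = get_1d_two_param_cube(params=np.array([[0.1, 0.9],
                                                         [0.5, 0.5]]))
print(default_ensemble.shape, custom_ensemble.shape)
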
def segmentation_2D(track, field, dxy, threshold=0, target='maximum',
                    method='watershed', max_distance=None):
    """
    Function using watershedding or random walker to determine cloud volumes
    associated with tracked updrafts

    Parameters:
    track:     pandas.DataFrame
               output from trackpy/maketrack
    field:     iris.cube.Cube
               containing the (time, x, y) field to perform the watershedding on
    threshold: float
               threshold for the watershedding field to be used for the mask
    target:    string
               Switch to determine if the algorithm starts from maxima or minima
               in the input field (maximum: starting from maxima (default),
               minimum: starting from minima)
    method:    str ('method')
               flag determining the algorithm to use (currently only
               watershedding is implemented)

    Output:
    segmentation_out: iris.cube.Cube
               Cloud mask, 0 outside and integer numbers according to track
               inside the clouds
    """
    import numpy as np
    from skimage.morphology import watershed
    # from skimage.segmentation import random_walker
    import logging
    from iris.cube import CubeList
    from iris.util import new_axis
    from scipy.ndimage import distance_transform_edt

    logging.info('Start watershedding 2D')

    # CubeList to store individual segmentation masks
    segmentation_out_list = CubeList()

    track['ncells'] = 0

    if max_distance is not None:
        max_distance_pixel = np.ceil(max_distance / dxy)

    field_time = field.slices_over('time')
    for i, field_i in enumerate(field_time):
        # Create cube of the same dimensions and coordinates as input data
        # to store mask:
        segmentation_out_i = 1 * field_i
        segmentation_out_i.rename('segmentation_mask')
        segmentation_out_i.units = 1

        data_i = field_i.core_data()
        time_i = field_i.coord('time').units.num2date(
            field_i.coord('time').points[0])
        tracks_i = track[track['time'] == time_i]

        # mask data outside region above/below threshold and invert data
        # if tracking maxima:
        if target == 'maximum':
            unmasked = data_i > threshold
            data_i_segmentation = -1 * data_i
        elif target == 'minimum':
            unmasked = data_i < threshold
            data_i_segmentation = data_i
        else:
            raise ValueError('unknown type of target')

        markers = np.zeros_like(unmasked).astype(np.int32)
        for index, row in tracks_i.iterrows():
            markers[int(row['hdim_1']), int(row['hdim_2'])] = row['feature']
        markers[~unmasked] = 0

        if method == 'watershed':
            segmentation_mask_i = watershed(data_i_segmentation,
                                            markers.astype(np.int32),
                                            mask=unmasked)
        # elif method == 'random_walker':
        #     segmentation_mask_i = random_walker(
        #         data_i_segmentation, markers.astype(np.int32),
        #         beta=130, mode='bf', tol=0.001, copy=True,
        #         multichannel=False, return_full_prob=False, spacing=None)
        else:
            raise ValueError('unknown method, must be watershed')

        # remove everything from the individual masks that is more than
        # max_distance_pixel away from the markers
        if max_distance is not None:
            for feature in tracks_i['feature']:
                D = distance_transform_edt((markers != feature).astype(int))
                segmentation_mask_i[np.bitwise_and(
                    segmentation_mask_i == feature,
                    D > max_distance_pixel)] = 0

        segmentation_out_i.data = segmentation_mask_i
        # using merge throws an error, so time is promoted to a DimCoord and
        # concatenate is used instead:
        # segmentation_out_list.append(segmentation_out_i)
        segmentation_out_i_temp = new_axis(segmentation_out_i,
                                           scalar_coord='time')
        segmentation_out_list.append(segmentation_out_i_temp)

        # count the number of grid cells associated with each tracked cell
        # and write that into the DataFrame:
        values, count = np.unique(segmentation_mask_i, return_counts=True)
        counts = dict(zip(values, count))
        for index, row in tracks_i.iterrows():
            if row['feature'] in counts.keys():
                track.loc[index, 'ncells'] = counts[row['feature']]

        logging.debug('Finished segmentation 2D for '
                      + time_i.strftime('%Y-%m-%d_%H:%M:%S'))

    # merge the individual masks in the CubeList into one Cube:
    # using merge throws an error, so concatenate the cubes with time
    # promoted to a DimCoord instead:
    # segmentation_out = segmentation_out_list.merge_cube()
    segmentation_out = segmentation_out_list.concatenate_cube()

    logging.debug('Finished segmentation 2D')
    return segmentation_out, track

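# A minimal sketch of calling segmentation_2D, assuming a tobac-style workflow
# in which `features` is a DataFrame from an earlier feature-detection step
# (with 'time', 'hdim_1', 'hdim_2' and 'feature' columns) and `w_max` is an
# iris cube of the 2D field with a time coordinate; the variable names, the
# 500 m grid spacing and the 3 m/s threshold are illustrative only.
mask, features_with_ncells = segmentation_2D(
    features, w_max, dxy=500.0, threshold=3.0,
    target='maximum', method='watershed', max_distance=2000.0)
print(features_with_ncells[['feature', 'ncells']].head())
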
def _get_cube(self, file_list, climatology=False,
              overlay_probability_levels=False):
    """
    Get an iris cube based on the given files using selection criteria
    from the input_data.

    @param file_list (list[str]): a list of file names to retrieve data from
    @param climatology (boolean): if True extract the climatology data
    @param overlay_probability_levels (boolean): if True only include the
        10th, 50th and 90th percentile data

    @return an iris cube, may be 'None' if overlay_probability_levels=True
    """
    if climatology is True:
        LOG.info("_get_cube for climatology")
    elif overlay_probability_levels is True:
        LOG.info("_get_cube, overlay probability levels")
    else:
        LOG.info("_get_cube")

    if LOG.getEffectiveLevel() == logging.DEBUG:
        LOG.debug("_get_cube from %s files", len(file_list))
        for fpath in file_list:
            LOG.debug(" - FILE: %s", fpath)

    # Load the cubes
    cubes = CubeList()
    try:
        for file_path in file_list:
            f_list = glob.glob(file_path)
            cube_list = [iris.load_cube(f) for f in f_list]
            cubes.extend(cube_list)
    except IOError as ex:
        if overlay_probability_levels is True:
            # not all variables have corresponding probabilistic data
            return None
        for file_name in file_list:
            file_name = file_name.split("*")[0]
            if not path.exists(file_name):
                LOG.error("File not found: %s", file_name)
        raise UKCPDPDataNotFoundException from ex

    if overlay_probability_levels is True:
        collection = COLLECTION_PROB
    else:
        collection = self.input_data.get_value(InputType.COLLECTION)

    # Remove time_bnds cubes
    if collection == COLLECTION_PROB:
        unfiltered_cubes = cubes
        cubes = CubeList()
        for cube in unfiltered_cubes:
            if cube.name() != "time_bnds":
                cubes.append(cube)

    # Different creation dates will stop cubes concatenating, so let's
    # remove them
    for cube in cubes:
        coords = cube.coords(var_name="creation_date")
        for coord in coords:
            cube.remove_coord(coord)

    if len(cubes) == 0:
        LOG.warning("No data was retrieved from the following files:%s",
                    file_list)
        raise UKCPDPDataNotFoundException(
            "No data found for given selection options")

    LOG.debug("First cube:\n%s", cubes[0])
    LOG.debug("Concatenate cubes:\n%s", cubes)

    iris.experimental.equalise_cubes.equalise_attributes(cubes)
    unify_time_units(cubes)

    try:
        cube = cubes.concatenate_cube()
    except iris.exceptions.ConcatenateError as ex:
        LOG.error("Failed to concatenate cubes:\n%s\n%s", ex, cubes)
        error_cubes = CubeList()
        for error_cube in cubes:
            error_cubes.append(error_cube)
            try:
                LOG.info("Appending %s",
                         error_cube.coord("ensemble_member_id").points[0])
            except iris.exceptions.CoordinateNotFoundError:
                pass
            try:
                error_cubes.concatenate_cube()
            except iris.exceptions.ConcatenateError as ex:
                message = ""
                try:
                    message = " {}".format(
                        error_cube.coord("ensemble_member_id").points[0])
                except iris.exceptions.CoordinateNotFoundError:
                    pass
                LOG.error(
                    "Error when concatenating cube%s:\n%s\n%s",
                    message,
                    ex,
                    error_cube,
                )
                break
        # pylint: disable=W0707
        raise UKCPDPDataNotFoundException(
            "No data found for given selection options")

    LOG.debug("Concatenated cube:\n%s", cube)

    if climatology is True:
        # generate a time slice constraint based on the baseline
        time_slice_constraint = self._time_slice_selector(True)
    else:
        # generate a time slice constraint
        time_slice_constraint = self._time_slice_selector(False)
    if time_slice_constraint is not None:
        cube = cube.extract(time_slice_constraint)

    if cube is None:
        if time_slice_constraint is not None:
            LOG.warning(
                "Time slice constraint resulted in no cubes being "
                "returned: %s",
                time_slice_constraint,
            )
        raise UKCPDPDataNotFoundException(
            "Selection constraints resulted in no data being selected")

    # generate a temporal constraint
    temporal_constraint = self._get_temporal_selector()
    if temporal_constraint is not None:
        cube = cube.extract(temporal_constraint)

    if cube is None:
        if temporal_constraint is not None:
            LOG.warning(
                "Temporal constraint resulted in no cubes being "
                "returned: %s",
                temporal_constraint,
            )
        raise UKCPDPDataNotFoundException(
            "Selection constraints resulted in no data being selected")

    # extract 10, 50 and 90 percentiles
    if overlay_probability_levels is True:
        cube = get_probability_levels(cube, False)

    # generate an area constraint
    area_constraint = self._get_spatial_selector(cube, collection)
    if area_constraint is not None:
        cube = cube.extract(area_constraint)
        if self.input_data.get_area_type() == AreaType.BBOX:
            # Make sure we still have x, y dimension coordinates for bboxes
            cube = self._promote_x_y_coords(cube)

    if cube is None:
        if area_constraint is not None:
            LOG.warning(
                "Area constraint resulted in no cubes being "
                "returned: %s",
                area_constraint,
            )
        raise UKCPDPDataNotFoundException(
            "Selection constraints resulted in no data being selected")

    return cube

def create_data_object(self, filenames, variable, index_offset=1):
    from cis.data_io.hdf_vd import get_data
    from cis.data_io.hdf_vd import VDS
    from pyhdf.error import HDF4Error
    from cis.data_io import hdf_sd
    from iris.coords import DimCoord, AuxCoord
    from iris.cube import Cube, CubeList
    from cis.data_io.gridded_data import GriddedData
    from cis.time_util import cis_standard_time_unit
    from datetime import datetime
    from iris.util import new_axis
    import numpy as np

    logging.debug("Creating data object for variable " + variable)

    variables = ["Pressure_Mean"]
    logging.info("Listing coordinates: " + str(variables))
    variables.append(variable)

    # reading data from files
    sdata = {}
    for filename in filenames:
        try:
            sds_dict = hdf_sd.read(filename, variables)
        except HDF4Error as e:
            raise IOError(str(e))

        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

    # work out size of data arrays
    # the coordinate variables will be reshaped to match that.
    # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
    # If this is not the case, then the following line will need to be changed
    # to concatenate the data from all the files and not just arbitrarily pick
    # the altitudes from the first file.
    alt_data = self._get_calipso_data(
        hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
    alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
    alt_coord.convert_units('m')

    lat_data = self._get_calipso_data(
        hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
    lat_coord = DimCoord(lat_data, standard_name='latitude',
                         units='degrees_north')

    lon_data = self._get_calipso_data(
        hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
    lon_coord = DimCoord(lon_data, standard_name='longitude',
                         units='degrees_east')

    cubes = CubeList()
    for f in filenames:
        t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
        time_data = cis_standard_time_unit.date2num(
            datetime(int(t[0:4]), int(t[4:6]), 15))
        time_coord = AuxCoord(time_data, long_name='Profile_Time',
                              standard_name='time',
                              units=cis_standard_time_unit)

        # retrieve data + its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

        pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure',
                              units='hPa')

        if data.ndim == 2:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            cubes.append(new_cube)
        elif data.ndim == 3:
            # pres_coord = new_axis()
            cube = Cube(data, long_name=metadata.long_name or variable,
                        units=self.clean_units(metadata.units),
                        dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1),
                                             (alt_coord, 2)],
                        aux_coords_and_dims=[(time_coord, ())])
            # Promote the time scalar coord to a length one dimension
            new_cube = new_axis(cube, 'time')
            # Then add the (extended) pressure coord so that it is explicitly
            # a function of time
            new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
            cubes.append(new_cube)
        else:
            raise ValueError("Unexpected number of dimensions for CALIOP "
                             "data: {}".format(data.ndim))

    # Concatenate the cubes from each file into a single GriddedData object
    gd = GriddedData.make_from_cube(cubes.concatenate_cube())
    return gd

def process_diagnostic(diagnostics, neighbours, sites, ancillary_data,
                       diagnostic_name):
    """
    Extract data and write output for a given diagnostic.

    Args:
        diagnostics (dict):
            Dictionary containing information regarding how the diagnostics
            are to be processed.

            For example::

                {
                    "temperature": {
                        "diagnostic_name": "air_temperature",
                        "extrema": true,
                        "filepath": "temperature_at_screen_level",
                        "interpolation_method":
                            "model_level_temperature_lapse_rate",
                        "neighbour_finding": {
                            "land_constraint": false,
                            "method": "fast_nearest_neighbour",
                            "vertical_bias": null
                        }
                    }
                }

        neighbours (numpy.array):
            Array of neighbouring grid points that are associated with sites
            in the SortedDictionary of sites.

        sites (dict):
            A dictionary containing the properties of spotdata sites.

        ancillary_data (dict):
            A dictionary containing additional model data that is needed.
            e.g. {'orography': <cube of orography>}

        diagnostic_name (string):
            A string matching the keys in the diagnostics dictionary that
            will be used to access information regarding how the diagnostic
            is to be processed.

    Returns:
        (tuple): tuple containing:
            **resulting_cube** (iris.cube.Cube or None):
                Cube after extracting the diagnostic requested using the
                desired extraction method.
                None is returned if the "resulting_cubes" is an empty
                CubeList after processing.
            **extrema_cubes** (iris.cube.CubeList or None):
                CubeList containing extrema values, if the 'extrema'
                diagnostic is requested.
                None is returned if the value for diagnostic_dict["extrema"]
                is False, so that the extrema calculation is not required.

    """
    diagnostic_dict = diagnostics[diagnostic_name]

    # Grab the relevant set of grid point neighbours for the neighbour
    # finding method being used by this diagnostic.
    neighbour_hash = (
        construct_neighbour_hash(diagnostic_dict['neighbour_finding']))
    neighbour_list = neighbours[neighbour_hash]

    # Get optional kwargs that may be set to override defaults.
    optionals = [
        'upper_level', 'lower_level', 'no_neighbours', 'dz_tolerance',
        'dthetadz_threshold', 'dz_max_adjustment'
    ]
    kwargs = {}
    if ancillary_data.get('config_constants') is not None:
        for optional in optionals:
            constant = ancillary_data.get('config_constants').get(optional)
            if constant is not None:
                kwargs[optional] = constant

    # Create a list of datetimes to loop through.
    forecast_times = []
    for cube in diagnostic_dict["data"]:
        time = cube.coord("time")
        forecast_times.extend(time.units.num2date(time.points))

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(diagnostic_dict["data"], a_time,
                                    time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if diagnostic_dict["additional_data"] is not None:
            # Extract additional diagnostics at current time.
            ad = extract_ad_at_time(diagnostic_dict["additional_data"],
                                    a_time, time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(diagnostic_dict['interpolation_method']).process(
                *args, **kwargs))

    if resulting_cubes:
        # Concatenate CubeList into Cube for cubes with different
        # forecast times.
        resulting_cube = resulting_cubes.concatenate_cube()
    else:
        resulting_cube = None

    if diagnostic_dict['extrema']:
        extrema_cubes = (
            ExtractExtrema(24, start_hour=9).process(resulting_cube.copy()))
        extrema_cubes = extrema_cubes.merge()
    else:
        extrema_cubes = None

    return resulting_cube, extrema_cubes

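# A heavily simplified sketch of a call to process_diagnostic, assuming the
# SpotData-style inputs described in the docstring have already been built;
# the `diagnostics`, `neighbours`, `sites` and `ancillary_data` objects here
# are placeholders, not real values.
temperature_cube, temperature_extrema = process_diagnostic(
    diagnostics, neighbours, sites, ancillary_data,
    diagnostic_name="temperature")
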