def test_statsmodels_mean(self, warning_list=None):
        """
        Test that the plugin raises no warnings if the statsmodels module
        is not found when the predictor is the ensemble mean.
        """
        import imp
        try:
            imp.find_module('statsmodels')
            statsmodels_found = True
        except ImportError:
            statsmodels_found = False

        cube = self.cube

        historic_forecasts = CubeList([])
        for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
            temp_cube = cube.copy()
            temp_cube.coord("time").points = (temp_cube.coord("time").points -
                                              index)
            historic_forecasts.append(temp_cube)
        historic_forecasts.concatenate_cube()

        distribution = "gaussian"
        desired_units = "degreesC"
        predictor_of_mean_flag = "mean"

        if not statsmodels_found:
            Plugin(distribution,
                   desired_units,
                   predictor_of_mean_flag=predictor_of_mean_flag)
            self.assertTrue(len(warning_list) == 0)
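
The tests in this listing build a time series of historic forecasts and join them with CubeList.concatenate_cube. Below is a minimal, self-contained sketch of that pattern; the names and values are illustrative only and are not taken from the improver test suite.

import numpy as np
from iris.cube import Cube, CubeList
from iris.coords import DimCoord

cubes = CubeList()
for day in range(5):
    # One "forecast" per day, carrying a length-one leading time dimension.
    cube = Cube(np.full((1, 3, 3), 280.0, dtype=np.float32),
                standard_name="air_temperature", units="K")
    cube.add_dim_coord(DimCoord([float(day)], standard_name="time",
                                units="days since 1970-01-01"), 0)
    cube.add_dim_coord(DimCoord(np.arange(3.0), long_name="y", units="m"), 1)
    cube.add_dim_coord(DimCoord(np.arange(3.0), long_name="x", units="m"), 2)
    cubes.append(cube)

# Joins along the shared time dimension; raises ConcatenateError if metadata differs.
historic_forecasts = cubes.concatenate_cube()
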
Example #2
    def test_statsmodels_mean(self, warning_list=None):
        """
        Test that the plugin raises no warnings if the statsmodels module
        is not found when the predictor is the ensemble mean.
        """
        import imp
        try:
            imp.find_module('statsmodels')
            statsmodels_found = True
        except ImportError:
            statsmodels_found = False

        cube = self.cube

        historic_forecasts = CubeList([])
        for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
            temp_cube = cube.copy()
            temp_cube.coord("time").points = (temp_cube.coord("time").points -
                                              index)
            historic_forecasts.append(temp_cube)
        historic_forecasts.concatenate_cube()

        current_forecast_predictor = cube
        truth = cube.collapsed("realization", iris.analysis.MAX)
        distribution = "gaussian"
        desired_units = "degreesC"
        predictor_of_mean_flag = "mean"
        no_of_realizations = 3
        estimate_coefficients_from_linear_model_flag = True

        if not statsmodels_found:
            plugin = Plugin(distribution,
                            desired_units,
                            predictor_of_mean_flag=predictor_of_mean_flag)
            self.assertTrue(len(warning_list) == 0)
Example #3
def load_Nexrad(filenames, variable):
    """Load a NEXRAD variable from a list of files into a single iris cube,
    rebuilding the time/z/x/y coordinates and concatenating along time."""
    import iris
    from iris.cube import CubeList
    from iris import coords
    from datetime import datetime, timedelta
    from os.path import basename
    cube_list = []
    for filename in filenames:
        cube = iris.load_cube(filename, variable)  # e.g. 'equivalent_reflectivity_factor'
        # The timestamp is encoded at a fixed position in the file name.
        timestring = basename(filename)[12:27]
        time_point = datetime.strptime(timestring, "%Y%m%d_%H%M%S")
        time_days = ((time_point - datetime(1970, 1, 1)).total_seconds()
                     / timedelta(days=1).total_seconds())
        x = iris.load_cube(filename, 'X-coordinate in Cartesian system')
        y = iris.load_cube(filename, 'Y-coordinate in Cartesian system')
        z = iris.load_cube(filename, 'Z-coordinate in Cartesian system')
        lat = iris.load_cube(filename, 'Latitude grid')
        lon = iris.load_cube(filename, 'Longitude grid')
        # Rebuild the time coordinate from the file name and re-attach the spatial coordinates.
        cube.remove_coord('time')
        cube.add_dim_coord(coords.DimCoord(time_days, long_name='time', var_name='time',
                                           units='days since 1970-01-01'), 0)
        cube.add_dim_coord(coords.DimCoord(x.data, long_name='x', var_name='x', units='m'), 2)
        cube.add_dim_coord(coords.DimCoord(y.data, long_name='y', var_name='y', units='m'), 3)
        cube.add_dim_coord(coords.DimCoord(z.data, long_name='z', var_name='z', units='m'), 1)
        cube.add_aux_coord(coords.AuxCoord(lat.data, standard_name='latitude', long_name='latitude',
                                           var_name='latitude', units='degrees'), (2, 3))
        cube.add_aux_coord(coords.AuxCoord(lon.data, standard_name='longitude', long_name='longitude',
                                           var_name='longitude', units='degrees'), (2, 3))
        cube_list.append(cube)
    # Per-file attributes differ and would otherwise block concatenation, so drop them.
    for member in cube_list:
        member.attributes = {}
    variable_cubes=CubeList(cube_list)
    variable_cube=variable_cubes.concatenate_cube()
    
    return variable_cube
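
A brief usage sketch for load_Nexrad; the directory layout and glob pattern are assumptions, and the variable name is the one hinted at in the original comment.

import glob

# Hypothetical file layout; the basename slice above expects the timestamp at a
# fixed position in each file name.
filenames = sorted(glob.glob("nexrad/gridded/*.nc"))
reflectivity = load_Nexrad(filenames, "equivalent_reflectivity_factor")
print(reflectivity.summary(shorten=True))  # single cube with time as the leading dimension
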
Example #4
    def test_statsmodels_members(self):
        """
        Test that the plugin raises the desired warning if the statsmodels
        module is not found when the predictor is the ensemble members.
        """
        warnings.simplefilter("always")
        import imp
        try:
            imp.find_module('statsmodels')
            statsmodels_found = True
        except ImportError:
            statsmodels_found = False

        cube = self.cube

        historic_forecasts = CubeList([])
        for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
            temp_cube = cube.copy()
            temp_cube.coord("time").points = (temp_cube.coord("time").points -
                                              index)
            historic_forecasts.append(temp_cube)
        historic_forecasts.concatenate_cube()

        current_forecast_predictor = cube
        truth = cube.collapsed("realization", iris.analysis.MAX)
        distribution = "gaussian"
        desired_units = "degreesC"
        predictor_of_mean_flag = "members"
        no_of_members = 3
        estimate_coefficients_from_linear_model_flag = True

        if not statsmodels_found:
            with warnings.catch_warnings(record=True) as warning_list:
                plugin = Plugin(distribution,
                                desired_units,
                                predictor_of_mean_flag=predictor_of_mean_flag)
                self.assertTrue(len(warning_list) == 1)
                self.assertTrue(
                    any(item.category == UserWarning for item in warning_list))
                self.assertTrue("The statsmodels can not be imported" in str(
                    warning_list[0]))
Example #5
def ensemble_collocate(ensemble, observations, member_dimension='job'):
    """
     Efficiently collocate (interpolate) many ensemble members on to a set of (un-gridded) observations

    Note
    ----
    This function requires both Iris and CIS to be installed

    Parameters
    ----------
    ensemble: ~cis.data_io.gridded_data.GriddedData
        The ensemble of (model) samples to interpolate on to the observations
    observations: ~cis.data_io.ungridded_data.UngriddedData
        The observations on to which the ensemble will be sampled
    member_dimension: str
        The name of the dimension which represents the ensemble members in `ensemble`

    Returns
    -------
    col_ensemble: iris.cube.Cube
        The ensemble values interpolated on to the observation locations, with the ensemble members
        along the leading dimension.
    """
    import numpy as np

    from iris.cube import Cube, CubeList
    from iris.coords import DimCoord, AuxCoord
    from cis.collocation.col_implementations import GriddedUngriddedCollocator, DummyConstraint
    from cis.data_io.gridded_data import make_from_cube

    col = GriddedUngriddedCollocator(missing_data_for_missing_sample=False)
    col_members = CubeList()

    for member in ensemble.slices_over(member_dimension):
        # Use CIS to collocate each ensemble member on to the observations
        #  The interpolation weights are cached within col automatically
        collocated_job, = col.collocate(observations, make_from_cube(member),
                                        DummyConstraint(), 'lin')
        # Turn the interpolated data into a flat cube for easy stacking
        new_c = Cube(collocated_job.data.reshape(1, -1),
                     long_name=collocated_job.name(),
                     units='1',
                     dim_coords_and_dims=[
                         (DimCoord(np.arange(collocated_job.data.shape[0]),
                                   long_name="obs"), 1),
                         (DimCoord(member.coord(member_dimension).points,
                                   long_name=member_dimension), 0)
                     ],
                     aux_coords_and_dims=[
                         (AuxCoord(c.points, standard_name=c.standard_name), 1)
                         for c in collocated_job.coords()
                     ])
        col_members.append(new_c)
    col_ensemble = col_members.concatenate_cube()
    return col_ensemble
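
A sketch of how ensemble_collocate might be driven, assuming CIS is installed; the file names and variable names below are illustrative only.

from cis import read_data

ensemble = read_data("ensemble_tas.nc", "tas")       # gridded data with a 'job' member dimension (assumed)
observations = read_data("station_obs.nc", "tas")    # ungridded point observations (assumed)
collocated = ensemble_collocate(ensemble, observations, member_dimension="job")
print(collocated.shape)  # (n_members, n_observations)
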
Example #6
    def test_predict_interface_multiple_samples(self):
        from iris.cube import CubeList
        # Get the actual test data
        #  Use the class method `eval_fn` so 'self' doesn't get passed
        expected = CubeList([type(self).eval_fn(p, job_n=i) for i, p in enumerate(self.test_params)])
        expected = expected.concatenate_cube()

        pred_m, pred_var = self.model.predict(self.test_params)

        assert_allclose(expected.data, pred_m.data, rtol=1e-3)
        assert pred_m.name() == 'Emulated ' + (expected.name() or 'data')
        assert pred_var.name() == 'Variance in emulated ' + (expected.name() or 'data')
        assert pred_m.units == expected.units
        assert pred_var.units == expected.units
Example #7
    def test_statsmodels_realizations(self, warning_list=None):
        """
        Test that the plugin raises the desired warning if the statsmodels
        module is not found when the predictor is the ensemble
        realizations.
        """
        import imp
        try:
            imp.find_module('statsmodels')
            statsmodels_found = True
        except ImportError:
            statsmodels_found = False

        cube = self.cube

        historic_forecasts = CubeList([])
        for index in [1.0, 2.0, 3.0, 4.0, 5.0]:
            temp_cube = cube.copy()
            temp_cube.coord("time").points = (temp_cube.coord("time").points -
                                              index)
            historic_forecasts.append(temp_cube)
        historic_forecasts.concatenate_cube()

        distribution = "gaussian"
        desired_units = "degreesC"
        predictor_of_mean_flag = "realizations"

        if not statsmodels_found:
            Plugin(distribution,
                   desired_units,
                   predictor_of_mean_flag=predictor_of_mean_flag)
            warning_msg = "The statsmodels can not be imported"
            self.assertTrue(
                any(item.category == ImportWarning for item in warning_list))
            self.assertTrue(
                any(warning_msg in str(item) for item in warning_list))
Example #8
    def test_batch_stats(self):
        from iris.cube import CubeList
        from esem.utils import get_random_params
        # Test that batch_stats returns the mean and standard deviation of the sample ensemble

        sample_params = get_random_params(self.params.shape[1], 25)

        expected = CubeList([type(self).eval_fn(p, job_n=i) for i, p in enumerate(sample_params)])
        expected_ensemble = expected.concatenate_cube()

        mean, std_dev = self.model.batch_stats(sample_params)

        assert_allclose(mean.data, expected_ensemble.data.mean(axis=0), rtol=0.5)
        # This is a really loose test but it needs to be because of the
        #  stochastic nature of the model and the ensemble points
        assert_allclose(std_dev.data, expected_ensemble.data.std(axis=0), rtol=0.5)
Example #9
    def test_predict_interface_multiple_samples(self):
        from iris.cube import CubeList
        # Get the actual test data
        #  Use the class method `eval_fn` so 'self' doesn't get passed
        expected = CubeList([
            type(self).eval_fn(p, job_n=i)
            for i, p in enumerate(self.test_params)
        ])
        expected = expected.concatenate_cube()

        pred_m, pred_var = self.model.predict(self.test_params)

        # For some reason the relative tolerance has to be
        # higher here than in the other tests???
        assert_allclose(expected.data, pred_m.data, rtol=1e-1)
        assert pred_m.name() == 'Emulated ' + (expected.name() or 'data')
        assert_allclose(np.full_like(expected.data, np.NaN),
                        pred_var.data,
                        equal_nan=True)
        assert pred_var.name() == 'Variance in emulated ' + expected.name()
        assert pred_m.units == expected.units
Example #10
def get_1d_two_param_cube(params=None, n_samples=10):
    """
    Create an ensemble of 1d cubes perturbed over two idealised parameter
    spaces. One of params or n_samples must be provided
    :param np.array params: A list of params to sample the ensemble over
    :param int n_samples: The number of params to sample (between 0. and 1.)
    :return:
    """
    from iris.cube import CubeList

    if params is None:
        params = np.linspace(np.zeros((2, )), np.ones((2, )), n_samples)

    cubes = CubeList([])
    for j, p in enumerate(params):
        c = make_dummy_1d_cube(j)
        # Perturb base data to represent some change in a parameter
        c.data *= simple_polynomial_fn_two_param(*p)
        cubes.append(c)

    ensemble = cubes.concatenate_cube()
    return ensemble
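
A short usage sketch for the helper above; the parameter values are arbitrary examples.

import numpy as np

params = np.array([[0.1, 0.9],
                   [0.5, 0.5],
                   [0.9, 0.1]])
ensemble = get_1d_two_param_cube(params)
print(ensemble.shape)  # leading dimension indexes the three parameter samples
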
Example #11
def segmentation_2D(track,
                    field,
                    dxy,
                    threshold=0,
                    target='maximum',
                    method='watershed',
                    max_distance=None):
    """
    Function using watershedding or random walker to determine cloud volumes associated with tracked updrafts
    Parameters:
    track:         pandas.DataFrame 
                   output from trackpy/maketrack
    field_in:      iris.cube.Cube
                   containing the 3D (time,x,y) field to perform the watershedding on 
    threshold:     float 
                   threshold for the watershedding field to be used for the mask
    target:        string
                   Switch to determine if algorithm looks strating from maxima or minima in input field (maximum: starting from maxima (default), minimum: starting from minima)
    method:        str ('method')
                   flag determining the algorithm to use (currently watershedding implemented)
    
    Output:
    segmentation_out: iris.cube.Cube
                   Cloud mask, 0 outside and integer numbers according to track inside the clouds
    
    """
    import numpy as np
    from skimage.morphology import watershed
    #    from skimage.segmentation import random_walker
    import logging
    from iris.cube import CubeList
    from iris.util import new_axis
    from scipy.ndimage import distance_transform_edt

    logging.info('Start watershedding 2D')

    # CubeList to store individual segmentation masks
    segmentation_out_list = CubeList()

    track['ncells'] = 0

    if max_distance is not None:
        max_distance_pixel = np.ceil(max_distance / dxy)

    field_time = field.slices_over('time')
    for i, field_i in enumerate(field_time):

        # Create cube of the same dimensions and coordinates as input data to store mask:
        segmentation_out_i = 1 * field_i
        segmentation_out_i.rename('segmentation_mask')
        segmentation_out_i.units = 1

        data_i = field_i.core_data()
        time_i = field_i.coord('time').units.num2date(
            field_i.coord('time').points[0])
        tracks_i = track[track['time'] == time_i]

        # mask data outside region above/below threshold and invert data if tracking maxima:
        if target == 'maximum':
            unmasked = data_i > threshold
            data_i_segmentation = -1 * data_i
        elif target == 'minimum':
            unmasked = data_i < threshold
            data_i_segmentation = data_i
        else:
            raise ValueError('unknown type of target')
        markers = np.zeros_like(unmasked).astype(np.int32)
        for index, row in tracks_i.iterrows():
            markers[int(row['hdim_1']), int(row['hdim_2'])] = row['feature']
        markers[~unmasked] = 0

        if method == 'watershed':
            segmentation_mask_i = watershed(data_i_segmentation,
                                            markers.astype(np.int32),
                                            mask=unmasked)
#        elif method=='random_walker':
#            #res1 = random_walker(Mask, markers,mode='cg')
#             res1=random_walker(data_i_segmentation, markers.astype(np.int32),
#                                beta=130, mode='bf', tol=0.001, copy=True, multichannel=False, return_full_prob=False, spacing=None)
        else:
            raise ValueError('unknown method, must be watershed')

        # remove everything from the individual masks that is more than max_distance_pixel away from the markers
        if max_distance is not None:
            for feature in tracks_i['feature']:
                D = distance_transform_edt((markers != feature).astype(int))
                segmentation_mask_i[np.bitwise_and(
                    segmentation_mask_i == feature,
                    D > max_distance_pixel)] = 0

        segmentation_out_i.data = segmentation_mask_i
        # using merge throws error, so cubes with time promoted to DimCoord and using concatenate:
        #        segmentation_out_list.append(segmentation_out_i)
        segmentation_out_i_temp = new_axis(segmentation_out_i,
                                           scalar_coord='time')
        segmentation_out_list.append(segmentation_out_i_temp)

        # count number of grid cells associated with each tracked cell and write that into the DataFrame:
        values, count = np.unique(segmentation_mask_i, return_counts=True)
        counts = dict(zip(values, count))
        for index, row in tracks_i.iterrows():
            if row['feature'] in counts.keys():
                track.loc[index, 'ncells'] = counts[row['feature']]
        logging.debug('Finished segmentation 2D for ' +
                      time_i.strftime('%Y-%m-%d_%H:%M:%S'))

    #merge individual masks in CubeList into one Cube:
    # using merge throws error, so cubes with time promoted to DimCoord and using concatenate:
#    segmentation_out=segmentation_out_list.merge_cube()
    segmentation_out = segmentation_out_list.concatenate_cube()

    logging.debug('Finished segmentation 2D')

    return segmentation_out, track
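
An illustrative call to segmentation_2D: `features` (the feature DataFrame from the tracking step) and `radar_field` (an iris cube with time, x and y dimensions) are assumed inputs, and the grid spacing, threshold and maximum distance are example values only.

# Assumed inputs: 'features' from the tracking step, 'radar_field' as an iris cube.
mask, features = segmentation_2D(features,
                                 radar_field,
                                 dxy=500.0,            # 500 m grid spacing (example value)
                                 threshold=10.0,       # watershedding threshold (example value)
                                 target='maximum',
                                 method='watershed',
                                 max_distance=5000.0)  # drop cells more than 5 km from a marker
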
Example #12
    def _get_cube(self,
                  file_list,
                  climatology=False,
                  overlay_probability_levels=False):
        """
        Get an iris cube based on the given files using selection criteria
        from the input_data.

        @param file_list (list[str]): a list of file names to retrieve data from
        @param climatology (boolean): if True extract the climatology data
        @param overlay_probability_levels (boolean): if True only include the
            10th, 50th and 90th percentile data

        @return an iris cube, may be 'None' if overlay_probability_levels=True
        """
        if climatology is True:
            LOG.info("_get_cube for climatology")
        elif overlay_probability_levels is True:
            LOG.info("_get_cube, overlay probability levels")
        else:
            LOG.info("_get_cube")

        if LOG.getEffectiveLevel() == logging.DEBUG:
            LOG.debug("_get_cube from %s files", len(file_list))
            for fpath in file_list:
                LOG.debug(" - FILE: %s", fpath)

        # Load the cubes
        cubes = CubeList()
        try:
            for file_path in file_list:
                f_list = glob.glob(file_path)
                cube_list = [iris.load_cube(f) for f in f_list]
                cubes.extend(cube_list)

        except IOError as ex:
            if overlay_probability_levels is True:
                # not all variables have corresponding probabilistic data
                return None
            for file_name in file_list:
                file_name = file_name.split("*")[0]
                if not path.exists(file_name):
                    LOG.error("File not found: %s", file_name)
            raise UKCPDPDataNotFoundException from ex

        if overlay_probability_levels is True:
            collection = COLLECTION_PROB
        else:
            collection = self.input_data.get_value(InputType.COLLECTION)

        # Remove time_bnds cubes
        if collection == COLLECTION_PROB:
            unfiltered_cubes = cubes
            cubes = CubeList()
            for cube in unfiltered_cubes:
                if cube.name() != "time_bnds":
                    cubes.append(cube)

        # Different creation dates will stop cubes concatenating, so let's
        # remove them
        for cube in cubes:
            coords = cube.coords(var_name="creation_date")
            for coord in coords:
                cube.remove_coord(coord)

        if len(cubes) == 0:
            LOG.warning("No data was retrieved from the following files:%s",
                        file_list)
            raise UKCPDPDataNotFoundException(
                "No data found for given selection options")

        LOG.debug("First cube:\n%s", cubes[0])
        LOG.debug("Concatenate cubes:\n%s", cubes)

        iris.experimental.equalise_cubes.equalise_attributes(cubes)
        unify_time_units(cubes)

        try:
            cube = cubes.concatenate_cube()
        except iris.exceptions.ConcatenateError as ex:
            LOG.error("Failed to concatenate cubes:\n%s\n%s", ex, cubes)
            error_cubes = CubeList()
            for error_cube in cubes:
                error_cubes.append(error_cube)
                try:
                    LOG.info("Appending %s",
                             error_cube.coord("ensemble_member_id").points[0])
                except iris.exceptions.CoordinateNotFoundError:
                    pass
                try:
                    error_cubes.concatenate_cube()
                except iris.exceptions.ConcatenateError as ex:
                    message = ""
                    try:
                        message = " {}".format(
                            error_cube.coord("ensemble_member_id").points[0])
                    except iris.exceptions.CoordinateNotFoundError:
                        pass
                    LOG.error(
                        "Error when concatenating cube%s:\n%s\n%s",
                        message,
                        ex,
                        error_cube,
                    )
                    break

            # pylint: disable=W0707
            raise UKCPDPDataNotFoundException(
                "No data found for given selection options")

        LOG.debug("Concatenated cube:\n%s", cube)

        if climatology is True:
            # generate a time slice constraint based on the baseline
            time_slice_constraint = self._time_slice_selector(True)
        else:
            # generate a time slice constraint
            time_slice_constraint = self._time_slice_selector(False)
        if time_slice_constraint is not None:
            cube = cube.extract(time_slice_constraint)

        if cube is None:
            if time_slice_constraint is not None:
                LOG.warning(
                    "Time slice constraint resulted in no cubes being "
                    "returned: %s",
                    time_slice_constraint,
                )
            raise UKCPDPDataNotFoundException(
                "Selection constraints resulted in no data being"
                " selected")

        # generate a temporal constraint
        temporal_constraint = self._get_temporal_selector()
        if temporal_constraint is not None:
            cube = cube.extract(temporal_constraint)

        if cube is None:
            if temporal_constraint is not None:
                LOG.warning(
                    "Temporal constraint resulted in no cubes being "
                    "returned: %s",
                    temporal_constraint,
                )
            raise UKCPDPDataNotFoundException(
                "Selection constraints resulted in no data being"
                " selected")

        # extract 10, 50 and 90 percentiles
        if overlay_probability_levels is True:
            cube = get_probability_levels(cube, False)

        # generate an area constraint
        area_constraint = self._get_spatial_selector(cube, collection)
        if area_constraint is not None:
            cube = cube.extract(area_constraint)
            if self.input_data.get_area_type() == AreaType.BBOX:
                # Make sure we still have x, y dimension coordinates for
                # bboxes
                cube = self._promote_x_y_coords(cube)

        if cube is None:
            if area_constraint is not None:
                LOG.warning(
                    "Area constraint resulted in no cubes being "
                    "returned: %s",
                    area_constraint,
                )
            raise UKCPDPDataNotFoundException(
                "Selection constraints resulted in no data being"
                " selected")

        return cube
Example #13
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube, CubeList
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit
        from datetime import datetime
        from iris.util import new_axis
        import numpy as np

        logging.debug("Creating data object for variable " + variable)

        variables = ["Pressure_Mean"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
        lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

        lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
        lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

        cubes = CubeList()
        for f in filenames:
            t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
            time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
            time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                                  units=cis_standard_time_unit)

            # retrieve data + its metadata
            var = sdata[variable]
            metadata = hdf.read_metadata(var, "SD")

            data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

            pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
            pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

            if data.ndim == 2:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                cubes.append(new_cube)
            elif data.ndim == 3:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                # Then add the (extended) pressure coord so that it is explicitly a function of time
                new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
                cubes.append(new_cube)
            else:
                raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))


        # Concatenate the cubes from each file into a single GriddedData object
        gd = GriddedData.make_from_cube(cubes.concatenate_cube())
        return gd
Example #14
def process_diagnostic(diagnostics, neighbours, sites, ancillary_data,
                       diagnostic_name):
    """
    Extract data and write output for a given diagnostic.

    Args:
        diagnostics (dict):
            Dictionary containing information regarding how the diagnostics
            are to be processed.

            For example::

              {
                  "temperature": {
                      "diagnostic_name": "air_temperature",
                      "extrema": true,
                      "filepath": "temperature_at_screen_level",
                      "interpolation_method":
                          "model_level_temperature_lapse_rate",
                      "neighbour_finding": {
                          "land_constraint": false,
                          "method": "fast_nearest_neighbour",
                          "vertical_bias": null
                      }
                  }
              }

        neighbours (numpy.array):
            Array of neighbouring grid points that are associated with sites
            in the SortedDictionary of sites.

        sites (dict):
            A dictionary containing the properties of spotdata sites.

        ancillary_data (dict):
            A dictionary containing additional model data that is needed.
            e.g. {'orography': <cube of orography>}

        diagnostic_name (string):
            A string matching the keys in the diagnostics dictionary that
            will be used to access information regarding how the diagnostic
            is to be processed.

    Returns:
        (tuple): tuple containing:
            **resulting_cube** (iris.cube.Cube or None):
                Cube after extracting the diagnostic requested using the
                desired extraction method.
                None is returned if the "resulting_cubes" is an empty CubeList
                after processing.
            **extrema_cubes** (iris.cube.CubeList or None):
                CubeList containing extrema values, if the 'extrema' diagnostic
                is requested.
                None is returned if the value for diagnostic_dict["extrema"]
                is False, so that the extrema calculation is not required.

    """
    diagnostic_dict = diagnostics[diagnostic_name]

    # Grab the relevant set of grid point neighbours for the neighbour finding
    # method being used by this diagnostic.
    neighbour_hash = (construct_neighbour_hash(
        diagnostic_dict['neighbour_finding']))
    neighbour_list = neighbours[neighbour_hash]

    # Get optional kwargs that may be set to override defaults.
    optionals = [
        'upper_level', 'lower_level', 'no_neighbours', 'dz_tolerance',
        'dthetadz_threshold', 'dz_max_adjustment'
    ]
    kwargs = {}
    if ancillary_data.get('config_constants') is not None:
        for optional in optionals:
            constant = ancillary_data.get('config_constants').get(optional)
            if constant is not None:
                kwargs[optional] = constant

    # Create a list of datetimes to loop through.
    forecast_times = []
    for cube in diagnostic_dict["data"]:
        time = cube.coord("time")
        forecast_times.extend(time.units.num2date(time.points))

    # Create empty iris.cube.CubeList to hold extracted data cubes.
    resulting_cubes = CubeList()

    # Loop over forecast times.
    for a_time in forecast_times:
        # Extract Cube from CubeList at current time.
        time_extract = datetime_constraint(a_time)
        cube = extract_cube_at_time(diagnostic_dict["data"], a_time,
                                    time_extract)
        if cube is None:
            # If no cube is available at given time, try the next time.
            continue

        ad = {}
        if diagnostic_dict["additional_data"] is not None:
            # Extract additional diagnostics at current time.
            ad = extract_ad_at_time(diagnostic_dict["additional_data"], a_time,
                                    time_extract)

        args = (cube, sites, neighbour_list, ancillary_data, ad)

        # Extract diagnostic data using defined method.
        resulting_cubes.append(
            ExtractData(diagnostic_dict['interpolation_method']).process(
                *args, **kwargs))

    if resulting_cubes:
        # Concatenate CubeList into Cube for cubes with different
        # forecast times.
        resulting_cube = resulting_cubes.concatenate_cube()
    else:
        resulting_cube = None

    if diagnostic_dict['extrema']:
        extrema_cubes = (ExtractExtrema(24, start_hour=9).process(
            resulting_cube.copy()))
        extrema_cubes = extrema_cubes.merge()
    else:
        extrema_cubes = None

    return resulting_cube, extrema_cubes
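
A hedged call sketch for process_diagnostic, assuming the diagnostics dictionary follows the shape shown in the docstring with its 'data' and 'additional_data' entries already populated, and that neighbours, sites and ancillary_data come from earlier preparation steps.

# Assumed inputs: 'diagnostics', 'neighbours', 'sites' and 'ancillary_data'
# built by earlier loading/neighbour-finding steps, as described in the docstring.
resulting_cube, extrema_cubes = process_diagnostic(
    diagnostics, neighbours, sites, ancillary_data,
    diagnostic_name="temperature")  # "temperature" matches the docstring example
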
Example #15
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube, CubeList
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit
        from datetime import datetime
        from iris.util import new_axis
        import numpy as np

        logging.debug("Creating data object for variable " + variable)

        variables = ["Pressure_Mean"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
        lat_coord = DimCoord(lat_data,
                             standard_name='latitude',
                             units='degrees_north')

        lon_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
        lon_coord = DimCoord(lon_data,
                             standard_name='longitude',
                             units='degrees_east')

        cubes = CubeList()
        for f in filenames:
            t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
            time_data = cis_standard_time_unit.date2num(
                datetime(int(t[0:4]), int(t[4:6]), 15))
            time_coord = AuxCoord(time_data,
                                  long_name='Profile_Time',
                                  standard_name='time',
                                  units=cis_standard_time_unit)

            # retrieve data + its metadata
            var = sdata[variable]
            metadata = hdf.read_metadata(var, "SD")

            data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

            pres_data = self._get_calipso_data(
                hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
            pres_coord = AuxCoord(pres_data,
                                  standard_name='air_pressure',
                                  units='hPa')

            if data.ndim == 2:
                # pres_coord = new_axis()
                cube = Cube(data,
                            long_name=metadata.long_name or variable,
                            units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0),
                                                 (lon_coord, 1)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                cubes.append(new_cube)
            elif data.ndim == 3:
                # pres_coord = new_axis()
                cube = Cube(data,
                            long_name=metadata.long_name or variable,
                            units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0),
                                                 (lon_coord, 1),
                                                 (alt_coord, 2)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                # Then add the (extended) pressure coord so that it is explicitly a function of time
                new_cube.add_aux_coord(pres_coord[np.newaxis, ...],
                                       (0, 1, 2, 3))
                cubes.append(new_cube)
            else:
                raise ValueError(
                    "Unexpected number of dimensions for CALIOP data: {}".
                    format(data.ndim))

        # Concatenate the cubes from each file into a single GriddedData object
        gd = GriddedData.make_from_cube(cubes.concatenate_cube())
        return gd