Example 1
def read_many_files_individually(filenames, usr_variables):
    """
    Read multiple Variables from many NetCDF files manually - i.e. not with MFDataset as this doesn't always work,
    in particular for NetCDF4 files.

    :param filenames: A list of NetCDF filenames to read, or a string with wildcards.
    :param usr_variables: A list of variable (dataset) names to read from the files. The names must appear exactly as
      in the NetCDF file. Variable names may be fully qualified NetCDF4 hierarchical group variables in the form
      ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``.
    :return: A dictionary of lists of variable instances constructed from all of the input files with the fully
      qualified variable name as the key
    """
    from cis.utils import add_element_to_list_in_dict, listify

    usr_variables = listify(usr_variables)

    var_data = {}

    for filename in filenames:

        var_dict = read(filename, usr_variables)
        for var in list(var_dict.keys()):
            add_element_to_list_in_dict(var_data, var, var_dict[var])

    return var_data
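
A note on the helper these examples share: each one funnels per-file results
through cis.utils.add_element_to_list_in_dict. The real implementation lives in
cis/utils.py; the following is only a minimal sketch of the behaviour implied
by its usage on this page.

def add_element_to_list_in_dict(dictionary, key, value):
    # Append value to the list stored under key, creating the list on
    # first use (behaviour inferred from the calls above, not copied
    # from cis/utils.py).
    dictionary.setdefault(key, []).append(value)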
Example 2
    def _create_one_dimensional_coord_list(self, filenames, index_offset=1):
        """
        Create a set of coordinates appropriate for a one-dimensional (column integrated) variable.

        :param filenames: A list of HDF filenames to read.
        :param int index_offset: For 5km products this will choose the coordinates which represent the start (0),
          middle (1) and end (2) of the 15 shots making up each column retrieval.
        :return: A CoordList containing the latitude, longitude and time coordinates.
        """
        import logging
        import datetime as dt
        from pyhdf.error import HDF4Error
        import cis.utils as utils
        from cis.data_io import hdf, hdf_sd
        from cis.data_io.Coord import Coord, CoordList
        from cis.data_io.ungridded_data import Metadata
        from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

        variables = ['Latitude', 'Longitude', "Profile_Time"]
        logging.info("Listing coordinates: " + str(variables))

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # latitude
        lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
        lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
        lat_coord = Coord(lat_data, lat_metadata, 'Y')

        # longitude
        lon = sdata['Longitude']
        lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
        lon_metadata = hdf.read_metadata(lon, "SD")
        lon_coord = Coord(lon_data, lon_metadata, 'X')

        # profile time, x
        time = sdata['Profile_Time']
        time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
        time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
        time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape,
                                               units=cis_standard_time_unit), "T")

        # create the object containing all coordinates
        coords = CoordList()
        coords.append(lat_coord)
        coords.append(lon_coord)
        coords.append(time_coord)

        return coords
Example 3
def load_multiple_aeronet(filenames, variables=None):
    import logging
    from cis.utils import add_element_to_list_in_dict, concatenate

    adata = {}

    for filename in filenames:
        logging.debug("reading file: " + filename)

        # reading in all variables into a dictionary:
        # a_dict, key: variable name, value: list of masked arrays
        a_dict = load_aeronet(filename, variables)
        for var in list(a_dict.keys()):
            add_element_to_list_in_dict(adata, var, a_dict[var])

    for var in list(adata.keys()):
        adata[var] = concatenate(adata[var])

    return adata
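
The final loop above leans on cis.utils.concatenate to merge the per-file
masked arrays into a single array per variable. Assuming it behaves like
numpy's masked concatenate, a minimal stand-in would be:

import numpy.ma as ma

def concatenate(arrays, axis=0):
    # Hypothetical stand-in for cis.utils.concatenate as used above:
    # join a list of (possibly masked) arrays along one axis,
    # preserving any masks.
    return ma.concatenate(arrays, axis=axis)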
Example 4
File: hdf.py Project: cedadev/cis
def read(filenames, variables):

    sdata = {}
    vdata = {}

    for filename in filenames:

        logging.debug("reading file: " + filename)

    # reading in all variables into 2 dictionaries:
        # sdata, key: variable name, value: list of sds
        # vdata, key: variable name, value: list of vds
        sds_dict, vds_dict = _read_hdf4(filename, variables)
        for var in list(sds_dict.keys()):
            utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])
        for var in list(vds_dict.keys()):
            utils.add_element_to_list_in_dict(vdata, var, vds_dict[var])

    return sdata, vdata
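
A short usage sketch (with hypothetical granule names) showing the shape of
the two returned dictionaries: each key is a variable name, and each value
holds one entry per input file.

sdata, vdata = read(["granule_01.hdf", "granule_02.hdf"],
                    ["Latitude", "Longitude"])
for name, sds_list in sdata.items():
    print(name, len(sds_list))  # e.g. "Latitude 2", one SDS per file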
Example 5
def load_multiple_hysplit(fnames, variables=None):
    import logging
    from cis.utils import add_element_to_list_in_dict, concatenate

    hdata = {}

    for filename in fnames:
        logging.debug("reading file: " + filename)

        # read in all trajectories
        # h_dict, key: trajectory starting lat/lon/altm value: dict containing trajectory data
        h_dict = load_hysplit(filename, variables)
        for traj in list(h_dict.keys()):
            if traj in hdata:
                for var in list(h_dict[traj].keys()):
                    # TODO error appending masked array! add these manually
                    add_element_to_list_in_dict(hdata[traj], var, h_dict[traj][var])
                for var in list(hdata[traj].keys()):
                    hdata[traj][var] = concatenate(hdata[traj][var])
            else:
                hdata[traj] = h_dict[traj]

    return hdata
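
The TODO above flags a real problem: once hdata[traj][var] has been
concatenated into a masked array, appending to it when a third file arrives
will fail. One alternative, sketched here rather than taken from the project,
is to collect plain lists per variable and concatenate only once at the end:

def merge_trajectories(per_file_dicts):
    # per_file_dicts: a list of {traj_key: {var: masked array}} dicts,
    # one per input file. Hypothetical helper illustrating the
    # collect-then-concatenate approach hinted at by the TODO above.
    from numpy import ma
    collected = {}
    for h_dict in per_file_dicts:
        for traj, var_dict in h_dict.items():
            bucket = collected.setdefault(traj, {})
            for var, arr in var_dict.items():
                bucket.setdefault(var, []).append(arr)
    return {traj: {var: ma.concatenate(arrs) for var, arrs in vd.items()}
            for traj, vd in collected.items()}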
Example 6
def load_hysplit(fname, variables=None):
    import numpy as np
    from numpy import ma
    from datetime import datetime, timedelta
    from cis.exceptions import InvalidVariableError
    from cis.time_util import cis_standard_time_unit
    from cis.utils import add_element_to_list_in_dict, concatenate

    std_day = cis_standard_time_unit.num2date(0)

    fmetadata = get_file_metadata(fname)
    try:
        rawd = np.genfromtxt(fname,
                             skip_header=fmetadata['data_start'],
                             dtype=np.float64,
                             usemask=True)
    except (StopIteration, IndexError) as e:
        raise IOError(e)

    data_dict = {}
    # Get data for one trajectory at a time
    for t in range(1, fmetadata['n_trajectories']+1):
        tdata_dict = {}

        trajectory_data = rawd[rawd[:,hysplit_default_var.index('TRAJECTORY_NO')] == t]
        # Convert time from each row to standard time
        for trajectory in trajectory_data:
            day = datetime((int(trajectory[2]) + 2000), # TODO Dan: is it okay to assume this?
                            int(trajectory[3]),
                            int(trajectory[4]))
            sday = float((day - std_day).days)
            td = timedelta(hours=int(trajectory[5]), minutes=int(trajectory[6]))
            fractional_day = td.total_seconds()/(24.0*60.0*60.0)
            dt = sday + fractional_day
            add_element_to_list_in_dict(tdata_dict, 'DATETIMES', [dt])
        # Clean up data
        tdata_dict['DATETIMES'] = ma.array(concatenate(tdata_dict['DATETIMES'])) # TODO mask is only one value

        # Add other default data
        tdata_dict['LAT'] = trajectory_data[:,hysplit_default_var.index('LAT')]
        tdata_dict['LON'] = trajectory_data[:,hysplit_default_var.index('LON')]
        tdata_dict['ALT'] = trajectory_data[:,hysplit_default_var.index('ALT')]
        tdata_dict['PRESSURE'] = trajectory_data[:, hysplit_default_var.index('PRESSURE')]
        # TODO any other default variables to add?

        # If variables set, fetch only set variables
        if variables is not None:
            for key in variables:
                try:
                    tdata_dict[key] = trajectory_data[:,fmetadata['labels'].index(key)]
                except ValueError:
                    raise InvalidVariableError(key + "does not exist in " + fname)
        # Else, return all variables in file
        else:
            for label in fmetadata['custom_labels']:
                try:
                    tdata_dict[label] = trajectory_data[:,fmetadata['labels'].index(label)]
                except ValueError:
                    raise InvalidVariableError(key + " does not exist in " + fname)

        # TODO trajectory keys are tuples of lat/long/alt
        tkey = fmetadata['trajectories'][t]
        data_dict[tkey] = tdata_dict

    return data_dict
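
The per-row time handling above packs a HYSPLIT timestamp (two-digit year,
month, day, hour, minute) into fractional days since the CIS standard epoch.
The same arithmetic in isolation, with a hypothetical epoch standing in for
cis_standard_time_unit.num2date(0):

from datetime import datetime, timedelta

def hysplit_row_to_std_time(yy, month, day, hour, minute,
                            std_day=datetime(1600, 1, 1)):
    # std_day here is a hypothetical epoch; the real code derives it
    # from cis_standard_time_unit.num2date(0). The two-digit year is
    # assumed to fall in the 2000s, as in load_hysplit above.
    whole_days = float((datetime(2000 + yy, month, day) - std_day).days)
    fraction = timedelta(hours=hour, minutes=minute).total_seconds() / 86400.0
    return whole_days + fraction

t = hysplit_row_to_std_time(9, 7, 1, 12, 30)  # 2009-07-01 12:30 -> x.520833...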
Example 7
    def _create_coord_list(self, filenames, index_offset=0):
        import logging
        from cis.data_io import hdf
        from cis.data_io.Coord import Coord, CoordList
        from cis.data_io.ungridded_data import Metadata
        import cis.utils as utils
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        import datetime as dt
        from cis.time_util import convert_sec_since_to_std_time, cis_standard_time_unit

        variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
        logging.info("Listing coordinates: " + str(variables))

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        alt_name = "altitude"
        logging.info("Additional coordinates: '" + alt_name + "'")

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
        alt_data *= 1000.0  # Convert to m
        len_x = alt_data.shape[0]

        lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)
        len_y = lat_data.shape[0]

        new_shape = (len_x, len_y)

        # altitude
        alt_data = utils.expand_1d_to_2d_array(alt_data, len_y, axis=0)
        alt_metadata = Metadata(name=alt_name, standard_name=alt_name, shape=new_shape)
        alt_coord = Coord(alt_data, alt_metadata)

        # pressure
        if self.include_pressure:
            pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
            pres_metadata = hdf.read_metadata(sdata['Pressure'], "SD")
            # Fix badly formatted units which aren't CF compliant and will break if they are aggregated
            if str(pres_metadata.units) == "hPA":
                pres_metadata.units = "hPa"
            pres_metadata.shape = new_shape
            pres_coord = Coord(pres_data, pres_metadata, 'P')

        # latitude
        lat_data = utils.expand_1d_to_2d_array(lat_data[:, index_offset], len_x, axis=1)
        lat_metadata = hdf.read_metadata(sdata['Latitude'], "SD")
        lat_metadata.shape = new_shape
        lat_coord = Coord(lat_data, lat_metadata, 'Y')

        # longitude
        lon = sdata['Longitude']
        lon_data = hdf.read_data(lon, self._get_calipso_data)
        lon_data = utils.expand_1d_to_2d_array(lon_data[:, index_offset], len_x, axis=1)
        lon_metadata = hdf.read_metadata(lon, "SD")
        lon_metadata.shape = new_shape
        lon_coord = Coord(lon_data, lon_metadata, 'X')

        # profile time, x
        time = sdata['Profile_Time']
        time_data = hdf.read_data(time, self._get_calipso_data)
        time_data = convert_sec_since_to_std_time(time_data, dt.datetime(1993, 1, 1, 0, 0, 0))
        time_data = utils.expand_1d_to_2d_array(time_data[:, index_offset], len_x, axis=1)
        time_coord = Coord(time_data, Metadata(name='Profile_Time', standard_name='time', shape=time_data.shape,
                                               units=cis_standard_time_unit), "T")

        # create the object containing all coordinates
        coords = CoordList()
        coords.append(lat_coord)
        coords.append(lon_coord)
        coords.append(time_coord)
        coords.append(alt_coord)
        if self.include_pressure and (pres_data.shape == alt_data.shape):
            # For MODIS L1 this may not be true, in which case the air pressure reading is skipped. If required for
            # MODIS L1 then some kind of interpolation of the air pressure would be needed, as it is on a different
            # (smaller) grid than the Lidar_Data_Altitudes.
            coords.append(pres_coord)

        return coords
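
The reshaping above tiles the 1-D altitude and along-track arrays into a
common 2-D shape so every coordinate matches the data grid. A plain numpy
sketch of what utils.expand_1d_to_2d_array appears to do here (inferred from
its usage, not copied from cis):

import numpy as np

def expand_1d_to_2d(array_1d, length, axis=0):
    # axis=0: repeat the array as rows, shape (length, array_1d.size);
    # otherwise repeat it as columns, shape (array_1d.size, length).
    if axis == 0:
        return np.tile(array_1d, (length, 1))
    return np.tile(array_1d.reshape(-1, 1), (1, length))

alt = np.arange(4.0)                           # 4 altitude levels
print(expand_1d_to_2d(alt, 3, axis=0).shape)   # (3, 4)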
Example 8
    def create_data_object(self, filenames, variable, index_offset=1):
        import logging
        import cis.utils as utils
        from cis.data_io import hdf, hdf_sd
        from cis.data_io.hdf_vd import get_data, VDS
        from pyhdf.error import HDF4Error
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit

        logging.debug("Creating data object for variable " + variable)

        variables = ['Latitude', 'Longitude', "Profile_Time", "Pressure"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        alt_name = "altitude"
        logging.info("Additional coordinates: '" + alt_name + "'")

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)

        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = hdf.read_data(sdata['Latitude'], self._get_calipso_data)[:, index_offset]
        lat_coord = AuxCoord(lat_data, standard_name='latitude')

        pres_data = hdf.read_data(sdata['Pressure'], self._get_calipso_data)
        pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

        # longitude
        lon = sdata['Longitude']
        lon_data = hdf.read_data(lon, self._get_calipso_data)[:, index_offset]
        lon_coord = AuxCoord(lon_data, standard_name='longitude')

        # profile time, x
        time = sdata['Profile_Time']
        time_data = hdf.read_data(time, self._get_calipso_data)[:, index_offset]
        time_coord = DimCoord(time_data, long_name='Profile_Time', standard_name='time',
                              units="seconds since 1993-01-01 00:00:00")
        time_coord.convert_units(cis_standard_time_unit)

        # retrieve data + its metadata
        var = sdata[variable]
        metadata = hdf.read_metadata(var, "SD")

        if variable in MIXED_RESOLUTION_VARIABLES:
            logging.warning("Using Level 2 resolution profile for mixed resolution variable {}. See CALIPSO "
                            "documentation for more details".format(variable))
            data = hdf.read_data(var, self._get_mixed_resolution_calipso_data)
        else:
            data = hdf.read_data(var, self._get_calipso_data)

        cube = Cube(data, long_name=metadata.long_name, units=self.clean_units(metadata.units),
                    dim_coords_and_dims=[(alt_coord, 1), (time_coord, 0)], aux_coords_and_dims=[(lat_coord, (0,)),
                                                                                                (lon_coord, (0,)),
                                                                                                (pres_coord, (0, 1))])
        gd = GriddedData.make_from_cube(cube)
        return gd
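
The Cube construction at the end follows the standard iris pattern: dimension
coordinates are attached with the index of the dimension they describe, and
auxiliary coordinates with a tuple of the dimensions they span. A
self-contained sketch with synthetic data in place of the CALIOP reads:

import numpy as np
from iris.cube import Cube
from iris.coords import DimCoord, AuxCoord

data = np.zeros((2, 3))                        # (time, altitude)
time = DimCoord([0.0, 1.0], standard_name='time',
                units='days since 1993-01-01 00:00:00')
alt = DimCoord([100.0, 200.0, 300.0], standard_name='altitude', units='m')
lat = AuxCoord([10.0, 11.0], standard_name='latitude', units='degrees_north')
cube = Cube(data, long_name='synthetic_variable',
            dim_coords_and_dims=[(time, 0), (alt, 1)],
            aux_coords_and_dims=[(lat, (0,))])  # lat varies along time only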
Example 9
    def create_data_object(self, filenames, variable, index_offset=1):
        import logging
        import numpy as np
        from datetime import datetime
        import cis.utils as utils
        from cis.data_io import hdf, hdf_sd
        from cis.data_io.hdf_vd import get_data, VDS
        from pyhdf.error import HDF4Error
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube, CubeList
        from iris.util import new_axis
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit

        logging.debug("Creating data object for variable " + variable)

        variables = ["Pressure_Mean"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
        lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

        lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
        lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

        cubes = CubeList()
        for f in filenames:
            t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
            time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
            time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                                  units=cis_standard_time_unit)

            # retrieve data + its metadata
            var = sdata[variable]
            metadata = hdf.read_metadata(var, "SD")

            data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

            pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
            pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

            if data.ndim == 2:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                cubes.append(new_cube)
            elif data.ndim == 3:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                # Then add the (extended) pressure coord so that it is explicitly a function of time
                new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
                cubes.append(new_cube)
            else:
                raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))


        # Concatenate the cubes from each file into a single GriddedData object
        gd = GriddedData.make_from_cube(cubes.concatenate_cube())
        return gd
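
The key move in this reader is attaching time as a scalar coordinate and then
promoting it with iris.util.new_axis, so each per-file cube gains a length-one
time dimension and the cubes can be concatenated. The same step in isolation:

import numpy as np
from iris.cube import Cube
from iris.coords import AuxCoord, DimCoord
from iris.util import new_axis

lat = DimCoord([0.0, 1.0], standard_name='latitude', units='degrees_north')
t = AuxCoord(5.0, standard_name='time', units='days since 1993-01-01 00:00:00')
cube = Cube(np.zeros(2), dim_coords_and_dims=[(lat, 0)],
            aux_coords_and_dims=[(t, ())])      # scalar time coordinate
promoted = new_axis(cube, 'time')               # shape (2,) -> (1, 2)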