Example 1
import glob
import os

import numpy as np
from pyhdf.HDF import HDF
from pyhdf.SD import SDC


def get_precip_flag(cloudsat_filenames, cloudsat_dir=None, verbose=0):

    all_flags = []

    for cloudsat_path in cloudsat_filenames:

        # if precipitation information is stored in another file
        if cloudsat_dir is not None:
            basename = os.path.basename(cloudsat_path)
            filename = glob.glob(
                os.path.join(cloudsat_dir, basename[:11] + "*.hdf"))[0]
        else:
            filename = cloudsat_path

        f = HDF(filename, SDC.READ)
        vs = f.vstart()

        vdata_precip = vs.attach('Precip_flag')
        precip = vdata_precip[:]

        if verbose:
            print("hdf information", vs.vdatainfo())
            print('Nb pixels: ', len(precip))
            print('Precip_flag values: ', np.unique(precip))

        all_flags += precip

        # close everything
        vdata_precip.detach()

        vs.end()
        f.close()

    return np.array(all_flags).flatten().astype(np.int8)
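A hypothetical usage sketch for the function above; the glob pattern and directory layout are placeholders, not a real dataset:

# Hypothetical usage: the paths are placeholders.
import glob

cloudsat_files = sorted(glob.glob("/data/cloudsat/2B-GEOPROF/*.hdf"))
flags = get_precip_flag(cloudsat_files, verbose=1)
print(flags.shape, flags.dtype)  # one int8 flag per profile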
Example 2
def get_hdf_VD_file_variables(filename):
    """
    Get all the variables from an HDF VD file

    :param filename: The filename of the file to get the variables from
    :returns: A dict mapping each VData name to the rest of its vdatainfo tuple
    """
    variables = None
    if not HDF:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    try:
        # Open file
        datafile = HDF(filename)
        vs = datafile.vstart()
        # List of required variable names
        names = vs.vdatainfo()
        # This returns a list of tuples, so convert into a dictionary for easy lookup
        variables = {}
        for var in names:
            variables[var[0]] = var[1:]
        # Close file
        vs.end()
        datafile.close()
    except Exception as e:
        logging.error("Error while reading VD data: %s", e)

    return variables
Example 3
def get_hdf_VD_file_variables(filename):
    """
    Get all the variables from an HDF VD file

    :param filename: The filename of the file to get the variables from
    :returns: A dict mapping each VData name to the rest of its vdatainfo tuple
    """
    variables = None
    if not HDF:
        raise ImportError(
            "HDF support was not installed, please reinstall with pyhdf to read HDF files."
        )

    try:
        # Open file
        datafile = HDF(filename)
        vs = datafile.vstart()
        # List of required variable names
        names = vs.vdatainfo()
        # This returns a list of tuples, so convert into a dictionary for easy lookup
        variables = {}
        for var in names:
            variables[var[0]] = var[1:]
        # Close file
        vs.end()
        datafile.close()
    except Exception as e:
        logging.error("Error while reading VD data: %s", e)

    return variables
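A hypothetical call for the function above; each entry maps a VData name to the rest of its vdatainfo tuple:

# Hypothetical usage: "granule.hdf" is a placeholder path.
variables = get_hdf_VD_file_variables("granule.hdf")
if variables is not None:
    for name, info in variables.items():
        print(name, info)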
Example 4
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
            from pyhdf.HDF import HDF
        except ImportError:
            raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

        valid_variables = set([])
        for filename in filenames:
            # Do VD variables
            datafile = HDF(filename)
            vdata = datafile.vstart()
            variables = vdata.vdatainfo()
            # Assumes that latitude shape == longitude shape (it should):
            # dim_length = [var[3] for var in variables if var[0] == 'Latitude'][0]
            for var in variables:
                # if var[3] == dim_length:
                valid_variables.add(var[0])

            # Do SD variables:
            sd = SD(filename)
            datasets = sd.datasets()
            # if 'Height' in datasets:
            #     valid_shape = datasets['Height'][1]
            for var in datasets:
                # if datasets[var][1] == valid_shape:
                valid_variables.add(var)

        return valid_variables
Example 5
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
            from pyhdf.HDF import HDF
        except ImportError:
            raise ImportError(
                "HDF support was not installed, please reinstall with pyhdf to read HDF files."
            )

        valid_variables = set([])
        for filename in filenames:
            # Do VD variables
            datafile = HDF(filename)
            vdata = datafile.vstart()
            variables = vdata.vdatainfo()
            # Assumes that latitude shape == longitude shape (it should):
            dim_length = [var[3] for var in variables
                          if var[0] == 'Latitude'][0]
            for var in variables:
                if var[3] == dim_length:
                    valid_variables.add(var[0])

            # Do SD variables:
            sd = SD(filename)
            datasets = sd.datasets()
            if 'Height' in datasets:
                valid_shape = datasets['Height'][1]
                for var in datasets:
                    if datasets[var][1] == valid_shape:
                        valid_variables.add(var)

        return valid_variables
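The variant above filters VData names by the record count of 'Latitude' and SD names by the shape of 'Height', while Example 4 collects every name unfiltered. For reference, a minimal standalone sketch that lists both interfaces without any filtering (the file path is a placeholder):

from pyhdf.HDF import HDF
from pyhdf.SD import SD

def list_hdf4_variables(filename):
    """Return the union of VData and SD dataset names in one HDF4 file."""
    names = set()
    datafile = HDF(filename)
    vdata = datafile.vstart()
    names.update(info[0] for info in vdata.vdatainfo())
    vdata.end()
    datafile.close()
    sd = SD(filename)
    names.update(sd.datasets())
    sd.end()
    return names

print(sorted(list_hdf4_variables("granule.hdf")))  # hypothetical file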
Example 6
    def get_vdata(self, VDataName):
        """Return VData (binary table) from hdf4.

        Parameters
        ----------
        VDataName : str
            Name of the VData (stored as binary table in hdf) field

        Returns
        -------
        dict
            returns VData dictionary
        """
        try:
            h4 = HDF(self.filename)
            vs_handle = h4.vstart()
            # vs_handle.vdatainfo() should list all vdatas here, but it does
            # not work correctly with the MO installation:
            # print(vs_handle.vdatainfo())
            vd = vs_handle.attach(VDataName)
            vdi = vd.fieldinfo()
            vd.detach()
            vdata = {}
            for i in vdi:
                vd = vs_handle.attach(VDataName)
                vd.setfields(i[0])
                vdata[i[0]] = vd.read()
                vd.detach()
            vs_handle.end()
            h4.close()
        except HDF4Error as e:
            raise HDF4Error(e)
        return vdata
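The attach/setfields/read/detach pattern used above can be exercised directly; a minimal sketch, assuming the file holds a 'Latitude' VData whose single field is also named 'Latitude' (as in CloudSat granules):

from pyhdf.HDF import HDF

h4 = HDF("granule.hdf")  # hypothetical file
vs_handle = h4.vstart()
vd = vs_handle.attach("Latitude")
nrecords = vd.inquire()[0]
vd.setfields("Latitude")  # restrict subsequent reads to this field
values = vd.read(nRec=nrecords)
vd.detach()
vs_handle.end()
h4.close()
print(len(values))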
Example 7
import numpy as np
from pyhdf.HDF import HDF
from pyhdf.SD import SDC


def get_coordinates(cloudsat_filenames, verbose=0):

    all_latitudes, all_longitudes = [], []

    for cloudsat_path in cloudsat_filenames:

        f = HDF(cloudsat_path, SDC.READ)
        vs = f.vstart()

        vdata_lat = vs.attach('Latitude')
        vdata_long = vs.attach('Longitude')

        latitudes = vdata_lat[:]
        longitudes = vdata_long[:]

        assert len(latitudes) == len(longitudes), "cloudsat hdf corrupted"

        if verbose:
            print("hdf information", vs.vdatainfo())
            print('Nb pixels: ', len(latitudes))
            print('Lat min, Lat max: ', min(latitudes), max(latitudes))
            print('Long min, Long max: ', min(longitudes), max(longitudes))

        all_latitudes += latitudes
        all_longitudes += longitudes

        # close everything
        vdata_lat.detach()
        vdata_long.detach()
        vs.end()
        f.close()

    return np.array(all_latitudes).flatten(), np.array(
        all_longitudes).flatten()
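A hypothetical call for get_coordinates, e.g. to sanity-check the ground track of a single granule (the file name is a placeholder):

lats, lons = get_coordinates(["2B-GEOPROF_granule.hdf"], verbose=1)
print(lats.min(), lats.max(), lons.min(), lons.max())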
Example 8
class HDF4File:
    """
    Simplified interface for reading HDF4 files. It combines the SD and VS
    low-level interfaces.
    """

    # Attributes:
    #     variables(``list``): List of strings of variable names contained in
    #         this file.

    def __init__(self, path):
        self.path = path
        self.file_handle = HDF(str(path))

        self.scientific_dataset = SD(str(path))
        datasets = self.scientific_dataset.datasets()
        dataset_dict = {
            key: Dataset(weakref.ref(self), key, *info)
            for key, info in datasets.items()
        }
        self.datasets = dataset_dict

        self.vdata_table = VS(self.file_handle)
        vdata_dict = {
            info[0]: VData(weakref.ref(self), *info)
            for info in self.vdata_table.vdatainfo()
        }
        self.vdata = vdata_dict

    def __del__(self):
        if self.file_handle:
            self.file_handle.close()
            self.file_handle = None

    @property
    def variables(self):
        """
        Names of the variables available in this file.
        """
        return list(self.datasets.keys()) + list(self.vdata.keys())

    def __getattribute__(self, name):
        try:
            return object.__getattribute__(self, name)
        except AttributeError as error:
            datasets = object.__getattribute__(self, "datasets")
            if name in datasets:
                return datasets[name]
            vdata = object.__getattribute__(self, "vdata")
            if name in vdata:
                return vdata[name]
            raise error

    def __repr__(self):
        return f"HDF4File({self.path})"
Example 9
    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.

        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        return self
Example 10
def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.

    :param vds:
    :param first_record:
    :param missing_values:
    :return:
    """
    import numpy as np
    from pyhdf.HDF import HDF, HDF4Error
    from cis.utils import create_masked_array_for_missing_values

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()

    if first_record:
        # FIXME - This is the only bit that is actually different to the baseline
        vd = vs.attach('metadata')
        vd.setfields(variable)
        data = vd.read()
    else:
        # get data for that variable
        vd = vs.attach(variable)
        data = vd.read(nRec=vd.inquire()[0])

    # create numpy array from data
    data = np.array(data).flatten()

    # dealing with missing data
    if missing_values is None:
        v = _get_attribute_value(vd, 'missing')
        v = float(v) if v is not None else None
        missing_values = [v]

    data = create_masked_array_for_missing_values(data, missing_values)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return data
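create_masked_array_for_missing_values is imported from cis.utils; a minimal numpy-only sketch of the behaviour this code relies on (masking every listed missing value), written as an assumption rather than the library's actual implementation:

import numpy as np

def mask_missing(data, missing_values):
    """Return a masked array with every value listed in missing_values masked."""
    data = np.ma.masked_array(np.asarray(data))
    for missing in missing_values:
        if missing is not None:
            data = np.ma.masked_where(data == missing, data)
    return data

print(mask_missing([1.0, -9999.0, 2.0], [-9999.0]))  # second value masked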
Example 11
class Hdf4File(DataProductBase):
    """
    Base class for file products using HDF4File format. The :class:`Hdf4File`
    wraps around the pyhdf.SD class to implement RAII.
    """
    def __init__(self, filename):
        """
        Open an HDF4 file for reading.

        Arguments:

            filename(str): The path to the file to open.
        """
        super().__init__()
        from pyhdf.HDF import HDF, HC
        from pyhdf.SD import SD, SDC
        import pyhdf.VS
        self.filename = filename
        self.hdf = HDF(self.filename, HC.READ)
        self.vs = self.hdf.vstart()
        self.sd = SD(self.filename, SDC.READ)

    @property
    def vs_attributes(self):
        vs_attributes = [t[0] for t in self.vs.vdatainfo()]
        return vs_attributes

    @property
    def sd_attributes(self):
        sd_attributes = [t for t in self.sd.datasets()]
        return sd_attributes

    @property
    def attributes(self):
        return self.vs_attributes + self.sd_attributes

    def __getitem__(self, name):
        if name in self.vs_attributes:
            return self.vs.attach(name)
        elif name in self.sd_attributes:
            return self.sd.select(name)
        else:
            raise ValueError("{} is not a known attribute of this file.".format(name))

    def __del__(self):
        self.sd.end()
        self.vs.end()
        self.hdf.close()
Example 12
    def __init__(self, filename):
        """
        Open an HDF4 file for reading.

        Arguments:

            filename(str): The path to the file to open.
        """
        super().__init__()
        from pyhdf.HDF import HDF, HC
        from pyhdf.SD import SD, SDC
        import pyhdf.VS
        self.filename = filename
        self.hdf = HDF(self.filename, HC.READ)
        self.vs = self.hdf.vstart()
        self.sd = SD(self.filename, SDC.READ)
Example 13
def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.

    :param vds:
    :param first_record:
    :param missing_values:
    :return:
    """

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()

    if first_record:
        vd = vs.attach(vs.next(-1))
        vd.setfields(variable)
        data = vd.read()
    else:
        # get data for that variable
        vd = vs.attach(variable)
        data = vd.read(nRec=vd.inquire()[0])

    # create numpy array from data
    data = np.array(data).flatten()

    # dealing with missing data
    if missing_values is None:
        missing_values = [_get_attribute_value(vd, 'missing')]

    data = create_masked_array_for_missing_values(data, missing_values)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return data
Example 14
def get_data(vds, first_record=False, missing_values=None):
    """
    Actually read the data from the VDS handle. We shouldn't need to check for HDF being installed here because the
    VDS object which is being passed to us can only have come from pyhdf.

    :param vds:
    :param first_record:
    :param missing_values:
    :return:
    """

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    try:
        datafile = HDF(filename)
    except HDF4Error as e:
        raise IOError(e)

    vs = datafile.vstart()

    if first_record:
        vd = vs.attach(vs.next(-1))
        vd.setfields(variable)
        data = vd.read()
    else:
        # get data for that variable
        vd = vs.attach(variable)
        data = vd.read(nRec=vd.inquire()[0])

    # create numpy array from data
    data = np.array(data).flatten()

    # dealing with missing data
    if missing_values is None:
        missing_values = [__get_attribute_value(vd, 'missing')]

    data = create_masked_array_for_missing_values(data, missing_values)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return data
Example 15
    def open(self):
        """Open for reading."""

        if self.hdf is None:
            self.hdf = HDF(self.file)
            self.vs = self.hdf.vstart()
            # Ignore exceptions telling us there are no VData's
            try:
                pass
                #self.vdinfo = self.vs.vdatainfo()
            except HDF4Error:
                pass
            # Ignore exceptions telling us there are no SDS's
            try:
                self.sd = SD(self.file)
            except HDF4Error:
                pass
Example 16
    def open(self, view=None, datamodel=None, datamodel_geolocation_dims=None):
        """Open the HDF file

        Args:
            view (dict, optional): a dictionary where keys are dimension names
                and values are slices. A view can be set on a file, meaning
                that only the subset defined by this view will be accessible.
                This view is expressed as any subset (see :func:`get_values`).
                For example::

                    view = {'time': slice(0, 0), 'lat': slice(200, 300),
                            'lon': slice(200, 300)}

            datamodel (str): type of feature read or written. Internal argument
                only used by the classes from :mod:`~cerbere.datamodel`
                package. Can be 'Grid', 'Swath', etc...

            datamodel_geolocation_dims (list, optional): list of the name of the
                geolocation dimensions defining the data model to be read in
                the file. Optional argument, only used by the datamodel
                classes, in case the mapper class can store different types of
                data models.

        Returns:
            a handler on the opened file
        """
        self.view = view
        if self.is_writable():
            raise NotImplementedError
        else:
            if not os.path.exists(self._url):
                raise Exception("File %s is not existing" % self._url)

        if (self._url is not None) and (self._mode is not None):
            logging.debug("MODE : %s", self._mode)
            self._handler = SD(self._url, MODES[self._mode])
            # case of vgroup containing some information
            if self._mode == 'r':
                # open HDF file
                self._hdffile = HDF(self._url, HC.READ)
                # initialize V interface on HDF file
                self._vdata = self._hdffile.vstart()
            return self._handler
        else:
            return None
Example 17
    def __init__(self, path):
        self.path = path
        self.file_handle = HDF(str(path))

        self.scientific_dataset = SD(str(path))
        datasets = self.scientific_dataset.datasets()
        dataset_dict = {
            key: Dataset(weakref.ref(self), key, *info)
            for key, info in datasets.items()
        }
        self.datasets = dataset_dict

        self.vdata_table = VS(self.file_handle)
        vdata_dict = {
            info[0]: VData(weakref.ref(self), *info)
            for info in self.vdata_table.vdatainfo()
        }
        self.vdata = vdata_dict
Example 18
    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.
        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        return self
Example 19
def get_metadata(vds):
    from cis.data_io.ungridded_data import Metadata

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    datafile = HDF(filename)
    vs = datafile.vstart()

    # get data for that variable
    vd = vs.attach(variable)

    name = variable
    misc = vd.attrinfo()

    long_name = _pop_attribute_value(misc, 'long_name', '')
    units = _pop_attribute_value(misc, 'units', '')
    factor = _pop_attribute_value(misc, 'factor')
    offset = _pop_attribute_value(misc, 'offset')
    missing = _pop_attribute_value(misc, 'missing')

    # VD data are always 1D, so the shape is simply the length of the data vector
    shape = [len(vd.read(nRec=vd.inquire()[0]))]

    # Tidy up the rest of the data in misc:
    misc = {k: v[2] for k, v in misc.items()}

    metadata = Metadata(name=name,
                        long_name=long_name,
                        shape=shape,
                        units=units,
                        factor=factor,
                        offset=offset,
                        missing_value=missing,
                        misc=misc)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return metadata
Example 20
    def _get_cloudsat_vds_data(self, vds):
        from cis.data_io.hdf_vd import _get_attribute_value, HDF, HDF4Error
        from cis.utils import create_masked_array_for_missing_data
        import numpy as np

        # get file and variable reference from tuple
        filename = vds.filename
        variable = vds.variable

        try:
            datafile = HDF(filename)
        except HDF4Error as e:
            raise IOError(e)

        vs = datafile.vstart()
        vd = vs.attach(variable)
        data = vd.read(nRec=vd.inquire()[0])

        # create numpy array from data
        data = np.array(data).flatten()

        missing_value = _get_attribute_value(vd, 'missing', None)

        if missing_value is not None:
            data = create_masked_array_for_missing_data(data, missing_value)

        valid_range = _get_attribute_value(vd, "valid_range")
        if valid_range is not None:
            # Assume it's the right data type already
            data = np.ma.masked_outside(data, *valid_range)

        # TODO This probably won't work....
        factor = _get_attribute_value(vd, "factor", 1)
        offset = _get_attribute_value(vd, "offset", 0)
        data = self._apply_scaling_factor_CLOUDSAT(data, factor, offset)

        # detach and close
        vd.detach()
        vs.end()
        datafile.close()

        return data
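_apply_scaling_factor_CLOUDSAT is defined elsewhere in this class; CloudSat products document their packing as value = (raw - offset) / factor, so a minimal sketch under that assumption would be:

def apply_cloudsat_scaling(data, factor, offset):
    # Assumed CloudSat convention: subtract the offset, then divide by the factor.
    return (data - offset) / factor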
Example 22
def read(filename, variables=None, datadict=None):
    """
    Given a filename and a list of variable names, return a dictionary of VD data handles

    :param filename: full path to a single HDF4 file
    :param variables: A list of variables to read; if none are given, none are read
    :param datadict: A dictionary of variable name, data handle pairs to be appended to
    :return: An updated datadict with any new variables appended.
    """

    if not HDF:
        raise ImportError(
            "HDF support was not installed, please reinstall with pyhdf to read HDF files."
        )

    if datadict is None:
        datadict = {}

    variables = listify(variables)

    vs = None
    datafile = None
    try:
        datafile = HDF(filename)
        vs = datafile.vstart()

        for variable in variables:
            try:
                vd = vs.attach(variable)
                vd.detach()
                datadict[variable] = VDS(filename, variable)
            except Exception:
                # ignore variables that fail to attach
                pass
    finally:
        if vs is not None:
            vs.end()
        if datafile is not None:
            datafile.close()

    return datadict
Example 23
def read(filename, variables=None, datadict=None):
    """
    Given a filename and a list of variable names, return a dictionary of VD data handles

    :param filename: full path to a single HDF4 file
    :param variables: A list of variables to read; if none are given, none are read
    :param datadict: A dictionary of variable name, data handle pairs to be appended to
    :return: An updated datadict with any new variables appended.
    """

    if not HDF:
        raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

    if datadict is None:
        datadict = {}

    variables = listify(variables)

    vs = None
    datafile = None
    try:
        datafile = HDF(filename)
        vs = datafile.vstart()

        for variable in variables:
            try:
                vd = vs.attach(variable)
                vd.detach()
                datadict[variable] = VDS(filename, variable)
            except Exception:
                # ignore variables that fail to attach
                pass
    finally:
        if vs is not None:
            vs.end()
        if datafile is not None:
            datafile.close()

    return datadict
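read() records (filename, variable) pairs as VDS handles rather than reading any values; get_data() (shown earlier) then reopens the file to materialise one of them. A hypothetical round trip:

handles = read("granule.hdf", variables=["Latitude", "Longitude"])  # placeholder path
latitudes = get_data(handles["Latitude"])
print(latitudes.shape)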
Example 24
from collections import defaultdict
from datetime import datetime, timedelta

import pandas as PD
from pyhdf.HDF import HDF, HDF4Error


def parse_ace_data(hdf4_fname, N=1000):
    """
    Load ACE data *hdf4_fname* and return a pandas :class:`DataFrame`
    with the information. Process *N* lines of the HDF file at a time.
    """
    key = key_from_fname(hdf4_fname)
    hdf = HDF(hdf4_fname)
    try:
        vs = hdf.vstart()
        vdata = vs.attach(key)
        fieldinfo = vdata.fieldinfo()
        loop_divmod = divmod(vdata.inquire()[0], N)
        fields = [x[0] for x in fieldinfo]
        data_map = defaultdict(list)
        for i in range(loop_divmod[0] + 1):
            try:
                data = vdata.read(N if i < loop_divmod[0] else loop_divmod[1])
            except HDF4Error:
                break
            for data_i in data:
                for data_ii, field in zip(data_i, fields):
                    data_map[field].append(data_ii)
    finally:
        vdata.detach()
        vs.end()
        hdf.close()
    # convert to DataFrame
    remove_set = set(
        ['year', 'fp_year', 'day', 'fp_doy', 'hr', 'min', 'sec', 'ACEepoch'])
    dt = []
    for year, day, hr, minute, sec in zip(
            *[data_map[x] for x in ['year', 'day', 'hr', 'min', 'sec']]):
        dt.append(
            datetime(year, 1, 1) +
            timedelta(days=day - 1, hours=hr, minutes=minute, seconds=sec))
    data = {k: v for k, v in data_map.items() if k not in remove_set}
    df = PD.DataFrame(index=dt, data=data)
    return df
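A hypothetical call for parse_ace_data; the file name is a placeholder and the resulting columns depend on the ACE product being read:

df = parse_ace_data("ACE_MAG_data_1hr.hdf")
print(df.index[0], sorted(df.columns))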
Example 25
from pyhdf.HDF import HDF, HDF4Error
from pyhdf.SD import SD


def dump_cloudsat(filename):
    """
    walk the hdf file and print out
    information about each vgroup and vdata
    object

    Parameters
    ----------

    filename: str or Path object
        name of hdf file

    Returns
    -------

    prints information to stdout
    """
    filename = str(filename)
    hdf = HDF(filename)

    # Initialize the SD, V and VS interfaces on the file.
    sd = SD(filename)
    vs = hdf.vstart()
    v  = hdf.vgstart()

    # Scan all vgroups in the file.
    ref = -1
    while True:
        try:
            ref = v.getid(ref)
            print('vgroup: ', ref)
        except HDF4Error:  # no more vgroups
            break
        describevg(ref, v, vs, sd)
    return None
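describevg is not included in the snippet above; a minimal sketch of such a helper, built only from documented pyhdf calls (the DFTAG_* constants are real members of pyhdf.HDF.HC), could look like this:

from pyhdf.HDF import HC

def describevg(ref, v, vs, sd):
    """Print the name, class and members of the vgroup with reference ref."""
    vg = v.attach(ref)
    print("  name:", vg._name, "class:", vg._class)
    for tag, tagref in vg.tagrefs():
        if tag == HC.DFTAG_NDG:  # member is a scientific dataset
            sds = sd.select(sd.reftoindex(tagref))
            name, rank, dims, dtype, nattrs = sds.info()
            print("    SDS:", name, dims)
            sds.endaccess()
        elif tag == HC.DFTAG_VH:  # member is a vdata
            vd = vs.attach(tagref)
            nrecs, intmode, fields, size, vdname = vd.inquire()
            print("    VData:", vdname, nrecs, "records")
            vd.detach()
        elif tag == HC.DFTAG_VG:  # member is a nested vgroup
            print("    nested vgroup, ref:", tagref)
    vg.detach()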
Example 26
def get_metadata(vds):
    from cis.data_io.ungridded_data import Metadata

    # get file and variable reference from tuple
    filename = vds.filename
    variable = vds.variable

    datafile = HDF(filename)
    vs = datafile.vstart()

    # get data for that variable
    vd = vs.attach(variable)

    name = variable
    long_name = __get_attribute_value(vd, 'long_name')
    # VD data are always 1D, so the shape is simply the length of the data vector
    shape = [len(vd.read(nRec=vd.inquire()[0]))]
    units = __get_attribute_value(vd, 'units')
    valid_range = __get_attribute_value(vd, 'valid_range')
    factor = __get_attribute_value(vd, 'factor')
    offset = __get_attribute_value(vd, 'offset')
    missing = __get_attribute_value(vd, 'missing')

    # put the whole dictionary of attributes into 'misc'
    # so that other metadata of interest can still be retrieved if need be
    misc = vd.attrinfo()

    metadata = Metadata(name=name, long_name=long_name, shape=shape, units=units, range=valid_range,
                        factor=factor, offset=offset, missing_value=missing, misc=misc)

    # detach and close
    vd.detach()
    vs.end()
    datafile.close()

    return metadata
Example 27
    def open(self,
             view=None,
             datamodel=None,
             datamodel_geolocation_dims=None):
        """Open the HDF file

        Args:
            view (dict, optional): a dictionary where keys are dimension names
                and values are slices. A view can be set on a file, meaning
                that only the subset defined by this view will be accessible.
                This view is expressed as any subset (see :func:`get_values`).
                For example::

                    view = {'time': slice(0, 0), 'lat': slice(200, 300),
                            'lon': slice(200, 300)}

            datamodel (str): type of feature read or written. Internal argument
                only used by the classes from :mod:`~cerbere.datamodel`
                package. Can be 'Grid', 'Swath', etc...

            datamodel_geolocation_dims (list, optional): list of the name of the
                geolocation dimensions defining the data model to be read in
                the file. Optional argument, only used by the datamodel
                classes, in case the mapper class can store different types of
                data models.

        Returns:
            a handler on the opened file
        """
        self.view = view
        if self.is_writable():
            raise NotImplementedError
        else:
            if not os.path.exists(self._url):
                raise Exception("File %s is not existing" % self._url)

        if (self._url is not None) and (self._mode is not None):
            logging.debug("MODE : %s", self._mode)
            self._handler = SD(self._url, MODES[self._mode])
            # case of vgroup containing some information
            if self._mode == 'r':
                # open HDF file
                self._hdffile = HDF(self._url, HC.READ)
                # initialize V interface on HDF file
                self._vdata = self._hdffile.vstart()
            return self._handler
        else:
            return None
Example 28
File: hdf.py Project: CMDA-CMU/CMDA
    def open(self):
        """Open for reading."""

        if self.hdf is None:
            self.hdf = HDF(self.file)
            self.vs = self.hdf.vstart()
            # Ignore exceptions telling us there are no VData's
            try:
                self.vdinfo = self.vs.vdatainfo()
            except HDF4Error:
                pass
            # Ignore exceptions telling us there are no SDS's
            try:
                self.sd = SD(self.file)
            except HDF4Error:
                pass
Example 29
def read_amsr_hdf4(filename):
    from pyhdf.SD import SD, SDC
    from pyhdf.HDF import HDF, HC
    import pyhdf.VS

    retv = AmsrObject()
    h4file = SD(filename, SDC.READ)
    # datasets = h4file.datasets()
    # attributes = h4file.attributes()
    # for idx, attr in enumerate(attributes.keys()):
    #    print idx, attr
    for sds in ["Longitude", "Latitude", "High_res_cloud"]:
        data = h4file.select(sds).get()
        if sds in ["Longitude", "Latitude"]:
            retv.all_arrays[sds.lower()] = data.ravel()
        elif sds in ["High_res_cloud"]:
            lwp_gain = h4file.select(sds).attributes()['Scale']
            retv.all_arrays["lwp_mm"] = data.ravel() * lwp_gain

        # print h4file.select(sds).info()
    h4file = HDF(filename, HC.READ)
    vs = h4file.vstart()
    data_info_list = vs.vdatainfo()
    # print "1D data compound/Vdata"
    for item in data_info_list:
        # 1D data compound/Vdata
        name = item[0]
        # print name
        if name in ["Time"]:
            data_handle = vs.attach(name)
            data = np.array(data_handle[:])
            retv.all_arrays["sec1993"] = data
            data_handle.detach()
        else:
            pass
            # print name
        # data = np.array(data_handle[:])
        # attrinfo_dic = data_handle.attrinfo()
        # factor = data_handle.findattr('factor')
        # offset = data_handle.findattr('offset')
        # print data_handle.factor
        # data_handle.detach()
    # print data_handle.attrinfo()
    h4file.close()
    # for key in retv.all_arrays.keys():
    #    print key, retv.all_arrays[key]
    return retv
Example 30
    def get_metadata_badccsv_level3(self):
        self.handler_id = "Hdf handler level 3."
        spatial = None

        file_info = self.get_metadata_generic_level1()

        # First method for extracting information.
        self.hdf = HDF(self.file_path)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        geospatial = self.get_geospatial()
        temporal = self.get_temporal()

        if geospatial is not None:
            lat_u = self.normalize_coord(float(max(geospatial["Latitude"])))
            lat_l = self.normalize_coord(float(min(geospatial["Latitude"])))

            lon_u = self.normalize_coord(float(max(geospatial["Longitude"])))
            lon_l = self.normalize_coord(float(min(geospatial["Longitude"])))

            spatial = {"coordinates": {"type": "envelope",
                                       "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                                       [round(lon_u, 3), round(lat_u, 3)]]}}
        else:
            # Second method.
            geospatial = self.get_geolocation()

            if geospatial is not None:
                lat_u = self.normalize_coord(float(max(geospatial[0])))
                lat_l = self.normalize_coord(float(min(geospatial[0])))

                lon_u = self.normalize_coord(float(max(geospatial[1])))
                lon_l = self.normalize_coord(float(min(geospatial[1])))

                spatial = {"coordinates": {"type": "envelope",
                                           "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                                           [round(lon_u, 3), round(lat_u, 3)]]}}

        if temporal is not None:
            file_info[0]["info"]["temporal"] = {"start_time": temporal["start_time"],
                                                "end_time": temporal["end_time"]}

        return file_info + (None, spatial)
Example 31
class HDFFile(AbstractMapper):
    '''
    Generic storage class for HDF files
    '''

    def __init__(self, url=None, mode=READ_ONLY, **kwargs):
        """
        """
        AbstractMapper.__init__(self, url=url, mode=mode, **kwargs)
        return

    def open(self,
             view=None,
             datamodel=None,
             datamodel_geolocation_dims=None):
        """Open the HDF file

        Args:
            view (dict, optional): a dictionary where keys are dimension names
                and values are slices. A view can be set on a file, meaning
                that only the subset defined by this view will be accessible.
                This view is expressed as any subset (see :func:`get_values`).
                For example::

                    view = {'time': slice(0, 0), 'lat': slice(200, 300),
                            'lon': slice(200, 300)}

            datamodel (str): type of feature read or written. Internal argument
                only used by the classes from :mod:`~cerbere.datamodel`
                package. Can be 'Grid', 'Swath', etc...

            datamodel_geolocation_dims (list, optional): list of the name of the
                geolocation dimensions defining the data model to be read in
                the file. Optional argument, only used by the datamodel
                classes, in case the mapper class can store different types of
                data models.

        Returns:
            a handler on the opened file
        """
        self.view = view
        if self.is_writable():
            raise NotImplementedError
        else:
            if not os.path.exists(self._url):
                raise Exception("File %s is not existing" % self._url)

        if (self._url is not None) and (self._mode is not None):
            logging.debug("MODE : %s", self._mode)
            self._handler = SD(self._url, MODES[self._mode])
            # case of vgroup containing some information
            if self._mode == 'r':
                # open HDF file
                self._hdffile = HDF(self._url, HC.READ)
                # initialize V interface on HDF file
                self._vdata = self._hdffile.vstart()
            return self._handler
        else:
            return None

    def close(self):
        self._vdata.end()                    # terminate V interface
        self._hdffile.close()
        self._handler = None
        self._vdata = None
        self._hdffile = None
        return

    def get_fieldnames(self):
        '''
        Returns the list of geophysical fields stored for the feature
        '''
        fields = list(self.get_handler().datasets().keys())
        # remove here time/space information to keep only geophysical fields
        for field in ['time', 'lat', 'lon']:
            if field in fields:
                fields.remove(self.get_geolocation_field(field))
        return fields

    def read_field_attributes(self, fieldname):
        """
        return the specific storage attributes of a variable
        (_FillValue, scale_factor, add_offset)
        """
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        attrs = self.get_handler().select(native_fieldname).attributes()
        return attrs

    def get_dimsize(self, dimname):
        hdfdim = self.get_matching_dimname(dimname)
        if hdfdim is None:
            hdfdim = dimname
        for fieldname in self.get_handler().datasets():
            dims = self.get_handler().select(fieldname).dimensions()
            for dim in dims:
                if dim == hdfdim:
                    return dims[dim]
        return None

    def get_dimensions(self, fieldname=None):
        """
        Return the standard dimension names of a file or a field in the file

        :keyword fieldname: the field from which to get the dimension names.
            For a geolocation field, use the cerbere standard name
            (time, lat, lon), though native field name will work too.
        :type fieldname: str

        :return: the standard dimensions of the field or file.
        :rtype: tuple of strings
        """
        if fieldname is None:
            raise NotImplementedError
        else:
            native_fieldname = self.get_geolocation_field(fieldname)
            if native_fieldname is None:
                native_fieldname = fieldname
            var = self.get_handler().select(native_fieldname)
            if var is None:
                raise Exception("Variable %s not existing in file"\
                                    % native_fieldname)
            dims = OrderedDict(
                    sorted(var.dimensions(full=True).items(),
                           key=lambda t: t[1][1])
                    )
            dims = [self.get_standard_dimname(dim) for dim in dims]
            return tuple(dims)

    def read_field(self, fieldname):
        namingauth = None
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        varattrs = copy.copy(self.read_field_attributes(fieldname))
        if 'long_name' in varattrs:
            descr = varattrs['long_name']
        else:
            descr = None
        variable = Variable(
                        shortname=fieldname,
                        description=descr,
                        authority=namingauth,
                        standardname=None
                        )
        dims = self.get_full_dimensions(fieldname)
        TYPE_CONVERT = {'4': numpy.dtype(numpy.int8),
                        '5': numpy.dtype(numpy.float32),
                        '20': numpy.dtype(numpy.int8),
                        '21': numpy.dtype(numpy.uint8),
                        '22': numpy.dtype(numpy.int16),
                        '23': numpy.dtype(numpy.uint16),
                        '24': numpy.dtype(numpy.int32)
                        }
        typestr = self.get_handler().select(native_fieldname).info()[3]
        rec = Field(
                variable,
                dims,
                datatype=TYPE_CONVERT[str(typestr)]
                )
        rec.attach_storage(self.get_field_handler(fieldname))
        # MetaData
        rec.units = None
        if 'units' in varattrs:
            rec.units = varattrs['units']
        rec.valid_min = None
        rec.valid_max = None
        rec.attributes = {}
        if ('valid_min' in varattrs and 'valid_max' in varattrs)\
                 or 'valid_range' in varattrs:
            if 'valid_range' in varattrs:
                rec.valid_min, rec.valid_max = varattrs['valid_range']
            else:
                rec.valid_min = varattrs['valid_min']
                rec.valid_max = varattrs['valid_max']
            if 'scale_factor' in varattrs:
                rec.valid_min = rec.valid_min * varattrs['scale_factor']
                rec.valid_max = rec.valid_max * varattrs['scale_factor']
            if 'add_offset' in varattrs:
                rec.valid_min = rec.valid_min + varattrs['add_offset']
                rec.valid_max = rec.valid_max + varattrs['add_offset']
        for att in varattrs:
            if att not in ['units', 'scale_factor', 'add_offset',
                           '_FillValue', 'valid_min', 'valid_max',
                           'scale_factor_err', 'add_offset_err',
                           'valid_range', 'calibrated_nt', 'SDS_type',
                           'long_name', 'bad_value_scaled',
                           'bad_value_unscaled']:
                rec.attributes[att] = varattrs[att]
        return rec

    def read_values(self, fieldname, slices=None):
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        var = self.get_handler().select(native_fieldname)
        if slices is None:
            values = var.get()
        else:
            dims = list(self.get_full_dimensions(fieldname).keys())
            newslices = []
            # fill in slices with None values
            for ind, slc in enumerate(slices):
                i0, i1, step = slc.start, slc.stop, slc.step
                if i0 is None:
                    i0 = 0
                if i1 is None:
                    i1 = self.get_dimsize(dims[ind])
                if step is None:
                    step = 1
                newslices.append(slice(i0, i1, step))
            # Added conversion to int as get does not support long values.
            slstart = [int(s.start) for s in newslices]
            slstop = [int(s.stop - s.start) for s in newslices]
            slstride = [int(s.step) for s in newslices]
            values = var.get(start=tuple(slstart),
                             count=tuple(slstop),
                             stride=tuple(slstride))
        attrs = self.read_field_attributes(fieldname)
        if '_FillValue' in attrs:
            fill_value = attrs['_FillValue']
        else:
            fill_value = None
        if fill_value is not None:
            values = numpy.ma.array(values, fill_value=fill_value)
        else:
            values = numpy.ma.array(values)
        if 'scale_factor' in attrs:
            values = values * attrs['scale_factor']
        if 'add_offset' in attrs:
            values = values + attrs['add_offset']
        return values

    def read_global_attributes(self):
        return self.get_handler().attributes()

    def read_global_attribute(self, attr):
        """
        """
        return self.read_global_attributes()[attr]

    def write_field(self, fieldname):
        """
        """
        raise NotImplementedError

    def read_fillvalue(self, fieldname):
        """
        """
        raise NotImplementedError

    def create_field(self, field, dim_translation=None):
        """
        """
        raise NotImplementedError

    def create_dim(self, dimname, size=None):
        """
        """
        raise NotImplementedError

    def write_global_attributes(self, attrs):
        """
        write the storage (file) global attributes
        """
        raise NotImplementedError

    def get_start_time(self):
        """Returns the minimum date of the file temporal coverage"""
        raise NotImplementedError

    def get_end_time(self):
        """
        """
        raise NotImplementedError

    def get_bbox(self):
        '''
        returns the bounding box of the feature, as a tuple
         (lonmin, latmin, lonmax, latmax)
        '''
        return None

    def get_spatial_resolution_in_deg(self):
        """Returns the average spatial resolution in degrees"""
        return None
Example 32
class HDFFile(AbstractMapper):
    '''
    Generic storage class for HDF files
    '''
    def __init__(self, url=None, mode=READ_ONLY, **kwargs):
        """
        """
        AbstractMapper.__init__(self, url=url, mode=mode, **kwargs)
        return

    def open(self, view=None, datamodel=None, datamodel_geolocation_dims=None):
        """Open the HDF file

        Args:
            view (dict, optional): a dictionary where keys are dimension names
                and values are slices. A view can be set on a file, meaning
                that only the subset defined by this view will be accessible.
                This view is expressed as any subset (see :func:`get_values`).
                For example::

                    view = {'time': slice(0, 0), 'lat': slice(200, 300),
                            'lon': slice(200, 300)}

            datamodel (str): type of feature read or written. Internal argument
                only used by the classes from :mod:`~cerbere.datamodel`
                package. Can be 'Grid', 'Swath', etc...

            datamodel_geolocation_dims (list, optional): list of the name of the
                geolocation dimensions defining the data model to be read in
                the file. Optional argument, only used by the datamodel
                classes, in case the mapper class can store different types of
                data models.

        Returns:
            a handler on the opened file
        """
        self.view = view
        if self.is_writable():
            raise NotImplementedError
        else:
            if not os.path.exists(self._url):
                raise Exception("File %s is not existing" % self._url)

        if (self._url is not None) and (self._mode is not None):
            logging.debug("MODE : %s", self._mode)
            self._handler = SD(self._url, MODES[self._mode])
            # case of vgroup containing some information
            if self._mode == 'r':
                # open HDF file
                self._hdffile = HDF(self._url, HC.READ)
                # initialize V interface on HDF file
                self._vdata = self._hdffile.vstart()
            return self._handler
        else:
            return None

    def close(self):
        self._vdata.end()  # terminate V interface
        self._hdffile.close()
        self._handler = None
        self._vdata = None
        self._hdffile = None
        return

    def get_fieldnames(self):
        '''
        Returns the list of geophysical fields stored for the feature
        '''
        fields = list(self.get_handler().datasets().keys())
        # remove here time/space information to keep only geophysical fields
        for field in ['time', 'lat', 'lon']:
            if field in fields:
                fields.remove(self.get_geolocation_field(field))
        return fields

    def read_field_attributes(self, fieldname):
        """
        return the specific storage attributes of a variable
        (_FillValue, scale_factor, add_offset)
        """
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        attrs = self.get_handler().select(native_fieldname).attributes()
        return attrs

    def get_dimsize(self, dimname):
        hdfdim = self.get_matching_dimname(dimname)
        if hdfdim is None:
            hdfdim = dimname
        for fieldname in self.get_handler().datasets():
            dims = self.get_handler().select(fieldname).dimensions()
            for dim in dims:
                if dim == hdfdim:
                    return dims[dim]
        return None

    def get_dimensions(self, fieldname=None):
        """
        Return the standard dimension names of a file or a field in the file

        :keyword fieldname: the field from which to get the dimension names.
            For a geolocation field, use the cerbere standard name
            (time, lat, lon), though native field name will work too.
        :type fieldname: str

        :return: the standard dimensions of the field or file.
        :rtype: tuple of strings
        """
        if fieldname is None:
            raise NotImplementedError
        else:
            native_fieldname = self.get_geolocation_field(fieldname)
            if native_fieldname is None:
                native_fieldname = fieldname
            var = self.get_handler().select(native_fieldname)
            if var is None:
                raise Exception("Variable %s not existing in file"\
                                    % native_fieldname)
            dims = OrderedDict(
                sorted(var.dimensions(full=True).items(),
                       key=lambda t: t[1][1]))
            dims = [self.get_standard_dimname(dim) for dim in dims]
            return tuple(dims)

    def read_field(self, fieldname):
        namingauth = None
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        varattrs = copy.copy(self.read_field_attributes(fieldname))
        if 'long_name' in varattrs:
            descr = varattrs['long_name']
        else:
            descr = None
        variable = Variable(shortname=fieldname,
                            description=descr,
                            authority=namingauth,
                            standardname=None)
        dims = self.get_full_dimensions(fieldname)
        TYPE_CONVERT = {
            '4': numpy.dtype(numpy.int8),
            '5': numpy.dtype(numpy.float32),
            '20': numpy.dtype(numpy.int8),
            '21': numpy.dtype(numpy.uint8),
            '22': numpy.dtype(numpy.int16),
            '23': numpy.dtype(numpy.uint16),
            '24': numpy.dtype(numpy.int32)
        }
        typestr = self.get_handler().select(native_fieldname).info()[3]
        rec = Field(variable, dims, datatype=TYPE_CONVERT[str(typestr)])
        rec.attach_storage(self.get_field_handler(fieldname))
        # MetaData
        rec.units = None
        if 'units' in varattrs:
            rec.units = varattrs['units']
        rec.valid_min = None
        rec.valid_max = None
        rec.attributes = {}
        if ('valid_min' in varattrs and 'valid_max' in varattrs)\
                 or 'valid_range' in varattrs:
            if 'valid_range' in varattrs:
                rec.valid_min, rec.valid_max = varattrs['valid_range']
            else:
                rec.valid_min = varattrs['valid_min']
                rec.valid_max = varattrs['valid_max']
            if 'scale_factor' in varattrs:
                rec.valid_min = rec.valid_min * varattrs['scale_factor']
                rec.valid_max = rec.valid_max * varattrs['scale_factor']
            if 'add_offset' in varattrs:
                rec.valid_min = rec.valid_min + varattrs['add_offset']
                rec.valid_max = rec.valid_max + varattrs['add_offset']
        for att in varattrs:
            if att not in [
                    'units', 'scale_factor', 'add_offset', '_FillValue',
                    'valid_min', 'valid_max', 'scale_factor_err',
                    'add_offset_err', 'valid_range', 'calibrated_nt',
                    'SDS_type', 'long_name', 'bad_value_scaled',
                    'bad_value_unscaled'
            ]:
                rec.attributes[att] = varattrs[att]
        return rec

    def read_values(self, fieldname, slices=None):
        native_fieldname = self.get_geolocation_field(fieldname)
        if native_fieldname is None:
            native_fieldname = fieldname
        var = self.get_handler().select(native_fieldname)
        if slices is None:
            values = var.get()
        else:
            dims = list(self.get_full_dimensions(fieldname).keys())
            newslices = []
            # fill in slices with None values
            for ind, slc in enumerate(slices):
                i0, i1, step = slc.start, slc.stop, slc.step
                if i0 is None:
                    i0 = 0
                if i1 is None:
                    i1 = self.get_dimsize(dims[ind])
                if step is None:
                    step = 1
                newslices.append(slice(i0, i1, step))
            # Added conversion to int as get does not support long values.
            slstart = [int(s.start) for s in newslices]
            slstop = [int(s.stop - s.start) for s in newslices]
            slstride = [int(s.step) for s in newslices]
            values = var.get(start=tuple(slstart),
                             count=tuple(slstop),
                             stride=tuple(slstride))
        attrs = self.read_field_attributes(fieldname)
        if '_FillValue' in attrs:
            fill_value = attrs['_FillValue']
        else:
            fill_value = None
        if fill_value is not None:
            values = numpy.ma.array(values, fill_value=fill_value)
        else:
            values = numpy.ma.array(values)
        if 'scale_factor' in attrs:
            values = values * attrs['scale_factor']
        if 'add_offset' in attrs:
            values = values + attrs['add_offset']
        return values

    def read_global_attributes(self):
        return self.get_handler().attributes()

    def read_global_attribute(self, attr):
        """
        """
        return self.read_global_attributes()[attr]

    def write_field(self, fieldname):
        """
        """
        raise NotImplementedError

    def read_fillvalue(self, fieldname):
        """
        """
        raise NotImplementedError

    def create_field(self, field, dim_translation=None):
        """
        """
        raise NotImplementedError

    def create_dim(self, dimname, size=None):
        """
        """
        raise NotImplementedError

    def write_global_attributes(self, attrs):
        """
        write the storage (file) global attributes
        """
        raise NotImplementedError

    def get_start_time(self):
        """Returns the minimum date of the file temporal coverage"""
        raise NotImplementedError

    def get_end_time(self):
        """
        """
        raise NotImplementedError

    def get_bbox(self):
        '''
        returns the bounding box of the feature, as a tuple
         (lonmin, latmin, lonmax, latmax)
        '''
        return None

    def get_spatial_resolution_in_deg(self):
        """Returns the average spatial resolution in degrees"""
        return None
Example 33
File: hdf.py Project: CMDA-CMU/CMDA
class HdfFile(object):
    """Class implementing HDF file access."""
    
    GEOLOC_FIELDS = ()
    
    def __init__(self, file):
        """Constructor."""
        self.file = file
        self.hdf = None
        self.vs = None
        self.vdinfo = None
        self.sd = None
        self.savedVarsDict = None
        self.open()
        
        # Permit data files without VData's
        if self.vdinfo is not None:
            self.vdList = [i[0] for i in self.vdinfo]
            #print self.vdList
        
        # Permit data files without SDS's
        if self.sd is not None:
            self.datasetList = self.sd.datasets().keys()
            #print self.datasetList

        self.levels    = {}
        self.geoDict   = self._getGeoDict()
        self.dataDict  = self._getDataDict()
        self.close()
        
    # Always define in subclass
    def _getGeoDict(self): raise NotImplementedError("Not implemented.")
    # Always define in subclass, except for cloudsat
    def _getDataDict(self): return None

    def open(self):
        """Open for reading."""
        
        if self.hdf is None:
            self.hdf = HDF(self.file)
            self.vs = self.hdf.vstart()
            # Ignore exceptions telling us there are no VData's
            try:
                self.vdinfo = self.vs.vdatainfo()
            except HDF4Error:
                pass
            # Ignore exceptions telling us there are no SDS's
            try:
                self.sd = SD(self.file)
            except HDF4Error:
                pass
    
    def close(self):
        """Close hdf file."""

        if hasattr(self, 'hdf') and self.hdf is not None:
            self.vs.end()
            self.hdf.close()
            # The SD interface may never have been opened (no SDS's)
            if self.sd is not None:
                self.sd.end()
            self.hdf = None
            self.vs = None
            self.vdinfo = None
            self.sd = None
    
    def getGeo(self): return self.geoDict
    
    def get(self, var):
        """Return variable array dict."""
        
        self.open()
        
        #get list of vars
        if isinstance(var, str): var = [var]
        elif isinstance(var, (list, tuple)): pass
        elif var is None:
            # If we don't have any SDS's, go with the vdata's only
            # if we don't have any vdata's go with SDS's only
            try:
                var = self.vdList
                try:
                    var.extend(self.datasetList)
                except AttributeError:
                    pass
            except AttributeError:
                var = self.datasetList                
        else: raise RuntimeError("Incorrect argument type for %s." % var)
        
        #create dict of (attrs, array) for each var
        a = {}
        for v in var:
            if v == '': continue  # added by bytang
            #handle SD types
            ds = None
            if v in self.datasetList:
                try:
                    ds = self.sd.select(v)
                    a[v] = (ds.attributes(), N.array(ds.get()))
                    continue
                except HDF4Error:
                    pass
                finally:
                    if ds is not None: ds.endaccess()
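
The excerpt above is truncated by the source site before the VData branch of get(). For reference, a minimal standalone sketch of reading one named VData into a numpy array with pyhdf (read_vdata and its arguments are illustrative placeholders, not the project's code):

import numpy as N
from pyhdf.HDF import HDF

def read_vdata(filename, vdata_name):
    """Read all records of a named VData and return them as an array."""
    hdf = HDF(filename)
    vs = hdf.vstart()
    vd = vs.attach(vdata_name)
    try:
        # vd[:] reads every record of the attached VData
        records = N.array(vd[:])
    finally:
        vd.detach()
        vs.end()
        hdf.close()
    return records
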
Example n. 34
class HDF4(_geospatial):
    """
    HDF4 context manager class.
    """
    hdf = None
    vs = None
    v = None

    def __init__(self, fname):
        """
        :param str fname: The path of the HDF4 file.
        """
        self.fname = str(fname)

    def __enter__(self):
        """
        Open HDF file and interfaces for use as context manager.

        :returns: Self.
        """
        self.hdf = HDF(self.fname)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        return self

    def __exit__(self, *args):
        """
        Close interfaces and HDF file after finishing use in context manager.
        """
        self.v.end()
        self.vs.end()
        self.hdf.close()

    def _get_coords(self, vs, fn):
        """
        Iterate through vgroup and return a list of coordinates (if existing).

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing geospatial information.
        """
        mappings = {
            "NVlat2": "lat",
            "NVlng2": "lon",
        }

        coords = {}
        for k, v in mappings.items():
            ref = vs.find(k)
            vd = vs.attach(ref)

            coords[v] = []
            while True:
                try:
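                    # Stored scaled by a factor of 1e7; divide to get degrees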
                    coord = float(vd.read()[0][0])
                    coord /= 10**7
                    coords[v].append(coord)
                except HDF4Error:  # End of file
                    break

            vd.detach()
        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)

        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing temporal information.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for k, v in mappings.items():
            ref = vs.find(k)
            vd = vs.attach(ref)

            timestamps[v] = []
            while True:
                try:
                    timestamps[v].append(vd.read()[0][0])
                except HDF4Error:  # EOF
                    break

            vd.detach()

        # HDF text data comes back as a list of integer character codes,
        # e.g. 72 101 108 108 111 32 119 111 114 108 100 means "Hello world".
        # Convert those codes to a string, dropping any NUL (0) padding.
        dates = [chr(x) for x in timestamps["date"] if x != 0]
        timestamps["date"] = ''.join(dates)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.

        :param dict tm_dict: The timestamp to be parsed
        :returns: Dict containing start and end timestamps
        """
        st_base = ("%s %s" % (tm_dict["date"], tm_dict["start_time"][0]))
        et_base = ("%s %s" % (tm_dict["date"], tm_dict["end_time"][0]))

        for t_format in ["%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S"]:
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
                break
            except ValueError:
                # ValueError is raised if the strptime format doesn't match
                # the actual timestamp - so try the next format
                continue
        else:
            # No format matched; fail loudly rather than hit a NameError below
            raise ValueError("Unparsable timestamps: %r, %r" % (st_base, et_base))

        return {
            "start_time": start_time.isoformat(),
            "end_time": end_time.isoformat()
        }

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).

        :returns: Dict containing geospatial information.
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Navigation":
                    geospatial = self._get_coords(self.vs, self.fname)
                    geospatial["type"] = "track"  # Type annotation
                    vg.detach()
                    return geospatial

                vg.detach()
            except HDF4Error:  # End of file
                # This is a weird way of handling files, but this is what the
                # pyhdf library demonstrates...
                break

        return None

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)

        :returns: List containing temporal metadata
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Mission":
                    temporal = self._get_temporal(self.vs, self.fname)
                    vg.detach()
                    return temporal

                vg.detach()
            except HDF4Error:  # End of file
                # This 'except at end of file' thing is some pyhdf weirdness
                # Check the pyhdf documentation for clarification
                break

        return None

    def get_properties(self):
        """
        Returns ceda_di.metadata.properties.Properties object
        containing geospatial and temporal metadata from file.

        :returns: Metadata.product.Properties object
        """
        geospatial = self.get_geospatial()
        temporal = self.get_temporal()
        filesystem = super(HDF4, self).get_filesystem(self.fname)
        data_format = {
            "format": "HDF4",
        }

        instrument = arsf.Hyperspectral.get_instrument(filesystem["filename"])
        flight_info = arsf.Hyperspectral.get_flight_info(
            filesystem["filename"])
        props = product.Properties(spatial=geospatial,
                                   temporal=temporal,
                                   filesystem=filesystem,
                                   data_format=data_format,
                                   instrument=instrument,
                                   flight_info=flight_info)

        return props
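
The integer-to-text conversion described in the comments of _get_temporal is easy to verify in isolation; a tiny self-contained demonstration using the example values from the comment above:

# "Hello world" as HDF character codes (see the comment in _get_temporal)
codes = [72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100]
text = ''.join(chr(x) for x in codes if x != 0)
assert text == "Hello world"
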
Example n. 35
def HDFread(filename, variable, vgroup=None):
    """
    Extract the data for non-scientific data in V mode of hdf file
    """
    if vgroup is None:
        vgroup = 'Geolocation Fields'
        
    filename=str(filename)
    hdf = HDF(filename, HC.READ)

    # Initialize the SD, V and VS interfaces on the file.
    sd = SD(filename)
    vs = hdf.vstart()
    v  = hdf.vgstart()
    vg_dict={}
    ref = -1
    while True:
        try:
            ref = v.getid(ref)
        except HDF4Error:    # no more vgroups
            break
        vg = v.attach(ref)
        vg_dict[vg._name] = (vg._tag, vg._refnum)
        vg.detach()
        
    tag, ref = vg_dict[vgroup]

    # Attach the requested vgroup
    vg = v.attach(ref)

    # All fields in the class
    members = vg.tagrefs()

    nrecs = []
    names = []
    refs = []
    for tag, ref in members:
        # Keep only Vdata members; record their refs so the three lists
        # stay index-aligned (members may also contain non-Vdata tags)
        if tag == HC.DFTAG_VH:
            vd = vs.attach(ref)
            nrec, intmode, fields, size, name = vd.inquire()
            nrecs.append(nrec)
            names.append(name)
            refs.append(ref)
            vd.detach()
    try:
        idx = names.index(variable)
    except ValueError:
        raise ValueError(f'{variable} is not in {names} for vgroup {vgroup}')

    var = vs.attach(refs[idx])
    V = var.read(nrecs[idx])
    var.detach()
    # Terminate V, VS and SD interfaces.
    v.end()
    vs.end()
    sd.end()
    # Close HDF file.
    hdf.close()
    return np.asarray(V)
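
A hypothetical call to HDFread; the file name is a placeholder, and 'Geolocation Fields' is simply the function's own default vgroup:

# Hypothetical usage - the file and field names are placeholders
lat = HDFread('granule.hdf', 'Latitude', vgroup='Geolocation Fields')
print(lat.shape, lat.dtype)
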
Example n. 36
def open_dataset_hdf(filename, variables=None, drop_variables=()):
    da_dict = {}

    # First read SD (scientific datasets)
    sd = SD(filename)
    if variables is None:
        data_vars = sd.datasets().keys()
    else:
        data_vars = variables
    for dname in data_vars:

        if dname in drop_variables: continue
        if dname not in sd.datasets().keys(): continue

        sds = sd.select(dname)

        # get (masked) data
        d = np.where(sds[:] != sds.getfillvalue(), sds[:], np.nan)

        # check for more masks
        if 'missing' in sds.attributes():
            d[d == sds.missing] = np.nan

        # unpack data
        if 'offset' in sds.attributes() and 'factor' in sds.attributes():
            d = d / sds.factor + sds.offset

        # coordinate variables...how to do this?! Look for VDATA?
        # just save as DataArray for now, without coordinate variables...
        dims = [sds.dim(i).info()[0] for i in range(len(sds.dimensions()))]

        da_dict[dname] = xr.DataArray(d,
                                      dims=dims,
                                      attrs=sds.attributes(),
                                      name=dname)

        # Close this dataset
        sds.endaccess()

    # Close file
    sd.end()

    # ...now read VDATA...
    hdf = HDF(filename)
    vs = hdf.vstart()
    if variables is None:
        data_vars, *__ = zip(*vs.vdatainfo())
    else:
        data_vars = variables
    for vname in data_vars:

        if vname in drop_variables: continue
        if vname not in [v[0] for v in vs.vdatainfo()]: continue

        # attach vdata
        vd = vs.attach(vname)

        # get vdata info
        nrec, mode, fields, *__ = vd.inquire()
        if nrec == 0:
            vd.detach()
            continue

        # read data
        d = np.array(vd[:]).squeeze()

        # make sure not to overwrite coordinate variables
        if all([vname not in da.dims for v, da in da_dict.items()]):
            da_dict[vname] = xr.DataArray(d)

        vd.detach()

    # clean up
    vs.end()

    # HDF files do not always close cleanly, so close manually
    hdf.close()

    return xr.Dataset(da_dict)
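
A hypothetical call to open_dataset_hdf; the file and variable names are placeholders:

# Hypothetical usage - the file and variable names are placeholders
ds = open_dataset_hdf('granule.hdf', variables=['Latitude', 'Longitude'])
print(ds)
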
Example n. 39
class HdfFile(GenericFile):

    def __init__(self, file_path, level, additional_param=None):
        GenericFile.__init__(self, file_path, level)
        self.handler_id = "hdf2."
        self.FILE_FORMAT = "hdf2."

    def get_handler_id(self):
        return self.handler_id

    def _get_coords(self, vs, fn):
        """
        Iterate through vgroup and return a list of coordinates (if existing).
        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing geospatial information.
        """
        mappings = {
            "NVlat2": "Latitude",
            "NVlng2": "Longitude",
        }

        coords = {}
        for k, v in mappings.items():
            ref = vs.find(k)
            vd = vs.attach(ref)

            coords[v] = []
            while True:
                try:
                    coord = float(vd.read()[0][0])
                    coord /= 10**7
                    coords[v].append(coord)
                except HDF4Error:  # End of file
                    break

            vd.detach()
        return coords

    def _get_temporal(self, vs, fn):
        """
        Return start and end timestamps (if existing)
        :param HDF4.V.vs vs: VData object
        :param str fn: Path to the data file
        :returns: Dict containing temporal information.
        """
        mappings = {
            "MIdate": "date",
            "MIstime": "start_time",
            "MIetime": "end_time",
        }

        timestamps = {}
        for k, v in mappings.items():
            ref = vs.find(k)
            vd = vs.attach(ref)

            timestamps[v] = []
            while True:
                try:
                    timestamps[v].append(vd.read()[0][0])
                except HDF4Error:  # EOF
                    break

            vd.detach()

        # This list comprehension basically converts from a list of integers
        # into a list of chars and joins them together to make strings
        # ...
        # If unclear - HDF text data comes out as a list of integers, e.g.:
        # 72 101 108 108 111 32 119 111 114 108 100 (this means "Hello world")
        # Those "char" numbers get converted to strings with this snippet.
        dates = [chr(x) for x in timestamps["date"] if x != 0]
        timestamps["date"] = ''.join(dates)

        return self._parse_timestamps(timestamps)

    def _parse_timestamps(self, tm_dict):
        """
        Parse start and end timestamps from an HDF4 file.
        :param dict tm_dict: The timestamp to be parsed
        :returns: Dict containing start and end timestamps
        """
        st_base = ("%s %s" % (tm_dict["date"], tm_dict["start_time"][0]))
        et_base = ("%s %s" % (tm_dict["date"], tm_dict["end_time"][0]))

        for t_format in ["%d/%m/%y %H%M%S", "%d/%m/%Y %H%M%S"]:
            try:
                start_time = datetime.datetime.strptime(st_base, t_format)
                end_time = datetime.datetime.strptime(et_base, t_format)
                break
            except ValueError:
                # ValueError is raised if the strptime format doesn't match
                # the actual timestamp - so try the next format
                continue
        else:
            # No format matched; fail loudly rather than hit a NameError below
            raise ValueError("Unparsable timestamps: %r, %r" % (st_base, et_base))

        return {"start_time": start_time.isoformat(),
                "end_time": end_time.isoformat()}

    def get_geospatial(self):
        """
        Search through HDF4 file, returning a list of coordinates from the
        'Navigation' vgroup (if it exists).
        :returns: Dict containing geospatial information.
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Navigation":
                    geospatial = self._get_coords(self.vs, self.file_path)
                    geospatial["type"] = "track"  # Type annotation
                    vg.detach()
                    return geospatial

                vg.detach()
            except HDF4Error:  # End of file
                # This is a weird way of handling files, but this is what the
                # pyhdf library demonstrates...
                break

        return None

    def get_temporal(self):
        """
        Search through HDF4 file, returning timestamps from the 'Mission'
        vgroup (if it exists)
        :returns: List containing temporal metadata
        """
        ref = -1
        while True:
            try:
                ref = self.v.getid(ref)
                vg = self.v.attach(ref)

                if vg._name == "Mission":
                    temporal = self._get_temporal(self.vs, self.file_path)
                    vg.detach()
                    return temporal

                vg.detach()
            except HDF4Error:  # End of file
                # This 'except at end of file' thing is some pyhdf weirdness
                # Check the pyhdf documentation for clarification
                break

        return None

    def get_phenomena(self, fp):
        phen_list = []
        return phen_list

    def get_metadata_badccsv_level2(self):
        return None

    def get_geolocation(self):
        # Open file.

        hdf = SD(self.file_path, SDC.READ)

        # List available SDS datasets.
        datasets = hdf.datasets()


        # Read geolocation dataset.
        try:
            lat = hdf.select('Latitude')
            latitude = lat[:,:].flatten()
            lon = hdf.select('Longitude')
            longitude = lon[:,:].flatten()
            return (latitude, longitude)
        except HDF4Error:
            return None
 
    def normalize_coord(self, coord):
        # Treat values below -180 as invalid and reset them to 0
        if coord < -180:
            coord = 0

        return coord

    def get_metadata_badccsv_level3(self):
        self.handler_id = "Hdf handler level 3."
        spatial = None

        file_info = self.get_metadata_generic_level1()

        # First method for extracting information.
        self.hdf = HDF(self.file_path)
        self.vs = self.hdf.vstart()
        self.v = self.hdf.vgstart()

        geospatial = self.get_geospatial()
        temporal = self.get_temporal()

        if geospatial is not None:
            lat_u = self.normalize_coord(float(max(geospatial["Latitude"])))
            lat_l = self.normalize_coord(float(min(geospatial["Latitude"])))

            lon_u = self.normalize_coord(float(max(geospatial["Longitude"])))
            lon_l = self.normalize_coord(float(min(geospatial["Longitude"])))

            spatial = {"coordinates": {"type": "envelope",
                                       "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                                       [round(lon_u, 3), round(lat_u, 3)]]}}
        else:
            # Second method.
            geospatial = self.get_geolocation()

            if geospatial is not None:
                lat_u = self.normalize_coord(float(max(geospatial[0])))
                lat_l = self.normalize_coord(float(min(geospatial[0])))

                lon_u = self.normalize_coord(float(max(geospatial[1])))
                lon_l = self.normalize_coord(float(min(geospatial[1])))

                spatial = {"coordinates": {"type": "envelope",
                                           "coordinates": [[round(lon_l, 3), round(lat_l, 3)],
                                                           [round(lon_u, 3), round(lat_u, 3)]]}}

        if temporal is not None:
            file_info[0]["info"]["temporal"] = {"start_time": temporal["start_time"],
                                                "end_time": temporal["end_time"]}

        return file_info + (None, spatial, )

    def get_metadata(self):

        if self.level == "1":
            res = self.get_metadata_generic_level1()
        elif self.level == "2":
            res = self.get_metadata_generic_level1()
        elif self.level == "3":
            res = self.get_metadata_badccsv_level3()

        res[0]["info"]["format"] = self.FILE_FORMAT

        return res

    def __enter__(self):
        return self

    def __exit__(self, *args):
        pass
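
A hypothetical way to drive this handler, assuming GenericFile (defined elsewhere in the project) accepts the arguments shown; the path is a placeholder:

# Hypothetical usage - the path is a placeholder; level "3" routes
# get_metadata() to get_metadata_badccsv_level3()
with HdfFile('/path/to/data.hdf', level="3") as handler:
    metadata = handler.get_metadata()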