Example #1
def read_hdf4_info(input_file_path) :
    """
    Get information about variable names and attributes (both global and variable-specific)
    from the given file. The file is assumed to exist and to be a valid HDF4 file.

    Returns a dictionary of the form:

        {
            GLOBAL_ATTRS_KEY    : a dictionary of attribute values keyed by the attribute names
            VAR_LIST_KEY        : [list of variable names]
            VAR_INFO_KEY        :   {
                                        <var_name> :    {
                                                            SHAPE_KEY: (shape of variable data)
                                                            VAR_ATTRS_KEY: a dictionary of attribute values keyed by the attribute names
                                                        }
                                    }

        }

        TODO: depending on what changes are needed for CF compliance, this data structure may need to change a lot
    """

    file_info = { }

    # open the file
    file_object = SD(input_file_path, SDC.READ)

    # get information on the global attributes in the file
    global_attrs = file_object.attributes()
    file_info[GLOBAL_ATTRS_KEY] = global_attrs

    # get information on the variables in the file
    variable_list = file_object.datasets().keys()
    file_info[VAR_LIST_KEY] = variable_list

    # for each variable in a file, get more specific information about it
    file_info[VAR_INFO_KEY] = { }
    sets_temp = file_object.datasets()
    # this should return a dictionary with entries for each variable in the form
    #       <variable name>: ((dimension names), (data shape), type, index num)
    for var_name in variable_list :
        var_object = file_object.select(var_name)
        var_attrs  = var_object.attributes()
        file_info[VAR_INFO_KEY][var_name] = {
                                                SHAPE_KEY: sets_temp[var_name][1],
                                                VAR_ATTRS_KEY: var_attrs,
                                            }

    return file_info, file_object
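
For reference, a minimal sketch (with a hypothetical file path) of the SD.datasets() layout the comment above relies on: each entry maps a variable name to ((dimension names), (shape), type code, index).

from pyhdf.SD import SD, SDC

sd = SD("example.hdf", SDC.READ)  # hypothetical file path
for var_name, (dims, shape, type_code, index) in sd.datasets().items():
    # e.g. 'Latitude': (('YDim', 'XDim'), (180, 360), 5, 0)
    print(var_name, dims, shape, type_code, index)
sd.end()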
Example #2
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
            from pyhdf.HDF import HDF
        except ImportError:
            raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

        valid_variables = set([])
        for filename in filenames:
            # Do VD variables
            datafile = HDF(filename)
            vdata = datafile.vstart()
            variables = vdata.vdatainfo()
            # Assumes that latitude shape == longitude shape (it should):
            # dim_length = [var[3] for var in variables if var[0] == 'Latitude'][0]
            for var in variables:
                # if var[3] == dim_length:
                valid_variables.add(var[0])

            # Do SD variables:
            sd = SD(filename)
            datasets = sd.datasets()
            # if 'Height' in datasets:
            #     valid_shape = datasets['Height'][1]
            for var in datasets:
                # if datasets[var][1] == valid_shape:
                valid_variables.add(var)

        return valid_variables
Example #3
def export_multi_fluid_LFM(argv):
	if (len(argv) >= 2):
		input_filename = argv[0]
		output_filename = argv[1]
		print(input_filename)
		sd = SD(input_filename, SDC.READ)

		grid = get_corners(sd)

		timesteps = 0

		# step = 1640000

		for key in sd.datasets().keys():
			shift = key.find('time_step')
			if shift == 0:
				if len(argv) == 3:
					step = argv[2]
					if key == 'time_step_'+str(step):
						export_timestep(sd, output_filename, key, grid)
				else:
					export_timestep(sd, output_filename, key, grid)
				timesteps += 1

		print('timesteps found in file:', timesteps)


	else:
		print('usage: python lfm_split.py input_multi_timestep_hdf output_filename_prefix step(optional)')
Example #4
def main():

    varname_to_rpn_name = {
        "precipitation": "PR",
        "relativeError": "RERR"
    }

    varnames = list(varname_to_rpn_name.keys())

    target_dir = "/skynet3_rech1/huziy/from_hdf4"
    source_dir = "/st1_fs2/winger/Validation/TRMM/HDF_format"

    for f_name in os.listdir(source_dir):
        if not f_name.endswith("HDF"):
            continue

        path = os.path.join(source_dir, f_name)
        ds = SD(path)
        print(ds.datasets())
        target_path = os.path.join(target_dir, f_name + ".rpn")
        r_obj = RPN(target_path, mode="w")
        for varname in varnames:
            var_data = ds.select(varname)[0, :, :]
            r_obj.write_2D_field(
                name=varname_to_rpn_name[varname],
                data=var_data, label=varname, grid_type="L",
                ig = [25, 25, 4013, 18012])
        r_obj.close()
Example #5
 def load(self, fldname, **kwargs):
     """ Load Cali Current fields for a given day"""
     self._timeparams(**kwargs)
     
     if fldname == 'chl':
         filename = "/C%04i%03i_chl_mapped.hdf" % (self.yr, self.yd)
         #ncfieldname = 'chl_%04i_%03i' % (yr,yd)
         def scale(PV): return 10**(PV*0.015-2)
     elif fldname == 'sst':
         filename = "/M%04i%03i_sst_mapped.hdf" % (self.yr, self.yd)
         #ncfieldname = 'sst_%04i_%03i' % (yr,yd)            
         def scale(PV): return PV*0.15000001-3
     if not os.path.isfile(self.datadir + filename):
         print("Downloading " + filename)
         self.download(fldname, self.jd)
         
     h = SD(self.datadir + filename,SDC.READ)        
     ncfieldname = list(h.datasets().keys())[0]
     fld =  h.select(ncfieldname)
     attr = fld.attributes()
     PV = fld[:].astype(np.float64)
     PV[PV<0] = PV[PV<0]+256
     PV[PV==0]   = np.nan
     PV[PV==255] = np.nan
     setattr(self, fldname, scale(PV)[self.j1:self.j2, self.i1:self.i2])
Example #6
def read_rrc(inpath):
    '''Read rrc data (m*n) from an hdf file.

    Bands 1-5 and 13-16 for MODIS Rrc;
    Rrc_443-862, Rrc_1238, ozone, senz, solz for VIIRS rrc.
    '''
    hdf = SD(inpath, SDC.READ)
    #dts = sorted(hdf.datasets().keys())
    modis_key = ['CorrRefl_01','CorrRefl_02','CorrRefl_03','CorrRefl_04','CorrRefl_05',
                 'CorrRefl_13','CorrRefl_14','CorrRefl_15','CorrRefl_16']
    viirs_key = ['Rrc_443','Rrc_486','Rrc_551','Rrc_671','Rrc_745','Rrc_862','Rrc_1238']
    mission = os.path.basename(inpath)[0]
    if mission == 'A' or mission == 'T':
        keys = modis_key
    elif mission == 'V':
        keys = viirs_key
    else:
        keys = list(hdf.datasets().keys())
    for i,dt in enumerate(keys):
        print(i,dt)
        band = hdf.select(dt)[:,:]        
        if i==0:             
            limit = (band.shape[0],band.shape[1],len(keys))            
            rrc = np.zeros(limit, dtype=np.float64)
            rrc[:,:,i] = band
        else:
            rrc[:,:,i] = band
    hdf.end()
    print(rrc.shape)
    return rrc
Example #7
def print_dataset_2A12(*arg):

	FILE_NAME=arg[0]+'1B01.'+arg[1]
	hdf = SD(FILE_NAME, SDC.READ)

	# List available SDS datasets
	for ds in hdf.datasets():
		print(ds)
Example #8
 def __init__(self, filename, filename_info, filetype_info):
     super(HDF4FileHandler, self).__init__(filename, filename_info, filetype_info)
     self.file_content = {}
     file_handle = SD(self.filename, SDC.READ)
     self._collect_attrs('', file_handle.attributes())
     for k, v in file_handle.datasets().items():
         self.collect_metadata(k, file_handle.select(k))
     del file_handle
Example #9
def print_dataset_1C21(*arg):

	FILE_NAME=arg[0]+'1C21.'+arg[1]
	print(FILE_NAME)
	hdf = SD(FILE_NAME, SDC.READ)

	# List available SDS datasets
	for ds in hdf.datasets():
		print(ds)
Example #10
 def landmask(self):
     if not hasattr(self, "_landmask"):
         filename = os.path.basename(self.landurl)
         if not os.path.isfile(self.datadir + filename):
             urllib.request.urlretrieve(self.dataurl + self.landurl,
                                        self.datadir + filename)
         h = SD(self.datadir + filename, SDC.READ)        
         ncfieldname = list(h.datasets().keys())[0]
         self._landmask =   h.select(ncfieldname)[:] == -1
     return self._landmask
Example #11
def open_file(filename):
    A = SD(filename)

    # retrieve data SDS
    d = A.datasets()
    sds_name = list(d.keys())[0]  # name of sds. Dictionary method.
    sds = A.select(sds_name)
    pin = A.attributes()

    return sds, pin
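
A short usage sketch for open_file() above; the filename is hypothetical. The returned SDS object yields its values via get(), and the global attributes are already a plain dict.

sds, pin = open_file("example.hdf")   # hypothetical file
data = sds.get()                      # numpy array with the first dataset's values
print(data.shape, sds.attributes())   # per-dataset attributes
print(pin)                            # global file attributes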
Example #12
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
        except ImportError:
            raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

        variables = set([])

        # Determine the valid shape for variables
        sd = SD(filenames[0])
        datasets = sd.datasets()
        len_x = datasets['Latitude'][1][0]  # Assumes that latitude shape == longitude shape (it should)
        alt_data = get_data(VDS(filenames[0], "Lidar_Data_Altitudes"), True)
        len_y = alt_data.shape[0]
        valid_shape = (len_x, len_y)

        for filename in filenames:
            sd = SD(filename)
            for var_name, var_info in sd.datasets().items():
                if var_info[1] == valid_shape:
                    variables.add(var_name)

        return variables
Example #14
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
        except ImportError:
            raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

        variables = set([])

        # Determine the valid shape for variables
        sd = SD(filenames[0])
        datasets = sd.datasets()
        len_x = datasets['Latitude_Midpoint'][1][1]
        len_y = datasets['Longitude_Midpoint'][1][1]
        len_z = datasets['Altitude_Midpoint'][1][1]
        valid_shape = (len_x, len_y, len_z)

        for filename in filenames:
            sd = SD(filename)
            for var_name, var_info in sd.datasets().items():
                if var_info[1] == valid_shape:
                    variables.add(var_name)

        return variables
Example #15
def read_MODIS_hdf_to_ndarray(fullname, DATAFIELD_NAME):
    ##########################################################
    #
    #
    import numpy as np
    from pyhdf.SD import SD, SDC
    hdf = SD(fullname, SDC.READ)

    # Read AOD dataset.
    if DATAFIELD_NAME.upper() in hdf.datasets():
        DATAFIELD_NAME = DATAFIELD_NAME.upper()

    if DATAFIELD_NAME in hdf.datasets():
        hdf_raw = hdf.select(DATAFIELD_NAME)
        print("found data set of {}: {}".format(DATAFIELD_NAME, hdf_raw))

    else:
        hdf_raw = np.arange(0)
        print("There is no data set of {} in the file".format(DATAFIELD_NAME))

    # Read geolocation dataset.
    if 'Latitude' in hdf.datasets() and 'Longitude' in hdf.datasets():
        lat = hdf.select('Latitude')
        latitude = lat[:, :]
        lon = hdf.select('Longitude')
        longitude = lon[:, :]

    elif 'Latitude'.lower() in hdf.datasets() and 'Longitude'.lower() in hdf.datasets():
        lat = hdf.select('Latitude'.lower())
        latitude = lat[:, :]
        lon = hdf.select('Longitude'.lower())
        longitude = lon[:, :]
    else:
        latitude, longitude = np.arange(0), np.arange(0)

    if 'cntl_pt_cols' in hdf.datasets() and 'cntl_pt_rows' in hdf.datasets():
        cntl_pt_cols = hdf.select('cntl_pt_cols')
        cntl_pt_cols = cntl_pt_cols[:]
        cntl_pt_rows = hdf.select('cntl_pt_rows')
        cntl_pt_rows = cntl_pt_rows[:]
    else:
        cntl_pt_cols, cntl_pt_rows = np.arange(0), np.arange(0)

    return hdf_raw, latitude, longitude, cntl_pt_cols, cntl_pt_rows
Example #16
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
        except ImportError:
            raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

        variables = set([])
        for filename in filenames:
            sd = SD(filename)
            for var_name, var_info in sd.datasets().items():
                # Check that the dimensions are correct
                if var_info[0] == ('YDim:mod08', 'XDim:mod08'):
                    variables.add(var_name)

        return variables
Example #17
File: MODIS.py Project: cpaulik/cis
    def get_variable_names(self, filenames, data_type=None):
        try:
            from pyhdf.SD import SD
        except ImportError:
            raise ImportError("HDF support was not installed, please reinstall with pyhdf to read HDF files.")

        variables = set([])
        for filename in filenames:
            sd = SD(filename)
            for var_name, var_info in sd.datasets().items():
                # Check that the dimensions are correct
                if var_info[0] == ("YDim:mod08", "XDim:mod08"):
                    variables.add(var_name)

        return variables
Example #18
def load_vision(filename, var=None, T=1):
    """load_vision loads a Vision log file and
       returns its content in a dict.
    """

    assert exists(filename), 'Invalid filename.'

    f = SD(filename, SDC.READ)

    # New time axis
    end = ceil(f.select('ts_group_0').get()[-1])
    new_time = np.arange(0, end, T)

    # Initialize dict
    req_data = {'t': new_time}

    # Loop over variable list and loaded signals to search for matches
    if not var:
        req_data.update({key.split('.')[-1]: _select_interp(new_time, f, key)
                         for key in f.datasets().keys()
                         if not key.startswith('ts_')})
    elif isinstance(var, str):
        first_match = next((key for key in f.datasets().keys() if var in key),
                           None)
        req_data.update({var: _select_interp(new_time, f, first_match)})
    else:
        first_match = zip(var,
                          [next((key for key in f.datasets().keys()
                                 if sig in key), None)
                           for sig in var])
        req_data.update({sig: _select_interp(new_time, f, key)
                         for sig, key in first_match})

    f.end()

    return req_data
Example #19
    def parseMetadata(self, filepath):

        metadata = {}
    
        dir, filename = os.path.split(filepath)
        if re.match(FILENAME_PATTERN, filename):
            logging.info("Parsing HDF file=%s" % filepath)

            # open HDF file
            try:
                hdfFile = SD(filepath, SDC.READ)
            except HDF4Error as e:
                logging.info(e)
                raise e

            # variables
            variables = hdfFile.datasets().keys()

            # time fields
            year = hdfFile.select('Year')[:]
            month = hdfFile.select('Month')[:]
            day = hdfFile.select('Day')[:]
            hour = hdfFile.select('Hour')[:]
            minute = hdfFile.select('Minute')[:]
            second = hdfFile.select('Seconds')[:]

            # space fields
            lon = hdfFile.select('Longitude')[:]
            lat = hdfFile.select('Latitude')[:]

            datetimes = []
            lats = []
            lons = []
            for t in range(22):
                for x in range(15):
                    if year[t,x] != -9999:

                        datetimes.append( dt.datetime(year[t,x],month[t,x],day[t,x],hour[t,x],minute[t,x],second[t,x], tzinfo=tzutc()) )
                        lons.append( lon[t,x] )
                        lats.append( lat[t,x] )
                        
            # store metadata values
            storeMetadata(metadata, np.asarray(lons), np.asarray(lats), np.asarray(datetimes), variables)

            # close HDF file
            hdfFile.end()

        return metadata
Example #20
def read_calipso_hdf4(filename, retv):
    from pyhdf.SD import SD, SDC

    # from pyhdf.HDF import HDF, HC
    # import pyhdf.VS

    def convert_data(data):
        if len(data.shape) == 2:
            if data.shape[1] == 1:
                return data[:, 0]
            elif data.shape[0] == 1:
                return data[0, :]
        return data

    if filename is not None:
        h4file = SD(filename, SDC.READ)
        datasets = h4file.datasets()
        # attributes = h4file.attributes()
        singleshotdata = {}
        for idx, dataset in enumerate(datasets.keys()):
            # non-groups
            if dataset in scip_these_larger_variables_until_needed.keys():
                logger.debug("Not reading " + dataset)
                continue
            elif dataset[0:8] == 'Surface_':
                logger.debug("Not reading " + dataset)
                continue
            if dataset in [
                    "ssNumber_Layers_Found", "ssLayer_Base_Altitude",
                    "ssLayer_Top_Pressure", "ssLayer_Top_Altitude"
            ]:
                singleshotdata[dataset] = h4file.select(dataset).get()
            if dataset[0:2] == "ss":
                # already saved temporarily what we need
                continue
            name = dataset.lower()
            # print idx, dataset
            if dataset in atrain_match_names.keys():
                name = atrain_match_names[dataset]
            data = np.array(h4file.select(dataset).get())
            setattr(retv, name, data)
        if "ssNumber_Layers_Found" in singleshotdata.keys():
            # Extract number of cloudy single shots (max 15)
            # plus average cloud base and top
            # in 5 km FOV
            logger.info("Reading single shot information")
            retv = rearrange_calipso_the_single_shot_info(retv, singleshotdata)
    return retv
Example #21
class Hdf4File(DataProductBase):
    """
    Base class for file products using the HDF4 file format. The :class:`Hdf4File`
    class wraps the pyhdf SD and VS interfaces to implement RAII.
    """
    def __init__(self, filename):
        """
        Open an HDF4 file for reading.

        Arguments:

            filename(str): The path to the file to open.
        """
        super().__init__()
        from pyhdf.HDF import HDF, HC
        from pyhdf.SD import SD, SDC
        import pyhdf.VS
        self.filename = filename
        self.hdf = HDF(self.filename, HC.READ)
        self.vs = self.hdf.vstart()
        self.sd = SD(self.filename, SDC.READ)

    @property
    def vs_attributes(self):
        vs_attributes = [t[0] for t in self.vs.vdatainfo()]
        return vs_attributes

    @property
    def sd_attributes(self):
        sd_attributes = [t for t in self.sd.datasets()]
        return sd_attributes

    @property
    def attributes(self):
        return self.vs_attributes + self.sd_attributes

    def __getitem__(self, name):
        if name in self.vs_attributes:
            return self.vs.attach(name)
        elif name in self.sd_attributes:
            return self.sd.select(name)
        else:
            raise ValueError("{} is not a known attribute of this file.")

    def __del__(self):
        self.sd.end()
        self.vs.end()
        self.hdf.close()
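
A hedged usage sketch of the class above, assuming Hdf4File can be instantiated directly and that the file and variable names exist; SDS names come back as pyhdf SDS objects and Vdata names as attached Vdata tables.

f = Hdf4File("granule.hdf")   # hypothetical file
print(f.attributes)           # combined Vdata and SDS names
lat = f["Latitude"]           # hypothetical SDS variable
print(lat.get().shape)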
Example #22
def HDFsd_read(filename,sdname):
    the_file = SD(str(filename), SDC.READ)
    try:
        out=the_file.select(sdname)
        values=out.get()
        attributes=out.attributes()
    except HDF4Error as e:
        datasets_dict = the_file.datasets()
        print(f"couldn't find {sdname} in "
              f"\n{pprint.pformat(datasets_dict)}")
        values=None
        attributes=None
        print(e)
    the_file.end()
        
    return values, attributes
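
A minimal usage sketch of HDFsd_read() above; the file and dataset names are placeholders.

values, attributes = HDFsd_read("modis_granule.hdf", "EV_500_RefSB")
if values is not None:
    print(values.shape, attributes.get("scale_factor"))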
Example #23
def parse_hdf(filename):
    hdf_file = '{path}/data/{filename}'.format(
        path=sys.path[0],
        filename=filename
    )
    file = SD(hdf_file)
    print(log_string, 'file info: ', file.info())
    datasets_dict = file.datasets()
    print(log_string, 'datasets:')
    for idx, sds in enumerate(datasets_dict):
        sds_obj = file.select(sds)
        data = sds_obj.get()
        data_attr = sds_obj.attributes()
        availabe_dict[sds] = data
        print(log_string, idx, sds, ' :', data.shape)
    file.end()
Example #24
    def get_sds(self, fieldnames=None):
        """Return specific or all SDS in the hdf file as dictionary.

        SDS arrays can be accessed using the 'data' key. Note that no scaling
        is applied to the data in get() method (use get_scaled() to achieve
        that). However, the scaling and missing data information can be
        accessed using the following keys:
            'scale_factor'
            'add_offset'
            '_FillValue'
        """

        # Convert scalar fieldnames to sequence
        if isinstance(fieldnames, str):
            fieldnames = (fieldnames, )

        # Open file to read SDs
        try:
            h4 = SD(self.filename, mode=SDC.READ)
            sclinfo = None
            if 'Slope_and_Offset_Usage' in h4.attributes():
                sclinfo = 'Slope_and_Offset_Usage'

            # Get all available SDS from file if fieldnames is not given
            if fieldnames is None:
                fieldnames = []
                for key in sorted(h4.datasets()):
                    fieldnames.append(key)

            # Create an empty dataset dictionary with all available
            # fields, then fill in data from the SDS
            sds = dict.fromkeys(fieldnames, {})
            for key in sds:
                attrs = h4.select(key).attributes()
                if sclinfo:
                    attrs[sclinfo] = h4.attributes()[sclinfo]

                sds[key] = attrs
                sds[key]['data'] = h4.select(key).get()

            # Close hdf interface
            h4.end()
        except HDF4Error as e:
            raise HDF4Error(e)

        # Return raw (possibly un-calibrated) SDS/attributes dictionary
        return sds
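
Because get_sds() returns raw values, a caller applies the documented keys itself. A minimal sketch, assuming the common HDF convention physical = (raw - add_offset) * scale_factor; the reader instance and field name are hypothetical, and the exact formula is product dependent (check Slope_and_Offset_Usage).

import numpy as np

sds = reader.get_sds("Optical_Depth_Land_And_Ocean")   # hypothetical reader and field
field = sds["Optical_Depth_Land_And_Ocean"]
raw = field["data"].astype("float64")
if "_FillValue" in field:
    raw[raw == field["_FillValue"]] = np.nan
scaled = (raw - field.get("add_offset", 0.0)) * field.get("scale_factor", 1.0)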
Example #25
def read_amsr_hdf4(filename):
    from pyhdf.SD import SD, SDC
    from pyhdf.HDF import HDF, HC
    import pyhdf.VS 

    retv = AmsrObject()
    h4file = SD(filename, SDC.READ)
    datasets = h4file.datasets()
    attributes = h4file.attributes()
    #for idx,attr in enumerate(attributes.keys()):
    #    print idx, attr
    for sds in ["Longitude", "Latitude", "High_res_cloud"]:
        data = h4file.select(sds).get()
        if sds in ["Longitude", "Latitude"]:
            retv.all_arrays[sds.lower()] = data.ravel()
        elif sds in ["High_res_cloud"]:
            lwp_gain = h4file.select(sds).attributes()['Scale']
            retv.all_arrays["lwp_mm"] = data.ravel() * lwp_gain

        #print h4file.select(sds).info()
    h4file = HDF(filename, HC.READ)
    vs = h4file.vstart()
    data_info_list = vs.vdatainfo()
    #print "1D data compound/Vdata"
    for item in data_info_list:
        #1D data compound/Vdata
        name = item[0]
        #print name
        if name in ["Time"]:
            data_handle = vs.attach(name)
            data = np.array(data_handle[:])
            retv.all_arrays["sec1993"] = data 
            data_handle.detach()
        else:
            pass
            #print name
        #data = np.array(data_handle[:])
        #attrinfo_dic = data_handle.attrinfo()
        #factor = data_handle.findattr('factor')
        #offset = data_handle.findattr('offset')
        #print data_handle.factor
        #data_handle.detach()
    #print data_handle.attrinfo()
    h4file.close()
    #for key in retv.all_arrays.keys():
    #    print key, retv.all_arrays[key]
    return retv
Example #26
    def oneImage():
        hdfname = sys.argv[1]
        lat = float(sys.argv[2])
        lon = float(sys.argv[3])
        xc, yc = convertCoord(lat, lon)

        hd = SD(hdfname, SDC.READ)
        params = sorted(hd.datasets().keys())
        for i in range(len(params)):
            print(i, params[i])
        i = int(input('\nChoose # of parameter to plot: '))
        datav = hd.select(params[i])
        datavmean = datav[0, :, :]
        print('x pixel= ', xc)
        print('y pixel= ', yc)
        print('value at coordinate= ', datavmean[yc, xc])
        displayHDF(datavmean, params[i])
Example #27
def read_calipso_hdf4(filename, retv):
    from pyhdf.SD import SD, SDC
    from pyhdf.HDF import HDF, HC
    import pyhdf.VS 
    def convert_data(data):
        if len(data.shape) == 2:
            if data.shape[1] == 1:
                return data[:, 0]
            elif data.shape[0] == 1:
                return data[0, :]
        return data
    if filename is not None:
        h4file = SD(filename, SDC.READ)
        datasets = h4file.datasets()
        attributes = h4file.attributes()
        singleshotdata = {}
        for idx, dataset in enumerate(datasets.keys()):
            # non-groups
            if dataset in scip_these_larger_variables_until_needed.keys():        
                continue
            elif dataset[0:8] == 'Surface_':
                continue
            if dataset in ["ssNumber_Layers_Found", 
                           "ssLayer_Base_Altitude", 
                           "ssLayer_Top_Altitude"]:
                singleshotdata[dataset] = h4file.select(dataset).get()
            if dataset[0:2] == "ss":
                # already saved temporarily what we need
                continue            
            name = dataset.lower()
            #print idx, dataset
            if dataset in atrain_match_names.keys():
                name = atrain_match_names[dataset]
            data = np.array(h4file.select(dataset).get())
            setattr(retv, name, data) 
        if "ssNumber_Layers_Found" in singleshotdata.keys():
            # Extract number of cloudy single shots (max 15)
            # plus average cloud base and top
            # in 5 km FOV
            logger.info("Reading single shot information")
            retv = rearrange_calipso_the_single_shot_info(
                retv,
                singleshotdata)
    return retv 
Example #28
    def __init__(self,
                 datatype,
                 version,
                 fname,
                 variables=None,
                 verbose=False):
        self.datatype = datatype
        self.version = version
        self.fname = fname
        self.verbose = verbose
        self.dataset = {}
        print(version.capitalize())
        if version == 'HDF4':
            hdf = SD(self.fname, SDC.READ)
            if variables is None:
                for d in hdf.datasets():
                    self.dataset[d] = np.array(hdf.select(d).get())
                    if verbose: print(d)
            else:
                for d in variables:
                    self.dataset[d] = np.array(hdf.select(d).get())
                    if verbose: print('reading', d)

        elif version == 'HDF5':
            hdf = h5py.File(fname, 'r')
            if variables is None:
                allvariables = list(hdf.keys())
                print('allvariables', allvariables)
                for d in allvariables:
                    self.dataset[d] = np.array(hdf[d])
                    if verbose: print(d)
            else:
                for d in variables:
                    self.dataset[d] = np.array(hdf[d])
                    if verbose: print(d)

        else:
            print(datatype, '  not supported')
        if verbose:
            for key in self.dataset:
                print('variable: ',key, ' dimension', self.dataset[key].shape,\
                       'min, max', np.nanmin(self.dataset[key]),np.nanmax(self.dataset[key]))
            print('finished')
Example #29
    def get(self, fieldnames=[]):
        '''Returns specific or all SDS in the hdf file as dictionary.
        SDS arrays can be accessed using the 'data' key. Note that no scaling
        is applied to the data in get() method (use get_scaled() to achieve
        that). However, the scaling and missing data information can be
        accessed using the following keys:
            'scale_factor'
            'add_offset'
            '_FillValue'
        '''
        if not isinstance(fieldnames, list):
            fieldnames = [fieldnames]

        try:
            # Open hdf4 interface in read mode
            h4 = SD(self.filename, mode=SDC.READ)
            sclinfo = None
            if 'Slope_and_Offset_Usage' in h4.attributes():
                sclinfo = 'Slope_and_Offset_Usage'

            if len(fieldnames) == 0:
                # Get all available field names from hdf
                fieldnames = []
                for key in sorted(h4.datasets()):
                    fieldnames.append(key)
            # Create an empty dataset dictionary with all available
            # fields, then fill in data from the SDS
            datasets = dict.fromkeys(fieldnames, {})
            for key in datasets:
                attrs = h4.select(key).attributes()
                if sclinfo:
                    attrs[sclinfo] = h4.attributes()[sclinfo]

                datasets[key] = attrs
                datasets[key]['data'] = h4.select(key).get()
            # Close hdf interface
            h4.end()
        except HDF4Error as e:
            print("HDF4Error", e)
            sys.exit(1)

        # Return un-calibrated datasets/attributes dictionary
        return datasets
Example #30
def load_data_from_files(filename):
    if not os.path.exists(filename):
        print("File {} does not exist, cannot load data.".format(filename))
        return
    elif not HDF.ishdf(filename):
        print("File {} is not in hdf4 file format, cannot load data.".format(
            filename))
        return

    f = SD(filename, SDC.READ)
    data_field = None
    for i, d in enumerate(f.datasets()):
        # print("{0}. {1}".format(i+1,d))
        if "NDVI" in d:
            data_field = d

    ndvi_data = f.select(data_field)
    data = np.array(ndvi_data.get())
    return data
Example #31
def get_layer_information(cloudsat_filenames, get_quality=True, verbose=0):
    """ Returns
    CloudLayerType: -9: error, 0: not determined, 1-8: cloud types
    CloudLayerBase: in km
    CloudLayerTop: in km
    CloudTypeQuality: valid range [0, 1]; only included if <get_quality> is set
    """

    all_info = {
        'CloudLayerType': [],
        'CloudLayerBase': [],
        'CloudLayerTop': []
    }

    if get_quality:
        all_info['CloudTypeQuality'] = []

    for cloudsat_path in cloudsat_filenames:

        sd = SD(cloudsat_path, SDC.READ)

        if verbose:
            # List available SDS datasets.
            print("hdf datasets:", sd.datasets())

        # get cloud types at each height
        for key, value in all_info.items():
            value.append(sd.select(key).get())

    for key, value in all_info.items():
        value = np.vstack(value)

        if key == 'CloudLayerType':
            all_info[key] = value.astype(np.int8)
        else:
            all_info[key] = value.astype(np.float16)

    if not get_quality:
        all_info['CloudTypeQuality'] = None

    return all_info
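
A hypothetical call to the function above; the granule path is a placeholder. Each entry of the returned dict stacks one array per input file.

info = get_layer_information(["cloudsat_granule.hdf"], get_quality=False)
print(info["CloudLayerType"].shape, info["CloudLayerType"].dtype)  # int8 layer types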
Example #32
def FILE_SCAN(fname):

    try:
        f = SD(fname, SDC.READ)
    except:
        exit('Error [lss]: cannot access \'{fname}\'.'.format(fname=fname))

    vnames = f.datasets().keys()

    data_dict = {}
    for vname in vnames:
        obj = f.select(vname)
        info = obj.info()
        if info[1] == 1:
            data_dict[vname] = str((info[2], ))
        elif info[1] > 1:
            data_dict[vname] = str(tuple(info[2]))

    f.end()

    return data_dict
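
A brief usage sketch of FILE_SCAN() above; the file name is a placeholder. The returned dict maps each SDS name to a string describing its shape.

shapes = FILE_SCAN("MOD021KM.example.hdf")
for vname, shape_str in shapes.items():
    print(vname, shape_str)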
Example #33
def hdf4(file, var_name='', print_out=False):
    # read a MODIS HDF4 file
    dat = SD(file, SDC.READ)
    datasets_dic = dat.datasets()
    if len(var_name) == 0:
        for idx, sds in enumerate(datasets_dic.keys()):
            print(idx, sds)
        return ''

    sds_obj = dat.select(var_name)  # select sds

    data = sds_obj.get()  # get sds data
    data = data.astype('float')

    data[data == sds_obj.attributes()['_FillValue']] = np.nan
    data /= sds_obj.attributes()['scale_factor']

    if print_out:
        pprint.pprint(sds_obj.attributes())

    return data
Example #34
def SDSinfo(data_path, fname, product=None):
    '''
    product: MOD021KM, MYD04_L2, ... MODIS product type (just the output file name).
    For *.hdf files.
    '''
    file = data_path + fname
    hdf = SD(file)
    dsets = hdf.datasets()
    keys = list(dsets.keys())
    if product is None or product == 'MOD021K':
        product = fname[0:7]
        f1 = open(product + ".info", "w")
    else:
        f1 = open(product + ".info", "w")
    f1.write(fname + '\n')
    f1.write('Number of variables: ' + str(np.size(keys)) + '\n')
    f1.write('Key list' + '\n')
    for i in np.arange(np.size(keys)):
        f1.write('[' + str(i) + ']' + keys[i] + '\n')

    for i in np.arange(np.size(keys)):
        d = hdf.select(keys[i])
        f1.write(
            '===========================================================' +
            '\n')
        f1.write('[' + str(i) + '] ' + 'Variable name:' + keys[i] + '\n')
        f1.write(str(d.dimensions()) + '\n')
        f1.write(
            '===========================================================' +
            '\n')

        atrb = d.attributes()
        atrb_keys = list(atrb.keys())
        for j in np.arange(np.size(atrb_keys)):
            f1.write('[' + str(i) + '.' + str(j) + ']' + atrb_keys[j] +
                     '\n')
            f1.write('\t' + str(atrb.get(atrb_keys[j])) + '\n')

        f1.write('\n\n\n')
    f1.close()
    print(product + ".info SAVED!!")
Example #35
        def fetch_MOD03_coordinates(start_time, aqua=False):
            import os.path
            from glob import glob
            from pyhdf.SD import SD
            from pyhdf.error import HDF4Error

            # Locate MOD03 file
            search_path = start_time.strftime(
                os.path.join(self.mod03_path, "MOD03.A%Y%j.%H%M.061*hdf"))
            if aqua:
                # NOTE: System dependent approximation
                search_path = search_path.replace("MOD", "MYD")
            try:
                mod03_file = glob(search_path)[0]
            except IndexError:
                raise FileNotFoundError("MOD03: " + search_path)

            # Read space-time grid from that file
            try:
                file_object = SD(mod03_file)
                dims = file_object.datasets()["Longitude"][1]
                count = dims[0] // 10, dims[1] // 10
                mod_lon = _get_hdf_data(file_object,
                                        "Longitude",
                                        start=(0, 2),
                                        count=count,
                                        stride=(10, 10))
                mod_lat = _get_hdf_data(file_object,
                                        "Latitude",
                                        start=(0, 2),
                                        count=count,
                                        stride=(10, 10))
                mod_time = _get_hdf_data(file_object,
                                         "EV start time",
                                         count=count[:1])
                file_object.end()
            except HDF4Error:
                raise IOError("Corrupted file: " + mod03_file)

            return mod_lon, mod_lat, mod_time
Example #36
class SNOW2C():
    def __init__(self, filename):

        self.filename = filename
        #        file_name = 'MYD06_L2.A2007219.2010.006.2014053202546.hdf'
        self.file = SD(self.filename, SDC.READ)

        datasets_dic = self.file.datasets()

        print(datasets_dic.keys())

        self.SWC = self.__getitem__("snow_water_content")
        self.R = self.__getitem__("snowfall_rate")
        self.logN0 = self.__getitem__("log_N0")
        self.loglambda = self.__getitem__("log_lambda")
        self.height = self.__getitem__("Height")

    def __getitem__(self, key):

        sds_obj = self.file.select(key)  # select sds
        data = sds_obj.get()
        return data
Example #37
    def get_geolocation(self):
        # Open file.

        hdf = SD(self.file_path, SDC.READ)

        # List available SDS datasets.
        datasets = hdf.datasets()

        # Read dataset.
        #DATAFIELD_NAME='RelHumid_A'
        #data3D = hdf.select(DATAFIELD_NAME)
        #data = data3D[11,:,:]

        # Read geolocation dataset.
        try:
            lat = hdf.select('Latitude')
            latitude = lat[:,:].flatten()
            lon = hdf.select('Longitude')
            longitude = lon[:,:].flatten()
            return (latitude, longitude)
        except HDF4Error:
            return None
Example #38
def hdf4ls(filename):
    """
    Read the SDS datasets and global attributes of an hdf4 file

    Parameters
    ----------

    filename: str or Path object
        hdf4 file to read

    Returns
    -------
 
    prints the metadata

    """
    the_file = SD(str(filename), SDC.READ)
    stars = '*' * 50
    print((f'\n{stars}\nReading {filename}\n'
           f'\nnumber of datasets, number of attributes\n'
           f'={the_file.info()}\n{stars}\n'))
    datasets_dict = the_file.datasets()
    print(f'\n{stars}\nHere are the datasets\n{stars}\n')
    for idx, sds in enumerate(datasets_dict.keys()):
        out = the_file.select(sds)
        dims = [item for item in out.dimensions().values()]
        print(f'{stars}\n')
        print(idx, sds, dims)
        pprint.pprint(out.attributes().keys())
        print(f'{stars}\n\n')

    print(f'\n{stars}\nHere are the truncated global attributes\n{stars}\n')
    for key, value in the_file.attributes().items():
        print_value = str(value)
        if len(print_value) > 100:
            print_value = print_value[:100]
        print(f'Key: {key} --- Value: {print_value}')
Example #39
def list_datasets(FILE_NAME):
    # Open file.
    hdf = SD(FILE_NAME, SDC.READ)
    # List available SDS datasets.
    print(hdf.datasets())
Example #40
def addHdf2bin(hdfFilepath, bfpath, period, startyear, lineMin, lineMax, sampMin, sampMax):
	'''Adds an HDF to a binary file and returns its path'''
	
	
	# Dictionary used to convert from a numeric data type to its symbolic representation - http://pysclint.sourceforge.net/pyhdf/pyhdf.SD.html
	typeTab = {
		SDC.CHAR:    'CHAR',
		SDC.CHAR8:   'CHAR8',
		SDC.UCHAR8:  'UCHAR8',
		SDC.INT8:    'INT8',
		SDC.UINT8:   'UINT8',
		SDC.INT16:   'INT16',
		SDC.UINT16:  'UINT16',
		SDC.INT32:   'INT32',
		SDC.UINT32:  'UINT32',
		SDC.FLOAT32: 'FLOAT32',
		SDC.FLOAT64: 'FLOAT64'
	}

	#TODO:Not yet tested to work with SciDB: CHAR, CHAR8, UCHAR8, INT8, UINT8, INT32, UINT32, FLOAT32, FLOAT64
	typeTab2 = {
		'CHAR': 'c',
		'CHAR8': 'b',
		'UCHAR8': 'B',
		'INT8': 'h',
		'UINT8': 'H',
		'INT16': 'h',
		'UINT16': 'H',
		'INT32': 'l',
		'UINT32': 'L',
		'FLOAT32': 'f',
		'FLOAT64': 'd'
	}	

	try:
		path, filename = os.path.split(hdfFilepath)
		hdf = SD(hdfFilepath, SDC.READ)
		ds = hdf.datasets()
		banddict = {} # band values
		banddatatype = {}
		bandindex = {}# band index in the file
		bandres = {} # band resolution e.g. (4800, 4800)
		for k in ds.keys():
			banddict[k] = hdf.select(k)
			banddatatype[k] = typeTab2[typeTab[ds[k][2]]]
			bandindex[ds[k][3]] = k
			bandres[k] = ds[k][1]
		sortedbandindexkeys = sorted(bandindex.keys())
		#Get the temporal index
		dateDOY = filename[9:16]
		timid = date2grid(dateDOY, period, startyear)
		#Get the spatial indexes
		tile = filename[17:23]
		#Test: All the bands have the same resolution
		resolution = 0
		firsttime = True
		for k in bandres:
			if bandres[k][0] != bandres[k][1]:
				raise Exception('Band resolution mismatch')
			if firsttime:
				firsttime = False
				resolution = bandres[k][0]
		llid = tile2grid(tile, resolution)
		deltalonid = llid['lonid']
		deltalatid = llid['latid']
		bfile = open(bfpath, "ab")
		for i in range(lineMin, lineMax + 1):
			rowdict = {}
			for k in banddict.keys():
				rowdict[k] = banddict[k][i, ]
			latid = deltalatid + i
			for j in range(sampMin, sampMax + 1):
				lonid = deltalonid + j
				idx = [lonid + (latid * pow(10, 6)) +  (timid * pow(10, 11))]# Encodes the indexes in a single value
				idxa = array('L', idx)
				#print str(lonid) + " "  + str(latid) + " " + str(timid) + " " + str(idx)
				idxa.tofile(bfile) # Writes the coordinates to the file
				#Get values
				for k in sortedbandindexkeys:
					bandname = bandindex[k]
					val = [rowdict[bandname][j]]
					vala = array(banddatatype[bandname], val)
					vala.tofile(bfile) # Writes the band value to the file
		bfile.close()
	except IOError as e:
		logging.exception("IOError:\n" + str(e) + " " + hdfFilepath)
	except Exception as e:
		logging.exception("Unknown exception:\n" + str(e) + " " + hdfFilepath)
	return bfpath
Example #41
def load_hdf_sd(FILE_NAME):
    """
    Name:

        load_hdf_sd
    
    Purpose:

        to load everything in an hdf file using the SD protocol instead of GDAL
        makes nans out of missing_value and _FillValue. Scales the values by the scale_factor
    
    Calling Sequence:

        dat,dat_dict = load_hdf_sd(FILE_NAME) 
    
    Input: 
  
        FILE_NAME: path and name of hdf file
    
    Output:

        dat: dictionary with numpy array of values
        dat_dict: dictionary with dictionaries of attributes for each read value
    
    Keywords: 

       none
    
    Dependencies:

        numpy
        pyhdf, SDC, SD
    
    Required files:
   
        dat file
    
    Example:

        ...
        
    Modification History:
    
        Written (v1.0): Samuel LeBlanc, 2015-05-13, NASA Ames
        Modified (v1.1): by Samuel LeBlanc, 2015-07-01, NASA Ames, Happy Canada Day!
                        - added Fill value keyword selection
                        - added scale factor and add offset
        
    """
    import numpy as np
    from pyhdf.SD import SD, SDC
    print('Reading file: '+FILE_NAME)
    hdf = SD(FILE_NAME, SDC.READ)
    dat = dict()
    dat_dict = dict()
    for name in hdf.datasets().keys():
        print('  '+name+': %s' % (hdf.datasets()[name],))
        dat[name] = hdf.select(name)[:]
        dat_dict[name] = hdf.select(name).attributes()
        try:
            scale_factor = dat_dict[name].get('scale_factor')
            if not scale_factor:
                scale_factor = 1.0
            dat[name] = dat[name]*scale_factor
            try:
                dat[name][dat[name] == dat_dict[name].get('missing_value')*scale_factor] = np.nan
            except TypeError:
                print('No missing_value on '+name)
            try:
                dat[name][dat[name] == dat_dict[name].get('_FillValue')*scale_factor] = np.nan
            except TypeError:
                print('No FillValue on '+name)
            add_offset = dat_dict[name].get('add_offset')
            if not add_offset:
                add_offset = 0
            dat[name] = dat[name] + add_offset
        except Exception:
            print('Problem in filling with nans and getting the offsets, must do it manually')
    return dat, dat_dict
Example #42
class HDFEOSBaseFileReader(BaseFileHandler):
    """Base file handler for HDF EOS data for both L1b and L2 products."""
    def __init__(self, filename, filename_info, filetype_info):
        """Initialize the base reader."""
        BaseFileHandler.__init__(self, filename, filename_info, filetype_info)
        try:
            self.sd = SD(self.filename)
        except HDF4Error as err:
            error_message = "Could not load data from file {}: {}".format(
                self.filename, err)
            raise ValueError(error_message)

        # Read metadata
        self.metadata = self.read_mda(self.sd.attributes()['CoreMetadata.0'])
        self.metadata.update(
            self.read_mda(self.sd.attributes()['StructMetadata.0']))
        self.metadata.update(
            self.read_mda(self.sd.attributes()['ArchiveMetadata.0']))

    @staticmethod
    def read_mda(attribute):
        """Read the EOS metadata."""
        lines = attribute.split('\n')
        mda = {}
        current_dict = mda
        path = []
        prev_line = None
        for line in lines:
            if not line:
                continue
            if line == 'END':
                break
            if prev_line:
                line = prev_line + line
            key, val = line.split('=')
            key = key.strip()
            val = val.strip()
            try:
                val = eval(val)
            except NameError:
                pass
            except SyntaxError:
                prev_line = line
                continue
            prev_line = None
            if key in ['GROUP', 'OBJECT']:
                new_dict = {}
                path.append(val)
                current_dict[val] = new_dict
                current_dict = new_dict
            elif key in ['END_GROUP', 'END_OBJECT']:
                if val != path[-1]:
                    raise SyntaxError
                path = path[:-1]
                current_dict = mda
                for item in path:
                    current_dict = current_dict[item]
            elif key in ['CLASS', 'NUM_VAL']:
                pass
            else:
                current_dict[key] = val
        return mda

    @property
    def start_time(self):
        """Get the start time of the dataset."""
        date = (self.metadata['INVENTORYMETADATA']['RANGEDATETIME']
                ['RANGEBEGINNINGDATE']['VALUE'] + ' ' +
                self.metadata['INVENTORYMETADATA']['RANGEDATETIME']
                ['RANGEBEGINNINGTIME']['VALUE'])
        return datetime.strptime(date, '%Y-%m-%d %H:%M:%S.%f')

    @property
    def end_time(self):
        """Get the end time of the dataset."""
        date = (self.metadata['INVENTORYMETADATA']['RANGEDATETIME']
                ['RANGEENDINGDATE']['VALUE'] + ' ' +
                self.metadata['INVENTORYMETADATA']['RANGEDATETIME']
                ['RANGEENDINGTIME']['VALUE'])
        return datetime.strptime(date, '%Y-%m-%d %H:%M:%S.%f')

    def _read_dataset_in_file(self, dataset_name):
        if dataset_name not in self.sd.datasets():
            error_message = "Dataset name {} not included in available datasets {}".format(
                dataset_name, self.sd.datasets())
            raise KeyError(error_message)

        dataset = self.sd.select(dataset_name)
        return dataset

    def load_dataset(self, dataset_name):
        """Load the dataset from HDF EOS file."""
        from satpy.readers.hdf4_utils import from_sds

        dataset = self._read_dataset_in_file(dataset_name)
        fill_value = dataset._FillValue
        dask_arr = from_sds(dataset, chunks=CHUNK_SIZE)
        dims = ('y', 'x') if dask_arr.ndim == 2 else None
        data = xr.DataArray(dask_arr, dims=dims, attrs=dataset.attributes())

        # preserve integer data types if possible
        if np.issubdtype(data.dtype, np.integer):
            new_fill = fill_value
        else:
            new_fill = np.nan
            data.attrs.pop('_FillValue', None)
        good_mask = data != fill_value

        scale_factor = data.attrs.get('scale_factor')
        if scale_factor is not None:
            data = data * scale_factor

        data = data.where(good_mask, new_fill)
        return data
Example #43
# -*- coding: utf-8 -*-

import os
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
from pyhdf.SD import SD, SDC

# Open file.
FILE_NAME = 'AIRS.2002.08.01.L3.RetStd_H031.v4.0.21.0.G06104133732.hdf'
hdf = SD(FILE_NAME, SDC.READ)

# List available SDS datasets.
print(hdf.datasets())

# Read dataset.
DATAFIELD_NAME='RelHumid_A'
data3D = hdf.select(DATAFIELD_NAME)
data = data3D[11,:,:]

# Read geolocation dataset.
lat = hdf.select('Latitude')
latitude = lat[:,:]
lon = hdf.select('Longitude')
longitude = lon[:,:]

m = Basemap(projection='cyl', resolution='l', llcrnrlat=-90, urcrnrlat = 90, llcrnrlon=-180, urcrnrlon = 180)
m.drawcoastlines(linewidth=0.5)
m.drawparallels(np.arange(-90., 120., 30.), labels=[1, 0, 0, 0])
m.drawmeridians(np.arange(-180., 181., 45.), labels=[0, 0, 0, 1])
Example #44
def readTolnetH4(fname):
    d = {}
    profileDicts = []
    h4 = SD(fname, SDC.READ)
    #  get available records in hdf (you want this, sadly, because nobody calls o3 mixing ratio the same thing,
    # and in one case time isn't even called the same thing).
    availableSDS = list(h4.datasets().keys())

    d['ALT'] = np.asarray(h4get(h4, 'ALTITUDE'))
    d['Elevation'] = np.asarray(h4get(h4, "ALTITUDE.INSTRUMENT"))

    #annoyingly time is different in one case, so we have ugly "ifs" here...
    if 'DATETIME.START' in availableSDS:
        d['startTime'] = getDatetimeFromMJD(
            np.asarray(h4get(h4, "DATETIME.START")))
    elif 'DATETIME' in availableSDS:
        d['startTime'] = getDatetimeFromMJD(np.asarray(h4get(h4, "DATETIME")))
    else:
        sys.exit("Sorry. Time isn't even in this file: {}".format(fname))

    # again for the FTIR Instruments, they only give one time
    if 'DATETIME.STOP' in availableSDS:
        d['endTime'] = getDatetimeFromMJD(
            np.asarray(h4get(h4, "DATETIME.STOP")))
    elif 'DATETIME' in availableSDS:
        d['endTime'] = getDatetimeFromMJD(np.asarray(h4get(h4, "DATETIME")))

    d['dT'] = np.asarray(h4get(h4, "INTEGRATION.TIME"))
    d['Latitude'] = np.asarray(h4get(h4, "LATITUDE.INSTRUMENT"))
    d['Longitude'] = np.asarray(h4get(h4, "LONGITUDE.INSTRUMENT"))

    # This gets fun, because everyone decided to call mixing ratio something slightly different.
    if 'O3.MIXING.RATIO.VOLUME_DERIVED' in availableSDS:
        d['O3MR'] = np.asarray(h4get(h4, "O3.MIXING.RATIO.VOLUME_DERIVED"))
    elif 'O3.MIXING.RATIO.VOLUME_EMISSION' in availableSDS:
        d['O3MR'] = np.asarray(h4get(h4, "O3.MIXING.RATIO.VOLUME_EMISSION"))
    elif 'O3.MIXING.RATIO.VOLUME_ABSORPTION.SOLAR' in availableSDS:
        d['O3MR'] = np.asarray(
            h4get(h4, "O3.MIXING.RATIO.VOLUME_ABSORPTION.SOLAR"))
    else:
        sys.exit("Weird file:{} No ozone information.".format(fname))

    # same for uncertainty, although I haven't used it yet.
    if 'O3.MIXING.RATIO.VOLUME_DERIVED_UNCERTAINTY.COMBINED.STANDARD' in availableSDS:
        d['O3MRUncert'] = np.asarray(
            h4get(
                h4,
                "O3.MIXING.RATIO.VOLUME_DERIVED_UNCERTAINTY.COMBINED.STANDARD")
        )
    elif 'O3.MIXING.RATIO.VOLUME_EMISSION_UNCERTAINTY.COMBINED.STANDARD' in availableSDS:
        d['O3MRUncert'] = np.asarray(
            h4get(
                h4,
                "O3.MIXING.RATIO.VOLUME_EMISSION_UNCERTAINTY.COMBINED.STANDARD"
            ))
    elif 'O3.COLUMN_ABSORPTION.SOLAR_UNCERTAINTY.SYSTEMATIC.STANDARD' in availableSDS:
        d['O3MRUncert'] = np.asarray(
            h4get(
                h4,
                "O3.COLUMN_ABSORPTION.SOLAR_UNCERTAINTY.SYSTEMATIC.STANDARD"))
    else:
        sys.exit(
            "Weird file:{} No ozone uncertainty information.".format(fname))

    # if it's a lidar get number density.

    if 'O3.NUMBER.DENSITY_ABSORPTION.DIFFERENTIAL' in availableSDS:
        d['O3ND'] = np.asarray(
            h4get(h4, "O3.NUMBER.DENSITY_ABSORPTION.DIFFERENTIAL"))

    d['Press'] = np.asarray(h4get(h4, "PRESSURE_INDEPENDENT"))
    d['Temp'] = np.asarray(h4get(h4, "TEMPERATURE_INDEPENDENT"))
    if (len(d['Press'].shape) == 2):
        if (d['Press'][0, 0] < d['Press'][0, 1]): flipIt = True
        else: flipIt = False
    else:
        if (d['Press'][0] < d['Press'][1]): flipIt = True
        else: flipIt = False

    dims = d['O3MR'].shape
    if (len(dims) > 1):
        nProfiles, nLevels = dims[0], dims[1]
        for i in range(0, nProfiles):
            dd = {}
            dd['startTime'] = d['startTime'][i]
            dd['endTime'] = d['endTime'][i]
            if (flipIt): dd['O3MR'] = d['O3MR'][i, ::-1]
            else: dd['O3MR'] = d['O3MR'][i, :]
            if ('O3ND' in list(d.keys())):
                if (flipIt): dd['O3ND'] = d['O3ND'][i, ::-1]
                else: dd['O3ND'] = d['O3ND'][i, :]
            if (flipIt): dd['Press'] = d['Press'][i, ::-1]
            else: dd['Press'] = d['Press'][i, :]
            if (flipIt): dd['Temp'] = d['Temp'][i, ::-1]
            else: dd['Temp'] = d['Temp'][i, :]
            dd['Longitude'] = d['Longitude']
            dd['Latitude'] = d['Latitude']
            dd['Elevation'] = d['Elevation']
            profileDicts.append(dd)
    else:
        dd = {}
        dd['startTime'] = d['startTime']
        dd['endTime'] = d['endTime']
        if (flipIt): dd['O3MR'] = d['O3MR'][::-1]
        else: dd['O3MR'] = d['O3MR'][:]
        if ('O3ND' in list(d.keys())):
            if (flipIt): dd['O3ND'] = d['O3ND'][::-1]
            else: dd['O3ND'] = d['O3ND'][:]
        if (flipIt): dd['Press'] = d['Press'][::-1]
        else: dd['Press'] = d['Press']
        if (flipIt): dd['Temp'] = d['Temp'][::-1]
        else: dd['Temp'] = d['Temp']
        dd['Longitude'] = d['Longitude']
        dd['Latitude'] = d['Latitude']
        dd['Elevation'] = d['Elevation']
        profileDicts.append(dd)

    # because they had to be different..same as ".close()" would be for any other api in the universe.
    h4.end()
    return profileDicts
Example #45
import numpy as np
from pyhdf.SD import SD, SDC, SDS
import sys



_file = r'C:\Users\AKO NA LNG\Desktop\Esquivel Files\School Files 2\Special Problem\MODIS\MOD06\MOD06_L2.A2015348.0215.006.2015348152515.hdf'
mod06 = SD(_file, SDC.READ)



#List SDS


ds = mod06.datasets()
ds_lst = list(ds.keys())

for i in range(len(ds_lst)):
	print(ds_lst[i])



#Metadata for Cloud Optical Thickness
sds_name = "Cloud_Optical_Thickness"
sds = mod06.select(sds_name)

data = sds.attributes(full=1)
data_keys_lst = data.keys()
print("\n")
print("**************************************\n")
print("CLOUD OPTICAL THICKNESS METADATA\n")
Example #46
import matplotlib.pyplot as plt
from matplotlib.colors import from_levels_and_colors
from matplotlib.colors import ListedColormap
from mpl_toolkits.basemap import Basemap, cm
from pyhdf.SD import SD, SDC

file_name = '/Users/wusun/Dropbox/Projects/Regional COS flux/data/modis_igbp_class/2012/' + \
  'US_CA/MCD12Q1.A2012001.h08v05.051.2014288195944.hdf'
lat_name = '/Users/wusun/Dropbox/Projects/Regional COS flux/data/modis_igbp_class/2012/' + \
  'US_CA/MCD12Q1.A2012001.h08v05.051.2014288195944.lat'
lon_name = '/Users/wusun/Dropbox/Projects/Regional COS flux/data/modis_igbp_class/2012/' + \
  'US_CA/MCD12Q1.A2012001.h08v05.051.2014288195944.lon'
hdf = SD(file_name, SDC.READ)

# list available SDS datasets
print(hdf.datasets())

datafield_name = 'Land_Cover_Type_1'
hdf_data = hdf.select(datafield_name)
lc_type1 = hdf_data[:,:].astype(np.uint8)

# read lat-lon coordinates
lat = np.genfromtxt(lat_name, delimiter=',', usecols=[0])
lat = lat.reshape(lc_type1.shape)

lon = np.genfromtxt(lon_name, delimiter=',', usecols=[0])
lon = lon.reshape(lc_type1.shape)

fig = plt.figure(figsize=(12,6))
m = Basemap(projection='cea',llcrnrlat=32,urcrnrlat=42,\
	llcrnrlon=-125,urcrnrlon=-114,lat_ts=30,resolution='h')
Example #47
fmt_suffix = ".h5"
workFileName = "sketchG." + modis_base + modis_item + fmt_suffix

key_across = 'Cell_Across_Swath_1km:mod05'
key_along = 'Cell_Along_Swath_1km:mod05'

hdf = SD(dataPath + modis_filename, SDC.READ)
ds_wv_nir = hdf.select('Water_Vapor_Near_Infrared')
data = ds_wv_nir.get()

# MODIS_Swath_Type_GEO/Geolocation_Fields/
# Latitude

hdf_geo = SD(dataPath + modis_geofilename, SDC.READ)
print('hg info: ', hdf_geo.info())
for idx, sds in enumerate(hdf_geo.datasets().keys()):
    print(idx, sds)
# hdf_geo_ds = hdf_geo.select['']

# hdf_geo_swath     = hdf_geo.select('MODIS_Swath_Type_GEO')
# hdf_geo_swath_gf  = hdf_geo_swath['Geolocation_Fields']
hdf_geo_lat = hdf_geo.select('Latitude').get()
hdf_geo_lon = hdf_geo.select('Longitude').get()
print('hgl type  ', type(hdf_geo_lat))
print('hgl shape ', hdf_geo_lat.shape, hdf_geo_lon.shape)
print('hgl dtype ', hdf_geo_lat.dtype)
# exit()

add_offset = ds_wv_nir.attributes()['add_offset']
scale_factor = ds_wv_nir.attributes()['scale_factor']
print('scale_factor = %f, add_offset = %f.' % (scale_factor, add_offset))
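
# a hedged sketch (not part of the original snippet): MODIS atmosphere products
# are usually unpacked as scale_factor * (raw - add_offset); fill values should
# really be masked using the _FillValue attribute before doing this
wv_nir = scale_factor * (data.astype('float64') - add_offset)
print('water vapor range: %f to %f' % (wv_nir.min(), wv_nir.max()))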
Example #48
0
def load(satscene, **kwargs):
    """Read data from file and load it into *satscene*.  Load data into the
    *channels*. *Channels* is a list or a tuple containing channels we will
    load data into. If None, all channels are loaded.
    """    
    del kwargs

    conf = ConfigParser()
    conf.read(os.path.join(CONFIG_PATH, satscene.fullname + ".cfg"))
    options = {}
    for option, value in conf.items(satscene.instrument_name+"-level3",
                                    raw = True):
        options[option] = value

    pathname = os.path.join(options["dir"], options['filename'])    
    filename = satscene.time_slot.strftime(pathname)
    
    for prodname in GEO_PHYS_PRODUCTS + FLAGS_QUALITY:
        if prodname in satscene.channels_to_load:
            
            prod_chan = ModisEosHdfLevel2(prodname)
            prod_chan.read(filename)
            prod_chan.satid = satscene.satname.capitalize()
            prod_chan.resolution = 1000.0
            prod_chan.shape = prod_chan.data.shape

            # All this for the netCDF writer:
            prod_chan.info['var_name'] = prodname
            prod_chan.info['var_data'] = prod_chan.data
            resolution_str = str(int(prod_chan.resolution))+'m'
            prod_chan.info['var_dim_names'] = ('y'+resolution_str,
                                               'x'+resolution_str)
            prod_chan.info['long_name'] = prod_chan.attr['long_name'][:-1]
            try:
                prod_chan.info['standard_name'] = prod_chan.attr['standard_name'][:-1]
            except KeyError:
                pass
            valid_min = np.min(prod_chan.data)
            valid_max = np.max(prod_chan.data)
            prod_chan.info['valid_range'] = np.array([valid_min, valid_max])
            prod_chan.info['resolution'] = prod_chan.resolution

            if prodname == 'l2_flags':
                # l2 flags definitions
                for i in range(1, 33):
                    key =  "f%02d_name"%i
                    prod_chan.info[key] = prod_chan.attr[key][:-1]

            satscene.channels.append(prod_chan)
            if prodname in CHANNELS:
                satscene[prodname].info['units'] = '%'
            else:
                satscene[prodname].info['units'] = prod_chan.attr['units'][:-1]

            LOG.info("Loading modis lvl2 product '%s' done"%prodname)

    # Check if there are any bands to load:
    channels_to_load = False
    for bandname in CHANNELS:
        if bandname in satscene.channels_to_load:
            channels_to_load = True
            break

    if channels_to_load:
        #print "FILE: ", filename
        eoshdf = SD(filename)
        # Get all the Attributes:
        # Common Attributes, Data Time,
        # Data Structure and Scene Coordinates
        info = {}
        for key in eoshdf.attributes().keys():
            info[key] = eoshdf.attributes()[key]

        dsets = eoshdf.datasets()
        selected_dsets = []

        for bandname in CHANNELS:
            if (bandname in satscene.channels_to_load and
                bandname in dsets):

                value = eoshdf.select(bandname)
                selected_dsets.append(value)
        
                # Get only the selected datasets
                attr = value.attributes()
                band = value.get()

                nodata = attr['bad_value_scaled']
                mask = np.equal(band, nodata)
                satscene[bandname] = (np.ma.masked_where(mask, band) * 
                                      attr['slope'] + attr['intercept'])

                satscene[bandname].info['units'] = '%'
                satscene[bandname].info['long_name'] = attr['long_name'][:-1]

        for dset in selected_dsets:
            dset.endaccess()  

        LOG.info("Loading modis lvl2 Remote Sensing Reflectances done")
        eoshdf.end()


    lat, lon = get_lat_lon(satscene, None)

    from pyresample import geometry
    satscene.area = geometry.SwathDefinition(lons=lon, lats=lat)

    #print "Variant: ", satscene.variant 
    satscene.variant = 'regional' # Temporary fix!

    LOG.info("Loading modis data done.")
Example #49
0
class hdf (object):
    """wrapper for HDF4 dataset for comparison
    __call__ yields sequence of variable names
    __getitem__ returns individual variables ready for slicing to numpy arrays
    """
    
    _hdf = None
    
    def __init__(self, filename, allowWrite=False):
        
        if pyhdf is None:
            LOG.error('pyhdf is not installed and is needed in order to read hdf4 files')
            assert(pyhdf is not None)
        mode = SDC.READ
        if allowWrite:
            mode = mode | SDC.WRITE
        
        self._hdf = SD(filename, mode)
        self.attributeCache = CaseInsensitiveAttributeCache(self)

    def __call__(self):
        "yield names of variables to be compared"
        return self._hdf.datasets().keys()
    
    # this returns a numpy array with a copy of the full, scaled
    # data for this variable, if the data type must be changed to allow
    # for scaling it will be (so the return type may not reflect the
    # type found in the original file)
    def __getitem__(self, name):
        # defaults
        scale_factor = 1.0
        add_offset = 0.0
        data_type = None 
        scaling_method = None
        
        # get the variable object and use it to
        # get our raw data and scaling info
        variable_object = self.get_variable_object(name)
        raw_data_copy = variable_object[:]
        try :
            # TODO, this currently won't work with geocat data, work around it for now
            scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
        except HDF4Error:
            # load just the scale factor and add offset information by hand
            temp = self.attributeCache.get_variable_attributes(name)
            if ADD_OFFSET_STR in temp.keys() :
                add_offset = temp[ADD_OFFSET_STR]
                data_type = np.dtype(type(add_offset))
            if SCALE_FACTOR_STR in temp.keys() :
                scale_factor = temp[SCALE_FACTOR_STR]
                data_type = np.dtype(type(scale_factor))
            if SCALE_METHOD_STR in temp.keys() :
                scaling_method = temp[SCALE_METHOD_STR]
        SDS.endaccess(variable_object)
        
        # don't do lots of work if we don't need to scale things
        if (scale_factor == 1.0) and (add_offset == 0.0) :
            return raw_data_copy
        
        # at the moment geocat has several scaling methods that don't match the normal standards for hdf
        """
        please see constant.f90 for a more up to date version of this information:
            INTEGER(kind=int1) :: NO_SCALE              ! 0
            INTEGER(kind=int1) :: LINEAR_SCALE          ! 1
            INTEGER(kind=int1) :: LOG_SCALE             ! 2
            INTEGER(kind=int1) :: SQRT_SCALE            ! 3 
        """
        if (scaling_method == 0) :
            return raw_data_copy
        if not ((scaling_method is None) or (int(scaling_method) <= 1)) :
            LOG.warn ('Scaling method of \"' + str(scaling_method) + '\" will be ignored in favor of hdf standard method. '
                      + 'This may cause problems with data consistency')
        
        # if we don't have a data type something strange has gone wrong
        assert(not (data_type is None))
        
        # get information about where the data is the missing value
        missing_val = self.missing_value(name)
        missing_mask = np.zeros(raw_data_copy.shape, dtype=bool)
        missing_mask[raw_data_copy == missing_val] = True
        
        # create the scaled version of the data
        scaled_data_copy                = np.array(raw_data_copy, dtype=data_type)
        scaled_data_copy[~missing_mask] = (scaled_data_copy[~missing_mask] * scale_factor) + add_offset #TODO, type truncation issues?
        
        return scaled_data_copy 
    
    def get_variable_object(self, name):
        return self._hdf.select(name)
    
    def missing_value(self, name):
        
        return self.get_attribute(name, fillValConst1)
    
    def create_new_variable(self, variablename, missingvalue=None, data=None, variabletocopyattributesfrom=None):
        """
        create a new variable with the given name
        optionally set the missing value (fill value) and data to those given
        
        the created variable will be returned, or None if a variable could not
        be created
        """
        
        raise IOUnimplimentedError('Unable to create variable in hdf file, this functionality is not yet available.')
        
        return None
    
    def add_attribute_data_to_variable(self, variableName, newAttributeName, newAttributeValue) :
        """
        if the attribute exists for the given variable, set it to the new value
        if the attribute does not exist for the given variable, create it and set it to the new value
        """
        
        raise IOUnimplimentedError('Unable to add attribute to hdf file, this functionality is not yet available.')
        
        return
    
    def get_variable_attributes (self, variableName, caseInsensitive=True) :
        """
        returns all the attributes associated with a variable name
        """
        
        toReturn = None
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attributes(variableName)
        else :
            toReturn = self.get_variable_object(variableName).attributes()
        
        return toReturn
    
    def get_attribute(self, variableName, attributeName, caseInsensitive=True) :
        """
        returns the value of the attribute if it is available for this variable, or None
        """
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_variable_attribute(variableName, attributeName)
        else :
            temp_attributes = self.get_variable_attributes(variableName, caseInsensitive=False)
            
            if attributeName in temp_attributes :
                toReturn = temp_attributes[attributeName]
        
        return toReturn
    
    def get_global_attributes(self, caseInsensitive=True) :
        """
        get a list of all the global attributes for this file or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attributes()
        else :
            toReturn = self._hdf.attributes()
        
        return toReturn
    
    def get_global_attribute(self, attributeName, caseInsensitive=True) :
        """
        returns the value of a global attribute if it is available or None
        """
        
        toReturn = None
        
        if caseInsensitive :
            toReturn = self.attributeCache.get_global_attribute(attributeName)
        else :
            if attributeName in self._hdf.attributes() :
                toReturn = self._hdf.attributes()[attributeName]
        
        return toReturn
    
    def is_loadable_type (self, name) :
        """
        check to see if the indicated variable is a type that can be loaded
        """
        
        # TODO, are there any bad types for these files?
        return True
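
# a hedged usage sketch for the wrapper above (not part of the original class);
# the file and variable names are placeholders
h = hdf('some_granule.hdf')
for var_name in h():                        # __call__ lists variable names
    print(var_name)
scaled = h['Cloud_Top_Temperature']         # __getitem__ returns scaled data
print(scaled.shape, h.missing_value('Cloud_Top_Temperature'))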
Example #50
0
def load_hdf_sd(FILE_NAME):
    """
    Name:

        load_hdf_sd
    
    Purpose:

        to load everything in an hdf file using the SD interface instead of GDAL;
        turns missing_value and _FillValue entries into NaNs and scales the values by the scale_factor
    
    Calling Sequence:

        dat,dat_dict = load_hdf_sd(FILE_NAME) 
    
    Input: 
  
        FILE_NAME: path and name of hdf file
    
    Output:

        dat: dictionary with numpy array of values
        dat_dict: dictionary with dictionaries of attributes for each read value
    
    Keywords: 

       none
    
    Dependencies:

        numpy
        pyhdf, SDC, SD
    
    Required files:
   
        dat file
    
    Example:

        ...
        
    Modification History:
    
        Written (v1.0): Samuel LeBlanc, 2015-05-13, NASA Ames
        Modified (v1.1): by Samuel LeBlanc, 2015-07-01, NASA Ames, Happy Canada Day!
                        - added Fill value keyword selection
                        - added scale factor and add offset
        
    """
    import numpy as np
    from pyhdf.SD import SD, SDC
    print('Reading file: ' + FILE_NAME)
    hdf = SD(FILE_NAME, SDC.READ)
    dat = dict()
    dat_dict = dict()
    for name in hdf.datasets().keys():
        print('  ' + name + ': %s' % (hdf.datasets()[name], ))
        dat[name] = hdf.select(name)[:]
        dat_dict[name] = hdf.select(name).attributes()
        try:
            scale_factor = dat_dict[name].get('scale_factor')
            if not scale_factor:
                scale_factor = 1.0
            dat[name] = dat[name] * scale_factor
            try:
                dat[name][dat[name] == dat_dict[name].get('missing_value') *
                          scale_factor] = np.nan
            except TypeError:
                print('No missing_value on ' + name)
            try:
                dat[name][dat[name] == dat_dict[name].get('_FillValue') *
                          scale_factor] = np.nan
            except TypeError:
                print('No FillValue on ' + name)
            add_offset = dat_dict[name].get('add_offset')
            if not add_offset:
                add_offset = 0
            dat[name] = dat[name] + add_offset
        except Exception:
            print('Problem in filling with nans and getting the offsets, must do it manually')
    return dat, dat_dict
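
# a hedged usage sketch (not part of the original function); the file name and
# dataset name are placeholders
dat, dat_dict = load_hdf_sd('MOD06_L2.A2015348.0215.006.2015348152515.hdf')
print(list(dat.keys()))
print(dat_dict['Cloud_Optical_Thickness'].get('units'))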
Example #51
0
import os
import numpy as np
from pyhdf.SD import SD, SDC
import pprint

fname = 'BRDF_DB.A2001001.h12v04.002.2005235120000.hdf'
workspace = '/home/arthur/Dropbox/projects/mcd43_nrt/'
os.chdir(workspace)

# 'BRDF_Model_ID', 'BRDF_Albedo_Quality',
sds_list = [
    'BRDF_Albedo_Parameters_S1', 'BRDF_Albedo_Parameters_S2',
    'BRDF_Albedo_Parameters_S3'
]

#

hdf_ds = SD(fname, SDC.WRITE)
datasets_dict = hdf_ds.datasets()

# print sds names
for idx, sds in enumerate(datasets_dict.keys()):
    print(idx, sds)

# select each parameter sds, make a np array full of fill values
for sds in sds_list:
    dataset_param = hdf_ds.select(sds)
    #pprint.pprint(dataset_param.attributes())
    data_np = dataset_param[:, :]
    fill_arr_param = np.full(data_np.shape, 32767, dtype='int16')
    dataset_param[:, :] = fill_arr_param
    dataset_param.endaccess()

# do the same as above for the model id sds, make a np array full of fill values
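
# a hedged sketch of that step (not in the original snippet): the fill value
# (255) and dtype ('uint8') for BRDF_Model_ID are assumptions; check the SDS
# _FillValue attribute before overwriting real data
dataset_model_id = hdf_ds.select('BRDF_Model_ID')
model_np = dataset_model_id[:, :]
dataset_model_id[:, :] = np.full(model_np.shape, 255, dtype='uint8')
dataset_model_id.endaccess()

hdf_ds.end()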
Example #52
0
#!/usr/bin/env python
# This is an example of how the pyhdf package can be used to access MODIS data
from pyhdf.SD import SD, SDC
import numpy as np
from hdf_functions import scaled_data, print_description

file_name = '../assets/example_data/MYD05_L2.A2017109.0000.006.NRT.hdf'

file = SD(file_name, SDC.READ)

print('Data size: ' + str(file.info()[0]) + ', ' + str(file.info()[1]))
datasets = file.datasets()

# Print data contents
print('\nData content:')
for key in datasets.keys():
    print('\n* ' + key)
    sds_obj = file.select(key)
    print_description(sds_obj)
    data = scaled_data(sds_obj)
    print(data)



Example #53
0
# ds: an SD object opened earlier (not shown in this snippet)
import numpy as np
import matplotlib.pyplot as plt
from pyhdf.SD import SD, SDC

flags = ds.select('l2p_flags')
flags = np.array(flags[:, :], dtype='uint16')
plt.imshow(flags)

# bit masks for the two highest bits of l2p_flags
bit15 = np.array(np.ones(flags.shape) * (2**14), dtype='uint16')
bit16 = np.array(np.ones(flags.shape) * (2**15), dtype='uint16')
bit15_mask = np.bitwise_and(flags, bit15) > 0
bit16_mask = np.bitwise_and(flags, bit16) > 0

# pixels with both bits set are treated as missing
missing = np.logical_and(bit15_mask, bit16_mask)
np.min(missing)
plt.imshow(missing)

# load a missing-value mask from another file for comparison
newds = SD('testmissing.hdf', SDC.READ)
newds.datasets()
new = newds.select('missing_flag')
im2 = new[:, :]
plt.imshow(im2)

# cloud: bit16 set but bit15 clear
cloud_flag = np.logical_and(bit16_mask, np.logical_not(bit15_mask))
plt.imshow(cloud_flag)

math = SD('outmath2.hdf', SDC.READ)
Example #54
0
class HDF4Store(xmlplot.common.VariableStore,xmlstore.util.referencedobject):

    class Variable(xmlplot.common.Variable):
        def __init__(self,store,hdfvar):
            xmlplot.common.Variable.__init__(self,store)
            self.hdfvar = hdfvar
            self.info = self.hdfvar.info()
            
        def getName_raw(self):
            return self.info[0]
            
        def getDimensions_raw(self):
            dimnames = []
            for idim in range(self.info[1]):
                dim = self.hdfvar.dim(idim)
                dimnames.append(dim.info()[0])
            return dimnames
            
        def getLongName(self):
            atts = self.getProperties()
            if 'long_name' in atts: return atts['long_name']
            return xmlplot.common.Variable.getLongName(self)

        def getUnit(self):
            atts = self.getProperties()
            if 'units' in atts: return atts['units']
            return xmlplot.common.Variable.getUnit(self)
            
        def getShape(self):
            shape = self.info[2]
            if isinstance(shape,int): shape = (shape,)
            return shape
            
        def getDataType(self):
            return datatypes.get(self.info[3],None)
            
        def getProperties(self):
            return self.hdfvar.attributes()
            
        def getSlice(self,bounds=None,dataonly=False,transfercoordinatemask=True):
            dimnames = self.getDimensions_raw()
            shape = self.getShape()

            # Determine final slice
            if bounds is None: bounds = (Ellipsis,)
            newbounds = []
            for bound,dimlen,dimname in zip(xmlplot.common.processEllipsis(bounds,shape),shape,dimnames):
                if isinstance(bound,int):
                    # Integer value provided as index.
                    assert bound>=-dimlen, 'Slice index %i lies below the lowest possible index for dimension %s (%i).' % (bound,dimname,-dimlen  )
                    assert bound<  dimlen, 'Slice index %i exceeds the highest possible index for dimension %s (%i).'   % (bound,dimname, dimlen-1)
                    if bound<0: bound += dimlen
                elif isinstance(bound,slice):
                    start,stop,step = bound.indices(dimlen)
                    bound = slice(start,stop,step)
                newbounds.append(bound)
            bounds = tuple(newbounds)

            # Get data
            dat = numpy.asarray(self.hdfvar[bounds])

            # Mask fill value
            fillvalue = self.hdfvar.attributes().get('_FillValue',None)
            if fillvalue is None: fillvalue = self.hdfvar.attributes().get('Fill',None)
            if fillvalue is not None: dat = numpy.ma.array(dat,mask=(dat==fillvalue),copy=False)

            # Determine scale factor and offset, and cast data to an accommodating type if needed.
            scale  = self.hdfvar.attributes().get('scale_factor',None)
            offset = self.hdfvar.attributes().get('add_offset',  None)
            if (scale is not None or offset is not None) and dat.dtype!=float:
                dat = dat.astype(float)
            if scale  is not None: dat *= scale
            if offset is not None: dat += offset

            if dataonly: return dat

            newdimnames = [d for d,b in zip(dimnames,bounds) if not isinstance(b,int)]
            varslice = self.Slice(newdimnames)
            varslice.data = dat
            # start from the data's own mask so coordinate masks can be merged in below
            datamask = numpy.ma.getmask(dat)
            inewdim = 0
            for dimname,bound in zip(dimnames,bounds):
                 # Get the coordinate variable          
                coordvar = self.store.getVariable_raw(dimname)
                
                if coordvar is None:
                    # No coordinate variable available: use indices
                    if not isinstance(bound,slice): continue
                    coorddims = [dimname]
                    coords = numpy.arange(bound.start,bound.stop,bound.step,dtype=float)
                else:
                    # Coordinate variable present: use it.
                    coorddims = list(coordvar.getDimensions())

                    # Debug check: see if all coordinate dimensions are also used by the variable.
                    for cd in coorddims:
                        assert cd in dimnames, 'Coordinate dimension %s is not used by this variable (it uses %s).' % (cd,', '.join(dimnames))

                    # Get coordinate values
                    coordslice = [bounds[dimnames.index(cd)] for cd in coorddims]
                    coords = coordvar.getSlice(coordslice, dataonly=True)

                # Get the list of coordinate dimensions after the ones with single index have been sliced out.
                newcoorddims = [cd for cd in coorddims if isinstance(bounds[dimnames.index(cd)],slice)]

                # Transfer the coordinate mask to the data if desired.
                coordmask = numpy.ma.getmask(coords)
                if transfercoordinatemask and coordmask is not numpy.ma.nomask:
                    coordmask = xmlplot.common.broadcastSelective(coordmask,newcoorddims,dat.shape,newdimnames)
                    if datamask is numpy.ma.nomask:
                        datamask = coordmask
                    else:
                        datamask |= coordmask

                # If we take a single index for this dimension, it will not be included in the output.
                if not isinstance(bound,slice): continue
                
                # Coordinates should not have a mask - undo the masking.
                if coordmask is not numpy.ma.nomask:
                    coords = numpy.ma.getdata(coords)

                # Auto-generate staggered coordinates
                coords_stag = xmlplot.common.stagger(coords)

                # Insert data dimensions where they are lacking in coordinate
                coords      = xmlplot.common.broadcastSelective(coords,     (dimname,),dat.shape,               newdimnames)
                coords_stag = xmlplot.common.broadcastSelective(coords_stag,(dimname,),[l+1 for l in dat.shape],newdimnames)

                # Assign coordinate values
                varslice.coords     [inewdim] = coords
                varslice.coords_stag[inewdim] = coords_stag

                inewdim += 1

            return varslice

    def __init__(self,path):
        xmlplot.common.VariableStore.__init__(self)
        xmlstore.util.referencedobject.__init__(self)
        from pyhdf.SD import SD, SDC
        self.file = SD(str(path),SDC.READ)

    def getVariable_raw(self,varname):
        """Returns a Variable object for the given original short variable name.
        The method must be implemented by derived classes.
        """
        if varname not in self.file.datasets().keys(): return None
        return self.Variable(self,self.file.select(varname))

    def getVariableNames_raw(self):
        """Returns a list of original short names for all variables present in the store.
        The method must be implemented by derived classes.
        """
        return self.file.datasets().keys()

    def getProperties(self):
        return self.file.attributes()

    def unlink(self):
        self.file.end()
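
# a hedged usage sketch (not part of the original class); the file and variable
# names are placeholders
store = HDF4Store('MOD08_D3.A2010001.006.2015041224130.hdf')
print(store.getVariableNames_raw())
var = store.getVariable_raw('Cloud_Fraction_Mean')
if var is not None:
    print(var.getShape(), var.getUnit())
store.unlink()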
Example #55
0
import os
import matplotlib as mpl
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
FILE_NAME = 'MOD08_D3.A2010001.006.2015041224130.hdf'
DATAFIELD_NAME = 'Cloud_Fraction_Mean'

from pyhdf.SD import SD, SDC
hdf = SD(FILE_NAME, SDC.READ)


print(hdf.info())

datasets_dic = hdf.datasets()

for idx,sds in enumerate(datasets_dic.keys()):
    print(idx,sds)

# Read dataset.
data_raw = hdf.select(DATAFIELD_NAME)
data = data_raw[:,:].astype(np.double)

# Read lat/lon.
xdim = hdf.select('XDim')
lon = xdim[:].astype(np.double)

ydim = hdf.select('YDim')
lat = ydim[:].astype(np.double)
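
# a hedged continuation (not in the original snippet): mask the fill value,
# apply the usual MODIS scale/offset convention, and draw the field on a
# PlateCarree map; attribute names should be checked against the file
attrs = data_raw.attributes()
fill = attrs.get('_FillValue')
if fill is not None:
    data[data == fill] = np.nan
data = (data - attrs.get('add_offset', 0.0)) * attrs.get('scale_factor', 1.0)

ax = plt.axes(projection=ccrs.PlateCarree())
ax.coastlines()
gl = ax.gridlines(draw_labels=True)
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
pcm = ax.pcolormesh(lon, lat, data, transform=ccrs.PlateCarree())
plt.colorbar(pcm, label=DATAFIELD_NAME)
plt.title(os.path.basename(FILE_NAME))
plt.show()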
Example #56
0
class HDF_SDS(object):
    """
    This class is used in place of the pyhdf.SD.SDS class to allow the file contents to be loaded at a later time
    rather than in this module read method (so that we can close the SD instances and free up file handles)
    """

    _sd = None
    _sds = None
    _filename = None
    _variable = None

    def __init__(self, filename, variable):
        self._filename = filename
        self._variable = variable

    def _open_sds(self):
        """
        Open the SDS file for reading
        """
        from pyhdf.SD import SD as SDS

        self._sd = SDS(self._filename)
        self._sds = self._sd.select(self._variable)

    def _close_sds(self):
        """
        Close the SDS file for reading

        NB: Exceptions thrown from here may hide an exception thrown in get(), info(), etc.
        """
        try:
            if self._sds is not None:
                self._sds.endaccess()
        finally:
            if self._sd is not None:
                self._sd.end()

    def get(self):
        """
        Call pyhdf.SD.SDS.get(), opening and closing the file
        """
        try:
            self._open_sds()
            data = self._sds.get()
            return data
        finally:
            self._close_sds()

    def attributes(self):
        """
        Call pyhdf.SD.SDS.attributes(), opening and closing the file
        """
        try:
            self._open_sds()
            attributes = self._sds.attributes()
            return attributes
        finally:
            self._close_sds()

    def info(self):
        """
        Call pyhdf.SD.SDS.info(), opening and closing the file
        """
        try:
            self._open_sds()
            info = self._sds.info()
            return info
        finally:
            self._close_sds()

    def dimensions(self):
        """
        Call pyhdf.SD.SDS.dimensions(), opening and closing the file
        """
        from collections import OrderedDict
        try:
            self._open_sds()
            var_description = self._sd.datasets()[self._variable]
            return OrderedDict(zip(var_description[0], var_description[1]))
        finally:
            self._close_sds()
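
# a hedged usage sketch (not part of the original class): each call re-opens
# and closes the file, so instances are cheap to create up front and read from
# later; the file and variable names are placeholders
lazy_sds = HDF_SDS('MOD06_L2.A2015348.0215.006.2015348152515.hdf', 'Cloud_Optical_Thickness')
print(lazy_sds.info())
print(lazy_sds.dimensions())
print(lazy_sds.get().shape)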
Example #57
0
    JCZ1 = [
        JCZ_file["经度"][i], JCZ_file["纬度"][i],
        JCZ_file["城市"][i] + "-" + JCZ_file["监测点名称"][i]
    ]
    exec(
        'JCZ%s = [JCZ_file["经度"][i],JCZ_file["纬度"][i],JCZ_file["城市"][i]+"-"+JCZ_file["监测点名称"][i]]'
        % i)
    exec("JCZ.append(JCZ%s)" % i)
print("监测站总数", len(JCZ), "个")
#文件读取HDF文件
aod_outcome_list = []
for hdf in file_dir:
    HDF_FILR_URL = hdf
    file = SD(HDF_FILR_URL)
    # print(file.info())
    datasets_dic = file.datasets()
    '''
    # print the dataset names
    for idx, sds in enumerate(datasets_dic.keys()):
        print(idx, sds)
    '''
    sds_obj1 = file.select('Longitude')  # select longitude
    sds_obj2 = file.select('Latitude')  # select latitude
    sds_obj3 = file.select(
        'Optical_Depth_Land_And_Ocean')  # AOD dataset with the highest retrieval quality; the other set is lower quality but covers more area
    longitude = sds_obj1.get()  # read the data
    latitude = sds_obj2.get()
    aod = sds_obj3.get()
    longitude = pd.DataFrame(longitude)  # convert to DataFrame
    latitude = pd.DataFrame(latitude)
    aod = pd.DataFrame(aod)
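
    # a hedged continuation (not in the original snippet): nearest-pixel
    # matching of each monitoring station to the granule is an assumption
    # about what aod_outcome_list is meant to collect
    for lon0, lat0, name in JCZ:
        dist = (longitude - lon0) ** 2 + (latitude - lat0) ** 2
        row, col = dist.stack().idxmin()
        aod_outcome_list.append([name, aod.iat[row, col]])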
Example #58
0
def modis_oc(file_name,
             limit=None,
             flagLevel=None,
            param='sst'):
#  ;Example of setting up flag level for non SST images
#  ;--> cf http://oceancolor.gsfc.nasa.gov/DOCS/Ocean_Level-2_Data_Products.pdf
#  flagtable=REPLICATE(0B,32) ;32 bits flag
#  ;flaglevel[[0,1,2,3,4,5,8,9,10,12,14,15,16,17,18,19,21,22,23,25,26,28,29]]=1B ;RQ: remove 1 for table correspondance in documentation (table 3)
#  
#  IF (~exist(sfglevel)) THEN sfglevel=1
#  
#  case (flaglevel) of
#    1: flagTable[[  1,  3,  5,            15,16,      19,         25,26]]=1B
#    2: flagTable[[0,1,  3,4,5,8,9,10,  14,15,16,      19,21,22,23,25,26]]=1B
#    3: flagTable[[0,1,  3,4,5,8,9,10,  14,15,16,      19,21,22,23,25,26]]=1B
#    else: flagTable[[0,1,  3,4,5,8,9,10,  14,15,16,      19,21,22,23,25,26]]=1B
#  endcase
#
#;   flaglevel[[  1,  3,  5,       12,   15,16,      19,         25,26]]=1B ;low value flag
#;  ;flaglevel[[0,1,  3,4,5,8,9,10,12,14,15,16,      19,21,22,23,25,26]]=1B ;operational flag (strong removal)
    #Read MODIS HDF4 data
    f = SD(file_name, SDC.READ)
    fattr=f.attributes()

    #Load coordinates
    lon=f.select('longitude')
    lat=f.select('latitude')

    #Shrink image
    #############
    
    
    #scene dimensions
    
    info_sst=f.datasets()[param]
    dnames=info_sst[0]
    d=info_sst[1]
    
    lonvec=lon.get().reshape(d[0]*d[1])
    latvec=lat.get().reshape(d[0]*d[1])
    
    
    #Get points within domain
    if limit is not None :
       indvec,flagvec=in_limits(lonvec, latvec, limit)
    
    flagmat=flagvec.reshape(d[0],d[1])
    rowsum=np.sum(flagmat, 0)
    colsum=np.sum(flagmat, 1)
    yflag=rowsum >= 1
    xflag=colsum >= 1
    xid=np.arange(d[0])
    xid=xid.compress(xflag)
    xcnt=int(xid.size)
    xst=int(xid.min())
    yid=np.arange(d[1])
    yid=yid.compress(yflag)
    ycnt=int(yid.size)
    yst=int(yid.min())

    #Shrink lon & lat
    lon_var=lon.get(start=[xst,yst], count=[xcnt,ycnt])
    lat_var=lat.get(start=[xst,yst], count=[xcnt,ycnt])
    
    
    #Load SST image
    ###############
    sst=f.select(param)
    attr=sst.attributes()
    slope=attr['slope']
    intercept=attr['intercept']
    flagValue=attr['bad_value_scaled']
    sst_var=sst.get(start=[xst,yst], count=[xcnt,ycnt]) #Shrink sst image
    
    #Compute mask
    fg=f.select('qual_'+param)
    fg_var=fg.get(start=[xst,yst], count=[xcnt,ycnt])
    
    if flagLevel is None :
        mask=sst_var == flagValue
    else :
        mask= (sst_var == flagValue) | (fg_var >= flagLevel)
    
    sst_var=np.ma.masked_array(sst_var*slope + intercept, mask=mask, dtype=float)
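
    # a hedged completion (not in the original snippet): mirror the return
    # value of modis_sst() below
    return {'lon': lon_var, 'lat': lat_var, param: sst_var}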
Example #59
0
from pyhdf.SD import SD, SDC
import pprint
import numpy as np
from matplotlib import pyplot as plt

file_name = "F:/0nti_modis/MODIS/20191125_101530_AQUA_MOD021KM.hdf"
print(f'reading {file_name}')
the_file = SD(file_name, SDC.READ)
stars = '*' * 50
print((f'\n{stars}\nnumber of datasets, number of attributes'
       f'={the_file.info()}\n{stars}\n'
       f'\nHere is the help file for the info function:\n'))
help(SD.info)

datasets_dict = the_file.datasets()

for idx, sds in enumerate(datasets_dict.keys()):
    print(idx, sds)

longwave_data = the_file.select('EV_1KM_Emissive')  # select sds
print(longwave_data.info())
help(longwave_data.info)

data_row = longwave_data[0, 0, :]  # get sds data
print(data_row.shape, data_row.dtype)

longwave_data[0, :, :]

pprint.pprint(longwave_data.attributes())
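
# a hedged sketch (not part of the original snippet): MODIS L1B emissive bands
# are stored as scaled integers; the per-band radiance_scales/radiance_offsets
# attributes convert them to radiance
attrs = longwave_data.attributes()
scales = np.array(attrs['radiance_scales'])
offsets = np.array(attrs['radiance_offsets'])
band0 = longwave_data[0, :, :].astype('float64')
radiance_band0 = scales[0] * (band0 - offsets[0])
plt.imshow(radiance_band0)
plt.colorbar(label='first emissive band radiance')
plt.show()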
Example #60
0
def modis_sst(file_name,
              limit=None,
              flagLevel=None,
              param='sst'):
    
    #Setup flag tables for L2 data
    flagTable=np.zeros(32,dtype=bool)
    if flagLevel == 0 : flagTable[[1]]=True
    elif flagLevel == 1 :flagTable[[  1,  3,  5,            15,16,      19,         25,26]]=True
    elif flagLevel == 2 : flagTable[[0,1,  3,4,5,8,9,10,  14,15,16,      19,21,22,23,25,26]]=True
    elif flagLevel >= 3 : flagTable[[0,1,  3,4,5,8,9,10,  14,15,16,      19,21,22,23,25,26]]=True
    flags=np.where(flagTable)[0]

    #Read MODIS HDF4 data
    f = SD(file_name, SDC.READ)
    fattr=f.attributes()
    
    #Get dimensions
    nScans=fattr['Number of Scan Lines']
    sCtl=fattr['Number of Scan Control Points']
    pCtl=fattr['Number of Pixel Control Points']
    nPix=fattr['Pixels per Scan Line']

    #Load coordinates 
    #UPDATE THIS SECTION TO ALLOW CLEAN LOAD OF THE IMAGE...
    lonsel=f.select('longitude')
    latsel=f.select('latitude')
#    
#    pCtl_ind=np.arange(pCtl,dtype=np.float32)
#    p_ind=(pCtl-1)*np.arange(nPix,dtype=np.float32)/(nPix-1)
#    
#    sCtl_ind=np.arange(sCtl,dtype=np.float32)
#    s_ind=(sCtl-1)*np.arange(nScans,dtype=np.float32)/(nScans-1)
    
#    dum=interp2d2d(sCtl_ind, pCtl_ind, lonsel.get(), s_ind, p_ind)
#    dumlon=interp1d(p_ind, lonsel.get(), pCtl_ind, spline=True)
#    dumlat=interp1d(p_ind, lonsel.get(), pCtl_ind, spline=True)

#    shlon=shlat=lon.dimensions().values()

    #Shrink image
    #############
    
    
    #scene dimensions
    
    info_sst=f.datasets()[param]
    dnames=info_sst[0]
    d=info_sst[1]
    
    lonvec=lonsel.get().reshape(d[0]*d[1])
    latvec=latsel.get().reshape(d[0]*d[1])
    
    
    #Get points within domain
    if limit is not None :
       indvec,flagvec=in_limits(lonvec, latvec, limit)
    
    flagmat=flagvec.reshape(d[0],d[1])
    rowsum=np.sum(flagmat, 0)
    colsum=np.sum(flagmat, 1)
    yflag=rowsum >= 1
    xflag=colsum >= 1
    xid=np.arange(d[0])
    xid=xid.compress(xflag)
    xcnt=int(xid.size)
    yid=np.arange(d[1])
    yid=yid.compress(yflag)
    ycnt=int(yid.size)
    

    if xcnt == 0: raise Exception('Error : no longitude within limits')
    if ycnt == 0: raise Exception('Error : no latitude within limits')
    
    #Get start points
    xst=int(xid.min())
    yst=int(yid.min())
    
    

    #Shrink lon & lat
    lon_var=lonsel.get(start=[xst,yst], count=[xcnt,ycnt])
    lat_var=latsel.get(start=[xst,yst], count=[xcnt,ycnt])
    
    #Load SST image
    ###############
    sst=f.select(param)
    attr=sst.attributes()
    slope=attr['slope']
    intercept=attr['intercept']
    flagValue=attr['bad_value_scaled']
    sst_var=sst.get(start=[xst,yst], count=[xcnt,ycnt]) #Shrink sst image
    
    #Compute mask
    if (param == 'sst') or (param == 'sst4') :
        fg=f.select('qual_'+param)
        fg_var=fg.get(start=[xst,yst], count=[xcnt,ycnt])
    
        if flagLevel is None :
            mask=sst_var == flagValue
        else :
            mask= (sst_var == flagValue) | (fg_var >= flagLevel)
    elif param == 'chlor_a'  :
        fg=f.select('l2_flags')
        fg_var=fg.get(start=[xst,yst], count=[xcnt,ycnt]).flatten()
#        dumvar=[False]*(xcnt*ycnt)#np.zeros((xcnt,ycnt,32),dtype=str).reshape((xcnt*ycnt*32))
        dumfg=[[int(b) for b in np.binary_repr(f,32)[::-1]] for f in fg_var] #Rq : bits should be read from end to start
        dumvar=np.sum(np.array(dumfg)[:,flags],1) >= 1
#        for i,f in enumerate(fg_var) :
#            dumvar[i] =  (np.array([b for b in np.binary_repr(f,32)])[flags] == '1').any()
        mask=np.reshape(dumvar,(xcnt,ycnt))
    
    #Check flags
#    plt.bar(np.arange(1,33),np.sum(dumfg,0)[::-1]/np.float64(xcnt*ycnt)); plt.show()
        
    sst_var=np.ma.masked_array(sst_var*slope + intercept, mask=mask, dtype=float)
    
    
    #Image reprojection to avoid bow tie effect
#    import pyresample as pr
#    swath_def=pr.geometry.SwathDefinition(lons=lon_var, lats=lat_var)
#    lons,lats=np.meshgrid(np.arange(lon_var.min(),lon_var.max(),0.005), np.arange( lat_var.min(),lat_var.max(),0.005))
#    grid_def = pr.geometry.GridDefinition(lons=lons, lats=lats)
#    
#    area_id = 'NWMEd'
#    area_name = 'NWMed'
#    proj_id = 'cyl'
#    proj4_args = '+proj=eqc +lat_ts=0 +lat_0=0 +lon_0=0 +x_0=0 +y_0=0 +a=6378137 +b=6378137 +units=m'
#    x_size = 601
#    y_size = 351
#    area_extent = (5., 41., 11., 44.5)
#    area_def = pr.utils.get_area_def(area_id, area_name, proj_id, proj4_args, x_size, y_size, area_extent )
#    res = pr.kd_tree.resample_gauss(swath_def, sst_var.data, grid_def, radius_of_influence=10000.,sigmas=500.,fill_value=None)
    

    
    #Quality control
#    sstfg=f.select('qual_sst')
#    l2fg=f.select('l2_flags')
    

    #Return output dictionary
    return {'lon':lon_var,'lat':lat_var,'sst':sst_var}
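
# a hedged usage sketch (not part of the original function): the ordering of
# the limit bounds depends on the in_limits() helper, and the file name is a
# placeholder
granule = modis_sst('A2010001000000.L2_LAC_SST.hdf', limit=[40., 5., 45., 12.], flagLevel=1)
print(granule['sst'].shape)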