def load_data_from_files(filename):
    """Load the NDVI dataset of an HDF4 file into a NumPy array.

    The file's dataset names are scanned for the substring "NDVI"; if
    several names match, the last one encountered is used.

    Args:
        filename: Path of the HDF4 file to read.

    Returns:
        A NumPy array with the contents of the NDVI dataset, or None
        (after printing a message) when the file does not exist, is not
        an HDF4 file, or contains no dataset whose name includes "NDVI".
    """
    if not os.path.exists(filename):
        print("File {} does not exist, cannot load data.".format(filename))
        return None
    if not HDF.ishdf(filename):
        print("File {} is not in hdf4 file format, cannot load data.".format(
            filename))
        return None

    hdf_file = SD(filename, SDC.READ)
    try:
        data_field = None
        for name in hdf_file.datasets():
            if "NDVI" in name:
                data_field = name

        # Without this guard select(None) would fail with an obscure error.
        if data_field is None:
            print("File {} has no NDVI dataset, cannot load data.".format(
                filename))
            return None

        # np.array copies the values, so they stay valid after the file
        # is closed below.
        return np.array(hdf_file.select(data_field).get())
    finally:
        # Always release the HDF handle, even if reading raised.
        hdf_file.end()
def test_hdf_type(filename):
    """Report which HDF flavour the given file is.

    Both format probes are always run, HDF4 first and HDF5 second. A
    positive HDF5 result takes precedence over an HDF4 result.

    Returns:
        'HDF5', 'HDF4', or None when neither probe recognises the file.
    """
    # HDF4 probe: 1 means HDF4 file, 0 means not an HDF4 file.
    looks_like_hdf4 = HDF4.ishdf(filename) == 1

    # HDF5 probe (also covers hdf5-eos): > 0 if True, 0 if False.
    looks_like_hdf5 = HDF5.isHDF5File(filename) > 0

    if looks_like_hdf5:
        return 'HDF5'
    return 'HDF4' if looks_like_hdf4 else None
def read_misr_dir(cls, rawdirname, AODdirname, outfile):
    """read_misr_dir(rawdirname, AODdirname, outfile)

    Read in raw MISR data from .hdf files in the 'AN' subdirectory of
    rawdirname, and AOD data from all .hdf files in AODdirname.
    Pickle the result and save it to outfile.

    Note: does NOT update object fields.
    Follow this with a call to readin().

    Args:
        rawdirname: Directory containing an 'AN' subdirectory of raw
            radiance .hdf files (used to build RGB images).
        AODdirname: Directory containing the AOD .hdf files.
        outfile: Path of the pickle file to write.

    The pickled object is the tuple
    (data, rgbimages, along_track, cross_track, latlons, datestr), where
    data is n x d (n = # pixels, d = # time points), rgbimages is a list
    of cropped uint8 RGB arrays (one per raw file), along_track and
    cross_track come from the dimensions of the last AOD file read,
    latlons is a placeholder list of 'Unknown', and datestr holds the
    date of each AOD file.

    Raises:
        ValueError: if no AOD data could be read from AODdirname.
    """

    def _orbit_date(fname, field):
        """Parse the orbit number from '_'-separated field `field` of the
        base file name and return (orbit, date)."""
        orbit = int(fname.split('_')[field].split('O')[1])
        return orbit, MISRData.orbit_to_date(orbit)

    def _read_band(sd, band):
        """Read one radiance band ('Red', 'Green' or 'Blue') as a
        2048 x 2048 array with padding zeroed and the per-tile
        conversion factor applied."""
        dataset = sd.select('%s Radiance/RDQI' % band)
        dim = dataset.dimensions()
        # 4 consecutive blocks starting at block index 60, flattened
        # into a single 2048 x 2048 image.
        pixels = dataset.get((60, 0, 0),
                             (4,
                              dim['XDim:%sBand' % band],
                              dim['YDim:%sBand' % band]),
                             (1, 1, 1)
                             ).reshape([2048, 2048])

        # 65515 marks PADDING.  (Earlier revisions of this code also
        # labelled 65511 as "mountains" and 65523 as "hlines", but never
        # used those masks.)
        pixels[pixels == 65515] = 0

        conv_factor_ds = sd.select('%sConversionFactor' % band)
        dim = conv_factor_ds.dimensions()
        conv_factor = conv_factor_ds.get((60, 0, 0),
                                         (4,
                                          dim['XDim:BRF Conversion Factors'],
                                          dim['YDim:BRF Conversion Factors']),
                                         (1, 1, 1)
                                         ).reshape((32, 32))
        conv_factor[conv_factor < 0] = 0

        # One conversion factor per 64 x 64 pixel tile.  Floor division
        # keeps the tile index an integer on Python 2 and Python 3.
        for x in range(0, pixels.shape[0], 64):
            for y in range(0, pixels.shape[1], 64):
                pixels[x:x+64, y:y+64] = np.multiply(
                    pixels[x:x+64, y:y+64],
                    conv_factor[x // 64, y // 64])
        return pixels

    data = []
    rgbimages = []
    datestr = []
    datestr2 = []

    # Read in the AOD (from local mode) data; this is what we'll analyze
    files = sorted(os.listdir(AODdirname))
    count = 0
    for f in files:
        if not fnmatch.fnmatch(f, '*.hdf'):
            continue

        # Progress output stays on one line (no trailing newline).
        sys.stdout.write(" %d / %d  " % (count, len(files)))
        count += 1

        filename = os.path.join(AODdirname, f)

        # Check that filename exists and is an HDF file
        if HDF.ishdf(filename) != 1:
            print("File %s cannot be found or is not an HDF-4 file." % filename)
            continue

        # Parse from the base name so that underscores in the directory
        # path cannot shift the field index.
        orbit, thisdate = _orbit_date(f, 5)
        print("orbit: %d -> %s " % (orbit, thisdate))
        datestr.append(thisdate)

        sd = SD.SD(filename)
        try:
            # This is 3 (SOMBlock) x 32 (x) x 128 (y) x 4 (bands)
            dataset = sd.select('RegBestEstimateSpectralOptDepth')
            dim = dataset.dimensions()
            n_blocks = dim['SOMBlockDim:RegParamsAer']
            n_x = dim['XDim:RegParamsAer']
            n_y = dim['YDim:RegParamsAer']
            along_track = n_blocks * n_x
            cross_track = n_y

            # Get all of the data for the green band (band = 1)
            data_now = dataset.get((0, 0, 0, 1),
                                   (n_blocks, n_x, n_y, 1)).squeeze()
        finally:
            # Close the file, even if reading failed
            sd.end()

        # Concatenate the blocks into a single column vector
        data_now = data_now.reshape((-1, 1))
        # Set -9999 values to NaN
        data_now[np.equal(data_now, -9999)] = float('NaN')
        data.append(data_now)

        sys.stdout.write('. ')
        sys.stdout.flush()

    if not data:
        raise ValueError('No AOD data could be read from %s' % AODdirname)

    # Stack the per-file columns side by side.  Unlike
    # asarray().squeeze().T this stays 2-D with a single time point.
    data = np.hstack(data)
    print(data.shape)

    print("")
    # Data is now n x d, where n = # pixels and d = # timepts
    print('Read data set with %d pixels, %d time points.' % data.shape)

    # TODO: Add lat/lon coords here
    latlons = ['Unknown'] * data.shape[0]

    # Read in the raw data (for later visualization)
    rawdir = os.path.join(rawdirname, 'AN')
    files = sorted(os.listdir(rawdir))
    print("+++++++++++++")
    print(len(files))
    count = 0
    for f in files:
        if not fnmatch.fnmatch(f, '*.hdf'):
            continue

        filename = os.path.join(rawdir, f)
        sys.stdout.write(" %d / %d  " % (count, len(files)))
        count += 1

        # Check that filename exists and is an HDF file
        if HDF.ishdf(filename) != 1:
            print("File %s cannot be found or is not an HDF-4 file." % filename)
            continue

        # We'll assume there's a one-to-one correspondence
        # with the AOD data. But print it out anyway as a check.
        orbit, thisdate = _orbit_date(f, 6)
        print("orbit: %d -> %s " % (orbit, thisdate))
        datestr2.append(thisdate)

        sd = SD.SD(filename)
        try:
            # Same read order as before: green, red, blue.
            data_g = _read_band(sd, 'Green')
            data_r = _read_band(sd, 'Red')
            data_b = _read_band(sd, 'Blue')
        finally:
            # Close the file, even if reading failed
            sd.end()

        # Scale each band by its own maximum and pack into an RGB image.
        im = np.zeros([2048, 2048, 3])
        for channel, band in enumerate((data_r, data_g, data_b)):
            im[..., channel] = band / float(band.max()) * 256
        im = im.astype('uint8')

        # Zero-valued pixels are shown as white.
        im[np.equal(im, 0)] = 255

        # Shift row bands horizontally by 64 pixels.
        # NOTE(review): presumably this realigns blocks that are offset
        # from one another -- confirm against the MISR block layout.
        im[0:512, 64:, :] = im[0:512, :-64, :]
        im[1024:, :-64, :] = im[1024:, 64:, :]
        im[1536:, :-64, :] = im[1536:, 64:, :]

        # Crop to the bounding box of the non-white pixels.
        isnotwhite = np.not_equal(im, 255)
        isnotwhiterows = isnotwhite.sum(1)
        isnotwhitecols = isnotwhite.sum(0)
        goodrows = [r for r in range(im.shape[0])
                    if isnotwhiterows[r, :].sum() > 0]
        goodcols = [c for c in range(im.shape[1])
                    if isnotwhitecols[c, :].sum() > 0]
        # NOTE(review): the slice end is exclusive, so the last non-white
        # row/column is dropped; kept as in the original.
        im = im[goodrows[0]:goodrows[-1], goodcols[0]:goodcols[-1], :]

        rgbimages.append(im)

        sys.stdout.write('. ')
        sys.stdout.flush()

    print(len(datestr))

    # Check that the raw and AOD sequences are corresponding.  zip()
    # avoids an IndexError when the two sequences differ in length.
    if len(datestr) != len(datestr2):
        print("ERROR! Date sequences do not align.")
        print(" AOD has %d dates, raw has %d" % (len(datestr), len(datestr2)))
    for idx, (aod_date, raw_date) in enumerate(zip(datestr, datestr2)):
        if aod_date != raw_date:
            print("ERROR! Date sequences do not align.")
            print(" detected at index %d: AOD %s, raw %s" % (idx, aod_date, raw_date))

    # Pickle data must be written in binary mode; the context manager
    # closes the file even if dumping fails.
    with open(outfile, 'wb') as outf:
        pickle.dump((data, rgbimages, along_track, cross_track,
                     latlons, datestr), outf)