Exemple #1
0
def load_data_from_files(filename):
    """Load the NDVI dataset of an HDF4 file into a NumPy array.

    Every dataset name reported by the file is scanned for the substring
    "NDVI"; when several names match, the last one in iteration order is
    used.

    Args:
        filename: Path of the HDF4 file to read.

    Returns:
        A NumPy array with the contents of the selected dataset, or None
        (after printing a message) when the file does not exist, is not an
        HDF4 file, or contains no dataset whose name includes "NDVI".
    """
    if not os.path.exists(filename):
        print("File {} does not exist, cannot load data.".format(filename))
        return None
    if not HDF.ishdf(filename):
        print("File {} is not in hdf4 file format, cannot load data.".format(
            filename))
        return None

    f = SD(filename, SDC.READ)
    try:
        data_field = None
        for d in f.datasets():
            if "NDVI" in d:
                data_field = d

        # Without this guard a file lacking NDVI would pass None to select().
        if data_field is None:
            print("File {} has no NDVI dataset, cannot load data.".format(
                filename))
            return None

        ndvi_data = f.select(data_field)
        try:
            return np.array(ndvi_data.get())
        finally:
            ndvi_data.endaccess()
    finally:
        # Release the file handle on every exit path.
        f.end()
def test_hdf_type(filename):
    """Return the type of HDF file that is passed in.

    Args:
        filename: Path of the file to inspect.

    Returns:
        'HDF5' if the HDF5 check succeeds, otherwise 'HDF4' if the HDF4
        check succeeds, otherwise None.  Both checks are always run and the
        HDF5 result is applied last, so it wins if both succeed.
    """
    filetype = None

    # Check to see if file is an hdf4 file:
    #   returns 1 if HDF4 file
    #   returns 0 if not an HDF4 file
    hdf4flag = HDF4.ishdf(filename)
    if hdf4flag == 1:
        filetype = 'HDF4'

    # Check to see if file is hdf5 (also support hdf5-eos):
    #   returns >0 if True
    #   returns 0 if False
    hdf5flag = HDF5.isHDF5File(filename)
    if hdf5flag > 0:
        filetype = 'HDF5'

    return filetype
Exemple #3
0
  def read_misr_dir(cls, rawdirname, AODdirname, outfile):
    """read_misr_dir(rawdirname, AODdirname, outfile)

    Read in raw MISR data from .hdf files in rawdirname,
    and AOD data from all .hdf files in AODdirname.
    Pickle the result and save it to outfile.
    Note: does NOT update object fields.
    Follow this with a call to readin().

    Directory names are concatenated directly with file names
    (AODdirname + f and rawdirname + 'AN/' + f), so both must end with
    a path separator.  The orbit number is parsed from the
    underscore-separated fields of that concatenated path (field 5 for
    AOD files, field 6 for raw files).

    The pickled object is the tuple
    (data, rgbimages, along_track, cross_track, latlons, datestr):
      data         array with one column per AOD file read
      rgbimages    list with one cropped uint8 RGB image per raw file
      along_track, cross_track
                   taken from the dimensions of the last AOD file read
      latlons      placeholder list of 'Unknown', one entry per data row
      datestr      dates derived from the AOD file orbits

    Only constructs valid on both Python 2 and Python 3 are used for
    printing, index arithmetic and the pickle file.
    """

    def progress(text):
      # Same output as the Python 2 "print text," idiom (text, then a
      # space, no newline) without relying on the print statement.
      sys.stdout.write(text + ' ')

    def read_band(sd, color):
      # Read four slices (starting at index 60 of the first axis) of one
      # colour band as a 2048 x 2048 grid, then scale every 64 x 64 tile
      # by the matching entry of the band's 32 x 32 conversion factors.
      dataset = sd.select('%s Radiance/RDQI' % color)
      dim = dataset.dimensions()
      band = dataset.get((60, 0, 0),
                         (4,
                          dim['XDim:%sBand' % color],
                          dim['YDim:%sBand' % color]),
                         (1, 1, 1)).reshape([2048, 2048])
      band[band == 65515] = 0 # PADDING

      factor_ds = sd.select('%sConversionFactor' % color)
      dim = factor_ds.dimensions()
      factors = factor_ds.get((60, 0, 0),
                              (4,
                               dim['XDim:BRF Conversion Factors'],
                               dim['YDim:BRF Conversion Factors']),
                              (1, 1, 1)).reshape((32, 32))
      factors[factors < 0] = 0

      for x in range(0, band.shape[0], 64):
        for y in range(0, band.shape[1], 64):
          # Floor division keeps the tile index an integer on Python 3.
          band[x:x+64, y:y+64] = np.multiply(band[x:x+64, y:y+64],
                                             factors[x // 64, y // 64])
      return band

    def make_rgb(data_r, data_g, data_b):
      # Combine the three bands into a uint8 image, realign the row
      # groups and crop away the all-white border.
      im = np.zeros([2048, 2048, 3])
      im[..., 0] = data_r / float(data_r.max()) * 256
      im[..., 1] = data_g / float(data_g.max()) * 256
      im[..., 2] = data_b / float(data_b.max()) * 256
      im = im.astype('uint8')

      # Zero-valued pixels are painted white.
      im[np.equal(im, 0)] = 255

      # Shift row groups sideways by 64 columns (rows from 1536 on are
      # shifted twice because both slices below cover them).
      im[0:512, 64:, :] = im[0:512, :-64, :]
      im[1024:, :-64, :] = im[1024:, 64:, :]
      im[1536:, :-64, :] = im[1536:, 64:, :]

      isnotwhite = np.not_equal(im, 255)
      isnotwhiterows = isnotwhite.sum(1)
      isnotwhitecols = isnotwhite.sum(0)
      goodrows = [r for r in range(im.shape[0])
                  if isnotwhiterows[r, :].sum() > 0]
      goodcols = [c for c in range(im.shape[1])
                  if isnotwhitecols[c, :].sum() > 0]
      return im[goodrows[0]:goodrows[-1], goodcols[0]:goodcols[-1], :]

    data = []
    rgbimages = []

    datestr = []
    datestr2 = []

    # Read in the AOD (from local mode) data; this is what we'll analyze
    files = sorted(os.listdir(AODdirname))
    i = 0
    for f in files:
      if not fnmatch.fnmatch(f, '*.hdf'):
        continue
      progress(" %d / %d " % (i, len(files)))
      i += 1

      filename = AODdirname + f

      # Check that filename exists and is an HDF file
      if HDF.ishdf(filename) != 1:
        print("File %s cannot be found or is not an HDF-4 file." % filename)
        continue

      orbit = int(filename.split('_')[5].split('O')[1])
      thisdate = MISRData.orbit_to_date(orbit)
      print("orbit: %d -> %s " % (orbit, thisdate))
      datestr.append(thisdate)

      sd = SD.SD(filename)
      try:
        # This is 3 (SOMBlock) x 32 (x) x 128 (y) x 4 (bands)
        dataset = sd.select('RegBestEstimateSpectralOptDepth')
        dim = dataset.dimensions()
        along_track = (dim['SOMBlockDim:RegParamsAer'] *
                       dim['XDim:RegParamsAer'])
        cross_track = dim['YDim:RegParamsAer']
        # Get all of the data for the green band (band = 1)
        data_now = dataset.get((0, 0, 0, 1),
                               (dim['SOMBlockDim:RegParamsAer'],
                                dim['XDim:RegParamsAer'],
                                dim['YDim:RegParamsAer'],
                                1)).squeeze()
      finally:
        # Close the file even if a dataset is missing or unreadable
        sd.end()

      # Reshape to concatenate blocks
      nrows = data_now.shape[0] * data_now.shape[1]
      ncols = data_now.shape[2]
      data_now = data_now.reshape((nrows, ncols))

      # Set -9999 values to NaN
      data_now[np.equal(data_now, -9999)] = float('NaN')

      # One column vector per time point
      data.append(data_now.reshape((-1, 1)))

      progress('.')
      sys.stdout.flush()

    # NOTE: with a single time point squeeze() also drops the time axis,
    # and the two-value format below then fails.
    data = np.asarray(data).squeeze().T
    print(data.shape)

    print("")
    # Data is now n x d, where n = # pixels and d = # timepts
    print('Read data set with %d pixels, %d time points.' % data.shape)

    # TODO: Add lat/lon coords here
    latlons = ['Unknown'] * data.shape[0]

    # Read in the raw data (for later visualization)
    files = sorted(os.listdir(rawdirname + 'AN/'))
    print("+++++++++++++")
    print(len(files))
    iii = 0
    for f in files:
      if not fnmatch.fnmatch(f, '*.hdf'):
        continue
      filename = rawdirname + 'AN/' + f
      progress(" %d / %d " % (iii, len(files)))
      iii += 1

      # Check that filename exists and is an HDF file
      if HDF.ishdf(filename) != 1:
        print("File %s cannot be found or is not an HDF-4 file." % filename)
        continue

      # We'll assume there's a one-to-one correspondence
      # with the AOD data.  But print it out anyway as a check.
      orbit = int(filename.split('_')[6].split('O')[1])
      thisdate = MISRData.orbit_to_date(orbit)
      print("orbit: %d -> %s " % (orbit, thisdate))
      datestr2.append(thisdate)

      sd = SD.SD(filename)
      try:
        data_g = read_band(sd, 'Green')
        data_r = read_band(sd, 'Red')
        data_b = read_band(sd, 'Blue')
      finally:
        # Close the file even if a band is missing or unreadable
        sd.end()

      rgbimages.append(make_rgb(data_r, data_g, data_b))

      progress('.')
      sys.stdout.flush()

    print(len(datestr))

    # Check that the raw and AOD sequences are corresponding.  A length
    # mismatch is reported instead of raising IndexError part-way through.
    if len(datestr) != len(datestr2):
      print("ERROR!  Date sequences do not align.")
      print("  %d AOD dates vs. %d raw dates" % (len(datestr), len(datestr2)))
    for i, (aod_date, raw_date) in enumerate(zip(datestr, datestr2)):
      if aod_date != raw_date:
        print("ERROR!  Date sequences do not align.")
        print("  detected at index %d: AOD %s, raw %s" % (i, aod_date, raw_date))

    # Pickle data is binary: write in 'wb' mode and always close the file.
    with open(outfile, 'wb') as outf:
      pickle.dump((data, rgbimages, along_track, cross_track,
                   latlons, datestr), outf)
Exemple #4
0
  def read_misr_dir(cls, rawdirname, AODdirname, outfile):
    """read_misr_dir(rawdirname, AODdirname, outfile)

    Read in raw MISR data from .hdf files in rawdirname,
    and AOD data from all .hdf files in AODdirname.
    Pickle the result and save it to outfile.
    Note: does NOT update object fields.
    Follow this with a call to readin().

    Directory names are concatenated directly with file names
    (AODdirname + f and rawdirname + 'AN/' + f), so both must end with
    a path separator.  The orbit number is parsed from the
    underscore-separated fields of that concatenated path (field 5 for
    AOD files, field 6 for raw files).

    The pickled object is the tuple
    (data, rgbimages, along_track, cross_track, latlons, datestr):
      data         array with one column per AOD file read
      rgbimages    list with one cropped uint8 RGB image per raw file
      along_track, cross_track
                   taken from the dimensions of the last AOD file read
      latlons      placeholder list of 'Unknown', one entry per data row
      datestr      dates derived from the AOD file orbits

    Only constructs valid on both Python 2 and Python 3 are used for
    printing, index arithmetic and the pickle file.
    """

    def progress(text):
      # Same output as the Python 2 "print text," idiom (text, then a
      # space, no newline) without relying on the print statement.
      sys.stdout.write(text + ' ')

    def read_band(sd, color):
      # Read four slices (starting at index 60 of the first axis) of one
      # colour band as a 2048 x 2048 grid, then scale every 64 x 64 tile
      # by the matching entry of the band's 32 x 32 conversion factors.
      dataset = sd.select('%s Radiance/RDQI' % color)
      dim = dataset.dimensions()
      band = dataset.get((60, 0, 0),
                         (4,
                          dim['XDim:%sBand' % color],
                          dim['YDim:%sBand' % color]),
                         (1, 1, 1)).reshape([2048, 2048])
      band[band == 65515] = 0 # PADDING

      factor_ds = sd.select('%sConversionFactor' % color)
      dim = factor_ds.dimensions()
      factors = factor_ds.get((60, 0, 0),
                              (4,
                               dim['XDim:BRF Conversion Factors'],
                               dim['YDim:BRF Conversion Factors']),
                              (1, 1, 1)).reshape((32, 32))
      factors[factors < 0] = 0

      for x in range(0, band.shape[0], 64):
        for y in range(0, band.shape[1], 64):
          # Floor division keeps the tile index an integer on Python 3.
          band[x:x+64, y:y+64] = np.multiply(band[x:x+64, y:y+64],
                                             factors[x // 64, y // 64])
      return band

    def make_rgb(data_r, data_g, data_b):
      # Combine the three bands into a uint8 image, realign the row
      # groups and crop away the all-white border.
      im = np.zeros([2048, 2048, 3])
      im[..., 0] = data_r / float(data_r.max()) * 256
      im[..., 1] = data_g / float(data_g.max()) * 256
      im[..., 2] = data_b / float(data_b.max()) * 256
      im = im.astype('uint8')

      # Zero-valued pixels are painted white.
      im[np.equal(im, 0)] = 255

      # Shift row groups sideways by 64 columns (rows from 1536 on are
      # shifted twice because both slices below cover them).
      im[0:512, 64:, :] = im[0:512, :-64, :]
      im[1024:, :-64, :] = im[1024:, 64:, :]
      im[1536:, :-64, :] = im[1536:, 64:, :]

      isnotwhite = np.not_equal(im, 255)
      isnotwhiterows = isnotwhite.sum(1)
      isnotwhitecols = isnotwhite.sum(0)
      goodrows = [r for r in range(im.shape[0])
                  if isnotwhiterows[r, :].sum() > 0]
      goodcols = [c for c in range(im.shape[1])
                  if isnotwhitecols[c, :].sum() > 0]
      return im[goodrows[0]:goodrows[-1], goodcols[0]:goodcols[-1], :]

    data = []
    rgbimages = []

    datestr = []
    datestr2 = []

    # Read in the AOD (from local mode) data; this is what we'll analyze
    files = sorted(os.listdir(AODdirname))
    i = 0
    for f in files:
      if not fnmatch.fnmatch(f, '*.hdf'):
        continue
      progress(" %d / %d " % (i, len(files)))
      i += 1

      filename = AODdirname + f

      # Check that filename exists and is an HDF file
      if HDF.ishdf(filename) != 1:
        print("File %s cannot be found or is not an HDF-4 file." % filename)
        continue

      orbit = int(filename.split('_')[5].split('O')[1])
      thisdate = MISRData.orbit_to_date(orbit)
      print("orbit: %d -> %s " % (orbit, thisdate))
      datestr.append(thisdate)

      sd = SD.SD(filename)
      try:
        # This is 3 (SOMBlock) x 32 (x) x 128 (y) x 4 (bands)
        dataset = sd.select('RegBestEstimateSpectralOptDepth')
        dim = dataset.dimensions()
        along_track = (dim['SOMBlockDim:RegParamsAer'] *
                       dim['XDim:RegParamsAer'])
        cross_track = dim['YDim:RegParamsAer']
        # Get all of the data for the green band (band = 1)
        data_now = dataset.get((0, 0, 0, 1),
                               (dim['SOMBlockDim:RegParamsAer'],
                                dim['XDim:RegParamsAer'],
                                dim['YDim:RegParamsAer'],
                                1)).squeeze()
      finally:
        # Close the file even if a dataset is missing or unreadable
        sd.end()

      # Reshape to concatenate blocks
      nrows = data_now.shape[0] * data_now.shape[1]
      ncols = data_now.shape[2]
      data_now = data_now.reshape((nrows, ncols))

      # Set -9999 values to NaN
      data_now[np.equal(data_now, -9999)] = float('NaN')

      # One column vector per time point
      data.append(data_now.reshape((-1, 1)))

      progress('.')
      sys.stdout.flush()

    # NOTE: with a single time point squeeze() also drops the time axis,
    # and the two-value format below then fails.
    data = np.asarray(data).squeeze().T
    print(data.shape)

    print("")
    # Data is now n x d, where n = # pixels and d = # timepts
    print('Read data set with %d pixels, %d time points.' % data.shape)

    # TODO: Add lat/lon coords here
    latlons = ['Unknown'] * data.shape[0]

    # Read in the raw data (for later visualization)
    files = sorted(os.listdir(rawdirname + 'AN/'))
    print("+++++++++++++")
    print(len(files))
    iii = 0
    for f in files:
      if not fnmatch.fnmatch(f, '*.hdf'):
        continue
      filename = rawdirname + 'AN/' + f
      progress(" %d / %d " % (iii, len(files)))
      iii += 1

      # Check that filename exists and is an HDF file
      if HDF.ishdf(filename) != 1:
        print("File %s cannot be found or is not an HDF-4 file." % filename)
        continue

      # We'll assume there's a one-to-one correspondence
      # with the AOD data.  But print it out anyway as a check.
      orbit = int(filename.split('_')[6].split('O')[1])
      thisdate = MISRData.orbit_to_date(orbit)
      print("orbit: %d -> %s " % (orbit, thisdate))
      datestr2.append(thisdate)

      sd = SD.SD(filename)
      try:
        data_g = read_band(sd, 'Green')
        data_r = read_band(sd, 'Red')
        data_b = read_band(sd, 'Blue')
      finally:
        # Close the file even if a band is missing or unreadable
        sd.end()

      rgbimages.append(make_rgb(data_r, data_g, data_b))

      progress('.')
      sys.stdout.flush()

    print(len(datestr))

    # Check that the raw and AOD sequences are corresponding.  A length
    # mismatch is reported instead of raising IndexError part-way through.
    if len(datestr) != len(datestr2):
      print("ERROR!  Date sequences do not align.")
      print("  %d AOD dates vs. %d raw dates" % (len(datestr), len(datestr2)))
    for i, (aod_date, raw_date) in enumerate(zip(datestr, datestr2)):
      if aod_date != raw_date:
        print("ERROR!  Date sequences do not align.")
        print("  detected at index %d: AOD %s, raw %s" % (i, aod_date, raw_date))

    # Pickle data is binary: write in 'wb' mode and always close the file.
    with open(outfile, 'wb') as outf:
      pickle.dump((data, rgbimages, along_track, cross_track,
                   latlons, datestr), outf)