def extract_TRMM_HDF(hdf_list, layer_indexs, outdir, resolution):
    """ Extracts TRMM products from HDF to tif.

    http://pmm.nasa.gov/data-access/data-products

    :param hdf_list:      list of hdf files or directory with hdfs
    :param layer_indexs:  list of integer layer indexs
    :param outdir:        directory to place outputs
    :param resolution:    The size of a pixel in degrees, either "0.25",
                          "0.5", "1.0", "5.0" depending on the specific
                          TRMM product you are extracting.
    :return: a list of all files created as output
    """

    hdf_list = core.enf_filelist(hdf_list)
    output_filelist = []

    # look up the TRMM datatype matching the requested resolution
    datatype = datatype_library()["TRMM_{0}_GLOBAL".format(resolution)]

    # extract the requested layers from every input hdf and
    # accumulate the paths of the files written
    for hdf_file in hdf_list:
        extracted = _extract_HDF_datatype(hdf_file, layer_indexs,
                                          outdir, datatype)
        output_filelist.extend(extracted)

    return output_filelist
def extract_targz(filepaths):
    """ Batch extractor of files with tar.gz compression.

    Input list of filepaths OR a directory path with tar.gz files in it.
    Creates a folder for each input tar.gz file with its extracted contents,
    then deletes the original tar.gz file. Useful for bulk Landsat data
    extraction.

    :param filepaths: list of filepaths (or a directory) of .tar.gz archives
    """

    filepaths = core.enf_filelist(filepaths)

    for filepath in filepaths:
        # endswith() avoids matching ".tar.gz" occurring mid-path
        if filepath.endswith(".tar.gz"):
            head, tail = os.path.split(filepath)
            outdir = os.path.join(head, tail.replace(".tar.gz", ""))

            # context manager guarantees the archive handle is closed even
            # if extraction raises (the original leaked it on failure)
            with tarfile.open(filepath, 'r:gz') as tfile:
                tfile.extractall(outdir)

            os.remove(filepath)
            print("Extracted {0}".format(filepath))
    return
def extract_targz(filepaths):
    """ Batch extractor of files with tar.gz compression.

    Input list of filepaths OR a directory path with tar.gz files in it.
    Creates a folder for each input tar.gz file with its extracted contents,
    then deletes the original tar.gz file. Useful for bulk Landsat data
    extraction.

    :param filepaths: list of filepaths (or a directory) of .tar.gz archives
    """

    filepaths = core.enf_filelist(filepaths)

    for filepath in filepaths:
        # endswith() avoids matching ".tar.gz" occurring mid-path
        if filepath.endswith(".tar.gz"):
            head, tail = os.path.split(filepath)
            outdir = os.path.join(head, tail.replace(".tar.gz", ""))

            # the context manager closes the archive even on failure,
            # where the original left the handle open if extractall raised
            with tarfile.open(filepath, 'r:gz') as tfile:
                tfile.extractall(outdir)

            os.remove(filepath)
            print("Extracted {0}".format(filepath))
    return
def extract_archive(filepaths, delete_originals=False):
    """ Batch decompressor for common archive formats.

    Input list of filepaths OR a directory path with compressed files
    in it. Attempts to decompress the following formats.

    Support formats include ``.tar.gz``, ``.tar``, ``.gz``, ``.zip``.

    :param filepaths: list of filepaths to archives for extraction
    :param delete_originals: Set to "True" if archives may be deleted after
                             their contents is successful extracted.
    """

    filepaths = core.enf_filelist(filepaths)

    for filepath in filepaths:
        head, tail = os.path.split(filepath)

        if filepath.endswith(".tar.gz"):
            with tarfile.open(filepath, 'r:gz') as tfile:
                outdir = os.path.join(head, tail.replace(".tar.gz", ""))
                tfile.extractall(outdir)

        # gzip only compresses single files
        elif filepath.endswith(".gz"):
            with gzip.open(filepath, 'rb') as gzfile:
                outfile = os.path.join(head, tail.replace(".gz", ""))
                content = gzfile.read()
                with open(outfile, 'wb') as of:
                    of.write(content)

        elif filepath.endswith(".tar"):
            with tarfile.open(filepath, 'r') as tfile:
                outdir = os.path.join(head, tail.replace(".tar", ""))
                tfile.extractall(outdir)

        elif filepath.endswith(".zip"):
            with zipfile.ZipFile(filepath, "r") as zipf:
                outdir = os.path.join(head, tail.replace(".zip", ""))
                zipf.extractall(outdir)

        else:
            # unrecognized extension: skip this file and keep going.
            # (the original did "return" here, silently abandoning every
            # remaining file in the batch)
            continue

        print("Extracted {0}".format(filepath))

        if delete_originals:
            os.remove(filepath)

    return
def extract_archive(filepaths, delete_originals = False):
    """ Batch decompressor for common archive formats.

    Input list of filepaths OR a directory path with compressed files
    in it. Attempts to decompress the following formats.

    Support formats include ``.tar.gz``, ``.tar``, ``.gz``, ``.zip``.

    :param filepaths: list of filepaths to archives for extraction
    :param delete_originals: Set to "True" if archives may be deleted after
                             their contents is successful extracted.
    """

    filepaths = core.enf_filelist(filepaths)

    for filepath in filepaths:
        head, tail = os.path.split(filepath)

        if filepath.endswith(".tar.gz"):
            with tarfile.open(filepath, 'r:gz') as tfile:
                outdir = os.path.join(head, tail.replace(".tar.gz", ""))
                tfile.extractall(outdir)

        # gzip only compresses single files
        elif filepath.endswith(".gz"):
            with gzip.open(filepath, 'rb') as gzfile:
                outfile = os.path.join(head, tail.replace(".gz", ""))
                content = gzfile.read()
                with open(outfile, 'wb') as of:
                    of.write(content)

        elif filepath.endswith(".tar"):
            with tarfile.open(filepath, 'r') as tfile:
                outdir = os.path.join(head, tail.replace(".tar", ""))
                tfile.extractall(outdir)

        elif filepath.endswith(".zip"):
            with zipfile.ZipFile(filepath, "r") as zipf:
                outdir = os.path.join(head, tail.replace(".zip", ""))
                zipf.extractall(outdir)

        else:
            # unrecognized extension: skip it rather than "return", which
            # aborted processing of every remaining file in the list
            continue

        print("Extracted {0}".format(filepath))

        if delete_originals:
            os.remove(filepath)

    return
def extract_archive(filepaths):
    """ Batch decompressor for common archive formats.

    Input list of filepaths OR a directory path with compressed files
    in it. Attempts to decompress the following formats.

    formats:
        .tar.gz
        .tar
        .gz
        .zip

    :param filepaths: list of filepaths to archives for extraction
    """

    filepaths = core.enf_filelist(filepaths)

    for filepath in filepaths:
        head, tail = os.path.split(filepath)

        # endswith() replaces the original substring ("in") tests, which
        # misfired on names like "data.gz.txt" and fed non-archives to the
        # decompressors
        if filepath.endswith(".tar.gz"):
            with tarfile.open(filepath, 'r:gz') as tfile:
                outdir = os.path.join(head, tail.replace(".tar.gz", ""))
                tfile.extractall(outdir)

        # gzip only compresses single files
        elif filepath.endswith(".gz"):
            with gzip.open(filepath, 'rb') as gzfile:
                outfile = os.path.join(head, tail.replace(".gz", ""))
                content = gzfile.read()
                with open(outfile, 'wb') as of:
                    of.write(content)

        elif filepath.endswith(".tar"):
            with tarfile.open(filepath, 'r') as tfile:
                outdir = os.path.join(head, tail.replace(".tar", ""))
                tfile.extractall(outdir)

        elif filepath.endswith(".zip"):
            with zipfile.ZipFile(filepath, "r") as zipf:
                outdir = os.path.join(head, tail.replace(".zip", ""))
                zipf.extractall(outdir)

        else:
            # unrecognized extension: skip this file and keep going.
            # (the original returned here, abandoning the rest of the batch)
            continue

        #os.remove(filepath)
        print("Extracted {0}".format(filepath))
    return
def extract_archive(filepaths):
    """ Batch decompressor for common archive formats.

    Input list of filepaths OR a directory path with compressed files
    in it. Attempts to decompress the following formats.

    formats:
        .tar.gz
        .tar
        .gz
        .zip

    :param filepaths: list of filepaths to archives for extraction
    """

    filepaths = core.enf_filelist(filepaths)

    for filepath in filepaths:
        head, tail = os.path.split(filepath)

        # endswith() rather than substring tests: ".gz" in "x.gz.txt" was
        # True in the original and sent non-archives to the decompressors
        if filepath.endswith(".tar.gz"):
            with tarfile.open(filepath, 'r:gz') as tfile:
                outdir = os.path.join(head, tail.replace(".tar.gz", ""))
                tfile.extractall(outdir)

        # gzip only compresses single files
        elif filepath.endswith(".gz"):
            with gzip.open(filepath, 'rb') as gzfile:
                outfile = os.path.join(head, tail.replace(".gz", ""))
                content = gzfile.read()
                with open(outfile, 'wb') as of:
                    of.write(content)

        elif filepath.endswith(".tar"):
            with tarfile.open(filepath, 'r') as tfile:
                outdir = os.path.join(head, tail.replace(".tar", ""))
                tfile.extractall(outdir)

        elif filepath.endswith(".zip"):
            with zipfile.ZipFile(filepath, "r") as zipf:
                outdir = os.path.join(head, tail.replace(".zip", ""))
                zipf.extractall(outdir)

        else:
            # skip unrecognized files instead of returning, which aborted
            # processing of every remaining file in the list
            continue

        #os.remove(filepath)
        print("Extracted {0}".format(filepath))
    return
def enf_rastlist(filelist):
    """ ensures a list of inputs filepaths contains only valid raster types """

    # run the input through the standard filelist sanitization first
    filelist = core.enf_filelist(filelist)

    # keep only entries that exist on disk and pass the raster check
    return [fname for fname in filelist
            if os.path.isfile(fname) and is_rast(fname)]
def enf_rastlist(filelist):
    """ ensures a list of inputs filepaths contains only valid raster types """

    # first place the input through the same requirements of any filelist
    filelist = core.enf_filelist(filelist)

    new_filelist = []

    # keep only entries that exist on disk and pass the raster check.
    # (removed the dead local "ext = filename[-3:]" which was never used)
    for filename in filelist:
        if os.path.isfile(filename):
            if is_rast(filename):
                new_filelist.append(filename)

    return new_filelist
def enf_rastlist(filelist):
    """ Ensures a list of inputs filepaths contains only valid raster types

    :param filelist: a list of filepaths that contains some raster filetypes
    :return new_filelist: a list of filepaths with all non-raster files removed
    """

    # apply the standard filelist requirements before filtering
    candidates = core.enf_filelist(filelist)

    # retain only paths that both exist and are recognized rasters
    new_filelist = [path for path in candidates
                    if os.path.isfile(path) and is_rast(path)]

    return new_filelist
def extract_GPM_IMERG(hdf_list, layer_indexs, outdir = None, resolution = "0.1"):
    """ Extracts GPM_IMERG data from its HDF5 format.

    :param hdf_list:      list of hdf files or directory with hdfs
    :param layer_indexs:  list of integer layer indexs
    :param outdir:        directory to place outputs
    :param resolution:    The size of a pixel in degrees, either "0.1"
                          or "0.15" depending on GPM product.
    :return: a list of all files created as output

    Typical contents of a GPM HDF are:

    == =========== ================================ ==============
    ID layer shape Layer name                       data type
    == =========== ================================ ==============
    0  [3600x1800] HQobservationTime                (16-bit int)
    1  [3600x1800] HQprecipSource                   (16-bit int)
    2  [3600x1800] HQprecipitation                  (32-bit float)
    3  [3600x1800] IRkalmanFilterWeight             (16-bit int)
    4  [3600x1800] IRprecipitation                  (32-bit float)
    5  [3600x1800] precipitationCal                 (32-bit float)
    6  [3600x1800] precipitationUncal               (32-bit float)
    7  [3600x1800] probabilityLiquidPrecipitation   (16-bit int)
    8  [3600x1800] randomError                      (32-bit float)
    == =========== ================================ ==============
    """

    hdf_list = core.enf_filelist(hdf_list)
    output_filelist = []

    # look up the GPM datatype matching the requested resolution
    datatype = datatype_library()["GPM_IMERG_{0}_GLOBAL".format(resolution)]

    # extract the requested layers from each hdf and collect the new paths
    for hdf_file in hdf_list:
        new_files = _extract_HDF_datatype(hdf_file, layer_indexs, outdir,
                                          datatype, nodata_value = -9999.9)
        output_filelist.extend(new_files)

    return output_filelist
def extract_MPE_NetCDF(netcdf_list, layer_indexs, outdir, area):
    """ Extracts MPE data from its native NetCDF format.

    (docstring previously said "SMOS" and "hdf files"; this function loads
    the MPE_HRAP datatype from NetCDF inputs)

    :param netcdf_list:   list of netcdf files or directory with netcdfs
    :param layer_indexs:  list of integer layer indices
    :param outdir:        directory to place outputs
    :param area:          presently only supports "CONUS"
    :return: A list of all files created as output
    """

    netcdf_list = core.enf_filelist(netcdf_list)
    output_filelist = []

    # load the MPE datatype from the library
    dtype = datatype_library()["MPE_HRAP_{0}".format(area)]

    # for every netcdf file in the input list
    for netcdf in netcdf_list:
        data = _extract_NetCDF_layer_data(netcdf, layer_indexs)

        for layer_index in layer_indexs:
            dataset = data[layer_index]
            outpath = core.create_outname(outdir, netcdf, str(layer_index), "tif")
            print("creating dataset at {0}".format(outpath))

            # write the tif with the MPE_HRAP projection and geotransform
            _gdal_dataset_to_tif(dataset, outpath,
                                 cust_projection=dtype.projectionTXT,
                                 cust_geotransform=dtype.geotransform,
                                 force_custom=False,
                                 nodata_value=-1)

            output_filelist.append(outpath)

    return output_filelist
def extract_MPE_NetCDF(netcdf_list, layer_indexs, outdir, area):
    """ Extracts MPE data from its native NetCDF format.

    (docstring previously said "SMOS" and "hdf files"; this function loads
    the MPE_HRAP datatype from NetCDF inputs)

    :param netcdf_list:   list of netcdf files or directory with netcdfs
    :param layer_indexs:  list of integer layer indices
    :param outdir:        directory to place outputs
    :param area:          presently only supports "CONUS"
    :return: A list of all files created as output
    """

    netcdf_list = core.enf_filelist(netcdf_list)
    output_filelist = []

    # load the MPE datatype from the library
    dtype = datatype_library()["MPE_HRAP_{0}".format(area)]

    # for every netcdf file in the input list
    for netcdf in netcdf_list:
        data = _extract_NetCDF_layer_data(netcdf, layer_indexs)

        for layer_index in layer_indexs:
            dataset = data[layer_index]
            outpath = core.create_outname(outdir, netcdf, str(layer_index), "tif")
            print("creating dataset at {0}".format(outpath))

            # write the tif with the MPE_HRAP projection and geotransform
            _gdal_dataset_to_tif(dataset, outpath,
                                 cust_projection = dtype.projectionTXT,
                                 cust_geotransform = dtype.geotransform,
                                 force_custom = False,
                                 nodata_value = -1)

            output_filelist.append(outpath)

    return output_filelist
def extract_from_hdf(filelist, layerlist, layernames = False, outdir = None):
    """ Extracts tifs from MODIS extract_HDF_layer files, ensures proper projection.

    :param filelist:    list of '.hdf' files from which data should be
                        extracted (or a directory)
    :param layerlist:   list of layer numbers to pull out as individual tifs,
                        integers such as [0,4] for the 0th and 4th layer
                        respectively
    :param layernames:  list of layer names to put more descriptive file
                        suffixes to each layer
    :param outdir:      directory to which tif files should be saved. If left
                        as None, files are saved in the same directory as the
                        input file was found.
    :return failed:     list of input files that could not be extracted
    """

    if outdir is not None:
        if not os.path.exists(outdir):
            os.makedirs(outdir)

    # enforce lists for iteration purposes and sanitize inputs.
    # build a filtered copy instead of calling remove() on the list being
    # iterated -- the original skipped elements that followed a removal
    filelist = core.enf_filelist(filelist)
    filelist = [f for f in filelist if '.hdf' in f and '.xml' not in f]

    layerlist = core.enf_list(layerlist)
    layernames = core.enf_list(layernames)

    # ignore user input layernames if they are invalid, but print warnings.
    # (the original called Warning(...), which only constructs an exception
    # object -- nothing was raised or displayed, so the user never saw it)
    if layernames and not len(layernames) == len(layerlist):
        print('Layernames must be the same length as layerlist!')
        print('Omitting user defined layernames!')
        layernames = False

    # create empty list to add failed file names into
    failed = []

    # iterate through every file in the input filelist
    for infile in filelist:

        # pull the filename and path apart
        path, name = os.path.split(infile)
        arcpy.env.workspace = path

        for i, layer in enumerate(layerlist):

            # specify the layer names
            if layernames:
                layername = layernames[i]
            else:
                layername = str(layer).zfill(3)

            # use the input output directory if the user input one,
            # otherwise build one next to the source file
            if outdir:
                outname = os.path.join(outdir, "{0}_{1}.tif".format(name[:-4], layername))
            else:
                outname = os.path.join(path, "{0}_{1}.tif".format(name[:-4], layername))

            # perform the extracting and projection definition
            try:
                # extract the subdataset
                arcpy.ExtractSubDataset_management(infile, outname, str(layer))

                # define the projection as the MODIS Sinusoidal
                define_projection(outname)
                print("Extracted {0}".format(os.path.basename(outname)))

            except Exception:
                # narrowed from a bare except so SystemExit/KeyboardInterrupt
                # still propagate
                print("Failed to extract {0} from {1}".format(os.path.basename(outname),
                                                              os.path.basename(infile)))
                failed.append(infile)

    print("Finished extracting all hdfs! \n")
    return failed
def extract_from_hdf(file_list, layer_list, layer_names = False, outdir = None):
    """ Extracts tifs from MODIS HDF files, ensures proper projection.

    :param file_list:   either a list of '.hdf' files from which data should
                        be extracted, or a directory containing '.hdf' files.
    :param layer_list:  list of layer numbers to pull out as individual tifs
                        should be integers such as [0,4] for the 0th and 4th
                        layer respectively.
    :param layer_names: list of layer names to put more descriptive file
                        suffixes to each layer
    :param outdir:      directory to which tif files should be saved
                        if outdir is left as 'None', files are saved in the
                        same directory as the input file was found.

    :return output_filelist: returns a list of all files created by this function
    """

    if outdir is not None:
        if not os.path.exists(outdir):
            os.makedirs(outdir)

    # enforce lists for iteration purposes and sanitize inputs.
    # filter into a new list rather than removing from the list being
    # iterated -- removal during iteration skips the following element
    file_list = core.enf_filelist(file_list)
    file_list = [f for f in file_list
                 if '.hdf' in f and '.xml' not in f and '.ovr' not in f]

    layer_list = core.enf_list(layer_list)
    layer_names = core.enf_list(layer_names)

    # ignore user input layer_names if they are invalid, but print warnings.
    # (Warning(...) only built an exception object; it neither raised nor
    # printed anything, so these messages were invisible)
    if layer_names and not len(layer_names) == len(layer_list):
        print('layer_names must be the same length as layer_list!')
        print('Omitting user defined layer_names!')
        layer_names = False

    output_filelist = []

    # iterate through every file in the input file_list
    for infile in file_list:

        # pull the filename and path apart
        path, name = os.path.split(infile)
        arcpy.env.workspace = path

        for i, layer in enumerate(layer_list):

            # specify the layer names
            if layer_names:
                layername = layer_names[i]
            else:
                layername = str(layer).zfill(3)

            # use the input output directory if the user input one,
            # otherwise build one next to the source file
            if outdir:
                outname = os.path.join(outdir, "{0}_{1}.tif".format(name[:-4], layername))
            else:
                outname = os.path.join(path, "{0}_{1}.tif".format(name[:-4], layername))

            # perform the extracting and projection definition
            try:
                arcpy.ExtractSubDataset_management(infile, outname, str(layer))
                define_projection(outname)
                output_filelist.append(outname)
                print("Extracted {0}".format(os.path.basename(outname)))

            except Exception:
                # narrowed from a bare except so SystemExit/KeyboardInterrupt
                # still propagate
                print("Failed to extract {0} from {1}".format(os.path.basename(outname),
                                                              os.path.basename(infile)))

    return output_filelist
def extract_from_hdf(filelist, layerlist, layernames=False, outdir=None):
    """ Extracts tifs from MODIS extract_HDF_layer files, ensures proper projection.

    :param filelist:    list of '.hdf' files from which data should be
                        extracted (or a directory)
    :param layerlist:   list of layer numbers to pull out as individual tifs,
                        integers such as [0,4] for the 0th and 4th layer
                        respectively
    :param layernames:  list of layer names to put more descriptive file
                        suffixes to each layer
    :param outdir:      directory to which tif files should be saved. If left
                        as None, files are saved in the same directory as the
                        input file was found.
    :return failed:     list of input files that could not be extracted
    """

    if outdir is not None:
        if not os.path.exists(outdir):
            os.makedirs(outdir)

    # enforce lists for iteration purposes and sanitize inputs.
    # filter with a comprehension: the original removed entries from the
    # list while iterating it, which skips the element after each removal
    filelist = core.enf_filelist(filelist)
    filelist = [f for f in filelist if '.hdf' in f and '.xml' not in f]

    layerlist = core.enf_list(layerlist)
    layernames = core.enf_list(layernames)

    # ignore user input layernames if they are invalid, but print warnings.
    # (Warning(...) only constructed an exception object without raising or
    # displaying it, so the original's warnings were silent no-ops)
    if layernames and not len(layernames) == len(layerlist):
        print('Layernames must be the same length as layerlist!')
        print('Omitting user defined layernames!')
        layernames = False

    # create empty list to add failed file names into
    failed = []

    # iterate through every file in the input filelist
    for infile in filelist:

        # pull the filename and path apart
        path, name = os.path.split(infile)
        arcpy.env.workspace = path

        for i, layer in enumerate(layerlist):

            # specify the layer names
            if layernames:
                layername = layernames[i]
            else:
                layername = str(layer).zfill(3)

            # use the input output directory if the user input one,
            # otherwise build one next to the source file
            if outdir:
                outname = os.path.join(
                    outdir, "{0}_{1}.tif".format(name[:-4], layername))
            else:
                outname = os.path.join(
                    path, "{0}_{1}.tif".format(name[:-4], layername))

            # perform the extracting and projection definition
            try:
                # extract the subdataset
                arcpy.ExtractSubDataset_management(infile, outname, str(layer))

                # define the projection as the MODIS Sinusoidal
                define_projection(outname)
                print("Extracted {0}".format(os.path.basename(outname)))

            except Exception:
                # narrowed from a bare except so SystemExit/KeyboardInterrupt
                # still propagate
                print("Failed to extract {0} from {1}".format(
                    os.path.basename(outname), os.path.basename(infile)))
                failed.append(infile)

    print("Finished extracting all hdfs! \n")
    return failed
def extract_HDF_layers(filelist, layerlist, layernames = None, outdir = None):
    """ Function extracts tifs from HDFs.

    Use "Extract_MODIS_HDF" in the modis module for better handling of
    MODIS data with sinusoidal projections.

    :param filelist:    list of '.hdf' files from which data should be extracted
    :param layerlist:   list of layer numbers to pull out as individual tifs,
                        integers such as [0,4] for the 0th and 4th layer
                        respectively
    :param layernames:  list of layer names to put more descriptive names to
                        each layer
    :param outdir:      directory to which tif files should be saved. If left
                        as None, files are saved in the same directory as the
                        input file was found.
    :return produced_files: list of filepaths created by this function
    """

    # Set up initial arcpy modules, workspace, and parameters, and sanitize inputs.
    arcpy.env.overwriteOutput = True

    # enforce lists for iteration purposes
    filelist = core.enf_filelist(filelist)
    layerlist = core.enf_list(layerlist)
    layernames = core.enf_list(layernames)

    # ignore user input layernames if they are invalid, but print warnings.
    # Reset to None, not False: the per-layer branch below tests
    # "is not None", so the original's False fell into the subscripting
    # path and crashed with a TypeError
    if layernames and not len(layernames) == len(layerlist):
        print('layernames must be the same length as layerlist!')
        print('ommiting user defined layernames!')
        layernames = None

    # create empty list to add filenames into
    produced_files = []

    # iterate through every file in the input filelist
    for infile in filelist:

        # pull the filename and path apart
        path, name = os.path.split(infile)
        arcpy.env.workspace = path

        # resolve this file's output folder into a local variable: the
        # original overwrote the "outdir" argument on the first file, so
        # later files in other folders were written into the first folder
        if outdir is not None:
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            file_outdir = outdir
        else:
            file_outdir = os.path.dirname(infile)

        for i in range(len(layerlist)):
            layer = layerlist[i]

            # specify the layer names
            if layernames is not None:
                layername = layernames[i]
            else:
                layername = str(layer).zfill(3)

            outname = core.create_outname(file_outdir, infile, layername, ext = "tif")

            # perform the extracting and projection definition
            try:
                # extract the subdataset
                arcpy.ExtractSubDataset_management(infile, outname, str(layer))
                print('Extracted ' + outname)
                produced_files.append(outname)
            except Exception:
                # narrowed from a bare except so SystemExit/KeyboardInterrupt
                # still propagate
                print('Failed to extract ' + outname + ' from ' + infile)

    return produced_files
def gap_fill_temporal(rasterlist, outdir = False):
    """ Fills temporal gaps in a raster sequence with the last good value.

    This function is designed to input a time sequence of rasters with
    partial voids and output a copy of the first input image with every
    void pixel filled from the first subsequent raster that has data at
    that location. Values are filled based on the list's ordering (a gap
    in raster 1 first tries raster 2, then raster 3, and so on); pixels
    with no data in any raster remain NaN.

    :param rasterlist:  a list of filepaths for rasters with which to fill
                        gaps. The first item in this list is the base raster.
    :param outdir:      the path to the desired output folder. Optional --
                        "False" by default, in which case the output tiff is
                        placed in the same folder as the first input raster.
    :return outname:    filepath of the gap-filled tiff that was written
    """

    # enforce the list of rasters to ensure it's sanitized
    rasterlist = core.enf_filelist(rasterlist)

    # convert each raster to an array and remember its metadata object.
    # NOTE(review): "metadata" ends up holding the LAST raster's metadata,
    # which is what the original code used for the output (it reused the
    # loop variable after the loop) -- presumably all inputs share one
    # grid/geotransform; confirm before relying on it.
    arr_list = []
    for raster in rasterlist:
        numpy_result = to_numpy(raster)
        arr_list.append(numpy_result[0].data)
        metadata = numpy_result[1]

    # stack the arrays into one (time, rows, cols) cube
    arr_stack = np.array(arr_list)
    depth, height, width = np.shape(arr_stack)

    # the first array is the base image that gets edited in place
    new_arr = arr_stack[0]

    # for each void (NaN) pixel in the base image, take the value from the
    # earliest later raster that has data there; leave NaN if none do
    for row in range(height):
        for col in range(width):
            if np.isnan(new_arr[row, col]):
                for z in range(1, depth):
                    if not np.isnan(arr_stack[z, row, col]):
                        new_arr[row, col] = arr_stack[z, row, col]
                        break

    # separate the filename from the first input array
    inname = os.path.splitext(rasterlist[0])[0]

    # create an output name, in outdir when given, else next to the input
    if outdir:
        outdir = os.path.abspath(outdir)
        name = "{0}_gapfilled.tif".format(os.path.split(inname)[1])
        outname = os.path.join(outdir, name)
    else:
        outname = "{0}_gapfilled.tif".format(inname)

    # convert the edited array to a tiff
    from_numpy(new_arr, metadata, outname, "NoData")

    return outname