def fetch_Landsat8_scene_list():
    """
    Downloads and extracts the most recent version of the Landsat 8
    scene_list text file for reference.

        http://landsat-pds.s3.amazonaws.com/scene_list.gz

    The gz archive and the extracted txt file are both written into the
    ``dnppy/landsat/metadata`` folder below the site-packages directory.

    :return scene_list_text_data:   a text data object read from the freshly
                                    extracted csv scene inventory.
    """

    print("Updating scene list")

    # define save path for new scene list
    # NOTE(review): index [1] assumes site.getsitepackages() returns at least
    # two entries -- confirm on the platforms this is deployed to.
    directory = site.getsitepackages()[1]
    gz_path = "{0}/dnppy/landsat/metadata/scene_list.gz".format(directory)
    txt_path = "{0}/dnppy/landsat/metadata/scene_list.txt".format(directory)

    # download then extract the gz file to a txt file.
    download_url("http://landsat-pds.s3.amazonaws.com/scene_list.gz", gz_path)
    with gzip.open(gz_path, 'rb') as gz:
        content = gz.read()

    with open(txt_path, 'wb') as f:
        # write the payload in one call: writelines() would iterate the byte
        # string element by element, and fails outright on Python 3.
        f.write(content)

    # build a new text data object from the fresh scene list
    scene_list_text_data = textio.text_data()
    scene_list_text_data.read_csv(txt_path, delim=",", has_headers=True)

    return scene_list_text_data
def fetch_Landsat8_scene_list():
    """
    Downloads and extracts the most recent version of the Landsat 8
    scene_list text file for reference.

        http://landsat-pds.s3.amazonaws.com/scene_list.gz

    The gz archive and the extracted txt file are both written into the
    ``dnppy/landsat/metadata`` folder below the site-packages directory.

    :return scene_list_text_data:   a text data object read from the freshly
                                    extracted csv scene inventory.
    """

    print("Updating scene list")

    # define save path for new scene list
    # NOTE(review): index [1] assumes site.getsitepackages() returns at least
    # two entries -- confirm on the platforms this is deployed to.
    directory = site.getsitepackages()[1]
    gz_path = "{0}/dnppy/landsat/metadata/scene_list.gz".format(directory)
    txt_path = "{0}/dnppy/landsat/metadata/scene_list.txt".format(directory)

    # download then extract the gz file to a txt file.
    download_url("http://landsat-pds.s3.amazonaws.com/scene_list.gz", gz_path)
    with gzip.open(gz_path, 'rb') as gz:
        content = gz.read()

    with open(txt_path, 'wb') as f:
        # write the payload in one call: writelines() would iterate the byte
        # string element by element, and fails outright on Python 3.
        f.write(content)

    # build a new text data object from the fresh scene list
    scene_list_text_data = textio.text_data()
    scene_list_text_data.read_csv(txt_path, delim=",", has_headers=True)

    return scene_list_text_data
def fetch_MPE(start_dto, end_dto, outdir, area=None):
    """
    Fetches Multisensor Precipitation Estimates data from weather/noaa server
    at [http://water.weather.gov/precip/p_download_new/]

    One netcdf file is requested per day in the inclusive range
    ``start_dto`` .. ``end_dto``. Days that cannot be downloaded are reported
    on stdout and skipped.

    :param start_dto:       datetime object for start date of desired range
    :param end_dto:         datetime object for end date of desired range
    :param outdir:          output directory where files should be saved (str)
    :param area:            area of interest, either "conus", "ak" or "pr"
                            for continental us, alaska, or puerto rico
                            respectively. Defaults to "conus".

    :return output_files:   list of filepaths that were successfully fetched
    """

    # set defaults
    if area is None:
        area = "conus"

    server = "http://water.weather.gov"
    output_files = []

    # use start and end datetimes to build list of dates (empty when the
    # end date precedes the start date)
    num_days = (end_dto - start_dto).days + 1
    dates = [start_dto + timedelta(days=i) for i in range(num_days)]

    # try to download all files for dates
    for date in dates:
        year = str(date.year)
        month = str(date.month).zfill(2)
        day = str(date.day).zfill(2)

        workdir = "/".join([server, "precip", "p_download_new",
                            year, month, day])
        filename = "nws_precip_{0}_{1}{2}{3}.nc".format(area, year, month, day)

        try:
            full_url = "/".join([workdir, filename])
            outname = os.path.join(outdir, filename)
            download_url(full_url, outname)
            output_files.append(outname)
            print("Downloaded '{0}'".format(filename))

        # best effort: any failure for one day is reported and skipped, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        except Exception:
            print("Could not find MPE data for '{0}' on {1}".format(area, date))

    return output_files
def fetch_Landsat8_tile(amazon_url, tilename, outdir, bands=None):
    """
    This function makes use of the amazon web service hosted Landsat 8 OLI
    data. It receives an amazon web url for a single landsat tile, and
    downloads the desired files. Files already present on disk are not
    downloaded again.

    :param amazon_url:  url to amazons page hosting these landsat tiles
    :param tilename:    landsat tile name
    :param outdir:      output directory to place landsat data
    :param bands:       list of bands to download when not all are desired,
                        options include any of
                        [1,2,3,4,5,6,7,8,9,10,11,"QA"].
                        The MTL file is ALWAYS downloaded.

    :return tilepath:   returns a filepath to the new landsat tile folder
                        with .TIFs in it
    """

    if bands is None:
        bands = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, "QA"]
    else:
        bands = core.enf_list(bands)

    # materialize as a set: membership is tested once per listed file, which
    # a one-shot map() iterator cannot support on Python 3.
    bands = set(str(band) for band in bands)

    # read the index page that lists the files of this tile, and make sure
    # the connection is released even if the read fails.
    connection = urllib.urlopen(amazon_url)
    try:
        page = connection.read().split("\n")
    finally:
        connection.close()

    print("Downloading landsat tile {0}".format(tilename))

    for line in page:
        if "<li><a href=" not in line:
            continue

        # pull filename from html code
        filename = line.split('"')[1]

        # pull out band information
        band_id = filename.replace(tilename + "_", "").split(".")[0].replace("B", "")
        good_band = band_id in bands
        mtl_file = "MTL" in band_id

        # download desired files.
        if not (good_band or mtl_file):
            continue

        link = amazon_url.replace("index.html", filename)
        savename = os.path.join(outdir, tilename, filename)

        if os.path.isfile(savename):
            print("\t Found {0}".format(filename))
            continue

        # try twice; a failure on the second attempt propagates to the caller
        try:
            download_url(link, savename)
        except Exception:
            download_url(link, savename)
        print("\tDownloaded {0}".format(filename))

    return os.path.join(outdir, tilename)
def fetch_MPE(start_dto, end_dto, outdir, area=None):
    """
    Fetches Multisensor Precipitation Estimates data from weather/noaa server
    at [http://water.weather.gov/precip/p_download_new/]

    One netcdf file is requested per day in the inclusive range
    ``start_dto`` .. ``end_dto``. Days that cannot be downloaded are reported
    on stdout and skipped.

    :param start_dto:       datetime object for start date of desired range
    :param end_dto:         datetime object for end date of desired range
    :param outdir:          output directory where files should be saved (str)
    :param area:            area of interest, either "conus", "ak" or "pr"
                            for continental us, alaska, or Puerto Rico
                            respectively. Defaults to "conus".

    :return output_files:   list of output files fetched by this function
    """

    # set defaults
    if area is None:
        area = "conus"

    server = "http://water.weather.gov"
    output_files = []

    # use start and end datetimes to build list of dates (empty when the
    # end date precedes the start date)
    num_days = (end_dto - start_dto).days + 1
    dates = [start_dto + timedelta(days=i) for i in range(num_days)]

    # try to download all files for dates
    for date in dates:
        year = str(date.year)
        month = str(date.month).zfill(2)
        day = str(date.day).zfill(2)

        workdir = "/".join([server, "precip", "p_download_new",
                            year, month, day])
        filename = "nws_precip_{0}_{1}{2}{3}.nc".format(area, year, month, day)

        try:
            full_url = "/".join([workdir, filename])
            outname = os.path.join(outdir, filename)
            download_url(full_url, outname)
            output_files.append(outname)
            print("Downloaded '{0}'".format(filename))

        # best effort: any failure for one day is reported and skipped, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        except Exception:
            print("Could not find MPE data for '{0}' on {1}".format(area, date))

    return output_files
def fetch_Landsat8_tile(amazon_url, tilename, outdir, bands=None):
    """
    This function makes use of the amazon web service hosted Landsat 8 OLI
    data. It receives an amazon web url for a single landsat tile, and
    downloads the desired files. Files already present on disk are not
    downloaded again.

    :param amazon_url:  url to amazons page hosting these landsat tiles
    :param tilename:    landsat tile name
    :param outdir:      output directory to place landsat data
    :param bands:       list of bands to download when not all are desired,
                        options include any of
                        [1,2,3,4,5,6,7,8,9,10,11,"QA"].
                        The MTL file is ALWAYS downloaded.

    :return tilepath:   returns a filepath to the new landsat tile folder
                        with .TIFs in it
    """

    if bands is None:
        bands = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, "QA"]
    else:
        bands = core.enf_list(bands)

    # materialize as a set: membership is tested once per listed file, which
    # a one-shot map() iterator cannot support on Python 3.
    bands = set(str(band) for band in bands)

    # read the index page that lists the files of this tile, and make sure
    # the connection is released even if the read fails.
    connection = urllib.urlopen(amazon_url)
    try:
        page = connection.read().split("\n")
    finally:
        connection.close()

    print("Downloading landsat tile {0}".format(tilename))

    for line in page:
        if "<li><a href=" not in line:
            continue

        # pull filename from html code
        filename = line.split('"')[1]

        # pull out band information
        band_id = filename.replace(tilename + "_", "").split(".")[0].replace("B", "")
        good_band = band_id in bands
        mtl_file = "MTL" in band_id

        # download desired files.
        if not (good_band or mtl_file):
            continue

        link = amazon_url.replace("index.html", filename)
        savename = os.path.join(outdir, tilename, filename)

        if os.path.isfile(savename):
            print("\t Found {0}".format(filename))
            continue

        # try twice; a failure on the second attempt propagates to the caller
        try:
            download_url(link, savename)
        except Exception:
            download_url(link, savename)
        print("\tDownloaded {0}".format(filename))

    return os.path.join(outdir, tilename)
def fetch_Landsat8_tile(amazon_url, tilename, outdir, bands=None):
    """
    This function makes use of the amazon web service hosted Landsat 8 OLI
    data. It receives an amazon web url for a single landsat tile, and
    downloads the desired files.

    Defaults to download all bands, but users can pass a subset of
    bands = [1,2,3,4,5,6,7,8,9,10,11,"QA"] to control which files are
    downloaded. The MTL file is ALWAYS downloaded.

    :param amazon_url:  url to amazons index page for this landsat tile
    :param tilename:    landsat tile name
    :param outdir:      output directory; files are saved in a subfolder
                        named after the tile
    :param bands:       band identifier or list of band identifiers to
                        download, all bands when None

    :return:            None
    """

    if bands is None:
        bands = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, "QA"]
    else:
        bands = core.enf_list(bands)

    # materialize as a set: membership is tested once per listed file, which
    # a one-shot map() iterator cannot support on Python 3.
    bands = set(str(band) for band in bands)

    # read the index page that lists the files of this tile, and make sure
    # the connection is released even if the read fails.
    connection = urllib.urlopen(amazon_url)
    try:
        page = connection.read().split("\n")
    finally:
        connection.close()

    print("Downloading landsat tile {0}".format(tilename))

    for line in page:
        if "<li><a href=" not in line:
            continue

        # pull filename from html code
        filename = line.split('"')[1]

        # pull out band information
        band_id = filename.replace(tilename + "_", "").split(".")[0].replace("B", "")
        good_band = band_id in bands
        mtl_file = "MTL" in band_id

        # download desired files.
        if good_band or mtl_file:
            link = amazon_url.replace("index.html", filename)
            savename = os.path.join(outdir, tilename, filename)
            download_url(link, savename)
            print("\tDownloaded {0}".format(filename))

    return
def fetch_Landsat8_tile(amazon_url, tilename, outdir, bands=None):
    """
    This function makes use of the amazon web service hosted Landsat 8 OLI
    data. It receives an amazon web url for a single landsat tile, and
    downloads the desired files.

    Defaults to download all bands, but users can pass a subset of
    bands = [1,2,3,4,5,6,7,8,9,10,11,"QA"] to control which files are
    downloaded. The MTL file is ALWAYS downloaded.

    :param amazon_url:  url to amazons index page for this landsat tile
    :param tilename:    landsat tile name
    :param outdir:      output directory; files are saved in a subfolder
                        named after the tile
    :param bands:       band identifier or list of band identifiers to
                        download, all bands when None

    :return:            None
    """

    if bands is None:
        bands = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, "QA"]
    else:
        bands = core.enf_list(bands)

    # materialize as a set: membership is tested once per listed file, which
    # a one-shot map() iterator cannot support on Python 3.
    bands = set(str(band) for band in bands)

    # read the index page that lists the files of this tile, and make sure
    # the connection is released even if the read fails.
    connection = urllib.urlopen(amazon_url)
    try:
        page = connection.read().split("\n")
    finally:
        connection.close()

    print("Downloading landsat tile {0}".format(tilename))

    for line in page:
        if "<li><a href=" not in line:
            continue

        # pull filename from html code
        filename = line.split('"')[1]

        # pull out band information
        band_id = filename.replace(tilename + "_", "").split(".")[0].replace("B", "")
        good_band = band_id in bands
        mtl_file = "MTL" in band_id

        # download desired files.
        if good_band or mtl_file:
            link = amazon_url.replace("index.html", filename)
            savename = os.path.join(outdir, tilename, filename)
            download_url(link, savename)
            print("\tDownloaded {0}".format(filename))

    return
def Landsat_WELD(product, tiles, years, outdir):
    """
    Fetch WELD data from the server at [http://e4ftl01.cr.usgs.gov/WELD]

    Weld data is corrected and processed Landsat 5 and 7 data that is
    distributed in the MODIS sinusoidal projection and grid format. Read more
    about WELD data.
        https://landsat.usgs.gov/WELD.php
        http://globalmonitoring.sdstate.edu/projects/weldglobal/

    Files are saved in one subfolder of ``outdir`` per tile. If the server
    listing cannot be obtained, a message is printed and nothing is
    downloaded.

    :param product:     WELD product to download such as 'USWK','USMO','USYR'
    :param tiles:       list of tiles to grab such as ['h11v12','h11v11']
    :param years:       list of years to grab such as range(2001,2014)
    :param outdir:      output directory to save downloaded files

    :return:            None
    """

    # check formats
    tiles = core.enf_list(tiles)
    years = [str(year) for year in core.enf_list(years)]

    # create output directories
    for tile in tiles:
        tile_dir = os.path.join(outdir, tile)
        if not os.path.exists(tile_dir):
            os.makedirs(tile_dir)

    print('{Fetch_Landsat_WELD} Connecting to servers!')

    # Map the contents of the directory
    site = 'http://e4ftl01.cr.usgs.gov/WELD/WELD' + product + '.001'
    try:
        dates = list_http(site)
    except Exception:
        # without a directory listing there is nothing to iterate over, so
        # stop here instead of continuing with an undefined `dates`.
        print('{Fetch_Landsat_WELD} Could not connect to site! check inputs!')
        return

    # find just the folders within the desired year range.
    good_dates = []
    for date in dates:
        try:
            y, m, d = date.split(".")
        except (ValueError, AttributeError):
            # not a "year.month.day" folder name
            continue
        if y in years:
            good_dates.append(date)

    print('Found ' + str(len(good_dates)) + ' days within year range')

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:
        files = list_http(site + '/' + good_date)

        for afile in files:
            # only list files with desired tilenames and not preview jpgs
            if '.jpg' in afile:
                continue

            for tile in tiles:
                if tile in afile:
                    # assemble the address
                    address = '/'.join([site, good_date, afile])
                    print('{Fetch_Landsat_WELD} Downloading ' + address)

                    # download the file.
                    outname = os.path.join(outdir, tile, afile)
                    download_url(address, outname)
    return
def download_urls(url_list, outdir, file_types=None):
    """
    Downloads a list of files and reports the ones that failed.

    Every url in the list is attempted once. After a failure the pause before
    the next attempt grows by five seconds; it is reset to zero after a
    success. The returned list of failed urls lets a caller loop and retry
    until all downloads succeed or a retry limit is reached.

    :param url_list:    array of urls, probably as read from a text file
    :param outdir:      folder where files are to be placed after download.
                        Created if it does not exist.
    :param file_types:  list of file types to download. Useful for excluding
                        extraneous metadata by only downloading 'hdf' or 'tif'
                        for example. A url is kept when one of these strings
                        occurs in the last four characters of its file name.
                        Please note that often times, you actually NEED the
                        metadata.

    :return failed:     list of urls which failed download
    """

    failed = []
    url_list = core.enf_list(url_list)

    # creates output folder at desired path if it doesn't already exist
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # establish a wait time that will increase when downloads fail. This helps
    # to reduce the frequency of REVERB server rejections for requesting too
    # many downloads
    wait = 0

    for raw_url in url_list:
        url = raw_url.rstrip()
        name = url.split("/")[-1]

        # Determine whether or not to download the file based on filetype.
        if file_types is not None:
            wanted = any(filetype in name[-4:] for filetype in file_types)
            if not wanted:
                continue

        # attempt download of the file
        try:
            # wait for the wait time before attempting writing a file
            time.sleep(wait)
            download_url(url, os.path.join(outdir, name))
            print("{0} is downloaded {1}".format(name, wait))

            # drop the wait time again once a download succeeds.
            wait = 0

        # record the failure and wait longer next time. Only ordinary errors
        # are caught so the user can still interrupt a long run.
        except Exception:
            print("{0} will be retried! {1}".format(name, wait))
            wait += 5
            failed.append(url)

    print("Finished downloading urls!")
    return failed
def fetch_GPM_IMERG(start_dto, end_dto, outdir, product="gis", time_res="1day"):
    """
    Fetches 30 minute resolution GPM IMERG data from an ftp server. Several
    restrictions exist for this relatively new dataset, please read in the
    input section carefully.

        http://pps.gsfc.nasa.gov/Documents/GPM_Data_Info_140616.pdf

    :param start_dto:   datetime object for starting time of study boundary
    :param end_dto:     datetime object for ending time of study boundary
    :param outdir:      output directory to save the data. Files are placed
                        in one "YYYY-MM-DD" subfolder per day.
    :param product:     either "early" , "late" or "final" for full HDF5 data
                        stacks of the respective runs, which are all at
                        30minute resolutions. OR product can be set equal to
                        "gis" (default) to find only tif averages of the
                        precipitation estimates. This gis tif data is only
                        provided for data less than one year old.
    :param time_res:    if "product" is set to "gis", specify what time
                        average period you want. options are "30min", "3hr",
                        "1day", "3day", "7day". Defaults to "1day"

    :return download_list:  list of filepaths to freshly downloaded files

    learn more at [http://pmm.nasa.gov/data-access/downloads/gpm]
    """

    # set up empty list of downloaded filepaths on local dir
    download_list = []

    # username and password info, should eventually be some DEVELOP credential.
    # this information is not at all sensitive.
    login = "******"

    # special filtering for gis type tif data to minimize data representation
    # overlap. Always a list, so the test below is exact membership rather
    # than a substring match.
    ok_minutes = []
    if product == "gis":
        if time_res == "30min":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 30)]
        elif time_res == "3hr":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 180)]
        else:
            ok_minutes = ["0000"]

    # assemble address information
    pps_server = r"ftp://jsimpson.pps.eosdis.nasa.gov"

    # set product directory
    prod_server = "/".join(["NRTPUB/imerg", product])

    # log in and list available month folders.
    foldnames, foldpaths = list_ftp(site=pps_server,
                                    dir=prod_server,
                                    username=login,
                                    password=login)

    # keep only folders with integer names. A new list is built because
    # removing items from a list while iterating over it skips entries.
    numeric_foldnames = []
    for foldname in foldnames:
        try:
            int(foldname)
        except ValueError:
            continue
        numeric_foldnames.append(foldname)

    for foldname in numeric_foldnames:
        print("exploring directory '{0}'".format(foldname))
        subdir = "/".join([prod_server, foldname])
        filenames, filepaths = list_ftp(site=pps_server,
                                        dir=subdir,
                                        username=login,
                                        password=login)

        for filepath in filepaths:
            filename = os.path.basename(filepath)

            # the dot separated fields of the file name carry the date
            # (field 4), minutes into the day (field 5) and the averaging
            # period (field 7).
            finfo = filename.split(".")
            try:
                date_cords = finfo[4]
                minutes = finfo[5]
                file_time_res = finfo[7]
                date_str = date_cords.split("-")[0]
                date = (datetime.strptime(date_str, "%Y%m%d")
                        + timedelta(minutes=int(minutes)))
            except (IndexError, ValueError):
                # one stray file should not abort the whole fetch
                print("skipping unrecognized file name '{0}'".format(filename))
                continue

            # see if this file meets criteria for download
            good_date = start_dto <= date <= end_dto

            if product == "gis":
                good_minutes = minutes in ok_minutes
                good_time = time_res == file_time_res
            else:
                good_minutes = True
                good_time = True

            # download the files
            if good_date and good_time and good_minutes:
                outname = os.path.join(outdir, date.strftime("%Y-%m-%d"), filename)
                download_url(filepath, outname, username=login, password=login)
                print("saved '{0}' in '{1}'".format(filename, outdir))
                download_list.append(outname)

    return download_list
def fetch_SRTM(ll_lat, ll_lon, ur_lat, ur_lon, product, outdir=None, mosaic=None):
    """
    downloads data from the Shuttle Radar Topography Mission (SRTM)
    [http://e4ftl01.cr.usgs.gov/SRTM/]

    This data can be used to create DEMs of a variety of resolutions.

    :param ll_lat:      latitude of lower left corner
    :param ll_lon:      longitude of lower left corner
    :param ur_lat:      latitude of upper right corner
    :param ur_lon:      longitude of upper right corner
    :param product:     short name of product you want. See
                        http://e4ftl01.cr.usgs.gov/SRTM/ . do not include the
                        version number. Example: "SRTMGL1". Note that version
                        "002" data of .DEM format does not support mosaicing.
    :param outdir:      local directory to save downloaded files. When None,
                        files are placed relative to the current working
                        directory.
    :param mosaic:      Set to TRUE to mosaic all downloaded DEM tiles as
                        "SRTM_mosaic.tif". Ignored for product "SRTMGL30".

    :return tile_list:  a list of all successfully downloaded tile filepaths
                        for further manipulation, or the filepath of the
                        mosaic when ``mosaic`` is True.

    NOTE: arcmap will open the output hgt files ONLY if they are not renamed.
    turns out arcmap does some funky things when interpreting these files.
    """

    def in_outdir(name):
        """ places a file name inside outdir, when an outdir was given """
        if outdir is not None:
            return os.path.join(outdir, name)
        return name

    # build empty return list
    tile_list = []

    # build list of lat/lon pairs from input corners
    lat_lon_pairs = []
    for i in range(int(ll_lat), int(ur_lat + 1)):
        for j in range(int(ll_lon), int(ur_lon + 1)):
            lat_lon_pairs.append((i, j))

    print(lat_lon_pairs)

    # determine product version. Strings are compared by value; an identity
    # test only matches when both strings happen to be the same object.
    is_gl30 = product == "SRTMGL30"

    if is_gl30:
        print("Download of product SRTMGL30 is supported, but arcmap does not support this filetype")
        format_string = "{2}{3}{0}{1}.{4}.dem.zip"
        version = "002"
        mosaic = None
    else:
        format_string = "{0}{1}{2}{3}.{4}.hgt.zip"
        version = "003"

    host = "http://e4ftl01.cr.usgs.gov/SRTM"
    subhost = "{0}/{1}.{2}/2000.02.11/".format(host, product, version)

    print("Connecting to host at {0}".format(subhost))

    for lat, lon in lat_lon_pairs:

        # set North-south, East-West convention.
        NS = "N" if lat >= 0 else "S"
        EW = "E" if lon >= 0 else "W"

        if is_gl30:
            # snap the coordinates onto the corner values used in the
            # SRTMGL30 tile names
            if abs(lon) <= 20:
                lon = 20
            elif abs(lon) <= 60:
                lon = 60
            elif abs(lon) <= 100:
                lon = 100
            else:
                lon = 140

            if abs(lat) <= 10:
                lat = 10
            elif abs(lat) <= 40:
                lat = 40
            else:
                lat = 90

            NS = NS.lower()
            EW = EW.lower()

        lat_str = str(abs(lat)).zfill(2)
        lon_str = str(abs(lon)).zfill(3)

        # build up the filename and file link
        filename = format_string.format(NS, lat_str, EW, lon_str, product)
        filelink = "{0}/{1}".format(subhost, filename)

        # decide where to put the file, then download it
        outpath = in_outdir(filename)

        print("Downloading and extracting {0}".format(filename))
        download_url(filelink, outpath)

        # unzip the file and reassemble descriptive name
        if is_gl30:
            itemname = "{0}{1}{2}{3}.DEM".format(EW.upper(), lon_str,
                                                 NS.upper(), lat_str)
        else:
            itemname = "{0}{1}{2}{3}.hgt".format(NS, lat_str, EW, lon_str)

        # the with statement closes the archive
        with zipfile.ZipFile(outpath, "r") as z:
            z.extract(itemname, outdir)

        # clean up and add this file to output list
        os.remove(outpath)
        tile_list.append(in_outdir(itemname))

    print("Finished download and extraction of SRTM data")

    if mosaic is True:
        # use gdal to mosaic these raster together
        out_mosaic = in_outdir("SRTM_mosaic.tif")
        core.run_command("gdalwarp", tile_list, out_mosaic)
        return out_mosaic

    return tile_list
def fetch_TRMM(start_dto, end_dto, outdir, product_string):
    """
    Fetches TRMM data from an FTP server.

       ftp://trmmopen.gsfc.nasa.gov/trmmdata/ByDate/V07/

    Every matching file is downloaded, extracted out of its gz format, and
    the gz archive is removed afterwards. Files that fail are reported on
    stdout and skipped.

    :param start_dto:       datetime object for start date of desired range
    :param end_dto:         datetime object for end date of desired range
    :param outdir:          output directory where files should be saved (str)
    :param product_string:  the string for the desired product, options include
                            1B11, 1B21, 1CTMI, 2A12, 2A21, 2A23, 2A25, 2B31,
                            3B42, 3G25, 3G31. The usual precip product of
                            interest is the well known 3B42 data product.

    :return output_files:   a list of filepaths to the extracted files
                            created by this function
    """

    # set up empty structure
    output_files = []
    ftpsite = "ftp://pps.gsfc.nasa.gov"
    un = "*****@*****.**"

    # one entry per day in the inclusive range (empty when the end date
    # precedes the start date)
    num_days = (end_dto - start_dto).days + 1
    dates = [start_dto + timedelta(days=i) for i in range(num_days)]

    for date in dates:

        # navigate to path of desired year/month/day
        workdir = '/'.join(['trmmdata', 'ByDate', 'V07',
                            str(date.year),
                            str(date.month).zfill(2),
                            str(date.day).zfill(2)])

        filenames, filepaths = list_ftp(site=ftpsite,
                                        dir=workdir,
                                        username=un,
                                        password=un)

        for filename in filenames:
            if product_string not in filename:
                continue

            basename = os.path.basename(filename)
            try:
                outname = os.path.join(outdir, basename)
                download_url(ftpsite + filename, outname,
                             username=un, password=un)

                # now extract it out of its GZ format
                extracted = outname.replace(".gz", "")
                with gzip.open(outname, 'rb') as gz:
                    with open(extracted, 'wb') as f:
                        f.write(gz.read())

                os.remove(outname)

                # report the extracted file: the gz archive no longer exists
                output_files.append(extracted)
                print("downloaded and extracted {0}".format(basename))

            # best effort per file, but let the user interrupt a long run
            except Exception:
                print("failed to download {0}".format(basename))

    print("Finished downloading TRMM files!")
    return output_files
def fetch_Landsat_WELD(product, tiles, years, outdir):
    """
    Fetch WELD data from the server at [http://e4ftl01.cr.usgs.gov/WELD].
    Weld data is corrected and processed Landsat 5 and 7 data that is
    distributed in the MODIS sinusoidal projection and grid format. Read more
    about WELD data.
        https://landsat.usgs.gov/WELD.php
        http://globalmonitoring.sdstate.edu/projects/weldglobal/

    Files are saved in one subfolder of ``outdir`` per tile. If the server
    listing cannot be obtained, a message is printed and an empty list is
    returned.

    :param product:     WELD product to download such as 'USWK','USMO','USYR'
    :param tiles:       list of tiles to grab such as ['h11v12','h11v11']
    :param years:       list of years to grab such as range(2001,2014)
    :param outdir:      output directory to save downloaded files

    :return output_filelist: A list of full filepaths to files fetched by
                             this function
    """

    output_filelist = []

    # check formats
    tiles = core.enf_list(tiles)
    years = [str(year) for year in core.enf_list(years)]

    # create output directories
    for tile in tiles:
        tile_dir = os.path.join(outdir, tile)
        if not os.path.exists(tile_dir):
            os.makedirs(tile_dir)

    print('Connecting to servers!')

    # Map the contents of the directory
    site = 'https://e4ftl01.cr.usgs.gov/WELD/WELD' + product + '.001'
    try:
        dates = list_http_e4ftl01(site)
    except Exception:
        # without a directory listing there is nothing to iterate over, so
        # stop here instead of continuing with an undefined `dates`.
        print('Could not connect to site! check inputs!')
        return output_filelist

    # find just the folders within the desired year range.
    good_dates = []
    for date in dates:
        try:
            y, m, d = date.split(".")
        except (ValueError, AttributeError):
            # not a "year.month.day" folder name
            continue
        if y in years:
            good_dates.append(date)

    print("Found {0} days within year range".format(len(good_dates)))

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:
        files = list_http_e4ftl01(site + '/' + good_date)

        for afile in files:
            # only list files with desired tilenames and not preview jpgs
            if '.jpg' in afile:
                continue

            for tile in tiles:
                if tile in afile:
                    # assemble the address
                    address = '/'.join([site, good_date, afile])
                    print("Downloading {0}".format(address))

                    # download the file, then record it as fetched
                    outname = os.path.join(outdir, tile, afile)
                    download_url(address, outname)
                    output_filelist.append(outname)

    return output_filelist
# Assemble 'movie.mp4' (30 fps) from the entries of cleaned_up_img_history.
# Each entry's image is expected as ./images/f<i>.png; when that file is
# missing, the entry's 'url' is downloaded first.
# NOTE(review): this fragment appears truncated -- within the visible lines
# `i` is never incremented, the '.png' branch never appends a frame, and the
# writer is never closed. The nesting below is reconstructed from a flattened
# source; confirm against the original file.
dir_ = './images'
if not os.path.exists(dir_):
    os.makedirs(dir_)

# frame counter, used to name the files f1, f2, ...
i = 1
writer = imageio.get_writer('movie.mp4', fps=30)
for img in cleaned_up_img_history:
    png_ext_file = f'f{i}.png'
    png_path = os.path.join(dir_, png_ext_file)
    if not os.path.exists(png_path):
        print(f'[{i}] downloading...', end='', flush=True)
        # presumably returns the path of the saved file, including its
        # extension -- verify against download_url
        path = download_url(img['url'], filebasename=f'f{i}', folder=dir_)
        print(f'done')
        if path.endswith('.svg'):
            # vector images are rasterized to the output size before being
            # appended as a frame
            print(f'[{i}] processing (svg2png)...', end='', flush=True)
            svg2png(path, png_path, desired_width=OUT_WIDTH, desired_height=OUT_HEIGHT)
            print('done')
            print(f'[{i}] adding image to writer... ', end='', flush=True)
            writer.append_data(imageio.imread(png_path))
            print('done')
        elif path.endswith('.png'):
            print(f'[{i}] adding image to writer... ', end='', flush=True)
def fetch_MODIS(product, version, tiles, outdir, start_dto, end_dto,
                force_overwrite=False):
    """
    Fetch MODIS Land products from one of two servers. If this function
    runs and downloads 0 files, check that your inputs are consistent
    with the naming convention at the appropriate server address.

       http://e4ftl01.cr.usgs.gov
       ftp://n5eil01u.ecs.nsidc.org

    :param product:         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
    :param version:         version number, usually '004' or '041' or '005'
    :param tiles:           list of tiles to grab such as ['h11v12','h11v11']
                            NOTE: for some MODIS products, the h and v are omitted.
    :param outdir:          output directory to save downloaded files
    :param start_dto:       datetime object, the starting date of the range of data to download
    :param end_dto:         datetime object, the ending date of the range of data to download
    :param force_overwrite: will re-download files even if they already exist

    :return out_filepaths:  a list of filepaths to all files matching the
                            request, whether freshly downloaded or already
                            present in outdir

    :raises ValueError:     when the server listing cannot be obtained
    """

    out_filepaths = []
    num_downloaded = 0

    # check formats
    tiles = core.enf_list(tiles)

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = _find_modis_product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates, _ = list_ftp(site, False, False, Dir)
        else:
            dates = list_http_e4ftl01(site)
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site, Dir))

    # refine contents down to just addresses of valid year and j_day
    good_dates = []
    for date in dates:
        try:
            date_dto = datetime.strptime(date, "%Y.%m.%d")
        except (ValueError, TypeError):
            print("skipping non date folder name {0}".format(date))
            continue
        if start_dto <= date_dto <= end_dto:
            good_dates.append(date)

    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:

        if isftp:
            files, _ = list_ftp(site, False, False, Dir + '/' + good_date)
        else:
            files = list_http_e4ftl01(site + '/' + good_date)

        for afile in files:

            # only list files with desired tile names and not preview jpgs
            if '.jpg' in afile:
                continue

            for tile in tiles:
                if tile not in afile:
                    continue

                # assemble the address
                if isftp:
                    address = '/'.join(['ftp://' + site, Dir, good_date, afile])
                else:
                    address = '/'.join([site, good_date, afile])

                outname = os.path.join(outdir, afile)
                out_filepaths.append(outname)

                # download the file when it is missing, or always when the
                # caller asked for existing files to be overwritten.
                if force_overwrite or not os.path.isfile(outname):
                    download_url(address, outname)
                    num_downloaded += 1
                    print('Downloaded {0}'.format(address))

    print("Finished retrieving MODIS - {0} data!".format(product))
    print("Downloaded {0} files".format(num_downloaded))

    return out_filepaths
def fetch_MODIS(product, version, tiles, outdir, start_dto, end_dto,
                force_overwrite=False):
    """
    Fetch MODIS Land products from one of two servers. If this function
    runs and downloads 0 files, check that your inputs are consistent
    with the naming convention at the appropriate server address.

       http://e4ftl01.cr.usgs.gov
       ftp://n5eil01u.ecs.nsidc.org

    :param product:         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
    :param version:         version number, usually '004' or '041' or '005'
    :param tiles:           list of tiles to grab such as ['h11v12','h11v11']
                            NOTE: for some MODIS products, the h and v are omitted.
    :param outdir:          output directory to save downloaded files
    :param start_dto:       datetime object, the starting date of the range of data to download
    :param end_dto:         datetime object, the ending date of the range of data to download
    :param force_overwrite: will re-download files even if they already exist

    :return out_filepaths:  a list of filepaths to all files matching the
                            request, whether freshly downloaded or already
                            present in outdir

    :raises ValueError:     when the server listing cannot be obtained
    """

    out_filepaths = []
    num_downloaded = 0

    # check formats
    tiles = core.enf_list(tiles)

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = _find_modis_product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates, _ = list_ftp(site, False, False, Dir)
        else:
            dates = list_http_e4ftl01(site)
    except Exception:
        raise ValueError("Could not connect to {0}/{1}".format(site, Dir))

    # refine contents down to just addresses of valid year and j_day
    good_dates = []
    for date in dates:
        try:
            date_dto = datetime.strptime(date, "%Y.%m.%d")
        except (ValueError, TypeError):
            print("skipping non date folder name {0}".format(date))
            continue
        if start_dto <= date_dto <= end_dto:
            good_dates.append(date)

    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:

        if isftp:
            files, _ = list_ftp(site, False, False, Dir + '/' + good_date)
        else:
            files = list_http_e4ftl01(site + '/' + good_date)

        for afile in files:

            # only list files with desired tile names and not preview jpgs
            if '.jpg' in afile:
                continue

            for tile in tiles:
                if tile not in afile:
                    continue

                # assemble the address
                if isftp:
                    address = '/'.join(['ftp://' + site, Dir, good_date, afile])
                else:
                    address = '/'.join([site, good_date, afile])

                outname = os.path.join(outdir, afile)
                out_filepaths.append(outname)

                # download the file when it is missing, or always when the
                # caller asked for existing files to be overwritten.
                if force_overwrite or not os.path.isfile(outname):
                    download_url(address, outname)
                    num_downloaded += 1
                    print('Downloaded {0}'.format(address))

    print("Finished retrieving MODIS - {0} data!".format(product))
    print("Downloaded {0} files".format(num_downloaded))

    return out_filepaths
def fetch_GPM_IMERG(start_dto, end_dto, outdir, product="gis", time_res="1day"):
    """
    Fetches 30 minute resolution GPM IMERG data from an ftp server. Several
    restrictions exist for this relatively new dataset, please read in the
    input section carefully.

       http://pps.gsfc.nasa.gov/Documents/GPM_Data_Info_140616.pdf

    :param start_dto:   datetime object for starting time of study boundary
    :param end_dto:     datetime object for ending time of study boundary
    :param outdir:      output directory to save the data
    :param product:     either "early" , "late" or "final" for full HDF5 data stacks
                        of the respective runs, which are all at 30minute resolutions.
                        OR product can be set equal to "gis" (default) to find only
                        tif averages of the precipitation estimates. This gis tif data
                        is ONLY provided for data less than one year old.
    :param time_res:    if "product" is set to "gis", specify what time average period
                        you want. options are "30min", "3hr", "1day", "3day", "7day".
                        Defaults to "1day"

    :return:            Returns a list of filepaths to freshly downloaded files

    learn more at [http://pmm.nasa.gov/data-access/downloads/gpm]
    """

    # set up empty list of downloaded filepaths on local dir
    download_list = []

    # username and password info, should eventually be some DEVELOP credential.
    # this information is not at all sensitive.
    login = "******"

    # special filtering for gis type tif data to minimize data representation overlap.
    if product == "gis":
        if time_res == "30min":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 30)]
        elif time_res == "3hr":
            ok_minutes = [str(x).zfill(4) for x in range(0, 1440, 180)]
        else:
            # a list, so the membership test below is an exact match rather
            # than a substring search inside the string "0000"
            ok_minutes = ["0000"]

    # assemble address information
    pps_server = r"ftp://jsimpson.pps.eosdis.nasa.gov"

    # set product directory
    prod_server = "/".join(["NRTPUB/imerg", product])

    # log in and list available month folders.
    foldnames, foldpaths = list_ftp(site=pps_server,
                                    dir=prod_server,
                                    username=login,
                                    password=login)

    # keep only folders with integer names. Built as a new list: removing
    # items from the list being iterated skips the neighbour of every
    # removed entry.
    numeric_foldnames = []
    for foldname in foldnames:
        try:
            int(foldname)
        except (TypeError, ValueError):
            continue
        numeric_foldnames.append(foldname)

    for foldname in numeric_foldnames:
        print("exploring directory '{0}'".format(foldname))
        subdir = "/".join([prod_server, foldname])
        filenames, filepaths = list_ftp(site=pps_server,
                                        dir=subdir,
                                        username=login,
                                        password=login)

        for filepath in filepaths:
            filename = os.path.basename(filepath)

            # dot separated fields: [4] is the date stamp, [5] the minute of
            # day, [7] is compared against time_res for gis files
            finfo = filename.split(".")
            date_cords = finfo[4]
            minutes = finfo[5]
            file_time_res = finfo[7]

            date_str = date_cords.split("-")[0]
            date = datetime.strptime(date_str, "%Y%m%d") + timedelta(minutes=int(minutes))

            # see if this file meets criteria for download
            good_date = start_dto <= date <= end_dto

            if product == "gis":
                good_minutes = minutes in ok_minutes
                good_time = time_res == file_time_res
            else:
                good_minutes = True
                good_time = True

            # download the files
            if good_date and good_time and good_minutes:
                # NOTE(review): files go into a dated sub-folder of outdir;
                # presumably download_url creates it -- confirm.
                outname = os.path.join(outdir, date.strftime("%Y-%m-%d"), filename)
                download_url(filepath, outname, username=login, password=login)
                print("saved '{0}' in '{1}'".format(filename, outdir))
                download_list.append(outname)

    return download_list
def download_urls(url_list, outdir, filetypes=False):
    """
    Downloads a list of files and reports the ones that failed.

    Every url is attempted exactly once. Urls that raise during download are
    collected and returned so that a caller can loop and retry them until
    they succeed or a retry limit is reached.

    Inputs:
        url_list        array of urls, probably as read from a text file
        filetypes       list of filetypes to download. Useful for excluding extraneous
                        metadata by only downloading 'hdf' or 'tif' for example. Please note
                        that often times, you actually NEED the metadata. A falsy
                        value (the default) downloads everything.
        outdir          folder where files are to be placed after download

    Output:
        failed          list of urls which failed download
    """

    failed = []
    url_list = core.enf_list(url_list)

    # creates output folder at desired path if it doesn't already exist
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # establish a wait time that will increase when downloads fail. This helps to reduce
    # the frequency of REVERB server rejections for requesting too many downloads
    wait = 0

    for raw_url in url_list:
        url = raw_url.rstrip()
        name = url.split("/")[-1]

        # Determine whether or not to download the file based on filetype,
        # matching against the last four characters of the file name.
        if filetypes:
            wanted = any(filetype in name[-4:] for filetype in filetypes)
        else:
            wanted = True

        if not wanted:
            continue

        try:
            # wait for the wait time before attempting writing a file
            time.sleep(wait)
            download_url(url, os.path.join(outdir, name))
            print("{0} is downloaded {1}".format(name, wait))

            # a success resets the back-off
            wait = 0

        # remember the failure and wait longer next time. "Exception" rather
        # than a bare except so Ctrl-C can still stop a long download run.
        except Exception:
            print("{0} will be retried! {1}".format(name, wait))
            wait += 5
            failed.append(url)

    print("Finished downloading urls!")
    return failed
def fetch_SRTM(ll_lat, ll_lon, ur_lat, ur_lon, product, outdir=None, mosaic=None):
    """
    downloads data from the Shuttle Radar Topography Mission (SRTM)
    [http://e4ftl01.cr.usgs.gov/SRTM/]

    This data can be used to create DEMs of a variety of resolutions.

    :param ll_lat:      latitude of lower left corner
    :param ll_lon:      longitude of lower left corner
    :param ur_lat:      latitude of upper right corner
    :param ur_lon:      longitude of upper right corner
    :param product:     short name of product you want. See http://e4ftl01.cr.usgs.gov/SRTM/ .
                        do not include the version number. Example: "SRTMGL1". Note
                        that version "002" data of .DEM format does not support mosaicing.
    :param outdir:      local directory to save downloaded files. When omitted,
                        files are written relative to the working directory.
    :param mosaic:      Set to TRUE to mosaic all downloaded DEM tiles as "SRTM_mosaic.tif"

    :return tile_list:  a list of all successfully downloaded tile filepaths
                        for further manipulation, or the filepath of the mosaic
                        when ``mosaic`` is True.

    NOTE: arcmap will open the output hgt files ONLY if they are not renamed.
    turns out arcmap does some funky things when interpreting these files.
    """

    # function-scope import: only the tile index maths below needs it
    import math

    def _local_path(name):
        """ Puts ``name`` inside outdir, or leaves it relative if none was given """
        return name if outdir is None else os.path.join(outdir, name)

    # build empty return list
    tile_list = []

    # build list of lat/lon pairs from input corners. floor() instead of
    # int(): int() truncates toward zero, which picks the wrong tile index
    # for fractional southern / western coordinates (-77.5 lies in the tile
    # indexed -78, not -77).
    lat_lon_pairs = [
        (lat, lon)
        for lat in range(int(math.floor(ll_lat)), int(math.floor(ur_lat)) + 1)
        for lon in range(int(math.floor(ll_lon)), int(math.floor(ur_lon)) + 1)
    ]

    print(lat_lon_pairs)

    # determine product version ("==", not "is": identity of two equal
    # strings is an interpreter detail)
    is_gl30 = product == "SRTMGL30"

    if is_gl30:
        print ("Download of product SRTMGL30 is supported, but arcmap does not support this filetype")
        format_string = "{2}{3}{0}{1}.{4}.dem.zip"
        version = "002"
        mosaic = None
    else:
        format_string = "{0}{1}{2}{3}.{4}.hgt.zip"
        version = "003"

    host = "http://e4ftl01.cr.usgs.gov/SRTM"
    subhost = "{0}/{1}.{2}/2000.02.11/".format(host, product, version)

    print ("Connecting to host at {0}".format(subhost))

    # archives already handled, so pairs that resolve to the same archive
    # (possible after the SRTMGL30 snapping below) are fetched only once
    fetched = set()

    for lat, lon in lat_lon_pairs:

        # set North-south, East-West convention.
        NS = "N" if lat >= 0 else "S"
        EW = "E" if lon >= 0 else "W"

        if is_gl30:
            # snap the pair onto the coarse SRTMGL30 tile labels
            if abs(lon) <= 20:
                lon = 20
            elif abs(lon) <= 60:
                lon = 60
            elif abs(lon) <= 100:
                lon = 100
            else:
                lon = 140

            if abs(lat) <= 10:
                lat = 10
            elif abs(lat) <= 40:
                lat = 40
            else:
                lat = 90

            NS = NS.lower()
            EW = EW.lower()

        lat_str = str(abs(lat)).zfill(2)
        lon_str = str(abs(lon)).zfill(3)

        # build up the filename and file link
        filename = format_string.format(NS, lat_str, EW, lon_str, product)
        if filename in fetched:
            continue
        fetched.add(filename)

        filelink = "{0}/{1}".format(subhost, filename)

        # decide where to put the file, then download it
        outpath = _local_path(filename)

        print ("Downloading and extracting {0}".format(filename))
        download_url(filelink, outpath)

        # reassemble the name of the archive member
        if version == "003":
            itemname = "{0}{1}{2}{3}.hgt".format(NS, lat_str, EW, lon_str)
        else:
            itemname = "{0}{1}{2}{3}.DEM".format(EW.upper(), lon_str, NS.upper(), lat_str)

        # unzip the file (the with-block closes the archive)
        with zipfile.ZipFile(outpath, "r") as z:
            z.extract(itemname, outdir)

        # clean up and add this file to output list
        os.remove(outpath)
        tile_list.append(_local_path(itemname))

    print ("Finished download and extraction of SRTM data")

    if mosaic is True:
        # use gdal to mosaic these raster together
        out_mosaic = _local_path("SRTM_mosaic.tif")
        core.run_command("gdalwarp", tile_list, out_mosaic)
        return out_mosaic

    return tile_list
def fetch_MODIS(product, version, tiles, outdir, years, j_days=False,
                force_overwrite=False):
    """
    Fetch MODIS Land products from one of two servers.

       http://e4ftl01.cr.usgs.gov
       ftp://n5eil01u.ecs.nsidc.org

    Inputs:
        product         MODIS product to download such as 'MOD10A1' or 'MYD11A1'
        version         version number, usually '004' or '041' or '005'
        tiles           list of tiles to grab such as ['h11v12','h11v11']
        outdir          output directory to save downloaded files
                        (created if it does not exist)
        years           list of years to grab such as range(2001,2014)
        j_days          julian day, or list of days, to grab such as range(31, 60).
                        Defaults to all days in year
        force_overwrite will re-download files even if they already exist

    Raises:
        ValueError      if the top level directory listing cannot be obtained
    """

    def Find_MODIS_Product(product, version):
        """
        Subfunction to determine server properties for MODIS data product.

        the two current servers where aqua/terra MODIS data can be downloaded are
            site1='http://e4ftl01.cr.usgs.gov'
            site2='n5eil01u.ecs.nsidc.org'

        Inputs:
            product     modis product such as 'MOD10A1'
            version     modis version, usually '005', '004', or '041'

        Outputs:
            site        server address where data can be found
            isftp       True when the product is served by the ftp server
            Dir         subdirectory of server to further search for files
                        of input product (False for http products).
        """

        sat_designation = product[0:3]
        prod_ID = product[3:]

        site1 = 'http://e4ftl01.cr.usgs.gov/'
        site2 = 'n5eil01u.ecs.nsidc.org'

        isftp = False
        Dir = False

        # products with '10' in their ID are taken from the ftp server
        if '10' in prod_ID:
            isftp = True
            site = site2

        # refine the address of the desired data product
        if sat_designation == 'MOD':
            if isftp:
                Dir = 'MOST/' + product + '.' + version
            else:
                site = site1 + 'MOLT/' + product + '.' + version

        elif sat_designation == 'MYD':
            if isftp:
                Dir = 'DP1/MOSA/' + product + '.' + version
            else:
                site = site1 + 'MOLA/' + product + '.' + version

        elif sat_designation == 'MCD':
            site = site1 + 'MOTA/' + product + '.' + version

        else:
            print('No such MODIS product is available for download with this script!')
            site = "None"

        return site, isftp, Dir

    # check formats
    tiles = core.enf_list(tiles)
    years = [str(year) for year in core.enf_list(years)]

    # normalise j_days to zero padded strings that compare against "%j".
    # None means "do not filter by day".
    if not j_days:
        js = None
    elif isinstance(j_days, (int, str)):
        # a single day must be padded too, otherwise 5 never matches "005"
        js = [str(j_days).zfill(3)]
    else:
        # any iterable of days, not only list (a python 3 range is not a list)
        js = [str(j_day).zfill(3) for j_day in j_days]

    # do a quick input tile check for 6 characters.
    for tile in tiles:
        if not len(tile) == 6:
            print("Warning! your tiles appear to be invalid!")
            print("Warning! make sure they are in format 'h##v##")

    # create output directories
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    print("Connecting to servers!")

    # obtain the web address, protocol information, and subdirectory where
    # this type of MODIS data can be found.
    site, isftp, Dir = Find_MODIS_Product(product, version)

    if Dir:
        print("Connected to {0}/{1}".format(site, Dir))
    else:
        print("Connected to {0}".format(site))

    # Depending on the type of connection (ftp vs http) populate the file list
    try:
        if isftp:
            dates, _ = list_ftp(site, False, False, Dir)
        else:
            dates = list_http(site)
    except Exception:
        # "Exception" rather than a bare except so Ctrl-C still works
        raise ValueError("Could not connect to {0}/{1}".format(site, Dir))

    # refine contents down to just addresses of valid year and j_day
    good_dates = []
    for date in dates:
        try:
            dto = datetime.strptime(date, "%Y.%m.%d")
        except ValueError:
            print("skipping non date folder name {0}".format(date))
            continue

        # both filters are applied before appending. Appending and then
        # removing raised ValueError for dates that were never appended,
        # which was then misreported as a non date folder.
        if dto.strftime("%Y") not in years:
            continue
        if js is not None and dto.strftime("%j") not in js:
            continue

        good_dates.append(date)

    print('Found {0} days within range'.format(len(good_dates)))

    # for all folders within the desired date range, map the subfolder contents.
    for good_date in good_dates:

        if isftp:
            files, _ = list_ftp(site, False, False, Dir + '/' + good_date)
        else:
            files = list_http(site + '/' + good_date)

        for afile in files:

            # skip preview jpgs and anything that carries none of the tile names
            if '.jpg' in afile:
                continue
            if not any(tile in afile for tile in tiles):
                continue

            # assemble the address
            if isftp:
                address = '/'.join(['ftp://' + site, Dir, good_date, afile])
            else:
                address = '/'.join([site, good_date, afile])

            # download when the file is missing, or always when forced.
            # (the previous "not isfile and not force_overwrite" test made
            # force_overwrite=True suppress every download)
            outname = os.path.join(outdir, afile)
            if force_overwrite or not os.path.isfile(outname):
                download_url(address, outname)
                print('Downloaded {0}'.format(address))

    print('Finished retrieving MODIS - {0} data!'.format(product))
    return
def fetch_TRMM(start_dto, end_dto, outdir, product_string):
    """
    Fetches TRMM data from an FTP server.

       ftp://trmmopen.gsfc.nasa.gov/trmmdata/ByDate/V07/

    Input:
        start_dto        datetime object for start date of desired range
        end_dto          datetime object for end date of desired range
        outdir           output directory where files should be saved (str)
        product_string   the string for the desired product, options include
                            1B11, 1B21, 1CTMI, 2A12, 2A21, 2A23, 2A25, 2B31, 3B42,
                            3G25, 3G31. The usual precip product of interest is the
                            famous 3B42 data product.

    outputs:
        output_files    a list of new filepaths created by this function,
                        i.e. the extracted files (the downloaded .gz archives
                        are deleted after extraction)
    """

    # set up empty structure
    output_files = []
    ftpsite = "ftp://pps.gsfc.nasa.gov"
    un = "*****@*****.**"

    # one entry per day, both end points included
    date_delta = end_dto - start_dto
    dates = [start_dto + timedelta(days=i) for i in range(date_delta.days + 1)]

    for date in dates:

        # navigate to path of desired year/month/day
        workdir = '/'.join(['trmmdata', 'ByDate', 'V07',
                            str(date.year),
                            str(date.month).zfill(2),
                            str(date.day).zfill(2)])

        filenames, filepaths = list_ftp(site=ftpsite,
                                        dir=workdir,
                                        username=un,
                                        password=un)

        for filename in filenames:
            if product_string not in filename:
                continue

            basename = os.path.basename(filename)
            gz_path = os.path.join(outdir, basename)
            extracted_path = gz_path.replace(".gz", "")

            try:
                download_url(ftpsite + filename, gz_path, username=un, password=un)

                # now extract it out of its GZ format
                with gzip.open(gz_path, 'rb') as gz:
                    with open(extracted_path, 'wb') as f:
                        f.write(gz.read())

                os.remove(gz_path)

            # "Exception" rather than a bare except so Ctrl-C still works
            except Exception:
                print("failed to download {0}".format(basename))
                continue

            # record the file only once it really exists, and record the
            # extracted path: the .gz has just been removed
            output_files.append(extracted_path)
            print("downloaded and extracted {0}".format(basename))

    print("Finished downloading TRMM files!")
    return output_files
def fetch_SRTM(ll_lat, ll_lon, ur_lat, ur_lon, product, outdir=None, mosaic=None):
    """
    downloads data from the Shuttle Radar Topography Mission (SRTM)
    [http://e4ftl01.cr.usgs.gov/SRTM/]

    This data can be used to create DEMS of a variety of resolutions.

    Inputs:
        ll_lat          latitude of lower left corner
        ll_lon          longitude of lower left corner
        ur_lat          latitude of upper right corner
        ur_lon          longitude of upper right corner
        product         short name of product you want. See link below
                        https://lpdaac.usgs.gov/products/measures_products_table
        outdir          local directory to save downloaded files. When omitted,
                        files are written relative to the working directory.
        mosaic          Set to TRUE to mosaic all downloaded DEM tiles.

    Returns:
        tif_list        a list of all successfully downloaded tile filepaths
                        for further manipulation

    NOTE: arcmap will open the output hgt files ONLY if they are not renamed.
    turns out arcmap does some funky things when interpreting these files.
    """

    # function-scope import: only the tile index maths below needs it
    import math

    def _local_path(name):
        """ Puts ``name`` inside outdir, or leaves it relative if none was given """
        return name if outdir is None else os.path.join(outdir, name)

    # build empty return list
    tif_list = []

    # build list of lat/lon pairs from input corners: every integer tile index
    # from the one holding the lower left corner to the one holding the upper
    # right corner. floor() instead of int() because int() truncates toward
    # zero (-77.5 lies in the tile indexed -78, not -77); the old extra "+ 1"
    # on the range end requested a surplus row and column of tiles.
    lat_lon_pairs = [
        (lat, lon)
        for lat in range(int(math.floor(ll_lat)), int(math.floor(ur_lat)) + 1)
        for lon in range(int(math.floor(ll_lon)), int(math.floor(ur_lon)) + 1)
    ]

    print(lat_lon_pairs)

    # determine product version ("==", not "is": identity of two equal
    # strings is an interpreter detail)
    is_gl30 = product == "SRTMGL30"

    if is_gl30:
        print("Download of product SRTMGL30 is supported, but arcmap does not support this filetype")
        format_string = "{2}{3}{0}{1}.{4}.dem.zip"
        version = "002"
    else:
        format_string = "{0}{1}{2}{3}.{4}.hgt.zip"
        version = "003"

    host = "http://e4ftl01.cr.usgs.gov/SRTM"
    subhost = "{0}/{1}.{2}/2000.02.11/".format(host, product, version)

    print("Connecting to host at {0}".format(subhost))

    # archives already handled, so pairs that resolve to the same archive
    # (possible after the SRTMGL30 snapping below) are fetched only once
    fetched = set()

    for lat, lon in lat_lon_pairs:

        # set North-south, East-West convention.
        NS = "N" if lat >= 0 else "S"
        EW = "E" if lon >= 0 else "W"

        if is_gl30:
            # snap the pair onto the coarse SRTMGL30 tile labels
            if abs(lon) <= 20:
                lon = 20
            elif abs(lon) <= 60:
                lon = 60
            elif abs(lon) <= 100:
                lon = 100
            else:
                lon = 140

            if abs(lat) <= 10:
                lat = 10
            elif abs(lat) <= 40:
                lat = 40
            else:
                lat = 90

            NS = NS.lower()
            EW = EW.lower()

        lat_str = str(abs(lat)).zfill(2)
        lon_str = str(abs(lon)).zfill(3)

        # build up the filename and file link
        filename = format_string.format(NS, lat_str, EW, lon_str, product)
        if filename in fetched:
            continue
        fetched.add(filename)

        filelink = "{0}/{1}".format(subhost, filename)

        # decide where to put the file, then download it
        outpath = _local_path(filename)

        print("Downloading and extracting {0}".format(filename))
        download_url(filelink, outpath)

        # reassemble the name of the archive member
        if version == "003":
            itemname = "{0}{1}{2}{3}.hgt".format(NS, lat_str, EW, lon_str)
        else:
            # NOTE(review): a ".dem.zip" archive cannot hold the ".hgt" member
            # requested before; the longitude-first upper case ".DEM" name is
            # assumed from the archive naming -- confirm against a real file.
            itemname = "{0}{1}{2}{3}.DEM".format(EW.upper(), lon_str, NS.upper(), lat_str)

        # unzip the file (the with-block closes the archive)
        with zipfile.ZipFile(outpath, "r") as z:
            z.extract(itemname, outdir)

        # clean up and add this file to output list
        os.remove(outpath)
        tif_list.append(_local_path(itemname))

    if mosaic is True:
        mosaic_dir = outdir if outdir is not None else os.getcwd()
        arcpy.MosaicToNewRaster_management(tif_list, mosaic_dir, "SRTM_mosaic.tif",
                                           number_of_bands=1,
                                           pixel_type="32_BIT_SIGNED")

    print("Finished download and extraction of SRTM data")

    return tif_list
def fetch_SRTM(lat_lon_pairs, product, outdir=None, mosaic=None):
    """
    downloads data from the Shuttle Radar Topography Mission (SRTM)
    [http://e4ftl01.cr.usgs.gov/SRTM/]

    This data can be used to create DEMS of a variety of resolutions.

    Inputs:
        lat_lon_pairs   tupled integer values of lat,lon combinations.
                        may be a list of tuples. (N positive, E positive)
        product         short name of product you want. See link below
                        https://lpdaac.usgs.gov/products/measures_products_table
        outdir          local directory to save downloaded files. When omitted,
                        files are written relative to the working directory.
        mosaic          Set to TRUE to mosaic all downloaded DEM tiles.

    Returns:
        tif_list        a list of all successfully downloaded tile filepaths
                        for further manipulation

    Example:
        lat_lons = [(37,-76), (37,-77)] # Two tiles
        prod = "SRTMGL3"                #3 arc second DEM product)

        download.fetch_SRTM(lat_lons, prod)

    NOTE: arcmap will open the output hgt files ONLY if they are not renamed.
    turns out arcmap does some funky things when interpreting these files.
    """

    def _local_path(name):
        """ Puts ``name`` inside outdir, or leaves it relative if none was given """
        return name if outdir is None else os.path.join(outdir, name)

    # build empty return list
    tif_list = []

    # sanitize input list
    lat_lon_pairs = core.enf_list(lat_lon_pairs)

    # determine product version ("==", not "is": identity of two equal
    # strings is an interpreter detail)
    is_gl30 = product == "SRTMGL30"

    if is_gl30:
        print("Download of product SRTMGL30 is supported, but arcmap does not support this filetype")
        format_string = "{2}{3}{0}{1}.{4}.dem.zip"
        version = "002"
    else:
        format_string = "{0}{1}{2}{3}.{4}.hgt.zip"
        version = "003"

    host = "http://e4ftl01.cr.usgs.gov/SRTM"
    subhost = "{0}/{1}.{2}/2000.02.11/".format(host, product, version)

    print("Connecting to host at {0}".format(subhost))

    # archives already handled, so repeated pairs, or pairs that resolve to
    # the same archive after the SRTMGL30 snapping below, are fetched once
    fetched = set()

    for lat, lon in lat_lon_pairs:

        # set North-south, East-West convention.
        NS = "N" if lat >= 0 else "S"
        EW = "E" if lon >= 0 else "W"

        if is_gl30:
            # snap the pair onto the coarse SRTMGL30 tile labels
            if abs(lon) <= 20:
                lon = 20
            elif abs(lon) <= 60:
                lon = 60
            elif abs(lon) <= 100:
                lon = 100
            else:
                lon = 140

            if abs(lat) <= 10:
                lat = 10
            elif abs(lat) <= 40:
                lat = 40
            else:
                lat = 90

            NS = NS.lower()
            EW = EW.lower()

        lat_str = str(abs(lat)).zfill(2)
        lon_str = str(abs(lon)).zfill(3)

        # build up the filename and file link
        filename = format_string.format(NS, lat_str, EW, lon_str, product)
        if filename in fetched:
            continue
        fetched.add(filename)

        filelink = "{0}/{1}".format(subhost, filename)

        # decide where to put the file, then download it
        outpath = _local_path(filename)

        print("Downloading and extracting {0}".format(filename))
        download_url(filelink, outpath)

        # reassemble the name of the archive member
        if version == "003":
            itemname = "{0}{1}{2}{3}.hgt".format(NS, lat_str, EW, lon_str)
        else:
            # NOTE(review): a ".dem.zip" archive cannot hold the ".hgt" member
            # requested before; the longitude-first upper case ".DEM" name is
            # assumed from the archive naming -- confirm against a real file.
            itemname = "{0}{1}{2}{3}.DEM".format(EW.upper(), lon_str, NS.upper(), lat_str)

        # unzip the file (the with-block closes the archive)
        with zipfile.ZipFile(outpath, "r") as z:
            z.extract(itemname, outdir)

        # clean up and add this file to output list
        os.remove(outpath)
        tif_list.append(_local_path(itemname))

    if mosaic is True:
        mosaic_dir = outdir if outdir is not None else os.getcwd()
        arcpy.MosaicToNewRaster_management(tif_list, mosaic_dir, "SRTM_mosaic.tif",
                                           number_of_bands=1,
                                           pixel_type="32_BIT_SIGNED")

    print("Finished download and extraction of SRTM data")

    return tif_list