def main(output_folder, overwrite_flag=False):
    """Download soil Available Water Capacity (AWC) raster

    Args:
        output_folder (str): folder path where files will be saved
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    # Composite SSURGO/STATSGO
    download_url = 'https://storage.googleapis.com/openet/ssurgo/AWC_WTA_0to10cm_composite.tif'
    # STATSGO Only
    # download_url = 'https://storage.googleapis.com/openet/statsgo/AWC_WTA_0to10cm_statsgo.tif'

    output_name = download_url.split('/')[-1]
    output_path = os.path.join(output_folder, output_name)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    if not os.path.isfile(output_path) or overwrite_flag:
        logging.info('\nDownloading AWC')
        logging.info('  {}'.format(download_url))
        logging.info('  {}'.format(output_path))
        url_download(download_url, output_path)
    else:
        logging.debug('\nAWC raster already downloaded')
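
# All of these scripts call a shared url_download helper that is not shown in
# this excerpt. Below is a minimal sketch of the assumed implementation using
# requests; the verify keyword mirrors requests' SSL verification flag so the
# LANDFIRE script further down can pass verify=False. The real helper may
# differ in signature and error handling.
import logging
import os

import requests


def url_download(download_url, output_path, verify=True):
    """Download a file to disk in streamed chunks (sketch of assumed helper)"""
    response = requests.get(download_url, stream=True, verify=verify)
    response.raise_for_status()
    try:
        with open(output_path, 'wb') as output_f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                output_f.write(chunk)
    except Exception:
        # Remove a partially downloaded file so reruns start clean
        logging.error('  ERROR downloading {}'.format(download_url))
        if os.path.isfile(output_path):
            os.remove(output_path)
        raise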
def main(output_folder, year=2011, overwrite_flag=False):
    """Download NLCD raster

    Args:
        output_folder (str): folder path where files will be saved
        year (str, int): NLCD year (2006 or 2011)
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    download_url = (
        'http://gisdata.usgs.gov/TDDS/DownloadFile.php?'
        'TYPE=nlcd{0}&FNAME=nlcd_{0}_landcover_2011_edition_2014_10_10.zip'
    ).format(year)

    zip_name = 'nlcd_{}_landcover_2011_edition_2014_10_10.zip'.format(year)
    zip_path = os.path.join(output_folder, zip_name)

    output_name = zip_name.replace('.zip', '.img')
    # output_path = os.path.join(output_folder, output_name)
    output_path = os.path.join(
        output_folder, os.path.splitext(zip_name)[0], output_name)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    if ((not os.path.isfile(zip_path) and not os.path.isfile(output_path))
            or overwrite_flag):
        logging.info('\nDownloading NLCD')
        logging.info('  {}'.format(download_url))
        logging.info('  {}'.format(zip_path))
        url_download(download_url, zip_path)
    else:
        logging.debug('\nNLCD raster already downloaded')

    if ((overwrite_flag or not os.path.isfile(output_path))
            and os.path.isfile(zip_path)):
        logging.info('\nExtracting NLCD files')
        logging.debug('  {}'.format(output_path))
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(output_folder)
    else:
        logging.debug('\nNLCD raster already extracted')
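
# These downloaders are presumably run from the command line. A hypothetical
# argparse wrapper for the NLCD script above (flag names are illustrative,
# not necessarily the project's actual CLI):
import argparse
import logging
import os

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Download NLCD raster')
    parser.add_argument(
        '--output', default=os.getcwd(), metavar='FOLDER',
        help='Output folder')
    parser.add_argument(
        '--year', default=2011, type=int, choices=[2006, 2011],
        help='NLCD year')
    parser.add_argument(
        '-o', '--overwrite', default=False, action='store_true',
        help='Overwrite existing files')
    parser.add_argument(
        '--debug', default=logging.INFO, const=logging.DEBUG,
        action='store_const', help='Debug level logging')
    args = parser.parse_args()

    logging.basicConfig(level=args.debug, format='%(message)s')
    main(output_folder=args.output, year=args.year,
         overwrite_flag=args.overwrite)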
def main(output_folder, overwrite_flag=False):
    """Download Landsat WRS2 descending footprint shapefile

    Args:
        output_folder (str): folder path where files will be saved
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    download_url = (
        'https://landsat.usgs.gov/sites/default/files/documents/wrs2_descending.zip')

    zip_name = 'wrs2_descending.zip'
    zip_path = os.path.join(output_folder, zip_name)

    output_name = zip_name.replace('.zip', '.shp')
    output_path = os.path.join(output_folder, output_name)
    # output_path = os.path.join(
    #     output_folder, os.path.splitext(zip_name)[0], output_name)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    if ((not os.path.isfile(zip_path) and not os.path.isfile(output_path))
            or overwrite_flag):
        logging.info('\nDownloading Landsat WRS2 descending shapefile')
        logging.info('  {}'.format(download_url))
        logging.info('  {}'.format(zip_path))
        url_download(download_url, zip_path)
    else:
        logging.debug('\nFootprint shapefile already downloaded')

    if ((overwrite_flag or not os.path.isfile(output_path))
            and os.path.isfile(zip_path)):
        logging.info('\nExtracting Landsat WRS2 descending shapefile')
        logging.debug('  {}'.format(output_path))
        with zipfile.ZipFile(zip_path) as zf:
            zf.extractall(output_folder)
    else:
        logging.debug('\nFootprint shapefile already extracted')
def main(output_folder, version='140', overwrite_flag=False):
    """Download LANDFIRE veg. type

    Args:
        output_folder (str): folder path where files will be saved
        version (str): LANDFIRE version (105, 110, 120, 130, 140)
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    version = str(version).replace('.', '')

    base_url = 'https://www.landfire.gov/bulk/downloadfile.php?FNAME='
    zip_dict = {
        '140': 'US_{0}_mosaic-US_{0}EVT_04252017.zip&TYPE=landfire'.format(
            version),
        '130': 'US_{0}_Mosaic-US_{0}_EVT_04232015.zip&TYPE=landfire'.format(
            version),
        '120': 'US_{0}_Mosaic-US_{0}_EVT_06142017.zip&TYPE=landfire'.format(
            version),
        '110': 'US_{0}_mosaic_Refresh-US_{0}EVT_09122104.zip&TYPE=landfire'.format(
            version),
        '105': 'US_{0}_mosaic_Refresh-US_{0}evt_09122104.zip&TYPE=landfire'.format(
            version),
    }
    download_url = base_url + zip_dict[version]

    output_name = 'US_{}_EVT'.format(version)
    zip_path = os.path.join(output_folder, output_name + '.zip')

    if not os.path.isfile(zip_path) or overwrite_flag:
        logging.info('\nDownloading LANDFIRE vegetation type')
        logging.info('  {}'.format(download_url))
        logging.info('  {}'.format(zip_path))
        url_download(download_url, zip_path, verify=False)
    else:
        logging.debug('\nLANDFIRE raster already downloaded')

    if os.path.isfile(zip_path):
        logging.info('\nExtracting LANDFIRE files')
        with zipfile.ZipFile(zip_path) as zf:
            # Extract files using zip naming and folder structure
            # zf.extractall(output_folder)

            # Ignore top level zip folder name
            for member in zf.namelist():
                # Replace root folder and switch to OS separator
                output_path = list(member.split('/'))
                output_path[0] = output_name
                output_path = os.sep.join(output_path)
                output_ws = os.path.join(
                    output_folder, os.path.dirname(output_path))
                # Skip directories
                if not os.path.basename(output_path):
                    continue
                # Only keep "grid" files
                if 'Grid' not in output_path:
                    continue
                # Build output directories
                if not os.path.isdir(output_ws):
                    os.makedirs(output_ws)
                # Extract
                logging.debug('  {}'.format(output_path))
                source = zf.open(member)
                target = open(os.path.join(output_folder, output_path), 'wb')
                with source, target:
                    shutil.copyfileobj(source, target)
    else:
        logging.debug('\nLANDFIRE raster already extracted')
def main(ancillary_ws=os.getcwd(), zero_elev_nodata_flag=False,
         overwrite_flag=False):
    """Process GRIDMET ancillary data

    Args:
        ancillary_ws (str): folder of ancillary rasters
        zero_elev_nodata_flag (bool): if True, set elevation nodata values to 0
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nProcess GRIDMET ancillary rasters')

    # Site URL
    elev_url = 'https://climate.northwestknowledge.net/METDATA/data/metdata_elevationdata.nc'

    # Manually define the spatial reference and extent of the GRIDMET data
    # This could be read in from a raster
    gridmet_osr = osr.SpatialReference()
    # Assume GRIDMET data is in WGS84 not NAD83 (need to check with John)
    gridmet_osr.ImportFromEPSG(4326)
    gridmet_proj = gdc.osr_proj(gridmet_osr)
    gridmet_cs = 1. / 24   # 0.041666666666666666
    gridmet_x = -125 + gridmet_cs * 5
    gridmet_y = 49 + gridmet_cs * 10
    # gridmet_y = lon_array[0, 0] - 0.5 * gridmet_cs
    # gridmet_y = lat_array[0, 0] + 0.5 * gridmet_cs
    # gridmet_rows, gridmet_cols = elev_array.shape
    gridmet_geo = (gridmet_x, gridmet_cs, 0., gridmet_y, 0., -gridmet_cs)
    # gridmet_extent = gdc.geo_extent(gridmet_geo, gridmet_rows, gridmet_cols)
    # Keep track of the original/full geo-transform and extent
    # gridmet_full_geo = (gridmet_x, gridmet_cs, 0., gridmet_y, 0., -gridmet_cs)
    # gridmet_full_extent = gdc.geo_extent(gridmet_geo, gridmet_rows, gridmet_cols)
    logging.debug('  X/Y: {} {}'.format(gridmet_x, gridmet_y))
    logging.debug('  Geo: {}'.format(gridmet_geo))
    logging.debug('  Cellsize: {}'.format(gridmet_cs))

    # Build output workspace if it doesn't exist
    if not os.path.isdir(ancillary_ws):
        os.makedirs(ancillary_ws)

    # Output paths
    elev_nc = os.path.join(ancillary_ws, os.path.basename(elev_url))
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'gridmet_lat.img')
    lon_raster = os.path.join(ancillary_ws, 'gridmet_lon.img')

    # Compute DEM raster
    if overwrite_flag or not os.path.isfile(elev_raster):
        logging.info('\nGRIDMET DEM')
        logging.info('  Downloading')
        logging.debug('    {}'.format(elev_url))
        logging.debug('    {}'.format(elev_nc))
        url_download(elev_url, elev_nc)

        logging.info('  Extracting')
        logging.debug('    {}'.format(elev_raster))
        elev_nc_f = netCDF4.Dataset(elev_nc, 'r')
        elev_ma = elev_nc_f.variables['elevation'][0, :, :]
        elev_array = elev_ma.data.astype(np.float32)
        # elev_nodata = float(elev_ma.fill_value)
        elev_array[
            (elev_array == elev_ma.fill_value) | (elev_array <= -300)] = np.nan
        if zero_elev_nodata_flag:
            elev_array[np.isnan(elev_array)] = 0
        if np.all(np.isnan(elev_array)):
            logging.error(
                '\nERROR: The elevation array is all nodata, exiting\n')
            sys.exit()
        gdc.array_to_raster(
            elev_array, elev_raster,
            output_geo=gridmet_geo, output_proj=gridmet_proj)
        elev_nc_f.close()
        del elev_nc_f, elev_ma, elev_array
        os.remove(elev_nc)

    # Compute latitude/longitude rasters
    if ((overwrite_flag
         or not os.path.isfile(lat_raster)
         or not os.path.isfile(lon_raster))
            and os.path.isfile(elev_raster)):
        logging.info('\nGRIDMET Latitude/Longitude')
        logging.debug('  {}'.format(lat_raster))
        lat_array, lon_array = gdc.raster_lat_lon_func(elev_raster)
        # Handle the conversion to radians in the other GRIDMET scripts
        # lat_array *= (math.pi / 180)
        gdc.array_to_raster(
            lat_array, lat_raster,
            output_geo=gridmet_geo, output_proj=gridmet_proj)
        logging.debug('  {}'.format(lon_raster))
        gdc.array_to_raster(
            lon_array, lon_raster,
            output_geo=gridmet_geo, output_proj=gridmet_proj)
        del lat_array, lon_array

    logging.debug('\nScript Complete')
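
# The gdc module used throughout (presumably a shared gdal_common helper
# library) is not included in this excerpt. Below is a minimal sketch of what
# gdc.array_to_raster likely does, assuming GDAL's Python bindings and the
# HFA (.img) driver these scripts write everywhere; the real helper may
# differ in signature and dtype handling.
import numpy as np
from osgeo import gdal

gdal.UseExceptions()


def array_to_raster(input_array, output_path,
                    output_geo, output_proj, output_nodata=None):
    """Write a 2D numpy array to a single band HFA raster (sketch)

    output_geo is a GDAL geo-transform tuple and output_proj is a WKT
    projection string, matching how the scripts above call the helper.
    """
    driver = gdal.GetDriverByName('HFA')
    rows, cols = input_array.shape
    output_ds = driver.Create(
        output_path, cols, rows, 1, gdal.GDT_Float32, ['COMPRESSED=YES'])
    output_ds.SetGeoTransform(output_geo)
    output_ds.SetProjection(output_proj)
    output_band = output_ds.GetRasterBand(1)
    if output_nodata is not None:
        output_band.SetNoDataValue(float(output_nodata))
        # Write the nodata value in place of NaNs so readers see a clean mask
        input_array = np.where(
            np.isfinite(input_array), input_array, output_nodata)
    output_band.WriteArray(input_array)
    output_ds = None  # Flush to disk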
def main(ancillary_ws=os.getcwd(), zero_elev_nodata_flag=False,
         overwrite_flag=False):
    """Process NLDAS ancillary data

    Args:
        ancillary_ws (str): folder of ancillary rasters
        zero_elev_nodata_flag (bool): if True, set elevation nodata values to 0
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nProcess NLDAS ancillary data')

    # Site URLs
    mask_url = 'http://ldas.gsfc.nasa.gov/nldas/asc/NLDASmask_UMDunified.asc'
    elev_url = 'http://ldas.gsfc.nasa.gov/nldas/asc/gtopomean15k.asc'

    nldas_epsg = 'EPSG:4269'
    # nldas_epsg = 'EPSG:4326'

    nldas_nodata = -9999.0

    # file_re = re.compile(
    #     'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})(?P<DAY>\d{2}).'
    #     '(?P<TIME>\d{4}).002.grb')
    # file_re = re.compile(
    #     'NLDAS_FORA0125_H.A(?P<DATE>\d{8}).(?P<TIME>\d{4}).002.grb')

    # Build output workspace if it doesn't exist
    if not os.path.isdir(ancillary_ws):
        os.makedirs(ancillary_ws)

    # Input paths
    input_elev_ascii = os.path.join(ancillary_ws, os.path.basename(elev_url))
    input_mask_ascii = os.path.join(ancillary_ws, os.path.basename(mask_url))

    # Output paths
    elev_ascii = os.path.join(ancillary_ws, 'nldas_elev.asc')
    mask_ascii = os.path.join(ancillary_ws, 'nldas_mask.asc')
    lat_ascii = os.path.join(ancillary_ws, 'nldas_lat.asc')
    lon_ascii = os.path.join(ancillary_ws, 'nldas_lon.asc')
    elev_raster = os.path.join(ancillary_ws, 'nldas_elev.img')
    mask_raster = os.path.join(ancillary_ws, 'nldas_mask.img')
    lat_raster = os.path.join(ancillary_ws, 'nldas_lat.img')
    lon_raster = os.path.join(ancillary_ws, 'nldas_lon.img')

    # Download the elevation data if necessary
    logging.info('\nDownloading ASCII files')
    if overwrite_flag or not os.path.isfile(input_elev_ascii):
        logging.info('  {}'.format(os.path.basename(elev_url)))
        logging.debug('    {}'.format(elev_url))
        logging.debug('    {}'.format(input_elev_ascii))
        url_download(elev_url, input_elev_ascii)

    # Download the land/water mask if necessary
    if overwrite_flag or not os.path.isfile(input_mask_ascii):
        logging.info('  {}'.format(os.path.basename(mask_url)))
        logging.debug('    {}'.format(mask_url))
        logging.debug('    {}'.format(input_mask_ascii))
        url_download(mask_url, input_mask_ascii)

    # The GDAL XYZ ASCII format expects LON/LAT/VALUE (i.e. X, Y, Z)
    # Export new .asc files with just the needed columns for each raster
    logging.debug('\nParsing input ASCII files')
    logging.debug('  {}'.format(elev_ascii))
    elev_df = pd.read_csv(
        input_elev_ascii, header=None, sep=r'\s+', engine='python',
        names=['COL', 'ROW', 'LAT', 'LON', 'VALUE'])
    elev_df = elev_df.sort_values(['LAT', 'LON'])
    if zero_elev_nodata_flag:
        elev_df.loc[elev_df['VALUE'] == nldas_nodata, 'VALUE'] = 0
    elev_df[['LON', 'LAT', 'VALUE']].to_csv(
        elev_ascii, header=False, index=False)

    logging.debug('  {}'.format(input_mask_ascii))
    mask_df = pd.read_csv(
        input_mask_ascii, header=None, sep=r'\s+', engine='python',
        names=['COL', 'ROW', 'LAT', 'LON', 'VALUE'])
    mask_df = mask_df.sort_values(['LAT', 'LON'])
    mask_df[['LON', 'LAT', 'VALUE']].to_csv(
        mask_ascii, header=False, index=False)
    mask_df[['LON', 'LAT', 'LAT']].to_csv(lat_ascii, header=False, index=False)
    mask_df[['LON', 'LAT', 'LON']].to_csv(lon_ascii, header=False, index=False)

    # Remove existing rasters if necessary
    #   (the -overwrite argument could be passed to gdalwarp instead)
    if overwrite_flag:
        logging.info('\nRemoving existing rasters')
        for raster_path in [elev_raster, mask_raster, lat_raster, lon_raster]:
            if os.path.isfile(raster_path):
                logging.info('  {}'.format(raster_path))
                subprocess.call(['gdalmanage', 'delete', raster_path])

    # Convert XYZ ascii to raster
    logging.info('\nConverting ASCII to raster')
    if not os.path.isfile(elev_raster):
        logging.info('  {}'.format(elev_ascii))
        subprocess.call(
            ['gdalwarp', '-of', 'HFA', '-t_srs', nldas_epsg,
             '-co', 'COMPRESSED=TRUE', '-ot', 'Float32',
             '-srcnodata', str(nldas_nodata),
             '-dstnodata', str(gdc.numpy_type_nodata(np.float32)),
             elev_ascii, elev_raster],
            cwd=ancillary_ws)
        # subprocess.call(
        #     ['gdal_translate', '-of', 'HFA', '-a_srs', nldas_epsg,
        #      '-co', 'COMPRESSED=TRUE', elev_ascii, elev_raster],
        #     cwd=ancillary_ws)
    if not os.path.isfile(mask_raster):
        logging.info('  {}'.format(mask_ascii))
        subprocess.call(
            ['gdalwarp', '-of', 'HFA', '-t_srs', nldas_epsg,
             '-co', 'COMPRESSED=TRUE', mask_ascii, mask_raster],
            cwd=ancillary_ws)
    if not os.path.isfile(lat_raster):
        logging.info('  {}'.format(lat_ascii))
        subprocess.call(
            ['gdalwarp', '-of', 'HFA', '-t_srs', nldas_epsg,
             '-co', 'COMPRESSED=TRUE', lat_ascii, lat_raster],
            cwd=ancillary_ws)
    if not os.path.isfile(lon_raster):
        logging.info('  {}'.format(lon_ascii))
        subprocess.call(
            ['gdalwarp', '-of', 'HFA', '-t_srs', nldas_epsg,
             '-co', 'COMPRESSED=TRUE', lon_ascii, lon_raster],
            cwd=ancillary_ws)

    # Cleanup
    os.remove(elev_ascii)
    os.remove(mask_ascii)
    os.remove(lat_ascii)
    os.remove(lon_ascii)

    logging.debug('\nScript Complete')
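
# For reference, GDAL's XYZ driver reads plain comma- or whitespace-separated
# X/Y/Z (i.e. LON/LAT/VALUE) triplets organized row by row, which is why the
# data frames above are sorted on ['LAT', 'LON'] before export. A minimal
# sketch (hypothetical paths) of the same ASCII-to-HFA conversion using the
# GDAL Python bindings instead of shelling out to gdalwarp:
from osgeo import gdal

gdal.UseExceptions()

# Warp the XYZ ASCII file to a compressed HFA (.img) raster in NAD83;
# 'nldas_elev.asc' and 'nldas_elev.img' stand in for the paths built above
gdal.Warp(
    'nldas_elev.img', 'nldas_elev.asc',
    format='HFA', dstSRS='EPSG:4269',
    outputType=gdal.GDT_Float32,
    srcNodata=-9999.0,
    creationOptions=['COMPRESSED=TRUE'])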
def main(ancillary_ws=os.getcwd(), overwrite_flag=False):
    """Process CIMIS ancillary data

    Args:
        ancillary_ws (str): folder of ancillary rasters
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nProcess CIMIS ancillary data')

    # Site URL
    site_url = 'http://spatialcimis.water.ca.gov/cimis'

    # DEM for air pressure calculation
    # http://topotools.cr.usgs.gov/gmted_viewer/gmted2010_global_grids.php
    elev_full_url = 'http://edcintl.cr.usgs.gov/downloads/sciweb1/shared/topo/downloads/GMTED/Grid_ZipFiles/mn30_grd.zip'
    elev_full_zip = os.path.join(ancillary_ws, 'mn30_grd.zip')
    elev_full_raster = os.path.join(ancillary_ws, 'mn30_grd')

    # Get CIMIS grid properties from the 2010/01/01 ETo raster
    # Grid of the spatial CIMIS input rasters
    # cimis_extent = gdc.Extent((-400000, -650000, 600000, 454000))
    # cimis_cs = 2000
    # cimis_geo = gdc.extent_geo(cimis_extent, cimis_cs)

    # Spatial reference parameters
    cimis_proj4 = (
        '+proj=aea +lat_1=34 +lat_2=40.5 +lat_0=0 +lon_0=-120 +x_0=0 '
        '+y_0=-4000000 +ellps=GRS80 +datum=NAD83 +units=m +no_defs')
    cimis_osr = gdc.proj4_osr(cimis_proj4)
    # cimis_epsg = 3310  # NAD_1983_California_Teale_Albers
    # cimis_osr = gdc.epsg_osr(cimis_epsg)
    cimis_osr.MorphToESRI()
    cimis_proj = cimis_osr.ExportToWkt()
    # snap_xmin, snap_ymin = (0, 0)

    # Build output workspace if it doesn't exist
    if not os.path.isdir(ancillary_ws):
        os.makedirs(ancillary_ws)

    # File paths
    mask_url = site_url + '/2010/01/01/ETo.asc.gz'
    mask_gz = os.path.join(ancillary_ws, 'cimis_mask.asc.gz')
    mask_ascii = os.path.join(ancillary_ws, 'cimis_mask.asc')
    mask_raster = os.path.join(ancillary_ws, 'cimis_mask.img')
    elev_raster = os.path.join(ancillary_ws, 'cimis_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'cimis_lat.img')
    lon_raster = os.path.join(ancillary_ws, 'cimis_lon.img')

    # Download an ETo ASCII raster to generate the mask raster
    if overwrite_flag or not os.path.isfile(mask_raster):
        logging.info('\nCIMIS mask')
        logging.debug('  Downloading')
        logging.debug('    {}'.format(mask_url))
        logging.debug('    {}'.format(mask_gz))
        url_download(mask_url, mask_gz)

        # Uncompress the '.gz' file to a new file
        logging.debug('  Uncompressing')
        logging.debug('    {}'.format(mask_ascii))
        try:
            with gzip.open(mask_gz, 'rb') as input_f, \
                    open(mask_ascii, 'wb') as output_f:
                output_f.write(input_f.read())
        except Exception:
            logging.error('  ERROR EXTRACTING FILE')
        os.remove(mask_gz)

        # # Set spatial reference of the ASCII files
        # if build_prj_flag:
        #     with open(mask_ascii.replace('.asc', '.prj'), 'w') as prj_f:
        #         prj_f.write(output_proj)

        # Convert the ASCII raster to an IMG raster
        logging.debug('  Computing mask')
        logging.debug('    {}'.format(mask_raster))
        mask_array = gdc.raster_to_array(mask_ascii, return_nodata=False)
        cimis_geo = gdc.raster_path_geo(mask_ascii)
        cimis_extent = gdc.raster_path_extent(mask_ascii)
        logging.debug('    {}'.format(cimis_geo))
        mask_array = np.isfinite(mask_array).astype(np.uint8)
        gdc.array_to_raster(
            mask_array, mask_raster,
            output_geo=cimis_geo, output_proj=cimis_proj, output_nodata=0)
        # gdc.ascii_to_raster(mask_ascii, mask_raster, np.float32, cimis_proj)
        os.remove(mask_ascii)

    # Compute latitude/longitude rasters
    if ((overwrite_flag
         or not os.path.isfile(lat_raster)
         or not os.path.isfile(lon_raster))
            and os.path.isfile(mask_raster)):
        logging.info('\nCIMIS latitude/longitude')
        # Read the geo-transform from the mask raster since the mask block
        # above may have been skipped on this run
        cimis_geo = gdc.raster_path_geo(mask_raster)
        logging.debug('  {}'.format(lat_raster))
        lat_array, lon_array = gdc.raster_lat_lon_func(mask_raster)
        gdc.array_to_raster(
            lat_array, lat_raster,
            output_geo=cimis_geo, output_proj=cimis_proj)
        logging.debug('  {}'.format(lon_raster))
        gdc.array_to_raster(
            lon_array, lon_raster,
            output_geo=cimis_geo, output_proj=cimis_proj)

    # Compute DEM raster
    if overwrite_flag or not os.path.isfile(elev_raster):
        logging.info('\nCIMIS DEM')
        logging.debug('  Downloading GMTED2010 DEM')
        logging.debug('    {}'.format(elev_full_url))
        logging.debug('    {}'.format(elev_full_zip))
        if overwrite_flag or not os.path.isfile(elev_full_zip):
            url_download(elev_full_url, elev_full_zip)

        # Uncompress the zip file
        logging.debug('  Uncompressing')
        logging.debug('    {}'.format(elev_full_raster))
        if overwrite_flag or not os.path.isfile(elev_full_raster):
            try:
                with zipfile.ZipFile(elev_full_zip, 'r') as z:
                    z.extractall(ancillary_ws)
            except Exception:
                logging.error('  ERROR EXTRACTING FILE')
            os.remove(elev_full_zip)

        # Get the extent and cellsize from the mask
        logging.debug('  Projecting to CIMIS grid')
        cimis_cs = gdc.raster_path_cellsize(mask_raster)[0]
        cimis_extent = gdc.raster_path_extent(mask_raster)
        logging.debug('    Extent: {}'.format(cimis_extent))
        logging.debug('    Cellsize: {}'.format(cimis_cs))

        logging.info('  {}'.format(elev_raster))
        if overwrite_flag and os.path.isfile(elev_raster):
            subprocess.call(['gdalmanage', 'delete', elev_raster])
        if not os.path.isfile(elev_raster):
            subprocess.call(
                ['gdalwarp', '-r', 'average', '-t_srs', cimis_proj4,
                 '-te', str(cimis_extent.xmin), str(cimis_extent.ymin),
                 str(cimis_extent.xmax), str(cimis_extent.ymax),
                 '-tr', str(cimis_cs), str(cimis_cs),
                 '-of', 'HFA', '-co', 'COMPRESSED=TRUE',
                 elev_full_raster, elev_raster],
                cwd=ancillary_ws)

    logging.debug('\nScript Complete')
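
# The gdc.raster_lat_lon_func helper used above is also not shown in this
# excerpt. A rough sketch of what it presumably does, assuming GDAL 3 Python
# bindings: build cell-center coordinates from the geo-transform and
# transform them to WGS84 lat/lon (effectively a no-op for geographic grids
# like GRIDMET, a real reprojection for the CIMIS Albers grid).
import numpy as np
from osgeo import gdal, osr

gdal.UseExceptions()


def raster_lat_lon_func(raster_path):
    """Return cell-center latitude/longitude arrays for a raster (sketch)"""
    raster_ds = gdal.Open(raster_path)
    x0, cs_x, _, y0, _, cs_y = raster_ds.GetGeoTransform()
    rows, cols = raster_ds.RasterYSize, raster_ds.RasterXSize
    raster_osr = osr.SpatialReference()
    raster_osr.ImportFromWkt(raster_ds.GetProjection())
    raster_ds = None

    # Cell center coordinates in the raster's own spatial reference
    x, y = np.meshgrid(
        x0 + cs_x * (np.arange(cols) + 0.5),
        y0 + cs_y * (np.arange(rows) + 0.5))

    # Transform the cell centers to WGS84 longitude/latitude
    wgs84_osr = osr.SpatialReference()
    wgs84_osr.ImportFromEPSG(4326)
    # Keep the traditional x/y (lon/lat) axis order under GDAL 3+
    raster_osr.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
    wgs84_osr.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
    tx = osr.CoordinateTransformation(raster_osr, wgs84_osr)
    points = tx.TransformPoints(list(zip(x.ravel(), y.ravel())))
    lon, lat = np.array(points).T[:2]
    return lat.reshape(rows, cols), lon.reshape(rows, cols)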
def main(output_ws=os.getcwd(), variables=['all'],
         start_date=None, end_date=None, overwrite_flag=False):
    """Download CIMIS data

    Args:
        output_ws (str): Folder path of the output .asc.gz files
        variables (list): CIMIS variables to download
          ('ETo', 'Rs', 'Tdew', 'Tn', 'Tx', 'U2')
          ('K', 'Rnl', 'Rso')
          Set as ['all'] to download all variables
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nDownloading CIMIS data\n')

    # Site URL
    site_url = 'http://spatialcimis.water.ca.gov/cimis'

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except Exception:
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date: {}'.format(end_dt))
    except Exception:
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date: {}'.format(end_dt))

    # CIMIS rasters to extract
    data_full_list = ['ETo', 'Rso', 'Rs', 'Tdew', 'Tn', 'Tx', 'U2']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif type(variables) is not list:
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        logging.info('Downloading all variables\n  {}'.format(
            ','.join(data_full_list)))
        data_list = data_full_list[:]
    elif not set(variables).issubset(set(data_full_list)):
        logging.error(
            '\nERROR: variables parameter is invalid\n  {}'.format(variables))
        sys.exit()
    else:
        data_list = variables

    # Build output workspace if it doesn't exist
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Set data types to lower case for comparison
    data_list = list(map(lambda x: x.lower(), data_list))

    # Each sub folder in the main folder has all imagery for 1 day
    # The path for each subfolder is /YYYY/MM/DD
    logging.info('')
    for input_date in date_range(start_dt, end_dt + dt.timedelta(1)):
        logging.info('{}'.format(input_date.date()))
        date_url = site_url + '/' + input_date.strftime('%Y/%m/%d')
        logging.debug('  {}'.format(date_url))

        # Download a list of all files in the date sub folder
        try:
            date_html = requests.get(date_url + '/').text
        except Exception:
            logging.error('  ERROR: {}'.format(date_url))
            continue
        file_list = sorted(list(set(
            re.findall(r'href=[\'"]?([^\'" >]+)', date_html))))
        if not file_list:
            logging.debug('  Empty file list, skipping date')
            continue

        # Create a separate folder for each day
        year_ws = os.path.join(output_ws, input_date.strftime('%Y'))
        if not os.path.isdir(year_ws):
            os.mkdir(year_ws)
        date_ws = os.path.join(year_ws, input_date.strftime('%Y_%m_%d'))
        if not os.path.isdir(date_ws):
            os.mkdir(date_ws)

        # Process each file in the sub folder
        for file_name in file_list:
            if not file_name.endswith('.asc.gz'):
                continue
            elif file_name.replace('.asc.gz', '').lower() not in data_list:
                continue
            file_url = '{}/{}'.format(date_url, file_name)
            save_path = os.path.join(date_ws, file_name)

            logging.info('  {}'.format(os.path.basename(save_path)))
            logging.debug('    {}'.format(file_url))
            logging.debug('    {}'.format(save_path))
            if os.path.isfile(save_path):
                if not overwrite_flag:
                    logging.debug('    File already exists, skipping')
                    continue
                else:
                    logging.debug('    File already exists, removing existing')
                    os.remove(save_path)

            url_download(file_url, save_path)

    logging.debug('\nScript Complete')
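
# The date_range generator used by this script (and the DAYMET script below)
# is another helper not shown here. A minimal sketch, assuming it yields one
# datetime per day and excludes the end value, which matches the
# end_dt + dt.timedelta(1) calls above:
import datetime as dt


def date_range(start_dt, end_dt):
    """Yield daily datetimes from start_dt up to (not including) end_dt"""
    cur_dt = start_dt
    while cur_dt < end_dt:
        yield cur_dt
        cur_dt += dt.timedelta(days=1)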
def main(netcdf_ws=os.getcwd(), variables=['all'],
         start_date=None, end_date=None, overwrite_flag=False):
    """Download DAYMET netcdf files

    Data is currently only available for 1980-2014
    Data for 2015 will need to be downloaded a different way

    Args:
        netcdf_ws (str): root folder of DAYMET data
        variables (list): DAYMET variables to download
          ('prcp', 'srad', 'vp', 'tmin', 'tmax')
          Set as ['all'] to download all available variables
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nDownloading DAYMET data')
    site_url = 'http://thredds.daac.ornl.gov/thredds/fileServer/ornldaac/1328'
    # site_url = 'http://daac.ornl.gov/data/daymet/Daymet_mosaics/data'

    # If a date is not set, process 2015
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except Exception as e:
        start_dt = dt.datetime(2015, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
        logging.debug(e)
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date: {}'.format(end_dt))
    except Exception as e:
        end_dt = dt.datetime(2015, 12, 31)
        logging.info('  End date: {}'.format(end_dt))
        logging.debug(e)

    # DAYMET rasters to extract
    var_full_list = ['prcp', 'srad', 'vp', 'tmin', 'tmax']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif type(variables) is not list:
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        logging.info('\nDownloading all variables\n  {}'.format(
            ','.join(var_full_list)))
        var_list = var_full_list[:]
    elif not set(variables).issubset(set(var_full_list)):
        logging.error(
            '\nERROR: variables parameter is invalid\n  {}'.format(variables))
        sys.exit()
    else:
        var_list = variables[:]

    # Build output workspace if it doesn't exist
    if not os.path.isdir(netcdf_ws):
        os.makedirs(netcdf_ws)

    # DAYMET data is stored by year
    year_list = sorted(list(set([
        i_dt.year
        for i_dt in date_range(start_dt, end_dt + dt.timedelta(1))])))
    year_list = list(map(lambda x: '{:04d}'.format(x), year_list))

    # Set data types to lower case for comparison
    var_list = list(map(lambda x: x.lower(), var_list))

    # The mosaics are stored as one netCDF file per variable per year
    # The path for each file is /YYYY/daymet_v3_<variable>_<YYYY>_na.nc4
    logging.info('')
    for year_str in year_list:
        logging.info(year_str)

        # Process each variable for the year
        for variable in var_list:
            file_name = 'daymet_v3_{}_{}_na.nc4'.format(variable, year_str)
            file_url = '{}/{}/{}'.format(site_url, year_str, file_name)
            save_path = os.path.join(netcdf_ws, file_name)

            logging.info('  {}'.format(file_name))
            logging.debug('    {}'.format(file_url))
            logging.debug('    {}'.format(save_path))
            if os.path.isfile(save_path):
                if not overwrite_flag:
                    logging.debug('    File already exists, skipping')
                    continue
                else:
                    logging.debug('    File already exists, removing existing')
                    os.remove(save_path)

            url_download(file_url, save_path)

    logging.debug('\nScript Complete')
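
# A typical invocation of the DAYMET downloader, assuming the module-level
# imports (os, sys, logging, datetime as dt, etc.) are in place; the output
# folder name is arbitrary:
import logging

logging.basicConfig(level=logging.INFO, format='%(message)s')

# Download two variables for calendar year 2014 into a local 'daymet' folder
main(
    netcdf_ws='daymet',
    variables=['prcp', 'tmax'],
    start_date='2014-01-01', end_date='2014-12-31',
    overwrite_flag=False)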