def main(grb_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(),
         variables=['pr'], landsat_ws=None, start_date=None, end_date=None,
         times_str='', extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract NLDAS target variable(s)

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        variables (list): NLDAS variables to download
            ('pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs')
        landsat_ws (str): folder of Landsat scenes or tar.gz files
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtract NLDAS target variable(s)')

    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H\.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2})\.(?P<TIME>\d{4})\.002\.grb$')

    output_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # output_fmt = '{}_{:04d}{:02d}{:02d}_{:04d}_nldas.img'

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(2017, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2017, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # Only process specific UTC hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # NLDAS rasters to extract
    data_full_list = ['pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif type(variables) is not list:
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif not set(variables).issubset(set(data_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n {}'.format(
            variables))
        sys.exit()

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info(' {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            try:
                landsat_match = landsat_re.match(os.path.basename(root))
                date_list.append(dt.datetime.strptime(
                    '_'.join(landsat_match.groups()),
                    '%Y_%m_%d').date().isoformat())
                dirs[:] = []
            except:
                pass
            for file in files:
                try:
                    landsat_match = landsat_re.match(file)
                    date_list.append(dt.datetime.strptime(
                        '_'.join(landsat_match.groups()),
                        '%Y_%m_%d').date().isoformat())
                except:
                    pass
        date_list = sorted(list(set(date_list)))
    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug(' Projection: {}'.format(nldas_proj))
    logging.debug(' Cellsize: {}'.format(nldas_cs))
    logging.debug(' Geo: {}'.format(nldas_geo))
    logging.debug(' Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        logging.debug(' Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(
            mask_path, mask_extent=nldas_extent, fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # NLDAS band name dictionary
    nldas_band_dict = dict()
    nldas_band_dict['pr'] = 'Total precipitation [kg/m^2]'
    nldas_band_dict['srad'] = 'Downward shortwave radiation flux [W/m^2]'
    nldas_band_dict['sph'] = 'Specific humidity [kg/kg]'
    nldas_band_dict['tair'] = 'Temperature [C]'
    nldas_band_dict['tmmn'] = 'Temperature [C]'
    nldas_band_dict['tmmx'] = 'Temperature [C]'
    nldas_band_dict['vs'] = [
        'u-component of wind [m/s]', 'v-component of wind [m/s]']

    # NLDAS band name dictionary
    # nldas_band_dict = dict()
    # nldas_band_dict['pr'] = 'precipitation_amount'
    # nldas_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air'
    # nldas_band_dict['sph'] = 'specific_humidity'
    # nldas_band_dict['tmmn'] = 'air_temperature'
    # nldas_band_dict['tmmx'] = 'air_temperature'
    # nldas_band_dict['vs'] = 'wind_speed'

    # NLDAS band name dictionary (EarthEngine keys, GRID_ELEMENT values)
    # nldas_band_dict = dict()
    # nldas_band_dict['total_precipitation'] = 'Total precipitation [kg/m^2]'
    # nldas_band_dict['shortwave_radiation'] = 'Downward shortwave radiation flux [W/m^2]'
    # nldas_band_dict['specific_humidity'] = 'Specific humidity [kg/kg]'
    # nldas_band_dict['pressure'] = 'Pressure [Pa]'
    # nldas_band_dict['temperature'] = 'Temperature [C]'
    # nldas_band_dict['wind_u'] = 'u-component of wind [m/s]'
    # nldas_band_dict['wind_v'] = 'v-component of wind [m/s]'

    # Process each variable
    logging.info('\nReading NLDAS GRIBs')
    for input_var in variables:
        logging.info("Variable: {}".format(input_var))

        # Build output folder
        var_ws = os.path.join(output_ws, input_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Each sub folder in the main folder has all imagery for 1 day
        # The path for each subfolder is the /YYYY/DOY
        # This approach will process files for target dates
        # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
        #     logging.info(input_dt.date())

        # Iterate all available files and check dates if necessary
        for root, folders, files in os.walk(grb_ws):
            root_split = os.path.normpath(root).split(os.sep)

            # If the year/doy is outside the range, skip
            if (re.match(r'\d{4}', root_split[-2]) and
                    re.match(r'\d{3}', root_split[-1])):
                root_dt = dt.datetime.strptime('{}_{}'.format(
                    root_split[-2], root_split[-1]), '%Y_%j')
                logging.info('{}-{:02d}-{:02d}'.format(
                    root_dt.year, root_dt.month, root_dt.day))
                if ((start_dt is not None and root_dt < start_dt) or
                        (end_dt is not None and root_dt > end_dt)):
                    continue
                elif date_list and root_dt.date().isoformat() not in date_list:
                    continue
            # If the year is outside the range, don't search subfolders
            elif re.match(r'\d{4}', root_split[-1]):
                root_year = int(root_split[-1])
                logging.info('Year: {}'.format(root_year))
                if ((start_dt is not None and root_year < start_dt.year) or
                        (end_dt is not None and root_year > end_dt.year)):
                    folders[:] = []
                else:
                    folders[:] = sorted(folders)
                continue
            else:
                continue

            # Create a single raster for each day with 24 bands
            # Each time step will be stored in a separate band
            output_name = output_fmt.format(
                input_var, root_dt.year, root_dt.month, root_dt.day)
            output_path = os.path.join(
                var_ws, str(root_dt.year), output_name)
            logging.debug(' {}'.format(output_path))
            if os.path.isfile(output_path):
                if not overwrite_flag:
                    logging.debug(' File already exists, skipping')
                    continue
                else:
                    logging.debug(' File already exists, removing existing')
                    os.remove(output_path)
            logging.debug(' {}'.format(root))
            if not os.path.isdir(os.path.dirname(output_path)):
                os.makedirs(os.path.dirname(output_path))
            gdc.build_empty_raster(
                output_path, band_cnt=24, output_dtype=np.float32,
                output_proj=nldas_proj, output_cs=nldas_cs,
                output_extent=nldas_extent, output_fill_flag=True)

            # Iterate through hourly files
            for input_name in sorted(files):
                logging.info(' {}'.format(input_name))
                input_path = os.path.join(root, input_name)
                input_match = input_re.match(input_name)
                if input_match is None:
                    logging.debug(
                        ' Regular expression didn\'t match, skipping')
                    continue
                input_dt = dt.datetime(
                    int(input_match.group('YEAR')),
                    int(input_match.group('MONTH')),
                    int(input_match.group('DAY')))
                time_str = input_match.group('TIME')
                band_num = int(time_str[:2]) + 1
                # if start_dt is not None and input_dt < start_dt:
                #     continue
                # elif end_dt is not None and input_dt > end_dt:
                #     continue
                # elif date_list and input_dt.date().isoformat() not in date_list:
                #     continue
                if time_str not in time_list:
                    logging.debug(' Time not in list, skipping')
                    continue
                logging.debug(' Time: {} {}'.format(
                    input_dt.date(), time_str))
                logging.debug(' Band: {}'.format(band_num))

                # Determine band numbering/naming
                input_band_dict = grib_band_names(input_path)

                # Extract array and save
                input_ds = gdal.Open(input_path)

                # Convert Kelvin to Celsius (old NLDAS files were in K, I think)
                if input_var in ['tair', 'tmmx', 'tmmn']:
                    # Temperature should be in C for et_common.refet_hourly_func()
                    if 'Temperature [K]' in input_band_dict.keys():
                        temp_band_units = 'K'
                        output_array = gdc.raster_ds_to_array(
                            input_ds, band=input_band_dict['Temperature [K]'],
                            mask_extent=nldas_extent, return_nodata=False)
                    elif 'Temperature [C]' in input_band_dict.keys():
                        temp_band_units = 'C'
                        output_array = gdc.raster_ds_to_array(
                            input_ds, band=input_band_dict['Temperature [C]'],
                            mask_extent=nldas_extent, return_nodata=False)
                    else:
                        logging.error('Unknown Temperature units, skipping')
                        logging.error(' {}'.format(input_band_dict.keys()))
                        continue

                    # DEADBEEF - Having issue with T appearing to be C
                    #   but labeled as K
                    # Try to determine temperature units from values
                    temp_mean = float(np.nanmean(output_array))
                    temp_units_dict = {20: 'C', 293: 'K'}
                    temp_array_units = temp_units_dict[
                        min(temp_units_dict, key=lambda x: abs(x - temp_mean))]
                    if temp_array_units == 'K' and temp_band_units == 'K':
                        logging.debug(' Converting temperature from K to C')
                        output_array -= 273.15
                    elif temp_array_units == 'C' and temp_band_units == 'C':
                        pass
                    elif temp_array_units == 'C' and temp_band_units == 'K':
                        logging.debug(
                            (' Temperature units are K in the GRB band name, ' +
                             'but values appear to be C\n Mean temperature: {:.2f}\n' +
                             ' Values will NOT be adjusted').format(temp_mean))
                    elif temp_array_units == 'K' and temp_band_units == 'C':
                        logging.debug(
                            (' Temperature units are C in the GRB band name, ' +
                             'but values appear to be K\n Mean temperature: {:.2f}\n' +
                             ' Values will be adjusted from K to C').format(temp_mean))
                        output_array -= 273.15

                # Compute wind speed from vectors
                elif input_var == 'vs':
                    wind_u_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['u-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    wind_v_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['v-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    output_array = np.sqrt(
                        wind_u_array ** 2 + wind_v_array ** 2)

                # Read all other variables directly
                else:
                    output_array = gdc.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[nldas_band_dict[input_var]],
                        mask_extent=nldas_extent, return_nodata=False)

                # Save the projected array as 32-bit floats
                gdc.array_to_comp_raster(
                    output_array.astype(np.float32), output_path,
                    band=band_num)
                # gdc.block_to_raster(
                #     ea_array.astype(np.float32), output_path, band=band)
                # gdc.array_to_raster(
                #     output_array.astype(np.float32), output_path,
                #     output_geo=nldas_geo, output_proj=nldas_proj,
                #     stats_flag=stats_flag)
                del output_array
                input_ds = None

            if stats_flag:
                gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
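

# A minimal usage sketch (not part of the original script): it shows how the
# NLDAS extraction main() above might be called directly. All paths, dates,
# and extent values below are hypothetical placeholders; the only requirements
# implied by the code above are the grb_ws/YYYY/DOY folder layout and an
# 'nldas_mask.img' raster in the ancillary folder.
def _example_extract_nldas():
    """Hypothetical example: extract hourly NLDAS precipitation for June 2017."""
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    main(
        grb_ws='/data/nldas/grb',              # hypothetical GRB folder (YYYY/DOY)
        ancillary_ws='/data/nldas/ancillary',  # must contain nldas_mask.img
        output_ws='/data/nldas/hourly',
        variables=['pr'],
        start_date='2017-06-01',
        end_date='2017-06-30',
        times_str='8-18',                      # UTC hours, parsed by parse_int_set()
        # Assumed order: [xmin, ymin, xmax, ymax] in decimal degrees
        output_extent=[-120.0, 38.0, -118.5, 39.5],
        stats_flag=True,
        overwrite_flag=False)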
def main(ini_path, tile_list=None, overwrite_flag=False, mp_procs=1):
    """Prep Landsat path/row specific data

    Parameters
    ----------
    ini_path : str
        File path of the input parameters file.
    tile_list : list, optional
        Landsat path/rows to process (i.e. [p045r043, p045r033]).
        This will override the tile list in the INI file.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    mp_procs : int, optional
        Number of cores to use (the default is 1).

    Returns
    -------
    None

    """
    logging.info('\nPrepare path/row data')

    # Open config file
    config = python_common.open_ini(ini_path)

    # Get input parameters
    logging.debug(' Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = python_common.read_param('tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug(' Year: {}'.format(year))
    logging.debug(' Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug(' Project: {}'.format(project_ws))

    # study_area_path = config.get('INPUTS', 'study_area_path')
    footprint_path = config.get('INPUTS', 'footprint_path')
    # For now, assume the UTM zone file is colocated with the footprints shapefile
    utm_path = python_common.read_param(
        'utm_path',
        os.path.join(os.path.dirname(footprint_path),
                     'wrs2_tile_utm_zones.json'),
        config, 'INPUTS')
    skip_list_path = python_common.read_param(
        'skip_list_path', '', config, 'INPUTS')

    landsat_flag = python_common.read_param(
        'landsat_flag', True, config, 'INPUTS')
    ledaps_flag = False
    dem_flag = python_common.read_param('dem_flag', True, config, 'INPUTS')
    nlcd_flag = python_common.read_param('nlcd_flag', True, config, 'INPUTS')
    cdl_flag = python_common.read_param('cdl_flag', False, config, 'INPUTS')
    landfire_flag = python_common.read_param(
        'landfire_flag', False, config, 'INPUTS')
    field_flag = python_common.read_param(
        'field_flag', False, config, 'INPUTS')

    tile_gcs_buffer = python_common.read_param('tile_buffer', 0.25, config)

    # Input/output folder and file paths
    if landsat_flag:
        landsat_input_ws = config.get('INPUTS', 'landsat_input_folder')
    else:
        landsat_input_ws = None
    # if ledaps_flag:
    #     ledaps_input_ws = config.get('INPUTS', 'ledaps_input_folder')
    # else:
    #     ledaps_input_ws = None

    if dem_flag:
        dem_input_ws = config.get('INPUTS', 'dem_input_folder')
        dem_tile_fmt = config.get('INPUTS', 'dem_tile_fmt')
        dem_output_ws = config.get('INPUTS', 'dem_output_folder')
        dem_output_name = python_common.read_param(
            'dem_output_name', 'dem.img', config)
        # dem_output_name = config.get('INPUTS', 'dem_output_name')
    else:
        dem_input_ws, dem_tile_fmt = None, None
        dem_output_ws, dem_output_name = None, None

    if nlcd_flag:
        nlcd_input_path = config.get('INPUTS', 'nlcd_input_path')
        nlcd_output_ws = config.get('INPUTS', 'nlcd_output_folder')
        nlcd_output_fmt = python_common.read_param(
            'nlcd_output_fmt', 'nlcd_{:04d}.img', config)
    else:
        nlcd_input_path, nlcd_output_ws, nlcd_output_fmt = None, None, None

    if cdl_flag:
        cdl_input_path = config.get('INPUTS', 'cdl_input_path')
        cdl_ag_list = config.get('INPUTS', 'cdl_ag_list')
        cdl_ag_list = list(python_common.parse_int_set(cdl_ag_list))
        # default_cdl_ag_list = range(1,62) + range(66,78) + range(204,255)
        # cdl_ag_list = python_common.read_param(
        #    'cdl_ag_list', default_cdl_ag_list, config)
        # cdl_ag_list = list(map(int, cdl_ag_list))
        # cdl_non_ag_list = python_common.read_param(
        #    'cdl_non_ag_list', [], config)
        cdl_output_ws = config.get('INPUTS', 'cdl_output_folder')
        cdl_output_fmt = python_common.read_param(
            'cdl_output_fmt', 'cdl_{:04d}.img', config)
        cdl_ag_output_fmt = python_common.read_param(
            'cdl_ag_output_fmt', 'cdl_ag_{:04d}.img', config)
    else:
        cdl_input_path, cdl_ag_list = None, None
        cdl_output_ws, cdl_output_fmt, cdl_ag_output_fmt = None, None, None

    if landfire_flag:
        landfire_input_path = config.get('INPUTS', 'landfire_input_path')
        landfire_ag_list = config.get('INPUTS', 'landfire_ag_list')
        landfire_ag_list = list(python_common.parse_int_set(landfire_ag_list))
        # default_landfire_ag_list = range(3960,4000)
        # landfire_ag_list = python_common.read_param(
        #    'landfire_ag_list', default_landfire_ag_list, config)
        # landfire_ag_list = list(map(int, landfire_ag_list))
        landfire_output_ws = config.get('INPUTS', 'landfire_output_folder')
        landfire_output_fmt = python_common.read_param(
            'landfire_output_fmt', 'landfire_{:04d}.img', config)
        landfire_ag_output_fmt = python_common.read_param(
            'landfire_ag_output_fmt', 'landfire_ag_{:04d}.img', config)
    else:
        landfire_input_path, landfire_ag_list = None, None
        landfire_output_ws = None
        landfire_output_fmt, landfire_ag_output_fmt = None, None

    if field_flag:
        field_input_path = config.get('INPUTS', 'field_input_path')
        field_output_ws = config.get('INPUTS', 'field_output_folder')
        field_output_fmt = python_common.read_param(
            'field_output_fmt', 'fields_{:04d}.img', config)
    else:
        field_input_path = None
        field_output_ws, field_output_fmt = None, None

    # File/folder names
    orig_data_folder_name = 'ORIGINAL_DATA'

    # Check inputs folders/paths
    logging.info('\nChecking input folders/files')
    file_check(footprint_path)
    file_check(utm_path)
    if landsat_flag:
        folder_check(landsat_input_ws)
    # if ledaps_flag:
    #     folder_check(ledaps_input_ws)
    if dem_flag:
        folder_check(dem_input_ws)
    if nlcd_flag:
        file_check(nlcd_input_path)
    if cdl_flag:
        file_check(cdl_input_path)
    if landfire_flag:
        # Landfire will likely be an ESRI grid (set as a folder)
        if not (os.path.isdir(landfire_input_path) or
                os.path.isfile(landfire_input_path)):
            logging.error('\n {} does not exist'.format(landfire_input_path))
    if field_flag:
        file_check(field_input_path)
    if skip_list_path:
        file_check(skip_list_path)

    # Build output folders
    if not os.path.isdir(project_ws):
        os.makedirs(project_ws)
    if dem_flag and not os.path.isdir(dem_output_ws):
        os.makedirs(dem_output_ws)
    if nlcd_flag and not os.path.isdir(nlcd_output_ws):
        os.makedirs(nlcd_output_ws)
    if cdl_flag and not os.path.isdir(cdl_output_ws):
        os.makedirs(cdl_output_ws)
    if landfire_flag and not os.path.isdir(landfire_output_ws):
        os.makedirs(landfire_output_ws)
    if field_flag and not os.path.isdir(field_output_ws):
        os.makedirs(field_output_ws)

    # For now assume path/row are three digit numbers
    tile_fmt = 'p{:03d}r{:03d}'
    tile_re = re.compile(r'p(\d{3})r(\d{3})')
    image_re = re.compile(
        r'^(LT04|LT05|LE07|LC08)_(\d{3})(\d{3})_(\d{4})(\d{2})(\d{2})')
    snap_cs = 30
    snap_xmin, snap_ymin = (15, 15)

    # Set snap environment parameters
    env = drigo.env
    env.cellsize = snap_cs
    env.snap_xmin, env.snap_ymin = snap_xmin, snap_ymin

    # Use WGS84 (EPSG 4326) for GCS spatial reference
    # Could also use NAD83 (EPSG 4269)
    # gcs_epsg = 4326
    # gcs_osr = epsg_osr(4326)
    # gcs_proj = osr_proj(gcs_osr)

    # Landsat Footprints (WRS2 Descending Polygons)
    logging.debug('\nFootprint (WRS2 descending should be GCS84):')
    tile_gcs_osr = drigo.feature_path_osr(footprint_path)
    logging.debug(' OSR: {}'.format(tile_gcs_osr))

    # Double check that WRS2 descending shapefile is GCS84
    # if tile_gcs_osr != epsg_osr(4326):
    #     logging.error(' WRS2 is not GCS84')
    #     sys.exit()

    # Get geometry for each path/row
    tile_gcs_wkt_dict = path_row_wkt_func(
        footprint_path, path_field='PATH', row_field='ROW')

    # Get UTM zone for each path/row
    # DEADBEEF - Using "eval" is considered unsafe and should be changed
    tile_utm_zone_dict = eval(open(utm_path, 'r').read())

    # Project study area geometry to GCS coordinates
    # logging.debug('\nStudy area')
    # study_area_geom = feature_path_geom_union(study_area_path)
    # study_area_gcs_geom = study_area_geom.Clone()
    # study_area_gcs_geom.TransformTo(tile_gcs_osr)

    # Get list of all intersecting Landsat path/rows
    # logging.info('\nLandsat path/rows')
    # tile_list = []
    # for tile_name, tile_gcs_wkt in tile_gcs_wkt_dict.items():
    #     tile_gcs_geom = ogr.CreateGeometryFromWkt(tile_gcs_wkt)
    #     if tile_gcs_geom.Intersects(study_area_gcs_geom):
    #         tile_list.append(tile_name)
    # for tile_name in sorted(tile_list):
    #     logging.debug(' {}'.format(tile_name))

    # Check that each path/row extent and UTM zone exist
    logging.info('\nChecking path/row list against footprint shapefile')
    for tile_name in sorted(tile_list):
        if tile_name not in tile_gcs_wkt_dict.keys():
            logging.error(
                ' {} feature not in footprint shapefile'.format(tile_name))
            continue
        elif tile_name not in tile_utm_zone_dict.keys():
            logging.error(
                ' {} UTM zone not in footprint shapefile'.format(tile_name))
            continue
        elif tile_utm_zone_dict[tile_name] == 0:
            logging.error(
                (' UTM zone is not set for {} in ' +
                 'footprint shapefile').format(tile_name))
            continue

    # Build output folders for each path/row
    logging.info('\nBuilding path/row folders')
    for tile_name in tile_list:
        logging.debug(' {} {}'.format(year, tile_name))
        tile_output_ws = os.path.join(project_ws, str(year), tile_name)
        if ((landsat_flag or ledaps_flag) and
                not os.path.isdir(tile_output_ws)):
            os.makedirs(tile_output_ws)
        if (dem_flag and
                not os.path.isdir(os.path.join(dem_output_ws, tile_name))):
            os.makedirs(os.path.join(dem_output_ws, tile_name))
        if (nlcd_flag and
                not os.path.isdir(os.path.join(nlcd_output_ws, tile_name))):
            os.makedirs(os.path.join(nlcd_output_ws, tile_name))
        if (cdl_flag and
                not os.path.isdir(os.path.join(cdl_output_ws, tile_name))):
            os.makedirs(os.path.join(cdl_output_ws, tile_name))
        if (landfire_flag and
                not os.path.isdir(os.path.join(landfire_output_ws, tile_name))):
            os.makedirs(os.path.join(landfire_output_ws, tile_name))
        if (field_flag and
                not os.path.isdir(os.path.join(field_output_ws, tile_name))):
            os.makedirs(os.path.join(field_output_ws, tile_name))

    # Read skip list
    if (landsat_flag or ledaps_flag) and skip_list_path:
        logging.debug('\nReading scene skip list')
        with open(skip_list_path) as skip_list_f:
            skip_list = skip_list_f.readlines()
            skip_list = [
                scene.strip() for scene in skip_list
                if image_re.match(scene.strip())]
    else:
        logging.debug('\nSkip list not set in INI')
        skip_list = []

    # Copy and unzip raw Landsat scenes
    # Use these for thermal band, MTL file (scene time), and to run FMask
    if landsat_flag:
        logging.info('\nExtract raw Landsat scenes')
        # Process each path/row
        extract_targz_list = []
        for tile_name in tile_list:
            tile_output_ws = os.path.join(project_ws, str(year), tile_name)

            # path/row as strings with leading zeros
            path, row = map(str, tile_re.match(tile_name).groups())
            tile_input_ws = os.path.join(
                landsat_input_ws, path, row, str(year))
            if not os.path.isdir(tile_input_ws):
                continue
            logging.info(' {} {}'.format(year, tile_name))

            # Process each tar.gz file
            for input_name in sorted(os.listdir(tile_input_ws)):
                if (not image_re.match(input_name) and
                        not input_name.endswith('.tar.gz')):
                    continue

                # Get Landsat scene ID from tar.gz file name
                # DEADBEEF - For now this is the EE scene ID, but it could be
                #   changed to the full collection 1 ID
                scene_id = input_name.split('.')[0]

                # Output workspace
                image_output_ws = os.path.join(tile_output_ws, scene_id)
                orig_data_ws = os.path.join(
                    image_output_ws, orig_data_folder_name)

                if skip_list and scene_id in skip_list:
                    logging.debug(' {} - Skipping scene'.format(scene_id))
                    # DEADBEEF - Should the script always remove the scene
                    #   if it is in the skip list?
                    # Maybe only if overwrite is set?
                    if os.path.isdir(image_output_ws):
                        # input('Press ENTER to delete {}'.format(scene_id))
                        shutil.rmtree(image_output_ws)
                    continue

                # If orig_data_ws doesn't exist, don't check images
                if not os.path.isdir(orig_data_ws):
                    os.makedirs(orig_data_ws)
                elif (not overwrite_flag and
                        landsat_files_check(image_output_ws)):
                    continue

                # Extract Landsat tar.gz file
                input_path = os.path.join(tile_input_ws, input_name)
                logging.debug(' {}'.format(orig_data_ws))
                # sys.exit()
                if mp_procs > 1:
                    extract_targz_list.append([input_path, orig_data_ws])
                else:
                    python_common.extract_targz_func(input_path, orig_data_ws)

                # # Use a command line call
                # input_path = os.path.join(tile_input_ws, input_name)
                # if job_i % pbs_jobs != 0:
                #     job_list.append('tar -zxvf {} -C {} &\n'.format(
                #         input_path, orig_data_ws))
                # else:
                #     job_list.append('tar -zxvf {} -C {}\n'.format(
                #         input_path, orig_data_ws))
                #     # job_list.append('tar -zxvf {} -C {} &\n'.format(
                #     #     input_path, orig_data_ws))
                #     # job_list.append('wait\n')
                # job_i += 1

        # Extract Landsat tar.gz files using multiprocessing
        if extract_targz_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                python_common.extract_targz_mp, extract_targz_list,
                chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Get projected extent for each path/row
    # This should probably be in a function
    if (dem_flag or nlcd_flag or cdl_flag or landfire_flag or field_flag):
        tile_utm_extent_dict = gcs_to_utm_dict(
            tile_list, tile_utm_zone_dict, tile_gcs_osr,
            tile_gcs_wkt_dict, tile_gcs_buffer,
            snap_xmin, snap_ymin, snap_cs)

    # Mosaic DEM tiles for each path/row
    if dem_flag:
        logging.info('\nBuild DEM for each path/row')
        mosaic_mp_list = []
        for tile_name in tile_list:
            # Output folder and path
            tile_output_path = os.path.join(
                dem_output_ws, tile_name, dem_output_name)
            if not overwrite_flag and os.path.isfile(tile_output_path):
                logging.debug(' {} already exists, skipping'.format(
                    os.path.basename(tile_output_path)))
                continue
            logging.info(' {}'.format(tile_name))

            # Get the path/row geometry in GCS for selecting intersecting tiles
            tile_gcs_geom = ogr.CreateGeometryFromWkt(
                tile_gcs_wkt_dict[tile_name])

            # Apply a small buffer (in degrees) to the extent
            # DEADBEEF - Buffer fails if GDAL is not built with GEOS support
            # tile_gcs_geom = tile_gcs_geom.Buffer(tile_gcs_buffer)
            tile_gcs_extent = drigo.Extent(tile_gcs_geom.GetEnvelope())
            tile_gcs_extent = tile_gcs_extent.ogrenv_swap()
            tile_gcs_extent.buffer_extent(tile_gcs_buffer)
            # tile_gcs_extent.ymin, tile_gcs_extent.xmax = tile_gcs_extent.xmax, tile_gcs_extent.ymin

            # Offsets are needed since tile name is upper left corner of tile
            # Tile n36w120 spans -120 <-> -119 and 35 <-> 36
            lon_list = range(
                int(tile_gcs_extent.xmin) - 1, int(tile_gcs_extent.xmax))
            lat_list = range(
                int(tile_gcs_extent.ymin) + 1, int(tile_gcs_extent.ymax) + 2)

            # Get list of DEM tile rasters
            dem_tile_list = []
            for lat, lon in itertools.product(lat_list, lon_list):
                # Convert sign of lat/lon to letter
                lat = ('n' + '{:02d}'.format(abs(lat)) if lat >= 0
                       else 's' + '{:02d}'.format(abs(lat)))
                lon = ('w' + '{:03d}'.format(abs(lon)) if lon < 0
                       else 'e' + '{:03d}'.format(abs(lon)))
                dem_tile_path = os.path.join(
                    dem_input_ws, dem_tile_fmt.format(lat, lon))
                if os.path.isfile(dem_tile_path):
                    dem_tile_list.append(dem_tile_path)
            if not dem_tile_list:
                logging.warning(' WARNING: No DEM tiles were selected')
                continue

            # Mosaic tiles using mosaic function
            tile_utm_osr = drigo.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            tile_utm_ullr = tile_utm_extent.ul_lr_swap()

            # Mosaic, clip, project using custom function
            if mp_procs > 1:
                mosaic_mp_list.append([
                    dem_tile_list, tile_output_path,
                    tile_utm_proj, snap_cs, tile_utm_extent])
            else:
                drigo.mosaic_tiles(
                    dem_tile_list, tile_output_path,
                    tile_utm_osr, snap_cs, tile_utm_extent)

            # Cleanup
            del tile_output_path
            del tile_gcs_geom, tile_gcs_extent, tile_utm_extent
            del tile_utm_osr, tile_utm_proj
            del lon_list, lat_list, dem_tile_list

        # Mosaic DEM rasters using multiprocessing
        if mosaic_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(mosaic_tiles_mp, mosaic_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip NLCD for each path/row
    if nlcd_flag:
        logging.info('\nBuild NLCD for each path/row')
        project_mp_list = []
        for tile_name in tile_list:
            nlcd_output_path = os.path.join(
                nlcd_output_ws, tile_name, nlcd_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(nlcd_output_path):
                logging.debug(' {} already exists, skipping'.format(
                    os.path.basename(nlcd_output_path)))
                continue
            logging.info(' {}'.format(tile_name))

            # Set the nodata value on the NLCD raster if it is not set
            nlcd_ds = gdal.Open(nlcd_input_path, 0)
            nlcd_band = nlcd_ds.GetRasterBand(1)
            nlcd_nodata = nlcd_band.GetNoDataValue()
            nlcd_ds = None
            if nlcd_nodata is None:
                nlcd_nodata = 255

            # Clip and project
            tile_utm_osr = drigo.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            tile_utm_ullr = tile_utm_extent.ul_lr_swap()

            if mp_procs > 1:
                project_mp_list.append([
                    nlcd_input_path, nlcd_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, nlcd_nodata])
            else:
                drigo.project_raster(
                    nlcd_input_path, nlcd_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs,
                    tile_utm_extent, nlcd_nodata)

            # Cleanup
            del nlcd_output_path
            del nlcd_ds, nlcd_band, nlcd_nodata
            del tile_utm_osr, tile_utm_proj, tile_utm_extent

        # Project NLCD rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                drigo.project_raster_mp, project_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip CDL for each path/row
    if cdl_flag:
        logging.info('\nBuild CDL for each path/row')
        project_mp_list, remap_mp_list = [], []
        for tile_name in tile_list:
            cdl_output_path = os.path.join(
                cdl_output_ws, tile_name, cdl_output_fmt.format(year))
            cdl_ag_output_path = os.path.join(
                cdl_output_ws, tile_name, cdl_ag_output_fmt.format(year))
            if not os.path.isfile(cdl_input_path):
                logging.error('\n\n {} does not exist'.format(cdl_input_path))
                sys.exit()
            if not overwrite_flag and os.path.isfile(cdl_output_path):
                logging.debug(' {} already exists, skipping'.format(
                    os.path.basename(cdl_output_path)))
                continue
            logging.info(' {}'.format(tile_name))

            # Set the nodata value on the CDL raster if it is not set
            cdl_ds = gdal.Open(cdl_input_path, 0)
            cdl_band = cdl_ds.GetRasterBand(1)
            cdl_nodata = cdl_band.GetNoDataValue()
            cdl_ds = None
            if cdl_nodata is None:
                cdl_nodata = 255

            # Clip and project
            tile_utm_osr = drigo.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]

            if mp_procs > 1:
                project_mp_list.append([
                    cdl_input_path, cdl_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, cdl_nodata])
                remap_mp_list.append(
                    [cdl_output_path, cdl_ag_output_path, cdl_ag_list])
            else:
                drigo.project_raster(
                    cdl_input_path, cdl_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs,
                    tile_utm_extent, cdl_nodata)
                # Build a mask of CDL ag lands
                remap_mask_func(
                    cdl_output_path, cdl_ag_output_path, cdl_ag_list)

            # Cleanup
            del cdl_output_path
            del cdl_ds, cdl_band, cdl_nodata
            del tile_utm_osr, tile_utm_proj, tile_utm_extent

        # Project CDL rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                drigo.project_raster_mp, project_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool
        if remap_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip LANDFIRE for each path/row
    if landfire_flag:
        logging.info('\nBuild LANDFIRE for each path/row')
        project_mp_list, remap_mp_list = [], []
        for tile_name in tile_list:
            landfire_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_output_fmt.format(year))
            landfire_ag_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_ag_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(landfire_output_path):
                logging.debug(' {} already exists, skipping'.format(
                    os.path.basename(landfire_output_path)))
                continue
            logging.info(' {}'.format(tile_name))

            # Set the nodata value on the LANDFIRE raster if it is not set
            # landfire_ds = gdal.Open(landfire_input_path, 0)
            # landfire_band = landfire_ds.GetRasterBand(1)
            # landfire_nodata = landfire_band.GetNoDataValue()
            # landfire_ds = None
            # if landfire_nodata is None:
            #     landfire_nodata = 32767
            # del landfire_ds, landfire_band
            landfire_nodata = 32767

            # Clip and project
            tile_utm_osr = drigo.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = drigo.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]

            if mp_procs > 1:
                project_mp_list.append([
                    landfire_input_path, landfire_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, landfire_nodata])
                remap_mp_list.append([
                    landfire_output_path, landfire_ag_output_path,
                    landfire_ag_list])
            else:
                drigo.project_raster(
                    landfire_input_path, landfire_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs,
                    tile_utm_extent, landfire_nodata)
                # Build a mask of LANDFIRE ag lands
                remap_mask_func(
                    landfire_output_path, landfire_ag_output_path,
                    landfire_ag_list)

            # Cleanup
            del landfire_output_path
            del tile_utm_osr, tile_utm_proj, tile_utm_extent

        # Project LANDFIRE rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                drigo.project_raster_mp, project_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool
        if remap_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Convert field shapefiles to raster
    if field_flag:
        logging.info('\nBuild field rasters for each path/row')
        for tile_name in tile_list:
            logging.info(' {}'.format(tile_name))
            tile_output_ws = os.path.join(field_output_ws, tile_name)

            # Shapefile paths
            field_proj_name = (
                os.path.splitext(field_output_fmt.format(year))[0] +
                '_wgs84z{}.shp'.format(tile_utm_zone_dict[tile_name]))
            field_proj_path = os.path.join(tile_output_ws, field_proj_name)
            field_output_path = os.path.join(
                tile_output_ws, field_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(field_output_path):
                logging.debug(' {} already exists, skipping'.format(
                    os.path.basename(field_output_path)))
                continue

            # The ogr2ogr spatial query is in the input spatial reference
            # Project the path/row extent to the field osr/proj
            field_input_osr = drigo.feature_path_osr(field_input_path)
            tile_utm_osr = drigo.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            # field_input_proj = drigo.osr_proj(field_input_osr)
            # tile_utm_proj = drigo.osr_proj(tile_utm_osr)
            field_tile_extent = drigo.project_extent(
                tile_utm_extent_dict[tile_name], tile_utm_osr,
                field_input_osr, 30)

            # Project shapefile to the path/row zone
            # Clipping requires GDAL to be built with GEOS support
            subprocess.call(
                ['ogr2ogr',
                 '-t_srs', 'EPSG:{}'.format(
                     32600 + int(tile_utm_zone_dict[tile_name])),
                 '-f', 'ESRI Shapefile', '-overwrite'] +
                ['-spat'] + list(map(str, field_tile_extent)) +
                ['-clipdst'] + list(map(str, tile_utm_extent_dict[tile_name])) +
                # ['-clipdst'] + list(map(str, tile_utm_extent_dict[tile_name])) +
                # ['-clipsrc'] + list(map(str, field_tile_extent)) +
                # ['-clipsrc'] + list(map(str, field_tile_extent)) +
                [field_proj_path, field_input_path])

            # Convert shapefile to raster
            field_mem_ds = drigo.polygon_to_raster_ds(
                field_proj_path, nodata_value=0, burn_value=1,
                output_osr=tile_utm_osr,
                output_extent=tile_utm_extent_dict[tile_name])
            field_output_driver = drigo.raster_driver(field_output_path)
            if field_output_path.lower().endswith('.img'):
                field_output_ds = field_output_driver.CreateCopy(
                    field_output_path, field_mem_ds, 0, ['COMPRESS=YES'])
            else:
                field_output_ds = field_output_driver.CreateCopy(
                    field_output_path, field_mem_ds, 0)
            field_output_ds, field_mem_ds = None, None

            # Remove field shapefile
            # try:
            #     remove_file(field_proj_path)
            # except:
            #     pass

            # Cleanup
            del tile_utm_osr, field_tile_extent, field_input_osr
            # del tile_utm_proj, field_input_proj
            del field_proj_name, field_proj_path, field_output_path

    logging.debug('\nScript complete')
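

# A minimal usage sketch (not part of the original script): the INI path and
# tile list are hypothetical placeholders showing how the path/row prep main()
# above might be invoked with multiprocessing enabled.
def _example_prep_path_rows():
    """Hypothetical example: prep 2017 path/row data for two tiles on 4 cores."""
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    main(
        ini_path='/projects/demo/landsat_prep.ini',  # hypothetical INI file
        tile_list=['p045r043', 'p045r033'],          # overrides tile_list in the INI
        overwrite_flag=False,
        mp_procs=4)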
def main(ini_path, mc_iter_str='', tile_list=None,
         cold_tgt_pct=None, hot_tgt_pct=None, groupsize=64, blocksize=2048,
         multipoint_flag=True, shapefile_flag=True, stats_flag=True,
         overwrite_flag=False, mp_procs=1, delay=0, debug_flag=False,
         new_window_flag=False, no_file_logging=False,
         no_final_plots=None, no_temp_plots=None):
    """Run METRIC Monte Carlo for all Landsat scenes

    Parameters
    ----------
    ini_path : str
        File path of the input parameters file.
    mc_iter_str : str
        MonteCarlo iteration list and/or range.
    tile_list : list, optional
        Landsat path/rows to process (i.e. [p045r043, p045r033]).
        This will override the tile list in the INI file.
    cold_tgt_pct : float, optional
        Target percentage of pixels with ETrF greater than cold Kc.
    hot_tgt_pct : float, optional
        Target percentage of pixels with ETrF less than hot Kc.
    groupsize : int, optional
        Script will try to place calibration point randomly into a labeled
        group of clustered values with at least n pixels (the default is 64).
            -1 = In the largest group
             0 = Anywhere in the image (not currently implemented)
             1 >= In any group with a pixel count greater or equal to n
    blocksize : int, optional
        Processing block size (the default is 2048).
    multipoint_flag : bool, optional
        If True, save calibration points to a multipoint shapefile
        (the default is True).
    shapefile_flag : bool, optional
        If True, save calibration points to shapefile (the default is False).
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    mp_procs : int, optional
        Number of cores to use (the default is 1).
    delay : float, optional
        Max random delay starting function in seconds (the default is 0).
    debug_flag : bool, optional
        If True, enable debug level logging (the default is False).
    new_window_flag : bool, optional
        If True, open each process in a new terminal window
        (the default is False).  Microsoft Windows only.
    no_file_logging : bool
        If True, don't write logging to file (the default is False).
    no_final_plots : bool
        If True, don't save final ETrF histograms (the default is None).
        This will override the flag in the INI file.
    no_temp_plots : bool
        If True, don't save temp ETrF histograms (the default is None).
        This will override the flag in the INI file.

    Returns
    -------
    None

    """
    logging.info('\nRunning METRIC Monte Carlo')

    # Open config file
    config = dripy.open_ini(ini_path)

    # Get input parameters
    logging.debug(' Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = dripy.read_param('tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug(' Year: {}'.format(year))
    logging.debug(' Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug(' Project: {}'.format(project_ws))

    func_path = config.get('INPUTS', 'monte_carlo_func')
    keep_list_path = dripy.read_param('keep_list_path', '', config, 'INPUTS')
    # skip_list_path = dripy.read_param('skip_list_path', '', config, 'INPUTS')

    # For now, get mc_iter list from command line, not from project file
    # mc_iter_list = config.get('INPUTS', 'mc_iter_list')
    mc_iter_list = list(dripy.parse_int_set(mc_iter_str))

    # Need something in mc_iter_list to iterate over
    if not mc_iter_list:
        mc_iter_list = [None]

    # For now build INI file name from template INI names
    metric_ini_name = os.path.basename(config.get('INPUTS', 'metric_ini'))
    metric_ini_name = os.path.splitext(os.path.basename(metric_ini_name))[0]
    mc_ini_name = os.path.basename(config.get('INPUTS', 'monte_carlo_ini'))
    mc_ini_name = os.path.splitext(os.path.basename(mc_ini_name))[0]

    # INI file is built as a function of year and tile_name
    metric_ini_fmt = '{}_{}_{}.ini'
    mc_ini_fmt = '{}_{}_{}.ini'

    # Only allow new terminal windows on Windows
    if os.name != 'nt':
        new_window_flag = False

    # if len(tile_list) == 1:
    #     devel_flag = True
    # else:
    #     devel_flag = False
    # # devel_flag = True

    # Regular expressions
    # For now assume path/row are three digit numbers
    tile_re = re.compile(r'p\d{3}r\d{3}', re.IGNORECASE)
    image_id_re = re.compile(
        r'^(LT04|LT05|LE07|LC08)_(?:\w{4})_(\d{3})(\d{3})_'
        r'(\d{4})(\d{2})(\d{2})_(?:\d{8})_(?:\d{2})_(?:\w{2})$')

    # Check inputs folders/paths
    if not os.path.isdir(project_ws):
        logging.error('\n Folder {} does not exist'.format(project_ws))
        sys.exit()

    # Read keep/skip lists
    if keep_list_path:
        logging.debug('\nReading scene keep list')
        with open(keep_list_path) as keep_list_f:
            image_keep_list = keep_list_f.readlines()
            image_keep_list = [
                image_id.strip() for image_id in image_keep_list
                if image_id_re.match(image_id.strip())]
    else:
        logging.debug('\nScene keep list not set in INI')
        image_keep_list = []
    # if skip_list_path:
    #     logging.debug('\nReading scene skip list')
    #     with open(skip_list_path) as skip_list_f:
    #         image_skip_list = skip_list_f.readlines()
    #         image_skip_list = [
    #             image_id.strip() for image_id in image_skip_list
    #             if image_re.match(image_id.strip())]
    # else:
    #     logging.debug('\nScene skip list not set in INI')
    #     image_skip_list = []

    mp_list = []
    for tile_name in sorted(tile_list):
        logging.debug('\nTile: {}'.format(tile_name))
        tile_ws = os.path.join(project_ws, str(year), tile_name)
        if not os.path.isdir(tile_ws) and not tile_re.match(tile_name):
            logging.debug(' {} {} - invalid tile, skipping'.format(
                year, tile_name))
            continue

        # Check that there are image folders
        image_id_list = [
            image_id for image_id in sorted(os.listdir(tile_ws))
            if (image_id_re.match(image_id) and
                os.path.isdir(os.path.join(tile_ws, image_id)) and
                (image_keep_list and image_id in image_keep_list))]
        #         (image_skip_list and image_id not in image_skip_list))]
        if not image_id_list:
            logging.debug(' {} {} - no available images, skipping'.format(
                year, tile_name))
            continue
        else:
            logging.debug(' {} {}'.format(year, tile_name))

        for image_id in image_id_list:
            image_ws = os.path.join(tile_ws, image_id)
            pixel_ws = os.path.join(image_ws, 'PIXELS')

            if not os.path.isdir(pixel_ws):
                os.mkdir(pixel_ws)

            # Since the multipoint shapefile will be appended, delete it
            #   in the wrapper script
            if multipoint_flag and os.path.isdir(pixel_ws):
                for pixel_file in os.listdir(pixel_ws):
                    if re.match(r'\w+_\w+.shp$', pixel_file):
                        logging.info('\n Removing {}'.format(pixel_file))
                        os.remove(os.path.join(pixel_ws, pixel_file))

        logging.debug(' {} {}'.format(year, tile_name))

        # Check that there is an input file for the path/row
        metric_ini_path = os.path.join(
            tile_ws, metric_ini_fmt.format(metric_ini_name, year, tile_name))
        mc_ini_path = os.path.join(
            tile_ws, mc_ini_fmt.format(mc_ini_name, year, tile_name))
        if not os.path.isfile(metric_ini_path):
            logging.warning(' METRIC Input file {} does not exist'.format(
                metric_ini_path))
            continue
        elif not os.path.isfile(mc_ini_path):
            logging.warning(
                ' Monte Carlo Input file {} does not exist'.format(
                    mc_ini_path))
            continue

        # Setup command line argument
        # call_args = [sys.executable, mc_func_path, '-i', ini_path]
        call_args = [
            sys.executable, func_path,
            '--metric_ini', metric_ini_path,
            '--mc_ini', mc_ini_path,
            '--groupsize', str(groupsize)]
        if cold_tgt_pct is not None and hot_tgt_pct is not None:
            call_args.extend(['-t', str(cold_tgt_pct), str(hot_tgt_pct)])
        if blocksize:
            call_args.extend(['--blocksize', str(blocksize)])
        if shapefile_flag:
            call_args.append('--shapefile')
        if multipoint_flag:
            call_args.append('--multipoint')
        if stats_flag:
            call_args.append('--stats')
        if overwrite_flag:
            call_args.append('--overwrite')
        if debug_flag:
            call_args.append('--debug')
        if no_file_logging:
            call_args.append('--no_file_logging')
        if no_final_plots:
            call_args.append('--no_final_plots')
        if no_temp_plots:
            call_args.append('--no_temp_plots')

        # Run all scenes for each Monte Carlo iteration
        for mc_iter in mc_iter_list:
            if mc_iter is not None:
                mc_args = ['-mc', str(mc_iter)]
            else:
                mc_args = []
            for image_id in image_id_list:
                image_folder = os.path.join(tile_ws, image_id)
                logging.debug(' {}'.format(os.path.basename(image_folder)))
                if mp_procs > 1:
                    mp_list.append([
                        call_args + mc_args, image_folder, delay,
                        new_window_flag])
                else:
                    subprocess.call(call_args + mc_args, cwd=image_folder)

    if mp_list:
        pool = mp.Pool(mp_procs)
        results = pool.map(dripy.call_mp, mp_list, chunksize=1)
        pool.close()
        pool.join()
        del results, pool

    logging.debug('\nScript complete')
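

# A minimal usage sketch (not part of the original script): the INI path and
# iteration range are hypothetical placeholders showing how the Monte Carlo
# wrapper main() above might be driven for a single path/row.
def _example_run_monte_carlo():
    """Hypothetical example: run Monte Carlo iterations 1-5 for one path/row."""
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    main(
        ini_path='/projects/demo/monte_carlo.ini',  # hypothetical project INI
        mc_iter_str='1-5',                          # parsed by dripy.parse_int_set()
        tile_list=['p045r043'],
        groupsize=64,        # place points in groups of >= 64 clustered pixels
        mp_procs=2,          # run two scenes in parallel
        delay=10)            # stagger subprocess launch by up to 10 seconds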
def main(ini_path, tile_list=None, overwrite_flag=False): """Prep Landsat path/row specific data Args: ini_path (str): file path of the input parameters file tile_list (list): list of Landsat path/row (i.e. [p45r43, p45r33]) This will override the tile list in the INI file overwrite_flag (bool): boolean, overwrite existing files mp_procs (int): number of cores to use Returns: None """ logging.info('\nPrepare path/row INI files') # Open config file config = python_common.open_ini(ini_path) # Get input parameters logging.debug(' Reading Input File') year = config.getint('INPUTS', 'year') if tile_list is None: tile_list = python_common.read_param('tile_list', [], config, 'INPUTS') project_ws = config.get('INPUTS', 'project_folder') logging.debug(' Year: {}'.format(year)) logging.debug(' Path/rows: {}'.format(', '.join(tile_list))) logging.debug(' Project: {}'.format(project_ws)) ini_file_flag = python_common.read_param('ini_file_flag', True, config, 'INPUTS') landsat_flag = python_common.read_param('landsat_flag', True, config, 'INPUTS') ledaps_flag = python_common.read_param('ledaps_flag', False, config, 'INPUTS') dem_flag = python_common.read_param('dem_flag', True, config, 'INPUTS') nlcd_flag = python_common.read_param('nlcd_flag', True, config, 'INPUTS') cdl_flag = python_common.read_param('cdl_flag', True, config, 'INPUTS') landfire_flag = python_common.read_param('landfire_flag', False, config, 'INPUTS') field_flag = python_common.read_param('field_flag', False, config, 'INPUTS') metric_flag = python_common.read_param('metric_flag', True, config, 'INPUTS') monte_carlo_flag = python_common.read_param('monte_carlo_flag', False, config, 'INPUTS') interp_rasters_flag = python_common.read_param('interpolate_rasters_flag', False, config, 'INPUTS') interp_tables_flag = python_common.read_param('interpolate_tables_flag', False, config, 'INPUTS') metric_hourly_weather = python_common.read_param('metric_hourly_weather', 'NLDAS', config, 'INPUTS') project_ws = config.get('INPUTS', 'project_folder') footprint_path = config.get('INPUTS', 'footprint_path') # For now, assume the UTM zone file is colocated with the footprints shapefile utm_path = python_common.read_param( 'utm_path', os.path.join(os.path.dirname(footprint_path), 'wrs2_tile_utm_zones.json'), config, 'INPUTS') skip_list_path = python_common.read_param('skip_list_path', '', config, 'INPUTS') # Ts and albedo corrections ts_correction_flag = python_common.read_param('Ts_correction_flag', True, config, 'INPUTS') k_value = python_common.read_param('K_value', 2, config, 'INPUTS') albedo_correction_flag = python_common.read_param( 'albedo_correction_flag ', True, config, 'INPUTS') dense_veg_min_albedo = python_common.read_param('dense_veg_min_albedo ', 0.18, config, 'INPUTS') # tile_gcs_buffer = read_param('tile_buffer', 0.1, config) # Template input files for scripts if metric_flag: metric_ini = config.get('INPUTS', 'metric_ini') pixel_rating_ini = config.get('INPUTS', 'pixel_rating_ini') if monte_carlo_flag: monte_carlo_ini = config.get('INPUTS', 'monte_carlo_ini') if interp_rasters_flag or interp_tables_flag: interpolate_folder = python_common.read_param('interpolate_folder', 'ET', config) interpolate_ini = config.get('INPUTS', 'interpolate_ini') if interp_rasters_flag: study_area_path = config.get('INPUTS', 'study_area_path') study_area_mask_flag = python_common.read_param( 'study_area_mask_flag', True, config) study_area_snap = python_common.read_param('study_area_snap', (0, 0), config) study_area_cellsize = 
python_common.read_param('study_area_cellsize', 30, config) study_area_buffer = python_common.read_param('study_area_buffer', 0, config) study_area_proj = python_common.read_param('study_area_proj', '', config) if interp_tables_flag: zones_path = config.get('INPUTS', 'zones_path') zones_name_field = python_common.read_param('zones_name_field', 'FID', config) # zones_buffer = read_param('zones_buffer', 0, config) zones_snap = python_common.read_param('zones_snap', (0, 0), config) zones_cellsize = python_common.read_param('zones_cellsize', 30, config) # zones_proj = read_param('zones_proj', '', config) zones_mask = python_common.read_param('zones_mask', None, config) zones_buffer = None zones_proj = None # Input/output folder and file paths if landsat_flag: landsat_input_ws = config.get('INPUTS', 'landsat_input_folder') else: landsat_input_ws = None if ledaps_flag: ledaps_input_ws = config.get('INPUTS', 'ledaps_input_folder') else: ledaps_input_ws = None if dem_flag: dem_input_ws = config.get('INPUTS', 'dem_input_folder') dem_tile_fmt = config.get('INPUTS', 'dem_tile_fmt') dem_output_ws = config.get('INPUTS', 'dem_output_folder') dem_output_name = python_common.read_param('dem_output_name', 'dem.img', config) # dem_output_name = config.get('INPUTS', 'dem_output_name') else: dem_input_ws, dem_tile_fmt = None, None dem_output_ws, dem_output_name = None, None if nlcd_flag: nlcd_input_path = config.get('INPUTS', 'nlcd_input_path') nlcd_output_ws = config.get('INPUTS', 'nlcd_output_folder') nlcd_output_fmt = python_common.read_param('nlcd_output_fmt', 'nlcd_{:04d}.img', config) else: nlcd_input_path, nlcd_output_ws, nlcd_output_fmt = None, None, None if cdl_flag: cdl_input_path = config.get('INPUTS', 'cdl_input_path') cdl_ag_list = config.get('INPUTS', 'cdl_ag_list') cdl_ag_list = list(python_common.parse_int_set(cdl_ag_list)) # default_cdl_ag_list = range(1,62) + range(66,78) + range(204,255) # cdl_ag_list = read_param( # 'cdl_ag_list', default_cdl_ag_list, config) # cdl_ag_list = list(map(int, cdl_ag_list)) # cdl_non_ag_list = read_param( # 'cdl_non_ag_list', [], config) cdl_output_ws = config.get('INPUTS', 'cdl_output_folder') cdl_output_fmt = python_common.read_param('cdl_output_fmt', 'cdl_{:04d}.img', config) cdl_ag_output_fmt = python_common.read_param('cdl_ag_output_fmt', 'cdl_ag_{:04d}.img', config) else: cdl_input_path, cdl_ag_list = None, None cdl_output_ws, cdl_output_fmt, cdl_ag_output_fmt = None, None, None if landfire_flag: landfire_input_path = config.get('INPUTS', 'landfire_input_path') landfire_ag_list = config.get('INPUTS', 'landfire_ag_list') landfire_ag_list = list(python_common.parse_int_set(landfire_ag_list)) # default_landfire_ag_list = range(3960,4000) # landfire_ag_list = read_param( # 'landfire_ag_list', default_landfire_ag_list, config) # landfire_ag_list = list(map(int, landfire_ag_list)) landfire_output_ws = config.get('INPUTS', 'landfire_output_folder') landfire_output_fmt = python_common.read_param('landfire_output_fmt', 'landfire_{:04d}.img', config) landfire_ag_output_fmt = python_common.read_param( 'landfire_ag_output_fmt', 'landfire_ag_{:04d}.img', config) else: landfire_input_path, landfire_ag_list = None, None landfire_output_ws = None landfire_output_fmt, landfire_ag_output_fmt = None, None if field_flag: field_input_path = config.get('INPUTS', 'field_input_path') field_output_ws = config.get('INPUTS', 'field_output_folder') field_output_fmt = python_common.read_param('field_output_fmt', 'fields_{:04d}.img', config) else: field_input_path = None 
field_output_ws, field_output_fmt = None, None if monte_carlo_flag: etrf_training_path = config.get('INPUTS', 'etrf_training_path') # mc_iter_list = config.get('INPUTS', 'mc_iter_list') # mc_iter_list = list(python_common.parse_int_set(mc_iter_list)) if monte_carlo_flag or interp_rasters_flag or interp_tables_flag: etrf_input_ws = python_common.read_param('etrf_input_folder', None, config) # if etrf_input_ws is None: # etrf_input_ws = os.path.join(project_ws, year) etr_input_ws = config.get('INPUTS', 'etr_input_folder') ppt_input_ws = config.get('INPUTS', 'ppt_input_folder') etr_input_re = config.get('INPUTS', 'etr_input_re') ppt_input_re = config.get('INPUTS', 'ppt_input_re') if monte_carlo_flag or interp_rasters_flag or interp_tables_flag: awc_input_path = config.get('INPUTS', 'awc_input_path') spinup_days = python_common.read_param('swb_spinup_days', 30, config, 'INPUTS') min_spinup_days = python_common.read_param('swb_min_spinup_days', 5, config, 'INPUTS') # Weather data parameters if metric_flag: metric_hourly_weather_list = ['NLDAS', 'REFET'] metric_hourly_weather = config.get('INPUTS', 'metric_hourly_weather').upper() if metric_hourly_weather not in metric_hourly_weather_list: logging.error( ('\nERROR: The METRIC hourly weather type {} is invalid.' + '\nERROR: Set metric_hourly_weather to {}').format( metric_hourly_weather, ','.join(metric_hourly_weather_list))) sys.exit() elif metric_hourly_weather == 'REFET': refet_params_path = os.path.normpath( config.get('INPUTS', 'refet_params_path')) elif metric_hourly_weather == 'NLDAS': # metric_hourly_re = config.get('INPUTS', 'metric_hourly_re') # metric_daily_re = config.get('INPUTS', 'metric_daily_re') metric_ea_input_ws = config.get('INPUTS', 'metric_ea_input_folder') metric_wind_input_ws = config.get('INPUTS', 'metric_wind_input_folder') metric_etr_input_ws = config.get('INPUTS', 'metric_etr_input_folder') try: calc_metric_tair_flag = config.getboolean( 'INPUTS', 'calc_metric_tair_flag') metric_tair_input_ws = config.get('INPUTS', 'metric_tair_input_folder') except: calc_metric_tair_flag = False metric_tair_input_ws = '' # Check inputs folders/paths logging.info('\nChecking input folders/files') file_check(footprint_path) if landsat_flag: folder_check(landsat_input_ws) if ledaps_flag: folder_check(ledaps_input_ws) if dem_flag: folder_check(dem_input_ws) if nlcd_flag: file_check(nlcd_input_path) if cdl_flag: file_check(cdl_input_path) if landfire_flag: # Landfire will likely be an ESRI grid (set as a folder) if not (os.path.isdir(landfire_input_path) or os.path.isfile(landfire_input_path)): logging.error(' {} does not exist.'.format(landfire_input_path)) sys.exit() if field_flag: file_check(field_input_path) if metric_flag: file_check(metric_ini) file_check(pixel_rating_ini) if interp_rasters_flag or interp_tables_flag or monte_carlo_flag: if etrf_input_ws is not None: folder_check(etrf_input_ws) folder_check(etr_input_ws) folder_check(ppt_input_ws) file_check(awc_input_path) if monte_carlo_flag: file_check(monte_carlo_ini) file_check(etrf_training_path) if metric_flag: if metric_hourly_weather == 'REFET': file_check(refet_params_path) elif metric_hourly_weather == 'NLDAS': folder_check(metric_ea_input_ws) folder_check(metric_wind_input_ws) folder_check(metric_etr_input_ws) if calc_metric_tair_flag: folder_check(metric_tair_input_ws) if skip_list_path: file_check(skip_list_path) # Build output folders if not os.path.isdir(project_ws): os.makedirs(project_ws) # For now assume path/row are two digit numbers tile_fmt = 'p{:03d}r{:03d}' # 
Set snap environment parameters snap_cs = 30 snap_xmin, snap_ymin = (15, 15) env = gdc.env env.cellsize = snap_cs env.snap_xmin, env.snap_ymin = snap_xmin, snap_ymin # Use WGSS84 (EPSG 4326) for GCS spatial reference # Could also use NAD83 (EPSG 4269) # gcs_epsg = 4326 # gcs_osr = epsg_osr(4326) # gcs_proj = osr_proj(gcs_osr) # Landsat Footprints (WRS2 Descending Polygons) logging.debug('\nFootprint (WRS2 descending should be GCS84):') tile_gcs_osr = gdc.feature_path_osr(footprint_path) logging.debug(' OSR: {}'.format(tile_gcs_osr)) # Doublecheck that WRS2 descending shapefile is GCS84 # if tile_gcs_osr != epsg_osr(4326): # logging.error(' WRS2 is not GCS84') # sys.exit() # Get geometry for each path/row tile_gcs_wkt_dict = path_row_wkt_func(footprint_path, path_field='PATH', row_field='ROW') # Get UTM zone for each path/row # DEADBEEF - Using "eval" is considered unsafe and should be changed tile_utm_zone_dict = eval(open(utm_path, 'r').read()) # Check that each path/row extent and UTM zone exist logging.info('\nChecking path/row list against footprint shapefile') for tile_name in sorted(tile_list): if tile_name not in tile_gcs_wkt_dict.keys(): logging.error( ' {} feature not in footprint shapefile'.format(tile_name)) continue elif tile_name not in tile_utm_zone_dict.keys(): logging.error( ' {} UTM zone not in footprint shapefile'.format(tile_name)) continue elif tile_utm_zone_dict[tile_name] == 0: logging.error((' UTM zone is not set for {} in ' + 'footprint shapefile').format(tile_name)) continue # Read RefET parameters if metric_hourly_weather == 'REFET': refet_ws = os.path.dirname(refet_params_path) with open(refet_params_path, 'r') as input_f: lines = input_f.readlines() lines = [line.strip() for line in lines] lines = [line.split(',') for line in lines if line] columns = lines.pop(0) refet_params_dict = defaultdict(dict) for line in lines: tile_name = tile_fmt.format(int(line[columns.index('PATH')]), int(line[columns.index('ROW')])) yr_tile_name = '{}_{}'.format(line[columns.index('YEAR')], tile_name) for i, column in enumerate(columns): if column not in ['YEAR', 'PATH', 'ROW']: refet_params_dict[yr_tile_name][column.lower()] = line[i] # Process input files for each year and path/row logging.info('\nBuilding path/row specific input files') for tile_name in tile_list: tile_output_ws = os.path.join(project_ws, str(year), tile_name) logging.info('{} {}'.format(year, tile_name)) yr_tile_name = '{}_{}'.format(year, tile_name) if not os.path.isdir(tile_output_ws): os.makedirs(tile_output_ws) # File paths if metric_flag: tile_metric_ini = os.path.join( tile_output_ws, os.path.basename(metric_ini).replace( '.ini', '_{}_{}.ini'.format(year, tile_name))) tile_pixel_rating_ini = os.path.join( tile_output_ws, os.path.basename(pixel_rating_ini).replace( '.ini', '_{}_{}.ini'.format(year, tile_name))) if overwrite_flag and os.path.isfile(tile_metric_ini): os.remove(tile_metric_ini) if overwrite_flag and os.path.isfile(tile_pixel_rating_ini): os.remove(tile_pixel_rating_ini) # Monte Carlo is independent of tile and year, but process # with METRIC input file if monte_carlo_flag: tile_monte_carlo_ini = os.path.join( tile_output_ws, os.path.basename(monte_carlo_ini).replace( '.ini', '_{}_{}.ini'.format(year, tile_name))) if overwrite_flag and os.path.isfile(tile_monte_carlo_ini): os.remove(tile_monte_carlo_ini) if dem_flag: dem_output_path = os.path.join(dem_output_ws, tile_name, dem_output_name) if nlcd_flag: nlcd_output_path = os.path.join(nlcd_output_ws, tile_name, nlcd_output_fmt.format(year)) if 
    # Check that each path/row extent and UTM zone exist
    logging.info('\nChecking path/row list against footprint shapefile')
    for tile_name in sorted(tile_list):
        if tile_name not in tile_gcs_wkt_dict.keys():
            logging.error(
                '  {} feature not in footprint shapefile'.format(tile_name))
            continue
        elif tile_name not in tile_utm_zone_dict.keys():
            logging.error(
                '  {} UTM zone not in footprint shapefile'.format(tile_name))
            continue
        elif tile_utm_zone_dict[tile_name] == 0:
            logging.error(
                ('  UTM zone is not set for {} in '
                 'footprint shapefile').format(tile_name))
            continue

    # Read RefET parameters
    if metric_hourly_weather == 'REFET':
        refet_ws = os.path.dirname(refet_params_path)
        with open(refet_params_path, 'r') as input_f:
            lines = input_f.readlines()
        lines = [line.strip() for line in lines]
        lines = [line.split(',') for line in lines if line]
        columns = lines.pop(0)
        refet_params_dict = defaultdict(dict)
        for line in lines:
            tile_name = tile_fmt.format(
                int(line[columns.index('PATH')]),
                int(line[columns.index('ROW')]))
            yr_tile_name = '{}_{}'.format(
                line[columns.index('YEAR')], tile_name)
            for i, column in enumerate(columns):
                if column not in ['YEAR', 'PATH', 'ROW']:
                    refet_params_dict[yr_tile_name][column.lower()] = line[i]

    # Process input files for each year and path/row
    logging.info('\nBuilding path/row specific input files')
    for tile_name in tile_list:
        tile_output_ws = os.path.join(project_ws, str(year), tile_name)
        logging.info('{} {}'.format(year, tile_name))
        yr_tile_name = '{}_{}'.format(year, tile_name)
        if not os.path.isdir(tile_output_ws):
            os.makedirs(tile_output_ws)

        # File paths
        if metric_flag:
            tile_metric_ini = os.path.join(
                tile_output_ws, os.path.basename(metric_ini).replace(
                    '.ini', '_{}_{}.ini'.format(year, tile_name)))
            tile_pixel_rating_ini = os.path.join(
                tile_output_ws, os.path.basename(pixel_rating_ini).replace(
                    '.ini', '_{}_{}.ini'.format(year, tile_name)))
            if overwrite_flag and os.path.isfile(tile_metric_ini):
                os.remove(tile_metric_ini)
            if overwrite_flag and os.path.isfile(tile_pixel_rating_ini):
                os.remove(tile_pixel_rating_ini)

        # Monte Carlo is independent of tile and year, but process
        #   with METRIC input file
        if monte_carlo_flag:
            tile_monte_carlo_ini = os.path.join(
                tile_output_ws, os.path.basename(monte_carlo_ini).replace(
                    '.ini', '_{}_{}.ini'.format(year, tile_name)))
            if overwrite_flag and os.path.isfile(tile_monte_carlo_ini):
                os.remove(tile_monte_carlo_ini)

        if dem_flag:
            dem_output_path = os.path.join(
                dem_output_ws, tile_name, dem_output_name)
        if nlcd_flag:
            nlcd_output_path = os.path.join(
                nlcd_output_ws, tile_name, nlcd_output_fmt.format(year))
        if cdl_flag:
            cdl_ag_output_path = os.path.join(
                cdl_output_ws, tile_name, cdl_ag_output_fmt.format(year))
        if landfire_flag:
            landfire_ag_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_output_fmt.format(year))
        if field_flag:
            field_output_path = os.path.join(
                field_output_ws, tile_name, field_output_fmt.format(year))

        # Check that the path/row was in the RefET parameters file
        if (metric_flag and metric_hourly_weather == 'REFET' and
                yr_tile_name not in refet_params_dict.keys()):
            logging.error(
                ('  The year {} & path/row {} is not in the '
                 'RefET parameters csv, skipping').format(year, tile_name))
            continue

        if metric_flag and not os.path.isfile(tile_metric_ini):
            # DEADBEEF - This approach removes all formatting and comments
            config = configparser.RawConfigParser()
            config.read(metric_ini)
            # shutil.copy(metric_ini, tile_metric_ini)
            # config.read(tile_metric_ini)

            if metric_hourly_weather == 'REFET':
                # Add RefET options
                config.set('INPUTS', 'weather_data_source', 'REFET')
                config.set(
                    'INPUTS', 'refet_file',
                    os.path.join(refet_ws, os.path.normpath(
                        refet_params_dict[yr_tile_name]['refet_file'])))
                config.set('INPUTS', 'gmt_offset',
                           refet_params_dict[yr_tile_name]['gmt_offset'])
                config.set('INPUTS', 'datum',
                           refet_params_dict[yr_tile_name]['datum'])
            elif metric_hourly_weather == 'NLDAS':
                # Add NLDAS options
                config.set('INPUTS', 'weather_data_source', 'NLDAS')
                # Remove RefET options
                try:
                    config.remove_option('INPUTS', 'refet_file')
                except:
                    pass
                try:
                    config.remove_option('INPUTS', 'gmt_offset')
                except:
                    pass
                # try:
                #     config.remove_option('INPUTS', 'datum')
                # except:
                #     pass

            if dem_flag:
                config.set('INPUTS', 'dem_raster', dem_output_path)
            else:
                try:
                    config.remove_option('INPUTS', 'dem_raster')
                except:
                    pass
                # config.set('INPUTS', 'dem_raster', 'None')
            if nlcd_flag:
                config.set('INPUTS', 'landuse_raster', nlcd_output_path)
            else:
                try:
                    config.remove_option('INPUTS', 'landuse_raster')
                except:
                    pass
                # config.set('INPUTS', 'landuse_raster', 'None')

            logging.debug('  {}'.format(tile_metric_ini))
            with open(tile_metric_ini, 'w') as config_f:
                config.write(config_f)

        if metric_flag and not os.path.isfile(tile_pixel_rating_ini):
            config = configparser.RawConfigParser()
            config.read(pixel_rating_ini)
            if nlcd_flag:
                config.set('INPUTS', 'landuse_raster', nlcd_output_path)
            else:
                try:
                    config.remove_option('INPUTS', 'landuse_raster')
                except:
                    pass
                # config.set('INPUTS', 'landuse_raster', 'None')
            if cdl_flag:
                config.set('INPUTS', 'apply_cdl_ag_mask', True)
                config.set('INPUTS', 'cdl_ag_raster', cdl_ag_output_path)
            else:
                config.set('INPUTS', 'apply_cdl_ag_mask', False)
                try:
                    config.remove_option('INPUTS', 'cdl_ag_raster')
                except:
                    pass
                # config.set('INPUTS', 'cdl_ag_raster', 'None')
            if field_flag:
                config.set('INPUTS', 'apply_field_mask', True)
                config.set('INPUTS', 'fields_raster', field_output_path)
            else:
                config.set('INPUTS', 'apply_field_mask', False)
                try:
                    config.remove_option('INPUTS', 'fields_raster')
                except:
                    pass
                # config.set('INPUTS', 'fields_raster', 'None')
            # if landfire_flag:
            #     config.set('INPUTS', 'apply_landfire_ag_mask', True)
            #     config.set('INPUTS', 'landfire_ag_raster',
            #                landfire_ag_output_path)
            # else:
            #     config.set('INPUTS', 'apply_landfire_ag_mask', False)
            #     try:
            #         config.remove_option('INPUTS', 'landfire_ag_raster')
            #     except:
            #         pass
            #     # config.set('INPUTS', 'landfire_ag_raster', 'None')

            logging.debug('  {}'.format(tile_pixel_rating_ini))
            with open(tile_pixel_rating_ini, 'w') as config_f:
                config.write(config_f)
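        # Naming sketch (hypothetical values): with year=2017 and
        # tile_name='p045r043', a 'metric.ini' template is written to
        # <project_folder>/2017/p045r043/metric_2017_p045r043.ini; the
        # pixel rating and Monte Carlo INIs follow the same
        # '_{year}_{tile}' suffix pattern.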
        if monte_carlo_flag and not os.path.isfile(tile_monte_carlo_ini):
            config = configparser.RawConfigParser()
            config.read(monte_carlo_ini)
            config.set('INPUTS', 'etrf_training_path', etrf_training_path)
            config.set('INPUTS', 'etr_ws', etr_input_ws)
            config.set('INPUTS', 'ppt_ws', ppt_input_ws)
            config.set('INPUTS', 'etr_re', etr_input_re)
            config.set('INPUTS', 'ppt_re', ppt_input_re)
            config.set('INPUTS', 'awc_path', awc_input_path)
            config.set('INPUTS', 'swb_spinup_days', spinup_days)
            config.set('INPUTS', 'swb_min_spinup_days', min_spinup_days)
            logging.debug('  {}'.format(tile_monte_carlo_ini))
            with open(tile_monte_carlo_ini, 'w') as config_f:
                config.write(config_f)

        # Cleanup
        del tile_output_ws, yr_tile_name

    # Interpolator input file
    if interp_rasters_flag or interp_tables_flag:
        logging.info('\nBuilding interpolator input files')
        year_interpolator_name = os.path.basename(interpolate_ini).replace(
            '.ini', '_{}_{}.ini'.format(year, interpolate_folder.lower()))
        year_interpolator_ini = os.path.join(
            project_ws, str(year), year_interpolator_name)
        if overwrite_flag and os.path.isfile(year_interpolator_ini):
            os.remove(year_interpolator_ini)
        if not os.path.isfile(year_interpolator_ini):
            # First copy the template config file to the year folder
            shutil.copy(interpolate_ini, year_interpolator_ini)

            # Open the existing config file and update the values
            # DEADBEEF - This approach removes all formatting and comments
            config = configparser.RawConfigParser()
            config.read(year_interpolator_ini)
            config.set('INPUTS', 'folder_name', interpolate_folder)
            config.set('INPUTS', 'tile_list', ', '.join(tile_list))
            if interp_rasters_flag:
                config.set('INPUTS', 'study_area_path', study_area_path)
                config.set('INPUTS', 'study_area_mask_flag',
                           study_area_mask_flag)
                config.set('INPUTS', 'study_area_snap',
                           ', '.join(map(str, study_area_snap)))
                config.set('INPUTS', 'study_area_cellsize',
                           study_area_cellsize)
                config.set('INPUTS', 'study_area_buffer', study_area_buffer)
                if study_area_proj:
                    config.set('INPUTS', 'study_area_proj', study_area_proj)
                else:
                    try:
                        config.remove_option('INPUTS', 'study_area_proj')
                    except:
                        pass
            if interp_tables_flag:
                config.set('INPUTS', 'zones_path', zones_path)
                config.set('INPUTS', 'zones_snap',
                           ', '.join(map(str, zones_snap)))
                config.set('INPUTS', 'zones_cellsize', zones_cellsize)
                config.set('INPUTS', 'zones_name_field', zones_name_field)
                # zones_buffer is not currently implemented
                if zones_buffer:
                    config.set('INPUTS', 'zones_buffer', zones_buffer)
                else:
                    try:
                        config.remove_option('INPUTS', 'zones_buffer')
                    except:
                        pass
                # zones proj., cellsize, and snap are not needed or
                #   read in if zones_mask is set
                # zones_proj is not currently implemented
                if zones_mask:
                    config.set('INPUTS', 'zones_mask', zones_mask)
                    try:
                        config.remove_option('INPUTS', 'zones_proj')
                    except:
                        pass
                    try:
                        config.remove_option('INPUTS', 'zones_cellsize')
                    except:
                        pass
                    try:
                        config.remove_option('INPUTS', 'zones_snap')
                    except:
                        pass
                # elif zones_proj:
                #     config.set('INPUTS', 'zones_proj', zones_proj)
                #     try:
                #         config.remove_option('INPUTS', 'zones_mask')
                #     except:
                #         pass
                else:
                    try:
                        config.remove_option('INPUTS', 'zones_proj')
                    except:
                        pass
                    try:
                        config.remove_option('INPUTS', 'zones_mask')
                    except:
                        pass
            config.set('INPUTS', 'year', year)
            config.set('INPUTS', 'footprint_path', footprint_path)
            if etrf_input_ws is not None:
                config.set('INPUTS', 'etrf_input_folder', etrf_input_ws)
            config.set('INPUTS', 'etr_input_folder', etr_input_ws)
            config.set('INPUTS', 'etr_input_re', etr_input_re)
            config.set('INPUTS', 'ppt_input_folder', ppt_input_ws)
            config.set('INPUTS', 'ppt_input_re', ppt_input_re)
            # DEADBEEF - add check for SWB flag
            config.set('INPUTS', 'awc_input_path', awc_input_path)
            config.set('INPUTS', 'swb_spinup_days', spinup_days)
            config.set('INPUTS', 'swb_min_spinup_days', min_spinup_days)

            # Albedo and Ts correction
            config.set('INPUTS', 'Ts_correction_flag', ts_correction_flag)
            config.set('INPUTS', 'K_value', k_value)
            config.set('INPUTS', 'albedo_correction_flag',
                       albedo_correction_flag)
            config.set('INPUTS', 'dense_veg_min_albedo',
                       dense_veg_min_albedo)

            logging.debug('  {}'.format(year_interpolator_ini))
            with open(year_interpolator_ini, 'w') as config_f:
                config.write(config_f)

    logging.debug('\nScript complete')
def main(grb_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(),
         etr_flag=False, eto_flag=False, landsat_ws=None,
         start_date=None, end_date=None, times_str='',
         extent_path=None, output_extent=None, daily_flag=True,
         stats_flag=True, overwrite_flag=False):
    """Compute hourly ETr/ETo from NLDAS data

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        etr_flag (bool): if True, compute alfalfa reference ET (ETr)
        eto_flag (bool): if True, compute grass reference ET (ETo)
        landsat_ws (str): folder of Landsat scenes or tar.gz files
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        daily_flag (bool): if True, save daily ETr/ETo sum raster.
            Default is True
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nComputing NLDAS hourly ETr/ETo')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info('  ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date: {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date: {}'.format(end_dt))

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    etr_folder = 'etr'
    eto_folder = 'eto'
    hour_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # hour_fmt = '{}_{:04d}{:02d}{:02d}_{:04d}_nldas.img'
    day_fmt = '{}_{:04d}{:02d}{:02d}_nldas.img'
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')
    lat_path = os.path.join(ancillary_ws, 'nldas_lat.img')
    lon_path = os.path.join(ancillary_ws, 'nldas_lon.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            try:
                landsat_match = landsat_re.match(os.path.basename(root))
                date_list.append(dt.datetime.strptime(
                    '_'.join(landsat_match.groups()),
                    '%Y_%m_%d').date().isoformat())
                dirs[:] = []
            except:
                pass
            for file in files:
                try:
                    landsat_match = landsat_re.match(file)
                    date_list.append(dt.datetime.strptime(
                        '_'.join(landsat_match.groups()),
                        '%Y_%m_%d').date().isoformat())
                except:
                    pass
        date_list = sorted(list(set(date_list)))
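    # Parsing sketch: a scene ID such as 'LC08_045043_20170716'
    # (hypothetical) matches landsat_re, and joining its year/month/day
    # groups yields the ISO date '2017-07-16' in date_list.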
    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(
            mask_path, mask_extent=nldas_extent, fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None
    # Read ancillary arrays (or subsets?)
    elev_array = gdc.raster_to_array(
        elev_path, mask_extent=nldas_extent, return_nodata=False)
    # pair_array = et_common.air_pressure_func(elev_array)
    lat_array = gdc.raster_to_array(
        lat_path, mask_extent=nldas_extent, return_nodata=False)
    lon_array = gdc.raster_to_array(
        lon_path, mask_extent=nldas_extent, return_nodata=False)

    # The hourly RefET function expects lat/lon in radians
    lat_array *= (math.pi / 180)
    lon_array *= (math.pi / 180)

    # Build output folders
    etr_ws = os.path.join(output_ws, etr_folder)
    eto_ws = os.path.join(output_ws, eto_folder)
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # DEADBEEF - Instead of processing all available files, the following
    #   code will process files for target dates
    # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
    #     logging.info(input_dt.date())

    # Iterate all available files and check dates if necessary
    # Each sub folder in the main folder has all imagery for 1 day
    #   (in UTC time)
    # The path for each subfolder is /YYYY/DOY
    errors = defaultdict(list)
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        if (re.match(r'\d{4}', root_split[-2]) and
                re.match(r'\d{3}', root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt) or
                    (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif re.match(r'\d{4}', root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year) or
                    (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue
        logging.debug('  {}'.format(root))

        # Start off assuming every file needs to be processed
        day_skip_flag = False

        # Build output folders if necessary
        etr_year_ws = os.path.join(etr_ws, str(root_dt.year))
        eto_year_ws = os.path.join(eto_ws, str(root_dt.year))
        if etr_flag and not os.path.isdir(etr_year_ws):
            os.makedirs(etr_year_ws)
        if eto_flag and not os.path.isdir(eto_year_ws):
            os.makedirs(eto_year_ws)

        # Build daily total paths
        etr_day_path = os.path.join(etr_year_ws, day_fmt.format(
            'etr', root_dt.year, root_dt.month, root_dt.day))
        eto_day_path = os.path.join(eto_year_ws, day_fmt.format(
            'eto', root_dt.year, root_dt.month, root_dt.day))
        etr_hour_path = os.path.join(etr_year_ws, hour_fmt.format(
            'etr', root_dt.year, root_dt.month, root_dt.day))
        eto_hour_path = os.path.join(eto_year_ws, hour_fmt.format(
            'eto', root_dt.year, root_dt.month, root_dt.day))
        # logging.debug('  {}'.format(etr_hour_path))

        # If daily ETr/ETo files are present, the day can be skipped
        if not overwrite_flag and daily_flag:
            if etr_flag and not os.path.isfile(etr_day_path):
                pass
            elif eto_flag and not os.path.isfile(eto_day_path):
                pass
            else:
                day_skip_flag = True

        # If the hourly and daily files don't need to be made, skip the day
        if not overwrite_flag:
            if etr_flag and not os.path.isfile(etr_hour_path):
                pass
            elif eto_flag and not os.path.isfile(eto_hour_path):
                pass
            elif day_skip_flag:
                logging.debug('  File(s) already exist, skipping')
                continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
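        # Band numbering sketch: band = UTC hour + 1 (see band_num below),
        # so the 0000 UTC time step is stored in band 1 and 2300 UTC in
        # band 24.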
        if etr_flag:
            logging.debug('  {}'.format(etr_hour_path))
            gdc.build_empty_raster(
                etr_hour_path, band_cnt=24, output_dtype=np.float32,
                output_proj=nldas_proj, output_cs=nldas_cs,
                output_extent=nldas_extent, output_fill_flag=True)
        if eto_flag:
            logging.debug('  {}'.format(eto_hour_path))
            gdc.build_empty_raster(
                eto_hour_path, band_cnt=24, output_dtype=np.float32,
                output_proj=nldas_proj, output_cs=nldas_cs,
                output_extent=nldas_extent, output_fill_flag=True)

        # Sum all ETr/ETo images in each folder to generate a UTC day total
        etr_day_array = 0
        eto_day_array = 0

        # Process each hour file
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug(
                    '  Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(
                int(input_match.group('YEAR')),
                int(input_match.group('MONTH')),
                int(input_match.group('DAY')))
            input_doy = int(input_dt.strftime('%j'))
            time_str = input_match.group('TIME')
            band_num = int(time_str[:2]) + 1
            # if start_dt is not None and input_dt < start_dt:
            #     continue
            # elif end_dt is not None and input_dt > end_dt:
            #     continue
            # elif date_list and input_dt.date().isoformat() not in date_list:
            #     continue
            if not daily_flag and time_str not in time_list:
                logging.debug('  Time not in list and not daily, skipping')
                continue
            input_path = os.path.join(root, input_name)
            logging.debug('  Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('  Band: {}'.format(band_num))

            # Determine band numbering/naming
            try:
                input_band_dict = grib_band_names(input_path)
            except RuntimeError as e:
                errors[input_path].append(e)
                logging.error('  RuntimeError: {} Skipping: {}'.format(
                    e, input_path))
                continue

            # Read input bands
            input_ds = gdal.Open(input_path)

            # Temperature should be in C for et_common.refet_hourly_func()
            if 'Temperature [K]' in input_band_dict.keys():
                temp_band_units = 'K'
                temp_array = gdc.raster_ds_to_array(
                    input_ds, band=input_band_dict['Temperature [K]'],
                    mask_extent=nldas_extent, return_nodata=False)
            elif 'Temperature [C]' in input_band_dict.keys():
                temp_band_units = 'C'
                temp_array = gdc.raster_ds_to_array(
                    input_ds, band=input_band_dict['Temperature [C]'],
                    mask_extent=nldas_extent, return_nodata=False)
            else:
                logging.error('Unknown Temperature units, skipping')
                logging.error('  {}'.format(input_band_dict.keys()))
                continue

            # DEADBEEF - Having issue with T appearing to be C
            #   but labeled as K
            # Try to determine temperature units from values
            temp_mean = float(np.nanmean(temp_array))
            temp_units_dict = {20: 'C', 293: 'K'}
            temp_array_units = temp_units_dict[min(
                temp_units_dict, key=lambda x: abs(x - temp_mean))]
            if temp_array_units == 'K' and temp_band_units == 'K':
                logging.debug('  Converting temperature from K to C')
                temp_array -= 273.15
            elif temp_array_units == 'C' and temp_band_units == 'C':
                pass
            elif temp_array_units == 'C' and temp_band_units == 'K':
                logging.debug(
                    ('  Temperature units are K in the GRB band name, '
                     'but values appear to be C\n'
                     '  Mean temperature: {:.2f}\n'
                     '  Values will NOT be adjusted').format(temp_mean))
            elif temp_array_units == 'K' and temp_band_units == 'C':
                logging.debug(
                    ('  Temperature units are C in the GRB band name, '
                     'but values appear to be K\n'
                     '  Mean temperature: {:.2f}\n'
                     '  Values will be adjusted from K to C').format(
                        temp_mean))
                temp_array -= 273.15
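            # Worked example of the heuristic above: a mean of 14.8 is
            # closer to 20 than to 293, so the values are treated as C,
            # while a mean of 288.6 is closer to 293 and is treated as K,
            # regardless of the units in the GRB band name.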
            try:
                sph_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['Specific humidity [kg/kg]'],
                    mask_extent=nldas_extent, return_nodata=False)
                rs_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict[
                        'Downward shortwave radiation flux [W/m^2]'],
                    mask_extent=nldas_extent, return_nodata=False)
                wind_u_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['u-component of wind [m/s]'],
                    mask_extent=nldas_extent, return_nodata=False)
                wind_v_array = gdc.raster_ds_to_array(
                    input_ds,
                    band=input_band_dict['v-component of wind [m/s]'],
                    mask_extent=nldas_extent, return_nodata=False)
                input_ds = None
            except KeyError as e:
                errors[input_path].append(e)
                logging.error('  KeyError: {} Skipping: {}'.format(
                    e, input_ds.GetDescription()))
                continue

            rs_array *= 0.0036  # W m-2 to MJ m-2 hr-1
            wind_array = np.sqrt(wind_u_array ** 2 + wind_v_array ** 2)
            del wind_u_array, wind_v_array

            # ETr
            if etr_flag:
                etr_array = et_common.refet_hourly_func(
                    temp_array, sph_array, rs_array, wind_array,
                    zw=10, elev=elev_array, lat=lat_array, lon=lon_array,
                    doy=input_doy, time=int(time_str) / 100,
                    ref_type='ETR')
                if daily_flag:
                    etr_day_array += etr_array
                if time_str in time_list:
                    gdc.array_to_comp_raster(
                        etr_array.astype(np.float32), etr_hour_path,
                        band=band_num, stats_flag=False)
                del etr_array

            # ETo
            if eto_flag:
                eto_array = et_common.refet_hourly_func(
                    temp_array, sph_array, rs_array, wind_array,
                    zw=10, elev=elev_array, lat=lat_array, lon=lon_array,
                    doy=input_doy, time=int(time_str) / 100,
                    ref_type='ETO')
                if daily_flag:
                    eto_day_array += eto_array
                if time_str in time_list:
                    gdc.array_to_comp_raster(
                        eto_array.astype(np.float32), eto_hour_path,
                        band=band_num, stats_flag=False)
                del eto_array

            del temp_array, sph_array, rs_array, wind_array

        if stats_flag and etr_flag:
            gdc.raster_statistics(etr_hour_path)
        if stats_flag and eto_flag:
            gdc.raster_statistics(eto_hour_path)

        # Save the projected ETr/ETo as 32-bit floats
        if not day_skip_flag and daily_flag:
            if etr_flag:
                try:
                    gdc.array_to_raster(
                        etr_day_array.astype(np.float32), etr_day_path,
                        output_geo=nldas_geo, output_proj=nldas_proj,
                        stats_flag=stats_flag)
                except AttributeError:
                    pass
            if eto_flag:
                try:
                    gdc.array_to_raster(
                        eto_day_array.astype(np.float32), eto_day_path,
                        output_geo=nldas_geo, output_proj=nldas_proj,
                        stats_flag=stats_flag)
                except AttributeError:
                    pass
        del etr_day_array, eto_day_array

    if len(errors) > 0:
        logging.info('\nThe following errors were encountered:')
        for key, value in errors.items():
            logging.error('  Filepath: {}, error: {}'.format(key, value))

    logging.debug('\nScript Complete')
def main(grb_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(),
         landsat_ws=None, start_date=None, end_date=None, times_str='',
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract hourly NLDAS vapour pressure rasters

    Args:
        grb_ws (str): folder of NLDAS GRB files
        ancillary_ws (str): folder of ancillary rasters
        output_ws (str): folder of output rasters
        landsat_ws (str): folder of Landsat scenes or tar.gz files
        start_date (str): ISO format date (YYYY-MM-DD)
        end_date (str): ISO format date (YYYY-MM-DD)
        times_str (str): comma separated values and/or ranges of UTC hours
            (i.e. "1, 2, 5-8")
            Parsed with python_common.parse_int_set()
        extent_path (str): file path defining the output extent
        output_extent (list): decimal degrees values defining output extent
        stats_flag (bool): if True, compute raster statistics.
            Default is True.
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nExtracting NLDAS vapour pressure rasters')

    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')

    output_folder = 'ea'
    output_fmt = 'ea_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # output_fmt = 'ea_{:04d}{:02d}{:02d}_{:04d}_nldas.img'

    # If a date is not set, process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug('  Start date: {}'.format(start_dt))
    except:
        start_dt = dt.datetime(2017, 1, 1)
        logging.info('  Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug('  End date: {}'.format(end_dt))
    except:
        end_dt = dt.datetime(2017, 12, 31)
        logging.info('  End date: {}'.format(end_dt))

    # Only process specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Assume NLDAS is NAD83
    # input_epsg = 'EPSG:4269'

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')

    # Build a date list from landsat_ws scene folders or tar.gz files
    date_list = []
    if landsat_ws is not None and os.path.isdir(landsat_ws):
        logging.info('\nReading dates from Landsat IDs')
        logging.info('  {}'.format(landsat_ws))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_'
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        for root, dirs, files in os.walk(landsat_ws, topdown=True):
            # If root matches, don't explore subfolders
            try:
                landsat_match = landsat_re.match(os.path.basename(root))
                date_list.append(dt.datetime.strptime(
                    '_'.join(landsat_match.groups()),
                    '%Y_%m_%d').date().isoformat())
                dirs[:] = []
            except:
                pass
            for file in files:
                try:
                    landsat_match = landsat_re.match(file)
                    date_list.append(dt.datetime.strptime(
                        '_'.join(landsat_match.groups()),
                        '%Y_%m_%d').date().isoformat())
                except:
                    pass
        date_list = sorted(list(set(date_list)))

    # elif landsat_ws is not None and os.path.isfile(landsat_ws):
    #     with open(landsat_ws) as landsat_f:

    # This allows GDAL to throw Python Exceptions
    # gdal.UseExceptions()
    # mem_driver = gdal.GetDriverByName('MEM')

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = gdc.raster_ds_osr(nldas_ds)
    nldas_proj = gdc.osr_proj(nldas_osr)
    nldas_cs = gdc.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = gdc.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug('  Projection: {}'.format(nldas_proj))
    logging.debug('  Cellsize: {}'.format(nldas_cs))
    logging.debug('  Geo: {}'.format(nldas_geo))
    logging.debug('  Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug('  Extent: {}'.format(output_extent))
        nldas_extent = gdc.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = gdc.feature_path_extent(extent_path)
            extent_osr = gdc.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = gdc.raster_path_extent(extent_path)
            extent_osr = gdc.raster_path_osr(extent_path)
            extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = gdc.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug('  Geo: {}'.format(nldas_geo))
        logging.debug('  Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = gdc.raster_to_array(
            mask_path, mask_extent=nldas_extent, fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Read elevation arrays (or subsets?)
    elev_array = gdc.raster_to_array(
        elev_path, mask_extent=nldas_extent, return_nodata=False)
    pair_array = et_common.air_pressure_func(elev_array)

    # Build output folder
    var_ws = os.path.join(output_ws, output_folder)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Each sub folder in the main folder has all imagery for 1 day
    # The path for each subfolder is /YYYY/DOY
    # This approach will process files for target dates
    # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)):
    #     logging.info(input_dt.date())

    # Iterate all available files and check dates if necessary
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        if (re.match(r'\d{4}', root_split[-2]) and
                re.match(r'\d{3}', root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt) or
                    (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif re.match(r'\d{4}', root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year) or
                    (end_dt is not None and root_year > end_dt.year)):
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        output_name = output_fmt.format(
            root_dt.year, root_dt.month, root_dt.day)
        output_path = os.path.join(var_ws, str(root_dt.year), output_name)
        logging.debug('  {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug('  File already exists, skipping')
                continue
            else:
                logging.debug('  File already exists, removing existing')
                os.remove(output_path)
        logging.debug('  {}'.format(root))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        gdc.build_empty_raster(
            output_path, band_cnt=24, output_dtype=np.float32,
            output_proj=nldas_proj, output_cs=nldas_cs,
            output_extent=nldas_extent, output_fill_flag=True)

        # Iterate through hourly files
        for input_name in sorted(files):
            logging.info('  {}'.format(input_name))
            input_path = os.path.join(root, input_name)
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug(
                    '  Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(
                int(input_match.group('YEAR')),
                int(input_match.group('MONTH')),
                int(input_match.group('DAY')))
            time_str = input_match.group('TIME')
            band_num = int(time_str[:2]) + 1
            # if start_dt is not None and input_dt < start_dt:
            #     continue
            # elif end_dt is not None and input_dt > end_dt:
            #     continue
            # elif date_list and input_dt.date().isoformat() not in date_list:
            #     continue
            if time_str not in time_list:
                logging.debug('  Time not in list, skipping')
                continue
            logging.debug('  Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug('  Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Compute vapour pressure from specific humidity
            input_ds = gdal.Open(input_path)
            sph_array = gdc.raster_ds_to_array(
                input_ds, band=input_band_dict['Specific humidity [kg/kg]'],
                mask_extent=nldas_extent, return_nodata=False)
            ea_array = (sph_array * pair_array) / (0.622 + 0.378 * sph_array)
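            # Worked example (assuming air_pressure_func() returns kPa):
            # with q = 0.010 kg/kg and P = 101.3 kPa,
            # ea = (0.010 * 101.3) / (0.622 + 0.378 * 0.010),
            # which is approximately 1.62 kPa.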
            # Save the projected array as 32-bit floats
            gdc.array_to_comp_raster(
                ea_array.astype(np.float32), output_path, band=band_num)
            # gdc.block_to_raster(
            #     ea_array.astype(np.float32), output_path, band=band)
            # gdc.array_to_raster(
            #     ea_array.astype(np.float32), output_path,
            #     output_geo=nldas_geo, output_proj=nldas_proj,
            #     stats_flag=stats_flag)
            del sph_array
            input_ds = None

        if stats_flag:
            gdc.raster_statistics(output_path)

    logging.debug('\nScript Complete')
def main(ini_path, rasters_flag=None, tables_flag=None, mc_iter_str='',
         tile_list=None, pyramids_flag=True, stats_flag=True,
         overwrite_flag=False, mp_procs=1, delay=0, debug_flag=False,
         no_file_logging=False):
    """Run interpolator for all Landsat scenes

    Parameters
    ----------
    ini_path : str
        File path of the input parameters file.
    rasters_flag : bool, optional
        If True, override INI and interpolate rasters.
    tables_flag : bool, optional
        If True, override INI and interpolate zone tables.
    mc_iter_str : str, optional
        MonteCarlo iteration list and/or range.
    tile_list : list, optional
        Landsat path/rows to process (i.e. [p045r043, p045r033]).
        This will override the tile list in the INI file.
    pyramids_flag : bool, optional
        If True, compute raster pyramids (the default is True).
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    mp_procs : int, optional
        Number of cpu cores to use (the default is 1).
    delay : float, optional
        Max random delay starting function in seconds (the default is 0).
    debug_flag : bool, optional
        If True, enable debug level logging (the default is False).
    no_file_logging : bool, optional
        If True, don't write logging to file (the default is False).

    Returns
    -------
    None

    """
    logging.info('\nRunning Interpolator')

    # Open config file
    config = open_ini(ini_path)

    # Get input parameters
    logging.debug('  Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = read_param('tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug('  Year: {}'.format(year))
    logging.debug('  Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug('  Project: {}'.format(project_ws))

    interpolate_folder = config.get('INPUTS', 'interpolate_folder')
    logging.debug('  Folder: {}'.format(interpolate_folder))

    # If both flags were not set, read from INI
    if rasters_flag is None and tables_flag is None:
        logging.info('  Reading interpolator flags from INI file')
        if rasters_flag is None:
            rasters_flag = read_param(
                'interpolate_rasters_flag', True, config, 'INPUTS')
        if tables_flag is None:
            tables_flag = read_param(
                'interpolate_tables_flag', True, config, 'INPUTS')
    # If both flags were set False, for now, exit the script
    # It may make more sense to assume the user wants to interpolate
    #   something
    elif rasters_flag is False and tables_flag is False:
        logging.error('Raster and table interpolator flags are both False\n')
        logging.error('  Exiting the script')
        return False
        # sys.exit()
        # logging.info('Raster and table interpolator flags are both False\n')
        # logging.info('  Defaulting to rasters_flag=True')
        # rasters_flag = True

    if rasters_flag:
        rasters_func_path = config.get('INPUTS', 'interpolate_rasters_func')
    if tables_flag:
        tables_func_path = config.get('INPUTS', 'interpolate_tables_func')

    # For now, get mc_iter list from command line, not from project file
    # mc_iter_list = config.get('INPUTS', 'mc_iter_list')
    mc_iter_list = list(parse_int_set(mc_iter_str))

    # Need something in mc_iter_list to iterate over
    if not mc_iter_list:
        mc_iter_list = [None]

    # For now build INI file name from template INI names
    ini_name = os.path.basename(config.get('INPUTS', 'interpolate_ini'))
    ini_name = os.path.splitext(os.path.basename(ini_name))[0]

    # INI file is built as a function of year
    ini_fmt = '{}_{}_{}.ini'

    # Regular expressions
    # For now assume path/row are three digit numbers
    # tile_re = re.compile(r'p(\d{3})r(\d{3})', re.IGNORECASE)
    # image_re = re.compile(
    #     r'^(LT04|LT05|LE07|LC08)_(\d{3})(\d{3})_(\d{4})(\d{2})(\d{2})')
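    # Naming sketch (hypothetical values): with ini_name='interpolate',
    # year=2017, and interpolate_folder='ETrF', the expected input file is
    # <project_folder>/2017/interpolate_2017_etrf.ini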
    # Check inputs folders/paths
    if not os.path.isdir(project_ws):
        logging.error('\n Folder {} does not exist'.format(project_ws))
        sys.exit()

    # Check that there is an input file for the year and folder
    year_ws = os.path.join(project_ws, str(year))
    ini_path = os.path.join(
        year_ws, ini_fmt.format(ini_name, str(year),
                                interpolate_folder.lower()))
    if not os.path.isfile(ini_path):
        logging.warning('  Input file does not exist\n  {}'.format(ini_path))
        return False

    # Run interpolator for each Monte Carlo iteration
    # mp_list = []
    for mc_iter in sorted(mc_iter_list):
        logging.debug('  Year: {} Iteration: {}'.format(str(year), mc_iter))
        rasters_args = []
        tables_args = []
        if rasters_flag:
            rasters_args = [
                'python', rasters_func_path, year_ws, '-i', ini_path]
        if tables_flag:
            tables_args = [
                'python', tables_func_path, year_ws, '-i', ini_path]
        if mc_iter is not None:
            rasters_args.extend(['-mc', str(mc_iter)])
            tables_args.extend(['-mc', str(mc_iter)])
        if pyramids_flag:
            rasters_args.append('--pyramids')
        if stats_flag:
            rasters_args.append('--stats')
        if overwrite_flag:
            rasters_args.append('--overwrite')
            tables_args.append('--overwrite')
        if debug_flag:
            rasters_args.append('--debug')
            tables_args.append('--debug')
        if delay > 0:
            rasters_args.extend(['--delay', str(delay)])
            tables_args.extend(['--delay', str(delay)])
        if no_file_logging:
            rasters_args.append('--no_file_logging')
            tables_args.append('--no_file_logging')
        if mp_procs > 1:
            rasters_args.extend(['-mp', str(mp_procs)])
            tables_args.extend(['-mp', str(mp_procs)])
        if rasters_flag:
            subprocess.call(rasters_args, cwd=year_ws)
        if tables_flag:
            subprocess.call(tables_args, cwd=year_ws)

    logging.debug('\nScript complete')
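# Composition sketch for the subprocess calls above (hypothetical paths):
# with mc_iter=None and only pyramids_flag and stats_flag set, the raster
# interpolator call is roughly
#   python <rasters_func_path> <year_ws> -i <ini_path> --pyramids --stats
# and it is run with cwd=<year_ws> via subprocess.call().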
def main(csv_path, output_folder, fid_list='', bin_min=0, bin_max=5,
         bin_size=0.25, start_dt=None, end_dt=None, plots='all'):
    """Create Summary Histogram Plots from pymetric zonal csv output files

    Args:
        csv_path (str): zonal stats file path
        output_folder (str): folder path where files will be saved
            (default: ...pymetric/summary_histograms)
        fid_list (str): comma separated values and/or ranges of FIDs to skip
        bin_min (float): histogram minimum (default: 0)
        bin_max (float): histogram maximum (default: 5)
        bin_size (float): histogram bin size (default: 0.25)
        start_dt (datetime): start date (optional)
        end_dt (datetime): end date (optional)
        plots (str): output plot options: all, acreage, or field
            (default: all)

    Returns:
        None
    """
    logging.info('\nReading input csv file: {}'.format(csv_path))

    # Check that the csv file exists
    if not csv_path or not os.path.isfile(csv_path):
        logging.error('ERROR: csv file does not exist')
        sys.exit()

    # Attempt to read csv_file
    try:
        input_df = pd.read_csv(csv_path, sep=',')
    except:
        logging.error('Error reading file. Check csv path.')
        sys.exit()

    # Create Output Folder if it doesn't exist
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # Filter FIDs based on fid_list (default [])
    fid_skiplist = []
    if fid_list:
        fid_skiplist = sorted(list(dripy.parse_int_set(fid_list)))
        logging.info('Skipping FIDs: {}'.format(fid_skiplist))
        input_df = input_df[~input_df['FID'].isin(fid_skiplist)]

    if (start_dt and not end_dt) or (end_dt and not start_dt):
        logging.error('\nPlease Specify Both Start and End Date:'
                      '\nStart Date: {}'
                      '\nEnd Date: {}'.format(start_dt, end_dt))
        sys.exit()

    if (start_dt and end_dt) and (end_dt < start_dt):
        logging.error('End date cannot be before start date. Exiting.')
        sys.exit()

    # Filter dataset if start and end dates are specified
    if start_dt and end_dt:
        if 'DATE' in input_df.columns:
            input_df['DATE'] = pd.to_datetime(input_df['DATE'])
            logging.info('\nFiltering By Date. Start: {:%Y-%m-%d}, '
                         'End: {:%Y-%m-%d}'.format(start_dt, end_dt))
            input_df = input_df[(input_df['DATE'] >= start_dt) &
                                (input_df['DATE'] <= end_dt)]
            if input_df.empty:
                logging.error('Date Filter Removed All Data. Exiting.')
                sys.exit()
        else:
            logging.error('Cannot Apply Custom Date Range On Monthly OR '
                          'Annual Datasets.\nUse Daily Output. Exiting.')
            sys.exit()
    # Unit Conversions
    pix2acre = 0.222395  # 30x30m pixel to acres; From Google
    mm2ft = 0.00328084  # From Google

    # Add Acres
    input_df['Area_acres'] = input_df.PIXELS * pix2acre
    # Add FT Fields
    input_df['ET_FT'] = input_df.ET_MM * mm2ft
    input_df['ETR_FT'] = input_df.ETR_MM * mm2ft
    # Daily Volume Field
    input_df['Volume_acft'] = input_df.Area_acres * input_df.ET_FT
    # Net ET Fields
    input_df['NetET_mm'] = input_df.ET_MM - input_df.PPT_MM
    input_df['NetET_FT'] = input_df.NetET_mm * mm2ft
    input_df['NetVolume_acft'] = input_df.Area_acres * input_df['NetET_FT']

    # Growing Season Start/End Months (inclusive)
    start_month = 4
    end_month = 10

    # Create Growing Season Only Dataframe
    if 'MONTH' in input_df.columns:
        gs_df = input_df[(input_df['MONTH'] >= start_month) &
                         (input_df['MONTH'] <= end_month)]

    # Dictionary to control agg of each variable
    a = {
        'FID': 'mean',
        'YEAR': 'mean',
        'PIXELS': 'mean',
        'NDVI': 'mean',
        'ETRF': 'mean',
        'ETR_MM': 'sum',
        'ET_MM': 'sum',
        'PPT_MM': 'sum',
        'Area_acres': 'mean',
        'ET_FT': 'sum',
        'ETR_FT': 'sum',
        'NetET_FT': 'sum',
        'Volume_acft': 'sum',
        'NetVolume_acft': 'sum'}

    # GS Grouped Dataframe (only for daily and monthly csv)
    if 'MONTH' in input_df.columns:
        gs_grp_df = gs_df.groupby('FID', as_index=True).agg(a)

    # Annual Grouped Dataframe
    ann_grp_df = input_df.groupby('FID', as_index=True).agg(a)

    # Field Count Histogram Function
    def field_count_hist(grp_df, rate_var, vol_var, title, xlab, filedesc):
        # Annotation Box Stats
        y = grp_df['YEAR'].mean()
        total_area = grp_df['Area_acres'].sum()
        total_vol = grp_df[vol_var].sum()
        m = (total_vol / total_area).round(1)
        # Bins
        et_bins = np.linspace(
            bin_min, bin_max,
            int(round((bin_max - bin_min) / bin_size)) + 1)
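        # Worked example: with the defaults bin_min=0, bin_max=5, and
        # bin_size=0.25, (5 - 0) / 0.25 + 1 = 21 bin edges, i.e. 20 bins
        # of 0.25 ft each; np.linspace() needs an integer count, hence
        # the int() cast.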
        # Make Figure
        font_size = 12
        ann_font_size = 10
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(grp_df[rate_var], bins=et_bins, align='mid',
                edgecolor='black')
        ax.set_title(title, size=font_size)
        ax.set_xlabel(xlab, size=font_size)
        ax.set_ylabel('Field Count', size=font_size)
        ax.set_xticks(np.arange(0, bin_max + (2 * bin_size), 2 * bin_size))
        ax.tick_params(axis='x', labelsize=font_size)
        ax.tick_params(axis='y', labelsize=font_size)
        ymin, ymax = plt.ylim()  # return the current ylim
        plt.ylim((ymin, ymax + ymax * 0.3))  # shift ymax for annotation space
        # Add mean vertical line
        ax.axvline(m, color='gray', linestyle='dashed', linewidth=1)
        # Add Annotation Text Box
        antext = ('Year {:.0f}\n' +
                  'Mean ET = {:.1f} ft\n' +
                  'Total Area = {:.1f} acres\n' +
                  'ET Volume = {:.1f} ac-ft').format(
            y, m, total_area, total_vol)
        at = AnchoredText(
            antext, prop=dict(size=ann_font_size), frameon=True, loc=2)
        at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(at)
        # Save Figure
        file_name = '{:.0f}_{}_Fields'.format(y, filedesc)
        fig.tight_layout(pad=3)
        plt.savefig(os.path.join(output_folder, file_name), dpi=300)
        plt.close(fig)
        fig.clf()
        return True

    # Acreage histogram (Bar Plot)
    def acreage_histogram(grp_df, rate_var, vol_var, title, xlab, filedesc):
        # Annotation Box Stats
        y = grp_df['YEAR'].mean()
        total_area = grp_df['Area_acres'].sum()
        total_vol = grp_df[vol_var].sum()
        m = (total_vol / total_area).round(1)
        # Bins
        et_bins = np.linspace(
            bin_min, bin_max,
            int(round((bin_max - bin_min) / bin_size)) + 1)
        # Acreage/ET Bins
        et_area_hist, et_bins, binnum = stats.binned_statistic(
            grp_df[rate_var], grp_df.Area_acres, 'sum', et_bins)
        # Make Figure
        font_size = 12
        ann_font_size = 10
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.bar(et_bins[:-1], et_area_hist, width=bin_size,
               edgecolor='black', align='edge', color='r')
        ax.set_title(title, size=font_size)
        ax.set_xlabel(xlab, size=font_size)
        ax.set_ylabel('Acreage', size=font_size)
        ax.set_xticks(np.arange(0, bin_max + (2 * bin_size), 2 * bin_size))
        ax.tick_params(axis='x', labelsize=font_size)
        ax.tick_params(axis='y', labelsize=font_size)
        ymin, ymax = plt.ylim()  # return the current ylim
        plt.ylim((ymin, ymax + ymax * 0.3))  # shift ymax for annotation space
        # Add mean vertical line
        ax.axvline(m, color='gray', linestyle='dashed', linewidth=1)
        # Add Annotation Text Box
        antext = ('Year {:.0f}\n' +
                  'Mean ET = {:.1f} ft\n' +
                  'Total Area = {:.1f} acres\n' +
                  'ET Volume = {:.1f} ac-ft').format(
            y, m, total_area, total_vol)
        at = AnchoredText(
            antext, prop=dict(size=ann_font_size), frameon=True, loc=2)
        at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(at)
        # Save Figure
        file_name = '{:.0f}_{}_Acreage'.format(y, filedesc)
        fig.tight_layout(pad=3)
        plt.savefig(os.path.join(output_folder, file_name), dpi=300)
        plt.close(fig)
        fig.clf()
        return True

    logging.info('\nCreating Summary Histogram Plots.')
    if plots in ['acreage', 'field']:
        logging.info('Only outputting {} plots.'.format(plots))

    if start_dt and end_dt:
        # Custom date range plots
        if plots in ['all', 'acreage']:
            acreage_histogram(
                ann_grp_df, 'ET_FT', 'Volume_acft',
                'Total ET: {:%Y-%m-%d} to {:%Y-%m-%d}'.format(
                    start_dt, end_dt),
                'Total ET (Feet)', 'TotalET')
            acreage_histogram(
                ann_grp_df, 'NetET_FT', 'NetVolume_acft',
                'Net ET: {:%Y-%m-%d} to {:%Y-%m-%d}'.format(
                    start_dt, end_dt),
                'Net ET (Feet)', 'NetET')
        if plots in ['all', 'field']:
            field_count_hist(
                ann_grp_df, 'ET_FT', 'Volume_acft',
                'Total ET: {:%Y-%m-%d} to {:%Y-%m-%d}'.format(
                    start_dt, end_dt),
                'Total ET (Feet)', 'TotalET')
            field_count_hist(
                ann_grp_df, 'NetET_FT', 'NetVolume_acft',
                'Net ET: {:%Y-%m-%d} to {:%Y-%m-%d}'.format(
                    start_dt, end_dt),
                'Net ET (Feet)', 'NetET')
    else:
        # Default Annual and Growing Season Plots if no start/end date
        # Annual Plots
        if plots in ['all', 'acreage']:
            acreage_histogram(ann_grp_df, 'ET_FT', 'Volume_acft',
                              'Annual ET', 'Total ET (Feet)', 'Ann_TotalET')
            acreage_histogram(ann_grp_df, 'NetET_FT', 'NetVolume_acft',
                              'Annual Net ET', 'Net ET (Feet)', 'Ann_NetET')
        if plots in ['all', 'field']:
            field_count_hist(ann_grp_df, 'ET_FT', 'Volume_acft',
                             'Annual ET', 'Total ET (Feet)', 'Ann_TotalET')
            field_count_hist(ann_grp_df, 'NetET_FT', 'NetVolume_acft',
                             'Annual Net ET', 'Net ET (Feet)', 'Ann_NetET')
        # Growing Season Plots
        if 'MONTH' in input_df.columns:
            if plots in ['all', 'acreage']:
                acreage_histogram(gs_grp_df, 'ET_FT', 'Volume_acft',
                                  'Growing Season ET', 'Total ET (Feet)',
                                  'GS_TotalET')
                acreage_histogram(gs_grp_df, 'NetET_FT', 'NetVolume_acft',
                                  'Growing Season Net ET', 'Net ET (Feet)',
                                  'GS_NetET')
            if plots in ['all', 'field']:
                field_count_hist(gs_grp_df, 'ET_FT', 'Volume_acft',
                                 'Growing Season ET', 'Total ET (Feet)',
                                 'GS_TotalET')
                field_count_hist(gs_grp_df, 'NetET_FT', 'NetVolume_acft',
                                 'Growing Season Net ET', 'Net ET (Feet)',
                                 'GS_NetET')
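# Usage sketch (hypothetical paths): plot all histogram types for a date
# subset of a daily zonal stats file:
# main(csv_path='D:/pymetric/zonal_stats_daily.csv',
#      output_folder='D:/pymetric/summary_histograms',
#      start_dt=pd.to_datetime('2017-04-01'),
#      end_dt=pd.to_datetime('2017-10-31'), plots='all')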