def gcs_to_utm_dict(tile_list, tile_utm_zone_dict, tile_gcs_osr, tile_gcs_wkt_dict, gcs_buffer=0.25, snap_xmin=None, snap_ymin=None, snap_cs=None): """Return a dictionary of Landsat path/row GCS extents projected to UTM Args: tile_list (list): Landsat path/row tile names tile_utm_zone_dict (dict): UTM zone number for each tile name tile_gcs_osr: OSR spatial reference of the tile GCS geometries tile_gcs_wkt_dict (dict): tile extent geometry WKT for each tile name gcs_buffer (float): buffer in decimal degrees (applied as gcs_buffer * 100000 meters) snap_xmin (float): snap point x coordinate snap_ymin (float): snap point y coordinate snap_cs (float): snap cellsize Returns: dictionary of projected gdc.Extent objects, keyed by tile name """ # If parameters are not set, try to get from env # if snap_xmin is None and env.snap_xmin: # snap_xmin = env.snap_xmin # if snap_ymin is None and env.snap_ymin: # snap_ymin = env.snap_ymin # if snap_cs is None and env.cellsize: # snap_cs = env.cellsize logging.info('\nCalculate projected extent for each path/row') output_dict = dict() for tile_name in sorted(tile_list): logging.info(' {}'.format(tile_name)) # Create an OSR object from the utm projection tile_utm_osr = gdc.epsg_osr(32600 + int(tile_utm_zone_dict[tile_name])) # tile_utm_proj = gdc.osr_proj(tile_utm_osr) # Create utm transformation tile_utm_tx = osr.CoordinateTransformation(tile_gcs_osr, tile_utm_osr) tile_gcs_geom = ogr.CreateGeometryFromWkt(tile_gcs_wkt_dict[tile_name]) # Buffer extent by gcs_buffer degrees # DEADBEEF - Buffer fails if GDAL is not built with GEOS support # tile_gcs_geom = tile_gcs_geom.Buffer(gcs_buffer) # Create gcs to utm transformer and apply it tile_utm_geom = tile_gcs_geom.Clone() tile_utm_geom.Transform(tile_utm_tx) tile_utm_extent = gdc.Extent(tile_utm_geom.GetEnvelope()) tile_utm_extent = tile_utm_extent.ogrenv_swap() # 1 degree is roughly 100 km, so the buffer in meters is gcs_buffer * 100000 tile_utm_extent.buffer_extent(gcs_buffer * 100000) tile_utm_extent.adjust_to_snap('EXPAND', snap_xmin, snap_ymin, snap_cs) output_dict[tile_name] = tile_utm_extent return output_dict
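# The 'EXPAND' snap above aligns each extent edge outward to a snap grid.
# A minimal standalone sketch of that operation, assuming adjust_to_snap
# offsets edges to the (snap_xmin, snap_ymin) origin in snap_cs steps;
# gdc.Extent is project-internal, so plain tuples and a hypothetical
# snap_extent_expand() helper are used here.
import math

def snap_extent_expand(extent, snap_x, snap_y, cs):
    """Expand an (xmin, ymin, xmax, ymax) tuple outward to the snap grid"""
    xmin, ymin, xmax, ymax = extent
    xmin = math.floor((xmin - snap_x) / cs) * cs + snap_x
    ymin = math.floor((ymin - snap_y) / cs) * cs + snap_y
    xmax = math.ceil((xmax - snap_x) / cs) * cs + snap_x
    ymax = math.ceil((ymax - snap_y) / cs) * cs + snap_y
    return xmin, ymin, xmax, ymax

# Example: snap a UTM extent outward to a 30 m grid anchored at (15, 15)
# snap_extent_expand((286512.3, 4102903.8, 301876.1, 4120499.2), 15, 15, 30)
# -> (286485.0, 4102875.0, 301905.0, 4120515.0)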
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(), start_date=None, end_date=None, extent_path=None, output_extent=None, stats_flag=True, overwrite_flag=False): """Extract DAYMET precipitation Args: netcdf_ws (str): folder of DAYMET netcdf files ancillary_ws (str): folder of ancillary rasters output_ws (str): folder of output rasters start_date (str): ISO format date (YYYY-MM-DD) end_date (str): ISO format date (YYYY-MM-DD) extent_path (str): file path defining the output extent output_extent (list): decimal degrees values defining output extent stats_flag (bool): if True, compute raster statistics. Default is True. overwrite_flag (bool): if True, overwrite existing files Returns: None """ logging.info('\nExtracting DAYMET precipitation') # If a date is not set, process 2015 try: start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d') logging.debug(' Start date: {}'.format(start_dt)) except: start_dt = dt.datetime(2015, 1, 1) logging.info(' Start date: {}'.format(start_dt)) try: end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d') logging.debug(' End date: {}'.format(end_dt)) except: end_dt = dt.datetime(2015, 12, 31) logging.info(' End date: {}'.format(end_dt)) # Save DAYMET lat, lon, and elevation arrays mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img') daymet_re = re.compile('daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$') # DAYMET band name dictionary # daymet_band_dict = dict() # daymet_band_dict['prcp'] = 'precipitation_amount' # daymet_band_dict['srad'] = 'surface_downwelling_shortwave_flux_in_air' # daymet_band_dict['sph'] = 'specific_humidity' # daymet_band_dict['tmin'] = 'air_temperature' # daymet_band_dict['tmax'] = 'air_temperature' # Get extent/geo from mask raster daymet_ds = gdal.Open(mask_raster) daymet_osr = gdc.raster_ds_osr(daymet_ds) daymet_proj = gdc.osr_proj(daymet_osr) daymet_cs = gdc.raster_ds_cellsize(daymet_ds, x_only=True) daymet_extent = gdc.raster_ds_extent(daymet_ds) daymet_geo = daymet_extent.geo(daymet_cs) daymet_x, daymet_y = daymet_extent.origin() daymet_ds = None logging.debug(' Projection: {}'.format(daymet_proj)) logging.debug(' Cellsize: {}'.format(daymet_cs)) logging.debug(' Geo: {}'.format(daymet_geo)) logging.debug(' Extent: {}'.format(daymet_extent)) logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y)) # Subset data to a smaller extent if output_extent is not None: logging.info('\nComputing subset extent & geo') logging.debug(' Extent: {}'.format(output_extent)) # Assume input extent is in decimal degrees output_extent = gdc.project_extent( gdc.Extent(output_extent), gdc.epsg_osr(4326), daymet_osr, 0.001) output_extent = gdc.intersect_extents([daymet_extent, output_extent]) output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs) output_geo = output_extent.geo(daymet_cs) logging.debug(' Geo: {}'.format(output_geo)) logging.debug(' Extent: {}'.format(output_extent)) elif extent_path is not None: logging.info('\nComputing subset extent & geo') if extent_path.lower().endswith('.shp'): output_extent = gdc.feature_path_extent(extent_path) extent_osr = gdc.feature_path_osr(extent_path) extent_cs = None else: output_extent = gdc.raster_path_extent(extent_path) extent_osr = gdc.raster_path_osr(extent_path) extent_cs = gdc.raster_path_cellsize(extent_path, x_only=True) output_extent = gdc.project_extent( output_extent, extent_osr, daymet_osr, extent_cs) output_extent = gdc.intersect_extents([daymet_extent, output_extent]) output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs) 
output_geo = output_extent.geo(daymet_cs) logging.debug(' Geo: {}'.format(output_geo)) logging.debug(' Extent: {}'.format(output_extent)) else: output_extent = daymet_extent.copy() output_geo = daymet_geo[:] # output_shape = output_extent.shape(cs=daymet_cs) xi, yi = gdc.array_geo_offsets(daymet_geo, output_geo, daymet_cs) output_rows, output_cols = output_extent.shape(daymet_cs) logging.debug(' Shape: {} {}'.format(output_rows, output_cols)) logging.debug(' Offsets: {} {} (x y)'.format(xi, yi)) # Process each variable input_var = 'prcp' output_var = 'ppt' logging.info("\nVariable: {}".format(input_var)) # Build output folder var_ws = os.path.join(output_ws, output_var) if not os.path.isdir(var_ws): os.makedirs(var_ws) # Process each file in the input workspace for input_name in sorted(os.listdir(netcdf_ws)): logging.debug("{}".format(input_name)) input_match = daymet_re.match(input_name) if not input_match: logging.debug(' Regular expression didn\'t match, skipping') continue elif input_match.group('VAR') != input_var: logging.debug(' Variable didn\'t match, skipping') continue year_str = input_match.group('YEAR') logging.info(" Year: {}".format(year_str)) year_int = int(year_str) year_days = int(dt.datetime(year_int, 12, 31).strftime('%j')) if start_dt is not None and year_int < start_dt.year: logging.debug(' Before start date, skipping') continue elif end_dt is not None and year_int > end_dt.year: logging.debug(' After end date, skipping') continue # Build input file path input_raster = os.path.join(netcdf_ws, input_name) # if not os.path.isfile(input_raster): # logging.debug( # ' Input raster doesn\'t exist, skipping {}'.format( # input_raster)) # continue # Build output folder output_year_ws = os.path.join(var_ws, year_str) if not os.path.isdir(output_year_ws): os.makedirs(output_year_ws) # Read in the DAYMET NetCDF file input_nc_f = netCDF4.Dataset(input_raster, 'r') # logging.debug(input_nc_f.variables) # Check all valid dates in the year year_dates = date_range( dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1)) for date_dt in year_dates: if start_dt is not None and date_dt < start_dt: logging.debug(' {} - before start date, skipping'.format( date_dt.date())) continue elif end_dt is not None and date_dt > end_dt: logging.debug(' {} - after end date, skipping'.format( date_dt.date())) continue else: logging.info(' {}'.format(date_dt.date())) output_path = os.path.join( output_year_ws, '{}_{}_daymet.img'.format( output_var, date_dt.strftime('%Y%m%d'))) if os.path.isfile(output_path): logging.debug(' {}'.format(output_path)) if not overwrite_flag: logging.debug(' File already exists, skipping') continue else: logging.debug(' File already exists, removing existing') os.remove(output_path) doy = int(date_dt.strftime('%j')) doy_i = range(1, year_days + 1).index(doy) # Arrays are read as masked arrays with a fill value of -9999 # Convert to basic numpy arrays with nan values try: input_ma = input_nc_f.variables[input_var][ doy_i, yi: yi + output_rows, xi: xi + output_cols] except IndexError: logging.info(' date not in netcdf, skipping') continue input_nodata = float(input_ma.fill_value) output_array = input_ma.data.astype(np.float32) output_array[output_array == input_nodata] = np.nan # Save the array as 32-bit floats gdc.array_to_raster( output_array.astype(np.float32), output_path, output_geo=output_geo, output_proj=daymet_proj, stats_flag=stats_flag) del input_ma, output_array input_nc_f.close() del input_nc_f logging.debug('\nScript Complete')
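# A small, self-contained sketch of the masked-array handling above: netCDF4
# returns numpy masked arrays, and the code converts them to plain float32
# arrays with NaN in place of the fill value before writing rasters.
# Synthetic data is used so the snippet runs without a DAYMET file.
import numpy as np

input_ma = np.ma.masked_equal(
    np.array([[1.5, -9999.0], [2.25, 3.0]], dtype=np.float32), -9999.0)
input_ma.set_fill_value(-9999.0)

input_nodata = float(input_ma.fill_value)
output_array = input_ma.data.astype(np.float32)
output_array[output_array == input_nodata] = np.nan
# output_array is now [[1.5, nan], [2.25, 3.0]]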
def main(extent_path, output_folder, overwrite_flag): """Download NED tiles that intersect the study area Script assumes DEM data is in 1x1 WGS84 degree tiles Download 10m (1/3 arc-second) or 30m (1 arc-second) versions from: 10m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/13/IMG 30m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/1/IMG For this example, only download 30m DEM Args: extent_path (str): file path to study area shapefile output_folder (str): folder path where files will be saved overwrite_flag (bool): If True, overwrite existing files Returns: None """ logging.info('\nDownload NED tiles') site_url = 'rockyftp.cr.usgs.gov' site_folder = 'vdelivery/Datasets/Staged/Elevation/1/IMG' # site_url = 'ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/1/IMG' zip_fmt = 'n{:02d}w{:03d}.zip' tile_fmt = 'imgn{:02d}w{:03d}_1.img' # tile_fmt = 'imgn{:02d}w{:03d}_13.img' # Use 1 degree snap point and "cellsize" to get 1x1 degree tiles tile_osr = gdc.epsg_osr(4326) tile_x, tile_y, tile_cs = 0, 0, 1 buffer_cells = 0 # Error checking if not os.path.isfile(extent_path): logging.error('\nERROR: The extent_path does not exist\n') return False if not os.path.isdir(output_folder): os.makedirs(output_folder) # Check that input is a shapefile # Get the extent of each feature lat_lon_list = [] shp_driver = ogr.GetDriverByName('ESRI Shapefile') input_ds = shp_driver.Open(extent_path, 1) input_osr = gdc.feature_ds_osr(input_ds) input_layer = input_ds.GetLayer() input_ftr = input_layer.GetNextFeature() while input_ftr: input_geom = input_ftr.GetGeometryRef() input_extent = gdc.Extent(input_geom.GetEnvelope()) input_extent = input_extent.ogrenv_swap() input_ftr = input_layer.GetNextFeature() logging.debug('Input Extent: {}'.format(input_extent)) # Project the feature extent to the tile coordinate system output_extent = gdc.project_extent( input_extent, input_osr, tile_osr) logging.debug('Output Extent: {}'.format(output_extent)) # Extent needed to select 1x1 degree tiles tile_extent = output_extent.copy() tile_extent.adjust_to_snap( 'EXPAND', tile_x, tile_y, tile_cs) logging.debug('Tile Extent: {}'.format(tile_extent)) # Get list of available tiles that intersect the extent lat_lon_list.extend([ (lat, -lon) for lon in range(int(tile_extent.xmin), int(tile_extent.xmax)) for lat in range(int(tile_extent.ymax), int(tile_extent.ymin), -1)]) lat_lon_list = sorted(list(set(lat_lon_list))) # Attempt to download the tiles logging.info('') for lat_lon in lat_lon_list: logging.info('Tile: {}'.format(lat_lon)) zip_name = zip_fmt.format(*lat_lon) zip_url = '/'.join([site_url, site_folder, zip_name]) zip_path = os.path.join(output_folder, zip_name) tile_name = tile_fmt.format(*lat_lon) tile_path = os.path.join(output_folder, tile_name) logging.debug(' {}'.format(zip_url)) logging.debug(' {}'.format(zip_path)) if os.path.isfile(tile_path) and not overwrite_flag: logging.debug(' skipping') continue ftp_download(site_url, site_folder, zip_name, zip_path) logging.debug(' extracting') try: zip_f = zipfile.ZipFile(zip_path) zip_f.extract(tile_name, output_folder) zip_f.close() except Exception as e: logging.info(' Unhandled exception: {}'.format(e)) try: os.remove(zip_path) except Exception as e: logging.info(' Unhandled exception: {}'.format(e))
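# ftp_download() is called above but defined elsewhere in this repo; a minimal
# sketch of such a helper with the standard-library ftplib (anonymous login,
# binary transfer) might look like this. This is an assumption about the
# helper's behavior, not its actual implementation.
import ftplib

def ftp_download(site_url, site_folder, file_name, output_path):
    """Download site_folder/file_name from an FTP host to output_path"""
    ftp = ftplib.FTP(site_url)
    ftp.login()  # anonymous login
    ftp.cwd(site_folder)
    with open(output_path, 'wb') as output_f:
        ftp.retrbinary('RETR ' + file_name, output_f.write)
    ftp.quit()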
def main(gis_ws, tile_ws, dem_cs, mask_flag=False, overwrite_flag=False): """Download NED tiles that intersect the study area Script assumes DEM data is in 1x1 WGS84 degree tiles Download 10m (1/3 arc-second) or 30m (1 arc-second) versions from: 10m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/13/IMG 30m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/1/IMG For this example, only download 30m DEM Args: gis_ws (str): Folder/workspace path of the GIS data for the project tile_ws (str): Folder/workspace path of the DEM tiles dem_cs (int): DEM cellsize (10 or 30m) mask_flag (bool): If True, only download tiles intersecting zones mask overwrite_flag (bool): If True, overwrite existing files Returns: None """ logging.info('\nDownload DEM tiles') zip_fmt = 'n{0:02d}w{1:03d}.zip' if dem_cs == 10: site_url = 'ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/13/IMG' tile_fmt = 'imgn{0:02d}w{1:03d}_13.img' elif dem_cs == 30: site_url = 'ftp://rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/1/IMG' tile_fmt = 'imgn{0:02d}w{1:03d}_1.img' else: logging.error('\nERROR: The input cellsize must be 10 or 30\n') sys.exit() # Use 1 degree snap point and "cellsize" to get 1x1 degree tiles tile_osr = gdc.epsg_osr(4269) tile_buffer = 0.5 tile_x, tile_y, tile_cs = 0, 0, 1 scratch_ws = os.path.join(gis_ws, 'scratch') zone_raster_path = os.path.join(scratch_ws, 'zone_raster.img') zone_polygon_path = os.path.join(scratch_ws, 'zone_polygon.shp') # Error checking if not os.path.isfile(zone_raster_path): logging.error(('\nERROR: The zone raster {} does not exist' + '\n Try re-running "build_study_area_raster.py"' ).format(zone_raster_path)) sys.exit() if mask_flag and not os.path.isfile(zone_polygon_path): logging.error( ('\nERROR: The zone polygon {} does not exist and mask_flag=True' + '\n Try re-running "build_study_area_raster.py"' ).format(zone_polygon_path)) sys.exit() if not os.path.isdir(tile_ws): os.makedirs(tile_ws) # Reference all output rasters to the zone raster zone_raster_ds = gdal.Open(zone_raster_path) output_osr = gdc.raster_ds_osr(zone_raster_ds) # output_wkt = gdc.raster_ds_proj(zone_raster_ds) output_cs = gdc.raster_ds_cellsize(zone_raster_ds)[0] output_x, output_y = gdc.raster_ds_origin(zone_raster_ds) output_extent = gdc.raster_ds_extent(zone_raster_ds) output_ullr = output_extent.ul_lr_swap() zone_raster_ds = None logging.debug('\nStudy area properties') logging.debug(' Output OSR: {}'.format(output_osr)) logging.debug(' Output Extent: {}'.format(output_extent)) logging.debug(' Output cellsize: {}'.format(output_cs)) logging.debug(' Output UL/LR: {}'.format(output_ullr)) if mask_flag: # Keep tiles that intersect zone polygon lat_lon_list = polygon_tiles(zone_polygon_path, tile_osr, tile_x, tile_y, tile_cs, tile_buffer=0) else: # Keep tiles that intersect zone raster extent # Project study area extent to DEM tile coordinate system tile_extent = gdc.project_extent(output_extent, output_osr, tile_osr) logging.debug('Output Extent: {}'.format(tile_extent)) # Extent needed to select 1x1 degree tiles tile_extent.buffer_extent(tile_buffer) tile_extent.adjust_to_snap('EXPAND', tile_x, tile_y, tile_cs) logging.debug('Tile Extent: {}'.format(tile_extent)) # Get list of available tiles that intersect the extent lat_lon_list = sorted( list( set([(lat, -lon) for lon in range(int(tile_extent.xmin), int(tile_extent.xmax)) for lat in range(int(tile_extent.ymax), int(tile_extent.ymin), -1)]))) # Attempt to download the tiles logging.debug('Downloading') for lat_lon in
lat_lon_list: logging.info(' {}'.format(lat_lon)) zip_name = zip_fmt.format(*lat_lon) zip_url = site_url + '/' + zip_name zip_path = os.path.join(tile_ws, zip_name) tile_name = tile_fmt.format(*lat_lon) tile_path = os.path.join(tile_ws, tile_name) logging.debug(zip_url) logging.debug(zip_path) if not os.path.isfile(tile_path) or overwrite_flag: try: urllib.urlretrieve(zip_url, zip_path) zip_f = zipfile.ZipFile(zip_path) zip_f.extract(tile_name, tile_ws) zip_f.close() except IOError: logging.debug(' IOError, skipping') try: os.remove(zip_path) except: pass
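# A worked example of the tile enumeration used above: a snapped lat/lon
# extent becomes the (lat, -lon) pairs that name the 1x1 degree NED tiles
# (n{lat:02d}w{lon:03d}). The extent values here are illustrative only.
tile_xmin, tile_ymin, tile_xmax, tile_ymax = -120.0, 38.0, -118.0, 40.0
lat_lon_list = sorted(set([
    (lat, -lon)
    for lon in range(int(tile_xmin), int(tile_xmax))
    for lat in range(int(tile_ymax), int(tile_ymin), -1)]))
# -> [(39, 119), (39, 120), (40, 119), (40, 120)]
#    i.e. n39w119.zip, n39w120.zip, n40w119.zip, n40w120.zip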
def polygon_tiles(input_path, tile_osr=gdc.epsg_osr(4269), tile_x=0, tile_y=0, tile_cs=1, tile_buffer=0.5): """Return the sorted (lat, -lon) list of 1x1 degree tiles intersecting the features of a polygon shapefile""" lat_lon_list = [] shp_driver = ogr.GetDriverByName('ESRI Shapefile') input_ds = shp_driver.Open(input_path, 0) input_layer = input_ds.GetLayer() input_osr = input_layer.GetSpatialRef() input_ftr = input_layer.GetNextFeature() while input_ftr: input_fid = input_ftr.GetFID() logging.debug(' {0}'.format(input_fid)) input_geom = input_ftr.GetGeometryRef() # This finds the tiles that intersect the extent of each feature input_extent = gdc.Extent(input_geom.GetEnvelope()) input_extent = input_extent.ogrenv_swap() logging.debug(' Feature Extent: {}'.format(input_extent)) # Project feature extent to the DEM tile coordinate system tile_extent = gdc.project_extent(input_extent, input_osr, tile_osr) logging.debug(' Projected Extent: {}'.format(tile_extent)) # Extent needed to select 1x1 degree tiles tile_extent.buffer_extent(tile_buffer) tile_extent.adjust_to_snap('EXPAND', tile_x, tile_y, tile_cs) logging.debug(' Tile Extent: {}'.format(tile_extent)) # Get list of available tiles that intersect the extent lat_lon_list.extend([ (lat, -lon) for lon in range(int(tile_extent.xmin), int(tile_extent.xmax)) for lat in range(int(tile_extent.ymax), int(tile_extent.ymin), -1) ]) del input_extent, tile_extent # # This finds the tiles that intersect the geometry of each feature # # Project the feature geometry to the DEM tile coordinate system # output_geom = input_geom.Clone() # output_geom.Transform(tx) # output_geom = output_geom.Buffer(tile_buffer) # logging.debug(' Geometry type: {}'.format(output_geom.GetGeometryName())) # # # Compute the upper left tile coordinate for each feature vertex # output_json = json.loads(output_geom.ExportToJson()) # # DEADBEEF - Add a point adjust_to_snap method # if output_geom.GetGeometryName() == 'POLYGON': # output_list = sorted(list(set([ # (int(math.ceil((pnt[1] - tile_y) / tile_cs) * tile_cs + tile_y), # -int(math.floor((pnt[0] - tile_x) / tile_cs) * tile_cs + tile_x)) # for ring in output_json['coordinates'] # for pnt in ring]))) # elif output_geom.GetGeometryName() == 'MULTIPOLYGON': # output_list = sorted(list(set([ # (int(math.ceil((pnt[1] - tile_y) / tile_cs) * tile_cs + tile_y), # -int(math.floor((pnt[0] - tile_x) / tile_cs) * tile_cs + tile_x)) # for poly in output_json['coordinates'] # for ring in poly # for pnt in ring]))) # else: # logging.error('Invalid geometry type') # sys.exit() # lat_lon_list.extend(output_list) # del output_geom, output_list # Cleanup input_geom = None del input_fid, input_geom input_ftr = input_layer.GetNextFeature() del input_ds return sorted(list(set(lat_lon_list)))
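# The commented-out block above snaps each polygon vertex to the upper-left
# corner of its 1x1 degree tile. A minimal standalone sketch of that per-point
# snap (the "point adjust_to_snap method" in the DEADBEEF note), assuming
# tile_x = tile_y = 0 and tile_cs = 1; point_tile is a hypothetical helper.
import math

def point_tile(lon, lat, tile_x=0, tile_y=0, tile_cs=1):
    """Return the (lat, -lon) tile key of the upper-left corner for a vertex"""
    tile_lat = int(math.ceil((lat - tile_y) / tile_cs) * tile_cs + tile_y)
    tile_lon = -int(math.floor((lon - tile_x) / tile_cs) * tile_cs + tile_x)
    return tile_lat, tile_lon

# point_tile(-119.3, 38.7) -> (39, 120), i.e. tile n39w120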
def main(gis_ws, tile_ws, dem_cs, overwrite_flag=False, pyramids_flag=False, stats_flag=False): """Merge, project, and clip NED tiles Args: gis_ws (str): Folder/workspace path of the GIS data for the project tile_ws (str): Folder/workspace path of the DEM tiles dem_cs (int): DEM cellsize (10 or 30m) overwrite_flag (bool): If True, overwrite existing files pyramids_flag (bool): If True, build pyramids/overviews for the output rasters stats_flag (bool): If True, compute statistics for the output rasters Returns: None """ logging.info('\nPrepare DEM tiles') # Inputs output_units = 'METERS' dem_ws = os.path.join(gis_ws, 'dem') scratch_ws = os.path.join(gis_ws, 'scratch') zone_raster_path = os.path.join(scratch_ws, 'zone_raster.img') # Use 1 degree snap point and "cellsize" to get 1x1 degree tiles tile_osr = gdc.epsg_osr(4269) tile_buffer = 0.5 tile_x, tile_y, tile_cs = 0, 0, 1 # Input error checking if not os.path.isdir(gis_ws): logging.error(('\nERROR: The GIS workspace {} ' + 'does not exist').format(gis_ws)) sys.exit() elif not os.path.isdir(tile_ws): logging.error(('\nERROR: The DEM tile workspace {} ' + 'does not exist').format(tile_ws)) sys.exit() elif not os.path.isfile(zone_raster_path): logging.error(('\nERROR: The zone raster {} does not exist' + '\n Try re-running "build_study_area_raster.py"' ).format(zone_raster_path)) sys.exit() elif output_units not in ['FEET', 'METERS']: logging.error('\nERROR: The output units must be FEET or METERS\n') sys.exit() logging.info('\nGIS Workspace: {}'.format(gis_ws)) logging.info('DEM Workspace: {}'.format(dem_ws)) logging.info('Tile Workspace: {}\n'.format(tile_ws)) # Input folder/files if dem_cs == 10: tile_fmt = 'imgn{0:02d}w{1:03d}_13.img' elif dem_cs == 30: tile_fmt = 'imgn{0:02d}w{1:03d}_1.img' # Output folder/files if not os.path.isdir(dem_ws): os.makedirs(dem_ws) # Output file names dem_fmt = 'ned_{0}m{1}.img' # dem_gcs = dem_fmt.format(dem_cs, '_nad83_meters') # dem_feet = dem_fmt.format(dem_cs, '_nad83_feet') # dem_proj = dem_fmt.format(dem_cs, '_albers') # dem_hs = dem_fmt.format(dem_cs, '_hs') dem_gcs_path = os.path.join(dem_ws, dem_fmt.format(dem_cs, '_nad83_meters')) dem_feet_path = os.path.join(dem_ws, dem_fmt.format(dem_cs, '_nad83_feet')) dem_proj_path = os.path.join(dem_ws, dem_fmt.format(dem_cs, '_albers')) dem_hs_path = os.path.join(dem_ws, dem_fmt.format(dem_cs, '_hs')) # Nodata value used by the merge and warp calls below f32_nodata = float(np.finfo(np.float32).min) if pyramids_flag: levels = '2 4 8 16 32 64 128' # gdal.SetConfigOption('USE_RRD', 'YES') # gdal.SetConfigOption('HFA_USE_RRD', 'YES') # Reference all output rasters to the zone raster zone_raster_ds = gdal.Open(zone_raster_path) output_osr = gdc.raster_ds_osr(zone_raster_ds) output_wkt = gdc.raster_ds_proj(zone_raster_ds) output_cs = gdc.raster_ds_cellsize(zone_raster_ds)[0] output_x, output_y = gdc.raster_ds_origin(zone_raster_ds) output_extent = gdc.raster_ds_extent(zone_raster_ds) zone_raster_ds = None logging.debug('\nStudy area properties') logging.debug(' Output OSR: {}'.format(output_osr)) logging.debug(' Output Extent: {}'.format(output_extent)) logging.debug(' Output cellsize: {}'.format(output_cs)) # Project study area extent to DEM tile coordinate system tile_extent = gdc.project_extent(output_extent, output_osr, tile_osr) logging.debug('Output Extent: {}'.format(tile_extent)) # Extent needed to select 1x1 degree tiles tile_extent.buffer_extent(tile_buffer) tile_extent.adjust_to_snap('EXPAND', tile_x, tile_y, tile_cs) logging.debug('Tile Extent: {}'.format(tile_extent)) # Get list of available tiles that
intersect the extent input_path_list = sorted( list( set([ tile_fmt.format(lat, -lon) # os.path.join(tile_ws, tile_fmt.format(lat, -lon)) for lon in range(int(tile_extent.xmin), int(tile_extent.xmax)) for lat in range(int(tile_extent.ymax), int( tile_extent.ymin), -1) if os.path.isfile( os.path.join(tile_ws, tile_fmt.format(lat, -lon))) ]))) logging.debug('Tiles') # for input_path in input_path_list: # logging.debug(' {}'.format(input_path)) # Calculate using GDAL utilities if input_path_list: logging.info('Merging tiles') if os.path.isfile(dem_gcs_path) and overwrite_flag: util.remove_file(dem_gcs_path) # subprocess.call( # ['gdalmanage', 'delete', '-f', 'HFA', dem_gcs_path]) if not os.path.isfile(dem_gcs_path): # gdal_merge.py was only working if shell=True # It would also work to add the scripts folder to the path (in Python) # Or the scripts folder could be added to the system PYTHONPATH? args_list = [ 'python', os.path.join(sys.exec_prefix, 'scripts', 'gdal_merge.py'), '-o', dem_gcs_path, '-of', 'HFA', '-co', 'COMPRESSED=YES', '-a_nodata', str(f32_nodata) ] + input_path_list logging.debug(args_list) logging.debug('command length: {}'.format(len( ' '.join(args_list)))) subprocess.call(args_list, cwd=tile_ws) # subprocess.call( # ['set', 'GDAL_DATA={}\Lib\site-packages\osgeo\data\gdal'.format(sys.exec_prefix)], # shell=True) # subprocess.call( # ['gdal_merge.py', '-o', dem_gcs_path, '-of', 'HFA', # '-co', 'COMPRESSED=YES', '-a_nodata', # str(f32_nodata)] + input_path_list, # shell=True) # Convert DEM from meters to feet if output_units == 'FEET': # DEADBEEF - This won't run when called through subprocess? # subprocess.call( # ['gdal_calc.py', '-A', dem_gcs_path, # '--outfile={}'.format(dem_feet_path), '--calc="0.3048*A"', # '--format', 'HFA', '--co', 'COMPRESSED=YES', # '--NoDataValue={}'.format(str(f32_nodata)), # '--type', 'Float32', '--overwrite'], # cwd=dem_ws, shell=True) # dem_gcs_path = dem_feet_path # Scale the values using custom function m2ft_func(dem_gcs_path) if os.path.isfile(dem_proj_path) and overwrite_flag: subprocess.call(['gdalmanage', 'delete', '-f', 'HFA', dem_proj_path]) if os.path.isfile(dem_hs_path) and overwrite_flag: subprocess.call(['gdalmanage', 'delete', '-f', 'HFA', dem_hs_path]) if (not os.path.isfile(dem_proj_path) and os.path.isfile(dem_gcs_path)): subprocess.call([ 'gdalwarp', '-r', 'bilinear', '-tr', str(output_cs), str(output_cs), '-s_srs', 'EPSG:4269', '-t_srs', output_wkt, '-ot', 'Float32' ] + ['-te'] + str(output_extent).split() + # ['-srcnodata', 'None', '-dstnodata', str(f32_nodata), [ '-of', 'HFA', '-co', 'COMPRESSED=YES', '-overwrite', '-multi', '-wm', '1024', '-wo', 'NUM_THREADS=ALL_CPUS', dem_gcs_path, dem_proj_path ]) if (not os.path.isfile(dem_hs_path) and os.path.isfile(dem_proj_path)): subprocess.call([ 'gdaldem', 'hillshade', dem_proj_path, dem_hs_path, '-of', 'HFA', '-co', 'COMPRESSED=YES' ]) if stats_flag: logging.info('Computing statistics') if os.path.isfile(dem_proj_path): logging.debug(' {}'.format(dem_proj_path)) subprocess.call(['gdalinfo', '-stats', '-nomd', dem_proj_path]) if os.path.isfile(dem_hs_path): logging.debug(' {}'.format(dem_hs_path)) subprocess.call(['gdalinfo', '-stats', '-nomd', dem_hs_path]) if pyramids_flag: logging.info('\nBuilding pyramids') if os.path.isfile(dem_proj_path): logging.debug(' {}'.format(dem_proj_path)) subprocess.call(['gdaladdo', '-ro', dem_proj_path] + levels.split()) if os.path.isfile(dem_hs_path): logging.debug(' {}'.format(dem_hs_path)) subprocess.call(['gdaladdo', '-ro', dem_hs_path] + levels.split()) # subprocess.call(
# ['gdaladdo', '-ro', '--config', 'USE_RRD', 'YES', # '--config', 'HFA_USE_RRD', 'YES', dem_proj_path] + levels.split()) # subprocess.call( # ['gdaladdo', '-ro', '--config', 'USE_RRD', 'YES', # '--config', 'HFA_USE_RRD', 'YES', dem_hs_path] + levels.split()) # Remove the intermediate merged GCS DEM if os.path.isfile(dem_gcs_path): subprocess.call(['gdalmanage', 'delete', '-f', 'HFA', dem_gcs_path])
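# The projection step above shells out to gdalwarp; with GDAL 2.1+ bindings
# the same warp can be run in-process with gdal.Warp. A hedged sketch
# mirroring the subprocess call (warp_dem is a hypothetical helper; the WKT,
# extent, and cellsize would come from the zone raster as above):
from osgeo import gdal

def warp_dem(src_path, dst_path, dst_wkt, extent, cs):
    """Project and clip a DEM with gdal.Warp instead of a gdalwarp subprocess"""
    gdal.Warp(
        dst_path, src_path,
        format='HFA', creationOptions=['COMPRESSED=YES'],
        srcSRS='EPSG:4269', dstSRS=dst_wkt,
        xRes=cs, yRes=cs,
        outputBounds=(extent.xmin, extent.ymin, extent.xmax, extent.ymax),
        resampleAlg='bilinear', outputType=gdal.GDT_Float32,
        multithread=True)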
def zonal_stats(ini_path=None, overwrite_flag=False): """Offline Zonal Stats Args: ini_path (str): overwrite_flag (bool): if True, overwrite existing files Returns: None """ logging.info('\nCompute Offline Zonal Stats') landsat_flag = True gridmet_flag = True pdsi_flag = False landsat_images_folder = 'landsat' landsat_tables_folder = 'landsat_tables' gridmet_images_folder = 'gridmet_monthly' # Regular expression to pull out Landsat scene_id landsat_image_re = re.compile('^\d{8}_\d{3}_\w+.\w+.tif$') gridmet_image_re = re.compile('^\d{6}_gridmet.(eto|ppt).tif$') # For now, hardcode snap, cellsize and spatial reference logging.info('\nHardcoding zone/output cellsize and snap') zone_cs = 30 zone_x, zone_y = 15, 15 logging.debug(' Snap: {} {}'.format(zone_x, zone_y)) logging.debug(' Cellsize: {}'.format(zone_cs)) logging.info('Hardcoding Landsat snap, cellsize and spatial reference') landsat_x, landsat_y = 15, 15 landsat_cs = 30 landsat_osr = gdc.epsg_osr(32611) logging.debug(' Snap: {} {}'.format(landsat_x, landsat_y)) logging.debug(' Cellsize: {}'.format(landsat_cs)) logging.debug(' OSR: {}'.format(landsat_osr)) logging.info('Hardcoding GRIDMET snap, cellsize and spatial reference') gridmet_x, gridmet_y = -124.79299639209513, 49.41685579737572 gridmet_cs = 0.041666001963701 # gridmet_cs = [0.041666001963701, 0.041666001489718] # gridmet_x, gridmet_y = -124.79166666666666666667, 25.04166666666666666667 # gridmet_cs = 1. / 24 gridmet_osr = gdc.epsg_osr(4326) # gridmet_osr = gdc.epsg_osr(4269) logging.debug(' Snap: {} {}'.format(gridmet_x, gridmet_y)) logging.debug(' Cellsize: {}'.format(gridmet_cs)) logging.debug(' OSR: {}'.format(gridmet_osr)) landsat_daily_fields = [ 'DATE', 'SCENE_ID', 'LANDSAT', 'PATH', 'ROW', 'YEAR', 'MONTH', 'DAY', 'DOY', 'PIXEL_COUNT', 'FMASK_COUNT', 'DATA_COUNT', 'CLOUD_SCORE', 'TS', 'ALBEDO_SUR', 'NDVI_TOA', 'NDVI_SUR', 'EVI_SUR', 'NDWI_GREEN_NIR_SUR', 'NDWI_GREEN_SWIR1_SUR', 'NDWI_NIR_SWIR1_SUR', # 'NDWI_GREEN_NIR_TOA', 'NDWI_GREEN_SWIR1_TOA', 'NDWI_NIR_SWIR1_TOA', # 'NDWI_SWIR1_GREEN_TOA', 'NDWI_SWIR1_GREEN_SUR', # 'NDWI_TOA', 'NDWI_SUR', 'TC_BRIGHT', 'TC_GREEN', 'TC_WET'] # gridmet_daily_fields = [ # 'DATE', 'YEAR', 'MONTH', 'DAY', 'DOY', 'WATER_YEAR', 'ETO', 'PPT'] gridmet_monthly_fields = [ 'DATE', 'YEAR', 'MONTH', 'WATER_YEAR', 'ETO', 'PPT'] pdsi_dekad_fields = [ 'DATE', 'YEAR', 'MONTH', 'DAY', 'DOY', 'PDSI'] landsat_int_fields = [ 'YEAR', 'MONTH', 'DAY', 'DOY', 'PIXEL_COUNT', 'FMASK_COUNT', 'CLOUD_SCORE'] gridmet_int_fields = ['YEAR', 'MONTH', 'WATER_YEAR'] # To figure out which Landsat and path, # Compare date to reference dates and look for even multiples of 16 ref_dates = { datetime.datetime(1985, 3, 31): ['LT5', '039'], datetime.datetime(1985, 4, 7): ['LT5', '040'], datetime.datetime(1999, 7, 4): ['LE7', '039'], datetime.datetime(1999, 7, 27): ['LE7', '040'], datetime.datetime(2013, 4, 13): ['LC8', '039'], datetime.datetime(2013, 4, 20): ['LC8', '040'] # datetime.datetime(1984, , ): ['LT4', '039'], # datetime.datetime(1984, , ): ['LT4', '040'], } # Open config file config = ConfigParser.ConfigParser() try: config.readfp(open(ini_path)) except: logging.error(('\nERROR: Input file could not be read, ' + 'is not an input file, or does not exist\n' + 'ERROR: ini_path = {}\n').format(ini_path)) sys.exit() logging.debug('\nReading Input File') # Read in config file zone_input_ws = config.get('INPUTS', 'zone_input_ws') zone_filename = config.get('INPUTS', 'zone_filename') zone_field = config.get('INPUTS', 'zone_field') zone_path = os.path.join(zone_input_ws, 
zone_filename) landsat_daily_fields.insert(0, zone_field) # gridmet_daily_fields.insert(0, zone_field) gridmet_monthly_fields.insert(0, zone_field) pdsi_dekad_fields.insert(0, zone_field) images_ws = config.get('INPUTS', 'images_ws') # Build and check file paths if not os.path.isdir(zone_input_ws): logging.error( '\nERROR: The zone workspace does not exist, exiting\n {}'.format( zone_input_ws)) sys.exit() elif not os.path.isfile(zone_path): logging.error( '\nERROR: The zone shapefile does not exist, exiting\n {}'.format( zone_path)) sys.exit() elif not os.path.isdir(images_ws): logging.error( '\nERROR: The image workspace does not exist, exiting\n {}'.format( images_ws)) sys.exit() # Final output folder try: output_ws = config.get('INPUTS', 'output_ws') if not os.path.isdir(output_ws): os.makedirs(output_ws) except: output_ws = os.getcwd() logging.debug(' Defaulting output workspace to {}'.format(output_ws)) # Start/end year try: start_year = int(config.get('INPUTS', 'start_year')) except: start_year = 1984 logging.debug(' Defaulting start_year={}'.format(start_year)) try: end_year = int(config.get('INPUTS', 'end_year')) except: end_year = datetime.datetime.today().year logging.debug(' Defaulting end year to {}'.format(end_year)) if start_year and end_year and end_year < start_year: logging.error( '\nERROR: End year must be >= start year, exiting') sys.exit() default_end_year = datetime.datetime.today().year + 1 if (start_year and start_year not in range(1984, default_end_year) or end_year and end_year not in range(1984, default_end_year)): logging.error( ('\nERROR: Year must be an integer from 1984-{}, ' + 'exiting').format(default_end_year - 1)) sys.exit() # Start/end month try: start_month = int(config.get('INPUTS', 'start_month')) except: start_month = None logging.debug(' Defaulting start_month=None') try: end_month = int(config.get('INPUTS', 'end_month')) except: end_month = None logging.debug(' Defaulting end_month=None') if start_month and start_month not in range(1, 13): logging.error( '\nERROR: Start month must be an integer from 1-12, exiting') sys.exit() elif end_month and end_month not in range(1, 13): logging.error( '\nERROR: End month must be an integer from 1-12, exiting') sys.exit() month_list = common.wrapped_range(start_month, end_month, 1, 12) # Start/end DOY try: start_doy = int(config.get('INPUTS', 'start_doy')) except: start_doy = None logging.debug(' Defaulting start_doy=None') try: end_doy = int(config.get('INPUTS', 'end_doy')) except: end_doy = None logging.debug(' Defaulting end_doy=None') if end_doy and end_doy > 273: logging.error( '\nERROR: End DOY must be in the same water year as start DOY, ' + 'exiting') sys.exit() if start_doy and start_doy not in range(1, 367): logging.error( '\nERROR: Start DOY must be an integer from 1-366, exiting') sys.exit() elif end_doy and end_doy not in range(1, 367): logging.error( '\nERROR: End DOY must be an integer from 1-366, exiting') sys.exit() # if end_doy < start_doy: # logging.error( # '\nERROR: End DOY must be >= start DOY') # sys.exit() doy_list = common.wrapped_range(start_doy, end_doy, 1, 366) # Control which Landsat images are used try: landsat5_flag = config.getboolean('INPUTS', 'landsat5_flag') except: landsat5_flag = False logging.debug(' Defaulting landsat5_flag=False') try: landsat4_flag = config.getboolean('INPUTS', 'landsat4_flag') except: landsat4_flag = False logging.debug(' Defaulting landsat4_flag=False') try: landsat7_flag = config.getboolean('INPUTS', 'landsat7_flag') except: landsat7_flag = False 
logging.debug(' Defaulting landsat7_flag=False') try: landsat8_flag = config.getboolean('INPUTS', 'landsat8_flag') except: landsat8_flag = False logging.debug(' Defaulting landsat8_flag=False') # Cloudmasking try: apply_mask_flag = config.getboolean('INPUTS', 'apply_mask_flag') except: apply_mask_flag = False logging.debug(' Defaulting apply_mask_flag=False') try: acca_flag = config.getboolean('INPUTS', 'acca_flag') except: acca_flag = False try: fmask_flag = config.getboolean('INPUTS', 'fmask_flag') except: fmask_flag = False # Intentionally don't apply scene_id skip/keep lists # Compute zonal stats for all available images # Filter by scene_id when making summary tables scene_id_keep_list = [] scene_id_skip_list = [] # # Only process specific Landsat scenes # try: # scene_id_keep_path = config.get('INPUTS', 'scene_id_keep_path') # with open(scene_id_keep_path) as input_f: # scene_id_keep_list = input_f.readlines() # scene_id_keep_list = [x.strip()[:16] for x in scene_id_keep_list] # except IOError: # logging.error('\nFileIO Error: {}'.format(scene_id_keep_path)) # sys.exit() # except: # scene_id_keep_list = [] # # Skip specific landsat scenes # try: # scene_id_skip_path = config.get('INPUTS', 'scene_id_skip_path') # with open(scene_id_skip_path) as input_f: # scene_id_skip_list = input_f.readlines() # scene_id_skip_list = [x.strip()[:16] for x in scene_id_skip_list] # except IOError: # logging.error('\nFileIO Error: {}'.format(scene_id_skip_path)) # sys.exit() # except: # scene_id_skip_list = [] # Only process certain Landsat path/rows try: path_keep_list = list( common.parse_int_set(config.get('INPUTS', 'path_keep_list'))) except: path_keep_list = [] # try: # row_keep_list = list( # common.parse_int_set(config.get('INPUTS', 'row_keep_list'))) # except: # row_keep_list = [] # Skip or keep certain FID try: fid_skip_list = list( common.parse_int_set(config.get('INPUTS', 'fid_skip_list'))) except: fid_skip_list = [] try: fid_keep_list = list( common.parse_int_set(config.get('INPUTS', 'fid_keep_list'))) except: fid_keep_list = [] # For now, output projection must be manually set above to match zones zone_osr = gdc.feature_path_osr(zone_path) zone_proj = gdc.osr_proj(zone_osr) logging.info('\nThe zone shapefile must be in a projected coordinate system!') logging.info(' Proj4: {}'.format(zone_osr.ExportToProj4())) logging.info('{}'.format(zone_osr)) # Read in zone shapefile logging.info('\nRasterizing Zone Shapefile') zone_name_dict = dict() zone_extent_dict = dict() zone_mask_dict = dict() # First get FIDs and extents zone_ds = ogr.Open(zone_path, 0) zone_lyr = zone_ds.GetLayer() zone_lyr.ResetReading() for zone_ftr in zone_lyr: zone_fid = zone_ftr.GetFID() if zone_field.upper() == 'FID': zone_name_dict[zone_fid] = str(zone_fid) else: zone_name_dict[zone_fid] = zone_ftr.GetField(zone_field) zone_extent = gdc.Extent( zone_ftr.GetGeometryRef().GetEnvelope()).ogrenv_swap() zone_extent.adjust_to_snap('EXPAND', zone_x, zone_y, zone_cs) zone_extent_dict[zone_fid] = list(zone_extent) # Rasterize each FID separately # The RasterizeLayer function wants a "layer" # There might be an easier way to select each feature as a layer for zone_fid, zone_extent in sorted(zone_extent_dict.items()): logging.debug('FID: {}'.format(zone_fid)) logging.debug(' Name: {}'.format(zone_name_dict[zone_fid])) zone_ds = ogr.Open(zone_path, 0) zone_lyr = zone_ds.GetLayer() zone_lyr.ResetReading() zone_lyr.SetAttributeFilter("{0} = {1}".format('FID', zone_fid)) zone_extent = gdc.Extent(zone_extent) zone_rows, zone_cols = 
zone_extent.shape(zone_cs) logging.debug(' Extent: {}'.format(str(zone_extent))) logging.debug(' Rows/Cols: {} {}'.format(zone_rows, zone_cols)) # zones_lyr.SetAttributeFilter("{0} = {1}".format('FID', zone_fid)) # Initialize the zone in memory raster mem_driver = gdal.GetDriverByName('MEM') zone_raster_ds = mem_driver.Create( '', zone_cols, zone_rows, 1, gdal.GDT_Byte) zone_raster_ds.SetProjection(zone_proj) zone_raster_ds.SetGeoTransform( gdc.extent_geo(zone_extent, cs=zone_cs)) zone_band = zone_raster_ds.GetRasterBand(1) zone_band.SetNoDataValue(0) # Clear the raster before rasterizing zone_band.Fill(0) gdal.RasterizeLayer(zone_raster_ds, [1], zone_lyr) # zones_ftr_ds = None zone_array = gdc.raster_ds_to_array( zone_raster_ds, return_nodata=False) zone_mask = zone_array != 0 logging.debug(' Pixel Count: {}'.format(np.sum(zone_mask))) # logging.debug(' Mask:\n{}'.format(zone_mask)) # logging.debug(' Array:\n{}'.format(zone_array)) zone_mask_dict[zone_fid] = zone_mask zone_raster_ds = None del zone_raster_ds, zone_array, zone_mask zone_ds = None del zone_ds, zone_lyr # Calculate zonal stats for each feature separately logging.info('') for fid, zone_str in sorted(zone_name_dict.items()): if fid_keep_list and fid not in fid_keep_list: continue elif fid_skip_list and fid in fid_skip_list: continue logging.info('ZONE: {} (FID: {})'.format(zone_str, fid)) if not zone_field or zone_field.upper() == 'FID': zone_str = 'fid_' + zone_str else: zone_str = zone_str.lower().replace(' ', '_') zone_output_ws = os.path.join(output_ws, zone_str) if not os.path.isdir(zone_output_ws): os.makedirs(zone_output_ws) zone_extent = gdc.Extent(zone_extent_dict[fid]) zone_mask = zone_mask_dict[fid] # logging.debug(' Extent: {}'.format(zone_extent)) if landsat_flag: logging.info(' Landsat') landsat_output_ws = os.path.join( zone_output_ws, landsat_tables_folder) if not os.path.isdir(landsat_output_ws): os.makedirs(landsat_output_ws) logging.debug(' {}'.format(landsat_output_ws)) # Project the zone extent to the image OSR clip_extent = gdc.project_extent( zone_extent, zone_osr, landsat_osr, zone_cs) # logging.debug(' Extent: {}'.format(clip_extent)) clip_extent.adjust_to_snap('EXPAND', landsat_x, landsat_y, landsat_cs) logging.debug(' Extent: {}'.format(clip_extent)) # Process date range by year for year in xrange(start_year, end_year + 1): images_year_ws = os.path.join( images_ws, landsat_images_folder, str(year)) if not os.path.isdir(images_year_ws): logging.debug( ' Landsat year folder doesn\'t exist, skipping\n {}'.format( images_year_ws)) continue else: logging.info(' Year: {}'.format(year)) # Create an empty dataframe output_path = os.path.join( landsat_output_ws, '{}_landsat_{}.csv'.format(zone_str, year)) if os.path.isfile(output_path): if overwrite_flag: logging.debug( ' Output CSV already exists, removing\n {}'.format( output_path)) os.remove(output_path) else: logging.debug( ' Output CSV already exists, skipping\n {}'.format( output_path)) continue output_df = pd.DataFrame(columns=landsat_daily_fields) output_df[landsat_int_fields] = output_df[ landsat_int_fields].astype(int) # Get list of all images year_image_list = [ image for image in os.listdir(images_year_ws) if landsat_image_re.match(image)] # Get list of all unique dates (multiple images per date) year_dt_list = sorted(set([ datetime.datetime.strptime(image[:8], '%Y%m%d') for image in year_image_list])) # Filter date lists if necessary if month_list: year_dt_list = [ image_dt for image_dt in year_dt_list if image_dt.month in month_list] if 
doy_list: year_dt_list = [ image_dt for image_dt in year_dt_list if int(image_dt.strftime('%j')) in doy_list] output_list = [] for image_dt in year_dt_list: image_str = image_dt.date().isoformat() logging.debug('{}'.format(image_dt.date())) # Get the list of available images image_list = [ image for image in year_image_list if image_dt.strftime('%Y%m%d') in image] # This conditional is probably impossible if not image_list: logging.debug(' No images, skipping date') continue # Use date offsets to determine the Landsat and Path ref_match = [ lp for ref_dt, lp in ref_dates.items() if (((ref_dt - image_dt).days % 16 == 0) and ((lp[0].upper() == 'LT5' and image_dt.year < 2012) or (lp[0].upper() == 'LC8' and image_dt.year > 2012) or (lp[0].upper() == 'LE7')))] if ref_match: landsat, path = ref_match[0] else: landsat, path = 'XXX', '000' # Get Landsat type from first image in list # image_dict['LANDSAT'] = image_list[0].split('.')[0].split('_')[2] image_name_fmt = '{}_{}.{}.tif'.format( image_dt.strftime('%Y%m%d_%j'), landsat.lower(), '{}') if not landsat4_flag and landsat.upper() == 'LT4': logging.debug(' Landsat 4, skipping image') continue elif not landsat5_flag and landsat.upper() == 'LT5': logging.debug(' Landsat 5, skipping image') continue elif not landsat7_flag and landsat.upper() == 'LE7': logging.debug(' Landsat 7, skipping image') continue elif not landsat8_flag and landsat.upper() == 'LC8': logging.debug(' Landsat 8, skipping image') continue # Load the "mask" image first if it is available # The zone_mask could be applied to the mask_array here # or below where it is used to select from the image_array mask_name = image_name_fmt.format('mask') mask_path = os.path.join(images_year_ws, mask_name) if apply_mask_flag and mask_name in image_list: logging.info(' Applying mask raster: {}'.format( mask_path)) mask_input_array, mask_nodata = gdc.raster_to_array( mask_path, band=1, mask_extent=clip_extent, fill_value=None, return_nodata=True) mask_array = gdc.project_array( mask_input_array, gdal.GRA_NearestNeighbour, landsat_osr, landsat_cs, clip_extent, zone_osr, zone_cs, zone_extent, output_nodata=None) # Assume 0 and nodata indicate unmasked pixels # All other pixels are "masked" mask_array = (mask_array == 0) | (mask_array == mask_nodata) # Assume 0 and nodata indicate masked pixels # mask_array = (mask_array != 0) & (mask_array != mask_nodata) if not np.any(mask_array): logging.info(' No unmasked values') else: mask_array = np.ones(zone_mask.shape, dtype=np.bool) # Save date specific properties image_dict = dict() # Get Fmask and Cloud score separately from other bands # FMask image_name = image_name_fmt.format('fmask') image_path = os.path.join(images_year_ws, image_name) if not os.path.isfile(image_path): logging.error( ' Image {} does not exist, skipping date'.format( image_name)) continue image_input_array, image_nodata = gdc.raster_to_array( image_path, band=1, mask_extent=clip_extent, fill_value=None, return_nodata=True) fmask_array = gdc.project_array( image_input_array, gdal.GRA_NearestNeighbour, landsat_osr, landsat_cs, clip_extent, zone_osr, zone_cs, zone_extent, output_nodata=None) fmask_mask = np.copy(zone_mask) & mask_array if fmask_array.dtype in [np.float32, np.float64]: fmask_mask &= np.isfinite(fmask_array) else: fmask_mask &= fmask_array != image_nodata if not np.any(fmask_mask): logging.debug(' Empty Fmask array, skipping') continue # Convert Fmask array into a mask (1 is cloudy, 0 is clear) fmask_array = (fmask_array > 1.5) & (fmask_array < 4.5) 
image_dict['FMASK_COUNT'] = int(np.sum(fmask_array[fmask_mask])) image_dict['PIXEL_COUNT'] = int(np.sum(fmask_mask)) # image_dict['PIXEL_COUNT'] = int(np.sum(fmask_mask)) image_dict['MASK_COUNT'] = int(np.sum(mask_array)) # Cloud Score image_name = image_name_fmt.format('cloud_score') image_path = os.path.join(images_year_ws, image_name) image_input_array, image_nodata = gdc.raster_to_array( image_path, band=1, mask_extent=clip_extent, fill_value=None, return_nodata=True) cloud_array = gdc.project_array( image_input_array, gdal.GRA_NearestNeighbour, landsat_osr, landsat_cs, clip_extent, zone_osr, zone_cs, zone_extent, output_nodata=None) cloud_mask = np.copy(zone_mask) & mask_array if cloud_array.dtype in [np.float32, np.float64]: cloud_mask &= np.isfinite(cloud_array) else: cloud_mask &= cloud_array != image_nodata if not np.any(cloud_mask): logging.debug(' Empty Cloud Score array, skipping') continue image_dict['CLOUD_SCORE'] = float(np.mean(cloud_array[cloud_mask])) # Workflow zs_list = [ ['ts', 1, 'TS'], ['albedo_sur', 1, 'ALBEDO_SUR'], ['ndvi_toa', 1, 'NDVI_TOA'], ['ndvi_sur', 1, 'NDVI_SUR'], ['evi_sur', 1, 'EVI_SUR'], ['ndwi_green_nir_sur', 1, 'NDWI_GREEN_NIR_SUR'], ['ndwi_green_swir1_sur', 1, 'NDWI_GREEN_SWIR1_SUR'], ['ndwi_nir_swir1_sur', 1, 'NDWI_NIR_SWIR1_SUR'], ['tasseled_cap', 1, 'TC_BRIGHT'], ['tasseled_cap', 2, 'TC_GREEN'], ['tasseled_cap', 3, 'TC_WET'] ] for band_name, band_num, field in zs_list: image_name = image_name_fmt.format(band_name) logging.debug(' {} {}'.format(image_name, field)) if image_name not in image_list: logging.debug(' Image doesn\'t exist, skipping') continue image_path = os.path.join(images_year_ws, image_name) # logging.debug(' {}'.format(image_path)) image_input_array, image_nodata = gdc.raster_to_array( image_path, band=band_num, mask_extent=clip_extent, fill_value=None, return_nodata=True) # GRA_NearestNeighbour, GRA_Bilinear, GRA_Cubic, # GRA_CubicSpline image_array = gdc.project_array( image_input_array, gdal.GRA_NearestNeighbour, landsat_osr, landsat_cs, clip_extent, zone_osr, zone_cs, zone_extent, output_nodata=None) image_mask = np.copy(zone_mask) & mask_array if image_array.dtype in [np.float32, np.float64]: image_mask &= np.isfinite(image_array) else: image_mask &= image_array != image_nodata del image_input_array if fmask_flag: # Fmask array was converted into a mask # 1 for cloud, 0 for clear image_mask &= (fmask_array == 0) if acca_flag: image_mask &= (cloud_array < 50) # Skip fully masked zones # This would not work for FMASK and CLOUD_SCORE if we # weren't using nearest neighbor for resampling if not np.any(image_mask): logging.debug(' Empty array, skipping') continue image_dict[field] = float(np.mean( image_array[image_mask])) # Should check "first" image instead of Ts specifically if band_name == 'ts': image_dict['DATA_COUNT'] = int(np.sum(image_mask)) del image_array, image_mask if not image_dict: logging.debug( ' {} - no image data in zone, skipping'.format( image_str)) continue # Save date specific properties # Change fid zone strings back to integer values if zone_str.startswith('fid_'): image_dict[zone_field] = int(zone_str[4:]) else: image_dict[zone_field] = zone_str image_dict['DATE'] = image_str image_dict['LANDSAT'] = landsat.upper() image_dict['PATH'] = path image_dict['ROW'] = '000' image_dict['SCENE_ID'] = '{}{}{}{}'.format( image_dict['LANDSAT'], image_dict['PATH'], image_dict['ROW'], image_dt.strftime('%Y%j')) image_dict['YEAR'] = image_dt.year image_dict['MONTH'] = image_dt.month image_dict['DAY'] = image_dt.day 
image_dict['DOY'] = int(image_dt.strftime('%j')) # image_dict['PIXEL_COUNT'] = int(np.sum(zone_mask & mask_array)) # Save each row to a list output_list.append(image_dict) # Append all rows for the year to a dataframe if not output_list: logging.debug(' Empty output list, skipping') continue output_df = output_df.append(output_list, ignore_index=True) output_df.sort_values(by=['DATE'], inplace=True) logging.debug(' {}'.format(output_path)) output_df.to_csv(output_path, index=False, columns=landsat_daily_fields) # Combine/merge annual files into a single CSV logging.debug('\n Merging annual Landsat CSV files') output_df = None for year in xrange(start_year, end_year + 1): # logging.debug(' {}'.format(year)) input_path = os.path.join( landsat_output_ws, '{}_landsat_{}.csv'.format(zone_str, year)) try: input_df = pd.read_csv(input_path) except: continue try: output_df = output_df.append(input_df) except: output_df = input_df.copy() if output_df is not None and not output_df.empty: output_path = os.path.join( zone_output_ws, '{}_landsat_daily.csv'.format(zone_str)) logging.debug(' {}'.format(output_path)) output_df.sort_values(by=['DATE', 'ROW'], inplace=True) output_df.to_csv( output_path, index=False, columns=landsat_daily_fields) if gridmet_flag: logging.info(' GRIDMET ETo/PPT') # Project the zone extent to the image OSR clip_extent = gdc.project_extent( zone_extent, zone_osr, gridmet_osr, zone_cs) logging.debug(' Extent: {}'.format(clip_extent)) # clip_extent.buffer_extent(gridmet_cs) # logging.debug(' Extent: {}'.format(clip_extent)) clip_extent.adjust_to_snap('EXPAND', gridmet_x, gridmet_y, gridmet_cs) logging.debug(' Extent: {}'.format(clip_extent)) gridmet_images_ws = os.path.join(images_ws, gridmet_images_folder) if not os.path.isdir(gridmet_images_ws): logging.debug( ' GRIDMET folder doesn\'t exist, skipping\n {}'.format( gridmet_images_ws)) continue else: logging.info(' {}'.format(gridmet_images_ws)) # Create an empty dataframe output_path = os.path.join( zone_output_ws, '{}_gridmet_monthly.csv'.format(zone_str)) if os.path.isfile(output_path): if overwrite_flag: logging.debug( ' Output CSV already exists, removing\n {}'.format( output_path)) os.remove(output_path) else: logging.debug( ' Output CSV already exists, skipping\n {}'.format( output_path)) continue output_df = pd.DataFrame(columns=gridmet_monthly_fields) output_df[gridmet_int_fields] = output_df[gridmet_int_fields].astype(int) # Get list of all images image_list = [ image for image in os.listdir(gridmet_images_ws) if gridmet_image_re.match(image)] dt_list = sorted(set([ datetime.datetime(int(image[:4]), int(image[4:6]), 1) for image in image_list])) output_list = [] for image_dt in dt_list: image_str = image_dt.date().isoformat() logging.debug('{}'.format(image_dt.date())) image_name_fmt = '{}_gridmet.{}.tif'.format( image_dt.strftime('%Y%m'), '{}') # Save date specific properties image_dict = dict() # Workflow zs_list = [ ['eto', 'ETO'], ['ppt', 'PPT'], ] for band_name, field in zs_list: image_name = image_name_fmt.format(band_name) logging.debug(' {} {}'.format(image_name, field)) if image_name not in image_list: logging.debug(' Image doesn\'t exist, skipping') continue image_path = os.path.join(gridmet_images_ws, image_name) # logging.debug(' {}'.format(image_path)) image_input_array, image_nodata = gdc.raster_to_array( image_path, band=1, mask_extent=clip_extent, fill_value=None, return_nodata=True) # GRA_NearestNeighbour, GRA_Bilinear, GRA_Cubic, # GRA_CubicSpline image_array = gdc.project_array( image_input_array, 
gdal.GRA_NearestNeighbour, gridmet_osr, gridmet_cs, clip_extent, zone_osr, zone_cs, zone_extent, output_nodata=None) del image_input_array # Skip fully masked zones if (np.all(np.isnan(image_array)) or np.all(image_array == image_nodata)): logging.debug(' Empty array, skipping') continue image_dict[field] = np.mean(image_array[zone_mask]) del image_array if not image_dict: logging.debug( ' {} - no image data in zone, skipping'.format( image_str)) continue # Save date specific properties # Change fid zone strings back to integer values if zone_str.startswith('fid_'): image_dict[zone_field] = int(zone_str[4:]) else: image_dict[zone_field] = zone_str image_dict['DATE'] = image_str image_dict['YEAR'] = image_dt.year image_dict['MONTH'] = image_dt.month image_dict['WATER_YEAR'] = (image_dt + relativedelta(months=3)).year # Save each row to a list output_list.append(image_dict) # Append all rows for the year to a dataframe if not output_list: logging.debug(' Empty output list, skipping') continue output_df = output_df.append(output_list, ignore_index=True) output_df.sort_values(by=['DATE'], inplace=True) logging.debug(' {}'.format(output_path)) output_df.to_csv( output_path, index=False, columns=gridmet_monthly_fields) if pdsi_flag: logging.info(' GRIDMET PDSI') logging.info(' Not currently implemented')
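# A condensed sketch of the per-feature rasterization pattern used above in
# zonal_stats(): select one feature by FID, burn it into an in-memory raster,
# and use the result as a boolean mask. feature_mask() is a hypothetical
# helper; geo/proj/cols/rows describe the snapped zone grid.
from osgeo import gdal, ogr
import numpy as np

def feature_mask(shp_path, fid, geo, proj, cols, rows):
    """Burn a single feature (selected by FID) into a boolean array"""
    ds = ogr.Open(shp_path, 0)
    lyr = ds.GetLayer()
    lyr.SetAttributeFilter('FID = {}'.format(fid))
    mem_ds = gdal.GetDriverByName('MEM').Create('', cols, rows, 1, gdal.GDT_Byte)
    mem_ds.SetProjection(proj)
    mem_ds.SetGeoTransform(geo)
    band = mem_ds.GetRasterBand(1)
    band.Fill(0)
    gdal.RasterizeLayer(mem_ds, [1], lyr, burn_values=[1])
    mask = mem_ds.ReadAsArray() != 0
    mem_ds = None
    ds = None
    return mask

# Zonal mean of an image over one feature:
# value = float(np.mean(image_array[feature_mask('zone.shp', 0, geo, proj, cols, rows)]))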
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(), variables=['prcp'], daily_flag=False, monthly_flag=True, annual_flag=False, start_year=1981, end_year=2010, extent_path=None, output_extent=None, stats_flag=True, overwrite_flag=False): """Generate DAYMET climatologies Args: netcdf_ws (str): folder of DAYMET netcdf files ancillary_ws (str): folder of ancillary rasters output_ws (str): folder of output rasters variables (list): DAYMET variables to process ('prcp', 'srad', 'vp', 'tmmn', 'tmmx') Set as ['all'] to process all variables daily_flag (bool): if True, compute daily (DOY) climatologies monthly_flag (bool): if True, compute monthly climatologies annual_flag (bool): if True, compute annual climatologies start_year (int): YYYY end_year (int): YYYY extent_path (str): file path of a raster defining the output extent output_extent (list): decimal degrees values defining output extent stats_flag (bool): if True, compute raster statistics. Default is True. overwrite_flag (bool): if True, overwrite existing files Returns: None """ logging.info('\nGenerating DAYMET climatologies') daily_fmt = 'daymet_{var}_30yr_normal_{doy:03d}.img' monthly_fmt = 'daymet_{var}_30yr_normal_{month:02d}.img' annual_fmt = 'daymet_{var}_30yr_normal.img' # daily_fmt = 'daymet_{var}_normal_{start}_{end}_{doy:03d}.img' # monthly_fmt = 'daymet_{var}_normal_{start}_{end}_{month:02d}.img' # annual_fmt = 'daymet_{var}_normal_{start}_{end}.img' # If a date is not set, process 1981-2010 climatology try: start_dt = dt.datetime(start_year, 1, 1) logging.debug(' Start date: {}'.format(start_dt)) except: start_dt = dt.datetime(1981, 1, 1) logging.info(' Start date: {}'.format(start_dt)) try: end_dt = dt.datetime(end_year, 12, 31) logging.debug(' End date: {}'.format(end_dt)) except: end_dt = dt.datetime(2010, 12, 31) logging.info(' End date: {}'.format(end_dt)) # Get DAYMET spatial reference from an ancillary raster mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img') daymet_re = re.compile('daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$') # DAYMET rasters to extract var_full_list = ['prcp', 'tmmn', 'tmmx'] # data_full_list = ['prcp', 'srad', 'vp', 'tmmn', 'tmmx'] if not variables: logging.error('\nERROR: variables parameter is empty\n') sys.exit() elif type(variables) is not list: # DEADBEEF - I could try converting comma separated strings to lists?
logging.error('\nERROR: variables parameter must be a list\n') sys.exit() elif 'all' in variables: logging.info('\nProcessing all variables\n {}'.format( ','.join(var_full_list))) var_list = var_full_list[:] elif not set(variables).issubset(set(var_full_list)): logging.error( '\nERROR: variables parameter is invalid\n {}'.format(variables)) sys.exit() else: var_list = variables[:] # Get extent/geo from mask raster daymet_ds = gdal.Open(mask_raster) daymet_osr = gdc.raster_ds_osr(daymet_ds) daymet_proj = gdc.osr_proj(daymet_osr) daymet_cs = gdc.raster_ds_cellsize(daymet_ds, x_only=True) daymet_extent = gdc.raster_ds_extent(daymet_ds) daymet_geo = daymet_extent.geo(daymet_cs) daymet_x, daymet_y = daymet_extent.origin() daymet_ds = None logging.debug(' Projection: {}'.format(daymet_proj)) logging.debug(' Cellsize: {}'.format(daymet_cs)) logging.debug(' Geo: {}'.format(daymet_geo)) logging.debug(' Extent: {}'.format(daymet_extent)) logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y)) # Subset data to a smaller extent if output_extent is not None: logging.info('\nComputing subset extent & geo') logging.debug(' Extent: {}'.format(output_extent)) # Assume input extent is in decimal degrees output_extent = gdc.project_extent(gdc.Extent(output_extent), gdc.epsg_osr(4326), daymet_osr, 0.001) output_extent = gdc.intersect_extents([daymet_extent, output_extent]) output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs) output_geo = output_extent.geo(daymet_cs) logging.debug(' Geo: {}'.format(output_geo)) logging.debug(' Extent: {}'.format(output_extent)) elif extent_path is not None: logging.info('\nComputing subset extent & geo') output_extent = gdc.project_extent( gdc.raster_path_extent(extent_path), gdc.raster_path_osr(extent_path), daymet_osr, gdc.raster_path_cellsize(extent_path, x_only=True)) output_extent = gdc.intersect_extents([daymet_extent, output_extent]) output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs) output_geo = output_extent.geo(daymet_cs) logging.debug(' Geo: {}'.format(output_geo)) logging.debug(' Extent: {}'.format(output_extent)) else: output_extent = daymet_extent.copy() output_geo = daymet_geo[:] output_shape = output_extent.shape(cs=daymet_cs) xi, yi = gdc.array_geo_offsets(daymet_geo, output_geo, daymet_cs) output_rows, output_cols = output_extent.shape(daymet_cs) logging.debug(' Shape: {} {}'.format(output_rows, output_cols)) logging.debug(' Offsets: {} {} (x y)'.format(xi, yi)) # Process each variable for input_var in var_list: logging.info("\nVariable: {}".format(input_var)) # Rename variables to match CIMIS if input_var == 'prcp': output_var = 'ppt' else: output_var = input_var logging.debug("Output name: {}".format(output_var)) # Build output folder var_ws = os.path.join(output_ws, output_var) if not os.path.isdir(var_ws): os.makedirs(var_ws) # Build output arrays logging.debug(' Building arrays') if daily_flag: daily_sum = np.full((365, output_shape[0], output_shape[1]), 0, np.float64) daily_count = np.full((365, output_shape[0], output_shape[1]), 0, np.uint8) if monthly_flag: monthly_sum = np.full((12, output_shape[0], output_shape[1]), 0, np.float64) monthly_count = np.full((12, output_shape[0], output_shape[1]), 0, np.uint8) if annual_flag: annual_sum = np.full((output_shape[0], output_shape[1]), 0, np.float64) # Use int32 for the annual count since it accumulates daily over many years annual_count = np.full((output_shape[0], output_shape[1]), 0, np.int32) # Process each file/year separately for input_name in sorted(os.listdir(netcdf_ws)): logging.debug(" {}".format(input_name)) input_match =
            if not input_match:
                logging.debug('  Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug('  Variable didn\'t match, skipping')
                continue
            year_str = input_match.group('YEAR')
            logging.info("  Year: {}".format(year_str))
            year_int = int(year_str)
            year_days = int(dt.datetime(year_int, 12, 31).strftime('%j'))
            if start_dt is not None and year_int < start_dt.year:
                logging.debug('    Before start date, skipping')
                continue
            elif end_dt is not None and year_int > end_dt.year:
                logging.debug('    After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)
            if not os.path.isfile(input_raster):
                logging.debug(
                    '  Input raster doesn\'t exist, skipping {}'.format(
                        input_raster))
                continue

            # Build output folder
            if daily_flag:
                daily_ws = os.path.join(var_ws, 'daily')
                if not os.path.isdir(daily_ws):
                    os.makedirs(daily_ws)

            if monthly_flag:
                monthly_temp_sum = np.full(
                    (12, output_shape[0], output_shape[1]), 0, np.float64)
                monthly_temp_count = np.full(
                    (12, output_shape[0], output_shape[1]), 0, np.uint8)

            # Read in the DAYMET NetCDF file
            input_nc_f = netCDF4.Dataset(input_raster, 'r')
            # logging.debug(input_nc_f.variables)

            # Check all valid dates in the year
            year_dates = date_range(
                dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                logging.debug('  {}'.format(date_dt.date()))
                # if start_dt is not None and date_dt < start_dt:
                #     logging.debug(
                #         '  {} - before start date, skipping'.format(
                #             date_dt.date()))
                #     continue
                # elif end_dt is not None and date_dt > end_dt:
                #     logging.debug('  {} - after end date, skipping'.format(
                #         date_dt.date()))
                #     continue
                # else:
                #     logging.info('  {}'.format(date_dt.date()))
                doy = int(date_dt.strftime('%j'))
                doy_i = range(1, year_days + 1).index(doy)
                month_i = date_dt.month - 1

                # Arrays are read as masked arrays with a -9999 fill value
                # Convert to basic numpy arrays with NaN values
                # DAYMET files always have 365 time steps (Dec 31 is dropped
                #   in leap years), so the IndexError skips DOY 366
                try:
                    input_ma = input_nc_f.variables[input_var][
                        doy_i, yi: yi + output_rows, xi: xi + output_cols]
                except IndexError:
                    logging.info('    Date not in netcdf, skipping')
                    continue
                input_nodata = float(input_ma.fill_value)
                output_array = input_ma.data.astype(np.float32)
                output_array[output_array == input_nodata] = np.nan
                output_mask = np.isfinite(output_array)

                # DAYMET temperatures are already in Celsius, so no
                #   Kelvin to Celsius conversion is needed
                # if input_var in ['tmin', 'tmax']:
                #     output_array -= 273.15

                # Save values
                if daily_flag:
                    daily_sum[doy_i, :, :] += output_array
                    daily_count[doy_i, :, :] += output_mask
                if monthly_flag:
                    monthly_temp_sum[month_i, :, :] += output_array
                    monthly_temp_count[month_i, :, :] += output_mask
                if annual_flag:
                    annual_sum[:, :] += output_array
                    annual_count[:, :] += output_mask

                # Cleanup
                # del input_ds, input_array
                del input_ma, output_array, output_mask

            # Compute mean monthly for the year
            if monthly_flag:
                # Sum precipitation
                if input_var == 'prcp':
                    monthly_sum += monthly_temp_sum
                else:
                    monthly_sum += monthly_temp_sum / monthly_temp_count
                # Is this the right count?
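                # np.any(..., axis=0) collapses the month axis, so each cell
                #   adds one year to every month band if any month had data;
                #   a strictly per-month year count would be
                #   (monthly_temp_count > 0) instead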
                monthly_count += np.any(monthly_temp_count, axis=0)
                del monthly_temp_sum, monthly_temp_count

            input_nc_f.close()
            del input_nc_f

        # Save the projected climatology arrays
        if daily_flag:
            for doy_i in range(daily_sum.shape[0]):
                daily_name = daily_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    doy=doy_i + 1)
                daily_path = os.path.join(daily_ws, daily_name)
                gdc.array_to_raster(
                    daily_sum[doy_i, :, :] / daily_count[doy_i, :, :],
                    daily_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
            del daily_sum, daily_count
        if monthly_flag:
            for month_i in range(monthly_sum.shape[0]):
                monthly_name = monthly_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    month=month_i + 1)
                monthly_path = os.path.join(var_ws, monthly_name)
                gdc.array_to_raster(
                    monthly_sum[month_i, :, :] / monthly_count[month_i, :, :],
                    monthly_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
            del monthly_sum, monthly_count
        if annual_flag:
            annual_name = annual_fmt.format(
                var=output_var, start=start_year, end=end_year)
            annual_path = os.path.join(var_ws, annual_name)
            gdc.array_to_raster(
                annual_sum / annual_count, annual_path,
                output_geo=output_geo, output_proj=daymet_proj,
                stats_flag=stats_flag)
            del annual_sum, annual_count

    logging.debug('\nScript Complete')
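
# The sum/count divisions above emit divide-by-zero warnings and produce NaN
# wherever a cell was never observed. A minimal sketch of a guarded mean
# (hypothetical helper, not called above; assumes numpy is imported as np):
def masked_mean(sum_array, count_array):
    """Return sum/count as float32 with NaN where the count is zero"""
    mean_array = np.full(sum_array.shape, np.nan, np.float32)
    valid_mask = count_array > 0
    mean_array[valid_mask] = sum_array[valid_mask] / count_array[valid_mask]
    return mean_array
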
def main(ini_path, tile_list=None, overwrite_flag=False, mp_procs=1):
    """Prep Landsat path/row specific data

    Args:
        ini_path (str): file path of the input parameters file
        tile_list (list): list of Landsat path/rows to process
            (e.g. ['p045r043', 'p045r033'])
            This will override the tile list in the INI file
        overwrite_flag (bool): if True, overwrite existing files
        mp_procs (int): number of cores to use

    Returns:
        None
    """
    logging.info('\nPrepare path/row data')

    # Open config file
    config = python_common.open_ini(ini_path)

    # Get input parameters
    logging.debug('  Reading Input File')
    year = config.getint('INPUTS', 'year')
    if tile_list is None:
        tile_list = python_common.read_param(
            'tile_list', [], config, 'INPUTS')
    project_ws = config.get('INPUTS', 'project_folder')
    logging.debug('  Year: {}'.format(year))
    logging.debug('  Path/rows: {}'.format(', '.join(tile_list)))
    logging.debug('  Project: {}'.format(project_ws))

    # study_area_path = config.get('INPUTS', 'study_area_path')
    footprint_path = config.get('INPUTS', 'footprint_path')
    # For now, assume the UTM zone file is colocated with the footprints
    #   shapefile
    utm_path = python_common.read_param(
        'utm_path',
        os.path.join(os.path.dirname(footprint_path),
                     'wrs2_tile_utm_zones.json'),
        config, 'INPUTS')
    skip_list_path = python_common.read_param(
        'skip_list_path', '', config, 'INPUTS')

    landsat_flag = python_common.read_param(
        'landsat_flag', True, config, 'INPUTS')
    ledaps_flag = False
    dem_flag = python_common.read_param('dem_flag', True, config, 'INPUTS')
    nlcd_flag = python_common.read_param('nlcd_flag', True, config, 'INPUTS')
    cdl_flag = python_common.read_param('cdl_flag', False, config, 'INPUTS')
    landfire_flag = python_common.read_param(
        'landfire_flag', False, config, 'INPUTS')
    field_flag = python_common.read_param(
        'field_flag', False, config, 'INPUTS')

    tile_gcs_buffer = python_common.read_param('tile_buffer', 0.25, config)

    # Input/output folder and file paths
    if landsat_flag:
        landsat_input_ws = config.get('INPUTS', 'landsat_input_folder')
    else:
        landsat_input_ws = None
    # if ledaps_flag:
    #     ledaps_input_ws = config.get('INPUTS', 'ledaps_input_folder')
    # else:
    #     ledaps_input_ws = None

    if dem_flag:
        dem_input_ws = config.get('INPUTS', 'dem_input_folder')
        dem_tile_fmt = config.get('INPUTS', 'dem_tile_fmt')
        dem_output_ws = config.get('INPUTS', 'dem_output_folder')
        dem_output_name = python_common.read_param(
            'dem_output_name', 'dem.img', config)
        # dem_output_name = config.get('INPUTS', 'dem_output_name')
    else:
        dem_input_ws, dem_tile_fmt = None, None
        dem_output_ws, dem_output_name = None, None

    if nlcd_flag:
        nlcd_input_path = config.get('INPUTS', 'nlcd_input_path')
        nlcd_output_ws = config.get('INPUTS', 'nlcd_output_folder')
        nlcd_output_fmt = python_common.read_param(
            'nlcd_output_fmt', 'nlcd_{:04d}.img', config)
    else:
        nlcd_input_path, nlcd_output_ws, nlcd_output_fmt = None, None, None

    if cdl_flag:
        cdl_input_path = config.get('INPUTS', 'cdl_input_path')
        cdl_ag_list = config.get('INPUTS', 'cdl_ag_list')
        cdl_ag_list = list(python_common.parse_int_set(cdl_ag_list))
        # default_cdl_ag_list = range(1,62) + range(66,78) + range(204,255)
        # cdl_ag_list = python_common.read_param(
        #    'cdl_ag_list', default_cdl_ag_list, config)
        # cdl_ag_list = list(map(int, cdl_ag_list))
        # cdl_non_ag_list = python_common.read_param(
        #    'cdl_non_ag_list', [], config)
        cdl_output_ws = config.get('INPUTS', 'cdl_output_folder')
        cdl_output_fmt = python_common.read_param(
            'cdl_output_fmt', 'cdl_{:04d}.img', config)
        cdl_ag_output_fmt = python_common.read_param(
            'cdl_ag_output_fmt', 'cdl_ag_{:04d}.img', config)
    else:
        cdl_input_path, cdl_ag_list = None, None
        cdl_output_ws, cdl_output_fmt, cdl_ag_output_fmt = None, None, None
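
    # python_common.parse_int_set (used above) expands strings of ints and
    #   int ranges; a minimal sketch of the idea (hypothetical
    #   reimplementation):
    # def parse_int_set(int_set_str):  # e.g. "1-61,66-77,204-254"
    #     int_set = set()
    #     for token in int_set_str.split(','):
    #         token = token.strip()
    #         if '-' in token:
    #             start, end = token.split('-')
    #             int_set.update(range(int(start), int(end) + 1))
    #         elif token:
    #             int_set.add(int(token))
    #     return int_set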
    if landfire_flag:
        landfire_input_path = config.get('INPUTS', 'landfire_input_path')
        landfire_ag_list = config.get('INPUTS', 'landfire_ag_list')
        landfire_ag_list = list(
            python_common.parse_int_set(landfire_ag_list))
        # default_landfire_ag_list = range(3960,4000)
        # landfire_ag_list = python_common.read_param(
        #    'landfire_ag_list', default_landfire_ag_list, config)
        # landfire_ag_list = list(map(int, landfire_ag_list))
        landfire_output_ws = config.get('INPUTS', 'landfire_output_folder')
        landfire_output_fmt = python_common.read_param(
            'landfire_output_fmt', 'landfire_{:04d}.img', config)
        landfire_ag_output_fmt = python_common.read_param(
            'landfire_ag_output_fmt', 'landfire_ag_{:04d}.img', config)
    else:
        landfire_input_path, landfire_ag_list = None, None
        landfire_output_ws = None
        landfire_output_fmt, landfire_ag_output_fmt = None, None

    if field_flag:
        field_input_path = config.get('INPUTS', 'field_input_path')
        field_output_ws = config.get('INPUTS', 'field_output_folder')
        field_output_fmt = python_common.read_param(
            'field_output_fmt', 'fields_{:04d}.img', config)
    else:
        field_input_path = None
        field_output_ws, field_output_fmt = None, None

    # File/folder names
    orig_data_folder_name = 'ORIGINAL_DATA'

    # Check inputs folders/paths
    logging.info('\nChecking input folders/files')
    file_check(footprint_path)
    file_check(utm_path)
    if landsat_flag:
        folder_check(landsat_input_ws)
    # if ledaps_flag:
    #     folder_check(ledaps_input_ws)
    if dem_flag:
        folder_check(dem_input_ws)
    if nlcd_flag:
        file_check(nlcd_input_path)
    if cdl_flag:
        file_check(cdl_input_path)
    if landfire_flag:
        # LANDFIRE will likely be an ESRI grid (set as a folder)
        if not (os.path.isdir(landfire_input_path) or
                os.path.isfile(landfire_input_path)):
            logging.error(
                '\n  {} does not exist'.format(landfire_input_path))
    if field_flag:
        file_check(field_input_path)
    if skip_list_path:
        file_check(skip_list_path)

    # Build output folders
    if not os.path.isdir(project_ws):
        os.makedirs(project_ws)
    if dem_flag and not os.path.isdir(dem_output_ws):
        os.makedirs(dem_output_ws)
    if nlcd_flag and not os.path.isdir(nlcd_output_ws):
        os.makedirs(nlcd_output_ws)
    if cdl_flag and not os.path.isdir(cdl_output_ws):
        os.makedirs(cdl_output_ws)
    if landfire_flag and not os.path.isdir(landfire_output_ws):
        os.makedirs(landfire_output_ws)
    if field_flag and not os.path.isdir(field_output_ws):
        os.makedirs(field_output_ws)

    # For now, assume paths and rows are three digit numbers
    tile_fmt = 'p{:03d}r{:03d}'
    tile_re = re.compile(r'p(\d{3})r(\d{3})')
    image_re = re.compile(
        r'^(LT04|LT05|LE07|LC08)_(\d{3})(\d{3})_(\d{4})(\d{2})(\d{2})')
    snap_cs = 30
    snap_xmin, snap_ymin = (15, 15)

    # Set snap environment parameters
    env = gdc.env
    env.cellsize = snap_cs
    env.snap_xmin, env.snap_ymin = snap_xmin, snap_ymin

    # Use WGS84 (EPSG 4326) for the GCS spatial reference
    # Could also use NAD83 (EPSG 4269)
    # gcs_epsg = 4326
    # gcs_osr = epsg_osr(4326)
    # gcs_proj = osr_proj(gcs_osr)

    # Landsat Footprints (WRS2 Descending Polygons)
    logging.debug('\nFootprint (WRS2 descending should be GCS84):')
    tile_gcs_osr = gdc.feature_path_osr(footprint_path)
    logging.debug('  OSR: {}'.format(tile_gcs_osr))

    # Double check that the WRS2 descending shapefile is GCS84
    # if tile_gcs_osr != epsg_osr(4326):
    #     logging.error('  WRS2 is not GCS84')
    #     sys.exit()

    # Get geometry for each path/row
    tile_gcs_wkt_dict = path_row_wkt_func(
        footprint_path, path_field='PATH', row_field='ROW')

    # Get UTM zone for each path/row
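    # The UTM zones file is expected to map tile names to integer zone
    #   numbers, e.g. (hypothetical values): {"p045r043": 11, "p045r033": 11}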
    # Read the JSON zones file directly instead of calling eval() on its
    #   contents (assumes "import json" at the top of the module)
    with open(utm_path, 'r') as utm_f:
        tile_utm_zone_dict = json.load(utm_f)

    # Project study area geometry to GCS coordinates
    # logging.debug('\nStudy area')
    # study_area_geom = feature_path_geom_union(study_area_path)
    # study_area_gcs_geom = study_area_geom.Clone()
    # study_area_gcs_geom.TransformTo(tile_gcs_osr)

    # Get list of all intersecting Landsat path/rows
    # logging.info('\nLandsat path/rows')
    # tile_list = []
    # for tile_name, tile_gcs_wkt in tile_gcs_wkt_dict.items():
    #     tile_gcs_geom = ogr.CreateGeometryFromWkt(tile_gcs_wkt)
    #     if tile_gcs_geom.Intersects(study_area_gcs_geom):
    #         tile_list.append(tile_name)
    # for tile_name in sorted(tile_list):
    #     logging.debug('  {}'.format(tile_name))

    # Check that each path/row extent and UTM zone exist
    logging.info('\nChecking path/row list against footprint shapefile')
    for tile_name in sorted(tile_list):
        if tile_name not in tile_gcs_wkt_dict.keys():
            logging.error(
                '  {} feature not in footprint shapefile'.format(tile_name))
            continue
        elif tile_name not in tile_utm_zone_dict.keys():
            logging.error(
                '  {} UTM zone not in UTM zones file'.format(tile_name))
            continue
        elif tile_utm_zone_dict[tile_name] == 0:
            logging.error(
                ('  UTM zone is not set for {} in ' +
                 'the UTM zones file').format(tile_name))
            continue

    # Build output folders for each path/row
    logging.info('\nBuilding path/row folders')
    for tile_name in tile_list:
        logging.debug('  {} {}'.format(year, tile_name))
        tile_output_ws = os.path.join(project_ws, str(year), tile_name)
        if ((landsat_flag or ledaps_flag) and
                not os.path.isdir(tile_output_ws)):
            os.makedirs(tile_output_ws)
        if (dem_flag and
                not os.path.isdir(os.path.join(dem_output_ws, tile_name))):
            os.makedirs(os.path.join(dem_output_ws, tile_name))
        if (nlcd_flag and
                not os.path.isdir(os.path.join(nlcd_output_ws, tile_name))):
            os.makedirs(os.path.join(nlcd_output_ws, tile_name))
        if (cdl_flag and
                not os.path.isdir(os.path.join(cdl_output_ws, tile_name))):
            os.makedirs(os.path.join(cdl_output_ws, tile_name))
        if (landfire_flag and
                not os.path.isdir(
                    os.path.join(landfire_output_ws, tile_name))):
            os.makedirs(os.path.join(landfire_output_ws, tile_name))
        if (field_flag and
                not os.path.isdir(os.path.join(field_output_ws, tile_name))):
            os.makedirs(os.path.join(field_output_ws, tile_name))

    # Read skip list
    if (landsat_flag or ledaps_flag) and skip_list_path:
        logging.debug('\nReading scene skip list')
        with open(skip_list_path) as skip_list_f:
            skip_list = skip_list_f.readlines()
            skip_list = [
                scene.strip() for scene in skip_list
                if image_re.match(scene.strip())]
    else:
        logging.debug('\nSkip list not set in INI')
        skip_list = []
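
    # Skip list entries must match image_re, i.e. EE style scene IDs such as
    #   (hypothetical scene) LE07_045043_20150715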
    # Copy and unzip raw Landsat scenes
    # Use these for thermal band, MTL file (scene time), and to run FMask
    if landsat_flag:
        logging.info('\nExtract raw Landsat scenes')
        # Process each path/row
        extract_targz_list = []
        for tile_name in tile_list:
            tile_output_ws = os.path.join(project_ws, str(year), tile_name)

            # Force path/row as strings without leading zeros
            path, row = map(str, map(int, tile_re.match(tile_name).groups()))
            tile_input_ws = os.path.join(
                landsat_input_ws, path, row, str(year))
            if not os.path.isdir(tile_input_ws):
                continue
            logging.info('  {} {}'.format(year, tile_name))

            # Process each tar.gz file
            for input_name in sorted(os.listdir(tile_input_ws)):
                if (not image_re.match(input_name) and
                        not input_name.endswith('.tar.gz')):
                    continue

                # Get Landsat scene ID from tar.gz file name
                # DEADBEEF - For now this is the EE scene ID, but it could be
                #   changed to the full collection 1 ID
                scene_id = input_name.split('.')[0]

                # Output workspace
                image_output_ws = os.path.join(tile_output_ws, scene_id)
                orig_data_ws = os.path.join(
                    image_output_ws, orig_data_folder_name)

                if skip_list and scene_id in skip_list:
                    logging.debug('    {} - Skipping scene'.format(scene_id))
                    # DEADBEEF - Should the script always remove the scene
                    #   if it is in the skip list?
                    #   Maybe only if overwrite is set?
                    if os.path.isdir(image_output_ws):
                        # input('Press ENTER to delete {}'.format(scene_id))
                        shutil.rmtree(image_output_ws)
                    continue

                # If orig_data_ws doesn't exist, don't check images
                if not os.path.isdir(orig_data_ws):
                    os.makedirs(orig_data_ws)
                elif (not overwrite_flag and
                        landsat_files_check(image_output_ws)):
                    continue

                # Extract Landsat tar.gz file
                input_path = os.path.join(tile_input_ws, input_name)
                if mp_procs > 1:
                    extract_targz_list.append([input_path, orig_data_ws])
                else:
                    python_common.extract_targz_func(
                        input_path, orig_data_ws)

                # # Use a command line call
                # input_path = os.path.join(tile_input_ws, input_name)
                # if job_i % pbs_jobs != 0:
                #     job_list.append('tar -zxvf {} -C {} &\n'.format(
                #         input_path, orig_data_ws))
                # else:
                #     job_list.append('tar -zxvf {} -C {}\n'.format(
                #         input_path, orig_data_ws))
                #     # job_list.append('tar -zxvf {} -C {} &\n'.format(
                #     #     input_path, orig_data_ws))
                #     # job_list.append('wait\n')
                # job_i += 1

        # Extract Landsat tar.gz files using multiprocessing
        if extract_targz_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                python_common.extract_targz_mp, extract_targz_list,
                chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Get projected extent for each path/row
    # This should probably be in a function
    if (dem_flag or nlcd_flag or cdl_flag or landfire_flag or field_flag):
        tile_utm_extent_dict = gcs_to_utm_dict(
            tile_list, tile_utm_zone_dict, tile_gcs_osr, tile_gcs_wkt_dict,
            tile_gcs_buffer, snap_xmin, snap_ymin, snap_cs)

    # Mosaic DEM tiles for each path/row
    if dem_flag:
        logging.info('\nBuild DEM for each path/row')
        mosaic_mp_list = []
        for tile_name in tile_list:
            # Output folder and path
            tile_output_path = os.path.join(
                dem_output_ws, tile_name, dem_output_name)
            if not overwrite_flag and os.path.isfile(tile_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(tile_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Get the path/row geometry in GCS for selecting intersecting
            #   tiles
            tile_gcs_geom = ogr.CreateGeometryFromWkt(
                tile_gcs_wkt_dict[tile_name])

            # Apply a small buffer (in degrees) to the extent
            # DEADBEEF - Buffer fails if GDAL is not built with GEOS support
            # tile_gcs_geom = tile_gcs_geom.Buffer(tile_gcs_buffer)
            tile_gcs_extent = gdc.Extent(tile_gcs_geom.GetEnvelope())
            tile_gcs_extent = tile_gcs_extent.ogrenv_swap()
            tile_gcs_extent.buffer_extent(tile_gcs_buffer)
            # tile_gcs_extent.ymin, tile_gcs_extent.xmax = \
            #     tile_gcs_extent.xmax, tile_gcs_extent.ymin

            # Offsets are needed since the tile name is the upper left corner
            # Tile n36w120 spans -120 <-> -119 and 35 <-> 36
            lon_list = range(
                int(tile_gcs_extent.xmin) - 1, int(tile_gcs_extent.xmax))
            lat_list = range(
                int(tile_gcs_extent.ymin) + 1, int(tile_gcs_extent.ymax) + 2)

            # Get list of DEM tile rasters
            dem_tile_list = []
            for lat, lon in itertools.product(lat_list, lon_list):
                # Convert sign of lat/lon to letter
                lat = ('n' + '{:02d}'.format(abs(lat)) if lat >= 0
                       else 's' + '{:02d}'.format(abs(lat)))
                lon = ('w' + '{:03d}'.format(abs(lon)) if lon < 0
                       else 'e' + '{:03d}'.format(abs(lon)))
                dem_tile_path = os.path.join(
                    dem_input_ws, dem_tile_fmt.format(lat, lon))
                if os.path.isfile(dem_tile_path):
                    dem_tile_list.append(dem_tile_path)
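            # Worked example of the tile selection above for a hypothetical
            #   buffered extent of xmin=-120.4, xmax=-118.9, ymin=35.1,
            #   ymax=36.8: lon_list = [-121, -120, -119] and
            #   lat_list = [36, 37], so tiles n36w121 through n37w119
            #   are checked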
            if not dem_tile_list:
                logging.warning('  WARNING: No DEM tiles were selected')
                continue

            # Mosaic tiles using mosaic function
            tile_utm_osr = gdc.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = gdc.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            tile_utm_ullr = tile_utm_extent.ul_lr_swap()

            # Mosaic, clip, project using custom function
            if mp_procs > 1:
                mosaic_mp_list.append([
                    dem_tile_list, tile_output_path, tile_utm_proj,
                    snap_cs, tile_utm_extent])
            else:
                gdc.mosaic_tiles(
                    dem_tile_list, tile_output_path, tile_utm_osr,
                    snap_cs, tile_utm_extent)

            # Cleanup
            del tile_output_path
            del tile_gcs_geom, tile_gcs_extent, tile_utm_extent
            del tile_utm_osr, tile_utm_proj
            del lon_list, lat_list, dem_tile_list

        # Mosaic DEM rasters using multiprocessing
        if mosaic_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(mosaic_tiles_mp, mosaic_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip NLCD for each path/row
    if nlcd_flag:
        logging.info('\nBuild NLCD for each path/row')
        project_mp_list = []
        for tile_name in tile_list:
            nlcd_output_path = os.path.join(
                nlcd_output_ws, tile_name, nlcd_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(nlcd_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(nlcd_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Get the nodata value from the NLCD raster
            #   (default to 255 if it is not set)
            nlcd_ds = gdal.Open(nlcd_input_path, 0)
            nlcd_band = nlcd_ds.GetRasterBand(1)
            nlcd_nodata = nlcd_band.GetNoDataValue()
            nlcd_ds = None
            if nlcd_nodata is None:
                nlcd_nodata = 255

            # Clip and project
            tile_utm_osr = gdc.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = gdc.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]
            tile_utm_ullr = tile_utm_extent.ul_lr_swap()

            if mp_procs > 1:
                project_mp_list.append([
                    nlcd_input_path, nlcd_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, nlcd_nodata])
            else:
                gdc.project_raster(
                    nlcd_input_path, nlcd_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs,
                    tile_utm_extent, nlcd_nodata)

            # Cleanup
            del nlcd_output_path
            del nlcd_ds, nlcd_band, nlcd_nodata
            del tile_utm_osr, tile_utm_proj, tile_utm_extent

        # Project NLCD rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                gdc.project_raster_mp, project_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool
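
    # remap_mask_func / remap_mask_mp (used in the CDL and LANDFIRE blocks
    #   below) flag raster cells whose values are in an ag remap list; the
    #   core array operation is roughly (hypothetical sketch, assumes numpy
    #   is imported as np):
    # ag_mask = np.isin(cdl_array, cdl_ag_list).astype(np.uint8)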
    # Project/clip CDL for each path/row
    if cdl_flag:
        logging.info('\nBuild CDL for each path/row')
        project_mp_list, remap_mp_list = [], []
        for tile_name in tile_list:
            cdl_output_path = os.path.join(
                cdl_output_ws, tile_name, cdl_output_fmt.format(year))
            cdl_ag_output_path = os.path.join(
                cdl_output_ws, tile_name, cdl_ag_output_fmt.format(year))
            if not os.path.isfile(cdl_input_path):
                logging.error(
                    '\n\n  {} does not exist'.format(cdl_input_path))
                sys.exit()
            if not overwrite_flag and os.path.isfile(cdl_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(cdl_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Get the nodata value from the CDL raster
            #   (default to 255 if it is not set)
            cdl_ds = gdal.Open(cdl_input_path, 0)
            cdl_band = cdl_ds.GetRasterBand(1)
            cdl_nodata = cdl_band.GetNoDataValue()
            cdl_ds = None
            if cdl_nodata is None:
                cdl_nodata = 255

            # Clip and project
            tile_utm_osr = gdc.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = gdc.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]

            if mp_procs > 1:
                project_mp_list.append([
                    cdl_input_path, cdl_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, cdl_nodata])
                remap_mp_list.append([
                    cdl_output_path, cdl_ag_output_path, cdl_ag_list])
            else:
                gdc.project_raster(
                    cdl_input_path, cdl_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs,
                    tile_utm_extent, cdl_nodata)
                # Build a mask of CDL ag lands
                remap_mask_func(
                    cdl_output_path, cdl_ag_output_path, cdl_ag_list)

            # Cleanup
            del cdl_output_path
            del cdl_ds, cdl_band, cdl_nodata
            del tile_utm_osr, tile_utm_proj, tile_utm_extent

        # Project CDL rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                gdc.project_raster_mp, project_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool
        if remap_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool

    # Project/clip LANDFIRE for each path/row
    if landfire_flag:
        logging.info('\nBuild LANDFIRE for each path/row')
        project_mp_list, remap_mp_list = [], []
        for tile_name in tile_list:
            landfire_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_output_fmt.format(year))
            landfire_ag_output_path = os.path.join(
                landfire_output_ws, tile_name,
                landfire_ag_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(landfire_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(landfire_output_path)))
                continue
            logging.info('  {}'.format(tile_name))

            # Use a default nodata value for the LANDFIRE raster
            #   (reading it from the raster is commented out)
            # landfire_ds = gdal.Open(landfire_input_path, 0)
            # landfire_band = landfire_ds.GetRasterBand(1)
            # landfire_nodata = landfire_band.GetNoDataValue()
            # landfire_ds = None
            # if landfire_nodata is None:
            #     landfire_nodata = 32767
            # del landfire_ds, landfire_band
            landfire_nodata = 32767

            # Clip and project
            tile_utm_osr = gdc.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_proj = gdc.epsg_proj(
                32600 + int(tile_utm_zone_dict[tile_name]))
            tile_utm_extent = tile_utm_extent_dict[tile_name]

            if mp_procs > 1:
                project_mp_list.append([
                    landfire_input_path, landfire_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs,
                    tile_utm_extent, landfire_nodata])
                remap_mp_list.append([
                    landfire_output_path, landfire_ag_output_path,
                    landfire_ag_list])
            else:
                gdc.project_raster(
                    landfire_input_path, landfire_output_path,
                    gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs,
                    tile_utm_extent, landfire_nodata)
                # Build a mask of LANDFIRE ag lands
                remap_mask_func(
                    landfire_output_path, landfire_ag_output_path,
                    landfire_ag_list)

            # Cleanup
            del landfire_output_path
            del tile_utm_osr, tile_utm_proj, tile_utm_extent

        # Project LANDFIRE rasters using multiprocessing
        if project_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(
                gdc.project_raster_mp, project_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool
        if remap_mp_list:
            pool = mp.Pool(mp_procs)
            results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1)
            pool.close()
            pool.join()
            del results, pool
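
    # gdc.polygon_to_raster_ds (used in the field block below) burns polygon
    #   features into an in-memory raster; a minimal sketch of the underlying
    #   GDAL calls (hypothetical, assumes "from osgeo import gdal, ogr"):
    # mem_ds = gdal.GetDriverByName('MEM').Create(
    #     '', output_cols, output_rows, 1, gdal.GDT_Byte)
    # mem_ds.SetProjection(output_proj)
    # mem_ds.SetGeoTransform(output_geo)
    # band = mem_ds.GetRasterBand(1)
    # band.SetNoDataValue(0)
    # band.Fill(0)
    # shp_ds = ogr.Open(field_proj_path)
    # gdal.RasterizeLayer(mem_ds, [1], shp_ds.GetLayer(), burn_values=[1])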
    # Convert field shapefiles to raster
    if field_flag:
        logging.info('\nBuild field rasters for each path/row')
        for tile_name in tile_list:
            logging.info('  {}'.format(tile_name))
            tile_output_ws = os.path.join(field_output_ws, tile_name)

            # Shapefile paths
            field_proj_name = (
                os.path.splitext(field_output_fmt.format(year))[0] +
                '_wgs84z{}.shp'.format(tile_utm_zone_dict[tile_name]))
            field_proj_path = os.path.join(tile_output_ws, field_proj_name)
            field_output_path = os.path.join(
                tile_output_ws, field_output_fmt.format(year))
            if not overwrite_flag and os.path.isfile(field_output_path):
                logging.debug('    {} already exists, skipping'.format(
                    os.path.basename(field_output_path)))
                continue

            # The ogr2ogr spatial query is in the input spatial reference
            # Project the path/row extent to the field osr/proj
            field_input_osr = gdc.feature_path_osr(field_input_path)
            tile_utm_osr = gdc.epsg_osr(
                32600 + int(tile_utm_zone_dict[tile_name]))
            # field_input_proj = gdc.osr_proj(field_input_osr)
            # tile_utm_proj = gdc.osr_proj(tile_utm_osr)
            field_tile_extent = gdc.project_extent(
                tile_utm_extent_dict[tile_name], tile_utm_osr,
                field_input_osr, 30)

            # Project shapefile to the path/row zone
            # Build the full EPSG code (32600 + zone) so single digit zones
            #   are not truncated
            # Clipping requires GDAL to be built with GEOS support
            subprocess.call(
                ['ogr2ogr',
                 '-t_srs', 'EPSG:{}'.format(
                     32600 + int(tile_utm_zone_dict[tile_name])),
                 '-f', 'ESRI Shapefile', '-overwrite'] +
                ['-spat'] + list(map(str, field_tile_extent)) +
                ['-clipdst'] +
                list(map(str, tile_utm_extent_dict[tile_name])) +
                # ['-clipsrc'] + list(map(str, field_tile_extent)) +
                [field_proj_path, field_input_path])

            # Convert shapefile to raster
            field_mem_ds = gdc.polygon_to_raster_ds(
                field_proj_path, nodata_value=0, burn_value=1,
                output_osr=tile_utm_osr,
                output_extent=tile_utm_extent_dict[tile_name])
            field_output_driver = gdc.raster_driver(field_output_path)
            if field_output_path.lower().endswith('.img'):
                field_output_ds = field_output_driver.CreateCopy(
                    field_output_path, field_mem_ds, 0, ['COMPRESS=YES'])
            else:
                field_output_ds = field_output_driver.CreateCopy(
                    field_output_path, field_mem_ds, 0)
            field_output_ds, field_mem_ds = None, None

            # Remove field shapefile
            # try:
            #     remove_file(field_proj_path)
            # except:
            #     pass

            # Cleanup
            del tile_utm_osr, field_tile_extent, field_input_osr
            # del tile_utm_proj, field_input_proj
            del field_proj_name, field_proj_path, field_output_path

    logging.debug('\nScript complete')
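
# Example invocation from another script or an interactive session
#   (hypothetical INI path):
# main('D:/projects/study_area/landsat_prep.ini',
#      tile_list=['p045r043'], overwrite_flag=False, mp_procs=4)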