def gcs_to_utm_dict(tile_list, tile_utm_zone_dict, tile_gcs_osr,
                    tile_gcs_wkt_dict, gcs_buffer=0.25, snap_xmin=None,
                    snap_ymin=None, snap_cs=None):
    """Build a {tile name: UTM extent} lookup from geographic tile footprints

    Each tile's WKT footprint is projected to the tile's UTM zone, the
    envelope of the projected geometry is buffered, and the result is
    snapped outward ('EXPAND') using the supplied snap point and cellsize.

    Parameters
    ----------
    tile_list : list
        Tile names; each must be a key of both lookup dictionaries.
    tile_utm_zone_dict : dict
        UTM zone for each tile name.  The zone is cast to int and added to
        32600 to form the EPSG code.
    tile_gcs_osr :
        Source spatial reference handed to osr.CoordinateTransformation
        (presumably an osr.SpatialReference for the WKT geometries --
        TODO confirm against caller).
    tile_gcs_wkt_dict : dict
        WKT geometry string for each tile name.
    gcs_buffer : float, optional
        Buffer size expressed in degrees (the default is 0.25).  It is
        multiplied by 100000 and applied to the projected extent.
    snap_xmin : float or None, optional
        Snap point x, forwarded to Extent.adjust_to_snap().
    snap_ymin : float or None, optional
        Snap point y, forwarded to Extent.adjust_to_snap().
    snap_cs : float or None, optional
        Snap cellsize, forwarded to Extent.adjust_to_snap().

    Returns
    -------
    dict
        Buffered and snapped drigo.Extent for each tile name.

    """
    logging.info('\nCalculate projected extent for each path/row')

    utm_extents = {}
    for tile in sorted(tile_list):
        logging.info(' {}'.format(tile))

        # Spatial reference for this tile's UTM zone and the transformation
        # from the geographic reference into it
        utm_epsg = 32600 + int(tile_utm_zone_dict[tile])
        utm_osr = drigo.epsg_osr(utm_epsg)
        gcs_to_utm = osr.CoordinateTransformation(tile_gcs_osr, utm_osr)

        # The geometry is not buffered in geographic space because
        # Geometry.Buffer() fails if GDAL is not built with GEOS support;
        # the projected extent is buffered instead (below).
        gcs_geom = ogr.CreateGeometryFromWkt(tile_gcs_wkt_dict[tile])

        # Project a copy so the source geometry is left untouched
        utm_geom = gcs_geom.Clone()
        utm_geom.Transform(gcs_to_utm)

        # OGR envelope -> drigo extent (ogrenv_swap reorders the values)
        extent = drigo.Extent(utm_geom.GetEnvelope()).ogrenv_swap()

        # Degrees are scaled to projected units using 0.1 degrees ~ 10 km
        extent.buffer_extent(gcs_buffer * 100000)
        extent.adjust_to_snap('EXPAND', snap_xmin, snap_ymin, snap_cs)

        utm_extents[tile] = extent

    return utm_extents
def main(img_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(),
         etr_flag=False, eto_flag=False, start_date=None, end_date=None,
         extent_path=None, output_extent=None, stats_flag=True,
         overwrite_flag=False, use_cimis_eto_flag=False):
    """Compute daily ETr/ETo from CIMIS data

    The input workspace is scanned for 4 digit year folders, each holding
    YYYY_MM_DD date folders with the daily CIMIS component rasters
    (Tx, Tn, Tdew, Rs, Rso, U2 and optionally ETo).  One multi-band raster
    is written per year and per output variable, with the day of year used
    as the band number.

    Parameters
    ----------
    img_ws : str
        Root folder of CIMIS data.
    ancillary_ws : str
        Folder of ancillary rasters
        (cimis_mask.img, cimis_elev.img, cimis_lat.img).
    output_ws : str
        Folder of output rasters.
    etr_flag : bool, optional
        If True, compute alfalfa reference ET (ETr).
        If neither etr_flag nor eto_flag is set, ETr is computed.
    eto_flag : bool, optional
        If True, compute grass reference ET (ETo).
    start_date : str, optional
        ISO format date (YYYY-MM-DD).
        If not set or not parsable, 2017-01-01 is used.
    end_date : str, optional
        ISO format date (YYYY-MM-DD).
        If not set or not parsable, 2017-12-31 is used.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Values defining output extent.
        NOTE(review): the values are snapped to the CIMIS grid without
        being reprojected, so they presumably must be in the CIMIS mask
        raster's coordinate system -- confirm.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    use_cimis_eto_flag : bool, optional
        If True, use CIMIS ETo raster if one of the component rasters is
        missing and ETo/ETr cannot be computed (the default is False).
        NOTE: this argument currently has no effect because the value is
        forced to True inside the function (see below).

    Returns
    -------
    None

    """
    logging.info('\nComputing CIMIS ETo/ETr')
    np.seterr(invalid='ignore')

    # Use CIMIS ETo raster directly instead of computing from components
    # Currently this will only be applied if one of the inputs is missing
    # NOTE: this overrides whatever the caller passed in; it is kept as-is
    #   so that existing runs keep producing the same outputs.
    use_cimis_eto_flag = True

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info(' ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set (TypeError) or is malformed (ValueError),
    #   process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    etr_folder = 'etr'
    eto_folder = 'eto'
    etr_fmt = 'etr_{}_daily_cimis.img'
    eto_fmt = 'eto_{}_daily_cimis.img'

    # Ancillary rasters (the mask defines the CIMIS grid)
    mask_raster = os.path.join(ancillary_ws, 'cimis_mask.img')
    dem_raster = os.path.join(ancillary_ws, 'cimis_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'cimis_lat.img')

    # Wind speed is measured at 2m
    zw = 2

    # Output workspaces
    etr_ws = os.path.join(output_ws, etr_folder)
    eto_ws = os.path.join(output_ws, eto_folder)
    if etr_flag and not os.path.isdir(etr_ws):
        os.makedirs(etr_ws)
    if eto_flag and not os.path.isdir(eto_ws):
        os.makedirs(eto_ws)

    # Developer switch: check that the daily RefET function produces the
    #   expected values and then exit.  Disabled by default.
    test_flag = False
    if test_flag:
        # NOTE(review): this call uses q=/u2= keywords whereas the
        #   production call below uses ea=/uz= -- confirm against the
        #   installed refet version before enabling.
        doy_test = 245
        elev_test = 1050.0
        lat_test = 39.9396 * math.pi / 180
        tmin_test = 11.07
        tmax_test = 34.69
        rs_test = 22.38
        u2_test = 1.94
        zw_test = 2.5
        tdew_test = -3.22
        ea_test = refet.calcs.saturation_vapor_pressure_func(tdew_test)
        pair_test = 101.3 * np.power((285 - 0.0065 * elev_test) / 285, 5.26)
        q_test = 0.622 * ea_test / (pair_test - (0.378 * ea_test))
        refet_obj = refet.Daily(
            tmin=tmin_test, tmax=tmax_test, q=q_test, rs=rs_test,
            u2=u2_test, zw=zw_test, elev=elev_test, doy=doy_test,
            lat=lat_test)
        etr = float(refet_obj.etr())
        eto = float(refet_obj.eto())
        print('ETr: 8.89', etr)
        print('ETo: 6.16', eto)
        sys.exit()

    # Get CIMIS grid properties from mask
    cimis_mask_ds = gdal.Open(mask_raster)
    cimis_osr = drigo.raster_ds_osr(cimis_mask_ds)
    cimis_proj = drigo.osr_proj(cimis_osr)
    cimis_cs = drigo.raster_ds_cellsize(cimis_mask_ds, x_only=True)
    cimis_extent = drigo.raster_ds_extent(cimis_mask_ds)
    cimis_full_geo = cimis_extent.geo(cimis_cs)
    cimis_x, cimis_y = cimis_extent.origin()
    cimis_mask_ds = None
    logging.debug(' Projection: {}'.format(cimis_proj))
    logging.debug(' Cellsize: {}'.format(cimis_cs))
    logging.debug(' Geo: {}'.format(cimis_full_geo))
    logging.debug(' Extent: {}'.format(cimis_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        cimis_extent = drigo.Extent(output_extent)
        cimis_extent.adjust_to_snap('EXPAND', cimis_x, cimis_y, cimis_cs)
        cimis_geo = cimis_extent.geo(cimis_cs)
        logging.debug(' Geo: {}'.format(cimis_geo))
        # Report the snapped extent (the requested one was logged above)
        logging.debug(' Extent: {}'.format(cimis_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            cimis_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            cimis_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        cimis_extent = drigo.project_extent(
            cimis_extent, extent_osr, cimis_osr, extent_cs)
        cimis_extent.adjust_to_snap('EXPAND', cimis_x, cimis_y, cimis_cs)
        cimis_geo = cimis_extent.geo(cimis_cs)
        logging.debug(' Geo: {}'.format(cimis_geo))
        logging.debug(' Extent: {}'.format(cimis_extent))
    else:
        cimis_geo = cimis_full_geo

    # Latitude (converted from degrees to radians)
    lat_array = drigo.raster_to_array(
        lat_raster, mask_extent=cimis_extent, return_nodata=False)
    lat_array = lat_array.astype(np.float32)
    lat_array *= math.pi / 180

    # Elevation data
    elev_array = drigo.raster_to_array(
        dem_raster, mask_extent=cimis_extent, return_nodata=False)
    elev_array = elev_array.astype(np.float32)

    # Process each year in the input workspace
    logging.info("")
    for year_str in sorted(os.listdir(img_ws)):
        logging.debug('{}'.format(year_str))
        if not re.match(r'^\d{4}$', year_str):
            logging.debug(' Not a 4 digit year folder, skipping')
            continue
        year_ws = os.path.join(img_ws, year_str)
        year_int = int(year_str)
        if year_int < start_dt.year:
            logging.debug(' Before start date, skipping')
            continue
        elif year_int > end_dt.year:
            logging.debug(' After end date, skipping')
            continue
        logging.info('{}'.format(year_str))

        # Output paths
        # One raster per year; each day is stored in the band matching
        #   its day of year (366 bands so leap years fit).
        etr_raster = os.path.join(etr_ws, etr_fmt.format(year_str))
        eto_raster = os.path.join(eto_ws, eto_fmt.format(year_str))
        if etr_flag and (overwrite_flag or not os.path.isfile(etr_raster)):
            logging.debug(' {}'.format(etr_raster))
            drigo.build_empty_raster(
                etr_raster, band_cnt=366, output_dtype=np.float32,
                output_proj=cimis_proj, output_cs=cimis_cs,
                output_extent=cimis_extent, output_fill_flag=True)
        if eto_flag and (overwrite_flag or not os.path.isfile(eto_raster)):
            logging.debug(' {}'.format(eto_raster))
            drigo.build_empty_raster(
                eto_raster, band_cnt=366, output_dtype=np.float32,
                output_proj=cimis_proj, output_cs=cimis_cs,
                output_extent=cimis_extent, output_fill_flag=True)

        # Process each date in the year
        for date_str in sorted(os.listdir(year_ws)):
            logging.debug('{}'.format(date_str))
            try:
                date_dt = dt.datetime.strptime(date_str, '%Y_%m_%d')
            except ValueError:
                logging.debug(
                    ' Invalid folder date format (YYYY_MM_DD), skipping')
                continue
            if date_dt < start_dt:
                logging.debug(' Before start date, skipping')
                continue
            elif date_dt > end_dt:
                logging.debug(' After end date, skipping')
                continue
            logging.info(date_str)
            date_ws = os.path.join(year_ws, date_str)
            doy = int(date_dt.strftime('%j'))

            # Set file paths
            tmax_path = os.path.join(date_ws, 'Tx.img')
            tmin_path = os.path.join(date_ws, 'Tn.img')
            tdew_path = os.path.join(date_ws, 'Tdew.img')
            rso_path = os.path.join(date_ws, 'Rso.img')
            rs_path = os.path.join(date_ws, 'Rs.img')
            u2_path = os.path.join(date_ws, 'U2.img')
            eto_path = os.path.join(date_ws, 'ETo.img')
            input_list = [
                tmin_path, tmax_path, tdew_path, u2_path, rs_path, rso_path
            ]

            # If any input raster is missing, skip the day
            # Unless ETo is present (and use_cimis_eto_flag is True)
            day_skip_flag = False
            for t_path in input_list:
                if not os.path.isfile(t_path):
                    logging.info(' {} is missing'.format(t_path))
                    day_skip_flag = True

            if day_skip_flag:
                if not (use_cimis_eto_flag and os.path.isfile(eto_path)):
                    logging.info(' Skipping date')
                    continue

                logging.info(' Using CIMIS ETo directly')
                eto_array = drigo.raster_to_array(
                    eto_path, 1, cimis_extent, return_nodata=False)
                eto_array = eto_array.astype(np.float32)
                if not np.any(eto_array):
                    logging.info(' {} is empty or missing'.format(eto_path))
                    logging.info(' Skipping date')
                    continue

                # ETr is taken as 1.2 times the CIMIS ETo
                if etr_flag:
                    drigo.array_to_comp_raster(
                        1.2 * eto_array, etr_raster, band=doy,
                        stats_flag=False)
                # ETo
                if eto_flag:
                    drigo.array_to_comp_raster(
                        eto_array, eto_raster, band=doy, stats_flag=False)
                del eto_array
                continue

            # All component rasters are present; read them in
            # (Rso is required to exist but is not read or used.)
            tmin_array = drigo.raster_to_array(
                tmin_path, 1, cimis_extent, return_nodata=False)
            tmax_array = drigo.raster_to_array(
                tmax_path, 1, cimis_extent, return_nodata=False)
            tdew_array = drigo.raster_to_array(
                tdew_path, 1, cimis_extent, return_nodata=False)
            rs_array = drigo.raster_to_array(
                rs_path, 1, cimis_extent, return_nodata=False)
            u2_array = drigo.raster_to_array(
                u2_path, 1, cimis_extent, return_nodata=False)

            # Check that all input arrays have data
            for t_name, t_array in [
                    [tmin_path, tmin_array], [tmax_path, tmax_array],
                    [tdew_path, tdew_array], [u2_path, u2_array],
                    [rs_path, rs_array]]:
                if not np.any(t_array):
                    logging.warning(
                        ' {} is empty or missing'.format(t_name))
                    day_skip_flag = True
            if day_skip_flag:
                logging.warning(' Skipping date')
                continue

            # NOTE: Some arrays have a 500m cellsize
            #   (i.e. 2011-07-25, 2010-01-01 -> 2010-07-27).
            #   No rescaling is applied here.

            # Compute Ea from Tdew
            ea_array = refet.calcs.saturation_vapor_pressure_func(
                tdew_array)

            refet_obj = refet.Daily(
                tmin=tmin_array, tmax=tmax_array, ea=ea_array,
                rs=rs_array, uz=u2_array, zw=zw, elev=elev_array,
                lat=lat_array, doy=doy, method='asce')

            # ETr
            if etr_flag:
                drigo.array_to_comp_raster(
                    refet_obj.etr().astype(np.float32), etr_raster,
                    band=doy, stats_flag=False)
            # ETo
            if eto_flag:
                drigo.array_to_comp_raster(
                    refet_obj.eto().astype(np.float32), eto_raster,
                    band=doy, stats_flag=False)

            # Cleanup
            del tmin_array, tmax_array, tdew_array, u2_array, rs_array
            del ea_array

        # Statistics are computed once per year, after all days are written
        if stats_flag and etr_flag:
            drigo.raster_statistics(etr_raster)
        if stats_flag and eto_flag:
            drigo.raster_statistics(eto_raster)

    logging.debug('\nScript Complete')
def main(start_dt, end_dt, netcdf_ws, ancillary_ws, output_ws,
         variables=['etr', 'pr'], extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract GRIDMET variables from yearly NetCDF files

    Every "<variable>_<year>.nc" file in the NetCDF workspace that matches
    a requested variable is clipped to the output extent and written to a
    multi-band raster (one raster per variable and year, with the day of
    year used as the band number).

    Parameters
    ----------
    start_dt : datetime
        Start date.
    end_dt : datetime
        End date.
    netcdf_ws : str
        Folder of GRIDMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters (gridmet_elev.img defines the grid).
    output_ws : str
        Folder of output rasters.
    variables : list, optional
        GRIDMET variables to extract (the default is ['etr', 'pr']).
        Choices: 'eto', 'etr', 'pr', 'srad', 'sph', 'tmmn', 'tmmx', 'vs'
        Set as ['all'] to process all variables.
        The default list is never modified by this function.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None
        False is returned if extent_path is set but the file does not exist.

    Raises
    ------
    SystemExit
        If variables is empty, is not a list, or contains unknown names.

    """
    logging.info('\nExtracting GRIDMET variables')
    logging.debug(' Start date: {}'.format(start_dt))
    logging.debug(' End date: {}'.format(end_dt))

    # The elevation raster defines the GRIDMET projection, cellsize & extent
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')

    # GRIDMET rasters to extract
    data_full_list = ['eto', 'etr', 'pr', 'srad', 'sph', 'tmmn', 'tmmx', 'vs']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        # Documented shortcut for processing every supported variable
        logging.info(' Processing all variables')
        variables = list(data_full_list)
    elif not set(variables).issubset(set(data_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n {}'.format(
            variables))
        sys.exit()

    output_fmt = '{}_{}_daily_gridmet.img'
    # Raw string so the regex escapes are not treated as string escapes;
    #   the dot before the extension is matched literally
    gridmet_re = re.compile(r'(?P<VAR>\w+)_(?P<YEAR>\d{4})\.nc$')

    # GRIDMET band name dictionary (NetCDF variable name for each product)
    gridmet_band_dict = {
        'eto': 'potential_evapotranspiration',
        'etr': 'potential_evapotranspiration',
        'pr': 'precipitation_amount',
        'srad': 'surface_downwelling_shortwave_flux_in_air',
        'sph': 'specific_humidity',
        'tmmn': 'air_temperature',
        'tmmx': 'air_temperature',
        'vs': 'wind_speed',
    }

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = drigo.raster_ds_osr(gridmet_ds)
    gridmet_proj = drigo.osr_proj(gridmet_osr)
    gridmet_cs = drigo.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = drigo.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    gridmet_ds = None
    logging.debug(' Projection: {}'.format(gridmet_proj))
    logging.debug(' Cellsize: {}'.format(gridmet_cs))
    logging.debug(' Geo: {}'.format(gridmet_full_geo))
    logging.debug(' Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        gridmet_extent = drigo.Extent(output_extent)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        logging.debug(' Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error(
                '\nThe extent object does not exist, exiting\n'
                ' {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            gridmet_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            gridmet_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = drigo.project_extent(
            gridmet_extent, extent_osr, gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        logging.debug(' Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = drigo.array_geo_offsets(
        gridmet_full_geo, gridmet_geo, cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Flip row indices since GRIDMET arrays are flipped up/down
    # Hard coding GRIDMET row count for now
    gridmet_rows = 585
    row_a, row_b = gridmet_rows - (g_j + g_rows), gridmet_rows - g_j
    col_a, col_b = g_i, g_i + g_cols

    # Process each variable
    logging.info("")
    for input_var in variables:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        output_var = 'ppt' if input_var == 'pr' else input_var

        # Build output folder
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Process each file in the input workspace
        for input_name in sorted(os.listdir(netcdf_ws)):
            input_match = gridmet_re.match(input_name)
            if not input_match:
                logging.debug("{}".format(input_name))
                logging.debug(' Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug("{}".format(input_name))
                logging.debug(' Variable didn\'t match, skipping')
                continue
            else:
                logging.info("{}".format(input_name))

            year_str = input_match.group('YEAR')
            logging.info(" {}".format(year_str))
            year_int = int(year_str)
            if start_dt is not None and year_int < start_dt.year:
                logging.debug(' Before start date, skipping')
                continue
            elif end_dt is not None and year_int > end_dt.year:
                logging.debug(' After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)

            # Create a single raster for each year with 366 bands
            # Each day will be stored in a separate band
            output_path = os.path.join(
                var_ws, output_fmt.format(output_var, year_str))
            logging.debug(' {}'.format(output_path))
            if os.path.isfile(output_path):
                if not overwrite_flag:
                    logging.debug(' File already exists, skipping')
                    continue
                else:
                    logging.debug(' File already exists, removing existing')
                    os.remove(output_path)
            drigo.build_empty_raster(
                output_path, band_cnt=366, output_dtype=np.float32,
                output_proj=gridmet_proj, output_cs=gridmet_cs,
                output_extent=gridmet_extent, output_fill_flag=True)

            # Read in the GRIDMET NetCDF file
            # Immediately clip input array to save memory
            # The dataset is closed even if the read fails
            input_nc_f = netCDF4.Dataset(input_raster, 'r')
            try:
                input_nc = input_nc_f.variables[gridmet_band_dict[input_var]][
                    :, row_a: row_b, col_a: col_b].copy()
            finally:
                input_nc_f.close()
            del input_nc_f
            input_nc = np.flip(input_nc, 1)

            # A numpy array is returned when slicing a masked array
            #   if there are no masked pixels
            # This is a hack to force the numpy array back to a masked array
            if not isinstance(input_nc, np.ma.MaskedArray):
                input_nc = np.ma.MaskedArray(
                    input_nc, np.zeros(input_nc.shape, dtype=bool))

            # Check all valid dates in the year
            year_dates = _utils.date_range(
                dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                if start_dt is not None and date_dt < start_dt:
                    logging.debug(' {} - before start date, skipping'.format(
                        date_dt.date()))
                    continue
                elif end_dt is not None and date_dt > end_dt:
                    logging.debug(' {} - after end date, skipping'.format(
                        date_dt.date()))
                    continue
                else:
                    logging.info(' {}'.format(date_dt.date()))

                # Day of year is 1-based; the time axis index is 0-based
                doy = int(date_dt.strftime('%j'))
                doy_i = doy - 1

                # Arrays are read as masked array with a fill value of -9999
                # Convert to basic numpy array arrays with nan values
                try:
                    input_full_ma = input_nc[doy_i, :, :]
                except IndexError:
                    logging.info(' date not in netcdf, skipping')
                    continue
                input_full_array = input_full_ma.data.astype(np.float32)
                input_full_nodata = float(input_full_ma.fill_value)
                input_full_array[
                    input_full_array == input_full_nodata] = np.nan

                # Since inputs are netcdf, need to create GDAL raster
                #   datasets in order to use gdal_common functions
                # Create an in memory dataset of the (already clipped) array
                input_full_ds = drigo.array_to_mem_ds(
                    input_full_array, output_geo=gridmet_geo,
                    output_proj=gridmet_proj)

                # Then extract the subset from the in memory dataset
                output_array = drigo.raster_ds_to_array(
                    input_full_ds, 1, mask_extent=gridmet_extent,
                    return_nodata=False)

                # Convert Kelvin to Celsius
                if input_var in ['tmmx', 'tmmn']:
                    output_array -= 273.15

                # Save the projected array as 32-bit floats
                drigo.array_to_comp_raster(
                    output_array.astype(np.float32), output_path,
                    band=doy, stats_flag=False)
                del output_array

            # Statistics are computed once per file, after all days are set
            if stats_flag:
                drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')
def main(grb_ws, ancillary_ws, output_ws, etr_flag=False, eto_flag=False, scene_list_path=None, start_dt=None, end_dt=None, times_str='', extent_path=None, output_extent=None, daily_flag=True, stats_flag=True, overwrite_flag=False): """Compute hourly ETr/ETo from NLDAS data Parameters ---------- grb_ws : str Folder of NLDAS GRB files. ancillary_ws : str Folder of ancillary rasters. output_ws : str Folder of output rasters. etr_flag : bool, optional If True, compute alfalfa reference ET (ETr). eto_flag : bool, optional If True, compute grass reference ET (ETo). scene_list_path : str, optional Landsat scene keep list file path. start_date : str, optional ISO format date (YYYY-MM-DD). end_date : str, optional ISO format date (YYYY-MM-DD). times : str, optional Comma separated values and/or ranges of UTC hours (i.e. "1, 2, 5-8"). Parsed with python_common.parse_int_set(). extent_path : str, optional File path defining the output extent. output_extent : list, optional Decimal degrees values defining output extent. daily_flag : bool, optional If True, save daily ETr/ETo sum raster (the default is True). stats_flag : bool, optional If True, compute raster statistics (the default is True). overwrite_flag : bool, optional If True, overwrite existing files (the default is False). 
Returns ------- None """ logging.info('\nComputing NLDAS hourly ETr/ETo') np.seterr(invalid='ignore') # Compute ETr and/or ETo if not etr_flag and not eto_flag: logging.info(' ETo/ETr flag(s) not set, defaulting to ETr') etr_flag = True # Only process a specific hours if not times_str: time_list = range(0, 24, 1) else: time_list = list(_utils.parse_int_set(times_str)) time_list = ['{:02d}00'.format(t) for t in time_list] etr_folder = 'etr' eto_folder = 'eto' hour_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img' # hour_fmt = '{}_{:04d}{:02d}{:02d}_{4:04d}_nldas.img' day_fmt = '{}_{:04d}{:02d}{:02d}_nldas.img' # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb' input_re = re.compile('NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})' + '(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$') # # Landsat Collection 1 Product ID # landsat_re = re.compile( # '^(?:LT04|LT05|LE07|LC08)_\w{4}_\d{3}\d{3}_(?P<DATE>\d{8})_' # '\w{8}_\w{2}_\w{2}') # Landsat Custom Scene ID landsat_re = re.compile('^(?:LT04|LT05|LE07|LC08)_\d{6}_(?P<DATE>\d{8})') # Assume NLDAS is NAD83 # input_epsg = 'EPSG:4269' # Ancillary raster paths mask_path = os.path.join(ancillary_ws, 'nldas_mask.img') elev_path = os.path.join(ancillary_ws, 'nldas_elev.img') lat_path = os.path.join(ancillary_ws, 'nldas_lat.img') lon_path = os.path.join(ancillary_ws, 'nldas_lon.img') # Process Landsat scene list and start/end input parameters if not scene_list_path and (not start_dt or not end_dt): logging.error( '\nERROR: A Landsat scene list or start/end dates must be set, ' 'exiting\n') return False if scene_list_path is not None and os.path.isfile(scene_list_path): # Build a date list from the Landsat scene keep list file logging.info('\nReading dates from scene keep list file') logging.info(' {}'.format(scene_list_path)) with open(scene_list_path) as input_f: keep_list = input_f.readlines() date_list = sorted([ dt.datetime.strptime(m.group('DATE'), '%Y%m%d').strftime('%Y-%m-%d') for image_id in keep_list for m in 
[landsat_re.match(image_id)] if m ]) logging.debug(' {}'.format(', '.join(date_list))) else: date_list = [] if start_dt and end_dt: logging.debug(' Start date: {}'.format(start_dt)) logging.debug(' End date: {}'.format(end_dt)) else: start_dt = dt.datetime.strptime(date_list[0], '%Y-%m-%d') end_dt = dt.datetime.strptime(date_list[-1], '%Y-%m-%d') # This allows GDAL to throw Python Exceptions # gdal.UseExceptions() # mem_driver = gdal.GetDriverByName('MEM') # Get the NLDAS spatial reference from the mask raster nldas_ds = gdal.Open(mask_path) nldas_osr = drigo.raster_ds_osr(nldas_ds) nldas_proj = drigo.osr_proj(nldas_osr) nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True) nldas_extent = drigo.raster_ds_extent(nldas_ds) nldas_geo = nldas_extent.geo(nldas_cs) nldas_x, nldas_y = nldas_extent.origin() nldas_ds = None logging.debug(' Projection: {}'.format(nldas_proj)) logging.debug(' Cellsize: {}'.format(nldas_cs)) logging.debug(' Geo: {}'.format(nldas_geo)) logging.debug(' Extent: {}'.format(nldas_extent)) # Subset data to a smaller extent if output_extent is not None: logging.info('\nComputing subset extent & geo') logging.debug(' Extent: {}'.format(output_extent)) nldas_extent = drigo.Extent(output_extent) nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs) nldas_geo = nldas_extent.geo(nldas_cs) logging.debug(' Geo: {}'.format(nldas_geo)) logging.debug(' Extent: {}'.format(output_extent)) elif extent_path is not None: logging.info('\nComputing subset extent & geo') if not os.path.isfile(extent_path): logging.error('\nThe extent object does not exist, exiting\n' ' {}'.format(extent_path)) return False elif extent_path.lower().endswith('.shp'): nldas_extent = drigo.feature_path_extent(extent_path) extent_osr = drigo.feature_path_osr(extent_path) extent_cs = None else: nldas_extent = drigo.raster_path_extent(extent_path) extent_osr = drigo.raster_path_osr(extent_path) extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True) nldas_extent = 
drigo.project_extent(nldas_extent, extent_osr, nldas_osr, extent_cs) nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs) nldas_geo = nldas_extent.geo(nldas_cs) logging.debug(' Geo: {}'.format(nldas_geo)) logging.debug(' Extent: {}'.format(nldas_extent)) logging.debug('') # Read the NLDAS mask array if present if mask_path and os.path.isfile(mask_path): mask_array, mask_nodata = drigo.raster_to_array( mask_path, mask_extent=nldas_extent, fill_value=0, return_nodata=True) mask_array = mask_array != mask_nodata else: mask_array = None # Read ancillary arrays (or subsets?) elev_array = drigo.raster_to_array(elev_path, mask_extent=nldas_extent, return_nodata=False) # pair_array = et_common.air_pressure_func(elev_array) lat_array = drigo.raster_to_array(lat_path, mask_extent=nldas_extent, return_nodata=False) lon_array = drigo.raster_to_array(lon_path, mask_extent=nldas_extent, return_nodata=False) # Hourly RefET functions expects lat/lon in radians lat_array *= (math.pi / 180) lon_array *= (math.pi / 180) # Build output folder etr_ws = os.path.join(output_ws, etr_folder) eto_ws = os.path.join(output_ws, eto_folder) if etr_flag and not os.path.isdir(etr_ws): os.makedirs(etr_ws) if eto_flag and not os.path.isdir(eto_ws): os.makedirs(eto_ws) # DEADBEEF - Instead of processing all available files, the following # code will process files for target dates # for input_dt in date_range(start_dt, end_dt + dt.timedelta(1)): # logging.info(input_dt.date()) # Iterate all available files and check dates if necessary # Each sub folder in the main folder has all imagery for 1 day # (in UTC time) # The path for each subfolder is the /YYYY/DOY errors = defaultdict(list) for root, folders, files in os.walk(grb_ws): root_split = os.path.normpath(root).split(os.sep) # If the year/doy is outside the range, skip if (re.match('\d{4}', root_split[-2]) and re.match('\d{3}', root_split[-1])): root_dt = dt.datetime.strptime( '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j') 
logging.info('{}'.format(root_dt.date())) if ((start_dt is not None and root_dt < start_dt) or (end_dt is not None and root_dt > end_dt)): continue elif date_list and root_dt.date().isoformat() not in date_list: continue # If the year is outside the range, don't search subfolders elif re.match('\d{4}', root_split[-1]): root_year = int(root_split[-1]) logging.info('Year: {}'.format(root_year)) if ((start_dt is not None and root_year < start_dt.year) or (end_dt is not None and root_year > end_dt.year)): folders[:] = [] else: folders[:] = sorted(folders) continue else: continue logging.debug(' {}'.format(root)) # Start off assuming every file needs to be processed day_skip_flag = False # Build output folders if necessary etr_year_ws = os.path.join(etr_ws, str(root_dt.year)) eto_year_ws = os.path.join(eto_ws, str(root_dt.year)) if etr_flag and not os.path.isdir(etr_year_ws): os.makedirs(etr_year_ws) if eto_flag and not os.path.isdir(eto_year_ws): os.makedirs(eto_year_ws) # Build daily total paths etr_day_path = os.path.join( etr_year_ws, day_fmt.format('etr', root_dt.year, root_dt.month, root_dt.day)) eto_day_path = os.path.join( eto_year_ws, day_fmt.format('eto', root_dt.year, root_dt.month, root_dt.day)) etr_hour_path = os.path.join( etr_year_ws, hour_fmt.format('etr', root_dt.year, root_dt.month, root_dt.day)) eto_hour_path = os.path.join( eto_year_ws, hour_fmt.format('eto', root_dt.year, root_dt.month, root_dt.day)) # logging.debug(' {}'.format(etr_hour_path)) # If daily ETr/ETo files are present, day can be skipped if not overwrite_flag and daily_flag: if etr_flag and not os.path.isfile(etr_day_path): pass elif eto_flag and not os.path.isfile(eto_day_path): pass else: day_skip_flag = True # If the hour and daily files don't need to be made, skip the day if not overwrite_flag: if etr_flag and not os.path.isfile(etr_hour_path): pass elif eto_flag and not os.path.isfile(eto_hour_path): pass elif day_skip_flag: logging.debug(' File(s) already exist, skipping') 
continue # Create a single raster for each day with 24 bands # Each time step will be stored in a separate band if etr_flag: logging.debug(' {}'.format(etr_day_path)) drigo.build_empty_raster(etr_hour_path, band_cnt=24, output_dtype=np.float32, output_proj=nldas_proj, output_cs=nldas_cs, output_extent=nldas_extent, output_fill_flag=True) if eto_flag: logging.debug(' {}'.format(eto_day_path)) drigo.build_empty_raster(eto_hour_path, band_cnt=24, output_dtype=np.float32, output_proj=nldas_proj, output_cs=nldas_cs, output_extent=nldas_extent, output_fill_flag=True) # Sum all ETr/ETo images in each folder to generate a UTC day total etr_day_array = 0 eto_day_array = 0 # Process each hour file for input_name in sorted(files): logging.info(' {}'.format(input_name)) input_match = input_re.match(input_name) if input_match is None: logging.debug(' Regular expression didn\'t match, skipping') continue input_dt = dt.datetime(int(input_match.group('YEAR')), int(input_match.group('MONTH')), int(input_match.group('DAY'))) input_doy = int(input_dt.strftime('%j')) time_str = input_match.group('TIME') band_num = int(time_str[:2]) + 1 # if start_dt is not None and input_dt < start_dt: # continue # elif end_dt is not None and input_dt > end_dt: # continue # elif date_list and input_dt.date().isoformat() not in date_list: # continue if not daily_flag and time_str not in time_list: logging.debug(' Time not in list and not daily, skipping') continue input_path = os.path.join(root, input_name) logging.debug(' Time: {} {}'.format(input_dt.date(), time_str)) logging.debug(' Band: {}'.format(band_num)) # Determine band numbering/naming input_band_dict = grib_band_names(input_path) # Read input bands input_ds = gdal.Open(input_path) # Temperature should be in C for et_common.refet_hourly_func() if 'Temperature [K]' in input_band_dict.keys(): temp_band_units = 'K' temp_array = drigo.raster_ds_to_array( input_ds, band=input_band_dict['Temperature [K]'], mask_extent=nldas_extent, 
return_nodata=False) elif 'Temperature [C]' in input_band_dict.keys(): temp_band_units = 'C' temp_array = drigo.raster_ds_to_array( input_ds, band=input_band_dict['Temperature [C]'], mask_extent=nldas_extent, return_nodata=False) else: logging.error('Unknown Temperature units, skipping') logging.error(' {}'.format(input_band_dict.keys())) continue # DEADBEEF - Having issue with T appearing to be C but labeled as K # Try to determine temperature units from values temp_mean = float(np.nanmean(temp_array)) temp_units_dict = {20: 'C', 293: 'K'} temp_array_units = temp_units_dict[min( temp_units_dict, key=lambda x: abs(x - temp_mean))] if temp_array_units == 'K' and temp_band_units == 'K': logging.debug(' Converting temperature from K to C') temp_array -= 273.15 elif temp_array_units == 'C' and temp_band_units == 'C': pass elif temp_array_units == 'C' and temp_band_units == 'K': logging.debug(( ' Temperature units are K in the GRB band name, ' + 'but values appear to be C\n Mean temperature: {:.2f}\n' + ' Values will NOT be adjusted').format(temp_mean)) elif temp_array_units == 'K' and temp_band_units == 'C': logging.debug(( ' Temperature units are C in the GRB band name, ' + 'but values appear to be K\n Mean temperature: {:.2f}\n' + ' Values will be adjusted from K to C').format(temp_mean)) temp_array -= 273.15 try: sph_array = drigo.raster_ds_to_array( input_ds, band=input_band_dict['Specific humidity [kg/kg]'], mask_extent=nldas_extent, return_nodata=False) rs_array = drigo.raster_ds_to_array( input_ds, band=input_band_dict[ 'Downward shortwave radiation flux [W/m^2]'], mask_extent=nldas_extent, return_nodata=False) wind_u_array = drigo.raster_ds_to_array( input_ds, band=input_band_dict['u-component of wind [m/s]'], mask_extent=nldas_extent, return_nodata=False) wind_v_array = drigo.raster_ds_to_array( input_ds, band=input_band_dict['v-component of wind [m/s]'], mask_extent=nldas_extent, return_nodata=False) input_ds = None except KeyError as e: 
errors[input_path].append(e) logging.error(' KeyError: {} Skipping: {}'.format( e, input_ds.GetDescription())) continue rs_array *= 0.0036 # W m-2 to MJ m-2 hr-1 wind_array = np.sqrt(wind_u_array**2 + wind_v_array**2) del wind_u_array, wind_v_array # Compute vapor pressure from specific humidity pair_array = refet.calcs._air_pressure(elev=elev_array) ea_array = refet.calcs._actual_vapor_pressure(q=sph_array, pair=pair_array) refet_obj = refet.Hourly(tmean=temp_array, ea=ea_array, rs=rs_array, uz=wind_array, zw=10, elev=elev_array, lat=lat_array, lon=lon_array, doy=input_doy, time=int(time_str) / 100, method='asce') # ETr if etr_flag: etr_array = refet_obj.etr() if daily_flag: etr_day_array += etr_array if time_str in time_list: drigo.array_to_comp_raster(etr_array.astype(np.float32), etr_hour_path, band=band_num, stats_flag=False) del etr_array # ETo if eto_flag: eto_array = refet_obj.eto() if eto_flag and daily_flag: eto_day_array += eto_array if eto_flag and time_str in time_list: drigo.array_to_comp_raster(eto_array.astype(np.float32), eto_hour_path, band=band_num, stats_flag=False) del eto_array del temp_array, sph_array, rs_array, wind_array del pair_array, ea_array if stats_flag and etr_flag: drigo.raster_statistics(etr_hour_path) if stats_flag and eto_flag: drigo.raster_statistics(eto_hour_path) # Save the projected ETr/ETo as 32-bit floats if not day_skip_flag and daily_flag: if etr_flag: try: drigo.array_to_raster(etr_day_array.astype(np.float32), etr_day_path, output_geo=nldas_geo, output_proj=nldas_proj, stats_flag=stats_flag) except AttributeError: pass if eto_flag: try: drigo.array_to_raster(eto_day_array.astype(np.float32), eto_day_path, output_geo=nldas_geo, output_proj=nldas_proj, stats_flag=stats_flag) except AttributeError: pass del etr_day_array, eto_day_array if len(errors) > 0: logging.info('\nThe following errors were encountered:') for key, value in errors.items(): logging.error(' Filepath: {}, error: {}'.format(key, value)) 
logging.debug('\nScript Complete')
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), variables=['prcp'],
         daily_flag=False, monthly_flag=True, annual_flag=False,
         start_year=1981, end_year=2010,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Build DAYMET climatology rasters from yearly NetCDF files

    Every file in `netcdf_ws` whose name matches
    ``daymet_v3_<VAR>_<YEAR>_na.nc4`` and whose year falls inside
    [start_year, end_year] is read one day at a time.  The daily values are
    accumulated into per-DOY, per-month and/or whole-period sums and counts,
    and the resulting rasters are written below ``output_ws/<variable>``.

    Parameters
    ----------
    netcdf_ws : str
        Folder of DAYMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters (must contain 'daymet_mask.img', which
        supplies the projection, cellsize, extent and snap origin).
    output_ws : str
        Folder of output rasters.
    variables : list, optional
        DAYMET variables to process.  Accepted values are 'prcp', 'tmmn'
        and 'tmmx'.  Set as ['all'] to process all of them.
        The default list is never modified by this function.
    daily_flag : bool, optional
        If True, compute daily (DOY) climatologies.
    monthly_flag : bool, optional
        If True, compute monthly climatologies.
    annual_flag : bool, optional
        If True, compute annual climatologies.
    start_year : int, optional
        Climatology start year (falls back to 1981 if it is not usable).
    end_year : int, optional
        Climatology end year (falls back to 2010 if it is not usable).
    extent_path : str, optional
        File path of a raster defining the output extent.
        Only used if `output_extent` is None.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
        This function does not currently consult the flag.

    Returns
    -------
    None

    Notes
    -----
    The process is terminated with ``sys.exit()`` if `variables` is empty,
    is not a list, or contains an unsupported name.

    """
    logging.info('\nGenerating DAYMET climatologies')

    daily_fmt = 'daymet_{var}_30yr_normal_{doy:03d}.img'
    monthly_fmt = 'daymet_{var}_30yr_normal_{month:02d}.img'
    annual_fmt = 'daymet_{var}_30yr_normal.img'
    # Alternate names that encode the year range.  The .format() calls below
    # already pass start/end so these can be swapped in directly.
    # daily_fmt = 'daymet_{var}_normal_{start}_{end}_{doy:03d}.img'
    # monthly_fmt = 'daymet_{var}_normal_{start}_{end}_{month:02d}.img'
    # annual_fmt = 'daymet_{var}_normal_{start}_{end}.img'

    # If a date is not set (or is not a valid year), process the
    # 1981-2010 climatology
    try:
        start_dt = dt.datetime(start_year, 1, 1)
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError, OverflowError):
        start_dt = dt.datetime(1981, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime(end_year, 12, 31)
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError, OverflowError):
        end_dt = dt.datetime(2010, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    daymet_re = re.compile(
        r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$')

    # DAYMET rasters to extract
    var_full_list = ['prcp', 'tmmn', 'tmmx']
    # data_full_list = ['prcp', 'srad', 'vp', 'tmmn', 'tmmx']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.warning('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        # Informational only, so it is not logged as an error
        logging.info('\nDownloading all variables\n {}'.format(
            ','.join(var_full_list)))
        var_list = var_full_list[:]
    elif not set(variables).issubset(set(var_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n {}'.format(
            variables))
        sys.exit()
    else:
        var_list = variables[:]

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = drigo.raster_ds_osr(daymet_ds)
    daymet_proj = drigo.osr_proj(daymet_osr)
    daymet_cs = drigo.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = drigo.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug(' Projection: {}'.format(daymet_proj))
    logging.debug(' Cellsize: {}'.format(daymet_cs))
    logging.debug(' Geo: {}'.format(daymet_geo))
    logging.debug(' Extent: {}'.format(daymet_extent))
    logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = drigo.project_extent(
            drigo.Extent(output_extent), drigo.epsg_osr(4326),
            daymet_osr, 0.001)
        output_extent = drigo.intersect_extents(
            [daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        output_extent = drigo.project_extent(
            drigo.raster_path_extent(extent_path),
            drigo.raster_path_osr(extent_path), daymet_osr,
            drigo.raster_path_cellsize(extent_path, x_only=True))
        output_extent = drigo.intersect_extents(
            [daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]

    # Array offsets of the subset inside the full DAYMET grid
    xi, yi = drigo.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug(' Shape: {} {}'.format(output_rows, output_cols))
    logging.debug(' Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    for input_var in var_list:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        if input_var == 'prcp':
            output_var = 'ppt'
        else:
            output_var = input_var
        logging.debug("Output name: {}".format(output_var))

        # Build output folders
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)
        if daily_flag:
            # Created up front so the save step below always has a target,
            # even when no input file matched this variable
            daily_ws = os.path.join(var_ws, 'daily')
            if not os.path.isdir(daily_ws):
                os.makedirs(daily_ws)

        # Build output arrays
        logging.debug(' Building arrays')
        if daily_flag:
            daily_sum = np.zeros(
                (365, output_rows, output_cols), np.float64)
            daily_count = np.zeros(
                (365, output_rows, output_cols), np.uint8)
        if monthly_flag:
            monthly_sum = np.zeros(
                (12, output_rows, output_cols), np.float64)
            monthly_count = np.zeros(
                (12, output_rows, output_cols), np.uint8)
        if annual_flag:
            annual_sum = np.zeros((output_rows, output_cols), np.float64)
            # Incremented once per valid day across all years, so an 8-bit
            # counter would wrap within the first year
            annual_count = np.zeros((output_rows, output_cols), np.uint32)

        # Process each file/year separately
        for input_name in sorted(os.listdir(netcdf_ws)):
            logging.debug(" {}".format(input_name))
            input_match = daymet_re.match(input_name)
            if not input_match:
                logging.debug(' Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug(' Variable didn\'t match, skipping')
                continue
            year_str = input_match.group('YEAR')
            logging.info(" Year: {}".format(year_str))
            year_int = int(year_str)
            if year_int < start_dt.year:
                logging.debug(' Before start date, skipping')
                continue
            elif year_int > end_dt.year:
                logging.debug(' After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)
            if not os.path.isfile(input_raster):
                logging.debug(
                    ' Input raster doesn\'t exist, skipping {}'.format(
                        input_raster))
                continue

            # Per-year monthly accumulators
            if monthly_flag:
                monthly_temp_sum = np.zeros(
                    (12, output_rows, output_cols), np.float64)
                monthly_temp_count = np.zeros(
                    (12, output_rows, output_cols), np.uint8)

            # Read in the DAYMET NetCDF file; the context manager closes it
            # even if one of the reads below raises
            with netCDF4.Dataset(input_raster, 'r') as input_nc_f:
                # logging.debug(input_nc_f.variables)

                # Check all valid dates in the year
                year_dates = _utils.date_range(
                    dt.datetime(year_int, 1, 1),
                    dt.datetime(year_int + 1, 1, 1))
                for date_dt in year_dates:
                    logging.debug(' {}'.format(date_dt.date()))

                    # Zero based day-of-year and month indices
                    doy_i = int(date_dt.strftime('%j')) - 1
                    month_i = date_dt.month - 1

                    # Arrays are being read as masked array with a fill value
                    # Convert to basic numpy array arrays with nan values
                    try:
                        input_ma = input_nc_f.variables[input_var][
                            doy_i, yi: yi + output_rows,
                            xi: xi + output_cols]
                    except IndexError:
                        logging.info(' date not in netcdf, skipping')
                        continue
                    input_nodata = float(input_ma.fill_value)
                    output_array = input_ma.data.astype(np.float32)
                    output_array[output_array == input_nodata] = np.nan
                    output_mask = np.isfinite(output_array)

                    # Convert Kelvin to Celsius
                    # NOTE(review): the validated variable names are
                    #   'tmmn'/'tmmx', so this branch is never taken as
                    #   written - confirm the intended names and units
                    if input_var in ['tmax', 'tmin']:
                        output_array -= 273.15

                    # Save values
                    if daily_flag:
                        daily_sum[doy_i, :, :] += output_array
                        daily_count[doy_i, :, :] += output_mask
                    if monthly_flag:
                        monthly_temp_sum[month_i, :, :] += output_array
                        monthly_temp_count[month_i, :, :] += output_mask
                    if annual_flag:
                        annual_sum[:, :] += output_array
                        annual_count[:, :] += output_mask

                    # Cleanup
                    del input_ma, output_array, output_mask

            # Fold this year into the multi-year monthly totals
            if monthly_flag:
                if input_var == 'prcp':
                    # Precipitation uses the monthly total
                    monthly_sum += monthly_temp_sum
                else:
                    # Everything else uses the monthly mean
                    monthly_sum += monthly_temp_sum / monthly_temp_count
                # Is this the right count?
                # NOTE(review): np.any over axis 0 flags a pixel as counted
                #   for all 12 months if any month had data - confirm
                monthly_count += np.any(monthly_temp_count, axis=0)
                del monthly_temp_sum, monthly_temp_count

        # Save the projected climatology arrays
        if daily_flag:
            for doy_i in range(daily_sum.shape[0]):
                daily_name = daily_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    doy=doy_i + 1)
                daily_path = os.path.join(daily_ws, daily_name)
                drigo.array_to_raster(
                    daily_sum[doy_i, :, :] / daily_count[doy_i, :, :],
                    daily_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
            del daily_sum, daily_count
        if monthly_flag:
            for month_i in range(monthly_sum.shape[0]):
                monthly_name = monthly_fmt.format(
                    var=output_var, start=start_year, end=end_year,
                    month=month_i + 1)
                monthly_path = os.path.join(var_ws, monthly_name)
                drigo.array_to_raster(
                    monthly_sum[month_i, :, :] / monthly_count[month_i, :, :],
                    monthly_path, output_geo=output_geo,
                    output_proj=daymet_proj, stats_flag=stats_flag)
            del monthly_sum, monthly_count
        if annual_flag:
            annual_name = annual_fmt.format(
                var=output_var, start=start_year, end=end_year)
            annual_path = os.path.join(var_ws, annual_name)
            drigo.array_to_raster(
                annual_sum / annual_count,
                annual_path, output_geo=output_geo,
                output_proj=daymet_proj, stats_flag=stats_flag)
            del annual_sum, annual_count

    logging.debug('\nScript Complete')
def main(ini_path, tile_list=None, overwrite_flag=False, mp_procs=1): """Prep Landsat path/row specific data Parameters ---------- ini_path : str File path of the input parameters file. tile_list : list, optional Landsat path/rows to process (i.e. [p045r043, p045r033]). This will override the tile list in the INI file. overwrite_flag : bool, optional If True, overwrite existing files (the default is False). mp_procs : int, optional Number of cores to use (the default is 1). Returns ------- None """ logging.info('\nPrepare path/row data') # Open config file config = python_common.open_ini(ini_path) # Get input parameters logging.debug(' Reading Input File') year = config.getint('INPUTS', 'year') if tile_list is None: tile_list = python_common.read_param('tile_list', [], config, 'INPUTS') project_ws = config.get('INPUTS', 'project_folder') logging.debug(' Year: {}'.format(year)) logging.debug(' Path/rows: {}'.format(', '.join(tile_list))) logging.debug(' Project: {}'.format(project_ws)) # study_area_path = config.get('INPUTS', 'study_area_path') footprint_path = config.get('INPUTS', 'footprint_path') # For now, assume the UTM zone file is colocated with the footprints shapefile utm_path = python_common.read_param( 'utm_path', os.path.join(os.path.dirname(footprint_path), 'wrs2_tile_utm_zones.json'), config, 'INPUTS') skip_list_path = python_common.read_param('skip_list_path', '', config, 'INPUTS') landsat_flag = python_common.read_param('landsat_flag', True, config, 'INPUTS') ledaps_flag = False dem_flag = python_common.read_param('dem_flag', True, config, 'INPUTS') nlcd_flag = python_common.read_param('nlcd_flag', True, config, 'INPUTS') cdl_flag = python_common.read_param('cdl_flag', False, config, 'INPUTS') landfire_flag = python_common.read_param('landfire_flag', False, config, 'INPUTS') field_flag = python_common.read_param('field_flag', False, config, 'INPUTS') tile_gcs_buffer = python_common.read_param('tile_buffer', 0.25, config) # Input/output folder and 
file paths if landsat_flag: landsat_input_ws = config.get('INPUTS', 'landsat_input_folder') else: landsat_input_ws = None # if ledaps_flag: # ledaps_input_ws = config.get('INPUTS', 'ledaps_input_folder') # else: # ledaps_input_ws = None if dem_flag: dem_input_ws = config.get('INPUTS', 'dem_input_folder') dem_tile_fmt = config.get('INPUTS', 'dem_tile_fmt') dem_output_ws = config.get('INPUTS', 'dem_output_folder') dem_output_name = python_common.read_param('dem_output_name', 'dem.img', config) # dem_output_name = config.get('INPUTS', 'dem_output_name') else: dem_input_ws, dem_tile_fmt = None, None dem_output_ws, dem_output_name = None, None if nlcd_flag: nlcd_input_path = config.get('INPUTS', 'nlcd_input_path') nlcd_output_ws = config.get('INPUTS', 'nlcd_output_folder') nlcd_output_fmt = python_common.read_param('nlcd_output_fmt', 'nlcd_{:04d}.img', config) else: nlcd_input_path, nlcd_output_ws, nlcd_output_fmt = None, None, None if cdl_flag: cdl_input_path = config.get('INPUTS', 'cdl_input_path') cdl_ag_list = config.get('INPUTS', 'cdl_ag_list') cdl_ag_list = list(python_common.parse_int_set(cdl_ag_list)) # default_cdl_ag_list = range(1,62) + range(66,78) + range(204,255) # cdl_ag_list = python_common.read_param( # 'cdl_ag_list', default_cdl_ag_list, config) # cdl_ag_list = list(map(int, cdl_ag_list)) # cdl_non_ag_list = python_common.read_param( # 'cdl_non_ag_list', [], config) cdl_output_ws = config.get('INPUTS', 'cdl_output_folder') cdl_output_fmt = python_common.read_param('cdl_output_fmt', 'cdl_{:04d}.img', config) cdl_ag_output_fmt = python_common.read_param('cdl_ag_output_fmt', 'cdl_ag_{:04d}.img', config) else: cdl_input_path, cdl_ag_list = None, None cdl_output_ws, cdl_output_fmt, cdl_ag_output_fmt = None, None, None if landfire_flag: landfire_input_path = config.get('INPUTS', 'landfire_input_path') landfire_ag_list = config.get('INPUTS', 'landfire_ag_list') landfire_ag_list = list(python_common.parse_int_set(landfire_ag_list)) # default_landfire_ag_list = 
range(3960,4000) # landfire_ag_list = python_common.read_param( # 'landfire_ag_list', default_landfire_ag_list, config) # landfire_ag_list = list(map(int, landfire_ag_list)) landfire_output_ws = config.get('INPUTS', 'landfire_output_folder') landfire_output_fmt = python_common.read_param('landfire_output_fmt', 'landfire_{:04d}.img', config) landfire_ag_output_fmt = python_common.read_param( 'landfire_ag_output_fmt', 'landfire_ag_{:04d}.img', config) else: landfire_input_path, landfire_ag_list = None, None landfire_output_ws = None landfire_output_fmt, landfire_ag_output_fmt = None, None if field_flag: field_input_path = config.get('INPUTS', 'field_input_path') field_output_ws = config.get('INPUTS', 'field_output_folder') field_output_fmt = python_common.read_param('field_output_fmt', 'fields_{:04d}.img', config) else: field_input_path = None field_output_ws, field_output_fmt = None, None # File/folder names orig_data_folder_name = 'ORIGINAL_DATA' # Check inputs folders/paths logging.info('\nChecking input folders/files') file_check(footprint_path) file_check(utm_path) if landsat_flag: folder_check(landsat_input_ws) # if ledaps_flag: # folder_check(ledaps_input_ws) if dem_flag: folder_check(dem_input_ws) if nlcd_flag: file_check(nlcd_input_path) if cdl_flag: file_check(cdl_input_path) if landfire_flag: # Landfire will likely be an ESRI grid (set as a folder) if not (os.path.isdir(landfire_input_path) or os.path.isfile(landfire_input_path)): logging.error('\n {} does not exist'.format(landfire_input_path)) if field_flag: file_check(field_input_path) if skip_list_path: file_check(skip_list_path) # Build output folders if not os.path.isdir(project_ws): os.makedirs(project_ws) if dem_flag and not os.path.isdir(dem_output_ws): os.makedirs(dem_output_ws) if nlcd_flag and not os.path.isdir(nlcd_output_ws): os.makedirs(nlcd_output_ws) if cdl_flag and not os.path.isdir(cdl_output_ws): os.makedirs(cdl_output_ws) if landfire_flag and not os.path.isdir(landfire_output_ws): 
os.makedirs(landfire_output_ws) if field_flag and not os.path.isdir(field_output_ws): os.makedirs(field_output_ws) # For now assume path/row are two digit numbers tile_fmt = 'p{:03d}r{:03d}' tile_re = re.compile('p(\d{3})r(\d{3})') image_re = re.compile( '^(LT04|LT05|LE07|LC08)_(\d{3})(\d{3})_(\d{4})(\d{2})(\d{2})') snap_cs = 30 snap_xmin, snap_ymin = (15, 15) # Set snap environment parameters env = drigo.env env.cellsize = snap_cs env.snap_xmin, env.snap_ymin = snap_xmin, snap_ymin # Use WGSS84 (EPSG 4326) for GCS spatial reference # Could also use NAD83 (EPSG 4269) # gcs_epsg = 4326 # gcs_osr = epsg_osr(4326) # gcs_proj = osr_proj(gcs_osr) # Landsat Footprints (WRS2 Descending Polygons) logging.debug('\nFootprint (WRS2 descending should be GCS84):') tile_gcs_osr = drigo.feature_path_osr(footprint_path) logging.debug(' OSR: {}'.format(tile_gcs_osr)) # Doublecheck that WRS2 descending shapefile is GCS84 # if tile_gcs_osr != epsg_osr(4326): # logging.error(' WRS2 is not GCS84') # sys.exit() # Get geometry for each path/row tile_gcs_wkt_dict = path_row_wkt_func(footprint_path, path_field='PATH', row_field='ROW') # Get UTM zone for each path/row # DEADBEEF - Using "eval" is considered unsafe and should be changed tile_utm_zone_dict = eval(open(utm_path, 'r').read()) # Project study area geometry to GCS coordinates # logging.debug('\nStudy area') # study_area_geom = feature_path_geom_union(study_area_path) # study_area_gcs_geom = study_area_geom.Clone() # study_area_gcs_geom.TransformTo(tile_gcs_osr) # Get list of all intersecting Landsat path/rows # logging.info('\nLandsat path/rows') # tile_list = [] # for tile_name, tile_gcs_wkt in tile_gcs_wkt_dict.items(): # tile_gcs_geom = ogr.CreateGeometryFromWkt(tile_gcs_wkt) # if tile_gcs_geom.Intersects(study_area_gcs_geom): # tile_list.append(tile_name) # for tile_name in sorted(tile_list): # logging.debug(' {}'.format(tile_name)) # Check that each path/row extent and UTM zone exist logging.info('\nChecking path/row list 
against footprint shapefile') for tile_name in sorted(tile_list): if tile_name not in tile_gcs_wkt_dict.keys(): logging.error( ' {} feature not in footprint shapefile'.format(tile_name)) continue elif tile_name not in tile_utm_zone_dict.keys(): logging.error( ' {} UTM zone not in footprint shapefile'.format(tile_name)) continue elif tile_utm_zone_dict[tile_name] == 0: logging.error((' UTM zone is not set for {} in ' + 'footprint shapefile').format(tile_name)) continue # Build output folders for each path/row logging.info('\nBuilding path/row folders') for tile_name in tile_list: logging.debug(' {} {}'.format(year, tile_name)) tile_output_ws = os.path.join(project_ws, str(year), tile_name) if ((landsat_flag or ledaps_flag) and not os.path.isdir(tile_output_ws)): os.makedirs(tile_output_ws) if (dem_flag and not os.path.isdir(os.path.join(dem_output_ws, tile_name))): os.makedirs(os.path.join(dem_output_ws, tile_name)) if (nlcd_flag and not os.path.isdir(os.path.join(nlcd_output_ws, tile_name))): os.makedirs(os.path.join(nlcd_output_ws, tile_name)) if (cdl_flag and not os.path.isdir(os.path.join(cdl_output_ws, tile_name))): os.makedirs(os.path.join(cdl_output_ws, tile_name)) if (landfire_flag and not os.path.isdir( os.path.join(landfire_output_ws, tile_name))): os.makedirs(os.path.join(landfire_output_ws, tile_name)) if (field_flag and not os.path.isdir(os.path.join(field_output_ws, tile_name))): os.makedirs(os.path.join(field_output_ws, tile_name)) # Read skip list if (landsat_flag or ledaps_flag) and skip_list_path: logging.debug('\nReading scene skiplist') with open(skip_list_path) as skip_list_f: skip_list = skip_list_f.readlines() skip_list = [ scene.strip() for scene in skip_list if image_re.match(scene.strip()) ] else: logging.debug('\nSkip list not set in INI') skip_list = [] # Copy and unzip raw Landsat scenes # Use these for thermal band, MTL file (scene time), and to run FMask if landsat_flag: logging.info('\nExtract raw Landsat scenes') # Process each 
path/row extract_targz_list = [] for tile_name in tile_list: tile_output_ws = os.path.join(project_ws, str(year), tile_name) # path/row as strings with leading zeros path, row = map(str, tile_re.match(tile_name).groups()) tile_input_ws = os.path.join(landsat_input_ws, path, row, str(year)) if not os.path.isdir(tile_input_ws): continue logging.info(' {} {}'.format(year, tile_name)) # Process each tar.gz file for input_name in sorted(os.listdir(tile_input_ws)): if (not image_re.match(input_name) and not input_name.endswith('.tar.gz')): continue # Get Landsat scene ID from tar.gz file name # DEADBEEF - For now this is the EE scene ID, but it could be # changed to the full collection 1 ID scene_id = input_name.split('.')[0] # Output workspace image_output_ws = os.path.join(tile_output_ws, scene_id) orig_data_ws = os.path.join(image_output_ws, orig_data_folder_name) if skip_list and scene_id in skip_list: logging.debug(' {} - Skipping scene'.format(scene_id)) # DEADBEEF - Should the script always remove the scene # if it is in the skip list? # Maybe only if overwrite is set? 
if os.path.isdir(image_output_ws): # input('Press ENTER to delete {}'.format(scene_id)) shutil.rmtree(image_output_ws) continue # If orig_data_ws doesn't exist, don't check images if not os.path.isdir(orig_data_ws): os.makedirs(orig_data_ws) elif (not overwrite_flag and landsat_files_check(image_output_ws)): continue # Extract Landsat tar.gz file input_path = os.path.join(tile_input_ws, input_name) print(orig_data_ws) # sys.exit() if mp_procs > 1: extract_targz_list.append([input_path, orig_data_ws]) else: python_common.extract_targz_func(input_path, orig_data_ws) # # Use a command line call # input_path = os.path.join(tile_input_ws, input_name) # if job_i % pbs_jobs != 0: # job_list.append('tar -zxvf {} -C {} &\n'.format( # input_path, orig_data_ws)) # else: # job_list.append('tar -zxvf {} -C {}\n'.format( # input_path, orig_data_ws)) # # job_list.append('tar -zxvf {} -C {} &\n'.format( # # input_path, orig_data_ws)) # # job_list.append('wait\n') # job_i += 1 # Extract Landsat tar.gz files using multiprocessing if extract_targz_list: pool = mp.Pool(mp_procs) results = pool.map(python_common.extract_targz_mp, extract_targz_list, chunksize=1) pool.close() pool.join() del results, pool # Get projected extent for each path/row # This should probably be in a function if (dem_flag or nlcd_flag or cdl_flag or landfire_flag or field_flag): tile_utm_extent_dict = gcs_to_utm_dict(tile_list, tile_utm_zone_dict, tile_gcs_osr, tile_gcs_wkt_dict, tile_gcs_buffer, snap_xmin, snap_ymin, snap_cs) # Mosaic DEM tiles for each path/row if dem_flag: logging.info('\nBuild DEM for each path/row') mosaic_mp_list = [] for tile_name in tile_list: # Output folder and path tile_output_path = os.path.join(dem_output_ws, tile_name, dem_output_name) if not overwrite_flag and os.path.isfile(tile_output_path): logging.debug(' {} already exists, skipping'.format( os.path.basename(tile_output_path))) continue logging.info(' {}'.format(tile_name)) # Get the path/row geometry in GCS for selecting 
intersecting tiles tile_gcs_geom = ogr.CreateGeometryFromWkt( tile_gcs_wkt_dict[tile_name]) # Apply a small buffer (in degrees) to the extent # DEADBEEF - Buffer fails if GDAL is not built with GEOS support # tile_gcs_geom = tile_gcs_geom.Buffer(tile_gcs_buffer) tile_gcs_extent = drigo.Extent(tile_gcs_geom.GetEnvelope()) tile_gcs_extent = tile_gcs_extent.ogrenv_swap() tile_gcs_extent.buffer_extent(tile_gcs_buffer) # tile_gcs_extent.ymin, tile_gcs_extent.xmax = tile_gcs_extent.xmax, tile_gcs_extent.ymin # Offsets are needed since tile name is upper left corner of tile # Tile n36w120 spans -120 <-> -119 and 35 <-> 36 lon_list = range( int(tile_gcs_extent.xmin) - 1, int(tile_gcs_extent.xmax)) lat_list = range( int(tile_gcs_extent.ymin) + 1, int(tile_gcs_extent.ymax) + 2) # Get list of DEM tile rasters dem_tile_list = [] for lat, lon in itertools.product(lat_list, lon_list): # Convert sign of lat/lon to letter lat = ('n' + '{:02d}'.format(abs(lat)) if lat >= 0 else 's' + '{:02d}'.format(abs(lat))) lon = ('w' + '{:03d}'.format(abs(lon)) if lon < 0 else 'e' + '{:03d}'.format(abs(lon))) dem_tile_path = os.path.join(dem_input_ws, dem_tile_fmt.format(lat, lon)) if os.path.isfile(dem_tile_path): dem_tile_list.append(dem_tile_path) if not dem_tile_list: logging.warning(' WARNING: No DEM tiles were selected') continue # Mosaic tiles using mosaic function tile_utm_osr = drigo.epsg_osr(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_proj = drigo.epsg_proj(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_extent = tile_utm_extent_dict[tile_name] tile_utm_ullr = tile_utm_extent.ul_lr_swap() # Mosaic, clip, project using custom function if mp_procs > 1: mosaic_mp_list.append([ dem_tile_list, tile_output_path, tile_utm_proj, snap_cs, tile_utm_extent ]) else: drigo.mosaic_tiles(dem_tile_list, tile_output_path, tile_utm_osr, snap_cs, tile_utm_extent) # Cleanup del tile_output_path del tile_gcs_geom, tile_gcs_extent, tile_utm_extent del tile_utm_osr, tile_utm_proj del lon_list, 
lat_list, dem_tile_list # Mosaic DEM rasters using multiprocessing if mosaic_mp_list: pool = mp.Pool(mp_procs) results = pool.map(mosaic_tiles_mp, mosaic_mp_list, chunksize=1) pool.close() pool.join() del results, pool # Project/clip NLCD for each path/row if nlcd_flag: logging.info('\nBuild NLCD for each path/row') project_mp_list = [] for tile_name in tile_list: nlcd_output_path = os.path.join(nlcd_output_ws, tile_name, nlcd_output_fmt.format(year)) if not overwrite_flag and os.path.isfile(nlcd_output_path): logging.debug(' {} already exists, skipping'.format( os.path.basename(nlcd_output_path))) continue logging.info(' {}'.format(tile_name)) # Set the nodata value on the NLCD raster if it is not set nlcd_ds = gdal.Open(nlcd_input_path, 0) nlcd_band = nlcd_ds.GetRasterBand(1) nlcd_nodata = nlcd_band.GetNoDataValue() nlcd_ds = None if nlcd_nodata is None: nlcd_nodata = 255 # Clip and project tile_utm_osr = drigo.epsg_osr(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_proj = drigo.epsg_proj(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_extent = tile_utm_extent_dict[tile_name] tile_utm_ullr = tile_utm_extent.ul_lr_swap() if mp_procs > 1: project_mp_list.append([ nlcd_input_path, nlcd_output_path, gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs, tile_utm_extent, nlcd_nodata ]) else: drigo.project_raster(nlcd_input_path, nlcd_output_path, gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs, tile_utm_extent, nlcd_nodata) # Cleanup del nlcd_output_path del nlcd_ds, nlcd_band, nlcd_nodata del tile_utm_osr, tile_utm_proj, tile_utm_extent # Project NLCD rasters using multiprocessing if project_mp_list: pool = mp.Pool(mp_procs) results = pool.map(drigo.project_raster_mp, project_mp_list, chunksize=1) pool.close() pool.join() del results, pool # Project/clip CDL for each path/row if cdl_flag: logging.info('\nBuild CDL for each path/row') project_mp_list, remap_mp_list = [], [] for tile_name in tile_list: cdl_output_path = os.path.join(cdl_output_ws, tile_name, 
cdl_output_fmt.format(year)) cdl_ag_output_path = os.path.join(cdl_output_ws, tile_name, cdl_ag_output_fmt.format(year)) if not os.path.isfile(cdl_input_path): logging.error('\n\n {} does not exist'.format(cdl_input_path)) sys.exit() if not overwrite_flag and os.path.isfile(cdl_output_path): logging.debug(' {} already exists, skipping'.format( os.path.basename(cdl_output_path))) continue logging.info(' {}'.format(tile_name)) # Set the nodata value on the CDL raster if it is not set cdl_ds = gdal.Open(cdl_input_path, 0) cdl_band = cdl_ds.GetRasterBand(1) cdl_nodata = cdl_band.GetNoDataValue() cdl_ds = None if cdl_nodata is None: cdl_nodata = 255 # Clip and project tile_utm_osr = drigo.epsg_osr(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_proj = drigo.epsg_proj(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_extent = tile_utm_extent_dict[tile_name] if mp_procs > 1: project_mp_list.append([ cdl_input_path, cdl_output_path, gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs, tile_utm_extent, cdl_nodata ]) remap_mp_list.append( [cdl_output_path, cdl_ag_output_path, cdl_ag_list]) else: drigo.project_raster(cdl_input_path, cdl_output_path, gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs, tile_utm_extent, cdl_nodata) # Build a mask of CDL ag lands remap_mask_func(cdl_output_path, cdl_ag_output_path, cdl_ag_list) # Cleanup del cdl_output_path del cdl_ds, cdl_band, cdl_nodata del tile_utm_osr, tile_utm_proj, tile_utm_extent # Project CDL rasters using multiprocessing if project_mp_list: pool = mp.Pool(mp_procs) results = pool.map(drigo.project_raster_mp, project_mp_list, chunksize=1) pool.close() pool.join() del results, pool if remap_mp_list: pool = mp.Pool(mp_procs) results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1) pool.close() pool.join() del results, pool # Project/clip LANDFIRE for each path/row if landfire_flag: logging.info('\nBuild LANDFIRE for each path/row') project_mp_list, remap_mp_list = [], [] for tile_name in tile_list: 
landfire_output_path = os.path.join( landfire_output_ws, tile_name, landfire_output_fmt.format(year)) landfire_ag_output_path = os.path.join( landfire_output_ws, tile_name, landfire_ag_output_fmt.format(year)) if not overwrite_flag and os.path.isfile(landfire_output_path): logging.debug(' {} already exists, skipping'.format( os.path.basename(landfire_output_path))) continue logging.info(' {}'.format(tile_name)) # Set the nodata value on the LANDFIRE raster if it is not set # landfire_ds = gdal.Open(landfire_input_path, 0) # landfire_band = landfire_ds.GetRasterBand(1) # landfire_nodata = landfire_band.GetNoDataValue() # landfire_ds = None # if landfire_nodata is None: # landfire_nodata = 32767 # del landfire_ds, landfire_band landfire_nodata = 32767 # Clip and project tile_utm_osr = drigo.epsg_osr(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_proj = drigo.epsg_proj(32600 + int(tile_utm_zone_dict[tile_name])) tile_utm_extent = tile_utm_extent_dict[tile_name] if mp_procs > 1: project_mp_list.append([ landfire_input_path, landfire_output_path, gdal.GRA_NearestNeighbour, tile_utm_proj, snap_cs, tile_utm_extent, landfire_nodata ]) remap_mp_list.append([ landfire_output_path, landfire_ag_output_path, landfire_ag_list ]) else: drigo.project_raster(landfire_input_path, landfire_output_path, gdal.GRA_NearestNeighbour, tile_utm_osr, snap_cs, tile_utm_extent, landfire_nodata) # Build a mask of LANDFIRE ag lands remap_mask_func(landfire_output_path, landfire_ag_output_path, landfire_ag_list) # Cleanup del landfire_output_path del tile_utm_osr, tile_utm_proj, tile_utm_extent # Project LANDFIRE rasters using multiprocessing if project_mp_list: pool = mp.Pool(mp_procs) results = pool.map(drigo.project_raster_mp, project_mp_list, chunksize=1) pool.close() pool.join() del results, pool if remap_mp_list: pool = mp.Pool(mp_procs) results = pool.map(remap_mask_mp, remap_mp_list, chunksize=1) pool.close() pool.join() del results, pool # Convert field shapefiles to raster if 
field_flag: logging.info('\nBuild field rasters for each path/row') for tile_name in tile_list: logging.info(' {}'.format(tile_name)) tile_output_ws = os.path.join(field_output_ws, tile_name) # Shapefile paths field_proj_name = ( os.path.splitext(field_output_fmt.format(year))[0] + "_wgs84z{}.shp".format(tile_utm_zone_dict[tile_name])) field_proj_path = os.path.join(tile_output_ws, field_proj_name) field_output_path = os.path.join(tile_output_ws, field_output_fmt.format(year)) if not overwrite_flag and os.path.isfile(field_output_path): logging.debug(' {} already exists, skipping'.format( os.path.basename(field_output_path))) continue # The ogr2ogr spatial query is in the input spatial reference # Project the path/row extent to the field osr/proj field_input_osr = drigo.feature_path_osr(field_input_path) tile_utm_osr = drigo.epsg_osr(32600 + int(tile_utm_zone_dict[tile_name])) # field_input_proj = drigo.osr_proj(field_input_osr) # tile_utm_proj = drigo.osr_proj(tile_utm_osr) field_tile_extent = drigo.project_extent( tile_utm_extent_dict[tile_name], tile_utm_osr, field_input_osr, 30) # Project shapefile to the path/row zone # Clipping requires GDAL to be built with GEOS support subprocess.call( [ 'ogr2ogr', '-t_srs', 'EPSG:326{}'.format( tile_utm_zone_dict[tile_name]), '-f', 'ESRI Shapefile', '-overwrite' ] + ['-spat'] + list(map(str, field_tile_extent)) + ['-clipdst'] + list(map(str, tile_utm_extent_dict[tile_name])) + # ['-clipdst'] + list(map(str, tile_utm_extent_dict[tile_name])) + # ['-clipsrc'] + list(map(str, field_tile_extent)) + # ['-clipsrc'] + list(map(str, field_tile_extent)) + [field_proj_path, field_input_path]) # Convert shapefile to raster field_mem_ds = drigo.polygon_to_raster_ds( field_proj_path, nodata_value=0, burn_value=1, output_osr=tile_utm_osr, output_extent=tile_utm_extent_dict[tile_name]) field_output_driver = drigo.raster_driver(field_output_path) if field_output_path.lower().endswith('.img'): field_output_ds = 
field_output_driver.CreateCopy( field_output_path, field_mem_ds, 0, ['COMPRESS=YES']) else: field_output_ds = field_output_driver.CreateCopy( field_output_path, field_mem_ds, 0) field_output_ds, field_mem_ds = None, None # Remove field shapefile # try: # remove_file(field_proj_path) # except: # pass # Cleanup del tile_utm_osr, field_tile_extent, field_input_osr # del tile_utm_proj, field_input_proj del field_proj_name, field_proj_path, field_output_path logging.debug('\nScript complete')
def main(grb_ws, ancillary_ws, output_ws, scene_list_path=None,
         start_dt=None, end_dt=None, times_str='',
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract hourly NLDAS vapour pressure rasters

    One 24 band raster is written per day to
    <output_ws>/ea/<YEAR>/ea_<YYYYMMDD>_hourly_nldas.img.
    Band N holds the vapour pressure for UTC hour N - 1.

    Parameters
    ----------
    grb_ws : str
        Folder of NLDAS GRB files (walked as <YEAR>/<DOY> sub folders).
    ancillary_ws : str
        Folder of ancillary rasters ('nldas_mask.img', 'nldas_elev.img').
    output_ws : str
        Folder of output rasters.
    scene_list_path : str, optional
        Landsat scene keep list file path.
    start_dt : datetime, optional
        Start date.
    end_dt : datetime, optional
        End date.
    times_str : str, optional
        Comma separated values and/or ranges of UTC hours
        (i.e. "1, 2, 5-8"). Parsed with _utils.parse_int_set().
        All 24 hours are processed if not set.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    bool or None
        False if the inputs are not sufficient to run (no dates could be
        determined, the mask raster could not be opened, or the extent
        file does not exist), otherwise None.

    """
    logging.info('\nExtracting NLDAS vapour pressure rasters')

    # Raw strings keep the patterns unchanged while avoiding the invalid
    #   escape sequence warnings raised for '\d' in normal string literals
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')
    # Landsat Custom Scene ID
    landsat_re = re.compile(
        r'^(?:LT04|LT05|LE07|LC08)_\d{6}_(?P<DATE>\d{8})')
    # Folder name patterns, compiled once instead of on every walk step
    year_re = re.compile(r'\d{4}')
    doy_re = re.compile(r'\d{3}')

    output_folder = 'ea'
    output_fmt = 'ea_{:04d}{:02d}{:02d}_hourly_nldas.img'

    # Only process specific hours
    if not times_str:
        hour_list = range(0, 24, 1)
    else:
        hour_list = list(_utils.parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in hour_list]

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')
    elev_path = os.path.join(ancillary_ws, 'nldas_elev.img')

    # Process Landsat scene list and start/end input parameters
    if not scene_list_path and (not start_dt or not end_dt):
        logging.error(
            '\nERROR: A Landsat scene list or start/end dates must be set, '
            'exiting\n')
        return False
    if scene_list_path is not None and os.path.isfile(scene_list_path):
        # Build a date list from the Landsat scene keep list file
        logging.info('\nReading dates from scene keep list file')
        logging.info(' {}'.format(scene_list_path))
        with open(scene_list_path) as input_f:
            keep_list = input_f.readlines()
        date_list = sorted([
            dt.datetime.strptime(m.group('DATE'), '%Y%m%d').strftime('%Y-%m-%d')
            for image_id in keep_list
            for m in [landsat_re.match(image_id)] if m])
        logging.debug(' {}'.format(', '.join(date_list)))
    else:
        date_list = []
    if start_dt and end_dt:
        logging.debug(' Start date: {}'.format(start_dt))
        logging.debug(' End date: {}'.format(end_dt))
    elif date_list:
        start_dt = dt.datetime.strptime(date_list[0], '%Y-%m-%d')
        end_dt = dt.datetime.strptime(date_list[-1], '%Y-%m-%d')
    else:
        # The scene list was missing or had no matching scene IDs, so the
        #   date range cannot be inferred (previously an IndexError)
        logging.error(
            '\nERROR: No dates could be read from the scene list and '
            'start/end dates are not set, exiting\n')
        return False

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    if nldas_ds is None:
        logging.error(
            '\nERROR: The NLDAS mask raster could not be opened, exiting\n'
            ' {}'.format(mask_path))
        return False
    nldas_osr = drigo.raster_ds_osr(nldas_ds)
    nldas_proj = drigo.osr_proj(nldas_osr)
    nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = drigo.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug(' Projection: {}'.format(nldas_proj))
    logging.debug(' Cellsize: {}'.format(nldas_cs))
    logging.debug(' Geo: {}'.format(nldas_geo))
    logging.debug(' Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        nldas_extent = drigo.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        # Log the snapped extent, not the user input logged above
        logging.debug(' Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error(
                '\nThe extent object does not exist, exiting\n'
                ' {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            nldas_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = drigo.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        logging.debug(' Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Air pressure is computed once from elevation and reused for every hour
    elev_array = drigo.raster_to_array(
        elev_path, mask_extent=nldas_extent, return_nodata=False)
    pair_array = refet.calcs._air_pressure(elev_array)

    # Build output folder
    var_ws = os.path.join(output_ws, output_folder)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Each sub folder in the main folder has all imagery for 1 day
    # The path for each subfolder is the /YYYY/DOY
    # Iterate all available files and check dates if necessary
    for root, folders, files in os.walk(grb_ws):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        # The length check guards single component roots (i.e. 'nldas'),
        #   which have no parent folder name to test
        if (len(root_split) >= 2 and
                year_re.match(root_split[-2]) and
                doy_re.match(root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}'.format(root_dt.date()))
            if ((start_dt is not None and root_dt < start_dt) or
                    (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif year_re.match(root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year) or
                    (end_dt is not None and root_year > end_dt.year)):
                # Clearing the list in place stops os.walk from descending
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        output_name = output_fmt.format(
            root_dt.year, root_dt.month, root_dt.day)
        output_path = os.path.join(var_ws, str(root_dt.year), output_name)
        logging.debug(' {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug(' File already exists, skipping')
                continue
            else:
                logging.debug(' File already exists, removing existing')
                os.remove(output_path)
        logging.debug(' {}'.format(root))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        drigo.build_empty_raster(
            output_path, band_cnt=24, output_dtype=np.float32,
            output_proj=nldas_proj, output_cs=nldas_cs,
            output_extent=nldas_extent, output_fill_flag=True)

        # Iterate through hourly files
        for input_name in sorted(files):
            logging.info(' {}'.format(input_name))
            input_path = os.path.join(root, input_name)
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug(
                    ' Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(
                int(input_match.group('YEAR')),
                int(input_match.group('MONTH')),
                int(input_match.group('DAY')))
            time_str = input_match.group('TIME')
            # Bands are 1 based, so hour 00 is written to band 1
            band_num = int(time_str[:2]) + 1
            if time_str not in time_list:
                logging.debug(' Time not in list, skipping')
                continue
            logging.debug(' Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug(' Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Compute vapour pressure from specific humidity
            input_ds = gdal.Open(input_path)
            sph_array = drigo.raster_ds_to_array(
                input_ds, band=input_band_dict['Specific humidity [kg/kg]'],
                mask_extent=nldas_extent, return_nodata=False)
            ea_array = refet.calcs._actual_vapor_pressure(
                q=sph_array, pair=pair_array)

            # Save the projected array as 32-bit floats
            drigo.array_to_comp_raster(
                ea_array.astype(np.float32), output_path, band=band_num)

            del sph_array
            input_ds = None

        # Statistics are computed once per day after all bands are written
        if stats_flag:
            drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')
def main(grb_ws, ancillary_ws, output_ws, variables=['pr'],
         scene_list_path=None, start_dt=None, end_dt=None, times_str='',
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract NLDAS target variable(s)

    One 24 band raster is written per variable and day to
    <output_ws>/<VAR>/<YEAR>/<VAR>_<YYYYMMDD>_hourly_nldas.img.
    Band N holds the values for UTC hour N - 1.

    Parameters
    ----------
    grb_ws : str
        Folder of NLDAS GRB files (walked as <YEAR>/<DOY> sub folders).
    ancillary_ws : str
        Folder of ancillary rasters ('nldas_mask.img').
    output_ws : str
        Folder of output rasters.
    variables : list
        NLDAS variables to extract (the default is ['pr']).
        Choices: 'pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs'.
        The default list is never modified by this function.
    scene_list_path : str, optional
        Landsat scene keep list file path.
    start_dt : datetime, optional
        Start date.
    end_dt : datetime, optional
        End date.
    times_str : str, optional
        Comma separated values and/or ranges of UTC hours
        (i.e. "1, 2, 5-8"). Parsed with _utils.parse_int_set().
        All 24 hours are processed if not set.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    bool or None
        False if the inputs are not sufficient to run (no dates could be
        determined, the mask raster could not be opened, or the extent
        file does not exist), otherwise None.

    Raises
    ------
    SystemExit
        If the variables parameter is empty, not a list, or contains an
        unsupported variable name.

    """
    logging.info('\nExtract NLDAS target variable(s)')

    # Raw strings keep the patterns unchanged while avoiding the invalid
    #   escape sequence warnings raised for '\d' in normal string literals
    # input_fmt = 'NLDAS_FORA0125_H.A{:04d}{:02d}{:02d}.{}.002.grb'
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})'
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')
    output_fmt = '{}_{:04d}{:02d}{:02d}_hourly_nldas.img'
    # Landsat Custom Scene ID
    landsat_re = re.compile(
        r'^(?:LT04|LT05|LE07|LC08)_\d{6}_(?P<DATE>\d{8})')
    # Folder name patterns, compiled once instead of on every walk step
    year_re = re.compile(r'\d{4}')
    doy_re = re.compile(r'\d{3}')

    # Only process specific hours
    if not times_str:
        hour_list = range(0, 24, 1)
    else:
        hour_list = list(_utils.parse_int_set(times_str))
    time_list = ['{:02d}00'.format(t) for t in hour_list]

    # NLDAS rasters to extract
    data_full_list = ['pr', 'srad', 'sph', 'tair', 'tmmn', 'tmmx', 'vs']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif not set(variables).issubset(set(data_full_list)):
        logging.error('\nERROR: variables parameter is invalid\n {}'.format(
            variables))
        sys.exit()

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')

    # Process Landsat scene list and start/end input parameters
    if not scene_list_path and (not start_dt or not end_dt):
        logging.error(
            '\nERROR: A Landsat scene list or start/end dates must be set, '
            'exiting\n')
        return False
    if scene_list_path is not None and os.path.isfile(scene_list_path):
        # Build a date list from the Landsat scene keep list file
        logging.info('\nReading dates from scene keep list file')
        logging.info(' {}'.format(scene_list_path))
        with open(scene_list_path) as input_f:
            keep_list = input_f.readlines()
        date_list = sorted([
            dt.datetime.strptime(m.group('DATE'), '%Y%m%d').strftime('%Y-%m-%d')
            for image_id in keep_list
            for m in [landsat_re.match(image_id)] if m])
        logging.debug(' {}'.format(', '.join(date_list)))
    else:
        date_list = []
    if start_dt and end_dt:
        logging.debug(' Start date: {}'.format(start_dt))
        logging.debug(' End date: {}'.format(end_dt))
    elif date_list:
        start_dt = dt.datetime.strptime(date_list[0], '%Y-%m-%d')
        end_dt = dt.datetime.strptime(date_list[-1], '%Y-%m-%d')
    else:
        # The scene list was missing or had no matching scene IDs, so the
        #   date range cannot be inferred (previously an IndexError)
        logging.error(
            '\nERROR: No dates could be read from the scene list and '
            'start/end dates are not set, exiting\n')
        return False

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    if nldas_ds is None:
        logging.error(
            '\nERROR: The NLDAS mask raster could not be opened, exiting\n'
            ' {}'.format(mask_path))
        return False
    nldas_osr = drigo.raster_ds_osr(nldas_ds)
    nldas_proj = drigo.osr_proj(nldas_osr)
    nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = drigo.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug(' Projection: {}'.format(nldas_proj))
    logging.debug(' Cellsize: {}'.format(nldas_cs))
    logging.debug(' Geo: {}'.format(nldas_geo))
    logging.debug(' Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        nldas_extent = drigo.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        # Log the snapped extent, not the user input logged above
        logging.debug(' Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error(
                '\nThe extent object does not exist, exiting\n'
                ' {}'.format(extent_path))
            return False
        elif extent_path.lower().endswith('.shp'):
            nldas_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = drigo.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        logging.debug(' Extent: {}'.format(nldas_extent))
    logging.debug('')

    # NLDAS band name dictionary (variable name -> GRIB band description)
    # Temperature and wind are handled separately below
    nldas_band_dict = dict()
    nldas_band_dict['pr'] = 'Total precipitation [kg/m^2]'
    nldas_band_dict['srad'] = 'Downward shortwave radiation flux [W/m^2]'
    nldas_band_dict['sph'] = 'Specific humidity [kg/kg]'
    nldas_band_dict['tair'] = 'Temperature [C]'
    nldas_band_dict['tmmn'] = 'Temperature [C]'
    nldas_band_dict['tmmx'] = 'Temperature [C]'
    nldas_band_dict['vs'] = [
        'u-component of wind [m/s]', 'v-component of wind [m/s]']

    # Process each variable
    logging.info('\nReading NLDAS GRIBs')
    for input_var in variables:
        logging.info("Variable: {}".format(input_var))

        # Build output folder
        var_ws = os.path.join(output_ws, input_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Each sub folder in the main folder has all imagery for 1 day
        # The path for each subfolder is the /YYYY/DOY
        # Iterate all available files and check dates if necessary
        for root, folders, files in os.walk(grb_ws):
            root_split = os.path.normpath(root).split(os.sep)

            # If the year/doy is outside the range, skip
            # The length check guards single component roots (i.e. 'nldas'),
            #   which have no parent folder name to test
            if (len(root_split) >= 2 and
                    year_re.match(root_split[-2]) and
                    doy_re.match(root_split[-1])):
                root_dt = dt.datetime.strptime('{}_{}'.format(
                    root_split[-2], root_split[-1]), '%Y_%j')
                logging.info('{}-{:02d}-{:02d}'.format(
                    root_dt.year, root_dt.month, root_dt.day))
                if ((start_dt is not None and root_dt < start_dt) or
                        (end_dt is not None and root_dt > end_dt)):
                    continue
                elif (date_list and
                        root_dt.date().isoformat() not in date_list):
                    continue
            # If the year is outside the range, don't search subfolders
            elif year_re.match(root_split[-1]):
                root_year = int(root_split[-1])
                logging.info('Year: {}'.format(root_year))
                if ((start_dt is not None and root_year < start_dt.year) or
                        (end_dt is not None and root_year > end_dt.year)):
                    # Clearing the list in place stops os.walk from descending
                    folders[:] = []
                else:
                    folders[:] = sorted(folders)
                continue
            else:
                continue

            # Create a single raster for each day with 24 bands
            # Each time step will be stored in a separate band
            output_name = output_fmt.format(
                input_var, root_dt.year, root_dt.month, root_dt.day)
            output_path = os.path.join(
                var_ws, str(root_dt.year), output_name)
            logging.debug(' {}'.format(output_path))
            if os.path.isfile(output_path):
                if not overwrite_flag:
                    logging.debug(' File already exists, skipping')
                    continue
                else:
                    logging.debug(' File already exists, removing existing')
                    os.remove(output_path)
            logging.debug(' {}'.format(root))
            if not os.path.isdir(os.path.dirname(output_path)):
                os.makedirs(os.path.dirname(output_path))
            drigo.build_empty_raster(
                output_path, band_cnt=24, output_dtype=np.float32,
                output_proj=nldas_proj, output_cs=nldas_cs,
                output_extent=nldas_extent, output_fill_flag=True)

            # Iterate through hourly files
            for input_name in sorted(files):
                logging.info(' {}'.format(input_name))
                input_path = os.path.join(root, input_name)
                input_match = input_re.match(input_name)
                if input_match is None:
                    logging.debug(
                        ' Regular expression didn\'t match, skipping')
                    continue
                input_dt = dt.datetime(
                    int(input_match.group('YEAR')),
                    int(input_match.group('MONTH')),
                    int(input_match.group('DAY')))
                time_str = input_match.group('TIME')
                # Bands are 1 based, so hour 00 is written to band 1
                band_num = int(time_str[:2]) + 1
                if time_str not in time_list:
                    logging.debug(' Time not in list, skipping')
                    continue
                logging.debug(' Time: {} {}'.format(
                    input_dt.date(), time_str))
                logging.debug(' Band: {}'.format(band_num))

                # Determine band numbering/naming
                input_band_dict = grib_band_names(input_path)

                # Extract array and save
                input_ds = gdal.Open(input_path)

                if input_var in ['tair', 'tmmx', 'tmmn']:
                    # The temperature band may be labeled as either K or C
                    if 'Temperature [K]' in input_band_dict.keys():
                        temp_band_units = 'K'
                    elif 'Temperature [C]' in input_band_dict.keys():
                        temp_band_units = 'C'
                    else:
                        logging.error('Unknown Temperature units, skipping')
                        logging.error(' {}'.format(input_band_dict.keys()))
                        # Release the dataset before skipping the file
                        input_ds = None
                        continue
                    output_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[
                            'Temperature [{}]'.format(temp_band_units)],
                        mask_extent=nldas_extent, return_nodata=False)

                    # DEADBEEF - Having issue with T appearing to be C but
                    #   labeled as K
                    # Guess the units of the values from whichever
                    #   reference mean (20 C or 293 K) is closer
                    temp_mean = float(np.nanmean(output_array))
                    temp_units_dict = {20: 'C', 293: 'K'}
                    temp_array_units = temp_units_dict[
                        min(temp_units_dict, key=lambda x: abs(x - temp_mean))]

                    # Output values are always written in C
                    if temp_array_units == 'K' and temp_band_units == 'K':
                        logging.debug(' Converting temperature from K to C')
                        output_array -= 273.15
                    elif temp_array_units == 'C' and temp_band_units == 'C':
                        pass
                    elif temp_array_units == 'C' and temp_band_units == 'K':
                        logging.debug(
                            (' Temperature units are K in the GRB band name, ' +
                             'but values appear to be C\n Mean temperature: {:.2f}\n' +
                             ' Values will NOT be adjusted').format(temp_mean))
                    elif temp_array_units == 'K' and temp_band_units == 'C':
                        logging.debug(
                            (' Temperature units are C in the GRB band name, ' +
                             'but values appear to be K\n Mean temperature: {:.2f}\n' +
                             ' Values will be adjusted from K to C').format(temp_mean))
                        output_array -= 273.15

                # Compute wind speed from vectors
                elif input_var == 'vs':
                    wind_u_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['u-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    wind_v_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict['v-component of wind [m/s]'],
                        mask_extent=nldas_extent, return_nodata=False)
                    output_array = np.sqrt(
                        wind_u_array ** 2 + wind_v_array ** 2)

                # Read all other variables directly
                else:
                    output_array = drigo.raster_ds_to_array(
                        input_ds,
                        band=input_band_dict[nldas_band_dict[input_var]],
                        mask_extent=nldas_extent, return_nodata=False)

                # Save the projected array as 32-bit floats
                drigo.array_to_comp_raster(
                    output_array.astype(np.float32), output_path,
                    band=band_num)

                del output_array
                input_ds = None

            # Statistics are computed once per day after all bands are written
            if stats_flag:
                drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), variables=None,
         start_date=None, end_date=None,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract daily DAYMET rasters from the yearly NetCDF files

    Parameters
    ----------
    netcdf_ws : str
        Folder of DAYMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters (must contain 'daymet_mask.img').
    output_ws : str
        Folder of output rasters.
    variables : list, optional
        DAYMET variables to extract ('prcp', 'srad', 'vp', 'tmmn', 'tmmx').
        Set as ['all'] to process all variables.
        If None, only 'prcp' is processed.
    start_date : str, optional
        ISO format date (YYYY-MM-DD).
        If not set or not parsable, 2015-01-01 is used.
    end_date : str, optional
        ISO format date (YYYY-MM-DD).
        If not set or not parsable, 2015-12-31 is used.
    extent_path : str, optional
        File path defining the output extent.
        Only used when output_extent is None.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None

    Notes
    -----
    The script exits (sys.exit) if the variables parameter is empty,
    is not a list, or contains an unsupported name.
    The 'prcp' variable is written to a 'ppt' output folder/file name.

    """
    logging.info('\nExtracting DAYMET variables')

    # Use None as the default instead of a shared mutable list
    if variables is None:
        variables = ['prcp']

    # If a date is not set (or can't be parsed), process 2015
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2015, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2015, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')

    # Raw string so the regex escapes are not treated as string escapes
    daymet_re = re.compile(
        r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na\.nc4$')

    # DAYMET rasters to extract
    var_full_list = ['prcp', 'srad', 'vp', 'tmmn', 'tmmx']
    if not variables:
        logging.error('\nERROR: variables parameter is empty\n')
        sys.exit()
    elif not isinstance(variables, list):
        # DEADBEEF - I could try converting comma separated strings to lists?
        logging.error('\nERROR: variables parameter must be a list\n')
        sys.exit()
    elif 'all' in variables:
        logging.info('\nDownloading all variables\n {}'.format(
            ','.join(var_full_list)))
        var_list = var_full_list[:]
    elif not set(variables).issubset(set(var_full_list)):
        logging.error(
            '\nERROR: variables parameter is invalid\n {}'.format(variables))
        sys.exit()
    else:
        var_list = variables[:]

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = drigo.raster_ds_osr(daymet_ds)
    daymet_proj = drigo.osr_proj(daymet_osr)
    daymet_cs = drigo.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = drigo.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug(' Projection: {}'.format(daymet_proj))
    logging.debug(' Cellsize: {}'.format(daymet_cs))
    logging.debug(' Geo: {}'.format(daymet_geo))
    logging.debug(' Extent: {}'.format(daymet_extent))
    logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = drigo.project_extent(
            drigo.Extent(output_extent), drigo.epsg_osr(4326),
            daymet_osr, 0.001)
        output_extent = drigo.intersect_extents(
            [daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            output_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            output_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        output_extent = drigo.project_extent(
            output_extent, extent_osr, daymet_osr, extent_cs)
        output_extent = drigo.intersect_extents(
            [daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]

    # Offsets/shape of the output window within the full DAYMET grid
    xi, yi = drigo.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug(' Shape: {} {}'.format(output_rows, output_cols))
    logging.debug(' Offsets: {} {} (x y)'.format(xi, yi))

    # Process each variable
    for input_var in var_list:
        logging.info("\nVariable: {}".format(input_var))

        # Rename variables to match cimis
        output_var = 'ppt' if input_var == 'prcp' else input_var

        # Build output folder
        var_ws = os.path.join(output_ws, output_var)
        if not os.path.isdir(var_ws):
            os.makedirs(var_ws)

        # Process each file in the input workspace
        for input_name in sorted(os.listdir(netcdf_ws)):
            logging.debug("{}".format(input_name))
            input_match = daymet_re.match(input_name)
            if not input_match:
                logging.debug(' Regular expression didn\'t match, skipping')
                continue
            elif input_match.group('VAR') != input_var:
                logging.debug(' Variable didn\'t match, skipping')
                continue
            year_str = input_match.group('YEAR')
            logging.info(" Year: {}".format(year_str))
            year_int = int(year_str)
            if year_int < start_dt.year:
                logging.debug(' Before start date, skipping')
                continue
            elif year_int > end_dt.year:
                logging.debug(' After end date, skipping')
                continue

            # Build input file path
            input_raster = os.path.join(netcdf_ws, input_name)

            # Build output folder
            output_year_ws = os.path.join(var_ws, year_str)
            if not os.path.isdir(output_year_ws):
                os.makedirs(output_year_ws)

            # Read in the DAYMET NetCDF file
            # try/finally so the file is closed even if a day fails
            input_nc_f = netCDF4.Dataset(input_raster, 'r')
            try:
                # Check all valid dates in the year
                year_dates = _utils.date_range(
                    dt.datetime(year_int, 1, 1),
                    dt.datetime(year_int + 1, 1, 1))
                for date_dt in year_dates:
                    if date_dt < start_dt:
                        logging.debug(
                            ' {} - before start date, skipping'.format(
                                date_dt.date()))
                        continue
                    elif date_dt > end_dt:
                        logging.debug(
                            ' {} - after end date, skipping'.format(
                                date_dt.date()))
                        continue
                    else:
                        logging.info(' {}'.format(date_dt.date()))

                    output_path = os.path.join(
                        output_year_ws, '{}_{}_daymet.img'.format(
                            output_var, date_dt.strftime('%Y%m%d')))
                    if os.path.isfile(output_path):
                        logging.debug(' {}'.format(output_path))
                        if not overwrite_flag:
                            logging.debug(' File already exists, skipping')
                            continue
                        else:
                            logging.debug(
                                ' File already exists, removing existing')
                            os.remove(output_path)

                    # Zero based index of the day along the time axis
                    doy_i = int(date_dt.strftime('%j')) - 1

                    # Arrays are being read as masked array with a
                    #   fill value of -9999
                    # Convert to basic numpy array arrays with nan values
                    try:
                        input_ma = input_nc_f.variables[input_var][
                            doy_i, yi:yi + output_rows,
                            xi:xi + output_cols]
                    except IndexError:
                        logging.info(' date not in netcdf, skipping')
                        continue
                    # Slicing can return a plain array when no pixels are
                    #   masked; force a masked array so fill_value exists
                    input_ma = np.ma.asarray(input_ma)
                    input_nodata = float(input_ma.fill_value)
                    output_array = input_ma.data.astype(np.float32)
                    output_array[output_array == input_nodata] = np.nan

                    # Convert Kelvin to Celsius
                    # NOTE(review): 'tmax'/'tmin' are not in var_full_list
                    #   ('tmmx'/'tmmn' are), so this branch is never taken
                    #   for the accepted variables; confirm the source
                    #   units before changing the names here
                    if input_var in ['tmax', 'tmin']:
                        output_array -= 273.15

                    # Save the array as 32-bit floats
                    drigo.array_to_raster(
                        output_array, output_path,
                        output_geo=output_geo, output_proj=daymet_proj,
                        stats_flag=stats_flag)
                    del input_ma, output_array
            finally:
                input_nc_f.close()
            del input_nc_f

    logging.debug('\nScript Complete')
def main(netcdf_ws=os.getcwd(), ancillary_ws=os.getcwd(),
         output_ws=os.getcwd(), etr_flag=False, eto_flag=False,
         start_date=None, end_date=None, extent_path=None,
         output_extent=None, stats_flag=True, overwrite_flag=False):
    """Extract daily GRIDMET ETr/ETo from the yearly NetCDF files

    One multi-band raster is written per year and product, with each
    day stored in the band matching its day of year.

    Parameters
    ----------
    netcdf_ws : str
        Folder of GRIDMET netcdf files ('eto_{year}.nc', 'etr_{year}.nc').
    ancillary_ws : str
        Folder of ancillary rasters
        (must contain 'gridmet_elev.img' and 'gridmet_lat.img').
    output_ws : str
        Folder of output rasters.
    etr_flag : bool, optional
        If True, extract alfalfa reference ET (ETr) (the default is False).
    eto_flag : bool, optional
        If True, extract grass reference ET (ETo) (the default is False).
        If neither flag is set, ETr is processed.
    start_date : str, optional
        ISO format date (YYYY-MM-DD).
        If not set or not parsable, 2017-01-01 is used.
    end_date : str, optional
        ISO format date (YYYY-MM-DD).
        If not set or not parsable, 2017-12-31 is used.
    extent_path : str, optional
        File path defining the output extent.
        Only used when output_extent is None.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None or bool
        False is returned if extent_path is set but is not a file.

    Notes
    -----
    The script exits (sys.exit) if the clipped elevation or latitude
    ancillary array is entirely nodata.
    A year is skipped if any requested input NetCDF is missing.

    """
    logging.info('\nComputing GRIDMET ETo/ETr')
    np.seterr(invalid='ignore')

    # Compute ETr and/or ETo
    if not etr_flag and not eto_flag:
        logging.info(' ETo/ETr flag(s) not set, defaulting to ETr')
        etr_flag = True

    # If a date is not set (or can't be parsed), process 2017
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # GRIDMET elevation and latitude ancillary rasters
    elev_raster = os.path.join(ancillary_ws, 'gridmet_elev.img')
    lat_raster = os.path.join(ancillary_ws, 'gridmet_lat.img')

    # Requested products, processed in this order
    products = []
    if eto_flag:
        products.append('eto')
    if etr_flag:
        products.append('etr')
    product_label = {'eto': 'ETo', 'etr': 'ETr'}
    raster_fmt = {
        'eto': 'eto_{}_daily_gridmet.img',
        'etr': 'etr_{}_daily_gridmet.img',
    }

    # GRIDMET band name dictionary
    gridmet_band_dict = {
        'eto': 'potential_evapotranspiration',
        'etr': 'potential_evapotranspiration',
    }

    # Get extent/geo from elevation raster
    gridmet_ds = gdal.Open(elev_raster)
    gridmet_osr = drigo.raster_ds_osr(gridmet_ds)
    gridmet_proj = drigo.osr_proj(gridmet_osr)
    gridmet_cs = drigo.raster_ds_cellsize(gridmet_ds, x_only=True)
    gridmet_extent = drigo.raster_ds_extent(gridmet_ds)
    gridmet_full_geo = gridmet_extent.geo(gridmet_cs)
    gridmet_x, gridmet_y = gridmet_extent.origin()
    gridmet_ds = None
    logging.debug(' Projection: {}'.format(gridmet_proj))
    logging.debug(' Cellsize: {}'.format(gridmet_cs))
    logging.debug(' Geo: {}'.format(gridmet_full_geo))
    logging.debug(' Extent: {}'.format(gridmet_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        gridmet_extent = drigo.Extent(output_extent)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        # Log the snapped extent (the raw input was logged above)
        logging.debug(' Extent: {}'.format(gridmet_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if not os.path.isfile(extent_path):
            logging.error('\nThe extent object not exist, exiting\n'
                          ' {}'.format(extent_path))
            return False
        if extent_path.lower().endswith('.shp'):
            # DEADBEEF - Consider moving call into a try/except block
            gridmet_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            gridmet_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        gridmet_extent = drigo.project_extent(
            gridmet_extent, extent_osr, gridmet_osr, extent_cs)
        gridmet_extent.adjust_to_snap(
            'EXPAND', gridmet_x, gridmet_y, gridmet_cs)
        gridmet_geo = gridmet_extent.geo(gridmet_cs)
        logging.debug(' Geo: {}'.format(gridmet_geo))
        logging.debug(' Extent: {}'.format(gridmet_extent))
    else:
        gridmet_geo = gridmet_full_geo

    # Get indices for slicing/clipping input arrays
    g_i, g_j = drigo.array_geo_offsets(
        gridmet_full_geo, gridmet_geo, cs=gridmet_cs)
    g_rows, g_cols = gridmet_extent.shape(cs=gridmet_cs)

    # Flip row indices since GRIDMET arrays are flipped up/down
    # Hard coding GRIDMET row count for now
    gridmet_rows = 585
    row_a, row_b = gridmet_rows - (g_j + g_rows), gridmet_rows - g_j
    col_a, col_b = g_i, g_i + g_cols

    # Read the elevation and latitude arrays
    # These are only used here to check that the output extent has data
    elev_array = drigo.raster_to_array(
        elev_raster, mask_extent=gridmet_extent, return_nodata=False)
    lat_array = drigo.raster_to_array(
        lat_raster, mask_extent=gridmet_extent, return_nodata=False)

    # Check elevation and latitude arrays
    if np.all(np.isnan(elev_array)):
        logging.error('\nERROR: The elevation array is all nodata, exiting\n')
        sys.exit()
    elif np.all(np.isnan(lat_array)):
        logging.error('\nERROR: The latitude array is all nodata, exiting\n')
        sys.exit()

    # Build output folders
    product_ws = {
        'eto': os.path.join(output_ws, 'eto'),
        'etr': os.path.join(output_ws, 'etr'),
    }
    for product in products:
        if not os.path.isdir(product_ws[product]):
            os.makedirs(product_ws[product])

    # Process each year separately
    for year_int in range(start_dt.year, end_dt.year + 1):
        year_str = str(year_int)
        logging.info("\nYear: {}".format(year_str))

        # Build input file paths and skip the year if any are missing
        nc_paths = {
            product: os.path.join(
                netcdf_ws, '{}_{}.nc'.format(product, year_str))
            for product in products}
        missing_flag = False
        for product in products:
            if not os.path.isfile(nc_paths[product]):
                logging.debug(' {} NetCDF doesn\'t exist\n {}'.format(
                    product_label[product], nc_paths[product]))
                missing_flag = True
                break
        if missing_flag:
            continue

        # Create a single raster for each year with 366 bands
        # Each day will be stored in a separate band
        rasters = {
            product: os.path.join(
                product_ws[product], raster_fmt[product].format(year_str))
            for product in products}
        for product in products:
            if overwrite_flag or not os.path.isfile(rasters[product]):
                logging.debug(' {}'.format(rasters[product]))
                drigo.build_empty_raster(
                    rasters[product], band_cnt=366,
                    output_dtype=np.float32, output_proj=gridmet_proj,
                    output_cs=gridmet_cs, output_extent=gridmet_extent,
                    output_fill_flag=True)

        # Read in the GRIDMET NetCDF files
        # Immediately clip input arrays to save memory
        logging.info(' Reading NetCDFs into memory')
        nc_arrays = {}
        for product in products:
            logging.debug(" {}".format(nc_paths[product]))
            nc_f = netCDF4.Dataset(nc_paths[product], 'r')
            try:
                nc_array = nc_f.variables[gridmet_band_dict[product]][
                    :, row_a:row_b, col_a:col_b].copy()
            finally:
                # Close the file even if the read fails
                nc_f.close()
            del nc_f
            # Flip rows back to north up
            nc_array = np.flip(nc_array, 1)

            # A numpy array is returned when slicing a masked array
            #   if there are no masked pixels
            # This is a hack to force the numpy array back to a masked array
            if not isinstance(nc_array, np.ma.core.MaskedArray):
                nc_array = np.ma.core.MaskedArray(
                    nc_array, np.zeros(nc_array.shape, dtype=bool))
            nc_arrays[product] = nc_array
            del nc_array

        # Check all valid dates in the year
        year_dates = _utils.date_range(
            dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
        for date_dt in year_dates:
            if date_dt < start_dt:
                logging.debug(' {} - before start date, skipping'.format(
                    date_dt.date()))
                continue
            elif date_dt > end_dt:
                logging.debug(' {} - after end date, skipping'.format(
                    date_dt.date()))
                continue
            else:
                logging.info(' {}'.format(date_dt.date()))

            # Band number (1 based) and array index (0 based) for the day
            doy = int(date_dt.strftime('%j'))
            doy_i = doy - 1

            # Each product is handled independently so that a day missing
            #   from one NetCDF does not prevent writing the other
            for product in products:
                # Arrays are being read as masked array with a
                #   fill value of -9999
                # Convert to basic numpy array arrays with nan values
                try:
                    day_ma = nc_arrays[product][doy_i, :, :]
                except IndexError:
                    logging.info(' date not in netcdf, skipping')
                    continue
                day_array = day_ma.data.astype(np.float32)
                day_nodata = float(day_ma.fill_value)
                day_array[day_array == day_nodata] = np.nan

                # Since inputs are netcdf, need to create GDAL raster
                #   datasets in order to use gdal_common functions
                # Create an in memory dataset of the clipped array
                day_ds = drigo.array_to_mem_ds(
                    day_array, output_geo=gridmet_geo,
                    output_proj=gridmet_proj)
                # Then extract the subset from the in memory dataset
                day_array = drigo.raster_ds_to_array(
                    day_ds, 1, mask_extent=gridmet_extent,
                    return_nodata=False)

                # Save the array as 32-bit floats in the band for the day
                drigo.array_to_comp_raster(
                    day_array.astype(np.float32), rasters[product],
                    band=doy, stats_flag=False)

                # Cleanup
                del day_ds, day_array, day_ma

        for product in products:
            if stats_flag:
                drigo.raster_statistics(rasters[product])
        del nc_arrays

    # DEADBEEF - Outline of the disabled code for computing ETo/ETr from
    #   the component variables instead of reading them directly:
    #   - Inputs per year: tmmn, tmmx, sph, srad, vs ('{var}_{year}.nc'),
    #     skipping the year if any are missing
    #   - Band names: tmmn/tmmx 'air_temperature',
    #     sph 'specific_humidity', vs 'wind_speed',
    #     srad 'surface_downwelling_shortwave_flux_in_air'
    #   - Arrays were clipped/flipped/masked the same way as above
    #   - Unit adjustments: tmin/tmax -= 273.15, rs *= 0.0864
    #   - pair = refet.calcs._air_pressure(elev=elev_array)
    #   - ea = refet.calcs._actual_vapor_pressure(q=sph, pair=pair)
    #   - refet.Daily(tmin, tmax, ea, rs, uz=wind, zw=10, elev=elev_array,
    #       lat=lat_array (converted to radians), doy=doy, method='asce')
    #     with .etr() / .eto() written to band doy of the yearly rasters

    logging.debug('\nScript Complete')
def main(extent_path, output_folder, overwrite_flag=False):
    """Download NED tiles that intersect the study_area

    Parameters
    ----------
    extent_path : str
        File path to study area shapefile.
    output_folder : str
        Folder path where files will be saved.
        The folder is created if it does not exist.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None or bool
        False is returned if extent_path is not a file or can't be opened.

    Notes
    -----
    Script assumes DEM data is in 1x1 WGS84 degree tiles.
    Download 10m (1/3 arc-second) or 30m (1 arc-second) versions from:
        10m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/13/IMG
        30m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/1/IMG
    For this example, only download 30m DEM.
    Tile names are built as north latitude / west longitude
    ('n{lat}w{lon}'), with the longitude sign flipped.
    Download/extract problems are logged and the remaining tiles are
    still attempted.

    """
    logging.info('\nDownload NED tiles')

    # Error checking (before any spatial reference or network work)
    if not os.path.isfile(extent_path):
        logging.error('\nERROR: The extent_path does not exist\n')
        return False
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    site_url = 'rockyftp.cr.usgs.gov'
    site_folder = 'vdelivery/Datasets/Staged/Elevation/1/IMG'

    zip_fmt = 'n{:02d}w{:03d}.zip'
    tile_fmt = 'imgn{:02d}w{:03d}_1.img'
    # tile_fmt = 'imgn{:02d}w{:03d}_13.img'

    # Use 1 degree snap point and "cellsize" to get 1x1 degree tiles
    tile_osr = drigo.epsg_osr(4326)
    tile_x, tile_y, tile_cs = 0, 0, 1

    # The shapefile is only read, so open it read-only (update=0)
    shp_driver = ogr.GetDriverByName('ESRI Shapefile')
    input_ds = shp_driver.Open(extent_path, 0)
    if input_ds is None:
        logging.error('\nERROR: The extent_path could not be opened\n')
        return False
    input_osr = drigo.feature_ds_osr(input_ds)
    input_layer = input_ds.GetLayer()

    # Get the tiles that intersect the extent of each feature
    lat_lon_set = set()
    input_ftr = input_layer.GetNextFeature()
    while input_ftr:
        input_geom = input_ftr.GetGeometryRef()
        if input_geom is None:
            # Features without a geometry have no extent
            input_ftr = input_layer.GetNextFeature()
            continue
        input_extent = drigo.Extent(input_geom.GetEnvelope())
        input_extent = input_extent.ogrenv_swap()
        input_geom = None
        input_ftr = input_layer.GetNextFeature()
        logging.debug('Input Extent: {}'.format(input_extent))

        # Project study area extent to input raster coordinate system
        output_extent = drigo.project_extent(
            input_extent, input_osr, tile_osr)
        logging.debug('Output Extent: {}'.format(output_extent))

        # Extent needed to select 1x1 degree tiles
        tile_extent = output_extent.copy()
        tile_extent.adjust_to_snap('EXPAND', tile_x, tile_y, tile_cs)
        logging.debug('Tile Extent: {}'.format(tile_extent))

        # Get list of available tiles that intersect the extent
        # Tiles are identified by (north latitude, west longitude)
        lat_lon_set.update(
            (lat, -lon)
            for lon in range(int(tile_extent.xmin), int(tile_extent.xmax))
            for lat in range(
                int(tile_extent.ymax), int(tile_extent.ymin), -1))

    # Release the shapefile
    input_layer = None
    input_ds = None

    lat_lon_list = sorted(lat_lon_set)

    # Attempt to download the tiles
    logging.info('')
    for lat_lon in lat_lon_list:
        logging.info('Tile: {}'.format(lat_lon))
        zip_name = zip_fmt.format(*lat_lon)
        zip_url = '/'.join([site_url, site_folder, zip_name])
        zip_path = os.path.join(output_folder, zip_name)
        tile_name = tile_fmt.format(*lat_lon)
        tile_path = os.path.join(output_folder, tile_name)

        logging.debug(' {}'.format(zip_url))
        logging.debug(' {}'.format(zip_path))
        if os.path.isfile(tile_path) and not overwrite_flag:
            logging.debug(' skipping')
            continue
        _utils.ftp_download(site_url, site_folder, zip_name, zip_path)

        # Extraction is best effort so one bad archive doesn't stop the
        #   remaining tiles; the archive is always closed
        logging.debug(' extracting')
        try:
            with zipfile.ZipFile(zip_path) as zip_f:
                zip_f.extract(tile_name, output_folder)
        except Exception as e:
            logging.info(' Unhandled exception: {}'.format(e))

        # Remove the archive once the tile has been extracted
        try:
            os.remove(zip_path)
        except OSError as e:
            logging.info(' Unhandled exception: {}'.format(e))
def main(ancillary_ws=os.getcwd(), zero_elev_nodata_flag=False,
         overwrite_flag=False):
    """Process DAYMET ancillary data

    Builds the DAYMET latitude and longitude rasters (daymet_lat.img and
    daymet_lon.img) from the elevation raster (daymet_elev.img) found in
    the ancillary workspace.

    Parameters
    ----------
    ancillary_ws : str
        Folder of ancillary rasters.
    zero_elev_nodata_flag : bool, optional
        If True, set elevation nodata values to 0 (the default is False).
        Not referenced by the active code in this function.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None

    Notes
    -----
    The steps that downloaded the ancillary tarball and converted the
    DEM/mask netCDF files to rasters are no longer part of this function,
    so daymet_elev.img must already exist in ancillary_ws.  When it is
    missing, no latitude/longitude rasters are written.

    """
    logging.info('\nProcess DAYMET ancillary rasters')

    # Build output workspace if it doesn't exist
    if not os.path.isdir(ancillary_ws):
        os.makedirs(ancillary_ws)

    # Elevation raster is the input, latitude/longitude are the outputs
    elev_path = os.path.join(ancillary_ws, 'daymet_elev.img')
    lat_path = os.path.join(ancillary_ws, 'daymet_lat.img')
    lon_path = os.path.join(ancillary_ws, 'daymet_lon.img')

    # Spatial reference parameters
    proj4_str = (
        "+proj=lcc +datum=WGS84 +lat_1=25 n "
        "+lat_2=60n +lat_0=42.5n +lon_0=100w")
    grid_osr = drigo.proj4_osr(proj4_str)
    grid_osr.MorphToESRI()
    grid_proj = grid_osr.ExportToWkt()
    cellsize = 1000

    # For now, hardcode the DAYMET extent/geo
    xmin, ymin = -4560750, -3090500
    row_cnt, col_cnt = 8075, 7814
    xmax = xmin + cellsize * col_cnt
    ymax = ymin + cellsize * row_cnt
    grid_extent = drigo.Extent([xmin, ymin, xmax, ymax])
    grid_geo = grid_extent.geo(cellsize)
    logging.debug(" Extent: {}".format(grid_extent))
    logging.debug(" Geo: {}".format(grid_geo))

    # Latitude/Longitude
    # Only (re)build when the DEM is available and either output is missing
    # or an overwrite was requested
    dem_available = os.path.isfile(elev_path)
    outputs_present = os.path.isfile(lat_path) and os.path.isfile(lon_path)
    if dem_available and (overwrite_flag or not outputs_present):
        logging.info('\nDAYMET Latitude/Longitude')
        raster_kwargs = dict(output_geo=grid_geo, output_proj=grid_proj)

        logging.debug(' {}'.format(lat_path))
        lat_grid, lon_grid = drigo.raster_lat_lon_func(
            elev_path, gcs_cs=0.05)
        drigo.array_to_raster(
            lat_grid.astype(np.float32), lat_path, **raster_kwargs)

        logging.debug(' {}'.format(lon_path))
        drigo.array_to_raster(
            lon_grid.astype(np.float32), lon_path, **raster_kwargs)
        del lat_grid, lon_grid

    logging.debug('\nScript Complete')
def main(start_dt, end_dt, netcdf_ws, ancillary_ws, output_ws,
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract DAYMET vapor pressure

    Reads the DAYMET "vp" netCDF files found in netcdf_ws and writes one
    "ea" raster per day to output_ws/ea/YYYY/ea_YYYYMMDD_daymet.img.

    Parameters
    ----------
    start_dt : datetime
        Start date.
    end_dt : datetime
        End date.
    netcdf_ws : str
        Folder of DAYMET netcdf files.
    ancillary_ws : str
        Folder of ancillary rasters.
    output_ws : str
        Folder of output rasters.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        Decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None

    Notes
    -----
    The spatial reference, cellsize and full extent are read from
    daymet_mask.img and the elevation from daymet_elev.img, both in
    ancillary_ws.  If both output_extent and extent_path are set,
    output_extent takes precedence.

    """
    logging.info('\nExtracting DAYMET vapor pressure')
    logging.debug(' Start date: {}'.format(start_dt))
    logging.debug(' End date: {}'.format(end_dt))

    # Get DAYMET spatial reference from an ancillary raster
    mask_raster = os.path.join(ancillary_ws, 'daymet_mask.img')
    elev_raster = os.path.join(ancillary_ws, 'daymet_elev.img')

    # Raw string so that \w and \d are not treated as string escapes
    daymet_re = re.compile(
        r'daymet_v3_(?P<VAR>\w+)_(?P<YEAR>\d{4})_na.nc4$')

    # Get extent/geo from mask raster
    daymet_ds = gdal.Open(mask_raster)
    daymet_osr = drigo.raster_ds_osr(daymet_ds)
    daymet_proj = drigo.osr_proj(daymet_osr)
    daymet_cs = drigo.raster_ds_cellsize(daymet_ds, x_only=True)
    daymet_extent = drigo.raster_ds_extent(daymet_ds)
    daymet_geo = daymet_extent.geo(daymet_cs)
    daymet_x, daymet_y = daymet_extent.origin()
    daymet_ds = None
    logging.debug(' Projection: {}'.format(daymet_proj))
    logging.debug(' Cellsize: {}'.format(daymet_cs))
    logging.debug(' Geo: {}'.format(daymet_geo))
    logging.debug(' Extent: {}'.format(daymet_extent))
    logging.debug(' Origin: {} {}'.format(daymet_x, daymet_y))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        # Assume input extent is in decimal degrees
        output_extent = drigo.project_extent(
            drigo.Extent(output_extent), drigo.epsg_osr(4326),
            daymet_osr, 0.001)
        output_extent = drigo.intersect_extents(
            [daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            output_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            output_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        output_extent = drigo.project_extent(
            output_extent, extent_osr, daymet_osr, extent_cs)
        output_extent = drigo.intersect_extents(
            [daymet_extent, output_extent])
        output_extent.adjust_to_snap('EXPAND', daymet_x, daymet_y, daymet_cs)
        output_geo = output_extent.geo(daymet_cs)
        logging.debug(' Geo: {}'.format(output_geo))
        logging.debug(' Extent: {}'.format(output_extent))
    else:
        output_extent = daymet_extent.copy()
        output_geo = daymet_geo[:]

    # Offsets/shape of the subset within the full DAYMET grid
    xi, yi = drigo.array_geo_offsets(daymet_geo, output_geo, daymet_cs)
    output_rows, output_cols = output_extent.shape(daymet_cs)
    logging.debug(' Shape: {} {}'.format(output_rows, output_cols))
    logging.debug(' Offsets: {} {} (x y)'.format(xi, yi))

    # Read the elevation array
    elev_array = drigo.raster_to_array(
        elev_raster, mask_extent=output_extent, return_nodata=False)
    pair_array = refet.calcs._air_pressure_func(elev_array)
    del elev_array

    # Process each variable
    input_var = 'vp'
    output_var = 'ea'
    logging.info("\nVariable: {}".format(input_var))

    # Build output folder
    var_ws = os.path.join(output_ws, output_var)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Process each file in the input workspace
    for input_name in sorted(os.listdir(netcdf_ws)):
        logging.debug("{}".format(input_name))
        input_match = daymet_re.match(input_name)
        if not input_match:
            logging.debug(' Regular expression didn\'t match, skipping')
            continue
        elif input_match.group('VAR') != input_var:
            logging.debug(' Variable didn\'t match, skipping')
            continue
        year_str = input_match.group('YEAR')
        logging.info(" Year: {}".format(year_str))
        year_int = int(year_str)
        if start_dt is not None and year_int < start_dt.year:
            logging.debug(' Before start date, skipping')
            continue
        elif end_dt is not None and year_int > end_dt.year:
            logging.debug(' After end date, skipping')
            continue

        # Build input file path
        input_raster = os.path.join(netcdf_ws, input_name)

        # Build output folder
        output_year_ws = os.path.join(var_ws, year_str)
        if not os.path.isdir(output_year_ws):
            os.makedirs(output_year_ws)

        # Read in the DAYMET NetCDF file
        # try/finally so the file is closed even if a date fails to process
        input_nc_f = netCDF4.Dataset(input_raster, 'r')
        try:
            # Check all valid dates in the year
            year_dates = _utils.date_range(
                dt.datetime(year_int, 1, 1), dt.datetime(year_int + 1, 1, 1))
            for date_dt in year_dates:
                if start_dt is not None and date_dt < start_dt:
                    logging.debug(
                        ' {} - before start date, skipping'.format(
                            date_dt.date()))
                    continue
                elif end_dt is not None and date_dt > end_dt:
                    logging.debug(
                        ' {} - after end date, skipping'.format(
                            date_dt.date()))
                    continue
                else:
                    logging.info(' {}'.format(date_dt.date()))

                output_path = os.path.join(
                    output_year_ws, '{}_{}_daymet.img'.format(
                        output_var, date_dt.strftime('%Y%m%d')))
                if os.path.isfile(output_path):
                    logging.debug(' {}'.format(output_path))
                    if not overwrite_flag:
                        logging.debug(' File already exists, skipping')
                        continue
                    else:
                        logging.debug(
                            ' File already exists, removing existing')
                        os.remove(output_path)

                # Zero based index of the date along the time dimension
                doy_i = int(date_dt.strftime('%j')) - 1

                # Arrays are being read as masked array with a fill value
                # of -9999
                # Convert to basic numpy array arrays with nan values
                try:
                    input_ma = input_nc_f.variables[input_var][
                        doy_i, yi:yi + output_rows, xi:xi + output_cols]
                except IndexError:
                    # The file has no record for this day of year
                    logging.info(' date not in netcdf, skipping')
                    continue
                input_nodata = float(input_ma.fill_value)
                sph_array = input_ma.data.astype(np.float32)
                sph_array[sph_array == input_nodata] = np.nan

                # NOTE(review): input_var is 'vp' (vapor pressure), but the
                # values are passed through a specific humidity to vapor
                # pressure conversion.  Presumably the DAYMET vp values are
                # already a vapor pressure and only need a unit conversion -
                # TODO confirm against the DAYMET documentation before
                # relying on these rasters.
                # Compute ea [kPa] from specific humidity [kg/kg]
                ea_array = (
                    (sph_array * pair_array) / (0.622 + 0.378 * sph_array))

                # Save the array as 32-bit floats
                drigo.array_to_raster(
                    ea_array.astype(np.float32), output_path,
                    output_geo=output_geo, output_proj=daymet_proj,
                    stats_flag=stats_flag)

                del input_ma, ea_array, sph_array
        finally:
            input_nc_f.close()
        del input_nc_f

    logging.debug('\nScript Complete')
def main(grb_ws=os.getcwd(), ancillary_ws=os.getcwd(), output_ws=os.getcwd(),
         keep_list_path=None, start_date=None, end_date=None, times_str='',
         extent_path=None, output_extent=None,
         stats_flag=True, overwrite_flag=False):
    """Extract hourly NLDAS wind rasters

    Walks grb_ws looking for YYYY/DOY sub folders of hourly NLDAS GRIB
    files and writes one 24 band raster per day (one band per UTC hour) to
    output_ws/wind/YYYY/.  The wind value is the magnitude of the u and v
    wind components.

    Parameters
    ----------
    grb_ws : str
        Folder of NLDAS GRB files.
    ancillary_ws : str
        Folder of ancillary rasters.
    output_ws : str
        Folder of output rasters.
    keep_list_path : str, optional
        Landsat scene keep list file path.
    start_date : str, optional
        ISO format date (YYYY-MM-DD).
    end_date : str, optional
        ISO format date (YYYY-MM-DD).
    times_str : str, optional
        Comma separated values and/or ranges of UTC hours (i.e. "1, 2, 5-8").
        Parsed with _utils.parse_int_set().
        If empty, all 24 hours are processed.
    extent_path : str, optional
        File path defining the output extent.
    output_extent : list, optional
        List decimal degrees values defining output extent.
    stats_flag : bool, optional
        If True, compute raster statistics (the default is True).
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    None

    Notes
    -----
    If start_date or end_date is not set (or cannot be parsed as
    YYYY-MM-DD), 2017-01-01 and 2017-12-31 are used respectively.

    """
    logging.info('\nExtracting NLDAS wind rasters')

    # Raw strings so that \d is not treated as a string escape
    input_re = re.compile(
        r'NLDAS_FORA0125_H.A(?P<YEAR>\d{4})(?P<MONTH>\d{2})' +
        r'(?P<DAY>\d{2}).(?P<TIME>\d{4}).002.grb$')
    year_re = re.compile(r'\d{4}')
    doy_re = re.compile(r'\d{3}')

    output_folder = 'wind'
    output_fmt = 'wind_{:04d}{:02d}{:02d}_hourly_nldas.img'

    # If a date is not set, process 2017
    # strptime raises TypeError for None and ValueError for a bad format
    try:
        start_dt = dt.datetime.strptime(start_date, '%Y-%m-%d')
        logging.debug(' Start date: {}'.format(start_dt))
    except (TypeError, ValueError):
        start_dt = dt.datetime(2017, 1, 1)
        logging.info(' Start date: {}'.format(start_dt))
    try:
        end_dt = dt.datetime.strptime(end_date, '%Y-%m-%d')
        logging.debug(' End date: {}'.format(end_dt))
    except (TypeError, ValueError):
        end_dt = dt.datetime(2017, 12, 31)
        logging.info(' End date: {}'.format(end_dt))

    # Only process a specific hours
    if not times_str:
        time_list = range(0, 24, 1)
    else:
        time_list = list(_utils.parse_int_set(times_str))
    # Hours are compared against the HHMM string in the GRIB file name
    time_list = ['{:02d}00'.format(t) for t in time_list]

    # Ancillary raster paths
    mask_path = os.path.join(ancillary_ws, 'nldas_mask.img')

    # Build a date list from the Landsat scene keep list file
    date_list = []
    if keep_list_path is not None and os.path.isfile(keep_list_path):
        logging.info('\nReading dates from scene keep list file')
        logging.info(' {}'.format(keep_list_path))
        landsat_re = re.compile(
            r'^(?:LT04|LT05|LE07|LC08)_(?:\d{3})(?:\d{3})_' +
            r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})')
        with open(keep_list_path) as input_f:
            keep_list = input_f.readlines()
        keep_list = [
            image_id.strip() for image_id in keep_list
            if landsat_re.match(image_id.strip())]
        # Characters 12-20 of a matching ID are the YYYYMMDD date
        date_list = [
            dt.datetime.strptime(
                image_id[12:20], '%Y%m%d').strftime('%Y-%m-%d')
            for image_id in keep_list]
        logging.debug(' {}'.format(', '.join(date_list)))

    # Get the NLDAS spatial reference from the mask raster
    nldas_ds = gdal.Open(mask_path)
    nldas_osr = drigo.raster_ds_osr(nldas_ds)
    nldas_proj = drigo.osr_proj(nldas_osr)
    nldas_cs = drigo.raster_ds_cellsize(nldas_ds, x_only=True)
    nldas_extent = drigo.raster_ds_extent(nldas_ds)
    nldas_geo = nldas_extent.geo(nldas_cs)
    nldas_x, nldas_y = nldas_extent.origin()
    nldas_ds = None
    logging.debug(' Projection: {}'.format(nldas_proj))
    logging.debug(' Cellsize: {}'.format(nldas_cs))
    logging.debug(' Geo: {}'.format(nldas_geo))
    logging.debug(' Extent: {}'.format(nldas_extent))

    # Subset data to a smaller extent
    if output_extent is not None:
        logging.info('\nComputing subset extent & geo')
        logging.debug(' Extent: {}'.format(output_extent))
        nldas_extent = drigo.Extent(output_extent)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        # Log the snapped extent (the raw input was already logged above)
        logging.debug(' Extent: {}'.format(nldas_extent))
    elif extent_path is not None:
        logging.info('\nComputing subset extent & geo')
        if extent_path.lower().endswith('.shp'):
            nldas_extent = drigo.feature_path_extent(extent_path)
            extent_osr = drigo.feature_path_osr(extent_path)
            extent_cs = None
        else:
            nldas_extent = drigo.raster_path_extent(extent_path)
            extent_osr = drigo.raster_path_osr(extent_path)
            extent_cs = drigo.raster_path_cellsize(extent_path, x_only=True)
        nldas_extent = drigo.project_extent(
            nldas_extent, extent_osr, nldas_osr, extent_cs)
        nldas_extent.adjust_to_snap('EXPAND', nldas_x, nldas_y, nldas_cs)
        nldas_geo = nldas_extent.geo(nldas_cs)
        logging.debug(' Geo: {}'.format(nldas_geo))
        logging.debug(' Extent: {}'.format(nldas_extent))
    logging.debug('')

    # Read the NLDAS mask array if present
    # NOTE(review): mask_array is not referenced again in this function
    if mask_path and os.path.isfile(mask_path):
        mask_array, mask_nodata = drigo.raster_to_array(
            mask_path, mask_extent=nldas_extent, fill_value=0,
            return_nodata=True)
        mask_array = mask_array != mask_nodata
    else:
        mask_array = None

    # Build output folder
    var_ws = os.path.join(output_ws, output_folder)
    if not os.path.isdir(var_ws):
        os.makedirs(var_ws)

    # Each sub folder in the main folder has all imagery for 1 day
    # The path for each subfolder is the /YYYY/DOY

    # Iterate all available files and check dates if necessary
    logging.info('\nReading NLDAS GRIBs')
    for root, folders, files in os.walk(grb_ws, topdown=True):
        root_split = os.path.normpath(root).split(os.sep)

        # If the year/doy is outside the range, skip
        # The length check avoids an IndexError when root has a single
        # component (e.g. a relative grb_ws such as '.')
        if (len(root_split) >= 2 and
                year_re.match(root_split[-2]) and
                doy_re.match(root_split[-1])):
            root_dt = dt.datetime.strptime(
                '{}_{}'.format(root_split[-2], root_split[-1]), '%Y_%j')
            logging.info('{}-{:02d}-{:02d}'.format(
                root_dt.year, root_dt.month, root_dt.day))
            if ((start_dt is not None and root_dt < start_dt) or
                    (end_dt is not None and root_dt > end_dt)):
                continue
            elif date_list and root_dt.date().isoformat() not in date_list:
                continue
        # If the year is outside the range, don't search subfolders
        elif year_re.match(root_split[-1]):
            root_year = int(root_split[-1])
            logging.info('Year: {}'.format(root_year))
            if ((start_dt is not None and root_year < start_dt.year) or
                    (end_dt is not None and root_year > end_dt.year)):
                # Clearing the list in place stops os.walk from descending
                folders[:] = []
            else:
                folders[:] = sorted(folders)
            continue
        else:
            continue

        # Create a single raster for each day with 24 bands
        # Each time step will be stored in a separate band
        output_name = output_fmt.format(
            root_dt.year, root_dt.month, root_dt.day)
        output_path = os.path.join(
            var_ws, str(root_dt.year), output_name)
        logging.debug(' {}'.format(output_path))
        if os.path.isfile(output_path):
            if not overwrite_flag:
                logging.debug(' File already exists, skipping')
                continue
            else:
                logging.debug(' File already exists, removing existing')
                os.remove(output_path)
        logging.debug(' {}'.format(root))
        if not os.path.isdir(os.path.dirname(output_path)):
            os.makedirs(os.path.dirname(output_path))
        drigo.build_empty_raster(
            output_path, band_cnt=24, output_dtype=np.float32,
            output_proj=nldas_proj, output_cs=nldas_cs,
            output_extent=nldas_extent, output_fill_flag=True)

        # Iterate through hourly files
        for input_name in sorted(files):
            logging.info(' {}'.format(input_name))
            input_path = os.path.join(root, input_name)
            input_match = input_re.match(input_name)
            if input_match is None:
                logging.debug(
                    ' Regular expression didn\'t match, skipping')
                continue
            input_dt = dt.datetime(
                int(input_match.group('YEAR')),
                int(input_match.group('MONTH')),
                int(input_match.group('DAY')))
            time_str = input_match.group('TIME')
            # Bands are 1 based, hour 00 goes to band 1
            band_num = int(time_str[:2]) + 1
            if time_str not in time_list:
                logging.debug(' Time not in list, skipping')
                continue
            logging.debug(' Time: {} {}'.format(input_dt.date(), time_str))
            logging.debug(' Band: {}'.format(band_num))

            # Determine band numbering/naming
            input_band_dict = grib_band_names(input_path)

            # Compute magnitude of wind from components
            input_ds = gdal.Open(input_path)
            wind_u_array = drigo.raster_ds_to_array(
                input_ds,
                band=input_band_dict['u-component of wind [m/s]'],
                mask_extent=nldas_extent, return_nodata=False)
            wind_v_array = drigo.raster_ds_to_array(
                input_ds,
                band=input_band_dict['v-component of wind [m/s]'],
                mask_extent=nldas_extent, return_nodata=False)
            wind_array = np.sqrt(wind_u_array ** 2 + wind_v_array ** 2)

            # Save the projected array as 32-bit floats
            drigo.array_to_comp_raster(
                wind_array.astype(np.float32), output_path, band=band_num)

            del wind_array, wind_u_array, wind_v_array
            input_ds = None

        if stats_flag:
            drigo.raster_statistics(output_path)

    logging.debug('\nScript Complete')
def main(extent_path, output_folder, overwrite_flag=False):
    """Download NED tiles that intersect the study_area

    The extent of every feature in the study area shapefile is projected
    to WGS84 (EPSG:4326), expanded to whole degrees, and converted to a list
    of 1x1 degree tile keys.  The zip file for each key is looked up in the
    link list retrieved from the server, downloaded, and the IMG raster is
    extracted into output_folder as n{lat}w{lon}.img.  The zip is removed
    afterwards.

    Parameters
    ----------
    extent_path : str
        File path to study area shapefile.
    output_folder : str
        Folder path where files will be saved.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    Returns
    -------
    bool or None
        False if extent_path does not exist or cannot be opened as a
        shapefile, otherwise None.

    Notes
    -----
    Script assumes DEM data is in 1x1 WGS84 degree tiles.
    Download 10m (1/3 arc-second) or 30m (1 arc-second) versions from:
        10m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/13/IMG
        30m: rockyftp.cr.usgs.gov/vdelivery/Datasets/Staged/Elevation/1/IMG
    For this example, only download 30m DEM.

    Tile keys are built as n{lat}w{lon}, i.e. the longitude sign is
    flipped, so only north/west tiles can be named by this script.

    """
    logging.info('\nDownload NED tiles')

    # Error checking (done first so nothing else runs for a bad input path)
    if not os.path.isfile(extent_path):
        logging.error('\nERROR: The input_path does not exist\n')
        return False
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    # site_url = 'rockyftp.cr.usgs.gov'
    site_url = 'https://prd-tnm.s3.amazonaws.com'

    # site_folder = 'vdelivery/Datasets/Staged/Elevation/1/IMG'
    site_folder = 'StagedProducts/Elevation/1/IMG'

    # This path is what must be queried to list the links
    site_file_list_path = 'https://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Elevation/1/IMG/'

    # Raw strings so that \w and \d are not treated as string escapes
    # Compiled once since they are applied to every link/zip member
    zip_re = re.compile(r'[\w]*(n\d{2}w\d{3})[\w]*.zip')
    img_re = re.compile(r'[\w]*(n\d{2}w\d{3})[\w]*.img$')

    # Use 1 degree snap point and "cellsize" to get 1x1 degree tiles
    tile_osr = drigo.epsg_osr(4326)
    tile_x, tile_y, tile_cs = 0, 0, 1

    # Check that input is a shapefile
    # The file is only read, so open it read-only (0) instead of update (1)
    logging.debug(' Reading extents')
    shp_driver = ogr.GetDriverByName('ESRI Shapefile')
    input_ds = shp_driver.Open(extent_path, 0)
    if input_ds is None:
        logging.error('\nERROR: The input_path could not be opened as a '
                      'shapefile\n')
        return False
    input_osr = drigo.feature_ds_osr(input_ds)
    input_layer = input_ds.GetLayer()

    # Get the extent of each feature
    lat_lon_list = []
    for input_ftr in iter(input_layer.GetNextFeature, None):
        input_geom = input_ftr.GetGeometryRef()
        if input_geom is None:
            # Features without a geometry have no extent to intersect
            continue
        input_extent = drigo.Extent(input_geom.GetEnvelope())
        input_extent = input_extent.ogrenv_swap()
        logging.debug('Input Extent: {}'.format(input_extent))

        # Project study area extent to input raster coordinate system
        output_extent = drigo.project_extent(
            input_extent, input_osr, tile_osr)
        logging.debug('Output Extent: {}'.format(output_extent))

        # Extent needed to select 1x1 degree tiles
        tile_extent = output_extent.copy()
        tile_extent.adjust_to_snap('EXPAND', tile_x, tile_y, tile_cs)
        logging.debug('Tile Extent: {}'.format(tile_extent))

        # Get list of available tiles that intersect the extent
        lat_lon_list.extend([
            (lat, -lon)
            for lon in range(int(tile_extent.xmin), int(tile_extent.xmax))
            for lat in range(int(tile_extent.ymax), int(tile_extent.ymin), -1)
        ])
    # Release the OGR objects (layer first, then the datasource)
    input_layer = None
    input_ds = None
    lat_lon_list = sorted(set(lat_lon_list))

    # Retrieve a list of files available on the site (keyed by lat/lon)
    # If several links match the same key, the last one is used
    logging.debug(' Retrieving NED tile list from server')
    zip_files = {}
    for link in utils.html_link_list(site_file_list_path):
        link_match = zip_re.search(link)
        if link_match:
            zip_files[link_match.group(1)] = link.split('/')[-1]

    # Attempt to download the tiles
    logging.debug('\nDownloading tiles')
    logging.info('')
    for lat_lon in lat_lon_list:
        logging.info('Tile: {}'.format(lat_lon))
        lat_lon_key = 'n{:02d}w{:03d}'.format(*lat_lon)

        try:
            zip_name = zip_files[lat_lon_key]
        except KeyError:
            logging.exception(
                'Error finding zip file for {}, skipping tile'.format(lat_lon))
            continue
        zip_url = '/'.join([site_url, site_folder, zip_name])
        zip_path = os.path.join(output_folder, zip_name)

        tile_path = os.path.join(output_folder, '{}.img'.format(lat_lon_key))

        logging.debug(' {}'.format(zip_url))
        logging.debug(' {}'.format(zip_path))
        logging.debug(' {}'.format(tile_path))
        if os.path.isfile(tile_path):
            if not overwrite_flag:
                logging.debug(' tile already exists, skipping')
                continue
            else:
                logging.debug(' tile already exists, removing')
                os.remove(tile_path)

        utils.url_download(zip_url, zip_path)

        # Extraction is best-effort: a failed tile is logged and skipped.
        # The "with" block guarantees the archive handle is closed even when
        # something raises, so the zip can still be removed afterwards.
        logging.debug(' Extracting')
        try:
            with zipfile.ZipFile(zip_path) as zip_f:
                img_names = [
                    x for x in zip_f.namelist() if img_re.search(x)]
                if not img_names:
                    raise ValueError(
                        'no .img file found in {}'.format(zip_name))
                img_name = img_names[0]
                zip_f.extract(img_name, output_folder)
            # Rename the extracted raster to the lat/lon keyed tile name
            img_path = os.path.join(output_folder, img_name)
            os.rename(img_path, tile_path)
        except Exception as e:
            logging.info(' Unhandled exception: {}'.format(e))

        try:
            os.remove(zip_path)
        except Exception as e:
            logging.info(' Unhandled exception: {}'.format(e))