def main(ini_path=None, overwrite_flag=False, delay_time=0, gee_key_file=None,
         max_ready=-1, reverse_flag=False):
    """Compute annual Tcorr images from gridded images

    For each WRS2 tile that intersects the study area, the gridded Tcorr
    scene images in the export collection are reduced to a mean image with
    a per-pixel scene count band and exported to the "<export_coll>_annual"
    image collection.

    Parameters
    ----------
    ini_path : str
        Input file path.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    delay_time : float, optional
        Delay time in seconds between starting export tasks (or checking the
        number of queued tasks, see "max_ready" parameter).
        The default is 0.
    gee_key_file : str, None, optional
        Earth Engine service account JSON key file (the default is None).
    max_ready: int, optional
        Maximum number of queued "READY" tasks.  The default is -1 which
        implies no limit to the number of tasks that will be submitted.
    reverse_flag : bool, optional
        If True, process WRS2 tiles in reverse order.

    Returns
    -------
    bool or None
        False if the export list is empty, otherwise None.

    Raises
    ------
    ValueError
        If a required INI parameter is missing or the Tmax source is not
        supported.

    """
    logging.info('\nCompute annual Tcorr images from gridded images')

    wrs2_coll_id = 'projects/earthengine-legacy/assets/' \
                   'projects/usgs-ssebop/wrs2_descending_custom'
    wrs2_tile_field = 'WRS2_TILE'

    # Raw string so that "\d" is a regex token and not a string escape
    wrs2_tile_re = re.compile(r'p?(\d{1,3})r?(\d{1,3})')

    # List of path/rows to skip
    wrs2_skip_list = [
        'p049r026',  # Vancouver Island, Canada
        # 'p047r031',  # North California coast
        'p042r037',  # San Nicholas Island, California
        # 'p041r037',  # South California coast
        'p040r038', 'p039r038', 'p038r038',  # Mexico (by California)
        'p037r039', 'p036r039', 'p035r039',  # Mexico (by Arizona)
        'p034r039', 'p033r039',  # Mexico (by New Mexico)
        'p032r040',  # Mexico (West Texas)
        'p029r041', 'p028r042', 'p027r043', 'p026r043',  # Mexico (South Texas)
        'p019r040', 'p018r040',  # West Florida coast
        'p016r043', 'p015r043',  # South Florida coast
        'p014r041', 'p014r042', 'p014r043',  # East Florida coast
        'p013r035', 'p013r036',  # North Carolina Outer Banks
        'p013r026', 'p012r026',  # Canada (by Maine)
        'p011r032',  # Rhode Island coast
    ]
    wrs2_path_skip_list = [9, 49]
    wrs2_row_skip_list = [25, 24, 43]
    mgrs_skip_list = []

    export_id_fmt = 'tcorr_gridded_{product}_{wrs2}_annual'
    asset_id_fmt = '{coll_id}/{wrs2}'

    # Read config file (the context manager makes sure the handle is closed)
    ini = configparser.ConfigParser(interpolation=None)
    with open(ini_path, 'r') as ini_file:
        ini.read_file(ini_file)

    model_name = 'SSEBOP'

    # Required parameters
    # A missing section or key both raise KeyError and are reported the same
    try:
        tmax_source = ini[model_name]['tmax_source']
    except KeyError:
        raise ValueError('"tmax_source" parameter was not set in INI')

    try:
        tcorr_source = ini[model_name]['tcorr_source']
    except KeyError:
        raise ValueError('"tcorr_source" parameter was not set in INI')

    try:
        tcorr_annual_coll_id = '{}_annual'.format(ini['EXPORT']['export_coll'])
    except KeyError:
        raise ValueError('"export_coll" parameter was not set in INI')

    try:
        study_area_coll_id = str(ini['INPUTS']['study_area_coll'])
    except KeyError:
        raise ValueError('"study_area_coll" parameter was not set in INI')

    try:
        mgrs_ftr_coll_id = str(ini['EXPORT']['mgrs_ftr_coll'])
    except KeyError:
        raise ValueError('"mgrs_ftr_coll" parameter was not set in INI')

    # Optional parameters
    try:
        study_area_property = str(ini['INPUTS']['study_area_property'])
    except KeyError:
        study_area_property = None
        logging.debug(' study_area_property: not set in INI, defaulting to None')

    try:
        study_area_features = str(ini['INPUTS']['study_area_features'])
        study_area_features = sorted([
            x.strip() for x in study_area_features.split(',')])
    except KeyError:
        study_area_features = []
        logging.debug(' study_area_features: not set in INI, defaulting to []')

    try:
        wrs2_tiles = str(ini['INPUTS']['wrs2_tiles'])
        wrs2_tiles = [x.strip() for x in wrs2_tiles.split(',')]
        wrs2_tiles = sorted([x.lower() for x in wrs2_tiles if x])
    except KeyError:
        wrs2_tiles = []
        logging.debug(' wrs2_tiles: not set in INI, defaulting to []')

    try:
        mgrs_tiles = str(ini['EXPORT']['mgrs_tiles'])
        mgrs_tiles = sorted([x.strip() for x in mgrs_tiles.split(',')])
        # CGM - Remove empty strings caused by trailing or extra commas
        mgrs_tiles = [x.upper() for x in mgrs_tiles if x]
        logging.debug(f' mgrs_tiles: {mgrs_tiles}')
    except KeyError:
        mgrs_tiles = []
        logging.debug(' mgrs_tiles: not set in INI, defaulting to []')

    try:
        utm_zones = str(ini['EXPORT']['utm_zones'])
        utm_zones = sorted([int(x.strip()) for x in utm_zones.split(',')])
        logging.debug(f' utm_zones: {utm_zones}')
    except KeyError:
        utm_zones = []
        logging.debug(' utm_zones: not set in INI, defaulting to []')

    # TODO: Add try/except blocks and default values?
    # TODO: Filter Tcorr scene collection based on collections parameter
    min_scene_count = float(ini['TCORR']['min_scene_count'])

    # Limit by year
    # The fallback is intentionally best-effort (missing or unparsable value),
    # but must not swallow KeyboardInterrupt/SystemExit like a bare except
    try:
        year_list = sorted(list(utils.parse_int_set(ini['TCORR']['years'])))
    except Exception:
        logging.info('\nTCORR "years" parameter not set in the INI,'
                     '\n Defaulting to all available years\n')
        year_list = []

    # For now only support reading specific Tmax sources
    if (tmax_source.upper() not in ['DAYMET_MEDIAN_V2'] and
            not re.match(
                r'^projects/.+/tmax/.+_(mean|median)_\d{4}_\d{4}(_\w+)?',
                tmax_source)):
        raise ValueError(f'unsupported tmax_source: {tmax_source}')

    logging.info('\nInitializing Earth Engine')
    if gee_key_file:
        logging.info(f' Using service account key file: {gee_key_file}')
        # The "EE_ACCOUNT" parameter is not used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('x', key_file=gee_key_file))
    else:
        ee.Initialize()

    logging.debug('\nTmax properties')
    logging.debug(f' {tmax_source}')

    # Get the Tcorr image collection properties
    logging.debug('\nTcorr scene collection')
    tcorr_coll_id = '{}'.format(ini['EXPORT']['export_coll'])

    if not ee.data.getInfo(tcorr_annual_coll_id):
        logging.info('\nExport collection does not exist and will be built'
                     '\n {}'.format(tcorr_annual_coll_id))
        input('Press ENTER to continue')
        ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, tcorr_annual_coll_id)

    # Get current running tasks
    tasks = utils.get_ee_tasks()
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        utils.print_ee_tasks(tasks)
        input('ENTER')

    # Get current asset list
    logging.debug('\nGetting GEE asset list')
    asset_list = utils.get_ee_assets(tcorr_annual_coll_id)
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        pprint.pprint(asset_list[:10])

    # Get list of MGRS tiles that intersect the study area
    logging.info('\nBuilding export list')
    export_list = mgrs_export_tiles(
        study_area_coll_id=study_area_coll_id,
        mgrs_coll_id=mgrs_ftr_coll_id,
        study_area_property=study_area_property,
        study_area_features=study_area_features,
        mgrs_tiles=mgrs_tiles,
        mgrs_skip_list=mgrs_skip_list,
        utm_zones=utm_zones,
        wrs2_tiles=wrs2_tiles,
    )
    if not export_list:
        logging.error('\nEmpty export list, exiting')
        return False

    # Build the complete/filtered WRS2 list
    wrs2_tile_list = list(set(
        wrs2 for tile_info in export_list
        for wrs2 in tile_info['wrs2_tiles']))
    if wrs2_skip_list:
        wrs2_tile_list = [wrs2 for wrs2 in wrs2_tile_list
                          if wrs2 not in wrs2_skip_list]
    if wrs2_path_skip_list:
        # Tile IDs are formatted "pPPPrRRR", so [1:4] is the path
        wrs2_tile_list = [wrs2 for wrs2 in wrs2_tile_list
                          if int(wrs2[1:4]) not in wrs2_path_skip_list]
    if wrs2_row_skip_list:
        # Tile IDs are formatted "pPPPrRRR", so [5:8] is the row
        wrs2_tile_list = [wrs2 for wrs2 in wrs2_tile_list
                          if int(wrs2[5:8]) not in wrs2_row_skip_list]
    # The list is only used in the inList filter below, so the order here
    # does not control the processing order (see the loop sort instead)
    wrs2_tile_list = sorted(wrs2_tile_list)

    # Get the list of WRS2 tiles that intersect the data area and study area
    wrs2_coll = ee.FeatureCollection(wrs2_coll_id) \
        .filter(ee.Filter.inList(wrs2_tile_field, wrs2_tile_list))
    wrs2_info = wrs2_coll.getInfo()['features']

    # The reducer does not depend on the tile, so build it once
    reducer = ee.Reducer.mean() \
        .combine(ee.Reducer.count(), sharedInputs=True)

    for wrs2_ftr in sorted(wrs2_info,
                           key=lambda k: k['properties'][wrs2_tile_field],
                           reverse=reverse_flag):
        wrs2_tile = wrs2_ftr['properties'][wrs2_tile_field]
        wrs2_path, wrs2_row = map(int, wrs2_tile_re.findall(wrs2_tile)[0])
        logging.info(f'{wrs2_tile}')

        export_id = export_id_fmt.format(
            product=tmax_source.split('/')[-1], wrs2=wrs2_tile)
        logging.debug(f' Export ID: {export_id}')

        asset_id = asset_id_fmt.format(
            coll_id=tcorr_annual_coll_id, wrs2=wrs2_tile)
        # The asset list may hold IDs with or without the legacy prefix
        asset_short_id = asset_id.replace(
            'projects/earthengine-legacy/assets/', '')
        logging.debug(f' Asset ID: {asset_id}')

        if overwrite_flag:
            if export_id in tasks.keys():
                logging.info(' Task already submitted, cancelling')
                ee.data.cancelTask(tasks[export_id]['id'])
            # This is intentionally not an "elif" so that a task can be
            # cancelled and an existing image/file/asset can be removed
            if asset_id in asset_list or asset_short_id in asset_list:
                logging.info(' Asset already exists, removing')
                ee.data.deleteAsset(asset_id)
        else:
            if export_id in tasks.keys():
                logging.info(' Task already submitted, skipping')
                continue
            elif asset_id in asset_list or asset_short_id in asset_list:
                logging.info(' Asset already exists, skipping')
                continue

        # TODO: Move to separate function or outside loop
        export_crs = 'EPSG:{}'.format(wrs2_ftr['properties']['EPSG'])
        wrs2_coords = ee.Geometry(wrs2_ftr['geometry']) \
            .bounds(1, ee.Projection(export_crs)) \
            .coordinates().get(0).getInfo()
        # Collapse the bounding ring to [xmin, ymin, xmax, ymax]
        wrs2_extent = [
            min([x[0] for x in wrs2_coords]),
            min([x[1] for x in wrs2_coords]),
            max([x[0] for x in wrs2_coords]),
            max([x[1] for x in wrs2_coords])]
        logging.debug(f' WRS2 Extent: {wrs2_extent}')

        # Adjust the image extent to the coarse resolution grid
        # EXPORT_GEO = [5000, 0, 15, 0, -5000, 15]
        export_cs = EXPORT_GEO[0]
        export_extent = [
            round(math.floor((wrs2_extent[0] - EXPORT_GEO[2]) / export_cs) *
                  export_cs + EXPORT_GEO[2], 8),
            round(math.floor((wrs2_extent[1] - EXPORT_GEO[5]) / export_cs) *
                  export_cs + EXPORT_GEO[5], 8),
            round(math.ceil((wrs2_extent[2] - EXPORT_GEO[2]) / export_cs) *
                  export_cs + EXPORT_GEO[2], 8),
            round(math.ceil((wrs2_extent[3] - EXPORT_GEO[5]) / export_cs) *
                  export_cs + EXPORT_GEO[5], 8),
        ]
        export_geo = [export_cs, 0, export_extent[0], 0,
                      -export_cs, export_extent[3]]
        export_shape = [
            int(abs(export_extent[2] - export_extent[0]) / EXPORT_GEO[0]),
            int(abs(export_extent[3] - export_extent[1]) / EXPORT_GEO[0])]
        logging.debug(f' Export CRS: {export_crs}')
        logging.debug(f' Export Geo: {export_geo}')
        logging.debug(f' Export Extent: {export_extent}')
        logging.debug(f' Export Shape: {export_shape}')

        tcorr_coll = ee.ImageCollection(tcorr_coll_id) \
            .filterMetadata('wrs2_tile', 'equals', wrs2_tile)
        # Only filter on year when years were set, since an inList filter on
        # an empty list would exclude every image instead of
        # "defaulting to all available years"
        if year_list:
            tcorr_coll = tcorr_coll.filter(
                ee.Filter.inList('year', year_list))
        tcorr_coll = tcorr_coll \
            .filterMetadata('tcorr_index', 'equals', 1) \
            .filterMetadata('tcorr_coarse_count', 'greater_than', 0) \
            .select(['tcorr'])
        # TODO: Should CLOUD_COVER_LAND filter should be re-applied here?
        tcorr_count = tcorr_coll.size()

        # Compute the gridded Tcorr climo image and count
        tcorr_img = tcorr_coll.reduce(reducer).rename(['tcorr', 'count'])
        count_img = tcorr_img.select(['count'])
        # Mask pixels that were computed from too few scenes
        output_img = tcorr_img.updateMask(count_img.gte(min_scene_count))

        output_img = output_img.set({
            'date_ingested': datetime.datetime.today().strftime('%Y-%m-%d'),
            'model_name': model_name,
            'model_version': ssebop.__version__,
            'tcorr_index': TCORR_INDICES['ANNUAL'],
            'tcorr_scene_count': tcorr_count,
            'tcorr_source': tcorr_source,
            'tmax_source': tmax_source,
            'wrs2_path': wrs2_path,
            'wrs2_row': wrs2_row,
            'wrs2_tile': wrs2_tile,
            'years': ','.join(map(str, year_list)),
        })

        logging.debug(' Building export task')
        task = ee.batch.Export.image.toAsset(
            image=output_img,
            description=export_id,
            assetId=asset_id,
            crs=export_crs,
            crsTransform='[' + ','.join(list(map(str, export_geo))) + ']',
            dimensions='{0}x{1}'.format(*export_shape),
        )

        logging.info(' Starting export task')
        utils.ee_task_start(task)

        # Pause before starting the next export task
        utils.delay_task(delay_time, max_ready)
        logging.debug('')
def delay_task(delay_time=0, task_max=-1, task_count=0):
    """Delay script execution based on number of READY tasks

    Parameters
    ----------
    delay_time : float, int
        Delay time in seconds between starting export tasks or checking the
        number of queued tasks if "task_max" is > 0.  The default is 0.
        Negative values are converted to their absolute value.
        The delay time will be set to a minimum of 10 seconds if the task
        list has to be checked.
    task_max : int, None, optional
        Maximum number of queued "READY" tasks.  None or a value <= 0 means
        the task list is not checked and the function only sleeps for
        "delay_time" seconds.
    task_count : int
        The current/previous/assumed number of ready tasks.  The task list
        is only queried if this value is greater than or equal to "task_max".

    Returns
    -------
    int : ready_task_count
        0 if the task list was not checked because "task_max" was not set,
        "task_count" if it was below "task_max", otherwise the number of
        READY tasks reported by the last task list query.

    Raises
    ------
    ValueError
        If "task_max" is greater than 3000.

    """
    # Check for None first so that an unset task_max does not raise a
    # TypeError in the comparison
    if task_max is not None and task_max > 3000:
        raise ValueError(
            'The maximum number of queued tasks must be less than 3000')

    # Force delay time to be a positive value since the parameter used to
    # support negative values
    delay_time = abs(delay_time)

    if task_max is None or task_max <= 0:
        # Assume task_max was not set and just wait the delay time
        logging.debug(
            f' Pausing {delay_time} seconds, not checking task list')
        time.sleep(delay_time)
        return 0
    elif task_count < task_max:
        # Skip waiting or checking tasks if a maximum number of tasks was set
        # and the current task count is below the max
        logging.debug(f' Ready tasks: {task_count}')
        return task_count

    # If checking tasks, force delay_time to be at least 10 seconds
    # to avoid excessive EE calls
    delay_time = max(delay_time, 10)

    # Make an initial pause before checking tasks lists to allow
    # for previous export to start up
    # CGM - I'm not sure what a good default first pause time should be,
    # but capping it at 30 seconds is probably fine for now
    # Sleep for the same (capped) time that is reported in the log
    first_pause = min(delay_time, 30)
    logging.debug(f' Pausing {first_pause} seconds for tasks to start')
    time.sleep(first_pause)

    # If checking tasks, don't continue to the next export until the number
    # of READY tasks is less than "task_max"
    while True:
        ready_task_count = len(utils.get_ee_tasks(states=['READY']).keys())
        logging.debug(f' Ready tasks: {ready_task_count}')
        if ready_task_count >= task_max:
            logging.debug(f' Pausing {delay_time} seconds')
            time.sleep(delay_time)
        else:
            logging.debug(f' {task_max - ready_task_count} open task '
                          f'slots, continuing processing')
            break

    return ready_task_count
def main(ini_path=None, overwrite_flag=False, delay_time=0, gee_key_file=None, ready_task_max=-1, reverse_flag=False, tiles=None, update_flag=False, log_tasks=True, recent_days=0, start_dt=None, end_dt=None): """Compute gridded Tcorr images by date Parameters ---------- ini_path : str Input file path. overwrite_flag : bool, optional If True, overwrite existing files if the export dates are the same and generate new images (but with different export dates) even if the tile lists are the same. The default is False. delay_time : float, optional Delay time in seconds between starting export tasks (or checking the number of queued tasks, see "max_ready" parameter). The default is 0. gee_key_file : str, None, optional Earth Engine service account JSON key file (the default is None). ready_task_max: int, optional Maximum number of queued "READY" tasks. reverse_flag : bool, optional If True, process WRS2 tiles in reverse order (the default is False). tiles : str, None, optional List of MGRS tiles to process (the default is None). update_flag : bool, optional If True, only overwrite scenes with an older model version. recent_days : int, optional Limit start/end date range to this many days before the current date (the default is 0 which is equivalent to not setting the parameter and will use the INI start/end date directly). start_dt : datetime, optional Override the start date in the INI file (the default is None which will use the INI start date). end_dt : datetime, optional Override the (inclusive) end date in the INI file (the default is None which will use the INI end date). """ logging.info('\nCompute gridded Tcorr images by date') # CGM - Which format should we use for the WRS2 tile? 
wrs2_tile_fmt = 'p{:03d}r{:03d}' # wrs2_tile_fmt = '{:03d}{:03d}' wrs2_tile_re = re.compile('p?(\d{1,3})r?(\d{1,3})') # List of path/rows to skip wrs2_skip_list = [ 'p049r026', # Vancouver Island, Canada # 'p047r031', # North California coast 'p042r037', # San Nicholas Island, California # 'p041r037', # South California coast 'p040r038', 'p039r038', 'p038r038', # Mexico (by California) 'p037r039', 'p036r039', 'p035r039', # Mexico (by Arizona) 'p034r039', 'p033r039', # Mexico (by New Mexico) 'p032r040', # Mexico (West Texas) 'p029r041', 'p028r042', 'p027r043', 'p026r043', # Mexico (South Texas) 'p019r040', 'p018r040', # West Florida coast 'p016r043', 'p015r043', # South Florida coast 'p014r041', 'p014r042', 'p014r043', # East Florida coast 'p013r035', 'p013r036', # North Carolina Outer Banks 'p013r026', 'p012r026', # Canada (by Maine) 'p011r032', # Rhode Island coast ] wrs2_path_skip_list = [9, 49] wrs2_row_skip_list = [25, 24, 43] mgrs_skip_list = [] export_id_fmt = 'tcorr_gridded_{product}_{scene_id}' asset_id_fmt = '{coll_id}/{scene_id}' # TODO: Move to INI or function input parameter clip_ocean_flag = True # Read config file ini = configparser.ConfigParser(interpolation=None) ini.read_file(open(ini_path, 'r')) # ini = utils.read_ini(ini_path) model_name = 'SSEBOP' try: study_area_coll_id = str(ini['INPUTS']['study_area_coll']) except KeyError: raise ValueError('"study_area_coll" parameter was not set in INI') except Exception as e: raise e try: start_date = str(ini['INPUTS']['start_date']) except KeyError: raise ValueError('"start_date" parameter was not set in INI') except Exception as e: raise e try: end_date = str(ini['INPUTS']['end_date']) except KeyError: raise ValueError('"end_date" parameter was not set in INI') except Exception as e: raise e try: collections = str(ini['INPUTS']['collections']) collections = sorted([x.strip() for x in collections.split(',')]) except KeyError: raise ValueError('"collections" parameter was not set in INI') except Exception 
as e: raise e try: mgrs_ftr_coll_id = str(ini['EXPORT']['mgrs_ftr_coll']) except KeyError: raise ValueError('"mgrs_ftr_coll" parameter was not set in INI') except Exception as e: raise e # Optional parameters try: study_area_property = str(ini['INPUTS']['study_area_property']) except KeyError: study_area_property = None logging.debug( ' study_area_property: not set in INI, defaulting to None') except Exception as e: raise e try: study_area_features = str(ini['INPUTS']['study_area_features']) study_area_features = sorted( [x.strip() for x in study_area_features.split(',')]) except KeyError: study_area_features = [] logging.debug( ' study_area_features: not set in INI, defaulting to []') except Exception as e: raise e try: wrs2_tiles = str(ini['INPUTS']['wrs2_tiles'])\ .replace('"', '').replace("'", '') wrs2_tiles = sorted([x.strip() for x in wrs2_tiles.split(',')]) except KeyError: wrs2_tiles = [] logging.debug(' wrs2_tiles: not set in INI, defaulting to []') except Exception as e: raise e try: mgrs_tiles = str(ini['EXPORT']['mgrs_tiles']) mgrs_tiles = sorted([x.strip() for x in mgrs_tiles.split(',')]) # CGM - Remove empty strings caused by trailing or extra commas mgrs_tiles = [x.upper() for x in mgrs_tiles if x] logging.debug(f' mgrs_tiles: {mgrs_tiles}') except KeyError: mgrs_tiles = [] logging.debug(' mgrs_tiles: not set in INI, defaulting to []') except Exception as e: raise e try: utm_zones = str(ini['EXPORT']['utm_zones']) utm_zones = sorted([int(x.strip()) for x in utm_zones.split(',')]) logging.debug(f' utm_zones: {utm_zones}') except KeyError: utm_zones = [] logging.debug(' utm_zones: not set in INI, defaulting to []') except Exception as e: raise e # TODO: Add try/except blocks and default values? 
cloud_cover = float(ini['INPUTS']['cloud_cover']) # Model specific parameters # Set the property name to lower case and try to cast values to numbers model_args = { k.lower(): float(v) if utils.is_number(v) else v for k, v in dict(ini[model_name]).items() } filter_args = {} tmax_source = ini[model_name]['tmax_source'] tcorr_source = ini[model_name]['tcorr_source'] tcorr_scene_coll_id = '{}'.format(ini['EXPORT']['export_coll']) # tcorr_scene_coll_id = '{}/{}_scene'.format( # ini['EXPORT']['export_coll'], tmax_source.lower()) if tcorr_source.upper() not in ['GRIDDED_COLD', 'GRIDDED']: raise ValueError('unsupported tcorr_source for these tools') # For now only support reading specific Tmax sources if (tmax_source.upper() not in ['DAYMET_MEDIAN_V2'] and not re.match( 'projects/.+/tmax/.+_(mean|median)_\d{4}_\d{4}', tmax_source)): raise ValueError(f'unsupported tmax_source: {tmax_source}') # if (tmax_source.upper() == 'CIMIS' and # ini['INPUTS']['end_date'] < '2003-10-01'): # raise ValueError('CIMIS is not currently available before 2003-10-01') # elif (tmax_source.upper() == 'DAYMET' and # ini['INPUTS']['end_date'] > '2018-12-31'): # logging.warning('\nDAYMET is not currently available past 2018-12-31, ' # 'using median Tmax values\n') # If the user set the tiles argument, use these instead of the INI values if tiles: logging.info('\nOverriding INI mgrs_tiles and utm_zones parameters') logging.info(f' user tiles: {tiles}') mgrs_tiles = sorted([y.strip() for x in tiles for y in x.split(',')]) mgrs_tiles = [x.upper() for x in mgrs_tiles if x] logging.info(f' mgrs_tiles: {", ".join(mgrs_tiles)}') utm_zones = sorted(list(set([int(x[:2]) for x in mgrs_tiles]))) logging.info(f' utm_zones: {", ".join(map(str, utm_zones))}') today_dt = datetime.datetime.now() today_dt = today_dt.replace(hour=0, minute=0, second=0, microsecond=0) if recent_days: logging.info('\nOverriding INI "start_date" and "end_date" parameters') logging.info(f' Recent days: {recent_days}') end_dt = today_dt 
- datetime.timedelta(days=1) start_dt = today_dt - datetime.timedelta(days=recent_days) start_date = start_dt.strftime('%Y-%m-%d') end_date = end_dt.strftime('%Y-%m-%d') elif start_dt and end_dt: # Attempt to use the function start/end dates logging.info('\nOverriding INI "start_date" and "end_date" parameters') logging.info(' Custom date range') start_date = start_dt.strftime('%Y-%m-%d') end_date = end_dt.strftime('%Y-%m-%d') else: # Parse the INI start/end dates logging.info('\nINI date range') try: start_dt = datetime.datetime.strptime(start_date, '%Y-%m-%d') end_dt = datetime.datetime.strptime(end_date, '%Y-%m-%d') except Exception as e: raise e logging.info(f' Start: {start_date}') logging.info(f' End: {end_date}') # TODO: Add a few more checks on the dates if end_dt < start_dt: raise ValueError('end date can not be before start date') # logging.debug('\nInterpolation date range') # iter_start_dt = start_dt # iter_end_dt = end_dt + datetime.timedelta(days=1) # # iter_start_dt = start_dt - datetime.timedelta(days=interp_days) # # iter_end_dt = end_dt + datetime.timedelta(days=interp_days+1) # logging.debug(' Start: {}'.format(iter_start_dt.strftime('%Y-%m-%d'))) # logging.debug(' End: {}'.format(iter_end_dt.strftime('%Y-%m-%d'))) logging.info('\nInitializing Earth Engine') if gee_key_file: logging.info(f' Using service account key file: {gee_key_file}') # The "EE_ACCOUNT" parameter is not used if the key file is valid ee.Initialize(ee.ServiceAccountCredentials('x', key_file=gee_key_file)) else: ee.Initialize() logging.debug('\nTmax properties') logging.debug(f' Source: {tmax_source}') # # DEADBEEF - Not needed with gridded Tcorr # # Get a Tmax image to set the Tcorr values to # if 'MEDIAN' in tmax_name.upper(): # tmax_coll_id = 'projects/earthengine-legacy/assets/' \ # 'projects/usgs-ssebop/tmax/{}'.format(tmax_name.lower()) # tmax_coll = ee.ImageCollection(tmax_coll_id) # tmax_mask = ee.Image(tmax_coll.first()).select([0]).multiply(0) # # else: # # raise 
ValueError(f'unsupported tmax_source: {tmax_name}') # logging.debug(f' Collection: {tmax_coll_id}') if not ee.data.getInfo(tcorr_scene_coll_id.rsplit('/', 1)[0]): logging.info('\nExport folder does not exist and will be built' '\n {}'.format(tcorr_scene_coll_id.rsplit('/', 1)[0])) input('Press ENTER to continue') ee.data.createAsset({'type': 'FOLDER'}, tcorr_scene_coll_id.rsplit('/', 1)[0]) if not ee.data.getInfo(tcorr_scene_coll_id): logging.info('\nExport collection does not exist and will be built' '\n {}'.format(tcorr_scene_coll_id)) input('Press ENTER to continue') ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, tcorr_scene_coll_id) # DEADBEEF - The asset list will be retrieved before each date is processed # # Get current asset list # logging.debug('\nGetting GEE asset list') # asset_list = utils.get_ee_assets(tcorr_scene_coll_id) # # if logging.getLogger().getEffectiveLevel() == logging.DEBUG: # # pprint.pprint(asset_list[:10]) # Get current running tasks tasks = utils.get_ee_tasks() ready_task_count = sum(1 for t in tasks.values() if t['state'] == 'READY') # ready_task_count = delay_task(ready_task_count, delay_time, max_ready) if logging.getLogger().getEffectiveLevel() == logging.DEBUG: utils.print_ee_task(tasks) input('ENTER') # Get list of MGRS tiles that intersect the study area logging.debug('\nMGRS Tiles/Zones') export_list = mgrs_export_tiles( study_area_coll_id=study_area_coll_id, mgrs_coll_id=mgrs_ftr_coll_id, study_area_property=study_area_property, study_area_features=study_area_features, mgrs_tiles=mgrs_tiles, mgrs_skip_list=mgrs_skip_list, utm_zones=utm_zones, wrs2_tiles=wrs2_tiles, ) if not export_list: logging.error('\nEmpty export list, exiting') return False # Build the complete WRS2 list for filtering the image list wrs2_tile_list = sorted( list( set(wrs2 for tile_info in export_list for wrs2 in tile_info['wrs2_tiles']))) if wrs2_skip_list: wrs2_tile_list = [ wrs2 for wrs2 in wrs2_tile_list if wrs2 not in wrs2_skip_list ] if 
wrs2_path_skip_list: wrs2_tile_list = [ wrs2 for wrs2 in wrs2_tile_list if int(wrs2[1:4]) not in wrs2_path_skip_list ] if wrs2_row_skip_list: wrs2_tile_list = [ wrs2 for wrs2 in wrs2_tile_list if int(wrs2[5:8]) not in wrs2_row_skip_list ] # CGM - This is kind of backwards, but rebuild the MGRS geometry in order # to filter the model collection object mgrs_tile_list = sorted( list(set(tile_info['index'] for tile_info in export_list))) mgrs_geom = ee.FeatureCollection(mgrs_ftr_coll_id)\ .filter(ee.Filter.inList('mgrs', mgrs_tile_list))\ .geometry() for export_dt in sorted(utils.date_range(start_dt, end_dt), reverse=reverse_flag): export_date = export_dt.strftime('%Y-%m-%d') next_date = (export_dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d') logging.info(f'Date: {export_date}') model_obj = ssebop.Collection( collections=collections, start_date=export_date, end_date=next_date, cloud_cover_max=cloud_cover, geometry=mgrs_geom, model_args=model_args, # filter_args=filter_args, ) landsat_coll = model_obj.overpass(variables=['ndvi']) # pprint.pprint(landsat_coll.aggregate_array('system:id').getInfo()) # input('ENTER') try: image_id_list = landsat_coll.aggregate_array('system:id').getInfo() except Exception as e: logging.warning(' Error getting image ID list, skipping date') logging.debug(f' {e}') continue # Get list of existing images for the target date logging.debug(' Getting GEE asset list') asset_coll = ee.ImageCollection(tcorr_scene_coll_id) \ .filterDate(export_date, next_date) \ .filter(ee.Filter.inList('wrs2_tile', wrs2_tile_list)) asset_props = { f'{tcorr_scene_coll_id}/{x["properties"]["system:index"]}': x['properties'] for x in utils.get_info(asset_coll)['features'] } # asset_props = {x['id']: x['properties'] for x in assets_info['features']} # Sort image ID list by path/row image_id_list = sorted(image_id_list, key=lambda k: k.split('/')[-1].split('_')[-2], reverse=True) for image_id in image_id_list: coll_id, scene_id = image_id.rsplit('/', 1) wrs2_path = 
int(scene_id[5:8]) wrs2_row = int(scene_id[8:11]) wrs2_tile = 'p{:03d}r{:03d}'.format(wrs2_path, wrs2_row) if wrs2_tile not in wrs2_tile_list: logging.debug(f'{scene_id} - not in wrs2 tile list, skipping') continue else: logging.info(f'{scene_id}') export_id = export_id_fmt.format( product=tmax_source.split('/')[-1].lower(), scene_id=scene_id) logging.debug(f' Export ID: {export_id}') asset_id = asset_id_fmt.format(coll_id=tcorr_scene_coll_id, scene_id=scene_id) logging.debug(f' Collection: {os.path.dirname(asset_id)}') logging.debug(f' Image ID: {os.path.basename(asset_id)}') if update_flag: def version_number(version_str): return list(map(int, version_str.split('.'))) if export_id in tasks.keys(): logging.info(' Task already submitted, skipping') continue # In update mode only overwrite if the version is old if asset_props and asset_id in asset_props.keys(): model_ver = version_number(ssebop.__version__) asset_ver = version_number( asset_props[asset_id]['model_version']) if asset_ver < model_ver: logging.info(' Existing asset model version is old, ' 'removing') logging.debug(f' asset: {asset_ver}\n' f' model: {model_ver}') try: ee.data.deleteAsset(asset_id) except: logging.info(' Error removing asset, skipping') continue elif (('T1_RT_TOA' in asset_props[asset_id]['coll_id']) and ('T1_RT_TOA' not in image_id)): logging.info(' Existing asset is from realtime ' 'Landsat collection, removing') try: ee.data.deleteAsset(asset_id) except: logging.info(' Error removing asset, skipping') continue else: logging.debug(' Asset is up to date, skipping') continue elif overwrite_flag: if export_id in tasks.keys(): logging.info(' Task already submitted, cancelling') ee.data.cancelTask(tasks[export_id]['id']) # This is intentionally not an "elif" so that a task can be # cancelled and an existing image/file/asset can be removed if asset_props and asset_id in asset_props.keys(): logging.info(' Asset already exists, removing') ee.data.deleteAsset(asset_id) else: if export_id in 
tasks.keys(): logging.info(' Task already submitted, skipping') continue elif asset_props and asset_id in asset_props.keys(): logging.info(' Asset already exists, skipping') continue # Get the input image grid and spatial reference image_info = ee.Image(image_id).select([3]).getInfo() image_geo = image_info['bands'][0]['crs_transform'] image_crs = image_info['bands'][0]['crs'] image_shape = image_info['bands'][0]['dimensions'] # Transform format: [30, 0, 591285, 0, -30, 4256115] image_extent = [ image_geo[2], image_geo[5] + image_shape[1] * image_geo[4], image_geo[2] + image_shape[0] * image_geo[0], image_geo[5] ] logging.debug(f' Image CRS: {image_crs}') logging.debug(f' Image Geo: {image_geo}') logging.debug(f' Image Extent: {image_extent}') logging.debug(f' Image Shape: {image_shape}') # Adjust the image extent to the coarse resolution grid # EXPORT_GEO = [5000, 0, 15, 0, -5000, 15] export_cs = EXPORT_GEO[0] export_extent = [ round( math.floor((image_extent[0] - EXPORT_GEO[2]) / export_cs) * export_cs + EXPORT_GEO[2], 8), round( math.floor((image_extent[1] - EXPORT_GEO[5]) / export_cs) * export_cs + EXPORT_GEO[5], 8), round( math.ceil((image_extent[2] - EXPORT_GEO[2]) / export_cs) * export_cs + EXPORT_GEO[2], 8), round( math.ceil((image_extent[3] - EXPORT_GEO[5]) / export_cs) * export_cs + EXPORT_GEO[5], 8), ] export_geo = [ export_cs, 0, export_extent[0], 0, -export_cs, export_extent[3] ] export_shape = [ int(abs(export_extent[2] - export_extent[0]) / EXPORT_GEO[0]), int(abs(export_extent[3] - export_extent[1]) / EXPORT_GEO[0]) ] logging.debug(f' Export CRS: {image_crs}') logging.debug(f' Export Geo: {export_geo}') logging.debug(f' Export Extent: {export_extent}') logging.debug(f' Export Shape: {export_shape}') # CGM - Why are we not using the from_image_id() method? 
# t_obj = ssebop.Image.from_image_id(ee.Image(image_id), **model_args) if coll_id.endswith('_L2'): t_obj = ssebop.Image.from_landsat_c2_sr( sr_image=ee.Image(image_id), cloudmask_args={ 'cirrus_flag': True, 'dilate_flag': True, 'shadow_flag': True, 'snow_flag': True }, **model_args) elif coll_id.endswith('_SR'): t_obj = ssebop.Image.from_landsat_c1_sr( ee.Image(image_id), **model_args) elif coll_id.endswith('_TOA'): t_obj = ssebop.Image.from_landsat_c1_toa( ee.Image(image_id), **model_args) else: raise ValueError('Could not determine Landsat type') # CGM - Intentionally not calling the tcorr method directly since # there may be compositing with climos or the scene average if tcorr_source == 'GRIDDED': tcorr_img = t_obj.tcorr_gridded elif tcorr_source == 'GRIDDED_COLD': tcorr_img = t_obj.tcorr_gridded_cold # tcorr_img = t_obj.tcorr # Clip to the Landsat image footprint tcorr_img = ee.Image(tcorr_img).clip(ee.Image(image_id).geometry()) # Clear the transparency mask (from clipping) tcorr_img = tcorr_img.updateMask(tcorr_img.unmask(0)) if clip_ocean_flag: tcorr_img = tcorr_img\ .updateMask(ee.Image('projects/openet/ocean_mask')) # # CGM - The NLCD mask will only work for CONUS # output_img = output_img.updateMask( # ee.Image('USGS/NLCD/NLCD2016').select(['landcover']).mask()) tcorr_img = tcorr_img\ .set({ 'CLOUD_COVER': image_info['properties']['CLOUD_COVER'], 'CLOUD_COVER_LAND': image_info['properties']['CLOUD_COVER_LAND'], # 'SPACECRAFT_ID': image.get('SPACECRAFT_ID'), 'coll_id': coll_id, 'date_ingested': datetime.datetime.today().strftime('%Y-%m-%d'), 'date': export_dt.strftime('%Y-%m-%d'), 'doy': int(export_dt.strftime('%j')), 'model_name': model_name, 'model_version': ssebop.__version__, 'month': int(export_dt.month), 'realtime': 'true' if '/T1_RT' in coll_id else 'false', 'scene_id': scene_id, 'system:time_start': image_info['properties']['system:time_start'], 'tcorr_index': TCORR_INDICES[tcorr_source.upper()], 'tcorr_source': tcorr_source, 'tmax_source': 
tmax_source, # 'tmax_source': tmax_source.replace( # 'projects/earthengine-legacy/assets/', ''), 'tool_name': TOOL_NAME, 'tool_version': TOOL_VERSION, 'wrs2_path': wrs2_path, 'wrs2_row': wrs2_row, 'wrs2_tile': wrs2_tile, 'year': int(export_dt.year), }) # pprint.pprint(output_img.getInfo()['properties']) # input('ENTER') logging.debug(' Building export task') task = ee.batch.Export.image.toAsset( image=tcorr_img, description=export_id, assetId=asset_id, crs=image_crs, crsTransform='[' + ','.join(list(map(str, export_geo))) + ']', dimensions='{0}x{1}'.format(*export_shape), ) logging.info(' Starting export task') utils.ee_task_start(task) ready_task_count += 1 # logging.debug(f' Ready tasks: {ready_task_count}') # Pause before starting the next date (not export task) ready_task_count = delay_task(delay_time=delay_time, task_max=ready_task_max, task_count=ready_task_count)
def tcorr_gridded_images(start_dt, end_dt, overwrite_flag=False,
                         gee_key_file=None, realtime_flag=False):
    """Identify missing gridded Tcorr assets

    Builds the list of Landsat image IDs in the date range that intersect
    the study area WRS2 tiles, then removes the IDs that already have an
    image in the export collection or an export task in the queue.

    Parameters
    ----------
    start_dt : datetime
        First date to check.
    end_dt : datetime
        Last date to check.  The day after this date is passed as the end
        date of the Earth Engine collection and asset filters.
    overwrite_flag : bool, optional
        Currently not used by this function (the default is False).
    gee_key_file : str, optional
        Earth Engine service account JSON key file.
        If not set, will attempt to initialize using the user credentials
    realtime_flag : bool, optional
        If True, build the image list using the Landsat realtime collections
        If False, allow existing realtime images to be overwritten

    Returns
    -------
    list : Landsat image IDs
        Sorted by the last underscore delimited component of the scene ID.
        An empty list is returned if the export collection does not exist,
        the export tile list is empty, or an Earth Engine request fails.

    """
    logging.info('Building gridded Tcorr image list')

    model_args = {'tmax_source': 'DAYMET_MEDIAN_V2'}

    start_date = start_dt.strftime('%Y-%m-%d')
    end_date = end_dt.strftime('%Y-%m-%d')
    next_date = (end_dt + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    logging.info(f' Start Date: {start_date}')
    logging.info(f' End Date: {end_date}')

    # TODO: Add a check for dates before 2013, since L5 isn't in collections

    logging.info(f' Realtime: {realtime_flag}')
    if realtime_flag:
        collections = COLLECTIONS_RT[:]
    else:
        collections = COLLECTIONS[:]

    # TODO: Move to config.py?
    logging.debug('\nInitializing Earth Engine')
    if gee_key_file:
        logging.debug(f' Using service account key file: {gee_key_file}')
        # The "EE_ACCOUNT" parameter is not used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('', key_file=gee_key_file))
    else:
        ee.Initialize()

    if not ee.data.getInfo(ASSET_COLL_ID):
        logging.error('Export collection does not exist')
        return []

    # Get list of MGRS tiles that intersect the study area
    logging.debug('\nMGRS Tiles/Zones')
    export_list = mgrs_export_tiles(
        study_area_coll_id=STUDY_AREA_COLL_ID,
        mgrs_coll_id=MGRS_FTR_COLL_ID,
        study_area_property=STUDY_AREA_PROPERTY,
        study_area_features=STUDY_AREA_FEATURES,
        # mgrs_tiles=MGRS_TILES,
        # mgrs_skip_list=mgrs_skip_list,
        # utm_zones=UTM_ZONES,
        # wrs2_tiles=WRS2_TILES,
    )
    if not export_list:
        logging.warning('Empty export list')
        return []
    mgrs_tile_list = sorted({tile_info['index'] for tile_info in export_list})
    logging.debug(f' MGRS Tiles: {",".join(mgrs_tile_list)}')

    # Build the complete WRS2 list for filtering the image list
    # Tiles are formatted as "p{path:03d}r{row:03d}", so the path is
    # characters 1-3 and the row is characters 5-7
    wrs2_tile_list = sorted({
        wrs2 for tile_info in export_list for wrs2 in tile_info['wrs2_tiles']})
    if WRS2_SKIP_LIST:
        wrs2_tile_list = [wrs2 for wrs2 in wrs2_tile_list
                          if wrs2 not in WRS2_SKIP_LIST]
    if WRS2_PATH_SKIP_LIST:
        wrs2_tile_list = [wrs2 for wrs2 in wrs2_tile_list
                          if int(wrs2[1:4]) not in WRS2_PATH_SKIP_LIST]
    if WRS2_ROW_SKIP_LIST:
        wrs2_tile_list = [wrs2 for wrs2 in wrs2_tile_list
                          if int(wrs2[5:8]) not in WRS2_ROW_SKIP_LIST]
    # logging.debug(f' WRS2 Tiles: {",".join(wrs2_tile_list)}')

    # CGM - This is kind of backwards, but rebuild the MGRS geometry in order
    #   to filter the model collection object
    mgrs_geom = ee.FeatureCollection(MGRS_FTR_COLL_ID)\
        .filter(ee.Filter.inList('mgrs', mgrs_tile_list))\
        .geometry()
    # study_area_geom = ee.Geometry.BBox(**STUDY_AREA_EXTENT)

    logging.debug('\nRequesting image ID list')
    model_obj = ssebop.Collection(
        collections=collections,
        start_date=start_date,
        end_date=next_date,
        cloud_cover_max=CLOUD_COVER,
        geometry=mgrs_geom,
        model_args=model_args,
        # filter_args=filter_args,
    )
    landsat_coll = model_obj.overpass(variables=['ndvi'])
    try:
        image_id_list = landsat_coll.aggregate_array('system:id').getInfo()
    except Exception as e:
        logging.error(f'Error requesting image ID list\n{e}')
        return []

    # Filter the image ID list to the WRS2 tile list
    # The path/row are read from fixed positions in the scene ID
    wrs2_tile_set = set(wrs2_tile_list)
    image_id_list = [
        x for x in image_id_list
        if f'p{x.split("/")[-1][5:8]}r{x.split("/")[-1][8:11]}' in wrs2_tile_set]

    # Get list of existing images for the target date
    logging.debug('\nRequesting existing asset list')
    logging.debug(f' {ASSET_COLL_ID}')
    asset_coll = ee.ImageCollection(ASSET_COLL_ID) \
        .filterDate(start_date, next_date) \
        .filter(ee.Filter.inList('wrs2_tile', wrs2_tile_list))

    if not realtime_flag:
        # Only keep non-realtime image IDs in the existing asset list
        # This will allow existing realtime images to be overwritten
        logging.info(' Removing realtime image IDs from existing asset list')
        asset_coll = asset_coll.filter(ee.Filter.inList('coll_id', COLLECTIONS))
        # asset_coll = asset_coll.filterMetadata('realtime', 'equals', 'false'))
    # else: keep all image IDs in the existing asset list

    # Only keep the scene ID component for filtering the image ID list
    try:
        asset_id_set = set(
            asset_coll.aggregate_array('system:index').getInfo())
    except Exception as e:
        logging.error(f'Error requesting asset ID list\n{e}')
        return []

    # Skip image IDs that are already built
    image_id_list = [
        image_id for image_id in image_id_list
        if image_id.split('/')[-1].upper() not in asset_id_set]

    # Get current running tasks
    logging.debug('\nRequesting task list')
    tasks = utils.get_ee_tasks()

    # Skip image IDs that are already in the task queue
    export_product = TMAX_SOURCE.split('/')[-1].lower()
    image_id_list = [
        image_id for image_id in image_id_list
        if EXPORT_ID_FMT.format(product=export_product,
                                scene_id=image_id.split('/')[-1])
        not in tasks
    ]

    # Limit the image ID list to avoid exceeding the maximum number of tasks
    # Clamp at zero since a negative limit would be interpreted as a
    #   "drop the last N items" slice instead of returning an empty list
    task_limit = max(2500 - len(tasks), 0)
    if len(image_id_list) > task_limit:
        logging.warning(f'Limiting list to {task_limit} images to avoid '
                        f'exceeding the maximum number of tasks')
        image_id_list = image_id_list[:task_limit]

    # Sort image ID list by path/row
    # image_id_list = sorted(image_id_list, reverse=True,
    #                        key=lambda k: k.split('/')[-1].split('_')[-2])

    # Sort image ID list by date
    image_id_list = sorted(image_id_list, reverse=False,
                           key=lambda k: k.split('/')[-1].split('_')[-1])

    return image_id_list
def main(ini_path=None, overwrite_flag=False, delay=0, key=None,
         reverse_flag=False):
    """Compute daily dT images

    Parameters
    ----------
    ini_path : str
        Input file path.
    overwrite_flag : bool, optional
        If True, generate new images (but with different export dates) even if
        the dates already have images.  If False, only generate images for
        dates that are missing. The default is False.
    delay : float, optional
        Delay time between each export task (the default is 0).
    key : str, optional
        File path to an Earth Engine json key file (the default is None).
    reverse_flag : bool, optional
        If True, process dates in reverse order.

    Raises
    ------
    ValueError
        If the INI "dt_source" is not CIMIS, DAYMET, or GRIDMET, if the
        "export_dest" is not ASSET (case insensitive), or if the source
        image projection has neither a "crs" nor a "wkt" entry.

    """
    logging.info('\nCompute daily dT images')

    ini = read_ini(ini_path)

    model_name = 'SSEBOP'
    # model_name = ini['INPUTS']['et_model'].upper()

    dt_name = ini[model_name]['dt_source']
    dt_keyword = dt_name.upper()

    if dt_keyword == 'CIMIS':
        daily_coll_id = 'projects/climate-engine/cimis/daily'
    elif dt_keyword == 'DAYMET':
        daily_coll_id = 'NASA/ORNL/DAYMET_V3'
    elif dt_keyword == 'GRIDMET':
        daily_coll_id = 'IDAHO_EPSCOR/GRIDMET'
    else:
        raise ValueError('dt_source must be CIMIS, DAYMET, or GRIDMET')

    # Check dates
    # The INI dates are compared as ISO formatted (YYYY-MM-DD) strings
    if dt_keyword == 'CIMIS' and ini['INPUTS']['end_date'] < '2003-10-01':
        logging.error(
            '\nCIMIS is not currently available before 2003-10-01, exiting\n')
        sys.exit()
    elif dt_keyword == 'DAYMET' and ini['INPUTS']['end_date'] > '2017-12-31':
        logging.warning('\nDAYMET is not currently available past 2017-12-31, '
                        'using median Tmax values\n')
        # sys.exit()

    logging.info('\nInitializing Earth Engine')
    if key:
        logging.info(' Using service account key file: {}'.format(key))
        # The "EE_ACCOUNT" parameter is not used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('deadbeef', key_file=key))
    else:
        ee.Initialize()

    # Output dT daily image collection
    dt_daily_coll_id = '{}/{}_daily'.format(
        ini['EXPORT']['export_coll'], dt_name.lower())

    # Get an input image to set the dT values to
    logging.debug('\nInput properties')
    dt_source = dt_name.split('_', 1)[0]
    # dt_version = dt_name.split('_', 1)[1]
    daily_coll = ee.ImageCollection(daily_coll_id)
    dt_img = ee.Image(daily_coll.first()).select([0])
    logging.debug(' Collection: {}'.format(daily_coll_id))
    logging.debug(' Source: {}'.format(dt_source))
    # logging.debug(' Version: {}'.format(dt_version))

    logging.debug('\nExport properties')
    export_proj = dt_img.projection().getInfo()
    export_geo = export_proj['transform']
    if 'crs' in export_proj:
        export_crs = export_proj['crs']
    elif 'wkt' in export_proj:
        # Remove the whitespace after the commas in the WKT string
        export_crs = re.sub(r',\s+', ',', export_proj['wkt'])
    else:
        # Fail here instead of with a NameError when export_crs is first used
        raise ValueError('unsupported projection type')
    export_shape = dt_img.getInfo()['bands'][0]['dimensions']
    # Transform format: [x_size, 0, x_origin, 0, y_size, y_origin]
    # Extent format: [x_min, y_min, x_max, y_max]
    export_extent = [
        export_geo[2],
        export_geo[5] + export_shape[1] * export_geo[4],
        export_geo[2] + export_shape[0] * export_geo[0],
        export_geo[5],
    ]
    logging.debug(' CRS: {}'.format(export_crs))
    logging.debug(' Extent: {}'.format(export_extent))
    logging.debug(' Geo: {}'.format(export_geo))
    logging.debug(' Shape: {}'.format(export_shape))

    # Normalize the export destination once so that the validation here and
    #   the checks in the export loop all treat the keyword the same way
    export_dest = ini['EXPORT']['export_dest'].upper()

    # Get current asset list
    if export_dest == 'ASSET':
        logging.debug('\nGetting asset list')
        # DEADBEEF - daily is hardcoded in the asset_id for now
        asset_list = utils.get_ee_assets(dt_daily_coll_id)
    else:
        raise ValueError('invalid export destination: {}'.format(
            ini['EXPORT']['export_dest']))

    # Get current running tasks
    tasks = utils.get_ee_tasks()
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        utils.print_ee_tasks()
        input('ENTER')

    # Limit by year and month
    try:
        month_list = sorted(list(utils.parse_int_set(ini['INPUTS']['months'])))
    except Exception:
        logging.info('\nINPUTS "months" parameter not set in the INI,'
                     '\n Defaulting to all months (1-12)\n')
        month_list = list(range(1, 13))
    # try:
    #     year_list = sorted(list(utils.parse_int_set(ini['INPUTS']['years'])))
    # except:
    #     logging.info('\nINPUTS "years" parameter not set in the INI,'
    #                  '\n Defaulting to all available years\n')
    #     year_list = []

    # Group asset IDs by image date
    # The image date is read from the first underscore delimited component
    #   of the asset name (YYYYMMDD)
    asset_id_dict = defaultdict(list)
    for asset_id in asset_list:
        asset_dt = datetime.datetime.strptime(
            asset_id.split('/')[-1].split('_')[0], '%Y%m%d')
        asset_id_dict[asset_dt.strftime('%Y-%m-%d')].append(asset_id)

    iter_start_dt = datetime.datetime.strptime(
        ini['INPUTS']['start_date'], '%Y-%m-%d')
    iter_end_dt = datetime.datetime.strptime(
        ini['INPUTS']['end_date'], '%Y-%m-%d')
    logging.debug('Start Date: {}'.format(iter_start_dt.strftime('%Y-%m-%d')))
    logging.debug('End Date: {}\n'.format(iter_end_dt.strftime('%Y-%m-%d')))

    for export_dt in sorted(utils.date_range(iter_start_dt, iter_end_dt),
                            reverse=reverse_flag):
        export_date = export_dt.strftime('%Y-%m-%d')
        today_dt = datetime.datetime.today()

        # if ((month_list and export_dt.month not in month_list) or
        #         (year_list and export_dt.year not in year_list)):
        if month_list and export_dt.month not in month_list:
            logging.debug(f'Date: {export_date} - month not in INI - skipping')
            continue
        elif export_date >= today_dt.strftime('%Y-%m-%d'):
            logging.debug(f'Date: {export_date} - unsupported date - skipping')
            continue
        logging.info(f'Date: {export_date}')

        export_id = ini['EXPORT']['export_id_fmt'] \
            .format(
                product=dt_name.lower(),
                date=export_dt.strftime('%Y%m%d'),
                export=today_dt.strftime('%Y%m%d'),
                dest=export_dest.lower())
        logging.debug(' Export ID: {}'.format(export_id))

        # The export destination was validated as ASSET above, so the
        #   asset ID is always defined before it is used below
        asset_id = '{}/{}_{}'.format(
            dt_daily_coll_id, export_dt.strftime('%Y%m%d'),
            today_dt.strftime('%Y%m%d'))
        logging.debug(' Asset ID: {}'.format(asset_id))

        if overwrite_flag:
            if export_id in tasks:
                logging.debug(' Task already submitted, cancelling')
                # NOTE(review): this assumes utils.get_ee_tasks() maps the
                #   task description directly to the task ID - confirm
                ee.data.cancelTask(tasks[export_id])
            # This is intentionally not an "elif" so that a task can be
            # cancelled and an existing image/file/asset can be removed
            if asset_id in asset_list:
                logging.debug(' Asset already exists, removing')
                ee.data.deleteAsset(asset_id)
        else:
            if export_id in tasks:
                logging.debug(' Task already submitted, exiting')
                continue
            elif asset_id in asset_list:
                logging.debug(
                    ' Asset with current export date already exists, '
                    'skipping')
                continue
            elif len(asset_id_dict[export_date]) > 0:
                logging.debug(
                    ' Asset with earlier export date already exists, '
                    'skipping')
                continue

        # Compute dT using a fake Landsat image
        # The system:time_start property is the only needed value
        model_obj = ssebop.Image(
            ee.Image.constant([0, 0]).rename(['ndvi', 'lst']).set({
                'system:time_start': utils.millis(export_dt),
                'system:index': 'LC08_043033_20170716',
                'system:id': 'LC08_043033_20170716'
            }),
            dt_source=dt_source.upper(),
            elev_source='SRTM',
            dt_min=ini['SSEBOP']['dt_min'],
            dt_max=ini['SSEBOP']['dt_max'],
        )

        # Cast to float and set properties
        dt_img = model_obj.dt.float() \
            .set({
                'system:time_start': utils.millis(export_dt),
                'date_ingested': today_dt.strftime('%Y-%m-%d'),
                'date': export_dt.strftime('%Y-%m-%d'),
                'year': int(export_dt.year),
                'month': int(export_dt.month),
                'day': int(export_dt.day),
                'doy': int(export_dt.strftime('%j')),
                'model_name': model_name,
                'model_version': ssebop.__version__,
                'dt_source': dt_source.upper(),
                # 'dt_version': dt_version.upper(),
            })

        # Build export tasks
        logging.debug(' Building export task')
        task = ee.batch.Export.image.toAsset(
            image=ee.Image(dt_img),
            description=export_id,
            assetId=asset_id,
            crs=export_crs,
            crsTransform='[' + ','.join(list(map(str, export_geo))) + ']',
            dimensions='{0}x{1}'.format(*export_shape),
        )
        logging.info(' Starting export task')
        utils.ee_task_start(task)

        # Pause before starting next task
        utils.delay_task(delay_time=delay)
        logging.debug('')
def main(tmax_source, statistic, year_start, year_end,
         doy_list=range(1, 367), gee_key_file=None, delay_time=0, max_ready=-1,
         overwrite_flag=False, elr_flag=False, reverse_flag=False):
    """Tmax Climatology Assets

    Parameters
    ----------
    tmax_source : {'CIMIS', 'DAYMET_V3', 'DAYMET_V4', 'GRIDMET'}
        Maximum air temperature source keyword.
    statistic : {'median', 'mean'}
        Climatology statistic.
    year_start : int
        Start year.
    year_end : int
        End year (inclusive).
    doy_list : list(int), optional
        Days of year to process (the default is 1-366).
    gee_key_file : str, None, optional
        File path to a service account json key file.
    delay_time : float, optional
        Delay time in seconds between starting export tasks (or checking the
        number of queued tasks, see "max_ready" parameter).  The default is 0.
    max_ready: int, optional
        Maximum number of queued "READY" tasks.  The default is -1 which is
        implies no limit to the number of tasks that will be submitted.
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
    elr_flag : bool, optional
        If True, apply Elevation Lapse Rate (ELR) adjustment
        (the default is False).
    reverse_flag : bool, optional
        If True, process days in reverse order (the default is False).

    Returns
    -------
    None
        False is returned instead if the tmax_source is not supported.

    Raises
    ------
    ValueError
        If the statistic is not 'median' or 'mean'.
    Exception
        If the source projection has neither a "wkt" nor a "crs" entry.

    Notes
    -----
    Collection is built/filtered using "day of year" based on the system:time_start
    The DOY 366 collection is built by selecting only the DOY 365 images
        (so the DOY 366 image should be a copy of the DOY 365 image)

    Daymet calendar definition
      https://daac.ornl.gov/DAYMET/guides/Daymet_Daily_V4.html
    The Daymet calendar is based on a standard calendar year.
    All Daymet years, including leap years, have 1–365 days.
    For leap years, the Daymet data include leap day (February 29) and
      December 31 is discarded from leap years to maintain a 365-day year.

    """
    logging.info(f'\nGenerating {tmax_source} {statistic} asset')

    tmax_folder = 'projects/earthengine-legacy/assets/projects/usgs-ssebop/tmax'

    # CGM - Intentionally not setting the time_start
    # time_start_year = 1980

    if statistic.lower() not in ['median', 'mean']:
        raise ValueError(f'unsupported statistic: {statistic}')

    logging.info('\nInitializing Earth Engine')
    if gee_key_file and os.path.isfile(gee_key_file):
        logging.info(
            ' Using service account key file: {}'.format(gee_key_file))
        # The "EE_ACCOUNT" doesn't seem to be used if the key file is valid
        ee.Initialize(ee.ServiceAccountCredentials('', key_file=gee_key_file))
    else:
        ee.Initialize()

    # CGM - Should we set default start/end years if they are not set by the user?
    source_keyword = tmax_source.upper()
    if source_keyword in ['DAYMET_V3', 'DAYMET_V4']:
        tmax_coll = ee.ImageCollection('NASA/ORNL/' + source_keyword) \
            .select(['tmax']).map(c_to_k)
    elif source_keyword == 'CIMIS':
        tmax_coll = ee.ImageCollection('projects/climate-engine/cimis/daily') \
            .select(['Tx'], ['tmax']).map(c_to_k)
    elif source_keyword == 'GRIDMET':
        tmax_coll = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET') \
            .select(['tmmx'], ['tmax'])
    # elif source_keyword == 'TOPOWX':
    #     tmax_coll = ee.ImageCollection('TOPOWX') \
    #         .select(['tmmx'], ['tmax'])
    else:
        logging.error('Unsupported tmax_source: {}'.format(tmax_source))
        return False

    if elr_flag:
        id_flag = 'elr'
        coll_id = f'{tmax_folder}/' \
                  f'{tmax_source.lower()}_{statistic}_{year_start}_{year_end}_{id_flag}'
    else:
        coll_id = f'{tmax_folder}/' \
                  f'{tmax_source.lower()}_{statistic}_{year_start}_{year_end}'

    tmax_proj = ee.Image(tmax_coll.first()).projection().getInfo()
    if 'wkt' in tmax_proj:
        tmax_crs = tmax_proj['wkt'].replace(' ', '').replace('\n', '')
    elif 'crs' in tmax_proj:
        # Some projections are described by a CRS code instead of a WKT string
        tmax_crs = tmax_proj['crs']
    else:
        raise Exception('unsupported projection type')

    if source_keyword in ['DAYMET_V3', 'DAYMET_V4']:
        # TODO: Check if the DAYMET_V4 grid is aligned to DAYMET_V3
        # Custom smaller extent for DAYMET focused on CONUS
        # extent = [-1999750, -1890500,  2500250,  1109500]
        dimensions = [4500, 3000]
        transform = [1000, 0, -1999750, 0, -1000, 1109500]
        # Custom medium extent for DAYMET of CONUS, Mexico, and southern Canada
        # extent = [-2099750, -3090500, 2900250, 1909500]
        # dimensions = [5000, 5000]
        # transform = [1000, 0, -2099750, 0, -1000, 1909500]
    else:
        # The image info is only needed to read the native dimensions
        tmax_info = ee.Image(tmax_coll.first()).getInfo()
        transform = tmax_proj['transform']
        dimensions = tmax_info['bands'][0]['dimensions']
    logging.info(' CRS: {}'.format(tmax_crs))
    logging.info(' Transform: {}'.format(transform))
    logging.info(' Dimensions: {}\n'.format(dimensions))

    # Build the export collection if it doesn't exist
    if not ee.data.getInfo(coll_id):
        logging.info('\nImage collection does not exist and will be built'
                     '\n {}'.format(coll_id))
        input('Press ENTER to continue')
        ee.data.createAsset({'type': 'ImageCollection'}, coll_id)
        # # Switch type string if use_cloud_api=True
        # ee.data.createAsset({'type': 'IMAGE_COLLECTION'}, coll_id)

    # Get current running assets
    # CGM: This is currently returning the asset IDs without earthengine-legacy
    assets = utils.get_ee_assets(coll_id)
    # assets = [asset_id.replace('projects/earthengine-legacy/assets/', '')
    #           for asset_id in assets]

    # Get current running tasks
    tasks = utils.get_ee_tasks()
    if logging.getLogger().getEffectiveLevel() == logging.DEBUG:
        logging.debug(' Tasks: {}'.format(len(tasks)))
        input('ENTER')

    if elr_flag:
        # The ELR layer only depends on the DEM and the Tmax grid, so it is
        #   built once here instead of once for every day of year
        # MF - Could eventually make the DEM source (keyword-based) as an input argument.
        srtm = ee.Image("CGIAR/SRTM90_V4")
        srtm_proj = srtm.projection().getInfo()
        # MF - The SRTM image has crs not wkt.
        srtm_crs = srtm_proj['crs']

        # Reduce DEM to median of ~20km cells
        srtm_median = srtm.reduceResolution(
            reducer=ee.Reducer.median(), maxPixels=65536)

        # Smooth median DEM with 5x5 pixel radius
        srtm_median_5x5 = srtm_median.reduceNeighborhood(
            ee.Reducer.mean(), ee.Kernel.square(radius=5, units='pixels'))

        # Reproject to ~20km
        # NOTE(review): scale=200 does not obviously correspond to the ~20km
        #   cell size described in these comments - confirm the intended value
        srtm_median_20km = srtm_median_5x5.reproject(crs=srtm_crs, scale=200)

        # Final ELR mask: (DEM-(medDEM.add(100)).gt(0))
        srtm_diff = srtm.subtract(srtm_median_20km.add(100))
        srtm_diff_positive = srtm_diff.gt(0)

        # Reproject to match Tmax source projection
        srtm_diff = srtm_diff.reproject(crs=tmax_crs, crsTransform=transform)
        srtm_diff_positive = srtm_diff_positive.reproject(
            crs=tmax_crs, crsTransform=transform)
        srtm_diff_final = srtm_diff.mask(srtm_diff_positive)

    for doy in sorted(doy_list, reverse=reverse_flag):
        logging.info('DOY: {:03d}'.format(doy))

        # CGM - Intentionally not setting the time_start
        # What year should we use for the system:time_start?
        # time_start_dt = datetime.datetime.strptime(
        #     '{}_{:03d}'.format(time_start_year, doy), '%Y_%j')
        # logging.debug(' Time Start Date: {}'.format(
        #     time_start_dt.strftime('%Y-%m-%d')))

        asset_id = '{}/{:03d}'.format(coll_id, doy)
        asset_short_id = asset_id.replace(
            'projects/earthengine-legacy/assets/', '')
        export_id = 'tmax_{}_{}_{}_{}_day{:03d}'.format(
            tmax_source.lower(), statistic, year_start, year_end, doy)
        logging.debug(' Asset ID: {}'.format(asset_id))
        logging.debug(' Export ID: {}'.format(export_id))

        # Check for both ID forms since the asset list may or may not
        #   include the "projects/earthengine-legacy/assets/" prefix
        asset_exists = asset_short_id in assets or asset_id in assets

        if overwrite_flag:
            if export_id in tasks:
                logging.info(' Task already submitted, cancelling')
                # NOTE(review): this assumes utils.get_ee_tasks() maps the
                #   task description directly to the task ID - confirm
                ee.data.cancelTask(tasks[export_id])
            if asset_exists:
                logging.info(' Asset already exists, removing')
                ee.data.deleteAsset(asset_id)
        else:
            if export_id in tasks:
                logging.info(' Task already submitted, skipping')
                continue
            elif asset_exists:
                logging.info(' Asset already exists, skipping')
                continue

        # Filter the Tmax collection the target day of year
        # Compute DOY 366 as a copy of the DOY 365 values
        filter_doy = doy if doy < 366 else 365
        tmax_doy_coll = tmax_coll \
            .filter(ee.Filter.calendarRange(filter_doy, filter_doy, 'day_of_year')) \
            .filter(ee.Filter.calendarRange(year_start, year_end, 'year'))

        # Compute the Tmax climo image
        # The statistic was validated above, so it can only be median or mean
        if statistic.lower() == 'median':
            tmax_img = ee.Image(tmax_doy_coll.median())
        else:
            tmax_img = ee.Image(tmax_doy_coll.mean())

        # Fill interior water holes with the mean of the surrounding cells
        # Use the filled image as the source to the where since tmax is nodata
        # CGM - Check if this is needed for DAYMET_V4
        if source_keyword in ['DAYMET_V3', 'DAYMET_V4']:
            filled_img = tmax_img.focal_mean(4000, 'circle', 'meters') \
                .reproject(tmax_crs, transform)
            tmax_img = filled_img.where(tmax_img.gt(0), tmax_img)
            # tmax_img = filled_img.where(tmax_img, tmax_img)

        if elr_flag:
            elr_adjust = ee.Image(tmax_img).expression(
                '(temperature - (0.005 * (elr_layer)))',
                {
                    'temperature': tmax_img,
                    'elr_layer': srtm_diff_final
                })
            tmax_img = tmax_img.where(srtm_diff_final, elr_adjust)

        tmax_img = tmax_img.set({
            'date_ingested': datetime.datetime.today().strftime('%Y-%m-%d'),
            'doy': int(doy),
            # 'doy': ee.String(ee.Number(doy).format('%03d')),
            'year_start': year_start,
            'year_end': year_end,
            'years': tmax_doy_coll.size(),
            # CGM - Intentionally not setting the time_start
            # 'system:time_start': ee.Date(
            #     time_start_dt.strftime('%Y-%m-%d')).millis()
        })

        # Build export tasks
        logging.debug(' Building export task')
        task = ee.batch.Export.image.toAsset(
            tmax_img,
            description=export_id,
            assetId=asset_id,
            dimensions='{0}x{1}'.format(*dimensions),
            crs=tmax_crs,
            crsTransform='[' + ','.join(map(str, transform)) + ']',
            maxPixels=int(1E10),
        )
        # task = ee.batch.Export.image.toCloudStorage(
        #     tmax_img,
        #     description=export_id,
        #     bucket='tmax_',
        #     fileNamePrefix=export_id,
        #     dimensions='{0}x{1}'.format(*dimensions),
        #     crs=tmax_crs,
        #     crsTransform='[' + ','.join(map(str, transform)) + ']',
        #     maxPixels=int(1E10),
        #     fileFormat='GeoTIFF',
        #     formatOptions={'cloudOptimized': True},
        # )

        logging.info(' Starting export task')
        utils.ee_task_start(task)

        # Pause before starting next task
        utils.delay_task(delay_time, max_ready)