def deleteExcessAssets(dates, max_assets):
    '''Trim the collection down to at most max_assets, removing oldest dates first.'''
    dates.sort()  # ascending, so the head of the list holds the oldest dates
    surplus = len(dates) - max_assets
    if surplus > 0:
        # delete exactly the `surplus` oldest assets
        for stale_date in dates[:surplus]:
            eeUtil.removeAsset(getAssetName(stale_date))
def main():
    '''Entry point: sync the GEE collection, prune old assets, update Resource Watch.'''
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Connect to Google Earth Engine
    eeUtil.initJson()

    # Optionally wipe the whole collection before re-ingesting
    if CLEAR_COLLECTION_FIRST and eeUtil.exists(EE_COLLECTION):
        eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    # Ensure the collection exists and note which dates it already holds
    prior_dates = [getDate(asset) for asset in checkCreateCollection(EE_COLLECTION)]

    # Fetch, process, and upload anything new, recording the dates added
    added_dates = [getDate(asset) for asset in processNewData(prior_dates)]

    logging.info('Previous assets: {}, new: {}, max: {}'.format(
        len(prior_dates), len(added_dates), MAX_ASSETS))

    # Drop the oldest assets if the collection is over its cap
    deleteExcessAssets(prior_dates + added_dates, MAX_ASSETS)

    # Push the refresh through to Resource Watch
    updateResourceWatch()
    logging.info('SUCCESS')
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Connect to GEE, wiping the collection first when configured to
    eeUtil.initJson()
    if CLEAR_COLLECTION_FIRST and eeUtil.exists(EE_COLLECTION):
        eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    # 1. Make sure the image collection exists; list the dates already in it
    current_dates = [getDate(asset)
                     for asset in checkCreateCollection(EE_COLLECTION)]

    # 2. Fetch, process, stage, and ingest anything new
    fresh_dates = [getDate(asset) for asset in processNewData(current_dates)]

    # 3. Trim the collection back down to MAX_ASSETS
    current_dates = current_dates + fresh_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(current_dates), len(fresh_dates), MAX_ASSETS))
    deleteExcessAssets(current_dates, MAX_ASSETS)
    logging.info(fresh_dates)

    # Record the collection's newest date on Resource Watch
    most_recent_date = get_most_recent_date(EE_COLLECTION)
    lastUpdateDate(DATASET_ID, most_recent_date)
    logging.info('SUCCESS')
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Initialize eeUtil
    eeUtil.initJson()

    # 1. Ensure the collection exists, optionally clearing it first
    if CLEAR_COLLECTION_FIRST and eeUtil.exists(EE_COLLECTION):
        eeUtil.removeAsset(EE_COLLECTION, recursive=True)
    known_dates = [getDate(asset)
                   for asset in checkCreateCollection(EE_COLLECTION)]

    # 2. Fetch, process, stage, ingest, clean
    added_dates = [getDate(asset) for asset in processNewData(known_dates)]

    # 3. Delete assets beyond the MAX_ASSETS cap
    known_dates = known_dates + added_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(known_dates), len(added_dates), MAX_ASSETS))
    deleteExcessAssets(known_dates, MAX_ASSETS)

    ###
    logging.info('SUCCESS')
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 0. Initialize GEE
    eeUtil.initJson()

    ### 1. Create collection names, clear if desired
    collections = {
        rw_id: EE_COLLECTION.format(rw_id=rw_id, varname=varname)
        for rw_id, varname in ASSET_NAMES.items()
    }
    if CLEAR_COLLECTION_FIRST:
        for coll_name in collections.values():
            if eeUtil.exists(coll_name):
                eeUtil.removeAsset(coll_name, recursive=True)

    ### 2. Grab existing assets and their dates
    existing_assets = {
        rw_id: checkCreateCollection(coll_name)
        for rw_id, coll_name in collections.items()
    }
    existing_dates = {
        rw_id: [getRasterDate(asset) for asset in assets]
        for rw_id, assets in existing_assets.items()
    }

    # This will be a dict of per-rw_id asset lists
    new_assets = processNewRasterData(existing_dates)
    new_dates = {
        rw_id: [getRasterDate(asset) for asset in assets]
        for rw_id, assets in new_assets.items()
    }

    ### 5. Delete old assets
    for rw_id in collections:
        old = existing_dates[rw_id]
        fresh = new_dates.get(rw_id, [])
        logging.info('Existing assets in {}: {}, new: {}, max: {}'.format(
            rw_id, len(old), len(fresh), MAX_DATES))
        deleteExcessAssets(old + fresh, rw_id, ASSET_NAMES[rw_id], MAX_DATES)

    # Get most recent update date
    for coll_name, dataset_id in DATASET_IDS.items():
        most_recent_date = get_most_recent_date(coll_name)
        if getLastUpdate(dataset_id) != most_recent_date:
            logging.info('Updating last update date and flushing cache.')
            # Update data set's last update date on Resource Watch
            lastUpdateDate(dataset_id, most_recent_date)
            # get layer ids and flush tile cache for each
            for layer_id in getLayerIDs(dataset_id):
                flushTileCache(layer_id)
    logging.info('SUCCESS')
def deleteExcessAssets(dates, rw_id, varname, max_assets):
    '''Delete the oldest assets once a collection holds more than max_assets.

    INPUT   dates: date strings for assets in the collection; may contain
                duplicates when callers concatenate existing and newly
                ingested date lists (list of strings)
            rw_id: Resource Watch id used to build the asset path (string)
            varname: variable name used to build the asset path (string)
            max_assets: maximum number of assets to keep (int)
    '''
    # De-duplicate BEFORE sorting/slicing. The previous version sliced the
    # raw list, so duplicate dates inflated len(dates) and the "oldest"
    # slice could contain a date that also sits inside the kept window —
    # deleting an asset that should have been retained. (Its set() around
    # the slice only de-duplicated the deletions, not the count.)
    dates = sorted(set(dates))
    logging.debug('ordered dates: {}'.format(dates))
    if len(dates) > max_assets:
        # everything before the newest max_assets entries is excess
        for date in dates[:-max_assets]:
            logging.debug('deleting asset from date: {}'.format(date))
            asset_name = os.path.join(
                EE_COLLECTION.format(rw_id=rw_id, varname=varname),
                ASSET_NAME.format(rw_id=rw_id, varname=varname, date=date))
            eeUtil.removeAsset(asset_name)
def deleteExcessAssets(dates, orig_or_reproj, arctic_or_antarctic, max_assets,
                       new_or_hist):
    '''Remove the oldest assets so at most max_assets remain in the collection.'''
    dates.sort()  # ascending → oldest dates lead the list
    logging.debug('ordered dates: {}'.format(dates))
    surplus = len(dates) - max_assets
    if surplus > 0:
        for stale_date in dates[:surplus]:
            asset = getAssetName(stale_date, orig_or_reproj, new_or_hist,
                                 arctic_or_antarctic=arctic_or_antarctic)
            eeUtil.removeAsset(asset)
def deleteExcessAssets(dates, max_assets):
    '''Prune the GEE collection back down to max_assets assets, oldest first.

    INPUT   dates: dates for all the assets currently in the GEE collection;
                dates should be in the format specified in the DATE_FORMAT
                variable (list of strings)
            max_assets: maximum number of assets allowed in the collection (int)
    '''
    # chronological order: the front of the list is the oldest data
    dates.sort()
    overflow = len(dates) - max_assets
    if overflow > 0:
        # remove exactly the `overflow` oldest dates, keeping the newest
        # max_assets assets in place
        for stale in dates[:overflow]:
            eeUtil.removeAsset(getAssetName(stale))
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 0. Initialize GEE
    eeUtil.initJson()

    ### 1. Create collection names, clear if desired
    collections = {
        rw_id: EE_COLLECTION.format(rw_id=rw_id, varname=varname)
        for rw_id, varname in ASSET_NAMES.items()
    }
    if CLEAR_COLLECTION_FIRST:
        for coll_name in collections.values():
            if eeUtil.exists(coll_name):
                eeUtil.removeAsset(coll_name, recursive=True)

    ### 2. Grab existing assets and their dates
    existing_assets = {
        rw_id: checkCreateCollection(coll_name)
        for rw_id, coll_name in collections.items()
    }
    existing_dates = {
        rw_id: [getRasterDate(asset) for asset in assets]
        for rw_id, assets in existing_assets.items()
    }

    # This will be a dict of per-rw_id asset lists
    new_assets = processNewRasterData(existing_dates)
    new_dates = {
        rw_id: [getRasterDate(asset) for asset in assets]
        for rw_id, assets in new_assets.items()
    }

    ### 5. Delete old assets
    for rw_id in collections:
        old = existing_dates[rw_id]
        fresh = new_dates.get(rw_id, [])
        logging.info('Existing assets in {}: {}, new: {}, max: {}'.format(
            rw_id, len(old), len(fresh), MAX_DATES))
        deleteExcessAssets(old + fresh, rw_id, ASSET_NAMES[rw_id], MAX_DATES)

    ###
    logging.info('SUCCESS')
def main():
    '''Ingest new data into EE for each variable in VARS; delete old data.

    Mutates the module-level globals below so helper functions (which read
    them) pick up the per-variable collection/filename settings on each
    loop iteration.
    '''
    global VAR
    global BAND
    global EE_COLLECTION
    global PARENT_FOLDER
    global FILENAME
    global DAYS_TO_AVERAGE
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    # Initialize eeUtil and ee
    eeUtil.initJson()
    initialize_ee()
    # Choose folder/collection/filename templates based on averaging window.
    if DAYS_TO_AVERAGE == 1:
        PARENT_FOLDER = COLLECTION
        EE_COLLECTION_GEN = COLLECTION + '/{var}'
        FILENAME = COLLECTION + '_{var}_{date}'
    else:
        PARENT_FOLDER = COLLECTION + '_{days}day_avg'.format(
            days=DAYS_TO_AVERAGE)
        # %-formatting fills the day count here, leaving '{var}' for the
        # per-variable .format() inside the loop below.
        EE_COLLECTION_GEN = COLLECTION + '_%sday_avg/{var}' % DAYS_TO_AVERAGE
        # NOTE(review): unlike PARENT_FOLDER above, '{days}' is left
        # unfilled here — presumably substituted later along with
        # {var}/{date}; confirm against where FILENAME is formatted.
        FILENAME = COLLECTION + '_{days}day_avg_{var}_{date}'
    for i in range(len(VARS)):
        # Point the module-level config at the current variable/band.
        VAR = VARS[i]
        logging.info('STARTING {var}'.format(var=VAR))
        BAND = BANDS[i]
        EE_COLLECTION = EE_COLLECTION_GEN.format(var=VAR)
        # Clear collection in GEE if desired
        if CLEAR_COLLECTION_FIRST:
            if eeUtil.exists(EE_COLLECTION):
                eeUtil.removeAsset(EE_COLLECTION, recursive=True)
        # 1. Check if collection exists and create
        existing_assets = checkCreateCollection(
            EE_COLLECTION)  #make image collection if doesn't have one
        existing_dates = [getDate(a) for a in existing_assets]
        # 2. Fetch, process, stage, ingest, clean
        new_assets = processNewData(existing_dates)
        new_dates = [getDate(a) for a in new_assets]
        # 3. Delete old assets
        # NOTE(review): after this concatenation the "Existing" count logged
        # below includes the new dates as well.
        existing_dates = existing_dates + new_dates
        logging.info('Existing assets: {}, new: {}, max: {}'.format(
            len(existing_dates), len(new_dates), MAX_ASSETS))
        deleteExcessAssets(existing_dates, MAX_ASSETS)
        # Get most recent update date and report it to Resource Watch
        most_recent_date = get_most_recent_date(EE_COLLECTION)
        lastUpdateDate(DATASET_IDS[VAR], most_recent_date)
        logging.info('SUCCESS for {var}'.format(var=VAR))
def main():
    '''Ingest new data into EE and delete old data

    Handles four collections — (arctic|antarctic) x (orig|reproj) — for the
    near-real-time data, then (when COLLECT_BACK_HISTORY is set) repeats the
    same pipeline per historical month using month-specific collections.
    '''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')
    ### 1. Initialize eeUtil
    eeUtil.initJson()
    ### 2. Create collection names, clear if desired
    arctic_collection_orig = EE_COLLECTION.format(
        arctic_or_antarctic='arctic', orig_or_reproj='orig')
    arctic_collection_reproj = EE_COLLECTION.format(
        arctic_or_antarctic='arctic', orig_or_reproj='reproj')
    antarctic_collection_orig = EE_COLLECTION.format(
        arctic_or_antarctic='antarctic', orig_or_reproj='orig')
    antarctic_collection_reproj = EE_COLLECTION.format(
        arctic_or_antarctic='antarctic', orig_or_reproj='reproj')
    # Index layout used throughout: even = orig, odd = reproj;
    # first two = arctic, last two = antarctic.
    collections = [
        arctic_collection_orig, arctic_collection_reproj,
        antarctic_collection_orig, antarctic_collection_reproj
    ]
    if CLEAR_COLLECTION_FIRST:
        for collection in collections:
            if eeUtil.exists(collection):
                eeUtil.removeAsset(collection, recursive=True)
    ### 3. Process arctic data
    arctic_data = collections[0:2]
    arctic_assets_orig = checkCreateCollection(arctic_data[0])
    arctic_assets_reproj = checkCreateCollection(arctic_data[1])
    arctic_dates_orig = [getRasterDate(a) for a in arctic_assets_orig]
    arctic_dates_reproj = [getRasterDate(a) for a in arctic_assets_reproj]
    # NOTE(review): the 'new' branch keys the fetch off the *reprojected*
    # dates, while the historical branch below uses the *orig* dates —
    # confirm this asymmetry is intentional.
    new_arctic_assets_orig, new_arctic_assets_reproj = processNewRasterData(
        arctic_dates_reproj, 'arctic', new_or_hist='new')
    new_arctic_dates_orig = [getRasterDate(a) for a in new_arctic_assets_orig]
    new_arctic_dates_reproj = [
        getRasterDate(a) for a in new_arctic_assets_reproj
    ]
    ### 4. Process antarctic data
    antarctic_data = collections[2:]
    antarctic_assets_orig = checkCreateCollection(antarctic_data[0])
    antarctic_assets_reproj = checkCreateCollection(antarctic_data[1])
    antarctic_dates_orig = [getRasterDate(a) for a in antarctic_assets_orig]
    antarctic_dates_reproj = [
        getRasterDate(a) for a in antarctic_assets_reproj
    ]
    new_antarctic_assets_orig, new_antarctic_assets_reproj = processNewRasterData(
        antarctic_dates_reproj, 'antarctic', new_or_hist='new')
    new_antarctic_dates_orig = [
        getRasterDate(a) for a in new_antarctic_assets_orig
    ]
    new_antarctic_dates_reproj = [
        getRasterDate(a) for a in new_antarctic_assets_reproj
    ]
    ### 5. Delete old assets
    # e_dates/n_dates follow the same index layout as `collections`.
    e_dates = [
        arctic_dates_orig, arctic_dates_reproj, antarctic_dates_orig,
        antarctic_dates_reproj
    ]
    n_dates = [
        new_arctic_dates_orig, new_arctic_dates_reproj,
        new_antarctic_dates_orig, new_antarctic_dates_reproj
    ]
    for i in range(4):
        orig_or_reproj = 'orig' if i % 2 == 0 else 'reproj'
        arctic_or_antarctic = 'arctic' if i < 2 else 'antarctic'
        e = e_dates[i]
        n = n_dates[i]
        total = e + n
        logging.info('Existing {} {} assets: {}, new: {}, max: {}'.format(
            orig_or_reproj, arctic_or_antarctic, len(e), len(n), MAX_DATES))
        deleteExcessAssets(total, orig_or_reproj, arctic_or_antarctic,
                           MAX_DATES, 'new')
    ###
    for dataset, id in DATASET_ID.items():
        # Get most recent update date
        most_recent_date = get_most_recent_date(dataset)
        current_date = getLastUpdate(id)
        # Only touch Resource Watch when the stored date is stale.
        if current_date != most_recent_date:
            logging.info('Updating last update date and flushing cache.')
            # Update data set's last update date on Resource Watch
            lastUpdateDate(id, most_recent_date)
            # get layer ids and flush tile cache for each
            layer_ids = getLayerIDs(id)
            for layer_id in layer_ids:
                flushTileCache(layer_id)
    ## Process historical data
    if COLLECT_BACK_HISTORY == True:
        for month in HISTORICAL_MONTHS:
            logging.info(
                'Processing historical data for month {}'.format(month))
            ### 2. Create collection names, clear if desired
            # Same four-way split as above, but per-month collections.
            arctic_collection_orig = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='arctic',
                orig_or_reproj='orig',
                month="{:02d}".format(month))
            arctic_collection_reproj = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='arctic',
                orig_or_reproj='reproj',
                month="{:02d}".format(month))
            antarctic_collection_orig = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='antarctic',
                orig_or_reproj='orig',
                month="{:02d}".format(month))
            antarctic_collection_reproj = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='antarctic',
                orig_or_reproj='reproj',
                month="{:02d}".format(month))
            collections = [
                arctic_collection_orig, arctic_collection_reproj,
                antarctic_collection_orig, antarctic_collection_reproj
            ]
            ### 3. Process arctic data
            arctic_data = collections[0:2]
            arctic_assets_orig = checkCreateCollection(arctic_data[0])
            arctic_assets_reproj = checkCreateCollection(arctic_data[1])
            arctic_dates_orig = [
                getRasterDate(a) for a in arctic_assets_orig
            ]
            arctic_dates_reproj = [
                getRasterDate(a) for a in arctic_assets_reproj
            ]
            new_arctic_assets_orig, new_arctic_assets_reproj = processNewRasterData(
                arctic_dates_orig, 'arctic', new_or_hist='hist', month=month)
            new_arctic_dates_orig = [
                getRasterDate(a) for a in new_arctic_assets_orig
            ]
            new_arctic_dates_reproj = [
                getRasterDate(a) for a in new_arctic_assets_reproj
            ]
            ### 4. Process antarctic data
            antarctic_data = collections[2:]
            antarctic_assets_orig = checkCreateCollection(antarctic_data[0])
            antarctic_assets_reproj = checkCreateCollection(antarctic_data[1])
            antarctic_dates_orig = [
                getRasterDate(a) for a in antarctic_assets_orig
            ]
            antarctic_dates_reproj = [
                getRasterDate(a) for a in antarctic_assets_reproj
            ]
            new_antarctic_assets_orig, new_antarctic_assets_reproj = processNewRasterData(
                antarctic_dates_orig, 'antarctic', new_or_hist='hist',
                month=month)
            new_antarctic_dates_orig = [
                getRasterDate(a) for a in new_antarctic_assets_orig
            ]
            new_antarctic_dates_reproj = [
                getRasterDate(a) for a in new_antarctic_assets_reproj
            ]
            ### 5. Delete old assets
            e_dates = [
                arctic_dates_orig, arctic_dates_reproj, antarctic_dates_orig,
                antarctic_dates_reproj
            ]
            n_dates = [
                new_arctic_dates_orig, new_arctic_dates_reproj,
                new_antarctic_dates_orig, new_antarctic_dates_reproj
            ]
            for i in range(4):
                orig_or_reproj = 'orig' if i % 2 == 0 else 'reproj'
                arctic_or_antarctic = 'arctic' if i < 2 else 'antarctic'
                e = e_dates[i]
                n = n_dates[i]
                total = e + n
                # Historical data is logged but NOT pruned (see below).
                logging.info('Existing {} {} assets: {}, new: {}'.format(
                    orig_or_reproj, arctic_or_antarctic, len(e), len(n)))
                #uncomment if we want to put a limit on how many years of historical data we have
                #deleteExcessAssets(total, orig_or_reproj, arctic_or_antarctic, MAX_DATES,'hist')
            ###
            for dataset, id in HIST_DATASET_ID.items():
                # Get most recent update date
                most_recent_date = get_most_recent_date(dataset)
                lastUpdateDate(id, most_recent_date)
    logging.info('SUCCESS')
import eeUtil

# Smoke-test the eeUtil folder lifecycle: create, verify, remove.
eeUtil.initJson()

collection = 'cli_012_co2_concentrations'
test_asset = f'test_{collection}'

print(eeUtil.exists(test_asset))  # presumably False before creation — verify
eeUtil.createFolder(test_asset, True, public=True)
print('hola holita!')
print(eeUtil.exists(test_asset))  # presumably True after creation
eeUtil.removeAsset(test_asset)
print(eeUtil.exists(test_asset))  # presumably False after removal
def deleteExcessAssets(all_assets, max_assets):
    '''Delete the oldest assets if the collection holds more than max_assets.

    INPUT   all_assets: names of all assets currently in the collection
                (list of strings; assumed to sort chronologically — confirm
                against the asset naming scheme)
            max_assets: maximum number of assets to keep (int)
    '''
    # BUG FIX: the previous version's comment claimed "oldest first" but the
    # list was never sorted (every sibling deleteExcessAssets in this repo
    # sorts), so an unsorted caller list could delete arbitrary assets.
    # sorted() (not .sort()) leaves the caller's list untouched.
    ordered = sorted(all_assets)
    if len(ordered) > max_assets:
        # drop everything before the newest max_assets entries
        for asset in ordered[:-max_assets]:
            eeUtil.removeAsset(EE_COLLECTION + '/' + asset)