def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil
    eeUtil.initJson()

    # Clear the GEE collection, if specified above
    if CLEAR_COLLECTION_FIRST:
        if eeUtil.exists(EE_COLLECTION):
            eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    # Check if the collection exists; create it if it does not
    # If it exists, return the list of assets currently in the collection
    existing_assets = checkCreateCollection(EE_COLLECTION)
    existing_dates = [getDate(a) for a in existing_assets]

    # Fetch, process, and upload the new data
    new_assets = processNewData(existing_dates)
    # Get the dates of the new data we have added
    new_dates = [getDate(a) for a in new_assets]

    logging.info('Previous assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))

    # Delete excess assets
    deleteExcessAssets(existing_dates + new_dates, MAX_ASSETS)

    # Update Resource Watch
    updateResourceWatch()

    logging.info('SUCCESS')
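# The mainline above leans on a few helpers defined elsewhere in these scripts.
# For orientation, here is a hedged sketch: checkCreateCollection and getDate
# mirror inline definitions that appear later in this section, while the bodies
# of deleteExcessAssets and getAssetName are assumptions about behavior the real
# scripts define for themselves.
def checkCreateCollection(collection):
    '''List assets in collection, else create a new collection (per the inline definition below)'''
    if eeUtil.exists(collection):
        return eeUtil.ls(collection)
    logging.info('{} does not exist, creating'.format(collection))
    eeUtil.createFolder(collection, True, public=True)
    return []

def getDate(asset):
    '''Assume the date is encoded in the last 10 characters of the asset name, e.g. 2021-01-01'''
    return asset[-10:]

def deleteExcessAssets(dates, max_assets):
    '''Assumed behavior: keep only the newest max_assets dates, deleting the rest'''
    dates.sort()
    if len(dates) > max_assets:
        for date in dates[:-max_assets]:
            # getAssetName (hypothetical) maps a date back to its asset path
            eeUtil.removeAsset(getAssetName(date))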
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil
    eeUtil.initJson()

    # 1. Check if collection exists and create it if not
    existing_assets = checkCreateCollection(EE_COLLECTION)
    existing_dates = [getDate(a) for a in existing_assets]

    # 2. Fetch, process, stage, ingest, clean
    new_assets = processNewData(existing_dates)
    new_dates = [getDate(a) for a in new_assets]

    # 3. Delete old assets
    existing_dates = existing_dates + new_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))
    deleteExcessAssets(existing_dates, MAX_ASSETS)

    # 4. After the asset update, reflect it on the dataset
    most_recent_date = get_most_recent_date(EE_COLLECTION)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Initialize eeUtil and clear the collection in GEE if desired
    eeUtil.initJson()
    if CLEAR_COLLECTION_FIRST:
        if eeUtil.exists(EE_COLLECTION):
            eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    # 1. Check if collection exists and create it if not
    existing_assets = checkCreateCollection(EE_COLLECTION)  # make an image collection if there isn't one
    existing_dates = [getDate(a) for a in existing_assets]

    # 2. Fetch, process, stage, ingest, clean
    new_assets = processNewData(existing_dates)
    new_dates = [getDate(a) for a in new_assets]

    # 3. Delete old assets
    existing_dates = existing_dates + new_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))
    deleteExcessAssets(existing_dates, MAX_ASSETS)
    logging.info(new_dates)

    # Get most recent update date
    most_recent_date = get_most_recent_date(EE_COLLECTION)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Initialize eeUtil
    eeUtil.initJson()

    # Clear the collection in GEE if desired
    if CLEAR_COLLECTION_FIRST:
        if eeUtil.exists(EE_COLLECTION):
            eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    # 1. Check if collection exists and create it if not
    existing_assets = checkCreateCollection(EE_COLLECTION)
    existing_dates = [getDate(a) for a in existing_assets]

    # 2. Fetch, process, stage, ingest, clean
    new_assets = processNewData(existing_dates)
    new_dates = [getDate(a) for a in new_assets]

    # 3. Delete old assets
    existing_dates = existing_dates + new_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))
    deleteExcessAssets(existing_dates, MAX_ASSETS)

    ###
    logging.info('SUCCESS')
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 0. Initialize GEE
    eeUtil.initJson()

    ### 1. Create collection names, clear if desired
    collections = {}
    for rw_id, varname in ASSET_NAMES.items():
        collections[rw_id] = EE_COLLECTION.format(rw_id=rw_id, varname=varname)

    if CLEAR_COLLECTION_FIRST:
        for collection in collections.values():
            if eeUtil.exists(collection):
                eeUtil.removeAsset(collection, recursive=True)

    ### 2. Grab existing assets and their dates
    existing_assets = {}
    for rw_id, coll in collections.items():
        existing_assets[rw_id] = checkCreateCollection(coll)

    existing_dates = {}
    for rw_id, ex_assets in existing_assets.items():
        existing_dates[rw_id] = list(map(getRasterDate, ex_assets))

    ### 3. Fetch, process, and ingest new data
    # This will be a dict of lists of new assets, keyed by rw_id
    new_assets = processNewRasterData(existing_dates)

    new_dates = {}
    for rw_id, nw_assets in new_assets.items():
        new_dates[rw_id] = list(map(getRasterDate, nw_assets))

    ### 4. Delete old assets
    for rw_id, collection in collections.items():
        e = existing_dates[rw_id]
        n = new_dates[rw_id] if rw_id in new_dates else []
        total = e + n
        logging.info('Existing assets in {}: {}, new: {}, max: {}'.format(
            rw_id, len(e), len(n), MAX_DATES))
        deleteExcessAssets(total, rw_id, ASSET_NAMES[rw_id], MAX_DATES)

    ### 5. Get most recent update date and report it to Resource Watch
    for collection, id in DATASET_IDS.items():
        most_recent_date = get_most_recent_date(collection)
        current_date = getLastUpdate(id)

        if current_date != most_recent_date:
            logging.info('Updating last update date and flushing cache.')
            # Update the data set's last update date on Resource Watch
            lastUpdateDate(id, most_recent_date)
            # Get layer IDs and flush the tile cache for each
            layer_ids = getLayerIDs(id)
            for layer_id in layer_ids:
                flushTileCache(layer_id)

    logging.info('SUCCESS')
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil and ee modules
    eeUtil.initJson()
    initialize_ee()

    # Clear the collection in GEE if desired
    if CLEAR_COLLECTION_FIRST:
        clearCollectionMultiVar()

    # Check if the collection exists. If not, create it.
    # Return a list of dates that exist for all variables' collections in GEE (existing_dates),
    # as well as a list of which dates exist for each individual variable (existing_dates_by_var).
    # The latter is used in case a previous run crashed before completing the upload for every variable.
    logging.info('Getting existing dates.')
    existing_dates, existing_dates_by_var = checkCreateCollection(VARS)

    # Get a list of the dates that are available, minus the ones we have
    # already uploaded correctly for all variables.
    logging.info('Getting new dates to pull.')
    all_new_dates, last_date = getNewDates(existing_dates)

    # If new data is available, clear the collection because we want to store
    # the most recent forecast, not the old forecast.
    if all_new_dates:
        logging.info('New forecast available.')
        clearCollectionMultiVar()
    else:
        logging.info('No new forecast.')

    # The Docker container is only big enough to hold 3 files at once,
    # so break the dates into groups of 3 to process
    new_date_groups = [all_new_dates[x:x+3] for x in range(0, len(all_new_dates), 3)]
    for new_dates in new_date_groups:
        # Fetch new files
        logging.info('Fetching files for {}'.format(new_dates))
        files = fetch(new_dates, SOURCE_URL)

        # Process data, one variable at a time
        for var_num in range(len(VARS)):
            # Get the variable name
            var = VARS[var_num]

            # Process new data files; delete all forecast assets currently in the collection
            new_assets = processNewData(files, var_num, last_date)
            logging.info('New assets for {}: {}'.format(var, len(new_assets)))

            logging.info('SUCCESS for {}'.format(var))

        # Delete local netcdf files
        delete_local()

    # Update Resource Watch
    updateResourceWatch()

    logging.info('SUCCESS')
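# The 3-file batching above is plain list slicing; a quick sketch of its
# behavior with hypothetical dates:
all_new_dates = ['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04']
new_date_groups = [all_new_dates[x:x+3] for x in range(0, len(all_new_dates), 3)]
# new_date_groups == [['2021-01-01', '2021-01-02', '2021-01-03'], ['2021-01-04']]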
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 0. Initialize GEE
    eeUtil.initJson()

    ### 1. Create collection names, clear if desired
    collections = {}
    for rw_id, varname in ASSET_NAMES.items():
        collections[rw_id] = EE_COLLECTION.format(rw_id=rw_id, varname=varname)

    if CLEAR_COLLECTION_FIRST:
        for collection in collections.values():
            if eeUtil.exists(collection):
                eeUtil.removeAsset(collection, recursive=True)

    ### 2. Grab existing assets and their dates
    existing_assets = {}
    for rw_id, coll in collections.items():
        existing_assets[rw_id] = checkCreateCollection(coll)

    existing_dates = {}
    for rw_id, ex_assets in existing_assets.items():
        existing_dates[rw_id] = list(map(getRasterDate, ex_assets))

    ### 3. Fetch, process, and ingest new data
    # This will be a dict of lists of new assets, keyed by rw_id
    new_assets = processNewRasterData(existing_dates)

    new_dates = {}
    for rw_id, nw_assets in new_assets.items():
        new_dates[rw_id] = list(map(getRasterDate, nw_assets))

    ### 4. Delete old assets
    for rw_id, collection in collections.items():
        e = existing_dates[rw_id]
        n = new_dates[rw_id] if rw_id in new_dates else []
        total = e + n
        logging.info('Existing assets in {}: {}, new: {}, max: {}'.format(
            rw_id, len(e), len(n), MAX_DATES))
        deleteExcessAssets(total, rw_id, ASSET_NAMES[rw_id], MAX_DATES)

    ###
    logging.info('SUCCESS')
def main():
    global VAR
    global BAND
    global EE_COLLECTION
    global PARENT_FOLDER
    global FILENAME
    global DAYS_TO_AVERAGE

    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)

    # Initialize eeUtil and ee
    eeUtil.initJson()
    initialize_ee()

    if DAYS_TO_AVERAGE == 1:
        PARENT_FOLDER = COLLECTION
        EE_COLLECTION_GEN = COLLECTION + '/{var}'
        FILENAME = COLLECTION + '_{var}_{date}'
    else:
        PARENT_FOLDER = COLLECTION + '_{days}day_avg'.format(days=DAYS_TO_AVERAGE)
        EE_COLLECTION_GEN = COLLECTION + '_%sday_avg/{var}' % DAYS_TO_AVERAGE
        FILENAME = COLLECTION + '_{days}day_avg_{var}_{date}'

    for i in range(len(VARS)):
        VAR = VARS[i]
        logging.info('STARTING {var}'.format(var=VAR))
        BAND = BANDS[i]
        EE_COLLECTION = EE_COLLECTION_GEN.format(var=VAR)

        # Clear the collection in GEE if desired
        if CLEAR_COLLECTION_FIRST:
            if eeUtil.exists(EE_COLLECTION):
                eeUtil.removeAsset(EE_COLLECTION, recursive=True)

        # 1. Check if collection exists and create it if not
        existing_assets = checkCreateCollection(EE_COLLECTION)  # make an image collection if there isn't one
        existing_dates = [getDate(a) for a in existing_assets]

        # 2. Fetch, process, stage, ingest, clean
        new_assets = processNewData(existing_dates)
        new_dates = [getDate(a) for a in new_assets]

        # 3. Delete old assets
        existing_dates = existing_dates + new_dates
        logging.info('Existing assets: {}, new: {}, max: {}'.format(
            len(existing_dates), len(new_dates), MAX_ASSETS))
        deleteExcessAssets(existing_dates, MAX_ASSETS)

        # Get most recent update date
        most_recent_date = get_most_recent_date(EE_COLLECTION)
        lastUpdateDate(DATASET_IDS[VAR], most_recent_date)

        logging.info('SUCCESS for {var}'.format(var=VAR))
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil and ee modules
    eeUtil.initJson()
    initialize_ee()

    # Clear the collection in GEE if desired
    if CLEAR_COLLECTION_FIRST:
        clearCollectionMultiVar()

    # Process data, one variable at a time
    for i in range(len(VARS)):
        # Get the variable name
        var = VARS[i]
        logging.info('STARTING {var}'.format(var=var))

        # Check if the collection exists; create it if it does not
        # If it exists, return the list of assets currently in the collection
        existing_assets = checkCreateCollection('/' + getCollectionName(var))  # make an image collection if there isn't one
        existing_dates = [getDate_GEE(a) for a in existing_assets]

        # Fetch, process, and upload the new data
        new_assets = processNewData(var, existing_dates)
        # Get the dates of the new data we have added
        new_dates = [getDate_GEE(a) for a in new_assets]

        logging.info('Previous assets: {}, new: {}, max: {}'.format(
            len(existing_dates), len(new_dates), MAX_ASSETS))

        # Delete excess assets
        deleteExcessAssets(var, existing_dates + new_dates, MAX_ASSETS)

        logging.info('SUCCESS for {var}'.format(var=var))

    # Update Resource Watch
    updateResourceWatch()

    logging.info('SUCCESS')
def main():
    ###
    # Configure logging
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    # Authenticate to GEE
    eeUtil.initJson()
    ###

    ###
    # Configure the ImageCollection you're going to add the rasters to
    ###
    GS_FOLDER = 'wat_038_modis_surface_water'
    EE_COLLECTION = 'wat_038_modis_surface_water'

    def ic(asset):
        return '{}/{}'.format(EE_COLLECTION, os.path.splitext(asset)[0])

    def checkCreateCollection(collection):
        '''List assets in collection, else create new collection'''
        if eeUtil.exists(collection):
            return eeUtil.ls(collection)
        else:
            logging.info('{} does not exist, creating'.format(collection))
            eeUtil.createFolder(collection, True, public=True)
            return []

    existing_files = checkCreateCollection(EE_COLLECTION)

    ###
    # Obtain names of files to upload
    # Load file names for tifs and netcdfs
    ###
    # TIF_DATA_DIR = 'tifs'
    # os.chdir(TIF_DATA_DIR)
    # tifs = os.listdir('.')  # [f for f in os.listdir('.') if os.path.splitext(f)[1] == '.tif']
    # logging.info('TIFFs: {}'.format(tifs))
    #
    # NC_DATA_DIR = 'ncs'
    # os.chdir(NC_DATA_DIR)
    # ncs = os.listdir('.')  # [f for f in os.listdir('.') if os.path.splitext(f)[1] == '.nc']
    # logging.info('NetCDFs: {}'.format(ncs))

    ###
    # Priority 1: Load files to GEE and register w/ RW API
    ###
    from ftplib import FTP
    ftp = FTP('ftp.soilgrids.org')
    ftp.login()

    lines = []
    ftp.retrlines('NLST', lines.append)

    data = []
    ftp.retrlines('NLST data/recent', data.append)
    data = [f.split('/')[2] for f in data]
    logging.info("Data:")
    logging.info(data)

    import re
    pattern = re.compile('OCDENS_M_sl._250m.tif')
    soilcarbon = [f for f in data if pattern.match(f)]
    logging.info("SoilCarbon data:")
    logging.info(soilcarbon)

    # for datum in data:
    for datum in soilcarbon:
        logging.info('Processing {}'.format(datum))
        with open('ncs/{}'.format(datum), 'wb') as f:
            # The names in soilcarbon are bare file names, so prefix the remote folder
            ftp.retrbinary('RETR data/recent/' + datum, f.write)

    ###
    # Priority 2: Access pre-made SLDs for loading to layers
    ###

    ###
    # Retrieving legends for upload to RW API
    ###
    legends = []
    ftp.retrlines('NLST legends', legends.append)
    slds = [f.split('/')[1] for f in legends if os.path.splitext(f)[1] == '.sld']
    for sld in slds:
        logging.info('Processing {}'.format(sld))
        with open(os.path.join(os.getcwd(), sld), 'wb') as f:
            ftp.retrbinary('RETR legends/' + sld, f.write)

    ftp.close()

    # Q: Is this possible?
    # reduce(lambda obj, elem: obj.append(elem), ftp.retrlines('NLST'), [])

    ###
    # To upload to GEE, need to specify the date
    # Date formats vary by provider, some common ones include:
    ###

    ### Date encoded in asset name
    DATE_FORMAT = '%Y%j'  # Year and day of year

    def getDate(asset):
        return asset[-7:]

    DATE_FORMAT = '%Y-%m-%d'  # Year, month, day

    def getDate(asset):
        return asset[-10:]

    DATE_FORMAT = '%Y'  # Year

    def getDate(asset):
        return asset[-4:]

    ### Constant year
    DATE_FORMAT = '%Y'  # Year

    def getDate(asset):
        return '2017'

    ### Grab dates, create datestamps, upload through GEE
    # The files downloaded above are the rasters to upload
    tifs = ['ncs/{}'.format(f) for f in soilcarbon]
    dates = list(map(getDate, tifs))
    datestamps = [datetime.strptime(date, DATE_FORMAT) for date in dates]
    asset_names = [ic(os.path.basename(t)) for t in tifs]
    eeUtil.uploadAssets(tifs, asset_names, GS_FOLDER, datestamps,
                        public=True, timeout=30000)
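# A quick check of the date formats listed above (example values are hypothetical):
from datetime import datetime

datetime.strptime('2017123', '%Y%j')         # year + day of year -> datetime(2017, 5, 3)
datetime.strptime('2017-05-03', '%Y-%m-%d')  # ISO-style date     -> datetime(2017, 5, 3)
datetime.strptime('2017', '%Y')              # year only          -> datetime(2017, 1, 1)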
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 1. Initialize eeUtil
    eeUtil.initJson()

    ### 2. Create collection names, clear if desired
    arctic_collection_orig = EE_COLLECTION.format(
        arctic_or_antarctic='arctic', orig_or_reproj='orig')
    arctic_collection_reproj = EE_COLLECTION.format(
        arctic_or_antarctic='arctic', orig_or_reproj='reproj')
    antarctic_collection_orig = EE_COLLECTION.format(
        arctic_or_antarctic='antarctic', orig_or_reproj='orig')
    antarctic_collection_reproj = EE_COLLECTION.format(
        arctic_or_antarctic='antarctic', orig_or_reproj='reproj')

    collections = [arctic_collection_orig, arctic_collection_reproj,
                   antarctic_collection_orig, antarctic_collection_reproj]

    if CLEAR_COLLECTION_FIRST:
        for collection in collections:
            if eeUtil.exists(collection):
                eeUtil.removeAsset(collection, recursive=True)

    ### 3. Process arctic data
    arctic_data = collections[0:2]
    arctic_assets_orig = checkCreateCollection(arctic_data[0])
    arctic_assets_reproj = checkCreateCollection(arctic_data[1])
    arctic_dates_orig = [getRasterDate(a) for a in arctic_assets_orig]
    arctic_dates_reproj = [getRasterDate(a) for a in arctic_assets_reproj]

    new_arctic_assets_orig, new_arctic_assets_reproj = processNewRasterData(
        arctic_dates_reproj, 'arctic', new_or_hist='new')
    new_arctic_dates_orig = [getRasterDate(a) for a in new_arctic_assets_orig]
    new_arctic_dates_reproj = [getRasterDate(a) for a in new_arctic_assets_reproj]

    ### 4. Process antarctic data
    antarctic_data = collections[2:]
    antarctic_assets_orig = checkCreateCollection(antarctic_data[0])
    antarctic_assets_reproj = checkCreateCollection(antarctic_data[1])
    antarctic_dates_orig = [getRasterDate(a) for a in antarctic_assets_orig]
    antarctic_dates_reproj = [getRasterDate(a) for a in antarctic_assets_reproj]

    new_antarctic_assets_orig, new_antarctic_assets_reproj = processNewRasterData(
        antarctic_dates_reproj, 'antarctic', new_or_hist='new')
    new_antarctic_dates_orig = [getRasterDate(a) for a in new_antarctic_assets_orig]
    new_antarctic_dates_reproj = [getRasterDate(a) for a in new_antarctic_assets_reproj]

    ### 5. Delete old assets
    e_dates = [arctic_dates_orig, arctic_dates_reproj,
               antarctic_dates_orig, antarctic_dates_reproj]
    n_dates = [new_arctic_dates_orig, new_arctic_dates_reproj,
               new_antarctic_dates_orig, new_antarctic_dates_reproj]

    for i in range(4):
        orig_or_reproj = 'orig' if i % 2 == 0 else 'reproj'
        arctic_or_antarctic = 'arctic' if i < 2 else 'antarctic'
        e = e_dates[i]
        n = n_dates[i]
        total = e + n
        logging.info('Existing {} {} assets: {}, new: {}, max: {}'.format(
            orig_or_reproj, arctic_or_antarctic, len(e), len(n), MAX_DATES))
        deleteExcessAssets(total, orig_or_reproj, arctic_or_antarctic,
                           MAX_DATES, 'new')

    ###
    for dataset, id in DATASET_ID.items():
        # Get most recent update date
        most_recent_date = get_most_recent_date(dataset)
        current_date = getLastUpdate(id)

        if current_date != most_recent_date:
            logging.info('Updating last update date and flushing cache.')
            # Update the data set's last update date on Resource Watch
            lastUpdateDate(id, most_recent_date)
            # Get layer IDs and flush the tile cache for each
            layer_ids = getLayerIDs(id)
            for layer_id in layer_ids:
                flushTileCache(layer_id)

    ## Process historical data
    if COLLECT_BACK_HISTORY:
        for month in HISTORICAL_MONTHS:
            logging.info('Processing historical data for month {}'.format(month))

            ### 2. Create collection names, clear if desired
            arctic_collection_orig = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='arctic', orig_or_reproj='orig',
                month="{:02d}".format(month))
            arctic_collection_reproj = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='arctic', orig_or_reproj='reproj',
                month="{:02d}".format(month))
            antarctic_collection_orig = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='antarctic', orig_or_reproj='orig',
                month="{:02d}".format(month))
            antarctic_collection_reproj = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='antarctic', orig_or_reproj='reproj',
                month="{:02d}".format(month))

            collections = [arctic_collection_orig, arctic_collection_reproj,
                           antarctic_collection_orig, antarctic_collection_reproj]

            ### 3. Process arctic data
            arctic_data = collections[0:2]
            arctic_assets_orig = checkCreateCollection(arctic_data[0])
            arctic_assets_reproj = checkCreateCollection(arctic_data[1])
            arctic_dates_orig = [getRasterDate(a) for a in arctic_assets_orig]
            arctic_dates_reproj = [getRasterDate(a) for a in arctic_assets_reproj]

            new_arctic_assets_orig, new_arctic_assets_reproj = processNewRasterData(
                arctic_dates_orig, 'arctic', new_or_hist='hist', month=month)
            new_arctic_dates_orig = [getRasterDate(a) for a in new_arctic_assets_orig]
            new_arctic_dates_reproj = [getRasterDate(a) for a in new_arctic_assets_reproj]

            ### 4. Process antarctic data
            antarctic_data = collections[2:]
            antarctic_assets_orig = checkCreateCollection(antarctic_data[0])
            antarctic_assets_reproj = checkCreateCollection(antarctic_data[1])
            antarctic_dates_orig = [getRasterDate(a) for a in antarctic_assets_orig]
            antarctic_dates_reproj = [getRasterDate(a) for a in antarctic_assets_reproj]

            new_antarctic_assets_orig, new_antarctic_assets_reproj = processNewRasterData(
                antarctic_dates_orig, 'antarctic', new_or_hist='hist', month=month)
            new_antarctic_dates_orig = [getRasterDate(a) for a in new_antarctic_assets_orig]
            new_antarctic_dates_reproj = [getRasterDate(a) for a in new_antarctic_assets_reproj]

            ### 5. Delete old assets
            e_dates = [arctic_dates_orig, arctic_dates_reproj,
                       antarctic_dates_orig, antarctic_dates_reproj]
            n_dates = [new_arctic_dates_orig, new_arctic_dates_reproj,
                       new_antarctic_dates_orig, new_antarctic_dates_reproj]

            for i in range(4):
                orig_or_reproj = 'orig' if i % 2 == 0 else 'reproj'
                arctic_or_antarctic = 'arctic' if i < 2 else 'antarctic'
                e = e_dates[i]
                n = n_dates[i]
                total = e + n
                logging.info('Existing {} {} assets: {}, new: {}'.format(
                    orig_or_reproj, arctic_or_antarctic, len(e), len(n)))
                # Uncomment to put a limit on how many years of historical data we keep:
                # deleteExcessAssets(total, orig_or_reproj, arctic_or_antarctic, MAX_DATES, 'hist')

            ###
            for dataset, id in HIST_DATASET_ID.items():
                # Get most recent update date
                most_recent_date = get_most_recent_date(dataset)
                lastUpdateDate(id, most_recent_date)

    logging.info('SUCCESS')
import eeUtil

eeUtil.initJson()

collection = 'cli_012_co2_concentrations'
print(eeUtil.exists(f'test_{collection}'))
eeUtil.createFolder(f'test_{collection}', True, public=True)
print('hello there!')
print(eeUtil.exists(f'test_{collection}'))
eeUtil.removeAsset(f'test_{collection}')
print(eeUtil.exists(f'test_{collection}'))
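# If the smoke test above dies midway, the test folder is left behind in GEE.
# A slightly safer variant under the same assumptions wraps the lifecycle in
# try/finally so cleanup always runs:
import eeUtil

eeUtil.initJson()
collection = 'cli_012_co2_concentrations'
test_asset = f'test_{collection}'
try:
    eeUtil.createFolder(test_asset, True, public=True)
    assert eeUtil.exists(test_asset)
finally:
    # Always remove the test folder, even if the assertion fails
    if eeUtil.exists(test_asset):
        eeUtil.removeAsset(test_asset)
assert not eeUtil.exists(test_asset)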
def main():
    '''Ingest new data into EE and delete old data'''
    global VAR
    global EE_COLLECTION
    global EE_COLLECTION_GEN
    global PARENT_FOLDER
    global FILENAME
    global GS_FOLDER

    PARENT_FOLDER = COLLECTION
    EE_COLLECTION_GEN = COLLECTION + '/{var}'
    FILENAME = COLLECTION[29:] + '_{var}_{date}'

    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Initialize eeUtil and clear the collection in GEE if desired
    eeUtil.initJson()
    initialize_ee()
    if CLEAR_COLLECTION_FIRST:
        clearCollection()

    # 1. Check if collection exists and create it if not
    existing_dates, existing_dates_by_var = checkCreateCollection(VARS)

    # Determine which files to fetch
    all_new_dates = getNewDates(existing_dates)

    # If new data is available, clear the collection because we want to store
    # the most recent forecast, not the old forecast.
    if all_new_dates:
        clearCollection()

    # The container is only big enough to hold 3 files at once,
    # so break the dates into groups of 3 to process
    new_date_groups = [all_new_dates[x:x + 3] for x in range(0, len(all_new_dates), 3)]
    for new_dates in new_date_groups:
        # Fetch new files
        logging.info('Fetching files for {}'.format(new_dates))
        files = fetch(new_dates)  # list of locations of netcdfs in the Docker container

        # Get the last file, because it only has one time output,
        # so we need to process it differently
        last_file = files[-1]

        for var_num in range(len(VARS)):
            # Get the variable name
            VAR = VARS[var_num]

            # Specify the GEE collection name and Google Cloud Storage folder names
            EE_COLLECTION = EE_COLLECTION_GEN.format(var=VAR)
            GS_FOLDER = COLLECTION[1:] + '_' + VAR
            existing_assets = eeUtil.ls(EE_COLLECTION)

            # 2. Fetch, process, stage, ingest, clean
            new_assets = processNewData(files, var_num, last_file)
            new_dates = [getDateTime(a) for a in new_assets]

            # 3. Delete old assets
            all_dates = existing_dates_by_var[var_num] + new_dates
            all_assets = np.sort(np.unique(
                existing_assets + [os.path.split(asset)[1] for asset in new_assets]))
            logging.info('Existing assets for {}: {}, new: {}, max: {}'.format(
                VAR, len(all_dates), len(new_dates), MAX_ASSETS))
            deleteExcessAssets(all_assets, MAX_ASSETS)
            logging.info('SUCCESS for {}'.format(VAR))

            if var_num == len(VARS) - 1:
                # Get most recent update date
                most_recent_date = get_most_recent_date(all_assets)
                lastUpdateDate(DATASET_ID, most_recent_date)

        # Delete local netcdf files
        if DELETE_LOCAL:
            logging.info('Cleaning local NetCDF files')
            for f in files:
                os.remove(f)
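# The asset bookkeeping above merges existing and new asset names through
# NumPy; a short illustration with hypothetical names:
import numpy as np

existing_assets = ['asset_20210101', 'asset_20210102']
new_assets = ['asset_20210102', 'asset_20210103']
all_assets = np.sort(np.unique(existing_assets + new_assets))
# all_assets -> ['asset_20210101', 'asset_20210102', 'asset_20210103']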
def main():
    ###
    # Configure logging
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    # Authenticate to GEE
    eeUtil.initJson()
    ###

    ###
    # Configure the ImageCollection you're going to add the rasters to
    ###
    GS_FOLDER = 'foo_054_soil_organic_carbon'
    EE_COLLECTION = 'foo_054_soil_organic_carbon'

    def ic(asset):
        return '{}/{}'.format(EE_COLLECTION, os.path.splitext(asset)[0])

    def checkCreateCollection(collection):
        '''List assets in collection, else create new collection'''
        if eeUtil.exists(collection):
            return eeUtil.ls(collection)
        else:
            logging.info('{} does not exist, creating'.format(collection))
            eeUtil.createFolder(collection, True, public=True)
            return []

    existing_files = checkCreateCollection(EE_COLLECTION)

    ###
    # Obtain names of files to upload
    ###

    ###
    # Priority 1: Load files to GEE and register w/ RW API
    ###
    from ftplib import FTP
    ftp = FTP('ftp.soilgrids.org')
    ftp.login()

    folders = []
    ftp.retrlines('NLST', folders.append)
    logging.info("Folders:")
    logging.info(folders)

    data = []
    ftp.retrlines('NLST data/recent', data.append)
    data = [f.split('/')[2] for f in data]
    logging.info("Data:")
    logging.info(data)

    import re
    # Matches soil carbon for different depths:
    # 0, 5, 15, 30, 60, 100, 200 cm depth tifs available, labeled sl1 - sl7
    # http://data.isric.org/geonetwork/srv/eng/catalog.search;jsessionid=A5137293CC6B3D96CBA35808CA155341#/metadata/98062ae9-911d-4e04-80a9-e4b480f87799
    pattern = re.compile('OCSTHA_M_sd._250m.tif')
    soilcarbon = [f for f in data if pattern.match(f)]
    logging.info("SoilCarbon data:")
    logging.info(soilcarbon)

    SOURCE_URL = 'ftp://ftp.soilgrids.org/data/recent/{f}'

    def getUrl(lvl):
        return SOURCE_URL.format(f=lvl)

    def getFilename(lvl):
        return 'tifs/{}'.format(lvl)

    ## Download with ftplib
    # Track progress:
    # https://stackoverflow.com/questions/21343029/how-do-i-keep-track-of-percentage-downloaded-with-ftp-retrbinary
    def download_file(f, block, totalSize, sizeWritten):
        f.write(block)
        sizeWritten.append(len(block))
        logging.info("{} = size written, {} = total size".format(
            sum(sizeWritten), totalSize))
        percentComplete = sum(sizeWritten) / totalSize
        logging.info("{} percent complete".format(percentComplete))

    # Use a loop variable that doesn't shadow the data list above
    for datum in soilcarbon:
        logging.info('Processing {}'.format(datum))
        totalSize = ftp.size('data/recent/' + datum)
        sizeWritten = []
        with open('tifs/{}'.format(datum), 'wb') as f:
            ftp.retrbinary('RETR data/recent/' + datum,
                           lambda block: download_file(f, block, totalSize, sizeWritten))

    ## Download with urllib
    # def fetch(files):
    #     '''Fetch files by datestamp'''
    #     tifs = []
    #     for lvl in files:
    #         url = getUrl(lvl)
    #         f = getFilename(lvl)
    #         logging.debug('Fetching {}'.format(url))
    #         # New data may not yet be posted
    #         try:
    #             urllib.request.urlretrieve(url, f)
    #             tifs.append(f)
    #         except Exception as e:
    #             logging.warning('Could not fetch {}'.format(url))
    #             logging.debug(e)
    #     return tifs
    #
    # tifs = fetch(soilcarbon)

    ###
    # To upload to GEE, need to specify the date
    # Date formats vary by provider, some common ones include:
    ###

    ### Constant year
    DATE_FORMAT = '%Y'  # Year

    def getDate(asset):
        return '2017'

    ### Grab dates, create datestamps, upload through GEE
    # The files downloaded above are the rasters to upload
    tifs = [getFilename(f) for f in soilcarbon]
    dates = list(map(getDate, tifs))
    datestamps = [datetime.strptime(date, DATE_FORMAT) for date in dates]
    asset_names = [ic(os.path.basename(t)) for t in tifs]
    eeUtil.uploadAssets(tifs, asset_names, GS_FOLDER, datestamps,
                        public=True, timeout=30000)

    ###
    # Upload to RW API
    # For this and writing in the SLDs, could use Brookie's class
    # Would match the SLD name to the tif name, pair them and upload (like a zip)
    ###
    API_TOKEN = os.environ.get('rw_api_token', None)

    def createHeaders():
        return {
            'content-type': "application/json",
            'authorization': "Bearer {}".format(API_TOKEN)
        }

    def upload_ic_to_backoffice(wri_id, imageCollectionName, datasetName):
        ds_specs = {
            "connectorType": "rest",
            "provider": "gee",
            "tableName": imageCollectionName,
            "application": ["rw"],
            "geoInfo": True,
            "type": "raster",
            "name": "{}_{}".format(wri_id, datasetName)
        }
        create_res = req.request(
            "POST",
            'https://staging-api.globalforestwatch.org/v1/dataset',
            data=json.dumps(ds_specs),
            headers=createHeaders())
        logging.info(create_res.text)
        return create_res.json()['data']['id']

    rw_id = upload_ic_to_backoffice('foo.054', EE_COLLECTION, 'Soil Organic Carbon')

    ###
    # Priority 2: Access pre-made SLDs for loading to layers
    ###

    ###
    # Retrieving legends for upload to RW API
    ###
    legends = []
    ftp.retrlines('NLST legends', legends.append)
    slds = [f.split('/')[1] for f in legends if os.path.splitext(f)[1] == '.sld']
    for sld in slds:
        logging.info('Processing {}'.format(sld))
        with open('slds/{}'.format(sld), 'wb') as f:
            ftp.retrbinary('RETR legends/' + sld, f.write)

    ftp.close()