Example #1
def main():

    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil
    eeUtil.initJson()

    # Clear the GEE collection, if specified above
    if CLEAR_COLLECTION_FIRST:
        if eeUtil.exists(EE_COLLECTION):
            eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    # Check if collection exists, create it if it does not
    # If it exists return the list of assets currently in the collection
    existing_assets = checkCreateCollection(EE_COLLECTION)
    existing_dates = [getDate(a) for a in existing_assets]

    # Fetch, process, and upload the new data
    new_assets = processNewData(existing_dates)
    # Get the dates of the new data we have added
    new_dates = [getDate(a) for a in new_assets]

    logging.info('Previous assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))

    # Delete excess assets
    deleteExcessAssets(existing_dates + new_dates, MAX_ASSETS)

    # Update Resource Watch
    updateResourceWatch()

    logging.info('SUCCESS')
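
Helpers such as checkCreateCollection, getDate, and deleteExcessAssets are defined elsewhere in these scripts. As a rough sketch only, assuming eeUtil's exists/ls/createFolder/removeAsset behave as they are used elsewhere on this page and that each asset name encodes a date, they might look roughly like this (getAssetName is a hypothetical date-to-asset-path helper):

import logging

import eeUtil

def checkCreateCollection(collection):
    '''Return the assets in the collection, creating the collection first if it does not exist.'''
    if eeUtil.exists(collection):
        return eeUtil.ls(collection)
    logging.info('{} does not exist, creating'.format(collection))
    eeUtil.createFolder(collection, True, public=True)
    return []

def deleteExcessAssets(dates, max_assets):
    '''Keep only the newest max_assets dates and remove the rest from GEE.'''
    if len(dates) > max_assets:
        # sort oldest first; everything beyond the cap gets deleted
        for date in sorted(dates)[:-max_assets]:
            eeUtil.removeAsset(getAssetName(date))  # getAssetName: hypothetical helper mapping a date to its asset path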
Example #2
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil
    eeUtil.initJson()

    # 1. Check if collection exists and create
    existing_assets = checkCreateCollection(EE_COLLECTION)
    existing_dates = [getDate(a) for a in existing_assets]

    # 2. Fetch, process, stage, ingest, clean
    new_assets = processNewData(existing_dates)
    new_dates = [getDate(a) for a in new_assets]

    # 3. Delete old assets
    existing_dates = existing_dates + new_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))
    deleteExcessAssets(existing_dates, MAX_ASSETS)

    # 4. After the asset update, reflect it on the dataset
    most_recent_date = get_most_recent_date(EE_COLLECTION)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
Example #3
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Initialize eeUtil and clear collection in GEE if desired
    eeUtil.initJson()

    if CLEAR_COLLECTION_FIRST:
        if eeUtil.exists(EE_COLLECTION):
            eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    # 1. Check if collection exists and create
    existing_assets = checkCreateCollection(EE_COLLECTION)  # make image collection if it doesn't exist
    existing_dates = [getDate(a) for a in existing_assets]

    # 2. Fetch, process, stage, ingest, clean
    new_assets = processNewData(existing_dates)
    new_dates = [getDate(a) for a in new_assets]

    # 3. Delete old assets
    existing_dates = existing_dates + new_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))
    deleteExcessAssets(existing_dates, MAX_ASSETS)

    logging.info(new_dates)

    # Get most recent update date
    most_recent_date = get_most_recent_date(EE_COLLECTION)
    lastUpdateDate(DATASET_ID, most_recent_date)

    logging.info('SUCCESS')
Example #4
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    # Initialize eeUtil
    eeUtil.initJson()

    # 1. Check if collection exists and create
    if CLEAR_COLLECTION_FIRST:
        if eeUtil.exists(EE_COLLECTION):
            eeUtil.removeAsset(EE_COLLECTION, recursive=True)

    existing_assets = checkCreateCollection(EE_COLLECTION)
    existing_dates = [getDate(a) for a in existing_assets]

    # 2. Fetch, process, stage, ingest, clean
    new_assets = processNewData(existing_dates)
    new_dates = [getDate(a) for a in new_assets]

    # 3. Delete old assets
    existing_dates = existing_dates + new_dates
    logging.info('Existing assets: {}, new: {}, max: {}'.format(
        len(existing_dates), len(new_dates), MAX_ASSETS))
    deleteExcessAssets(existing_dates, MAX_ASSETS)

    ###

    logging.info('SUCCESS')
Example #5
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 0. Initialize GEE
    eeUtil.initJson()

    ### 1. Create collection names, clear if desired
    collections = {}
    for rw_id, varname in ASSET_NAMES.items():
        collections[rw_id] = EE_COLLECTION.format(rw_id=rw_id, varname=varname)

    if CLEAR_COLLECTION_FIRST:
        for collection in collections.values():
            if eeUtil.exists(collection):
                eeUtil.removeAsset(collection, recursive=True)

    ### 2. Grab existing assets and their dates
    existing_assets = {}
    for rw_id, coll in collections.items():
        existing_assets[rw_id] = checkCreateCollection(coll)

    existing_dates = {}
    for rw_id, ex_assets in existing_assets.items():
        existing_dates[rw_id] = list(map(getRasterDate, ex_assets))

    # This will be a dict of new assets keyed by rw_id
    new_assets = processNewRasterData(existing_dates)

    new_dates = {}
    for rw_id, nw_assets in new_assets.items():
        new_dates[rw_id] = list(map(getRasterDate, nw_assets))

    ### 5. Delete old assets
    for rw_id, collection in collections.items():
        e = existing_dates[rw_id]
        n = new_dates[rw_id] if rw_id in new_dates else []
        total = e + n
        logging.info('Existing assets in {}: {}, new: {}, max: {}'.format(
            rw_id, len(e), len(n), MAX_DATES))
        deleteExcessAssets(total, rw_id, ASSET_NAMES[rw_id], MAX_DATES)

    # Get most recent update date
    for collection, id in DATASET_IDS.items():
        most_recent_date = get_most_recent_date(collection)
        current_date = getLastUpdate(id)

        if current_date != most_recent_date:
            logging.info('Updating last update date and flushing cache.')
            # Update data set's last update date on Resource Watch
            lastUpdateDate(id, most_recent_date)
            # get layer ids and flush tile cache for each
            layer_ids = getLayerIDs(id)
            for layer_id in layer_ids:
                flushTileCache(layer_id)

    logging.info('SUCCESS')
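
get_most_recent_date, getLastUpdate, lastUpdateDate, getLayerIDs, and flushTileCache are Resource Watch helpers defined elsewhere in the script. Purely as an illustrative sketch, assuming the asset names carry a parseable date (as getRasterDate implies) and a date format of '%Y%m%d' (an assumption, not taken from the source), get_most_recent_date could be written like this:

from datetime import datetime

DATE_FORMAT = '%Y%m%d'  # assumed format; the real one depends on the asset naming scheme

def get_most_recent_date(collection):
    '''Return the newest date found among the assets in the collection.'''
    existing_assets = checkCreateCollection(collection)         # list assets, creating the collection if needed
    existing_dates = [getRasterDate(a) for a in existing_assets]
    existing_dates.sort()
    return datetime.strptime(existing_dates[-1], DATE_FORMAT)   # newest date as a datetime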
Example #6
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil and ee modules
    eeUtil.initJson()
    initialize_ee()

    # Clear collection in GEE if desired
    if CLEAR_COLLECTION_FIRST:
        clearCollectionMultiVar()

    # Check if collection exists. If not, create it.
    # Return a list of dates that exist for all variables collections in GEE (existing_dates),
    # as well as a list of which dates exist for each individual variable (existing_dates_by_var).
    # The latter will be used in case the previous script run crashed before completing the data upload for every variable.
    logging.info('Getting existing dates.')
    existing_dates, existing_dates_by_var = checkCreateCollection(VARS)

    # Get a list of the dates that are available, minus the ones we have already uploaded correctly for all variables.
    logging.info('Getting new dates to pull.')
    all_new_dates, last_date = getNewDates(existing_dates)

    # if new data is available, clear the collection because we want to store the most
    # recent forecast, not the old forecast
    if all_new_dates:
        logging.info('New forecast available.')
        clearCollectionMultiVar()
    else:
        logging.info('No new forecast.')

    # The Docker container is only big enough to hold 3 files at once,
    # so break into groups to process
    new_date_groups = [all_new_dates[x:x+3] for x in range(0, len(all_new_dates), 3)]
    for new_dates in new_date_groups:
        # Fetch new files
        logging.info('Fetching files for {}'.format(new_dates))
        files = fetch(new_dates, SOURCE_URL)

        # Process data, one variable at a time
        for var_num in range(len(VARS)):
            # get variable name
            var = VARS[var_num]

            # Process new data files, delete all forecast assets currently in collection
            new_assets = processNewData(files, var_num, last_date)

            logging.info('New assets for {}: {}'.format(var, len(new_assets)))
            logging.info('SUCCESS for {}'.format(var))

    # Delete local netcdf files
    delete_local()

    # Update Resource Watch
    updateResourceWatch()

    logging.info('SUCCESS')
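
clearCollectionMultiVar is not shown in this example. A minimal sketch, assuming one GEE collection per variable built from a path template (EE_COLLECTION_GEN is a hypothetical name for that template), could be:

import eeUtil

def clearCollectionMultiVar():
    '''Delete every per-variable collection so the new forecast fully replaces the old one.'''
    for var in VARS:
        collection = EE_COLLECTION_GEN.format(var=var)  # hypothetical per-variable path template
        if eeUtil.exists(collection):
            eeUtil.removeAsset(collection, recursive=True)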
Example #7
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 0. Initialize GEE
    eeUtil.initJson()

    ### 1. Create collection names, clear if desired
    collections = {}
    for rw_id, varname in ASSET_NAMES.items():
        collections[rw_id] = EE_COLLECTION.format(rw_id=rw_id, varname=varname)

    if CLEAR_COLLECTION_FIRST:
        for collection in collections.values():
            if eeUtil.exists(collection):
                eeUtil.removeAsset(collection, recursive=True)

    ### 2. Grab existing assets and their dates
    existing_assets = {}
    for rw_id, coll in collections.items():
        existing_assets[rw_id] = checkCreateCollection(coll)

    existing_dates = {}
    for rw_id, ex_assets in existing_assets.items():
        existing_dates[rw_id] = list(map(getRasterDate, ex_assets))

    # This will be a dict of new assets keyed by rw_id
    new_assets = processNewRasterData(existing_dates)

    new_dates = {}
    for rw_id, nw_assets in new_assets.items():
        new_dates[rw_id] = list(map(getRasterDate, nw_assets))

    ### 5. Delete old assets
    for rw_id, collection in collections.items():
        e = existing_dates[rw_id]
        n = new_dates[rw_id] if rw_id in new_dates else []
        total = e + n
        logging.info('Existing assets in {}: {}, new: {}, max: {}'.format(
            rw_id, len(e), len(n), MAX_DATES))
        deleteExcessAssets(total, rw_id, ASSET_NAMES[rw_id], MAX_DATES)

    ###

    logging.info('SUCCESS')
Example #8
def main():
    global VAR
    global BAND
    global EE_COLLECTION
    global PARENT_FOLDER
    global FILENAME
    global DAYS_TO_AVERAGE
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    # Initialize eeUtil and ee
    eeUtil.initJson()
    initialize_ee()
    if DAYS_TO_AVERAGE == 1:
        PARENT_FOLDER = COLLECTION
        EE_COLLECTION_GEN = COLLECTION + '/{var}'
        FILENAME = COLLECTION + '_{var}_{date}'
    else:
        PARENT_FOLDER = COLLECTION + '_{days}day_avg'.format(
            days=DAYS_TO_AVERAGE)
        EE_COLLECTION_GEN = COLLECTION + '_%sday_avg/{var}' % DAYS_TO_AVERAGE
        FILENAME = COLLECTION + '_{days}day_avg_{var}_{date}'
    for i in range(len(VARS)):
        VAR = VARS[i]
        logging.info('STARTING {var}'.format(var=VAR))
        BAND = BANDS[i]
        EE_COLLECTION = EE_COLLECTION_GEN.format(var=VAR)
        # Clear collection in GEE if desired
        if CLEAR_COLLECTION_FIRST:
            if eeUtil.exists(EE_COLLECTION):
                eeUtil.removeAsset(EE_COLLECTION, recursive=True)
        # 1. Check if collection exists and create
        existing_assets = checkCreateCollection(EE_COLLECTION)  # make image collection if it doesn't exist
        existing_dates = [getDate(a) for a in existing_assets]
        # 2. Fetch, process, stage, ingest, clean
        new_assets = processNewData(existing_dates)
        new_dates = [getDate(a) for a in new_assets]
        # 3. Delete old assets
        existing_dates = existing_dates + new_dates
        logging.info('Existing assets: {}, new: {}, max: {}'.format(
            len(existing_dates), len(new_dates), MAX_ASSETS))
        deleteExcessAssets(existing_dates, MAX_ASSETS)
        # Get most recent update date
        most_recent_date = get_most_recent_date(EE_COLLECTION)
        lastUpdateDate(DATASET_IDS[VAR], most_recent_date)
        logging.info('SUCCESS for {var}'.format(var=VAR))
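
To make the template expansion above concrete, here is a small, self-contained illustration with made-up values (COLLECTION, VAR, and DAYS_TO_AVERAGE below are placeholders, not values from the source):

COLLECTION = 'cit_003_air_quality'  # placeholder collection name
DAYS_TO_AVERAGE = 3
VAR = 'NO2'

PARENT_FOLDER = COLLECTION + '_{days}day_avg'.format(days=DAYS_TO_AVERAGE)
EE_COLLECTION_GEN = COLLECTION + '_%sday_avg/{var}' % DAYS_TO_AVERAGE

print(PARENT_FOLDER)                      # cit_003_air_quality_3day_avg
print(EE_COLLECTION_GEN.format(var=VAR))  # cit_003_air_quality_3day_avg/NO2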
Example #9
def main():
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
    logging.info('STARTING')

    # Initialize eeUtil and ee modules
    eeUtil.initJson()
    initialize_ee()

    # Clear collection in GEE if desired
    if CLEAR_COLLECTION_FIRST:
        clearCollectionMultiVar()

    # Process data, one variable at a time
    for i in range(len(VARS)):
        # get variable name
        var = VARS[i]
        logging.info('STARTING {var}'.format(var=var))

        # Check if collection exists, create it if it does not
        # If it exists return the list of assets currently in the collection
        existing_assets = checkCreateCollection('/' + getCollectionName(var))  # make image collection if it doesn't exist
        existing_dates = [getDate_GEE(a) for a in existing_assets]

        # Fetch, process, and upload the new data
        new_assets = processNewData(var, existing_dates)
        # Get the dates of the new data we have added
        new_dates = [getDate_GEE(a) for a in new_assets]

        logging.info('Previous assets: {}, new: {}, max: {}'.format(
            len(existing_dates), len(new_dates), MAX_ASSETS))

        # Delete excess assets
        deleteExcessAssets(var, existing_dates+new_dates, MAX_ASSETS)
        logging.info('SUCCESS for {var}'.format(var=var))

    # Update Resource Watch
    updateResourceWatch()

    logging.info('SUCCESS')
Example #10
def main():

    ###
    # Configure logging
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    # Authenticate to GEE
    eeUtil.initJson()
    ###

    ###
    # Configure the ImageCollection you're going to add the rasters to
    ###

    GS_FOLDER = 'wat_038_modis_surface_water'
    EE_COLLECTION = 'wat_038_modis_surface_water'

    def ic(asset):
        return '{}/{}'.format(EE_COLLECTION, os.path.splitext(asset)[0])

    def checkCreateCollection(collection):
        '''List assets in the collection, or create the collection if it does not exist'''
        if eeUtil.exists(collection):
            return eeUtil.ls(collection)
        else:
            logging.info('{} does not exist, creating'.format(collection))
            eeUtil.createFolder(collection, True, public=True)
            return []

    existing_files = checkCreateCollection(EE_COLLECTION)

    ###
    # Obtain names of files to upload
    # Load file names for tifs and netcdfs
    ###

    # TIF_DATA_DIR = 'tifs'
    # os.chdir(TIF_DATA_DIR)
    # tifs = os.listdir('.') #[f for f in os.listdir('.') if os.path.splitext(f)[1] == '.tif']
    # logging.info('TIFFs: {}'.format(tifs))
    #
    # NC_DATA_DIR = 'ncs'
    # os.chdir(NC_DATA_DIR)
    # ncs = os.listdir('.') #[f for f in os.listdir('.') if os.path.splitext(f)[1] == '.tif']
    # logging.info('NetCDFs: {}'.format(ncs))

    ###
    # Priority 1: Load files to GEE and register w/ RW API
    ###

    from ftplib import FTP
    ftp = FTP('ftp.soilgrids.org')
    ftp.login()

    lines = []
    ftp.retrlines('NLST', lines.append)

    data = []
    ftp.retrlines('NLST data/recent', data.append)
    data = [f.split('/')[2] for f in data]
    logging.info("Data:")
    logging.info(data)

    import re

    pattern = re.compile('OCDENS_M_sl._250m.tif')
    soilcarbon = [f for f in data if pattern.match(f)]
    logging.info("SoilCarbon data:")
    logging.info(soilcarbon)

    #for datum in data:
    for datum in soilcarbon:
        logging.info('Processing {}'.format(datum))
        with open('ncs/{}'.format(datum), 'wb') as f:
            ftp.retrbinary('RETR data/recent/' + datum, f.write)

###
# Priority 2: Access pre-made SLDs for loading to layers ###
###

###
# Retrieving legends for upload to RW API
###

    legends = []
    ftp.retrlines('NLST legends', legends.append)
    slds = [
        f.split('/')[1] for f in legends if os.path.splitext(f)[1] == '.sld'
    ]

    for sld in slds:
        logging.info('Processing {}'.format(sld))
        with open(os.path.join(os.getcwd(), sld), 'wb') as f:
            ftp.retrbinary('RETR legends/' + sld, f.write)

    ftp.close()

    # Q: Is this possible?
    ### reduce(lambda obj, elem: obj.append(elem),  ftp.retrlines('NLST'), [])

    ###
    # To upload to GEE, need to specify the date
    # Date formats vary by provider, some common ones include:
    ###

    ### Date encoded in asset name

    DATE_FORMAT = '%Y%j'  # Year and day of year

    def getDate(asset):
        return asset[-7:]

    DATE_FORMAT = '%Y-%m-%d'  # Year, month, day

    def getDate(asset):
        return asset[-10:]

    DATE_FORMAT = '%Y'  # Year

    def getDate(asset):
        return asset[-4:]

    ### Constant year

    DATE_FORMAT = '%Y'  # Year

    def getDate(asset):
        return '2017'

    ### Grab dates, create datestamps, upload through GEE

    tifs = ['ncs/{}'.format(f) for f in soilcarbon]  # local paths of the files downloaded above (assumed)
    dates = list(map(getDate, tifs))
    datestamps = [datetime.strptime(date, DATE_FORMAT) for date in dates]

    asset_names = [ic(t) for t in tifs]
    eeUtil.uploadAssets(tifs,
                        asset_names,
                        GS_FOLDER,
                        datestamps,
                        public=True,
                        timeout=30000)
Example #11
def main():
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')

    ### 1. Initialize eeUtil
    eeUtil.initJson()

    ### 2. Create collection names, clear if desired
    arctic_collection_orig = EE_COLLECTION.format(arctic_or_antarctic='arctic',
                                                  orig_or_reproj='orig')
    arctic_collection_reproj = EE_COLLECTION.format(
        arctic_or_antarctic='arctic', orig_or_reproj='reproj')
    antarctic_collection_orig = EE_COLLECTION.format(
        arctic_or_antarctic='antarctic', orig_or_reproj='orig')
    antarctic_collection_reproj = EE_COLLECTION.format(
        arctic_or_antarctic='antarctic', orig_or_reproj='reproj')

    collections = [
        arctic_collection_orig, arctic_collection_reproj,
        antarctic_collection_orig, antarctic_collection_reproj
    ]

    if CLEAR_COLLECTION_FIRST:
        for collection in collections:
            if eeUtil.exists(collection):
                eeUtil.removeAsset(collection, recursive=True)

    ### 3. Process arctic data
    arctic_data = collections[0:2]
    arctic_assets_orig = checkCreateCollection(arctic_data[0])
    arctic_assets_reproj = checkCreateCollection(arctic_data[1])
    arctic_dates_orig = [getRasterDate(a) for a in arctic_assets_orig]
    arctic_dates_reproj = [getRasterDate(a) for a in arctic_assets_reproj]

    new_arctic_assets_orig, new_arctic_assets_reproj = processNewRasterData(
        arctic_dates_reproj, 'arctic', new_or_hist='new')
    new_arctic_dates_orig = [getRasterDate(a) for a in new_arctic_assets_orig]
    new_arctic_dates_reproj = [
        getRasterDate(a) for a in new_arctic_assets_reproj
    ]

    ### 4. Process antarctic data
    antarctic_data = collections[2:]
    antarctic_assets_orig = checkCreateCollection(antarctic_data[0])
    antarctic_assets_reproj = checkCreateCollection(antarctic_data[1])
    antarctic_dates_orig = [getRasterDate(a) for a in antarctic_assets_orig]
    antarctic_dates_reproj = [
        getRasterDate(a) for a in antarctic_assets_reproj
    ]

    new_antarctic_assets_orig, new_antarctic_assets_reproj = processNewRasterData(
        antarctic_dates_reproj, 'antarctic', new_or_hist='new')
    new_antarctic_dates_orig = [
        getRasterDate(a) for a in new_antarctic_assets_orig
    ]
    new_antarctic_dates_reproj = [
        getRasterDate(a) for a in new_antarctic_assets_reproj
    ]

    ### 5. Delete old assets
    e_dates = [
        arctic_dates_orig, arctic_dates_reproj, antarctic_dates_orig,
        antarctic_dates_reproj
    ]
    n_dates = [
        new_arctic_dates_orig, new_arctic_dates_reproj,
        new_antarctic_dates_orig, new_antarctic_dates_reproj
    ]

    for i in range(4):
        orig_or_reproj = 'orig' if i % 2 == 0 else 'reproj'
        arctic_or_antarctic = 'arctic' if i < 2 else 'antarctic'
        e = e_dates[i]
        n = n_dates[i]
        total = e + n

        logging.info('Existing {} {} assets: {}, new: {}, max: {}'.format(
            orig_or_reproj, arctic_or_antarctic, len(e), len(n), MAX_DATES))
        deleteExcessAssets(total, orig_or_reproj, arctic_or_antarctic,
                           MAX_DATES, 'new')

    ###
    for dataset, id in DATASET_ID.items():
        # Get most recent update date
        most_recent_date = get_most_recent_date(dataset)
        current_date = getLastUpdate(id)

        if current_date != most_recent_date:
            logging.info('Updating last update date and flushing cache.')
            # Update data set's last update date on Resource Watch
            lastUpdateDate(id, most_recent_date)
            # get layer ids and flush tile cache for each
            layer_ids = getLayerIDs(id)
            for layer_id in layer_ids:
                flushTileCache(layer_id)

    ## Process historical data
    if COLLECT_BACK_HISTORY:
        for month in HISTORICAL_MONTHS:
            logging.info(
                'Processing historical data for month {}'.format(month))
            ### 2. Create collection names, clear if desired
            arctic_collection_orig = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='arctic',
                orig_or_reproj='orig',
                month="{:02d}".format(month))
            arctic_collection_reproj = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='arctic',
                orig_or_reproj='reproj',
                month="{:02d}".format(month))
            antarctic_collection_orig = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='antarctic',
                orig_or_reproj='orig',
                month="{:02d}".format(month))
            antarctic_collection_reproj = EE_COLLECTION_BY_MONTH.format(
                arctic_or_antarctic='antarctic',
                orig_or_reproj='reproj',
                month="{:02d}".format(month))

            collections = [
                arctic_collection_orig, arctic_collection_reproj,
                antarctic_collection_orig, antarctic_collection_reproj
            ]

            ### 3. Process arctic data
            arctic_data = collections[0:2]
            arctic_assets_orig = checkCreateCollection(arctic_data[0])
            arctic_assets_reproj = checkCreateCollection(arctic_data[1])
            arctic_dates_orig = [getRasterDate(a) for a in arctic_assets_orig]
            arctic_dates_reproj = [
                getRasterDate(a) for a in arctic_assets_reproj
            ]

            new_arctic_assets_orig, new_arctic_assets_reproj = processNewRasterData(
                arctic_dates_orig, 'arctic', new_or_hist='hist', month=month)
            new_arctic_dates_orig = [
                getRasterDate(a) for a in new_arctic_assets_orig
            ]
            new_arctic_dates_reproj = [
                getRasterDate(a) for a in new_arctic_assets_reproj
            ]

            ### 4. Process antarctic data
            antarctic_data = collections[2:]
            antarctic_assets_orig = checkCreateCollection(antarctic_data[0])
            antarctic_assets_reproj = checkCreateCollection(antarctic_data[1])
            antarctic_dates_orig = [
                getRasterDate(a) for a in antarctic_assets_orig
            ]
            antarctic_dates_reproj = [
                getRasterDate(a) for a in antarctic_assets_reproj
            ]

            new_antarctic_assets_orig, new_antarctic_assets_reproj = processNewRasterData(
                antarctic_dates_orig,
                'antarctic',
                new_or_hist='hist',
                month=month)
            new_antarctic_dates_orig = [
                getRasterDate(a) for a in new_antarctic_assets_orig
            ]
            new_antarctic_dates_reproj = [
                getRasterDate(a) for a in new_antarctic_assets_reproj
            ]

            ### 5. Delete old assets
            e_dates = [
                arctic_dates_orig, arctic_dates_reproj, antarctic_dates_orig,
                antarctic_dates_reproj
            ]
            n_dates = [
                new_arctic_dates_orig, new_arctic_dates_reproj,
                new_antarctic_dates_orig, new_antarctic_dates_reproj
            ]

            for i in range(4):
                orig_or_reproj = 'orig' if i % 2 == 0 else 'reproj'
                arctic_or_antarctic = 'arctic' if i < 2 else 'antarctic'
                e = e_dates[i]
                n = n_dates[i]
                total = e + n

                logging.info('Existing {} {} assets: {}, new: {}'.format(
                    orig_or_reproj, arctic_or_antarctic, len(e), len(n)))
                #uncomment if we want to put a limit on how many years of historical data we have
                #deleteExcessAssets(total, orig_or_reproj, arctic_or_antarctic, MAX_DATES,'hist')

        ###
        for dataset, id in HIST_DATASET_ID.items():
            # Get most recent update date
            most_recent_date = get_most_recent_date(dataset)
            lastUpdateDate(id, most_recent_date)

    logging.info('SUCCESS')
Example #12
import eeUtil

eeUtil.initJson()
collection = 'cli_012_co2_concentrations'
print(eeUtil.exists(f'test_{collection}'))

eeUtil.createFolder(f'test_{collection}', True, public=True)

print('hello there!')
print(eeUtil.exists(f'test_{collection}'))
eeUtil.removeAsset(f'test_{collection}')
print(eeUtil.exists(f'test_{collection}'))
Example #13
def main():
    global VAR
    global EE_COLLECTION
    global EE_COLLECTION_GEN
    global PARENT_FOLDER
    global FILENAME
    global GS_FOLDER
    PARENT_FOLDER = COLLECTION
    EE_COLLECTION_GEN = COLLECTION + '/{var}'
    FILENAME = COLLECTION[29:] + '_{var}_{date}'
    '''Ingest new data into EE and delete old data'''
    logging.basicConfig(stream=sys.stderr, level=LOG_LEVEL)
    logging.info('STARTING')
    # Initialize eeUtil and clear collection in GEE if desired
    eeUtil.initJson()
    initialize_ee()
    if CLEAR_COLLECTION_FIRST:
        clearCollection()
    # 1. Check if collection exists and create
    existing_dates, existing_dates_by_var = checkCreateCollection(VARS)
    # Determine which files to fetch
    all_new_dates = getNewDates(existing_dates)
    # if new data is available, clear the collection because we want to store the most
    # recent forecast, not the old forecast
    if all_new_dates:
        clearCollection()
    # The container is only big enough to hold 3 files at once, so break into groups to process
    new_date_groups = [
        all_new_dates[x:x + 3] for x in range(0, len(all_new_dates), 3)
    ]
    for new_dates in new_date_groups:
        # Fetch new files
        logging.info('Fetching files for {}'.format(new_dates))
        files = fetch(
            new_dates)  #get list of locations of netcdfs in docker container
        # get the last file because it only has one time output, so we need to process it differently
        last_file = files[-1]
        for var_num in range(len(VARS)):
            # get variable name
            VAR = VARS[var_num]
            # specify GEE collection name and Google Cloud Storage folder names
            EE_COLLECTION = EE_COLLECTION_GEN.format(var=VAR)
            GS_FOLDER = COLLECTION[1:] + '_' + VAR
            existing_assets = eeUtil.ls(EE_COLLECTION)
            # 2. Fetch, process, stage, ingest, clean
            new_assets = processNewData(files, var_num, last_file)
            new_dates = [getDateTime(a) for a in new_assets]
            # 3. Delete old assets
            all_dates = existing_dates_by_var[var_num] + new_dates
            all_assets = np.sort(
                np.unique(existing_assets +
                          [os.path.split(asset)[1] for asset in new_assets]))
            logging.info('Existing assets for {}: {}, new: {}, max: {}'.format(
                VAR, len(all_dates), len(new_dates), MAX_ASSETS))
            deleteExcessAssets(all_assets, MAX_ASSETS)
            logging.info('SUCCESS for {}'.format(VAR))
            if var_num == len(VARS) - 1:
                # Get most recent update date
                most_recent_date = get_most_recent_date(all_assets)
                lastUpdateDate(DATASET_ID, most_recent_date)

        # Delete local netcdf files
        if DELETE_LOCAL:
            logging.info('Cleaning local NETCDF files')
            for f in files:
                os.remove(f)
Example #14
def main():

    ###
    # Configure logging
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    # Authenticate to GEE
    eeUtil.initJson()
    ###

    ###
    # Configure the ImageCollection you're going to add the rasters to
    ###

    GS_FOLDER = 'foo_054_soil_organic_carbon'
    EE_COLLECTION = 'foo_054_soil_organic_carbon'

    def ic(asset):
        return '{}/{}'.format(EE_COLLECTION, os.path.splitext(asset)[0])

    def checkCreateCollection(collection):
        '''List assets in the collection, or create the collection if it does not exist'''
        if eeUtil.exists(collection):
            return eeUtil.ls(collection)
        else:
            logging.info('{} does not exist, creating'.format(collection))
            eeUtil.createFolder(collection, True, public=True)
            return []

    existing_files = checkCreateCollection(EE_COLLECTION)

    ###
    # Obtain names of files to upload
    ###

    ###
    # Priority 1: Load files to GEE and register w/ RW API
    ###

    from ftplib import FTP
    ftp = FTP('ftp.soilgrids.org')
    ftp.login()

    folders = []
    ftp.retrlines('NLST', folders.append)
    logging.info("Folders:")
    logging.info(folders)

    data = []
    ftp.retrlines('NLST data/recent', data.append)
    data = [f.split('/')[2] for f in data]
    logging.info("Data:")
    logging.info(data)

    import re
    # Matches soil carbon for different depths:
    # 0, 5, 15, 30, 60, 100, 200 cm depth tifs available,
    # labeled sl1 - sl7
    # http://data.isric.org/geonetwork/srv/eng/catalog.search;jsessionid=A5137293CC6B3D96CBA35808CA155341#/metadata/98062ae9-911d-4e04-80a9-e4b480f87799
    pattern = re.compile('OCSTHA_M_sd._250m.tif')
    soilcarbon = [f for f in data if pattern.match(f)]
    logging.info("SoilCarbon data:")
    logging.info(soilcarbon)

    SOURCE_URL = 'ftp://ftp.soilgrids.org/data/recent/{f}'

    def getUrl(lvl):
        return SOURCE_URL.format(f=lvl)

    def getFilename(lvl):
        return 'tifs/{}'.format(lvl)

    ## Download with ftplib
    # Track progress:
    # https://stackoverflow.com/questions/21343029/how-do-i-keep-track-of-percentage-downloaded-with-ftp-retrbinary

    def download_file(f, block, totalSize, sizeWritten):
        f.write(block)
        sizeWritten.append(len(block))
        logging.info("{} = size written, {} = total size".format(
            sum(sizeWritten), totalSize))
        percentComplete = sum(sizeWritten) / totalSize * 100
        logging.info("{} percent complete".format(percentComplete))

    for data in soilcarbon:
        logging.info('Processing {}'.format(data))
        totalSize = ftp.size('data/recent/' + data)
        sizeWritten = []
        with open('tifs/{}'.format(data), 'wb') as f:
            ftp.retrbinary(
                'RETR data/recent/' + data,
                lambda block: download_file(f, block, totalSize, sizeWritten))

    ###
    ## Download with urllib

    # def fetch(files):
    #     '''Fetch files by datestamp'''
    #     tifs = []
    #     for lvl in files:
    #         url = getUrl(lvl)
    #         f = getFilename(lvl)
    #         logging.debug('Fetching {}'.format(url))
    #         # New data may not yet be posted
    #         try:
    #             urllib.request.urlretrieve(url, f)
    #             tifs.append(f)
    #         except Exception as e:
    #             logging.warning('Could not fetch {}'.format(url))
    #             logging.debug(e)
    #     return tifs
    #
    #
    # tifs = fetch(soilcarbon)

    ###
    # To upload to GEE, need to specify the date
    # Date formats vary by provider, some common ones include:
    ###
    ### Constant year

    DATE_FORMAT = '%Y'  # Year

    def getDate(asset):
        return '2017'

    ### Grab dates, create datestamps, upload through GEE

    tifs = [getFilename(f) for f in soilcarbon]  # local paths of the files downloaded above (assumed)
    dates = list(map(getDate, tifs))
    datestamps = [datetime.strptime(date, DATE_FORMAT) for date in dates]

    asset_names = [ic(t) for t in tifs]
    eeUtil.uploadAssets(tifs,
                        asset_names,
                        GS_FOLDER,
                        datestamps,
                        public=True,
                        timeout=30000)

    ###
    # Upload to RW API
    # For this and writing in the SLDs, could use Brookie's class
    # Would match the SLD name to the tif name, pair them and upload (like a zip)
    ###

    API_TOKEN = os.environ.get('rw_api_token', None)

    def createHeaders():
        return {
            'content-type': "application/json",
            'authorization': "Bearer {}".format(API_TOKEN)
        }

    def upload_ic_to_backoffice(wri_id, imageCollectionName, datasetName):

        ds_specs = {
            "connectorType": "rest",
            "provider": "gee",
            "tableName": imageCollectionName,
            "application": ["rw"],
            "geoInfo": True,
            "type": "raster",
            "name": "{}_{}".format(wri_id, datasetName)
        }

        create_res = req.request(
            "POST",
            'https://staging-api.globalforestwatch.org/v1/dataset',
            data=json.dumps(ds_specs),
            headers=createHeaders())

        logging.info(create_res.text)

        return create_res.json()['data']['id']

    rw_id = upload_ic_to_backoffice('foo.054', EE_COLLECTION,
                                    'Soil Organic Carbon')

    ###
    # Priority 2: Access pre-made SLDs for loading to layers ###
    ###

    ###
    # Retrieving legends for upload to RW API
    ###

    legends = []
    ftp.retrlines('NLST legends', legends.append)
    slds = [
        f.split('/')[1] for f in legends if os.path.splitext(f)[1] == '.sld'
    ]

    for sld in slds:
        logging.info('Processing {}'.format(sld))
        with open('slds/{}'.format(sld), 'wb') as f:
            ftp.retrbinary('RETR legends/' + sld, f.write)

    ftp.close()