def handler(payload, context={}, local=None):
    """ Handle Cirrus payload (STAC Process Catalog) """
    # get catalog
    catalog = Catalog.from_payload(payload)

    # configure logger
    logger = get_task_logger(f"{__name__}.{TASK_NAME}", catalog=catalog)

    # these are any optional parameters provided for this task
    config = catalog['process']['tasks'].get(TASK_NAME, {})

    # these are general options used when uploading output data to s3
    outopts = payload['process'].get('output_options', {})

    # validation - add specific checks on input
    # e.g., if task operates on one and only one Item use this:
    assert(len(catalog['features']) == 1)
    item = catalog['features'][0]

    # create temporary work directory if not running locally
    tmpdir = mkdtemp() if local is None else local
    outpath = op.join(tmpdir, 'output')
    os.makedirs(outpath, exist_ok=True)

    try:
        # main logic - replace with own

        # download asset, e.g. a thumbnail
        item = download_item_assets(item, path=outpath, assets=['thumbnail'])

        # do something, e.g. modify asset, create new asset
        # item['assets']['asset2'] = create_new_asset(item)

        # upload new assets unless running locally
        if local is None:
            item = upload_item_assets(item, assets=['asset2'], **outopts)

        # recommended to add derived_from link
        links = [l['href'] for l in item['links'] if l['rel'] == 'self']
        if len(links) == 1:
            # add derived from link
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json'
            })

        catalog['features'][0] = item
    except Exception as err:
        msg = f"**task** failed: {err}"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory if not running locally
        if local is None:
            logger.debug('Removing work directory %s' % tmpdir)
            rmtree(tmpdir)

    return catalog
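# A minimal sketch of exercising the template locally. The payload below is
# illustrative only: the item, asset href, and directories are hypothetical,
# and Catalog.from_payload may fill in fields (such as id) not shown here.
if __name__ == '__main__':
    test_payload = {
        'type': 'FeatureCollection',
        'features': [{
            'type': 'Feature',
            'id': 'test-item',
            'assets': {'thumbnail': {'href': 's3://my-bucket/thumb.png'}},  # hypothetical
            'links': []
        }],
        'process': {
            'tasks': {TASK_NAME: {}},
            'output_options': {}
        }
    }
    # passing local keeps outputs in the given directory and skips s3 upload
    handler(test_payload, local='/tmp/task-test')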
def lambda_handler(payload, context):
    logger.debug(json.dumps(payload))

    # Read SQS payload
    if 'Records' not in payload:
        raise ValueError("Input not from SQS")

    catalogs = []
    for record in [json.loads(r['body']) for r in payload['Records']]:
        cat = json.loads(record['Message'])
        logger.debug('cat: %s' % json.dumps(cat))
        # expand catids to full catalogs
        if 'catids' in cat:
            _cats = Catalogs.from_catids(cat['catids'])
            if 'process_update' in cat:
                logger.debug(f"Process update: {json.dumps(cat['process_update'])}")
                for c in _cats:
                    c['process'] = dict_merge(c['process'], cat['process_update'])
            catalogs += _cats
        elif cat.get('type', '') == 'Feature':
            # If Item, create Catalog and use default process for that collection
            if cat['collection'] not in PROCESSES.keys():
                raise ValueError(f"Default process not provided for collection {cat['collection']}")
            cat_json = {
                'type': 'FeatureCollection',
                'features': [cat],
                'process': PROCESSES[cat['collection']]
            }
            catalogs.append(Catalog(cat_json, update=True))
        else:
            catalogs.append(Catalog(cat, update=True))

    if len(catalogs) > 0:
        cats = Catalogs(catalogs)
        cats.process()

    return len(catalogs)
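# Hedged sketch of the event this lambda expects: SNS notifications delivered
# via SQS, where each record body wraps a Message that is either a dict of
# catids, a bare STAC Feature, or a full process catalog. IDs are illustrative.
example_event = {
    'Records': [{
        'body': json.dumps({
            'Message': json.dumps({'catids': ['catalog-id-1', 'catalog-id-2']})
        })
    }]
}
# lambda_handler(example_event, None)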
def handler(payload, context):
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.publish", catalog=catalog)

    config = catalog['process']['tasks'].get('publish', {})
    public = config.get('public', False)
    # additional SNS topics to publish to
    topics = config.get('sns', [])

    # these are the URLs to the canonical records on s3
    s3urls = []

    try:
        logger.debug("Publishing items to s3 and SNS")

        if API_URL is not None:
            link = {
                'title': catalog['id'],
                'rel': 'via-cirrus',
                'href': f"{API_URL}/catid/{catalog['id']}"
            }
            logger.debug(json.dumps(link))
            # add cirrus-source relation
            for item in catalog['features']:
                item['links'].append(link)

        # publish to s3
        s3urls = catalog.publish_to_s3(DATA_BUCKET, public=public)

        # publish to Cirrus SNS publish topic
        catalog.publish_to_sns()

        # Deprecated additional topics
        if PUBLISH_TOPICS:
            for t in PUBLISH_TOPICS.split(','):
                catalog.publish_to_sns(t)

        for t in topics:
            catalog.publish_to_sns(t)
    except Exception as err:
        msg = f"publish: failed publishing output items ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    try:
        # update processing in table
        statedb.set_completed(catalog['id'], outputs=s3urls)
    except Exception as err:
        msg = f"publish: failed setting as complete ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    return catalog
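# Illustrative 'publish' task configuration as it would appear under
# catalog['process']['tasks']; the topic ARN is a placeholder.
example_process = {
    'tasks': {
        'publish': {
            'public': True,  # make the s3 objects publicly readable
            'sns': ['arn:aws:sns:us-west-2:123456789012:extra-topic']  # hypothetical
        }
    }
}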
def handler(payload, context):
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.pre-batch", catalog=catalog)

    url = f"s3://{CATALOG_BUCKET}/batch/{catalog['id']}/{uuid.uuid1()}.json"

    try:
        # copy payload to s3
        s3().upload_json(catalog, url)
        logger.debug(f"Uploaded catalog to {url}")
        return {'url': url}
    except Exception as err:
        msg = f"pre-batch: failed pre-processing batch job ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
def handler(payload, context={}):
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.copy-assets", catalog=catalog)

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]  # collection=catalog['collections'][0]

    # configuration options
    config = catalog['process']['tasks'].get('copy-assets', {})
    outopts = catalog['process'].get('output_options', {})

    # asset config
    assets = config.get('assets', item['assets'].keys())
    drop_assets = config.get('drop_assets', [])

    # drop specified assets
    for asset in [a for a in drop_assets if a in item['assets'].keys()]:
        logger.debug(f'Dropping asset {asset}')
        item['assets'].pop(asset)

    if isinstance(assets, str) and assets == 'ALL':
        assets = item['assets'].keys()

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        # copy specified assets
        _assets = [a for a in assets if a in item['assets'].keys()]

        for asset in _assets:
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            item = upload_item_assets(item, assets=[asset], **outopts)

        # replace item in catalog
        catalog['features'][0] = item
    except Exception as err:
        msg = f"copy-assets: failed processing {catalog['id']} ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s' % tmpdir)
        rmtree(tmpdir)

    return catalog
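# Illustrative 'copy-assets' configuration: copy only the listed asset keys
# (or the string 'ALL'), dropping others first. All keys are hypothetical.
example_config = {
    'assets': ['thumbnail', 'metadata'],
    'drop_assets': ['overview']
}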
def handler(payload, context):
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.workflow-failed", catalog=catalog)

    # parse errors
    error = payload.get('error', {})

    # error type
    error_type = error.get('Error', "unknown")

    # check if cause is JSON
    try:
        cause = json.loads(error['Cause'])
        error_msg = 'unknown'
        if 'errorMessage' in cause:
            error_msg = cause.get('errorMessage', 'unknown')
        elif 'Attempts' in cause:
            try:
                # batch
                reason = cause['Attempts'][-1]['StatusReason']
                if 'Essential container in task exited' in reason:
                    # get the message from batch logs
                    logname = cause['Attempts'][-1]['Container']['LogStreamName']
                    error_type, error_msg = get_error_from_batch(logname)
            except Exception as err:
                logger.error(err, exc_info=True)
    except Exception:
        # Cause was not JSON (or was missing); fall back to the raw value
        error_msg = error.get('Cause', 'unknown')

    error = f"{error_type}: {error_msg}"
    logger.info(error)

    try:
        if error_type == "InvalidInput":
            statedb.set_invalid(catalog['id'], error)
        else:
            statedb.set_failed(catalog['id'], error)
    except Exception as err:
        msg = f"Failed marking as failed: {err}"
        logger.error(msg, exc_info=True)
        raise err

    if FAILED_TOPIC_ARN is not None:
        try:
            item = statedb.dbitem_to_item(statedb.get_dbitem(catalog['id']))
            attrs = {
                'collections': {
                    'DataType': 'String',
                    'StringValue': item['collections']
                },
                'workflow': {
                    'DataType': 'String',
                    'StringValue': item['workflow']
                },
                'error': {
                    'DataType': 'String',
                    'StringValue': error
                }
            }
            logger.debug(f"Publishing item to {FAILED_TOPIC_ARN}")
            SNS_CLIENT.publish(TopicArn=FAILED_TOPIC_ARN,
                               Message=json.dumps(item),
                               MessageAttributes=attrs)
        except Exception as err:
            msg = f"Failed publishing to {FAILED_TOPIC_ARN}: {err}"
            logger.error(msg, exc_info=True)
            raise err

    return catalog
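# Sketch of the 'error' block this handler parses, as produced by a Step
# Functions Catch: 'Cause' is a JSON string. Values are illustrative.
example_error = {
    'Error': 'InvalidInput',
    'Cause': json.dumps({'errorMessage': 'task: something went wrong'})
}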
def handler(payload, context={}):
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.add-preview", catalog=catalog)

    # get step configuration
    config = catalog['process']['tasks'].get('add-preview', {})
    outopts = catalog['process'].get('output_options', {})
    assets = config.pop('assets', None)
    thumb = config.pop('thumbnail', False)
    # 'batch' is not a create_preview option, so discard it if present
    config.pop('batch', None)

    if assets is None:
        msg = "add-preview: no asset specified for preview"
        logger.error(msg)
        raise Exception(msg)

    # create temporary work directory
    tmpdir = tempfile.mkdtemp()

    items = []
    for item in catalog['features']:
        # find asset to use for preview
        asset = None
        for a in assets:
            if a in item['assets']:
                asset = a
                break
        if asset is None:
            msg = "add-preview: no available asset for preview"
            logger.warning(msg)
            items.append(item)
            continue

        try:
            # keep original href
            href = item['assets'][asset]['href']

            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            filename = item['assets'][asset]['href']

            # add preview to item
            item['assets']['preview'] = create_preview(filename, logger, **config)
            if thumb:
                # add thumbnail to item
                item['assets']['thumbnail'] = create_thumbnail(item['assets']['preview']['href'], logger)

            # put back original href
            item['assets'][asset]['href'] = href

            # upload these new assets
            item = upload_item_assets(item, assets=['preview', 'thumbnail'], **outopts)
            items.append(item)
        except Exception as err:
            msg = f"add-preview: failed creating preview/thumbnail ({err})"
            logger.error(msg, exc_info=True)
            # remove work directory....very important for Lambdas!
            shutil.rmtree(tmpdir)
            raise Exception(msg) from err

    # remove work directory....very important for Lambdas!
    shutil.rmtree(tmpdir)

    # return new items
    catalog['features'] = items
    return catalog
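# Illustrative 'add-preview' configuration: 'assets' lists candidate asset
# keys in priority order; any remaining keys (besides 'thumbnail' and 'batch')
# are forwarded to create_preview(). Values are hypothetical.
example_config = {
    'assets': ['visual'],
    'thumbnail': True
}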
def handler(payload, context={}):
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.convert-to-cog", catalog=catalog)

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]  # collection=catalog['collections'][0]

    # configuration options
    config = catalog['process']['tasks'].get('convert-to-cog', {})
    outopts = catalog['process'].get('output_options', {})
    # mapping of asset key to cogify() keyword arguments
    assets = config.get('assets', {})

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        asset_keys = [a for a in assets if a in item['assets'].keys()]

        for asset in asset_keys:
            logger.info(f"Converting {asset} to COG")
            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            # cogify
            fn = item['assets'][asset]['href']
            fnout = cogify(fn, op.splitext(fn)[0] + '.tif', **assets[asset])
            item['assets'][asset]['href'] = fnout
            item['assets'][asset]['type'] = "image/tiff; application=geotiff; profile=cloud-optimized"
            with rasterio.open(fnout) as src:
                item['assets'][asset]['proj:shape'] = src.shape
                item['assets'][asset]['proj:transform'] = src.transform

            # upload assets
            item = upload_item_assets(item, assets=[asset], **outopts)

            # cleanup files
            if op.exists(fn):
                remove(fn)
            if op.exists(fnout):
                remove(fnout)

        # add derived_from link
        links = [l['href'] for l in item['links'] if l['rel'] == 'self']
        if len(links) == 1:
            # add derived from link
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json'
            })

        # drop any specified assets
        for asset in [a for a in config.get('drop_assets', []) if a in item['assets'].keys()]:
            item['assets'].pop(asset)

        catalog['features'][0] = item
    except CRSError as err:
        msg = f"convert-to-cog: invalid CRS ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except s3_sessions[list(s3_sessions)[0]].s3.exceptions.NoSuchKey as err:
        msg = f"convert-to-cog: failed fetching {asset} asset ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except Exception as err:
        msg = f"convert-to-cog: failed creating COGs ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s' % tmpdir)
        rmtree(tmpdir)

    return catalog
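# Illustrative 'convert-to-cog' configuration: 'assets' maps each asset key to
# keyword arguments forwarded to cogify(). Keys shown are hypothetical.
example_config = {
    'assets': {
        'image': {}  # per-asset cogify() keyword arguments
    },
    'drop_assets': []
}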
def handler(payload, context={}):
    logger.debug('Payload: %s' % json.dumps(payload))
    catalog = Catalog.from_payload(payload)

    # TODO - make this more general for more items/collections
    assert(len(catalog['features']) == 1)

    # configuration options
    #config = catalog['process']['functions'].get('landsat-to-stac', {})
    #output_options = catalog['process'].get('output_options', {})
    #output_credentials = output_options.get('credentials', {})

    # this process assumes a single output collection, as it's just converting
    # from the original Landsat metadata to STAC for one scene
    #output_collection = list(catalog['process']['output_options']['collections'].keys())[0]
    #output_collection = 'landsat-c1-l2a'

    items = []

    # get metadata
    url = s3().s3_to_https(catalog['features'][0]['assets']['txt']['href'].rstrip())
    # strip the _MTL.txt suffix (note: str.rstrip strips characters, not a suffix)
    base_url = url.replace('_MTL.txt', '')

    # get metadata and convert to JSON
    metadata = landsat.mtl_to_json(fetch_url_as_text(url))

    # get ANG metadata, used for geometry
    ang_text = fetch_url_as_text(base_url + '_ANG.txt')

    bbox = landsat.get_bbox(metadata)

    try:
        item = pystac.Item(id=metadata['LANDSAT_PRODUCT_ID'],
                           datetime=landsat.get_datetime(metadata),
                           bbox=bbox,
                           geometry=landsat.get_geometry(ang_text, bbox),
                           properties={})

        # add common metadata
        item.common_metadata.gsd = 30.0
        item.common_metadata.platform = metadata['SPACECRAFT_ID']
        item.common_metadata.instruments = metadata['SENSOR_ID'].split('_')

        # add EO extension
        item.ext.enable('eo')
        item.ext.eo.cloud_cover = float(metadata['CLOUD_COVER'])

        # add proj extension
        item.ext.enable('projection')
        item.ext.projection.epsg = landsat.get_epsg(metadata, item.bbox[1], item.bbox[3])

        item.ext.enable('view')
        view_info = landsat.get_view_info(metadata)
        item.ext.view.sun_azimuth = view_info['sun_azimuth']
        item.ext.view.sun_elevation = view_info['sun_elevation']
        item.ext.view.off_nadir = abs(view_info['off_nadir'])

        # collection 2
        #item.ext.enable('scientific')
        #item.ext.sci.doi = metadata['DIGITAL_OBJECT_IDENTIFIER']

        item.ext.enable('landsat')
        item.ext.landsat.apply(**landsat.get_landsat_info(metadata))

        landsat.add_assets(item, base_url)

        #item.validate()
        items.append(item.to_dict())
    except Exception as err:
        msg = f"landsat-to-stac: failed creating STAC for {catalog['id']} ({err})"
        logger.error(msg)
        logger.error(format_exc())
        raise Exception(msg) from err

    # discard if crossing antimeridian
    logger.debug(f"bbox = {item.bbox}")
    if item.bbox[2] - item.bbox[0] > 300:
        msg = f"{item.id} crosses antimeridian, discarding"
        logger.error(msg)
        raise InvalidInput(msg)

    # update STAC catalog
    catalog['features'] = items

    logger.debug(f"STAC Output: {json.dumps(catalog)}")
    return catalog
def handler(payload, context={}):
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.sentinel-to-stac", catalog=catalog)

    items = []

    # get metadata
    url = catalog['features'][0]['assets']['json']['href'].rstrip()
    # if this is the FREE URL, get s3 base
    if url[0:5] == 'https':
        base_url = 's3:/' + op.dirname(urlparse(url).path)
    else:
        base_url = op.dirname(url)

    # TODO - handle getting from s3 as well as http?
    # get metadata
    metadata = fetch_metadata(url, logger)

    #if 'tileDataGeometry' in metadata:
    #    coords = metadata['tileDataGeometry'].get('coordinates', [[]])
    #    if len(coords) == 1 and len(coords[0]) == 0:
    #        # if empty list then drop tileDataGeometry, will try to get from L1C
    #        metadata.pop('tileDataGeometry')

    # need to get cloud cover from sentinel-s2-l1c since missing from l2a,
    # so fetch and publish l1c as well
    try:
        _url = url.replace('sentinel-s2-l2a', 'sentinel-s2-l1c')
        l1c_metadata = fetch_metadata(_url, logger)
    except InvalidInput:
        l1c_metadata = None

    if l1c_metadata is not None:
        # tileDataGeometry in L2A but not in L1C
        if 'tileDataGeometry' not in l1c_metadata:
            if 'tileDataGeometry' in metadata:
                l1c_metadata['tileDataGeometry'] = metadata['tileDataGeometry']
            else:
                msg = "sentinel-to-stac: no valid data geometry available"
                logger.error(msg)
                raise InvalidInput(msg)

        try:
            _item = sentinel_s2_l1c(l1c_metadata, base_url.replace('sentinel-s2-l2a', 'sentinel-s2-l1c'))
            for a in ['thumbnail', 'info', 'metadata']:
                _item['assets'][a]['href'] = _item['assets'][a]['href'].replace(
                    's3:/', 'https://roda.sentinel-hub.com')
            # if dataCoveragePercentage not in L1C data, try getting from L2A
            if 'dataCoveragePercentage' not in l1c_metadata and 'dataCoveragePercentage' in metadata:
                _item['properties']['sentinel:data_coverage'] = float(metadata['dataCoveragePercentage'])
            items.append(_item)
        except Exception as err:
            msg = f"sentinel-to-stac: failed creating L1C STAC ({err})"
            logger.error(msg, exc_info=True)
            raise InvalidInput(msg) from err

        # use L1C cloudyPixelPercentage
        metadata['cloudyPixelPercentage'] = l1c_metadata['cloudyPixelPercentage']

        # tileDataGeometry in L1C but not L2A
        if 'tileDataGeometry' not in metadata and 'tileDataGeometry' in l1c_metadata:
            metadata['tileDataGeometry'] = l1c_metadata['tileDataGeometry']

    # tileDataGeometry not available
    if 'tileDataGeometry' not in metadata:
        msg = "sentinel-to-stac: no valid data geometry available"
        logger.error(msg)
        raise InvalidInput(msg)

    try:
        item = sentinel_s2_l2a(metadata, base_url)
        for a in ['thumbnail', 'info', 'metadata']:
            item['assets'][a]['href'] = item['assets'][a]['href'].replace(
                's3:/', 'https://roda.sentinel-hub.com')
        if l1c_metadata is not None:
            item['properties']['sentinel:valid_cloud_cover'] = True
        else:
            item['properties']['sentinel:valid_cloud_cover'] = False
        items.append(item)
    except Exception as err:
        msg = f"sentinel-to-stac: failed creating STAC ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err

    # discard if crossing antimeridian
    if item['bbox'][2] - item['bbox'][0] > 300:
        msg = "sentinel-to-stac: crosses antimeridian, discarding"
        logger.error(msg)
        raise InvalidInput(msg)

    # update STAC catalog
    catalog['features'] = items
    return catalog
def handler(payload, context):
    catalog = Catalog.from_payload(payload)
    return catalog