def lambda_handler(event, context):
    logger.debug(json.dumps(event))

    # Read SQS event
    if 'Records' not in event:
        raise ValueError("Input not from SQS")

    # TODO: a large number of input collections will cause a timeout
    # find a way to process each input message, deleting it from the queue
    # any not processed before timeout will be retried on the next execution
    payloads = []
    for record in [json.loads(r['body']) for r in event['Records']]:
        payload = json.loads(record['Message'])
        logger.debug('payload: %s', json.dumps(payload))
        # expand payload_ids to full payloads
        if 'payload_ids' in payload:
            _payloads = ProcessPayloads.from_payload_ids(payload['payload_ids'])
            if 'process_update' in payload:
                logger.debug(
                    "Process update: %s",
                    json.dumps(payload['process_update']),
                )
                for c in _payloads:
                    c['process'] = dict_merge(
                        c['process'],
                        payload['process_update'],
                    )
            ProcessPayloads(_payloads).process(replace=True)
        elif payload.get('type', '') == 'Feature':
            # If Item, create ProcessPayload and
            # use default process for that collection
            if payload['collection'] not in PROCESSES:
                raise ValueError(
                    "Default process not provided for "
                    f"collection {payload['collection']}",
                )
            payload_json = {
                'type': 'FeatureCollection',
                'features': [payload],
                'process': PROCESSES[payload['collection']],
            }
            payloads.append(ProcessPayload(payload_json, update=True))
        else:
            payloads.append(ProcessPayload(payload, update=True))

    if len(payloads) > 0:
        _payloads = ProcessPayloads(payloads)
        _payloads.process()

    return len(payloads)
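# `dict_merge` above is imported from cirrus-lib's utils. As a rough sketch of
# the behavior the handler relies on (an assumption, not the actual
# implementation): values from `update` win, and nested dicts merge recursively.
def _dict_merge_sketch(original: dict, update: dict) -> dict:
    result = dict(original)
    for key, value in update.items():
        if isinstance(value, dict) and isinstance(result.get(key), dict):
            # merge nested mappings rather than overwriting them wholesale
            result[key] = _dict_merge_sketch(result[key], value)
        else:
            result[key] = value
    return result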
def parse_event(event):
    # return a tuple of:
    #   - workflow input ProcessPayload object
    #   - workflow output ProcessPayload object or None (if not success)
    #   - status string
    #   - error object
    try:
        if 'error' in event:
            logger.debug(
                'looks like a payload with an error message, i.e., workflow-failed'
            )
            return (
                ProcessPayload.from_event(event),
                None,
                FAILED,
                event.get('error', {}),
            )
        elif event.get('source', '') == "aws.states":
            status = event['detail']['status']
            error = None
            if status == SUCCEEDED:
                pass
            elif status == FAILED:
                error = get_execution_error(event['detail']['executionArn'])
            elif status == ABORTED:
                pass
            elif status == TIMED_OUT:
                error = mk_error(
                    'TimedOutError',
                    'The step function execution timed out.',
                )
            else:
                logger.warning('Unknown status: %s', status)
            return (
                ProcessPayload(json.loads(event['detail']['input'])),
                ProcessPayload(json.loads(event['detail']['output']))
                if event['detail'].get('output', None) else None,
                status,
                error,
            )
        else:
            raise Exception()
    except Exception:
        logger.error('Unknown event: %s', json.dumps(event))
        return None, None, None, None
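# `mk_error` is assumed to produce the same {'Error', 'Cause'} shape that Step
# Functions uses for execution errors (and that `get_execution_error`
# presumably returns); a minimal sketch:
def _mk_error_sketch(error: str, cause: str) -> dict:
    return {'Error': error, 'Cause': cause}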
def lambda_handler(event, context):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.publish", payload=payload)

    config = payload.get_task('publish', {})
    public = config.get('public', False)
    # additional SNS topics to publish to
    topics = config.get('sns', [])

    # these are the URLs to the canonical records on s3
    s3urls = []

    try:
        logger.debug("Publishing items to s3 and SNS")

        if API_URL is not None:
            link = {
                'title': payload['id'],
                'rel': 'via-cirrus',
                'href': f"{API_URL}/catid/{payload['id']}",
            }
            logger.debug(json.dumps(link))
            # add cirrus-source relation
            for item in payload['features']:
                item['links'].append(link)

        # publish to s3
        s3urls = payload.publish_items_to_s3(DATA_BUCKET, public=public)

        # publish to Cirrus SNS publish topic
        payload.publish_items_to_sns()

        # deprecated additional topics
        if PUBLISH_TOPICS:
            for topic in PUBLISH_TOPICS.split(','):
                payload.publish_items_to_sns(topic)

        for topic in topics:
            payload.publish_items_to_sns(topic)
    except Exception as err:
        msg = f"publish: failed publishing output items ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    try:
        # update job outputs in table
        statedb.set_outputs(payload['id'], outputs=s3urls)
    except Exception as err:
        msg = f"publish: failed setting statedb outputs ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    return payload
def test_get_items_by_properties(base_payload):
    base_payload['process']['item-queries'] = {
        'test': {'platform': 'sentinel-2b'},
        'empty-test': {'platform': 'test-platform'},
    }
    payload = ProcessPayload.from_event(base_payload)
    assert payload.get_items_by_properties("test") == base_payload['features']
    assert payload.get_items_by_properties("empty-test") == []
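# `get_items_by_properties` is assumed to select the features whose
# `properties` match every key/value pair in the named item-query; a sketch of
# that matching (a hypothetical helper, not the ProcessPayload method itself):
def _items_matching_query_sketch(payload: dict, name: str) -> list:
    query = payload['process']['item-queries'][name]
    return [
        item for item in payload['features']
        if all(item['properties'].get(k) == v for k, v in query.items())
    ]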
def test_get_item_by_properties(base_payload):
    base_payload['process']['item-queries'] = {
        'feature1': {'platform': 'sentinel-2b'},
        'feature2': {'platform': 'test-platform'},
    }
    feature1 = copy.deepcopy(base_payload['features'][0])
    feature2 = copy.deepcopy(base_payload['features'][0])
    feature2['properties']['platform'] = 'test-platform'
    base_payload['features'] = [feature1, feature2]
    payload = ProcessPayload.from_event(base_payload)
    assert payload.get_item_by_properties("feature1") == feature1
    assert payload.get_item_by_properties("feature2") == feature2
def lambda_handler(event, context):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.pre-batch", payload=payload)

    url = f"s3://{PAYLOAD_BUCKET}/batch/{payload['id']}/{uuid.uuid1()}.json"

    try:
        # copy payload to s3
        s3().upload_json(payload, url)
        logger.debug(f"Uploaded payload to {url}")
        return {'url': url}
    except Exception as err:
        msg = f"pre-batch: failed pre-processing batch job ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
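# A downstream batch task presumably rehydrates the payload from the returned
# URL; a sketch using the same boto3utils `s3` helper (the handler name here
# is illustrative, and `read_json` is assumed available):
def _load_batch_payload_sketch(event: dict) -> ProcessPayload:
    return ProcessPayload(s3().read_json(event['url']))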
def lambda_handler(event, context={}):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.copy-assets", payload=payload)

    # TODO - make this more general for more items/collections
    item = payload['features'][0]  # collection=payload['collections'][0])

    # configuration options
    config = payload.get_task('copy-assets', {})
    outopts = payload.process.get('output_options', {})

    # asset config
    assets = config.get('assets', item['assets'].keys())
    drop_assets = config.get('drop_assets', [])

    # drop specified assets
    for asset in [a for a in drop_assets if a in item['assets'].keys()]:
        logger.debug(f'Dropping asset {asset}')
        item['assets'].pop(asset)

    if isinstance(assets, str) and assets == 'ALL':
        assets = item['assets'].keys()

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        # copy specified assets
        _assets = [a for a in assets if a in item['assets'].keys()]
        for asset in _assets:
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            item = upload_item_assets(item, assets=[asset], **outopts)

        # replace item in payload
        payload['features'][0] = item
    except Exception as err:
        msg = f"copy-assets: failed processing {payload['id']} ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s', tmpdir)
        rmtree(tmpdir)

    return payload
def lambda_handler(event, context):
    if 'error' not in event:
        return ProcessPayload.from_event(event)

    error = event.get('error', {})
    cause = json.loads(error['Cause'])
    logname = cause['Attempts'][-1]['Container']['LogStreamName']

    try:
        error_type, error_msg = get_error_from_batch(logname)
    except Exception:
        # lambda does not currently support exception chaining,
        # so we have to log the original exception separately
        logger.exception("Original exception:")
        raise Exception("Unable to get error log")

    exception_class = type(error_type, (Exception,), {})
    raise exception_class(error_msg)
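# The final `type(...)` call builds an exception class named after the error
# string recovered from the batch logs, so the failure surfaces in Step
# Functions under that name. A self-contained demonstration of the mechanism:
_DemoError = type('CalledProcessError', (Exception,), {})
try:
    raise _DemoError('command exited non-zero')
except Exception as exc:
    assert type(exc).__name__ == 'CalledProcessError'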
def test_next_payloads_list_of_four(base_payload):
    length = 4
    list_payload = copy.deepcopy(base_payload)
    list_payload['process'] = [base_payload['process']] * length
    # We should now have something like this:
    #
    # payload
    #   process:
    #     - wf1
    #     - wf2
    #     - wf3
    #     - wf4
    payloads = list(ProcessPayload.from_event(list_payload).next_payloads())
    # When we call next_payloads, we find one next payload (wf2)
    # with two to follow. So the length of the list returned should be
    # one, a process payload with a process array of length 3.
    assert len(payloads) == 1
    assert payloads[0]['process'] == [base_payload['process']] * (length - 1)
def test_next_payloads_list_of_four_fork(base_payload):
    length = 4
    list_payload = copy.deepcopy(base_payload)
    list_payload['process'] = [base_payload['process']] * length
    list_payload['process'][1] = [base_payload['process']] * 2
    # We should now have something like this:
    #
    # payload
    #   process:
    #     - wf1
    #     - [ wf2a, wf2b ]
    #     - wf3
    #     - wf4
    payloads = list(ProcessPayload.from_event(list_payload).next_payloads())
    # When we call next_payloads, we find two next payloads
    # (wf2a and wf2b), each with two to follow. So the length of
    # the list returned should be two, each a process payload
    # with a process array of length 3.
    assert len(payloads) == 2
    assert payloads[0]['process'] == [base_payload['process']] * (length - 1)
    assert payloads[1]['process'] == [base_payload['process']] * (length - 1)
def test_from_event(sqs_event):
    payload = ProcessPayload.from_event(sqs_event, update=True)
    assert len(payload['features']) == 1
    assert payload['id'] == \
        'sentinel-s2-l2a-aws/workflow-publish-sentinel/tiles-17-H-QD-2020-11-3-0'
def test_next_payloads_no_list(base_payload):
    payloads = list(ProcessPayload.from_event(base_payload).next_payloads())
    assert len(payloads) == 0
def test_next_payloads_list_of_one(base_payload):
    base_payload['process'] = [base_payload['process']]
    payloads = list(ProcessPayload.from_event(base_payload).next_payloads())
    assert len(payloads) == 0
def lambda_handler(event, context={}):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.add-preview", payload=payload)

    # get step configuration
    config = payload.get_task('add-preview', {})
    outopts = payload.process.get('output_options', {})
    assets = config.pop('assets', None)
    thumb = config.pop('thumbnail', False)
    # drop the batch flag if present; it is not a create_preview kwarg
    config.pop('batch', None)

    if assets is None:
        msg = "add-preview: no asset specified for preview"
        logger.error(msg)
        raise Exception(msg)

    # create temporary work directory
    tmpdir = tempfile.mkdtemp()
    items = []
    for item in payload['features']:
        # find asset to use for preview
        asset = None
        for a in assets:
            if a in item['assets']:
                asset = a
                break
        if asset is None:
            msg = "add-preview: no available asset for preview"
            logger.warning(msg)
            items.append(item)
            continue

        try:
            # keep original href
            href = item['assets'][asset]['href']

            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            filename = item['assets'][asset]['href']

            # add preview to item
            item['assets']['preview'] = create_preview(filename, logger, **config)
            if thumb:
                # add thumbnail to item
                item['assets']['thumbnail'] = create_thumbnail(
                    item['assets']['preview']['href'], logger)

            # put back original href
            item['assets'][asset]['href'] = href

            # upload these new assets
            item = upload_item_assets(
                item, assets=['preview', 'thumbnail'], **outopts)
            items.append(item)
        except Exception as err:
            msg = f"add-preview: failed creating preview/thumbnail ({err})"
            logger.error(msg, exc_info=True)
            # remove work directory....very important for Lambdas!
            shutil.rmtree(tmpdir)
            raise Exception(msg) from err

    # remove work directory....very important for Lambdas!
    shutil.rmtree(tmpdir)

    # return new items
    payload['features'] = items
    return payload
def test_sns_attributes(base_payload):
    payload = ProcessPayload(base_payload)
    attr = ProcessPayload.sns_attributes(payload['features'][0])
    assert attr['cloud_cover']['StringValue'] == '51.56'
    assert attr['datetime']['StringValue'] == '2020-11-03T15:22:26Z'
def test_assign_collections(base_payload):
    payload = ProcessPayload(base_payload)
    payload['process']['output_options']['collections'] = {'test': '.*'}
    payload.assign_collections()
    assert payload['features'][0]['collection'] == 'test'
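# `assign_collections` is assumed to match each item's id against the regex
# mapped from each collection name in `output_options['collections']`; a
# sketch of that behavior (hypothetical, the real logic lives on ProcessPayload):
import re

def _assign_collections_sketch(payload: dict) -> None:
    collections = payload['process']['output_options'].get('collections', {})
    for item in payload['features']:
        for name, pattern in collections.items():
            if re.match(pattern, item['id']):
                item['collection'] = name
                break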
def lambda_handler(event, context={}):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.convert-to-cog", payload=payload)

    # TODO - make this more general for more items/collections
    item = payload['features'][0]  # collection=payload['collections'][0])

    # configuration options
    config = payload.get_task('convert-to-cog', {})
    outopts = payload.process.get('output_options', {})
    assets = config.get('assets')

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        asset_keys = [a for a in assets if a in item['assets'].keys()]

        for asset in asset_keys:
            logger.info(f"Converting {asset} to COG")
            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            # cogify
            fn = item['assets'][asset]['href']
            fnout = cogify(fn, os.path.splitext(fn)[0] + '.tif', **assets[asset])
            item['assets'][asset]['href'] = fnout
            item['assets'][asset]['type'] = \
                "image/tiff; application=geotiff; profile=cloud-optimized"
            with rasterio.open(fnout) as src:
                item['assets'][asset]['proj:shape'] = src.shape
                # store the Affine as a list so the item stays JSON-serializable
                item['assets'][asset]['proj:transform'] = list(src.transform)

            # upload assets
            item = upload_item_assets(item, assets=[asset], **outopts)

            # cleanup files
            if os.path.exists(fn):
                os.remove(fn)
            if os.path.exists(fnout):
                os.remove(fnout)

        # add derived_from link pointing at the source item, if it has a self link
        links = [link['href'] for link in item['links'] if link['rel'] == 'self']
        if len(links) == 1:
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json',
            })

        # drop any specified assets
        for asset in [a for a in config.get('drop_assets', [])
                      if a in item['assets'].keys()]:
            item['assets'].pop(asset)

        payload['features'][0] = item
    except CRSError as err:
        msg = f"convert-to-cog: invalid CRS ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except s3_sessions[list(s3_sessions)[0]].s3.exceptions.NoSuchKey as err:
        msg = f"convert-to-cog: failed fetching {asset} asset ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except Exception as err:
        msg = f"convert-to-cog: failed creating COGs ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s', tmpdir)
        rmtree(tmpdir)

    return payload
def test_update_payload(base_payload):
    del base_payload['id']
    del base_payload['features'][0]['links']
    payload = ProcessPayload(**base_payload, update=True)
    assert payload['id'] == \
        "sentinel-s2-l2a/workflow-cog-archive/S2B_17HQD_20201103_0_L2A"
def test_open_payload(base_payload):
    payload = ProcessPayload(**base_payload)
    assert payload['id'] == \
        "sentinel-s2-l2a/workflow-cog-archive/S2B_17HQD_20201103_0_L2A"
def test_next_payloads_chain_filter(chain_payload, chain_filter_payload):
    payloads = list(ProcessPayload(chain_payload, update=True).next_payloads())
    assert len(payloads) == 1
    assert not recursive_compare(payloads[0], chain_payload)
    assert recursive_compare(payloads[0], chain_filter_payload)
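# `recursive_compare` is a test helper for deep equality; a minimal sketch of
# what it is assumed to check (the real helper may also report differences):
def _recursive_compare_sketch(d1, d2) -> bool:
    if isinstance(d1, dict) and isinstance(d2, dict):
        return d1.keys() == d2.keys() and all(
            _recursive_compare_sketch(d1[k], d2[k]) for k in d1
        )
    if isinstance(d1, list) and isinstance(d2, list):
        return len(d1) == len(d2) and all(
            _recursive_compare_sketch(a, b) for a, b in zip(d1, d2)
        )
    return d1 == d2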