Example #1
def lambda_handler(event, context):
    logger.debug(json.dumps(event))

    # Read SQS event
    if 'Records' not in event:
        raise ValueError("Input not from SQS")

    # TODO: a large number of input collections will cause a timeout
    # find a way to process each input message, deleting it from the queue
    # any not processed before timeout will be retried on the next execution
    payloads = []
    for record in [json.loads(r['body']) for r in event['Records']]:
        payload = json.loads(record['Message'])
        logger.debug('payload: %s', json.dumps(payload))
        # expand payload_ids to full payloads
        if 'payload_ids' in payload:
            _payloads = ProcessPayloads.from_payload_ids(
                payload['payload_ids'])
            if 'process_update' in payload:
                logger.debug(
                    "Process update: %s",
                    json.dumps(payload['process_update']),
                )
                for c in _payloads:
                    c['process'] = dict_merge(
                        c['process'],
                        payload['process_update'],
                    )
            ProcessPayloads(_payloads).process(replace=True)
        elif payload.get('type', '') == 'Feature':
            # If Item, create ProcessPayload and
            # use default process for that collection
            if payload['collection'] not in PROCESSES:
                raise ValueError(
                    "Default process not provided for "
                    f"collection {payload['collection']}"
                )
            payload_json = {
                'type': 'FeatureCollection',
                'features': [payload],
                'process': PROCESSES[payload['collection']]
            }
            payloads.append(ProcessPayload(payload_json, update=True))
        else:
            payloads.append(ProcessPayload(payload, update=True))

    if payloads:
        _payloads = ProcessPayloads(payloads)
        _payloads.process()

    return len(payloads)
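
A note on the expected event shape: the handler decodes each SQS record's body and then the SNS 'Message' string inside it, so it assumes SNS notifications delivered through an SQS subscription. A minimal sketch with hypothetical payload contents (only the nesting matters):

import json

# innermost message: a payload of the kind handled above (contents are placeholders)
message = {'payload_ids': ['example-payload-id']}

# SNS notification wrapped in an SQS record, matching the two json.loads calls above
sqs_event = {
    'Records': [
        {'body': json.dumps({'Message': json.dumps(message)})},
    ],
}

# lambda_handler(sqs_event, None) would expand the payload_ids and process them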
Example #2
def parse_event(event):
    # return a tuple of:
    #   - workflow input ProcessPayload object
    #   - workflow output ProcessPayload object or None (if not success)
    #   - status string
    #   - error object
    try:
        if 'error' in event:
            logger.debug(
                'looks like a payload with an error message, i.e., workflow-failed'
            )
            return (
                ProcessPayload.from_event(event),
                None,
                FAILED,
                event.get('error', {}),
            )
        elif event.get('source', '') == "aws.states":
            status = event['detail']['status']
            error = None
            if status == SUCCEEDED:
                pass
            elif status == FAILED:
                error = get_execution_error(event['detail']['executionArn'])
            elif status == ABORTED:
                pass
            elif status == TIMED_OUT:
                error = mk_error(
                    'TimedOutError',
                    'The step function execution timed out.',
                )
            else:
                logger.warning('Unknown status: %s', status)
            return (
                ProcessPayload(json.loads(event['detail']['input'])),
                (
                    ProcessPayload(json.loads(event['detail']['output']))
                    if event['detail'].get('output') else None
                ),
                status,
                error,
            )
        else:
            # unrecognized event shape; fall through to the unknown-event handler below
            raise Exception()
    except Exception:
        logger.error('Unknown event: %s', json.dumps(event))
        return None, None, None, None
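
Besides payloads that already carry an 'error' key, parse_event reads EventBridge "Step Functions Execution Status Change" events. A sketch of that shape, assuming the SUCCEEDED constant equals the literal status string AWS emits; the ARN and payload contents are placeholders:

import json

sfn_event = {
    'source': 'aws.states',
    'detail': {
        'status': 'SUCCEEDED',
        'executionArn': 'arn:aws:states:us-east-1:123456789012:execution:wf:example',
        'input': json.dumps({'id': 'example-input-payload'}),
        'output': json.dumps({'id': 'example-output-payload'}),
    },
}

# parse_event(sfn_event) -> (input ProcessPayload, output ProcessPayload,
#                            'SUCCEEDED', None)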
Example #3
def lambda_handler(event, context):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.publish", payload=payload)

    config = payload.get_task('publish', {})
    public = config.get('public', False)
    # additional SNS topics to publish to
    topics = config.get('sns', [])

    # these are the URLs to the canonical records on s3
    s3urls = []

    try:
        logger.debug("Publishing items to s3 and SNS")

        if API_URL is not None:
            link = {
                'title': payload['id'],
                'rel': 'via-cirrus',
                'href': f"{API_URL}/catid/{payload['id']}"
            }
            logger.debug(json.dumps(link))
            # add cirrus-source relation
            for item in payload['features']:
                item['links'].append(link)

        # publish to s3
        s3urls = payload.publish_items_to_s3(DATA_BUCKET, public=public)

        # publish to Cirrus SNS publish topic
        payload.publish_items_to_sns()

        # Deprecated additional topics
        if PUBLISH_TOPICS:
            for topic in PUBLISH_TOPICS.split(','):
                payload.publish_items_to_sns(topic)

        for topic in topics:
            payload.publish_items_to_sns(topic)
    except Exception as err:
        msg = f"publish: failed publishing output items ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    try:
        # update job outputs in table
        statedb.set_outputs(payload['id'], outputs=s3urls)
    except Exception as err:
        msg = f"publish: failed setting statedb outputs ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    return payload
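
A sketch of the 'publish' task configuration read by payload.get_task('publish', {}) above; the keys shown are the only ones this handler consumes, and the topic ARN is a placeholder:

publish_config = {
    'public': False,  # whether items published to s3 should be public
    'sns': [          # additional SNS topics to publish items to
        'arn:aws:sns:us-east-1:123456789012:example-topic',
    ],
}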
Example #4
def test_get_items_by_properties(base_payload):
    base_payload['process']['item-queries'] = {
        'test': {
            'platform': 'sentinel-2b'
        },
        'empty-test': {
            'platform': 'test-platform'
        }
    }
    payload = ProcessPayload.from_event(base_payload)
    assert payload.get_items_by_properties("test") == base_payload['features']
    assert payload.get_items_by_properties("empty-test") == []
Example #5
def test_get_item_by_properties(base_payload):
    base_payload['process']['item-queries'] = {
        'feature1': {
            'platform': 'sentinel-2b'
        },
        'feature2': {
            'platform': 'test-platform'
        }
    }
    feature1 = copy.deepcopy(base_payload['features'][0])
    feature2 = copy.deepcopy(base_payload['features'][0])
    feature2['properties']['platform'] = 'test-platform'
    base_payload['features'] = [feature1, feature2]
    payload = ProcessPayload.from_event(base_payload)
    assert payload.get_item_by_properties("feature1") == feature1
    assert payload.get_item_by_properties("feature2") == feature2
Example #6
def lambda_handler(event, context):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.pre-batch", payload=payload)

    url = f"s3://{PAYLOAD_BUCKET}/batch/{payload['id']}/{uuid.uuid1()}.json"

    try:
        # copy payload to s3
        s3().upload_json(payload, url)

        logger.debug(f"Uploaded payload to {url}")
        return {'url': url}
    except Exception as err:
        msg = f"pre-batch: failed pre processing batch job for ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
Example #7
def lambda_handler(event, context={}):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.copy-assets", payload=payload)

    # TODO - make this more general for more items/collections
    item = payload['features'][0]  # collection=payload['collections'][0])

    # configuration options
    config = payload.get_task('copy-assets', {})
    outopts = payload.process.get('output_options', {})

    # asset config
    assets = config.get('assets', item['assets'].keys())
    drop_assets = config.get('drop_assets', [])
    # drop specified assets
    for asset in [a for a in drop_assets if a in item['assets']]:
        logger.debug(f'Dropping asset {asset}')
        item['assets'].pop(asset)
    if isinstance(assets, str) and assets == 'ALL':
        assets = item['assets'].keys()

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        # copy specified assets
        _assets = [a for a in assets if a in item['assets']]

        for asset in _assets:
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            item = upload_item_assets(item, assets=[asset], **outopts)

        # replace item in payload
        payload['features'][0] = item
    except Exception as err:
        msg = f"copy-assets: failed processing {payload['id']} ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s', tmpdir)
        rmtree(tmpdir)

    return payload
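
A sketch of the 'copy-assets' task configuration consumed above; the asset keys are placeholders:

copy_assets_config = {
    'assets': ['thumbnail', 'metadata'],  # or the string 'ALL' to copy every asset
    'drop_assets': ['preview'],           # assets removed from the item before copying
}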
Example #8
def lambda_handler(event, context):
    if 'error' not in event:
        return ProcessPayload.from_event(event)

    error = event.get('error', {})
    cause = json.loads(error['Cause'])
    logname = cause['Attempts'][-1]['Container']['LogStreamName']

    try:
        error_type, error_msg = get_error_from_batch(logname)
    except Exception:
        # Lambda does not currently support exception chaining,
        # so we have to log the original exception separately
        logger.exception("Original exception:")
        raise Exception("Unable to get error log")

    exception_class = type(error_type, (Exception, ), {})
    raise exception_class(error_msg)
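
The handler rebuilds the Batch failure as an exception class named after the original error type, using the three-argument form of the built-in type(). A standalone illustration with placeholder values:

error_type, error_msg = 'NoSuchKeyError', 'object not found'  # placeholders
exception_class = type(error_type, (Exception,), {})
try:
    raise exception_class(error_msg)
except Exception as exc:
    print(type(exc).__name__, exc)  # -> NoSuchKeyError object not found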
Example #9
def test_next_payloads_list_of_four(base_payload):
    length = 4
    list_payload = copy.deepcopy(base_payload)
    list_payload['process'] = [base_payload['process']] * length

    # We should now have something like this:
    #
    # payload
    #   process:
    #     - wf1
    #     - wf2
    #     - wf3
    #     - wf4
    payloads = list(ProcessPayload.from_event(list_payload).next_payloads())

    # When we call next_payloads, we find one next payload (wf2)
    # with two to follow. So the length of the list returned should be
    # one, a process payload with a process array of length 3.
    assert len(payloads) == 1
    assert payloads[0]['process'] == [base_payload['process']] * (length - 1)
Example #10
def test_next_payloads_list_of_four_fork(base_payload):
    length = 4
    list_payload = copy.deepcopy(base_payload)
    list_payload['process'] = [base_payload['process']] * length
    list_payload['process'][1] = [base_payload['process']] * 2

    # We should now have something like this:
    #
    # payload
    #   process:
    #     - wf1
    #     - [ wf2a, wf2b]
    #     - wf3
    #     - wf4
    payloads = list(ProcessPayload.from_event(list_payload).next_payloads())

    # When we call next_payloads, we find two next payloads
    # (wf2a and wf2b), each with two to follow. So the length of
    # the list returned should be two, each a process payload
    # with a process array of length 3.
    assert len(payloads) == 2
    assert payloads[0]['process'] == [base_payload['process']] * (length - 1)
    assert payloads[1]['process'] == [base_payload['process']] * (length - 1)
Example #11
def test_from_event(sqs_event):
    payload = ProcessPayload.from_event(sqs_event, update=True)
    assert len(payload['features']) == 1
    assert payload['id'] == \
        'sentinel-s2-l2a-aws/workflow-publish-sentinel/tiles-17-H-QD-2020-11-3-0'
Example #12
def test_next_payloads_no_list(base_payload):
    payloads = list(ProcessPayload.from_event(base_payload).next_payloads())
    assert len(payloads) == 0
Example #13
def test_next_payloads_list_of_one(base_payload):
    base_payload['process'] = [base_payload['process']]
    payloads = list(ProcessPayload.from_event(base_payload).next_payloads())
    assert len(payloads) == 0
Example #14
def lambda_handler(event, context={}):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.add-preview", payload=payload)

    # get step configuration
    config = payload.get_task('add-preview', {})
    outopts = payload.process.get('output_options', {})
    assets = config.pop('assets', None)
    thumb = config.pop('thumbnail', False)
    config.pop('batch', None)  # discard batch-related config if present

    if assets is None:
        msg = "add-preview: no asset specified for preview"
        logger.error(msg)
        raise Exception(msg)

    # create temporary work directory
    tmpdir = tempfile.mkdtemp()
    items = []
    for item in payload['features']:
        # find asset to use for preview
        asset = None
        for a in assets:
            if a in item['assets']:
                asset = a
                break
        if asset is None:
            msg = "add-preview: no available asset for preview"
            logger.warning(msg)
            items.append(item)
            continue

        try:
            # keep original href
            href = item['assets'][asset]['href']

            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])
            filename = item['assets'][asset]['href']

            # add preview to item
            item['assets']['preview'] = create_preview(filename, logger,
                                                       **config)
            if thumb:
                # add thumbnail to item
                item['assets']['thumbnail'] = create_thumbnail(
                    item['assets']['preview']['href'], logger)

            # put back original href
            item['assets'][asset]['href'] = href

            # upload these new assets
            item = upload_item_assets(item,
                                      assets=['preview', 'thumbnail'],
                                      **outopts)
            items.append(item)
        except Exception as err:
            msg = f"add-preview: failed creating preview/thumbnail ({err})"
            logger.error(msg, exc_info=True)
            # remove work directory....very important for Lambdas!
            shutil.rmtree(tmpdir)
            raise Exception(msg) from err

    # remove work directory....very important for Lambdas!
    shutil.rmtree(tmpdir)

    # return new items
    payload['features'] = items
    return payload
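
A sketch of the 'add-preview' task configuration consumed above; 'assets', 'thumbnail', and 'batch' are popped off, and any remaining keys are forwarded to create_preview. The asset keys are placeholders:

add_preview_config = {
    'assets': ['visual', 'B04'],  # candidate assets for the preview, checked in order
    'thumbnail': True,            # also generate a thumbnail from the preview
    'batch': False,               # consumed elsewhere in the workflow, discarded here
}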
Example #15
def test_sns_attributes(base_payload):
    payload = ProcessPayload(base_payload)
    attr = ProcessPayload.sns_attributes(payload['features'][0])
    assert attr['cloud_cover']['StringValue'] == '51.56'
    assert attr['datetime']['StringValue'] == '2020-11-03T15:22:26Z'
Example #16
def test_assign_collections(base_payload):
    payload = ProcessPayload(base_payload)
    payload['process']['output_options']['collections'] = {'test': '.*'}
    payload.assign_collections()
    assert payload['features'][0]['collection'] == 'test'
Example #17
def lambda_handler(event, context={}):
    payload = ProcessPayload.from_event(event)
    logger = get_task_logger("task.convert-to-cog", payload=payload)

    # TODO - make this more general for more items/collections
    item = payload['features'][0]  # collection=payload['collections'][0])

    # configuration options
    config = payload.get_task('convert-to-cog', {})
    outopts = payload.process.get('output_options', {})
    assets = config.get('assets', {})  # per-asset cogify options; nothing converted if unset

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        asset_keys = [a for a in assets if a in item['assets']]

        for asset in asset_keys:
            logger.info(f"Converting {asset} to COG")
            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            # cogify
            fn = item['assets'][asset]['href']
            fnout = cogify(fn, os.path.splitext(fn)[0] + '.tif', **assets[asset])
            item['assets'][asset]['href'] = fnout
            item['assets'][asset]['type'] = "image/tiff; application=geotiff; profile=cloud-optimized"
            with rasterio.open(fnout) as src:
                item['assets'][asset]['proj:shape'] = src.shape
                item['assets'][asset]['proj:transform'] = src.transform

            # upload assets
            item = upload_item_assets(item, assets=[asset], **outopts)
            # cleanup files
            if os.path.exists(fn):
                os.remove(fn)
            if os.path.exists(fnout):
                os.remove(fnout)

        # add derived_from link
        links = [link['href'] for link in item['links'] if link['rel'] == 'self']
        if len(links) == 1:
            # add derived from link
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json'
            })

        # drop any specified assets
        for asset in [a for a in config.get('drop_assets', []) if a in item['assets']]:
            item['assets'].pop(asset)

        payload['features'][0] = item
    except CRSError as err:
        msg = f"convert-to-cog: invalid CRS ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except s3_sessions[list(s3_sessions)[0]].s3.exceptions.NoSuchKey as err:
        msg = f"convert-to-cog: failed fetching {asset} asset ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except Exception as err:
        msg = f"convert-to-cog: failed creating COGs ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s', tmpdir)
        rmtree(tmpdir)

    return payload
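
A sketch of the 'convert-to-cog' task configuration consumed above; each entry under 'assets' maps an asset key to keyword arguments forwarded to cogify, and all keys here are placeholders:

convert_to_cog_config = {
    'assets': {
        'B04': {},  # placeholder asset; an empty dict means default cogify options
    },
    'drop_assets': ['overview'],  # placeholder assets removed after conversion
}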
Example #18
def test_update_payload(base_payload):
    del base_payload['id']
    del base_payload['features'][0]['links']
    payload = ProcessPayload(**base_payload, update=True)
    assert payload['id'] == \
        "sentinel-s2-l2a/workflow-cog-archive/S2B_17HQD_20201103_0_L2A"
Example #19
def test_open_payload(base_payload):
    payload = ProcessPayload(**base_payload)
    assert payload['id'] == \
        "sentinel-s2-l2a/workflow-cog-archive/S2B_17HQD_20201103_0_L2A"
Example #20
def test_next_payloads_chain_filter(chain_payload, chain_filter_payload):
    payloads = list(ProcessPayload(chain_payload, update=True).next_payloads())
    assert len(payloads) == 1
    assert not recursive_compare(payloads[0], chain_payload)
    assert recursive_compare(payloads[0], chain_filter_payload)