Example #1
0
def handler(payload, context=None, local=None):
    """Handle a Cirrus payload (STAC Process Catalog).

    Template task: downloads an asset of the single input Item,
    (optionally) creates and uploads new assets, adds a derived_from
    link, and returns the updated catalog.

    Parameters:
        payload: Cirrus process payload (STAC Process Catalog dict).
        context: Lambda context (unused; default changed from a shared
            mutable ``{}`` to ``None``).
        local: if set, an existing local directory to use as the work
            directory; when ``None`` a temp directory is created and
            removed afterwards.

    Returns:
        The updated catalog.

    Raises:
        ValueError: if the catalog does not contain exactly one Item.
        Exception: if processing the Item fails.
    """
    # get catalog
    catalog = Catalog.from_payload(payload)

    # configure logger
    logger = get_task_logger(f"{__name__}.{TASK_NAME}", catalog=catalog)

    # these are any optional parameters provided for this task
    config = catalog['process']['tasks'].get(TASK_NAME, {})
    # these are general options used when uploading output data to s3
    outopts = payload['process'].get('output_options', {})

    # validation - this task operates on one and only one Item.
    # Explicit check instead of `assert`, which is stripped under -O.
    if len(catalog['features']) != 1:
        raise ValueError(
            f"{TASK_NAME} expects exactly one input Item, "
            f"got {len(catalog['features'])}")
    item = catalog['features'][0]

    # create temporary work directory if not running locally
    tmpdir = mkdtemp() if local is None else local
    outpath = op.join(tmpdir, 'output')
    os.makedirs(outpath, exist_ok=True)

    try:
        # main logic - replace with own
        # download asset, e.g. a thumbnail
        item = download_item_assets(item, path=outpath, assets=['thumbnail'])

        # do something, e.g. modify asset, create new asset
        # item['assets']['asset2'] = create_new_asset(item)

        # upload new assets
        # NOTE(review): uploading only when `local` is set looks inverted
        # compared to the other tasks (which always upload) - confirm intent.
        if local is not None:
            item = upload_item_assets(item, assets=['asset2'], **outopts)

        # recommended to add derived_from link
        links = [lk['href'] for lk in item['links'] if lk['rel'] == 'self']
        if len(links) == 1:
            # add derived_from link pointing at the source Item
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json'
            })

        catalog['features'][0] = item

    except Exception as err:
        msg = f"**task** failed: {err}"
        logger.error(msg, exc_info=True)
        # chain the original exception so the full traceback survives
        raise Exception(msg) from err
    finally:
        # remove work directory if not running locally
        if local is None:
            # lazy %-args: only formatted if DEBUG is enabled
            logger.debug('Removing work directory %s', tmpdir)
            rmtree(tmpdir)

    return catalog
Example #2
0
def handler(payload, context):
    """Publish output Items to s3 and SNS, then mark the catalog complete."""
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.publish", catalog=catalog)

    config = catalog['process']['tasks'].get('publish', {})
    public = config.get('public', False)
    # additional SNS topics to publish to
    topics = config.get('sns', [])

    # URLs to the canonical records on s3
    s3urls = []

    try:
        logger.debug("Publishing items to s3 and SNS")

        if API_URL is not None:
            # add a cirrus-source relation to every Item
            link = {
                'title': catalog['id'],
                'rel': 'via-cirrus',
                'href': f"{API_URL}/catid/{catalog['id']}"
            }
            logger.debug(json.dumps(link))
            for feature in catalog['features']:
                feature['links'].append(link)

        # publish to s3
        s3urls = catalog.publish_to_s3(DATA_BUCKET, public=public)

        # publish to Cirrus SNS publish topic
        catalog.publish_to_sns()

        # Deprecated additional topics
        if PUBLISH_TOPICS:
            for topic in PUBLISH_TOPICS.split(','):
                catalog.publish_to_sns(topic)

        # user-configured additional topics
        for topic in topics:
            catalog.publish_to_sns(topic)
    except Exception as err:
        msg = f"publish: failed publishing output items ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    try:
        # update processing state in table
        statedb.set_completed(catalog['id'], outputs=s3urls)
    except Exception as err:
        msg = f"publish: failed setting as complete ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err

    return catalog
Example #3
0
def handler(payload, context):
    """Stash the payload on s3 and return its URL for a Batch job."""
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.pre-batch", catalog=catalog)

    # unique per-invocation key under the catalog's batch prefix
    url = f"s3://{CATALOG_BUCKET}/batch/{catalog['id']}/{uuid.uuid1()}.json"

    try:
        # copy payload to s3
        s3().upload_json(catalog, url)
        logger.debug(f"Uploaded catalog to {url}")
        return {'url': url}
    except Exception as err:
        msg = f"pre-batch: failed pre processing batch job for ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
Example #4
0
def handler(payload, context=None):
    """Copy configured assets of the first Item to the output location.

    Downloads each configured asset and re-uploads it using the
    payload's output options, replacing the asset hrefs in the Item.

    Parameters:
        payload: Cirrus process payload (STAC Process Catalog dict).
        context: Lambda context (unused; default changed from a shared
            mutable ``{}`` to ``None``).

    Returns:
        The updated catalog.

    Raises:
        Exception: if downloading or uploading any asset fails.
    """
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.copy-assets", catalog=catalog)

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]  #, collection=catalog['collections'][0])

    # configuration options
    config = catalog['process']['tasks'].get('copy-assets', {})
    outopts = catalog['process'].get('output_options', {})

    # asset config: default to every asset on the Item
    assets = config.get('assets', item['assets'].keys())
    drop_assets = config.get('drop_assets', [])
    # drop specified assets
    for asset in [a for a in drop_assets if a in item['assets'].keys()]:
        # lazy %-args: only formatted if DEBUG is enabled
        logger.debug('Dropping asset %s', asset)
        item['assets'].pop(asset)
    # 'ALL' means copy every asset remaining after the drop above
    if isinstance(assets, str) and assets == 'ALL':
        assets = item['assets'].keys()

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        # copy only assets that actually exist on the Item
        _assets = [a for a in assets if a in item['assets'].keys()]

        for asset in _assets:
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            item = upload_item_assets(item, assets=[asset], **outopts)

        # replace item in catalog
        catalog['features'][0] = item
    except Exception as err:
        msg = f"copy-assets: failed processing {catalog['id']} ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s', tmpdir)
        rmtree(tmpdir)

    return catalog
Example #5
0
def handler(payload, context):
    """Record a failed workflow in the state DB and notify via SNS.

    Parses the Step Function error in the payload, marks the catalog
    as failed (or invalid for InvalidInput errors) in the state DB,
    and optionally publishes the failed item to FAILED_TOPIC_ARN.

    Parameters:
        payload: Cirrus process payload containing an 'error' dict
            with Step Function 'Error'/'Cause' keys.
        context: Lambda context (unused).

    Returns:
        The catalog, unchanged.
    """
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.workflow-failed", catalog=catalog)

    # parse errors
    error = payload.get('error', {})

    # error type
    error_type = error.get('Error', "unknown")

    # check if cause is JSON
    try:
        cause = json.loads(error['Cause'])
        error_msg = 'unknown'
        if 'errorMessage' in cause:
            error_msg = cause.get('errorMessage', 'unknown')
        elif 'Attempts' in cause:
            try:
                # batch
                reason = cause['Attempts'][-1]['StatusReason']
                if 'Essential container in task exited' in reason:
                    # get the message from batch logs
                    logname = cause['Attempts'][-1]['Container'][
                        'LogStreamName']
                    error_type, error_msg = get_error_from_batch(logname)
            except Exception as err:
                logger.error(err, exc_info=True)
    except Exception:
        # Cause is missing or not JSON: fall back to the raw value.
        # (was error['Cause'], which re-raised KeyError from inside this
        # handler whenever 'Cause' was absent from the error dict)
        error_msg = error.get('Cause', 'unknown')

    error = f"{error_type}: {error_msg}"
    logger.info(error)

    try:
        if error_type == "InvalidInput":
            statedb.set_invalid(catalog['id'], error)
        else:
            statedb.set_failed(catalog['id'], error)
    except Exception as err:
        msg = f"Failed marking as failed: {err}"
        logger.error(msg, exc_info=True)
        # bare raise preserves the original traceback
        raise

    if FAILED_TOPIC_ARN is not None:
        try:
            item = statedb.dbitem_to_item(statedb.get_dbitem(catalog['id']))
            attrs = {
                'collections': {
                    'DataType': 'String',
                    'StringValue': item['collections']
                },
                'workflow': {
                    'DataType': 'String',
                    'StringValue': item['workflow']
                },
                'error': {
                    'DataType': 'String',
                    'StringValue': error
                }
            }
            logger.debug(f"Publishing item to {FAILED_TOPIC_ARN}")
            SNS_CLIENT.publish(TopicArn=FAILED_TOPIC_ARN,
                               Message=json.dumps(item),
                               MessageAttributes=attrs)
        except Exception as err:
            msg = f"Failed publishing to {FAILED_TOPIC_ARN}: {err}"
            logger.error(msg, exc_info=True)
            raise

    return catalog
Example #6
0
def handler(payload, context=None):
    """Create preview (and optional thumbnail) assets for each Item.

    For every feature, downloads the first configured asset found on
    the Item, generates a 'preview' asset (and a 'thumbnail' if
    requested), uploads the new assets, and restores the original href.

    Parameters:
        payload: Cirrus process payload (STAC Process Catalog dict).
        context: Lambda context (unused; default changed from a shared
            mutable ``{}`` to ``None``).

    Returns:
        The catalog with updated features.

    Raises:
        Exception: if no asset is configured, or preview creation fails.
    """
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.add-preview", catalog=catalog)

    # get step configuration
    config = catalog['process']['tasks'].get('add-preview', {})
    outopts = catalog['process'].get('output_options', {})
    assets = config.pop('assets', None)
    thumb = config.pop('thumbnail', False)
    # remaining config is forwarded to create_preview(); 'batch' is an
    # orchestration flag, not a preview option, so strip it (default
    # avoids a KeyError when 'batch' is not configured)
    config.pop('batch', None)

    if assets is None:
        msg = "add-preview: no asset specified for preview"
        logger.error(msg)
        raise Exception(msg)

    # create temporary work directory
    tmpdir = tempfile.mkdtemp()
    items = []
    try:
        for item in catalog['features']:
            # find first configured asset present on this Item
            asset = next((a for a in assets if a in item['assets']), None)
            if asset is None:
                logger.warning("add-preview: no available asset for preview")
                items.append(item)
                continue

            try:
                # keep original href
                href = item['assets'][asset]['href']

                # download asset
                item = download_item_assets(item, path=tmpdir,
                                            assets=[asset])
                filename = item['assets'][asset]['href']

                # add preview to item
                item['assets']['preview'] = create_preview(filename, logger,
                                                           **config)
                if thumb:
                    # add thumbnail to item
                    item['assets']['thumbnail'] = create_thumbnail(
                        item['assets']['preview']['href'], logger)

                # put back original href
                item['assets'][asset]['href'] = href

                # upload these new assets
                item = upload_item_assets(item,
                                          assets=['preview', 'thumbnail'],
                                          **outopts)
                items.append(item)
            except Exception as err:
                msg = f"add-preview: failed creating preview/thumbnail ({err})"
                logger.error(msg, exc_info=True)
                raise Exception(msg) from err
    finally:
        # remove work directory on every exit path
        # ....very important for Lambdas!
        shutil.rmtree(tmpdir)

    # return new items
    catalog['features'] = items
    return catalog
Example #7
0
def handler(payload, context=None):
    """Convert configured assets of the first Item to COGs.

    Downloads each configured asset, converts it to a Cloud-Optimized
    GeoTIFF with the per-asset cogify options, records projection info,
    uploads the result, and adds a derived_from link.

    Parameters:
        payload: Cirrus process payload (STAC Process Catalog dict).
        context: Lambda context (unused; default changed from a shared
            mutable ``{}`` to ``None``).

    Returns:
        The updated catalog.

    Raises:
        InvalidInput: for a bad CRS or a missing source asset on s3.
        Exception: for any other conversion failure.
    """
    catalog = Catalog.from_payload(payload)
    logger = get_task_logger(f"{__name__}.convert-to-cog", catalog=catalog)

    # TODO - make this more general for more items/collections
    item = catalog['features'][0]  #, collection=catalog['collections'][0])

    # configuration options
    config = catalog['process']['tasks'].get('convert-to-cog', {})
    outopts = catalog['process'].get('output_options', {})
    # mapping of asset key -> cogify() keyword options; default to {}
    # so a missing 'assets' config means "nothing to convert" instead
    # of a TypeError when iterating None
    assets = config.get('assets', {})

    # create temporary work directory
    tmpdir = mkdtemp()

    try:
        asset_keys = [a for a in assets if a in item['assets'].keys()]

        for asset in asset_keys:
            logger.info(f"Converting {asset} to COG")
            # download asset
            item = download_item_assets(item, path=tmpdir, assets=[asset])

            # cogify
            fn = item['assets'][asset]['href']
            fnout = cogify(fn, op.splitext(fn)[0] + '.tif', **assets[asset])
            item['assets'][asset]['href'] = fnout
            item['assets'][asset][
                'type'] = "image/tiff; application=geotiff; profile=cloud-optimized"
            # record projection info from the generated COG
            with rasterio.open(fnout) as src:
                item['assets'][asset]['proj:shape'] = src.shape
                item['assets'][asset]['proj:transform'] = src.transform

            # upload assets
            item = upload_item_assets(item, assets=[asset], **outopts)
            # cleanup files
            if op.exists(fn):
                remove(fn)
            if op.exists(fnout):
                remove(fnout)

        # add derived_from link
        links = [lk['href'] for lk in item['links'] if lk['rel'] == 'self']
        if len(links) == 1:
            # add derived_from link pointing at the source Item
            item['links'].append({
                'title': 'Source STAC Item',
                'rel': 'derived_from',
                'href': links[0],
                'type': 'application/json'
            })

        # drop any specified assets
        for asset in [
                a for a in config.get('drop_assets', [])
                if a in item['assets'].keys()
        ]:
            item['assets'].pop(asset)

        catalog['features'][0] = item
    except CRSError as err:
        msg = f"convert-to-cog: invalid CRS ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except s3_sessions[list(s3_sessions)[0]].s3.exceptions.NoSuchKey as err:
        msg = f"convert-to-cog: failed fetching {asset} asset ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err
    except Exception as err:
        msg = f"convert-to-cog: failed creating COGs ({err})"
        logger.error(msg, exc_info=True)
        raise Exception(msg) from err
    finally:
        # remove work directory....very important for Lambdas!
        logger.debug('Removing work directory %s', tmpdir)
        rmtree(tmpdir)

    return catalog
Example #8
0
def handler(payload, context=None):
    """Create STAC Items for a Sentinel-2 scene from its metadata.

    Fetches the L2A metadata referenced by the input Item and the
    matching L1C metadata (L2A lacks cloud cover), builds STAC Items
    for both, and replaces the catalog's features with them.

    Parameters:
        payload: Cirrus process payload (STAC Process Catalog dict).
        context: Lambda context (unused; default changed from a shared
            mutable ``{}`` to ``None``).

    Returns:
        The catalog with the created Items as its features.

    Raises:
        InvalidInput: when the metadata is unusable (no data geometry,
            STAC creation failure, or an antimeridian-crossing bbox).
    """
    catalog = Catalog.from_payload(payload)

    logger = get_task_logger(f"{__name__}.sentinel-to-stac", catalog=catalog)

    items = []
    # get metadata
    url = catalog['features'][0]['assets']['json']['href'].rstrip()
    # if this is the FREE URL, get s3 base
    if url.startswith('https'):
        base_url = 's3:/' + op.dirname(urlparse(url).path)
    else:
        base_url = op.dirname(url)

    # TODO - handle getting from s3 as well as http?
    # get metadata
    metadata = fetch_metadata(url, logger)

    # need to get cloud cover from sentinel-s2-l1c since missing from l2a
    # so fetch and publish l1c as well
    try:
        _url = url.replace('sentinel-s2-l2a', 'sentinel-s2-l1c')
        l1c_metadata = fetch_metadata(_url, logger)
    except InvalidInput:
        # L1C metadata unavailable; proceed with L2A alone
        l1c_metadata = None

    if l1c_metadata is not None:
        # tileDataGeometry in L2A but not in L1C
        if 'tileDataGeometry' not in l1c_metadata:
            if 'tileDataGeometry' in metadata:
                l1c_metadata['tileDataGeometry'] = metadata['tileDataGeometry']
            else:
                msg = "sentinel-to-stac: no valid data geometry available"
                # no active exception here, so no exc_info
                logger.error(msg)
                raise InvalidInput(msg)

        try:
            _item = sentinel_s2_l1c(
                l1c_metadata,
                base_url.replace('sentinel-s2-l2a', 'sentinel-s2-l1c'))
            # rewrite s3 hrefs to the public roda endpoint
            for a in ['thumbnail', 'info', 'metadata']:
                _item['assets'][a]['href'] = _item['assets'][a][
                    'href'].replace('s3:/', 'https://roda.sentinel-hub.com')
            # if dataCoveragePercentage not in L1 data, try getting from L2
            if 'dataCoveragePercentage' not in l1c_metadata and 'dataCoveragePercentage' in metadata:
                _item['properties']['sentinel:data_coverage'] = float(
                    metadata['dataCoveragePercentage'])
            items.append(_item)
        except Exception as err:
            msg = f"sentinel-to-stac: failed creating L1C STAC ({err})"
            logger.error(msg, exc_info=True)
            raise InvalidInput(msg) from err

        # use L1C cloudyPixelPercentage
        metadata['cloudyPixelPercentage'] = l1c_metadata[
            'cloudyPixelPercentage']

        # tileDataGeometry in L1C but not L2A
        if 'tileDataGeometry' not in metadata and 'tileDataGeometry' in l1c_metadata:
            metadata['tileDataGeometry'] = l1c_metadata['tileDataGeometry']

    # tileDataGeometry not available
    if 'tileDataGeometry' not in metadata:
        msg = "sentinel-to-stac: no valid data geometry available"
        logger.error(msg)
        raise InvalidInput(msg)

    try:
        item = sentinel_s2_l2a(metadata, base_url)
        # rewrite s3 hrefs to the public roda endpoint
        for a in ['thumbnail', 'info', 'metadata']:
            item['assets'][a]['href'] = item['assets'][a]['href'].replace(
                's3:/', 'https://roda.sentinel-hub.com')

        # cloud cover is only trustworthy when taken from L1C metadata
        item['properties']['sentinel:valid_cloud_cover'] = (
            l1c_metadata is not None)
        items.append(item)
    except Exception as err:
        msg = f"sentinel-to-stac: failed creating STAC ({err})"
        logger.error(msg, exc_info=True)
        raise InvalidInput(msg) from err

    # discard if crossing antimeridian
    if item['bbox'][2] - item['bbox'][0] > 300:
        msg = "sentinel-to-stac: crosses antimeridian, discarding"
        logger.error(msg)
        raise InvalidInput(msg)

    # update STAC catalog
    catalog['features'] = items

    return catalog