Example #1
0
def get_s3_session(bucket: str=None, s3url: str=None, **kwargs) -> s3:
    """Get boto3-utils s3 class for interacting with an s3 bucket.

    Sessions are cached per bucket name in the module-level `s3_sessions` mapping.
    When no cached session exists, a secret named `cirrus-creds-<bucket-name>` is
    looked up and its contents are merged over `kwargs`. If the lookup raises a
    `ClientError`, only `kwargs` are used to build the session.

    Args:
        bucket (str, optional): Bucket name to access. Defaults to None.
        s3url (str, optional): The s3 URL to access; when given, the bucket parsed
            from it replaces `bucket`. Defaults to None.
        **kwargs: Keyword arguments forwarded to `boto3.Session`. A
            `requester_pays` entry is removed and passed to the s3 class instead.

    Returns:
        s3: A boto3-utils s3 class
    """
    if s3url:
        parts = s3.urlparse(s3url)
        bucket = parts['bucket']

    if bucket and bucket in s3_sessions:
        return s3_sessions[bucket]

    # otherwise, create new session for this bucket
    creds = deepcopy(kwargs)
    secret_name = f"cirrus-creds-{bucket}"

    try:
        # get credentials from AWS secret
        _creds = secrets.get_secret(secret_name)
    except ClientError:
        logger.debug(f"Using default credentials for bucket {bucket}")
    else:
        creds.update(_creds)
        # Never write the credential values themselves to the log: they are secrets.
        logger.debug(f"Using credentials from secret {secret_name} for bucket {bucket}")

    # requester_pays is an option of the s3 class, not of boto3.Session
    requester_pays = creds.pop('requester_pays', False)
    session = boto3.Session(**creds)
    s3_sessions[bucket] = s3(session, requester_pays=requester_pays)
    return s3_sessions[bucket]
Example #2
0
def upload_item_assets(item: Dict, assets: List[str]=None, public_assets: List[str]=None,
                       path_template: str='${collection}/${id}', s3_urls: bool=False,
                       headers: Dict=None, s3_session: s3=None, **kwargs) -> Dict:
    """Upload Item assets to s3 bucket

    Assets whose `href` does not exist on the local filesystem are skipped with a
    warning. The input Item is not modified; a deep copy with updated hrefs is returned.

    Args:
        item (Dict): STAC Item
        assets (List[str], optional): List of asset keys to upload. Defaults to None,
            meaning all assets of the Item.
        public_assets (List[str], optional): List of assets keys that should be public,
            or the string 'ALL' to make every asset public. Defaults to None (no public assets).
        path_template (str, optional): Path string template. Defaults to '${collection}/${id}'.
        s3_urls (bool, optional): Return s3 URLs instead of http URLs. Defaults to False.
        headers (Dict, optional): Dictionary of headers to set on uploaded assets; these
            override the ContentType derived from the asset's `type`. Defaults to None.
        s3_session (s3, optional): boto3-utils s3 object for s3 interactions. Defaults to None,
            in which case a session is obtained per destination bucket via `get_s3_session`.
        **kwargs: Accepted for call compatibility; not used by this function.

    Returns:
        Dict: A new STAC Item with uploaded assets pointing to newly uploaded file URLs
    """
    item_assets = item['assets']

    # if assets not provided, upload all assets
    _assets = assets if assets is not None else item_assets.keys()

    # determine which assets should be public
    if public_assets is None:
        public_assets = []
    elif isinstance(public_assets, str) and public_assets == 'ALL':
        public_assets = item_assets.keys()

    extra_headers = {} if headers is None else headers

    # deepcopy of item
    _item = deepcopy(item)

    # only consider requested keys that actually exist on the Item
    for key in [a for a in _assets if a in item_assets]:
        asset = item_assets[key]
        filename = asset['href']
        if not op.exists(filename):
            logger.warning(f"Cannot upload {filename}: does not exist")
            continue
        public = key in public_assets
        _headers = {}
        if 'type' in asset:
            _headers['ContentType'] = asset['type']
        # caller-supplied headers take precedence over the derived ContentType
        _headers.update(extra_headers)
        # output URL
        url = get_path(item, op.join(path_template, op.basename(filename)))
        # if output URL is relative, put it in the default data bucket
        if not url.startswith('s3://'):
            url = f"s3://{DATA_BUCKET}/{url}"
        # honor a caller-supplied session; otherwise look one up for the target bucket
        if s3_session is not None:
            session = s3_session
        else:
            parts = s3.urlparse(url)
            session = get_s3_session(parts['bucket'])

        # upload
        logger.info(f"Uploading {filename} to {url}")
        url_out = session.upload(filename, url, public=public, extra=_headers, http_url=not s3_urls)
        _item['assets'][key]['href'] = url_out
    return _item
Example #3
0
def download_item_assets(item: Dict, path: str='', assets: Optional[List[str]]=None) -> Dict:
    """Fetch the assets of a STAC Item to the local filesystem

    Each selected asset href is downloaded either through an s3 session (for s3 URLs,
    including amazonaws.com http URLs which are first converted to s3 URLs) or through
    a plain http download. Hrefs with any other scheme are logged as errors and left as-is.

    Args:
        item (Dict): A STAC Item dictionary
        path (str, optional): Path to download files to. Defaults to ''
        assets (Optional[List[str]], optional): Asset keys to download. Defaults to None,
            meaning every asset of the Item

    Returns:
        Dict: A deep copy of the Item in which each downloaded asset's href is the
            absolute path of the downloaded file
    """
    # no explicit selection means every asset on the Item
    if assets is None:
        assets = item['assets'].keys()

    result = deepcopy(item)

    for key in assets:
        href = item['assets'][key]['href']
        logger.debug(f"Downloading {href}")

        # amazonaws http URLs are rewritten to their s3 form
        if 'amazonaws.com' in href:
            href = s3.https_to_s3(href)

        if href.startswith('s3://'):
            # s3 source: use a session for the bucket named in the URL
            bucket = s3.urlparse(href)['bucket']
            local_file = get_s3_session(bucket).download(href, path=path)
        elif href.startswith('http'):
            # general http source
            local_file = download_from_http(href, path=path)
        else:
            logger.error(f"Unknown protocol for {href}")
            continue

        # nothing was downloaded, keep the original href
        if not local_file:
            continue
        result['assets'][key]['href'] = op.abspath(local_file)
    return result
Example #4
0
def get_s3_session(bucket: str=None, s3url: str=None, **kwargs) -> s3:
    """Return a boto3-utils s3 class for working with an s3 bucket.

    Sessions are cached per bucket name in `s3_sessions`. For an uncached bucket a
    secret named `cirrus-creds-<bucket-name>` is looked up and merged over `kwargs`;
    when that secret does not exist, only `kwargs` are used for the session.

    Args:
        bucket (str, optional): Bucket name to access. Defaults to None.
        s3url (str, optional): The s3 URL to access; when given, its bucket replaces
            `bucket`. Defaults to None.
        **kwargs: Keyword arguments for `boto3.Session`; a `requester_pays` entry is
            removed and handed to the s3 class instead.

    Returns:
        s3: A boto3-utils s3 class

    Raises:
        ClientError: If the secret lookup fails for any reason other than the
            secret not being found.
    """
    if s3url:
        bucket = s3.urlparse(s3url)['bucket']

    # reuse an existing session for this bucket when there is one
    if bucket and bucket in s3_sessions:
        return s3_sessions[bucket]

    session_args = deepcopy(kwargs)
    secret_name = f"cirrus-creds-{bucket}"

    try:
        # credentials stored in the AWS secret override the passed kwargs
        session_args.update(secrets.get_secret(secret_name))
    except ClientError as e:
        if e.response["Error"]["Code"] == "ResourceNotFoundException":
            logger.info(f"Secret not found, using default credentials: '{secret_name}'")
        else:
            # some other client error we cannot handle
            raise

    requester_pays = session_args.pop('requester_pays', False)
    new_session = s3(boto3.Session(**session_args), requester_pays=requester_pays)
    s3_sessions[bucket] = new_session
    return s3_sessions[bucket]
Example #5
0
def test_urlparse_invalid():
    """s3.urlparse is expected to raise for a string that is not an s3 URL."""
    not_an_s3_url = 'invalid'
    with pytest.raises(Exception):
        s3.urlparse(not_an_s3_url)
Example #6
0
def test_urlparse_nokey():
    """A bucket-only s3 URL parses to the bucket with empty key and filename."""
    parsed = s3.urlparse('s3://bucket')
    assert parsed['bucket'] == 'bucket'
    # with no object path, both path-derived fields are empty strings
    for field in ('key', 'filename'):
        assert parsed[field] == ''
Example #7
0
def test_urlparse():
    """A single-segment s3 URL parses to its bucket, with key and filename equal."""
    parsed = s3.urlparse('s3://bucket/path')
    bucket = parsed['bucket']
    assert bucket == 'bucket'
    key = parsed['key']
    assert key == 'path'
    # for a one-segment path the filename is the whole key
    assert key == parsed['filename']