import logging
import os
import os.path as op
from copy import deepcopy
from typing import Dict, List, Optional

import boto3
from boto3utils import s3, secrets
from botocore.exceptions import ClientError

logger = logging.getLogger(__name__)

# cache of boto3-utils s3 sessions, keyed by bucket name
s3_sessions = {}

# default data bucket for relative upload URLs (assumed to come from the environment)
DATA_BUCKET = os.getenv('CIRRUS_DATA_BUCKET')

# NOTE: get_path (path template substitution from Item properties) and
# download_from_http are helpers assumed to be defined elsewhere in this module


def get_s3_session(bucket: str = None, s3url: str = None, **kwargs) -> s3:
    """Get boto3-utils s3 class for interacting with an s3 bucket. A secret will be
    looked for with the name `cirrus-creds-<bucket-name>`. If no secret is found
    the default session will be used.

    Args:
        bucket (str, optional): Bucket name to access. Defaults to None.
        s3url (str, optional): The s3 URL to access. Defaults to None.

    Returns:
        s3: A boto3-utils s3 class
    """
    if s3url:
        parts = s3.urlparse(s3url)
        bucket = parts['bucket']
    if bucket and bucket in s3_sessions:
        return s3_sessions[bucket]

    # otherwise, create new session for this bucket
    creds = deepcopy(kwargs)
    try:
        # get credentials from AWS secret
        secret_name = f"cirrus-creds-{bucket}"
        _creds = secrets.get_secret(secret_name)
        creds.update(_creds)
        # log which bucket uses secret credentials, without dumping the values
        logger.debug(f"Using credentials from secret {secret_name} for bucket {bucket}")
    except ClientError:
        logger.debug(f"Using default credentials for bucket {bucket}")

    requester_pays = creds.pop('requester_pays', False)
    session = boto3.Session(**creds)
    s3_sessions[bucket] = s3(session, requester_pays=requester_pays)
    return s3_sessions[bucket]
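
# A minimal usage sketch for get_s3_session; the bucket name and key below are
# hypothetical. Passing an s3 URL resolves to the same per-bucket cached session
# as passing the bucket name directly.
def _example_get_s3_session():
    sess = get_s3_session(bucket='my-data-bucket')
    same = get_s3_session(s3url='s3://my-data-bucket/some/key.tif')
    # both lookups hit the s3_sessions cache for 'my-data-bucket'
    assert sess is same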
def upload_item_assets(item: Dict, assets: List[str] = None, public_assets: List[str] = None,
                       path_template: str = '${collection}/${id}', s3_urls: bool = False,
                       headers: Dict = None, s3_session: s3 = None, **kwargs) -> Dict:
    """Upload Item assets to an s3 bucket

    Args:
        item (Dict): STAC Item
        assets (List[str], optional): List of asset keys to upload. Defaults to None (all assets).
        public_assets (List[str], optional): List of asset keys that should be public,
            or the string 'ALL' to make every asset public. Defaults to None.
        path_template (str, optional): Path string template. Defaults to '${collection}/${id}'.
        s3_urls (bool, optional): Return s3 URLs instead of http URLs. Defaults to False.
        headers (Dict, optional): Dictionary of headers to set on uploaded assets. Defaults to None.
        s3_session (s3, optional): boto3-utils s3 object for s3 interactions. Defaults to None.

    Returns:
        Dict: A new STAC Item with assets pointing to the newly uploaded file URLs
    """
    # avoid mutable default arguments
    public_assets = public_assets if public_assets is not None else []
    headers = headers if headers is not None else {}

    # if assets not provided, upload all assets
    _assets = assets if assets is not None else item['assets'].keys()

    # 'ALL' means every asset should be public
    if public_assets == 'ALL':
        public_assets = item['assets'].keys()

    # deepcopy of item
    _item = deepcopy(item)

    for key in [a for a in _assets if a in item['assets'].keys()]:
        asset = item['assets'][key]
        filename = asset['href']
        if not op.exists(filename):
            logger.warning(f"Cannot upload {filename}: does not exist")
            continue
        public = key in public_assets
        _headers = {}
        if 'type' in asset:
            _headers['ContentType'] = asset['type']
        _headers.update(headers)

        # output URL
        url = get_path(item, op.join(path_template, op.basename(filename)))
        # if output URL is relative, put it in the default data bucket
        if not url.startswith('s3://'):
            url = f"s3://{DATA_BUCKET}/{url}"
        parts = s3.urlparse(url)
        # use the provided session if given, otherwise look one up for this bucket
        session = s3_session if s3_session is not None else get_s3_session(parts['bucket'])

        # upload
        logger.info(f"Uploading {filename} to {url}")
        url_out = session.upload(filename, url, public=public, extra=_headers, http_url=not s3_urls)
        _item['assets'][key]['href'] = url_out
    return _item
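
# A hedged usage sketch for upload_item_assets; the Item, collection, and local
# file below are hypothetical. With the default path template, the thumbnail
# would land under s3://<DATA_BUCKET>/sentinel-2-l2a/scene-001/.
def _example_upload_item_assets():
    item = {
        'id': 'scene-001',
        'collection': 'sentinel-2-l2a',
        'assets': {
            'thumbnail': {'href': 'preview.png', 'type': 'image/png'},
        },
    }
    # upload only the thumbnail, publicly readable, and get back an http URL
    uploaded = upload_item_assets(item, assets=['thumbnail'], public_assets=['thumbnail'])
    print(uploaded['assets']['thumbnail']['href'])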
def download_item_assets(item: Dict, path: str = '', assets: Optional[List[str]] = None) -> Dict:
    """Download STAC Item assets to the local filesystem

    Args:
        item (Dict): A STAC Item dictionary
        path (str, optional): Path to download files to. Defaults to the current directory.
        assets (Optional[List[str]], optional): List of asset keys to download. Defaults to all assets.

    Returns:
        Dict: A new STAC Item with assets pointing to the newly downloaded files
    """
    # if assets not provided, download all assets
    assets = assets if assets is not None else item['assets'].keys()

    _item = deepcopy(item)

    for a in assets:
        # download each asset
        url = item['assets'][a]['href']
        logger.debug(f"Downloading {url}")

        # convert an s3-backed http URL to an s3 URL
        if 'amazonaws.com' in url:
            url = s3.https_to_s3(url)

        filename = None
        # s3 source
        if url.startswith('s3://'):
            parts = s3.urlparse(url)
            s3_session = get_s3_session(parts['bucket'])
            filename = s3_session.download(url, path=path)
        # general http source
        elif url.startswith('http'):
            filename = download_from_http(url, path=path)
        else:
            logger.error(f"Unknown protocol for {url}")

        # if downloaded, update href in Item
        if filename:
            _item['assets'][a]['href'] = op.abspath(filename)
    return _item
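
# A hedged usage sketch for download_item_assets; the Item and s3 URL below are
# hypothetical. A single asset is downloaded into a local 'work' directory.
def _example_download_item_assets():
    item = {
        'id': 'scene-001',
        'assets': {
            'data': {'href': 's3://my-data-bucket/scene-001/data.tif'},
        },
    }
    local_item = download_item_assets(item, path='work', assets=['data'])
    # the href now points at the absolute path of the downloaded file
    print(local_item['assets']['data']['href'])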
def get_s3_session(bucket: str = None, s3url: str = None, **kwargs) -> s3:
    """Get boto3-utils s3 class for interacting with an s3 bucket. A secret will be
    looked for with the name `cirrus-creds-<bucket-name>`. If no secret is found
    the default session will be used.

    Args:
        bucket (str, optional): Bucket name to access. Defaults to None.
        s3url (str, optional): The s3 URL to access. Defaults to None.

    Returns:
        s3: A boto3-utils s3 class
    """
    if s3url:
        parts = s3.urlparse(s3url)
        bucket = parts['bucket']
    if bucket and bucket in s3_sessions:
        return s3_sessions[bucket]

    # otherwise, create new session for this bucket
    creds = deepcopy(kwargs)
    try:
        # get credentials from AWS secret
        secret_name = f"cirrus-creds-{bucket}"
        _creds = secrets.get_secret(secret_name)
        creds.update(_creds)
    except ClientError as e:
        if e.response["Error"]["Code"] != "ResourceNotFoundException":
            # some other client error we cannot handle
            raise e
        logger.info(f"Secret not found, using default credentials: '{secret_name}'")

    requester_pays = creds.pop('requester_pays', False)
    session = boto3.Session(**creds)
    s3_sessions[bucket] = s3(session, requester_pays=requester_pays)
    return s3_sessions[bucket]
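
# For reference, the cirrus-creds-<bucket> secret is expected to decode to a JSON
# object whose keys (other than the optional requester_pays flag, which is popped
# off before the session is built) are valid boto3.Session keyword arguments.
# A hypothetical payload:
#
#     {
#         "aws_access_key_id": "...",
#         "aws_secret_access_key": "...",
#         "region_name": "us-west-2",
#         "requester_pays": true
#     }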
import pytest

from boto3utils import s3


def test_urlparse_invalid():
    with pytest.raises(Exception):
        s3.urlparse('invalid')
def test_urlparse_nokey():
    parts = s3.urlparse('s3://bucket')
    assert parts['bucket'] == 'bucket'
    assert parts['key'] == ''
    assert parts['filename'] == ''
def test_urlparse():
    parts = s3.urlparse('s3://bucket/path')
    assert parts['bucket'] == 'bucket'
    assert parts['key'] == 'path'
    assert parts['key'] == parts['filename']
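
# A hedged companion test sketch: boto3-utils can also convert s3-backed https
# URLs to s3 URLs (used by download_item_assets above); the exact hostname forms
# supported may vary between boto3-utils versions.
def test_https_to_s3():
    url = s3.https_to_s3('https://bucket.s3.us-west-2.amazonaws.com/path/file.txt')
    assert url == 's3://bucket/path/file.txt'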