def list_dumps():
    """List the dump "directories" found under the S3 dump location.

    Returns
    -------
    list of S3Path objects
        One S3Path per common prefix returned by S3 beneath the location
        given by `get_s3_dump()`. The list is empty if S3 reports no
        common prefixes.
    """
    s3_base = get_s3_dump()
    s3 = boto3.client('s3')

    # With Delimiter='/', S3 rolls keys up into "directory"-like common
    # prefixes directly beneath the base prefix.
    res = s3.list_objects_v2(Delimiter='/', **s3_base.kw(prefix=True))

    # The 'CommonPrefixes' key is absent from the response when there are no
    # sub-prefixes, so fall back to an empty list instead of a KeyError.
    # NOTE(review): a single list_objects_v2 call is not paginated; confirm
    # the number of dumps stays below the per-call limit.
    return [
        S3Path.from_key_parts(s3_base.bucket, d['Prefix'])
        for d in res.get('CommonPrefixes', [])
    ]
def list_dumps(started=None, ended=None):
    """List all dumps, optionally filtered by their status.

    Parameters
    ----------
    started : Optional[bool]
        If True, find dumps that have started. If False, find dumps that have
        NOT been started. If None, do not filter by start status.
    ended : Optional[bool]
        The same as `started`, but checking whether the dump is ended or not.

    Returns
    -------
    list of S3Path objects
        Each S3Path object contains the bucket and key prefix information for
        a set of dump files, e.g.

            [S3Path(bigmech, indra-db/dumps/2020-07-16/),
             S3Path(bigmech, indra-db/dumps/2020-08-28/),
             S3Path(bigmech, indra-db/dumps/2020-09-18/),
             S3Path(bigmech, indra-db/dumps/2020-11-12/),
             S3Path(bigmech, indra-db/dumps/2020-11-13/)]

        The list is empty if no dump "directories" are found.
    """
    # Get all the dump "directories".
    s3_base = get_s3_dump()
    s3 = boto3.client('s3')
    res = s3.list_objects_v2(Delimiter='/', **s3_base.kw(prefix=True))

    # 'CommonPrefixes' is absent from the response when there are no
    # sub-prefixes. Checking KeyCount alone is not enough: it also counts
    # plain objects directly under the base prefix, so it can be non-zero
    # while 'CommonPrefixes' is still missing.
    # NOTE(review): a single list_objects_v2 call is not paginated; confirm
    # the number of dumps stays below the per-call limit.
    dumps = [
        S3Path.from_key_parts(s3_base.bucket, d['Prefix'])
        for d in res.get('CommonPrefixes', [])
    ]

    # Filter to those that have "started"
    if started is not None:
        dumps = [
            p for p in dumps
            if p.get_element_path(Start.file_name()).exists(s3) == started
        ]

    # Filter to those that have "ended"
    if ended is not None:
        dumps = [
            p for p in dumps
            if p.get_element_path(End.file_name()).exists(s3) == ended
        ]

    return dumps