예제 #1
0
def list_dumps():
    """List every dump "directory" directly under the base dump location.

    Returns
    -------
    list of S3Path objects
        One S3Path per common prefix found directly below the location
        given by `get_s3_dump()`. The list is empty if there are none.
    """
    s3_base = get_s3_dump()
    s3 = boto3.client('s3')

    # A single list_objects_v2 call only returns one page of results, so
    # paginate to make sure no dump is silently dropped.
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Delimiter='/', **s3_base.kw(prefix=True))

    # S3 leaves 'CommonPrefixes' out of the response entirely when there are
    # no common prefixes, so fall back to an empty list rather than indexing.
    return [
        S3Path.from_key_parts(s3_base.bucket, d['Prefix'])
        for page in pages
        for d in page.get('CommonPrefixes', [])
    ]
예제 #2
0
def list_dumps(started=None, ended=None):
    """List all dumps, optionally filtered by their status.

    Parameters
    ----------
    started : Optional[bool]
        If True, find dumps that have started. If False, find dumps that have
        NOT been started. If None, do not filter by start status.
    ended : Optional[bool]
        The same as `started`, but checking whether the dump is ended or not.

    Returns
    -------
    list of S3Path objects
        Each S3Path object contains the bucket and key prefix information for
        a set of dump files, e.g.

            [S3Path(bigmech, indra-db/dumps/2020-07-16/),
             S3Path(bigmech, indra-db/dumps/2020-08-28/),
             S3Path(bigmech, indra-db/dumps/2020-09-18/),
             S3Path(bigmech, indra-db/dumps/2020-11-12/),
             S3Path(bigmech, indra-db/dumps/2020-11-13/)]

        The list is empty if no dump "directories" exist (or none match the
        requested filters).
    """
    # Get all the dump "directories".
    s3_base = get_s3_dump()
    s3 = boto3.client('s3')

    # A single list_objects_v2 call only returns one page of results, so
    # paginate to make sure no dump is silently dropped.
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Delimiter='/', **s3_base.kw(prefix=True))

    # 'CommonPrefixes' is absent from a response whenever there are no
    # sub-"directories", even if objects exist directly under the prefix
    # (i.e. KeyCount > 0), so use `.get` instead of indexing.
    dumps = [
        S3Path.from_key_parts(s3_base.bucket, d['Prefix'])
        for page in pages
        for d in page.get('CommonPrefixes', [])
    ]

    # Filter to those that have "started": the presence (or absence) of the
    # Start marker file must match the requested flag.
    if started is not None:
        dumps = [
            p for p in dumps
            if p.get_element_path(Start.file_name()).exists(s3) == started
        ]

    # Filter to those that have "ended", using the End marker file.
    if ended is not None:
        dumps = [
            p for p in dumps
            if p.get_element_path(End.file_name()).exists(s3) == ended
        ]

    return dumps