Ejemplo n.º 1
0
async def _s3_find_via_cbk(url, cbk, s3, pred=None, glob=None):
    """ Walk all objects under an s3 url and hand the accepted ones to a callback.

        url:  location to list, split into bucket and prefix by `s3_url_parse`
        cbk:  awaitable callback, awaited once per accepted object
        s3:   client providing `get_paginator('list_objects_v2')` whose pages
              are consumed with `async for`
        pred: predicate for file objects file_info -> True|False
        glob: glob pattern, merged with `pred` by `norm_predicate`

        each s3 object is represented by a SimpleNamespace with attributes:
        - url
        - size
        - last_modified
        - etag

        Returns: (n_total, n)

        where
          n_total -- number of objects listed under the prefix

          n -- number of objects that passed the filter and were sent to `cbk`
    """
    pred = norm_predicate(pred=pred, glob=glob)
    bucket, prefix = s3_url_parse(url)

    # A non-empty prefix is always listed as a "directory": force trailing '/'
    if not (len(prefix) == 0 or prefix.endswith('/')):
        prefix += '/'

    paginator = s3.get_paginator('list_objects_v2')
    seen = 0
    accepted = 0

    async for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for raw in page.get('Contents', []):
            seen += 1
            info = s3_file_info(raw, bucket)

            # No predicate means every object is accepted
            if pred is not None and not pred(info):
                continue

            accepted += 1
            await cbk(info)

    return seen, accepted
Ejemplo n.º 2
0
async def s3_dir(url, s3, pred=None, glob=None):
    """ List a single level of an s3 "directory" (no recursion into sub directories).

        url:  location to list, split into bucket and prefix by `s3_url_parse`
        s3:   client providing `get_paginator('list_objects_v2')` whose pages
              are consumed with `async for`
        pred: predicate for file objects file_info -> True|False
        glob: glob pattern for files only

        The filter is applied to files only, sub directories are always reported.

        Returns: (dirs, files)

        where
          dirs -- list of subdirectories in `s3://bucket/path/` format

          files -- list of objects with attributes: url, size, last_modified, etag
    """
    bucket, prefix = s3_url_parse(url)
    pred = norm_predicate(pred=pred, glob=glob)

    # A non-empty prefix is always listed as a "directory": force trailing '/'
    if not (len(prefix) == 0 or prefix.endswith('/')):
        prefix += '/'

    paginator = s3.get_paginator('list_objects_v2')
    subdirs, files = [], []

    # Delimiter='/' makes the listing stop at the first '/' past the prefix
    async for page in paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/'):
        subdirs.extend('s3://{}/{}'.format(bucket, entry.get('Prefix'))
                       for entry in page.get('CommonPrefixes', []))

        infos = (s3_file_info(raw, bucket) for raw in page.get('Contents', []))
        files.extend(info for info in infos if pred is None or pred(info))

    return subdirs, files