async def _s3_find_via_cbk(url, cbk, s3, pred=None, glob=None):
    """ Walk every object under an S3 url and feed matching ones to a callback.

    The listing is done with the ``list_objects_v2`` paginator and no
    delimiter. A trailing ``/`` is appended to a non-empty prefix before
    listing.

    url:  location to list; split into bucket and prefix by ``s3_url_parse``
    cbk:  callable taking one file-info object; its result is awaited
    s3:   client object providing ``get_paginator`` whose pages can be
          consumed with ``async for`` (presumably an aiobotocore client --
          confirm against callers)
    pred: predicate for file objects file_info -> True|False
    glob: glob pattern, combined with ``pred`` by ``norm_predicate``

    Each listed object is converted with ``s3_file_info`` before being
    tested; per the original documentation these objects are
    SimpleNamespace instances with attributes:
       - url
       - size
       - last_modified
       - etag

    Returns:
      (n_total, n) where
      n_total -- number of objects listed
      n       -- number of objects that passed the filter and were given to
                 ``cbk``
    """
    pred = norm_predicate(pred=pred, glob=glob)
    bucket, prefix = s3_url_parse(url)

    # Non-empty prefixes are listed as "directories": force a trailing slash.
    if not (len(prefix) == 0 or prefix.endswith('/')):
        prefix = prefix + '/'

    paginator = s3.get_paginator('list_objects_v2')

    seen = 0
    matched = 0

    async for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        # Pages without a 'Contents' key contribute no objects.
        for raw in page.get('Contents', []):
            seen += 1
            info = s3_file_info(raw, bucket)

            # A normalised predicate of None means "accept everything".
            if pred is not None and not pred(info):
                continue

            matched += 1
            await cbk(info)

    return seen, matched
async def s3_dir(url, s3, pred=None, glob=None):
    """ List one level of an S3 "directory", without recursing into children.

    The listing is done with the ``list_objects_v2`` paginator using ``/`` as
    the delimiter. A trailing ``/`` is appended to a non-empty prefix before
    listing.

    url:  location to list; split into bucket and prefix by ``s3_url_parse``
    s3:   client object providing ``get_paginator`` whose pages can be
          consumed with ``async for`` (presumably an aiobotocore client --
          confirm against callers)
    pred: predicate for file objects file_info -> True|False
    glob: glob pattern for files only

    The filter is applied to files only; sub-directories are always
    reported.

    Returns:
      (dirs, files)

      where
        dirs  -- list of subdirectories in `s3://bucket/path/` format

        files -- list of objects produced by ``s3_file_info``; per the
                 original documentation they carry attributes:
                 url, size, last_modified, etag
    """
    bucket, prefix = s3_url_parse(url)
    pred = norm_predicate(pred=pred, glob=glob)

    # Non-empty prefixes are listed as "directories": force a trailing slash.
    if not (len(prefix) == 0 or prefix.endswith('/')):
        prefix = prefix + '/'

    paginator = s3.get_paginator('list_objects_v2')

    subdirs = []
    files = []

    pages = paginator.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/')
    async for page in pages:
        # With a delimiter set, child "directories" arrive as CommonPrefixes.
        subdirs.extend(
            's3://{}/{}'.format(bucket, entry.get('Prefix'))
            for entry in page.get('CommonPrefixes', [])
        )

        for raw in page.get('Contents', []):
            info = s3_file_info(raw, bucket)

            # A normalised predicate of None means "accept everything".
            if pred is not None and not pred(info):
                continue

            files.append(info)

    return subdirs, files