def scanb(start="", end=None):
    """Scan objects across the buckets that fall between ``start`` and ``end``.

    ``start`` and ``end`` are each split by ``bucket_and_key`` into a bucket
    part and a key part. Buckets are enumerated through the boto3 S3 resource
    and every selected bucket is handed to ``scank``; the per-bucket results
    are merged with ``magic.flatten``.

    Args:
        start: Lower bound. Its key part is passed to ``scank`` as the
            starting key/prefix, but only for the bucket whose name equals
            the bucket part of ``start``.
        end: Optional upper bound. Its key part is passed to ``scank`` as
            the end key, but only for the bucket whose name equals the
            bucket part of ``end``. When ``end`` has no key part, the end
            bucket itself is excluded.

    Returns:
        Whatever ``magic.flatten`` returns for the sequence of ``scank``
        results (one per selected bucket).
    """
    bucket_start, first_bucket_key_prefix = bucket_and_key(start)
    dbg(
        "start b:{} k:{}",
        bucket_start,
        first_bucket_key_prefix if first_bucket_key_prefix else "(unbounded)",
    )

    bucket_end = None
    last_bucket_key_end = None
    if end:
        bucket_end, last_bucket_key_end = bucket_and_key(end)
        if not start <= end:
            err("start must be lexically before end")
        dbg(
            "end b:{} k:{}",
            bucket_end,
            last_bucket_key_end if last_bucket_key_end else "(unbounded)",
        )

    s3 = boto3.resource("s3")

    def bucket_gen():
        # NOTE(review): the early `break` (kept from the original code)
        # relies on the bucket listing arriving in ascending name order.
        for _bucket in s3.buckets.all():
            name = _bucket.name
            if name < bucket_start:
                continue
            if bucket_end is not None:
                if name > bucket_end:
                    break
                # The end bucket is only scanned when `end` names a key
                # inside it; a bare bucket name is an exclusive bound.
                if name == bucket_end and not last_bucket_key_end:
                    break
            yield _bucket

    def returned_generator():
        for bucket in bucket_gen():
            # Key bounds belong to the exact start/end buckets. Matching by
            # name (rather than by position in the listing) keeps them from
            # leaking onto a neighbouring bucket when the named bucket does
            # not exist or was filtered out.
            if bucket.name == bucket_start:
                key_start = first_bucket_key_prefix
            else:
                key_start = ""
            if bucket.name == bucket_end:
                key_end = last_bucket_key_end
            else:
                key_end = None
            yield scank(bucket.name, key_start, key_end)

    return magic.flatten(returned_generator())
def returned_gen():
    """Yield every entry produced by ``generate_objs`` for ``key_prefix``.

    Pages come from the ``list_objects`` paginator for ``bucket_name`` with
    ``"/"`` as the delimiter. If ``SignalUnslashedDirectory`` is raised while
    the entries are being consumed, the listing is restarted with a trailing
    ``"/"`` appended to ``key_prefix`` and iteration continues from that new
    listing.
    """
    paginator = s3.get_paginator("list_objects")

    def pages_under(prefix):
        # One paginated listing, grouped on "/" beneath the given prefix.
        return paginator.paginate(Bucket=bucket_name, Prefix=prefix, Delimiter="/")

    first_pass = pages_under(key_prefix)
    try:
        for item in magic.flatten(map(generate_objs, first_pass)):
            yield item
    except SignalUnslashedDirectory:
        dbg("Saw unslashed directory at {}. Restarting paginator.", key_prefix)
        # The user left off the trailing '/', so step into that directory.
        second_pass = pages_under(key_prefix + "/")
        for item in magic.flatten(map(generate_objs, second_pass)):
            yield item