def du(summarize, human_readable, s3_paths):
    """ display disk usage statistics """
    size_formatter = magic.human_bytes if human_readable else lambda s: s
    for s3_path in s3_paths:
        totals = {}
        # In recursive tallying of totals this key is the stop condition.
        stop_path = '/' + s3_util.bucket_and_key(s3_path)[1].rstrip('/')

        def tally(path_segment, size):
            if not summarize or path_segment == stop_path:
                # If the summarize option is present, we only care about the grand total.
                if path_segment not in totals:
                    totals[path_segment] = 0
                totals[path_segment] += size
            if path_segment != stop_path:
                parent_dir = path_segment.rsplit('/', 1)[0]
                if parent_dir == '':
                    # Edge case when totalling the entire bucket.
                    parent_dir = '/'
                tally(parent_dir, size)

        # Usage is reported for all given paths and, recursively, for directories
        # (individual files are excluded).
        for obj in s3_util.keys(s3_path):
            dbg('adding {}', obj)
            dir_key = '/' + obj['key'].rsplit('/', 1)[0]
            tally(dir_key, obj['len'])

        for path, total in sorted(totals.items()):
            out('{}\t{}', size_formatter(total), path)

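# A minimal, standalone sketch of the roll-up that `tally` performs above, using
# hypothetical keys and sizes (everything in this helper is illustrative and not
# part of the command). Each object's size is added to its directory and to every
# ancestor directory up to the stop path.
def _du_rollup_example():
    totals = {}
    stop_path = '/'  # tally all the way up to the bucket root

    def tally(path_segment, size):
        totals[path_segment] = totals.get(path_segment, 0) + size
        if path_segment != stop_path:
            tally(path_segment.rsplit('/', 1)[0] or '/', size)

    # Hypothetical listing: (key, size in bytes)
    for key, size in [('logs/2015/a.gz', 100), ('logs/2015/b.gz', 50), ('logs/2016/c.gz', 7)]:
        tally('/' + key.rsplit('/', 1)[0], size)

    # totals == {'/logs/2015': 150, '/logs/2016': 7, '/logs': 157, '/': 157}
    return totals
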
def keys(s3_paths):
    """ Prefix key scan.

    Outputs keys in the format s3://bucket/key
    """
    if not s3_paths:
        s3_paths = ['']
    for s3_path in s3_paths:
        for obj in s3_util.keys(s3_path):
            out('s3://{}/{}', obj['bucket'], obj['key'])

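# The commands in this module assume `s3_util.keys(s3_path)` yields dicts with
# 'bucket', 'key', and 'len' fields. The generator below is a rough sketch of
# such a listing built on boto3's list_objects_v2 paginator; it is illustrative
# only and is not the project's actual s3_util implementation.
def _keys_sketch(s3_path):
    import boto3
    bucket, prefix = s3_util.bucket_and_key(s3_path)
    paginator = boto3.client('s3').get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for item in page.get('Contents', []):
            yield {'bucket': bucket, 'key': item['Key'], 'len': item['Size']}
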
def scank(bucket, start, end):
    """ Lexical scan within a bucket.

    This scans over keys within a particular bucket where each key k satisfies:

        START_KEY_OR_PREFIX <= k < END_KEY_OR_PREFIX

    Outputs keys in the format s3://bucket/key

    Example:

        Scan all keys in mybucket that are lexically equal to or greater than
        'folder/a', through the end of the bucket:

            zs3 scank mybucket folder/a

    Author's note: This is particularly useful if you have keyed objects with a
    date component (e.g. mybucket/logs/2015-05-10). You could list a range, say
    everything that happened in 2014 and 2015, with
    `zs3 scank mybucket logs/2014-01-01 logs/2016-01-01`.
    """
    for obj in s3_util.scank(bucket, start, end):
        out('s3://{}/{}', obj['bucket'], obj['key'])

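# A rough sketch of the half-open range scan described above, built directly on
# boto3's list_objects_v2 (illustrative only; the real behavior lives in
# s3_util.scank). StartAfter is exclusive, so an object keyed exactly `start`
# is checked for separately with a one-key Prefix listing.
def _scank_sketch(bucket, start, end):
    import boto3
    s3 = boto3.client('s3')
    head = s3.list_objects_v2(Bucket=bucket, Prefix=start, MaxKeys=1)
    for item in head.get('Contents', []):
        if item['Key'] == start and start < end:
            yield {'bucket': bucket, 'key': start}
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket, StartAfter=start):
        for item in page.get('Contents', []):
            if item['Key'] >= end:
                return
            yield {'bucket': bucket, 'key': item['Key']}
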
def scanb(start, end):
    """ Lexical scan across buckets.

    This scan traverses multiple buckets for keys where each key k satisfies:

        START_KEY_OR_PREFIX <= k < END_KEY_OR_PREFIX

    Outputs keys in the format s3://bucket/key

    Example:

        Scan all keys that are lexically equal to or greater than
        'mybucket_a/logs/' and less than 'mybucket_c/logs/2016'. If there are
        three buckets mybucket_a, mybucket_b, and mybucket_c, this lists
        everything in mybucket_a equal to or greater than logs/, everything in
        mybucket_b, and all keys less than 'logs/2016' in mybucket_c:

            zs3 scanb mybucket_a/logs/ mybucket_c/logs/2016

    Author's note: I'm not totally sure why you would need to page across
    buckets.
    """
    for obj in s3_util.scanb(start, end):
        out('s3://{}/{}', obj['bucket'], obj['key'])

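# A rough sketch of how the cross-bucket scan above could be composed from
# per-bucket scans. `list_bucket_names` and `keys_in_bucket` are hypothetical
# callables standing in for s3_util helpers (the first returns all bucket names,
# the second yields one bucket's keys in lexical order); this illustrates the
# documented semantics and is not the project's actual implementation.
def _scanb_sketch(start, end, list_bucket_names, keys_in_bucket):
    start_bucket, start_key = s3_util.bucket_and_key(start)
    end_bucket, end_key = s3_util.bucket_and_key(end)
    for bucket in sorted(list_bucket_names()):
        if bucket < start_bucket or bucket > end_bucket:
            continue
        for key in keys_in_bucket(bucket):
            if bucket == start_bucket and key < start_key:
                continue
            if bucket == end_bucket and key >= end_key:
                break
            yield {'bucket': bucket, 'key': key}
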
def ls(color, long_format, s3_paths):
    """ list directory contents.

    zs3 ls [bucket][/key_or_prefix]...
    """
    if color:
        # Click actually does some of this for us, but keep it here to be
        # explicit and to mirror `ls --color`.
        if color.lower() == 'always':
            color = True
        elif color.lower() == 'auto':
            color = sys.stdout.isatty()
    if not s3_paths:
        s3_paths = ['']
    for s3_path in s3_paths:
        # ls normally prints names aligned to a grid.
        objs = list(s3_util.ls(s3_path))
        if not objs:
            _, key = s3_util.bucket_and_key(s3_path)
            if key:
                raise ClickException("ls {}: No such file or directory".format(s3_path))
            else:
                # TODO same sort of exception if the bucket doesn't exist
                continue
        objs = sorted(objs, key=itemgetter('path'))
        if long_format:
            def out_gen():
                for obj in objs:
                    filename = os.path.basename(obj['path'])
                    if color and obj['is_dir']:
                        out_key = Style.BRIGHT + Fore.BLUE + filename + Style.RESET_ALL
                    else:
                        out_key = filename
                    size = '0' if obj['is_dir'] else '{}'.format(obj['size'])
                    yield ['?' * 10, '??', '?????', '?????', size, '??? ?? ??:??', out_key]

            out(tabulate(out_gen(), tablefmt='plain'))
        else:  # not long_format
            def out_gen():
                for obj in objs:
                    filename = os.path.basename(obj['path'])
                    if color and obj['is_dir']:
                        yield Style.BRIGHT + Fore.BLUE + filename + Style.RESET_ALL
                    else:
                        yield filename

            out_grid(list(out_gen()))

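# `out_grid` is provided elsewhere in this project; the helper below is a rough
# sketch of the ls-style multi-column layout it presumably produces, using only
# the standard library (illustrative, not the actual implementation). Note that
# ANSI color codes would inflate len(), so a real version has to measure the
# visible width instead.
def _grid_sketch(names, write=print):
    import shutil
    if not names:
        return
    term_width = shutil.get_terminal_size().columns
    col_width = max(len(name) for name in names) + 2
    per_row = max(1, term_width // col_width)
    for i in range(0, len(names), per_row):
        write(''.join(name.ljust(col_width) for name in names[i:i + per_row]).rstrip())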