Example #1
def du(summarize, human_readable, s3_paths):
    """
    Display disk usage statistics for each given S3 path.
    """
    size_formatter = magic.human_bytes if human_readable else lambda s: s

    for s3_path in s3_paths:

        totals = {}
        # This key is the stop condition for the recursive tallying of totals.
        stop_path = '/' + s3_util.bucket_and_key(s3_path)[1].rstrip('/')

        def tally(path_segment, size):
            # When summarizing, only the grand total at the stop path is recorded.
            if not summarize or path_segment == stop_path:
                if path_segment not in totals:
                    totals[path_segment] = 0
                totals[path_segment] += size

            if path_segment != stop_path:
                parent_dir = path_segment.rsplit('/', 1)[0]
                if parent_dir == '':
                    # Edge case when totalling the entire bucket.
                    parent_dir = '/'
                tally(parent_dir, size)

        for obj in s3_util.keys(s3_path):
            # Tally each object's size into its directory and, recursively,
            # into every ancestor directory; files are not reported individually.
            dbg('adding {}', obj)
            dir_key = '/' + obj['key'].rsplit('/', 1)[0]
            tally(dir_key, obj['len'])

        for path, total in sorted(totals.items()):
            out('{}\t{}', size_formatter(total), path)
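A minimal usage sketch for du, assuming the module-level helpers it relies on (s3_util, magic, dbg, out) are in scope; the paths and flag values below are hypothetical:

# Grand total only, with human-readable sizes, for one hypothetical path.
du(summarize=True, human_readable=True, s3_paths=['s3://my-bucket/logs/'])

# Per-directory totals in raw bytes for two hypothetical paths.
du(summarize=False, human_readable=False,
   s3_paths=['s3://my-bucket/logs/', 's3://my-bucket/data/'])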
Example #2
def scanb(start="", end=None):
    bucket_start, first_bucket_key_prefix = bucket_and_key(start)

    dbg("start b:{} k:{}", bucket_start, first_bucket_key_prefix if first_bucket_key_prefix else "(unbounded)")

    bucket_end = None
    last_bucket_key_end = None
    if end:
        bucket_end, last_bucket_key_end = bucket_and_key(end)

        if start > end:
            err("start must sort lexicographically before end")

    dbg("end b:{} k:{}", bucket_end, last_bucket_key_end if last_bucket_key_end else "(unbounded)")

    s3 = boto3.resource("s3")

    def bucket_gen():
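        # Yield only buckets whose names fall in the [bucket_start, bucket_end) range.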
        _buckets = s3.buckets.all()
        for _bucket in _buckets:
            if _bucket.name < bucket_start:
                continue
            if bucket_end is not None and _bucket.name >= bucket_end:
                break
            yield _bucket

    def returned_generator():
        buckets = list(bucket_gen())
        for index, bucket in enumerate(buckets):
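            # Key bounds apply only to the first and last buckets in range;
            # buckets in between are scanned in full.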
            key_start = first_bucket_key_prefix if index == 0 else ""
            key_end = last_bucket_key_end if index == len(buckets) - 1 else None
            yield scank(bucket.name, key_start, key_end)

    return magic.flatten(returned_generator())
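A usage sketch for scanb, assuming boto3 credentials are configured; the bucket and key names are hypothetical:

# Every key from my-bucket-a/logs onward, stopping before my-bucket-c.
for obj in scanb(start="my-bucket-a/logs", end="my-bucket-c"):
    print(obj["bucket"], obj["key"])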
Example #3
        def returned_gen():
            list_objects = s3.get_paginator("list_objects")

            response_iterator = list_objects.paginate(Bucket=bucket_name, Prefix=key_prefix, Delimiter="/")

            try:
                yield from magic.flatten(map(generate_objs, response_iterator))
            except SignalUnslashedDirectory:
                # The user left off the trailing '/'; restart the paginator
                # with it appended so we step into that directory.
                dbg("Saw unslashed directory at {}. Restarting paginator.", key_prefix)
                response_iterator = list_objects.paginate(
                    Bucket=bucket_name,
                    Prefix=key_prefix + "/",
                    Delimiter="/",
                )
                yield from magic.flatten(map(generate_objs, response_iterator))
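SignalUnslashedDirectory and generate_objs are defined elsewhere in the source module; below is a minimal sketch of the contract the retry above appears to rely on, with the detection logic assumed rather than taken from the source:

class SignalUnslashedDirectory(Exception):
    """Assumed: raised when the prefix names a directory but lacks its trailing '/'."""

def generate_objs(page):
    # Hypothetical walker over list_objects pages: if the prefix shows up
    # only as a CommonPrefixes rollup, the caller named a directory without
    # the slash, so signal the paginator restart seen above.
    contents = page.get("Contents", [])
    prefixes = [p["Prefix"] for p in page.get("CommonPrefixes", [])]
    if not contents and prefixes == [page["Prefix"] + "/"]:
        raise SignalUnslashedDirectory()
    for entry in contents:
        yield {"bucket": page["Name"], "key": entry["Key"], "len": entry["Size"]}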
Example #4
def scank(bucket_name, key_start="", key_end=None):
    dbg("scanning b:{} s:{} e:{}", bucket_name, key_start, str(key_end))

    prefix = ""
    if key_start and key_end:
        # Avoid needless scanning by listing only under the common prefix of the bounds.
        prefix = os.path.commonprefix([key_start, key_end])

    s3 = boto3.resource("s3")
    bucket = s3.Bucket(bucket_name)

    def key_gen():
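        # Walk keys under the common prefix, clipping to the [key_start, key_end) range.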
        _objects = bucket.objects.filter(Prefix=prefix)
        for _obj in _objects:
            if _obj.key < key_start:
                continue
            if key_end is not None and _obj.key >= key_end:
                break
            yield _obj

    return ({"bucket": obj.bucket_name, "key": obj.key} for obj in key_gen())