def parse(data, sstable_format):
    """Parse an sstable metadata component laid out as a table of contents.

    The stream starts with a 32-bit array of (metadata type, offset) pairs;
    each referenced section is then decoded with the parser registered for
    its type in READ_METADATA, and named via METADATA_TYPE_TO_NAME.

    Returns a dict mapping metadata-type name to its parsed content.
    """
    # Removed dead code: an inner `read_metadata_offset` helper was defined
    # here but never called (it returned an unused tuple of unbound methods).

    # Table of contents: array32 of (uint32 type, uint32 offset-into-data).
    offsets = sstablelib.Stream(data).array32(
        sstablelib.Stream.instantiate(
            sstablelib.Stream.tuple, sstablelib.Stream.uint32, sstablelib.Stream.uint32,
        )
    )

    # Re-seek into `data` at each section's offset and decode it.
    return {METADATA_TYPE_TO_NAME[typ]: READ_METADATA[typ](sstablelib.Stream(data, offset), sstable_format)
            for typ, offset in offsets}
Example #2
0
def parse(data, sstable_format):
    """Decode the Scylla-specific metadata component.

    The payload is a tagged-union set keyed by a uint32 tag; each tag
    selects one optional section (sharding ranges, feature flags,
    extension attributes, or the run identifier).
    """
    S = sstablelib.Stream  # shorthand for the schema combinators below

    token_bound = S.instantiate(
        S.struct,
        ('exclusive', S.uint8),
        ('token', S.string16),
    )
    token_range = S.instantiate(
        S.struct,
        ('left', token_bound),
        ('right', token_bound),
    )
    sharding = S.instantiate(
        S.struct,
        ('token_ranges', S.instantiate(S.array32, token_range)),
    )

    features = S.instantiate(
        S.struct,
        ('enabled_features', S.uint64),
    )

    ext_attrs = S.instantiate(S.map32, S.string32, S.string32)

    uuid = S.instantiate(
        S.struct,
        ('msb', S.uint64),
        ('lsb', S.uint64),
    )
    run_id = S.instantiate(
        S.struct,
        ('id', uuid),
    )

    component = S.instantiate(
        S.set_of_tagged_union,
        S.uint32,
        (1, "sharding", sharding),
        (2, "features", features),
        (3, "extension_attributes", ext_attrs),
        (4, "run_identifier", run_id),
    )

    return sstablelib.parse(S(data), (('data', component),))
import argparse
import binascii
import itertools
import operator
import statistics
import struct
import textwrap

import sstable_tools.sstablelib as sstablelib

# Command-line interface: one or more CompressionInfo files to dump, plus
# an optional --summary flag for condensed one-line-per-file output.
cmdline_parser = argparse.ArgumentParser()
cmdline_parser.add_argument('compressioninfo_file', nargs='+', help='CompressionInfo file(s) to parse')
cmdline_parser.add_argument('--summary', action='store_true', help='generate a summary instead of full output')

args = cmdline_parser.parse_args()

# Dump each CompressionInfo file: fixed header fields, then the array of
# chunk offsets from which per-chunk compressed sizes are derived.
for fname in args.compressioninfo_file:
    s = sstablelib.Stream(open(fname, 'rb').read())
    algo = s.string16()  # compressor class name
    options = s.map32()  # compressor options (string -> string)
    chunk_size = s.int32()  # uncompressed chunk size
    data_len = s.int64()  # total uncompressed data length
    offsets = s.array32(sstablelib.Stream.int64)
    # Pair each offset with the next one; the final chunk is paired with
    # itself, so its diff is 0 and is excluded from the stats below
    # (its true compressed size cannot be derived from offsets alone).
    end = offsets[1:] + [offsets[-1]]
    diffs = list(itertools.starmap(operator.__sub__, zip(end, offsets)))
    avg_chunk = int(statistics.mean(diffs[:-1]))
    min_chunk = min(diffs[:-1])
    max_chunk = max(diffs[:-1])
    nr_chunks = len(offsets)

    if args.summary:
        # One line per file; format widths keep columns aligned across files.
        print('{data_len:12} {chunk_size:6} {min_chunk:6} {avg_chunk:6} {max_chunk:6} {fname}'.format(**locals()))
    else:
Example #4
0
def parse(data, sstable_format):
    """Decode the Scylla-specific metadata component.

    The payload is a tagged-union set keyed by a uint32 tag; each tag
    selects one optional section: sharding ranges, feature flags,
    extension attributes, run identifier, large-data statistics, or the
    sstable origin string.
    """
    S = sstablelib.Stream  # shorthand for the schema combinators below

    token_bound = S.instantiate(
        S.struct,
        ('exclusive', S.uint8),
        ('token', S.string16),
    )
    token_range = S.instantiate(
        S.struct,
        ('left', token_bound),
        ('right', token_bound),
    )
    sharding = S.instantiate(
        S.struct,
        ('token_ranges', S.instantiate(S.array32, token_range)),
    )

    features = S.instantiate(
        S.struct,
        ('enabled_features', S.uint64),
    )

    ext_attrs = S.instantiate(S.map32, S.string32, S.string32)

    uuid = S.instantiate(
        S.struct,
        ('msb', S.uint64),
        ('lsb', S.uint64),
    )
    run_id = S.instantiate(
        S.struct,
        ('id', uuid),
    )

    # Large-data stats: per-kind counters of values that crossed a threshold.
    ld_kind = S.instantiate(
        S.enum32,
        (1, "partition_size"),
        (2, "row_size"),
        (3, "cell_size"),
        (4, "rows_in_partition"),
    )
    ld_entry = S.instantiate(
        S.struct,
        ('max_value', S.uint64),
        ('threshold', S.uint64),
        ('above_threshold', S.uint32),
    )
    ld_stats = S.instantiate(S.map32, ld_kind, ld_entry)

    component = S.instantiate(
        S.set_of_tagged_union,
        S.uint32,
        (1, "sharding", sharding),
        (2, "features", features),
        (3, "extension_attributes", ext_attrs),
        (4, "run_identifier", run_id),
        (5, "large_data_stats", ld_stats),
        (6, "sstable_origin", S.string32),
    )

    return sstablelib.parse(S(data), (('data', component),))
import operator
import statistics
import textwrap

# Command-line interface: one or more CompressionInfo files to dump, plus
# an optional --summary flag for condensed one-line-per-file output.
cmdline_parser = argparse.ArgumentParser()
cmdline_parser.add_argument("compressioninfo_file",
                            nargs="+",
                            help="CompressionInfo file(s) to parse")
cmdline_parser.add_argument("--summary",
                            action="store_true",
                            help="generate a summary instead of full output")

args = cmdline_parser.parse_args()

for fname in args.compressioninfo_file:
    s = sstablelib.Stream(open(fname, "rb").read())
    algo = s.string16()
    options = s.map32()
    chunk_size = s.int32()
    data_len = s.int64()
    offsets = s.array32(sstablelib.Stream.int64)
    end = offsets[1:] + [offsets[-1]]
    diffs = list(itertools.starmap(operator.__sub__, zip(end, offsets)))
    avg_chunk = int(statistics.mean(diffs[:-1]))
    min_chunk = min(diffs[:-1])
    max_chunk = max(diffs[:-1])
    nr_chunks = len(offsets)

    if args.summary:
        print(
            "{data_len:12} {chunk_size:6} {min_chunk:6} {avg_chunk:6} {max_chunk:6} {fname}"