def parse(data, sstable_format):
    # Statistics.db starts with a table of contents: an array32 of
    # (metadata type, offset) pairs locating each metadata block within
    # the file. METADATA_TYPE_TO_NAME and READ_METADATA are module-level
    # lookup tables defined elsewhere in this module.
    offsets = sstablelib.Stream(data).array32(
        sstablelib.Stream.instantiate(
            sstablelib.Stream.tuple,
            sstablelib.Stream.uint32,
            sstablelib.Stream.uint32,
        )
    )
    return {METADATA_TYPE_TO_NAME[typ]: READ_METADATA[typ](sstablelib.Stream(data, offset), sstable_format)
            for typ, offset in offsets}
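# The readers above are built with a small combinator pattern:
# Stream.instantiate(reader, *args) binds trailing arguments to a reader
# method so the result can be invoked with just a stream. A minimal
# self-contained sketch of that idea with a toy stream class -- an
# illustration of the pattern, not sstablelib's actual implementation:
import struct


class ToyStream:
    def __init__(self, data, offset=0):
        self.data = data
        self.offset = offset

    def uint32(self):
        # sstable integers are big-endian on disk
        (value,) = struct.unpack_from('>I', self.data, self.offset)
        self.offset += 4
        return value

    def tuple(self, *readers):
        # Apply each sub-reader in sequence to build a tuple.
        return tuple(reader(self) for reader in readers)

    @staticmethod
    def instantiate(reader, *args):
        # Bind the trailing arguments; the result only needs a stream.
        return lambda stream: reader(stream, *args)


# Usage: a reader for one (type, offset) pair, as in parse() above.
pair_reader = ToyStream.instantiate(ToyStream.tuple, ToyStream.uint32, ToyStream.uint32)
assert pair_reader(ToyStream(struct.pack('>II', 7, 42))) == (7, 42)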
import argparse
import itertools
import operator
import statistics
import textwrap

import sstable_tools.sstablelib as sstablelib

cmdline_parser = argparse.ArgumentParser()
cmdline_parser.add_argument('compressioninfo_file', nargs='+', help='CompressionInfo file(s) to parse')
cmdline_parser.add_argument('--summary', action='store_true', help='generate a summary instead of full output')
args = cmdline_parser.parse_args()

for fname in args.compressioninfo_file:
    s = sstablelib.Stream(open(fname, 'rb').read())
    algo = s.string16()
    options = s.map32()
    chunk_size = s.int32()
    data_len = s.int64()
    offsets = s.array32(sstablelib.Stream.int64)
    # Pair each chunk offset with its successor to get per-chunk
    # compressed sizes; the last entry has no successor, so its diff is
    # zero and is excluded from the statistics below.
    end = offsets[1:] + [offsets[-1]]
    diffs = list(itertools.starmap(operator.sub, zip(end, offsets)))
    avg_chunk = int(statistics.mean(diffs[:-1]))
    min_chunk = min(diffs[:-1])
    max_chunk = max(diffs[:-1])
    nr_chunks = len(offsets)
    if args.summary:
        print('{data_len:12} {chunk_size:6} {min_chunk:6} {avg_chunk:6} {max_chunk:6} {fname}'.format(**locals()))
    else:
        # Full output: print the parsed header fields and chunk offsets
        # (a minimal reconstruction; the original branch was truncated).
        print('file:        {}'.format(fname))
        print('algorithm:   {}'.format(algo))
        print('options:     {}'.format(options))
        print('chunk size:  {}'.format(chunk_size))
        print('data length: {}'.format(data_len))
        print('chunks:      {}'.format(nr_chunks))
        print('offsets:')
        print(textwrap.indent('\n'.join(str(o) for o in offsets), '    '))
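# Worked example of the chunk-size arithmetic above, on made-up offsets
# (not real CompressionInfo data): each chunk's compressed size is the
# distance to the next offset, and the final entry is dropped from the
# statistics because it has no successor.
import itertools
import operator
import statistics

toy_offsets = [0, 100, 250, 400]
toy_end = toy_offsets[1:] + [toy_offsets[-1]]                      # [100, 250, 400, 400]
toy_diffs = list(itertools.starmap(operator.sub, zip(toy_end, toy_offsets)))
assert toy_diffs == [100, 150, 150, 0]
assert int(statistics.mean(toy_diffs[:-1])) == 133                 # avg_chunk
assert (min(toy_diffs[:-1]), max(toy_diffs[:-1])) == (100, 150)    # min/max chunk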
def parse(data, sstable_format):
    disk_token_bound = sstablelib.Stream.instantiate(
        sstablelib.Stream.struct,
        ('exclusive', sstablelib.Stream.uint8),
        ('token', sstablelib.Stream.string16),
    )
    disk_token_range = sstablelib.Stream.instantiate(
        sstablelib.Stream.struct,
        ('left', disk_token_bound),
        ('right', disk_token_bound),
    )
    sharding_metadata = sstablelib.Stream.instantiate(
        sstablelib.Stream.struct,
        ('token_ranges', sstablelib.Stream.instantiate(sstablelib.Stream.array32, disk_token_range)),
    )
    sstable_enabled_features = sstablelib.Stream.instantiate(
        sstablelib.Stream.struct,
        ('enabled_features', sstablelib.Stream.uint64),
    )
    extension_attributes = sstablelib.Stream.instantiate(
        sstablelib.Stream.map32,
        sstablelib.Stream.string32,
        sstablelib.Stream.string32,
    )
    UUID = sstablelib.Stream.instantiate(
        sstablelib.Stream.struct,
        ('msb', sstablelib.Stream.uint64),
        ('lsb', sstablelib.Stream.uint64),
    )
    run_identifier = sstablelib.Stream.instantiate(
        sstablelib.Stream.struct,
        ('id', UUID),
    )
    large_data_type = sstablelib.Stream.instantiate(
        sstablelib.Stream.enum32,
        (1, "partition_size"),
        (2, "row_size"),
        (3, "cell_size"),
        (4, "rows_in_partition"),
    )
    large_data_stats_entry = sstablelib.Stream.instantiate(
        sstablelib.Stream.struct,
        ('max_value', sstablelib.Stream.uint64),
        ('threshold', sstablelib.Stream.uint64),
        ('above_threshold', sstablelib.Stream.uint32),
    )
    large_data_stats = sstablelib.Stream.instantiate(
        sstablelib.Stream.map32,
        large_data_type,
        large_data_stats_entry,
    )
    scylla_component_data = sstablelib.Stream.instantiate(
        sstablelib.Stream.set_of_tagged_union,
        sstablelib.Stream.uint32,
        (1, "sharding", sharding_metadata),
        (2, "features", sstable_enabled_features),
        (3, "extension_attributes", extension_attributes),
        (4, "run_identifier", run_identifier),
        (5, "large_data_stats", large_data_stats),
        (6, "sstable_origin", sstablelib.Stream.string32),
    )
    schema = (
        ('data', scylla_component_data),
    )
    return sstablelib.parse(sstablelib.Stream(data), schema)
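# A hedged sketch of how a set_of_tagged_union reader could decode the
# scylla.db payload above: a presumed element count followed by tagged
# elements, each tag selecting one (tag, name, reader) triple. The
# layout assumed here (count, then bare tag/value pairs) is a guess, not
# sstablelib's actual implementation; the real format may also carry a
# per-element size so unknown tags can be skipped.
def read_set_of_tagged_union_sketch(stream, read_tag, *members):
    by_tag = {tag: (name, reader) for tag, name, reader in members}
    result = {}
    count = stream.uint32()           # assumed: number of union members present
    for _ in range(count):
        tag = read_tag(stream)        # e.g. sstablelib.Stream.uint32
        name, reader = by_tag[tag]
        result[name] = reader(stream)
    return result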