def watch(limit):
    """Watch scan rates across the cluster.

    Every ``period`` seconds a fresh snapshot is loaded with ``db.db()``
    and compared against the previous one. Buckets that have scanned
    nothing, or whose ``scanned`` count did not move since the previous
    snapshot, are left out of the display. For the remaining buckets the
    change in ``scanned`` per second is recorded in
    ``b.data['gkrate']`` and the rows are rendered fastest first beneath
    a ``totals`` row.

    This function loops forever and never returns.

    :param limit: maximum number of bucket rows to show; a falsy value
        shows every bucket that made progress.
    """
    period = 5.0
    previous = db.db()
    previous_totals = None

    while True:
        click.clear()
        time.sleep(period)

        current = db.db()
        current.data['gkrate'] = {}
        known = {b.bucket_id: b for b in previous.buckets()}
        totals = {'scanned': 0, 'krate': 0, 'lrate': 0, 'bucket_id': 'totals'}
        active = []

        for b in current.buckets():
            if not b.scanned:
                continue

            # Totals include every bucket with scanned keys, even the
            # ones that made no progress during this period.
            for field in ('scanned', 'krate', 'lrate'):
                totals[field] += getattr(b, field)

            earlier = known.get(b.bucket_id)
            if earlier is not None and b.scanned == earlier.scanned:
                continue
            if earlier is None:
                # Not present in the previous snapshot: count everything
                # scanned so far against this period.
                rate = b.scanned / period
            else:
                rate = (b.scanned - earlier.scanned) / period
            b.data['gkrate'][b.bucket_id] = rate
            active.append(b)

        # The first pass has no earlier totals to diff against.
        totals['gkrate'] = (
            '...' if previous_totals is None
            else (totals['scanned'] - previous_totals['scanned']) / period)
        previous, previous_totals = current, totals

        active.sort(key=lambda x: x.gkrate, reverse=True)
        if limit:
            active = active[:limit]

        rows = [Bag(totals)] + active
        format_plain(
            rows, None,
            explicit_only=True,
            keys=['bucket_id', 'scanned', 'gkrate', 'lrate', 'krate'])
def watch(limit):
    """Watch scan rates across the cluster.

    Loads a snapshot with ``db.db()`` every ``period`` seconds and diffs
    it against the one before. Buckets with nothing scanned, or whose
    ``scanned`` count is unchanged since the previous snapshot, are not
    displayed. Each remaining bucket gets its change in ``scanned`` per
    second written to ``b.data['gkrate']``; rows are shown fastest first
    under a leading ``totals`` row.

    This function loops forever and never returns.

    :param limit: maximum number of bucket rows to show; a falsy value
        shows every bucket that made progress.
    """
    period = 5.0
    previous = db.db()
    previous_totals = None

    while True:
        click.clear()
        time.sleep(period)

        current = db.db()
        current.data['gkrate'] = {}
        known = {b.bucket_id: b for b in previous.buckets()}
        totals = {'scanned': 0, 'krate': 0, 'lrate': 0, 'bucket_id': 'totals'}
        moving = []

        for b in current.buckets():
            if not b.scanned:
                continue

            # Totals cover every bucket with scanned keys, including the
            # ones filtered out of the per-bucket rows below.
            for field in ('scanned', 'krate', 'lrate'):
                totals[field] += getattr(b, field)

            earlier = known.get(b.bucket_id)
            if earlier is not None and b.scanned == earlier.scanned:
                continue
            if earlier is None:
                # Absent from the previous snapshot: everything scanned
                # so far is attributed to this period.
                rate = b.scanned / period
            else:
                rate = (b.scanned - earlier.scanned) / period
            b.data['gkrate'][b.bucket_id] = rate
            moving.append(b)

        # Nothing to diff against on the very first pass.
        totals['gkrate'] = (
            '...' if previous_totals is None
            else (totals['scanned'] - previous_totals['scanned']) / period)
        previous, previous_totals = current, totals

        moving.sort(key=lambda x: x.gkrate, reverse=True)
        if limit:
            moving = moving[:limit]

        rows = [utils.Bag(totals)] + moving
        format_plain(
            rows, None,
            explicit_only=True,
            keys=['bucket_id', 'scanned', 'gkrate', 'lrate', 'krate'])
def inspect_bucket(bucket):
    """Show all information known on a bucket.

    The bucket is looked up by name among ``db.db().buckets()``; when
    several entries share the name, the last one iterated is reported.
    If none matches, a "no bucket named" message is printed instead.

    :param bucket: name of the bucket to report on.
    """
    state = db.db()
    matches = [b for b in state.buckets() if b.name == bucket]
    found = matches[-1] if matches else None
    if not found:
        click.echo("no bucket named: %s" % bucket)
        return

    # (template, attribute) pairs, echoed in order.
    summary = (
        ("Bucket: %s", 'name'),
        ("Account: %s", 'account'),
        ("Region: %s", 'region'),
        ("Created: %s", 'created'),
        ("Size: %s", 'size'),
        ("Inventory: %s", 'inventory'),
        ("Partitions: %s", 'partitions'),
        ("Scanned: %0.2f%%", 'percent_scanned'),
    )
    for template, attr in summary:
        click.echo(template % getattr(found, attr))

    click.echo("")
    click.echo("Errors")
    for template, attr in (
            ("Denied: %s", 'keys_denied'),
            ("BErrors: %s", 'error_count')):
        click.echo(template % getattr(found, attr))

    # (template, data key) pairs; each counter table is indexed by
    # bucket id and defaults to 0 when the bucket has no entry.
    counters = (
        ("KErrors: %s", 'keys-error'),
        ("Throttle: %s", 'keys-throttled'),
        ("Missing: %s", 'keys-missing'),
        ("Session: %s", 'keys-sesserr'),
        ("Connection: %s", 'keys-connerr'),
        ("Endpoint: %s", 'keys-enderr'),
    )
    for template, key in counters:
        click.echo(template % found.data[key].get(found.bucket_id, 0))
def buckets(bucket=None, account=None, matched=False, kdenied=False,
            errors=False, dbpath=None, size=None, denied=False):
    """Report on stats by bucket.

    Loads state with ``db.db(dbpath)`` and echoes one summary line per
    bucket that passes every active filter.

    :param bucket: collection of bucket names to include; falsy keeps all.
    :param account: passed through to ``d.buckets()``.
    :param matched: only buckets with a truthy ``matched``.
    :param kdenied: only buckets with a truthy ``keys_denied``.
    :param errors: only buckets with a non-zero ``error_count``.
    :param dbpath: passed through to ``db.db()``.
    :param size: only buckets whose ``size`` is at least this value.
    :param denied: only buckets with a truthy ``denied``.
    """
    d = db.db(dbpath)

    def _repr(b):
        return (
            "account:%s name:%s percent:%0.2f matched:%d "
            "scanned:%d size:%d kdenied:%d errors:%d partitions:%d") % (
                b.account, b.name, b.percent_scanned, b.matched,
                b.scanned, b.size, b.keys_denied, b.error_count,
                b.partitions)

    for b in d.buckets(account):
        if bucket and b.name not in bucket:
            continue
        if matched and not b.matched:
            continue
        if kdenied and not b.keys_denied:
            continue
        # Filter on the same ``error_count`` value that the summary line
        # reports as ``errors:``, rather than a separate ``errors``
        # attribute.
        if errors and not b.error_count:
            continue
        if size and b.size < size:
            continue
        if denied and not b.denied:
            continue
        click.echo(_repr(b))
def buckets(bucket=None, account=None, matched=False, kdenied=False,
            errors=False, dbpath=None, size=None, denied=False,
            format=None, output=None):
    """Report on stats by bucket.

    Loads state with ``db.db(dbpath)``, keeps the buckets that pass every
    active filter (ordered by ``bucket_id``) and hands them to the
    selected formatter.

    :param bucket: collection of bucket names to include; falsy keeps all.
    :param account: passed through to ``d.buckets()``.
    :param matched: only buckets with a truthy ``matched``.
    :param kdenied: only buckets with a truthy ``keys_denied``.
    :param errors: only buckets with a non-zero ``error_count``.
    :param dbpath: passed through to ``db.db()``.
    :param size: only buckets whose ``size`` is at least this value.
    :param denied: only buckets with a truthy ``denied``.
    :param format: ``'csv'`` selects ``format_csv``; anything else
        selects ``format_plain``.
    :param output: passed through to the formatter.
    """
    d = db.db(dbpath)

    selected = []
    for b in sorted(d.buckets(account),
                    key=operator.attrgetter('bucket_id')):
        if bucket and b.name not in bucket:
            continue
        if matched and not b.matched:
            continue
        if kdenied and not b.keys_denied:
            continue
        # Use ``error_count``, the error attribute the other bucket
        # reports in this module read, instead of ``errors``.
        if errors and not b.error_count:
            continue
        if size and b.size < size:
            continue
        if denied and not b.denied:
            continue
        selected.append(b)

    formatter = format_csv if format == 'csv' else format_plain
    formatter(selected, output)
def buckets(bucket=None, account=None, matched=False, kdenied=False,
            errors=False, dbpath=None, size=None, denied=False,
            format=None, incomplete=False, oversize=False, region=(),
            not_region=(), inventory=None, output=None, config=None,
            sort=None, tagprefix=None, not_bucket=None):
    """Report on stats by bucket.

    Loads state with ``db.db(dbpath)``, keeps the buckets that pass every
    active filter (ordered by ``bucket_id``), optionally annotates each
    one with an account tag value read from ``config``, optionally
    re-sorts them in descending order of ``sort``, and hands the result
    to the selected formatter.

    :param bucket: bucket names to include; falsy keeps all.
    :param not_bucket: bucket names to exclude.
    :param account: passed through to ``d.buckets()``.
    :param matched: only buckets with a truthy ``matched``.
    :param kdenied: only buckets with a truthy ``keys_denied``.
    :param errors: only buckets with a non-zero ``error_count``.
    :param size: only buckets whose ``size`` is at least this value.
    :param inventory: only buckets with a truthy ``using_inventory``.
    :param denied: only buckets with a truthy ``denied``.
    :param oversize: only buckets whose ``scanned`` exceeds ``size``.
    :param incomplete: only buckets whose ``percent_scanned`` is below
        this value.
    :param region: regions to include; empty keeps all.
    :param not_region: regions to exclude.
    :param config: path to a JSON file with an ``accounts`` list whose
        entries carry ``name`` and ``tags``.
    :param tagprefix: tag prefix to look for on each configured account;
        the remainder of the matching tag is set on every reported
        bucket under the attribute named ``tagprefix[:-1]``.
    :param sort: attribute name to order the report by, descending.
    :param format: ``'csv'`` selects ``format_csv``; anything else
        selects ``format_plain``.
    :param output: passed through to the formatter.
    :raises ValueError: if ``tagprefix`` is given without ``config``.
    :raises KeyError: if a reported bucket's account has no tag starting
        with ``tagprefix`` in ``config``.
    """
    d = db.db(dbpath)

    if tagprefix and not config:
        raise ValueError(
            "account tag value inclusion requires account config file")

    if config and tagprefix:
        with open(config) as fh:
            configured = json.load(fh).get('accounts')
        # Account name -> tag text after the prefix; when an account has
        # several matching tags the last one wins.
        account_data = {
            entry['name']: label[len(tagprefix):]
            for entry in configured
            for label in entry['tags']
            if label.startswith(tagprefix)}

    def excluded(b):
        # Filters are evaluated in a fixed order and short-circuit on
        # the first one that rejects the bucket.
        return (
            (bucket and b.name not in bucket)
            or (not_bucket and b.name in not_bucket)
            or (matched and not b.matched)
            or (kdenied and not b.keys_denied)
            or (errors and not b.error_count)
            or (size and b.size < size)
            or (inventory and not b.using_inventory)
            or (denied and not b.denied)
            or (oversize and b.scanned <= b.size)
            or (incomplete and b.percent_scanned >= incomplete)
            or (region and b.region not in region)
            or (not_region and b.region in not_region))

    # The prefix minus its final character doubles as attribute name and
    # as the extra column requested from the formatter.
    tag_attr = tagprefix[:-1] if tagprefix else None

    selected = []
    for b in sorted(d.buckets(account),
                    key=operator.attrgetter('bucket_id')):
        if excluded(b):
            continue
        if tagprefix:
            setattr(b, tag_attr, account_data[b.account])
        selected.append(b)

    if sort:
        # Ascending stable sort followed by a reversal, so buckets with
        # equal keys end up in reverse bucket_id order.
        selected.sort(key=operator.attrgetter(sort))
        selected.reverse()

    formatter = format_csv if format == 'csv' else format_plain
    keys = (tag_attr,) if tagprefix else ()
    formatter(selected, output, keys=keys)
def accounts(dbpath, output, format, account,
             config=None, tag=None, tagprefix=None, region=(),
             not_region=(), not_bucket=None):
    """Report on stats by account.

    Loads accounts with ``db.db(dbpath).accounts()``, narrows each
    account's bucket list by the region and bucket-name filters, and
    passes the result to the selected formatter. When both ``config``
    and ``tagprefix`` are given, accounts are regrouped into one
    ``db.Account`` per tag value instead.

    :param dbpath: passed through to ``db.db()``.
    :param output: passed through to the formatter.
    :param format: ``'csv'`` selects ``format_accounts_csv``; anything
        else selects ``format_accounts_plain``.
    :param account: not used by this function.
    :param config: path to a JSON file with an ``accounts`` list whose
        entries carry ``name`` and ``tags``.
    :param tag: when not ``None``, only configured accounts carrying
        this exact tag take part in the grouping.
    :param tagprefix: tags starting with this prefix define the groups;
        the text after the prefix is the group name.
    :param region: keep only buckets in these regions, then drop
        accounts with a falsy ``bucket_count``.
    :param not_region: drop buckets in these regions, then drop accounts
        with a falsy ``bucket_count``.
    :param not_bucket: drop buckets with these names.
    """
    d = db.db(dbpath)
    accounts = d.accounts()
    formatter = (
        format_accounts_csv if format == 'csv' else format_accounts_plain)

    def _restrict(accts, keep):
        # Replace each account's bucket list with the buckets to keep.
        for acct in accts:
            acct.buckets = [b for b in acct.buckets if keep(b)]

    if region:
        _restrict(accounts, lambda b: b.region in region)
        accounts = [a for a in accounts if a.bucket_count]
    if not_region:
        _restrict(accounts, lambda b: b.region not in not_region)
        accounts = [a for a in accounts if a.bucket_count]
    if not_bucket:
        _restrict(accounts, lambda b: b.name not in not_bucket)

    if config and tagprefix:
        account_map = {acct.name: acct for acct in accounts}
        with open(config) as fh:
            account_data = json.load(fh).get('accounts')

        tag_groups = {}
        for entry in account_data:
            if tag is not None and tag not in entry['tags']:
                continue
            for label in entry['tags']:
                if not label.startswith(tagprefix):
                    continue
                tvalue = label[len(tagprefix):]
                if not tvalue:
                    continue
                # The group is created before the account lookup, so a
                # tag value can appear in the report with no buckets.
                if tvalue not in tag_groups:
                    tag_groups[tvalue] = db.Account(tvalue, [])
                account_results = account_map.get(entry['name'])
                if not account_results:
                    print("missing %s" % entry['name'])
                    continue
                tag_groups[tvalue].buckets.extend(account_results.buckets)
        accounts = tag_groups.values()

    formatter(accounts, output)
def inspect_partitions(bucket):
    """Discover the partitions on a bucket via introspection.

    For large buckets which lack s3 inventories, salactus will attempt
    to process objects in parallel on the bucket by breaking the bucket
    into separate keyspace partitions. It does this with a heuristic
    that attempts to sample the keyspace and determine appropriate
    subparts.

    This command provides additional visibility into the partitioning
    of a bucket by showing how salactus would partition a given bucket.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s: %(name)s:%(levelname)s %(message)s")
    logging.getLogger('botocore').setLevel(level=logging.WARNING)

    state = db.db()
    # TODO: replace this scan with a db.bucket accessor.
    found = next((b for b in state.buckets() if b.name == bucket), None)
    if not found:
        click.echo("no bucket named: %s" % bucket)
        return

    page_sizes = []
    prefixes = []

    def process_keyset(bid, page):
        # Record only how many keys each page held.
        page_sizes.append(len(page))

    def process_bucket_iterator(bid, prefix, delimiter="", **continuation):
        # Record the prefix instead of iterating the partition.
        prefixes.append(prefix)

    def invoke(f, *args, **kw):
        # Run the task synchronously, in-process.
        return f(*args, **kw)

    # Patch the worker module so partitioning runs locally and only
    # collects what it would have dispatched.
    worker.connection.hincrby = lambda x, y, z: True
    worker.invoke = invoke
    worker.process_keyset = process_keyset
    worker.process_bucket_iterator = process_bucket_iterator

    # Kick off partitioning for the bucket that was found.
    worker.process_bucket_partitions(found.bucket_id)

    click.echo(
        "Found %d partitions %s keys scanned during partitioning" % (
            len(prefixes), sum(page_sizes)))
    click.echo("\n".join(prefixes))
def accounts(dbpath, account):
    """Report on stats by account.

    Loads state with ``db.db(dbpath)`` and echoes one summary line per
    account, ordered by account name.

    :param dbpath: passed through to ``db.db()``.
    :param account: not used by this function.
    """
    d = db.db(dbpath)
    template = (
        "name:%s, matched:%d percent:%0.2f scanned:%d size:%d buckets:%d")
    by_name = operator.attrgetter('name')

    for a in sorted(d.accounts(), key=by_name):
        click.echo(template % (
            a.name, a.matched, a.percent_scanned, a.scanned, a.size,
            len(a.buckets)))
def accounts(dbpath, output, format, account,
             config=None, tag=None, tagprefix=None, region=(),
             not_region=(), not_bucket=None):
    """Report on stats by account.

    Loads accounts with ``db.db(dbpath).accounts()``, narrows each
    account's bucket list by the region and bucket-name filters, and
    passes the result to the selected formatter. When both ``config``
    and ``tagprefix`` are given, accounts are regrouped into one
    ``db.Account`` per tag value instead.

    :param dbpath: passed through to ``db.db()``.
    :param output: passed through to the formatter.
    :param format: ``'csv'`` selects ``format_accounts_csv``; anything
        else selects ``format_accounts_plain``.
    :param account: not used by this function.
    :param config: path to a JSON file with an ``accounts`` list whose
        entries carry ``name`` and ``tags``.
    :param tag: when not ``None``, only configured accounts carrying
        this exact tag take part in the grouping.
    :param tagprefix: tags starting with this prefix define the groups;
        the text after the prefix is the group name.
    :param region: keep only buckets in these regions, then drop
        accounts with a falsy ``bucket_count``.
    :param not_region: drop buckets in these regions, then drop accounts
        with a falsy ``bucket_count``.
    :param not_bucket: drop buckets with these names.
    """
    d = db.db(dbpath)
    accounts = d.accounts()
    formatter = (
        format_accounts_csv if format == 'csv' else format_accounts_plain)

    def _restrict(accts, keep):
        # Replace each account's bucket list with the buckets to keep.
        for acct in accts:
            acct.buckets = [b for b in acct.buckets if keep(b)]

    if region:
        _restrict(accounts, lambda b: b.region in region)
        accounts = [a for a in accounts if a.bucket_count]
    if not_region:
        _restrict(accounts, lambda b: b.region not in not_region)
        accounts = [a for a in accounts if a.bucket_count]
    if not_bucket:
        _restrict(accounts, lambda b: b.name not in not_bucket)

    if config and tagprefix:
        account_map = {acct.name: acct for acct in accounts}
        with open(config) as fh:
            account_data = json.load(fh).get('accounts')

        tag_groups = {}
        for entry in account_data:
            if tag is not None and tag not in entry['tags']:
                continue
            for label in entry['tags']:
                if not label.startswith(tagprefix):
                    continue
                tvalue = label[len(tagprefix):]
                if not tvalue:
                    continue
                # The group is created before the account lookup, so a
                # tag value can appear in the report with no buckets.
                if tvalue not in tag_groups:
                    tag_groups[tvalue] = db.Account(tvalue, [])
                account_results = account_map.get(entry['name'])
                if not account_results:
                    print("missing %s" % entry['name'])
                    continue
                tag_groups[tvalue].buckets.extend(account_results.buckets)
        accounts = tag_groups.values()

    formatter(accounts, output)
def save(dbpath):
    """Save the current state to a json file.

    Loads the current state with ``db.db()`` and calls its ``save``
    method with ``dbpath``.

    :param dbpath: destination passed to the state's ``save`` method.
    """
    db.db().save(dbpath)