def _command_autoscale_status(self, cmd):
    """Report the autoscaler's view of every pool.

    :param cmd: command dict; when its 'format' entry is 'json' or
        'json-pretty' the pool status list is dumped as JSON, otherwise
        a plain-text table is rendered.
    :return: (retval, stdout, stderr) tuple; retval is always 0 here.
    """
    osdmap = self.get_osdmap()
    pools = osdmap.get_pools_by_name()
    ps, root_map, pool_root = self._get_pool_status(osdmap, pools)

    if cmd.get('format') in ('json', 'json-pretty'):
        return 0, json.dumps(ps, indent=2), ''

    # (column title, alignment) in display order.  There is no 'IDEAL'
    # column: p['pg_num_ideal'] is not displayed.
    columns = [
        ('POOL', 'l'),
        ('SIZE', 'r'),
        ('TARGET SIZE', 'r'),
        ('RATE', 'r'),
        ('RAW CAPACITY', 'r'),
        ('RATIO', 'r'),
        ('TARGET RATIO', 'r'),
        ('BIAS', 'r'),
        ('PG_NUM', 'r'),
        ('NEW PG_NUM', 'r'),
        ('AUTOSCALE', 'l'),
    ]
    table = PrettyTable([title for title, _ in columns], border=False)
    table.left_padding_width = 0
    table.right_padding_width = 1
    for title, side in columns:
        table.align[title] = side

    for p in ps:
        # Optional cells stay blank when the value is unset / unchanged.
        final = str(p['pg_num_final']) if p['would_adjust'] else ''
        ts = (mgr_util.format_bytes(p['target_bytes'], 6)
              if p['target_bytes'] > 0 else '')
        tr = ('%.4f' % p['target_ratio']) if p['target_ratio'] > 0.0 else ''
        table.add_row([
            p['pool_name'],
            mgr_util.format_bytes(p['logical_used'], 6),
            ts,
            p['raw_used_rate'],
            mgr_util.format_bytes(p['subtree_capacity'], 6),
            '%.4f' % p['capacity_ratio'],
            tr,
            p['bias'],
            p['pg_num_target'],
            final,
            p['pg_autoscale_mode'],
        ])
    return 0, table.get_string(), ''
def _command_autoscale_status(self, cmd):
    """Return the per-pool autoscale status as JSON or as a text table.

    :param cmd: command dict; a 'format' of 'json' or 'json-pretty'
        selects a JSON dump of the pool status list, anything else a
        borderless table.
    :return: (retval, stdout, stderr) tuple with retval 0.
    """
    osdmap = self.get_osdmap()
    pools = osdmap.get_pools_by_name()
    ps, root_map, pool_root = self._get_pool_status(osdmap, pools)

    wants_json = cmd.get('format') in ('json', 'json-pretty')
    if wants_json:
        return 0, json.dumps(ps, indent=2), ''

    # Titles in display order; p['pg_num_ideal'] has no column.
    left_aligned = ('POOL', 'AUTOSCALE')
    titles = ['POOL', 'SIZE', 'TARGET SIZE', 'RATE', 'RAW CAPACITY',
              'RATIO', 'TARGET RATIO', 'BIAS', 'PG_NUM', 'NEW PG_NUM',
              'AUTOSCALE']
    table = PrettyTable(titles, border=False)
    for title in titles:
        table.align[title] = 'l' if title in left_aligned else 'r'

    for p in ps:
        if p['would_adjust']:
            new_pg_num = str(p['pg_num_final'])
        else:
            new_pg_num = ''

        if p['target_bytes'] > 0:
            target_size = mgr_util.format_bytes(p['target_bytes'], 6)
        else:
            target_size = ''

        if p['target_ratio'] > 0.0:
            target_ratio = '%.4f' % p['target_ratio']
        else:
            target_ratio = ''

        row = [
            p['pool_name'],
            mgr_util.format_bytes(p['logical_used'], 6),
            target_size,
            p['raw_used_rate'],
            mgr_util.format_bytes(p['subtree_capacity'], 6),
            '%.4f' % p['capacity_ratio'],
            target_ratio,
            p['bias'],
            p['pg_num_target'],
            new_pg_num,
            p['pg_autoscale_mode'],
        ]
        table.add_row(row)
    return 0, table.get_string(), ''
def handle_osd_status(self, cmd):
    """Render a table of OSD usage and I/O rates.

    :param cmd: command dict; an optional 'bucket' entry is an fnmatch
        pattern matched against CRUSH bucket names, restricting the
        output to the items of the matching buckets.
    :return: (retval, stdout, stderr); retval is errno.ENOENT when a
        bucket pattern was given but matched nothing, else 0.
    """
    osd_table = PrettyTable(['id', 'host', 'used', 'avail', 'wr ops',
                             'wr data', 'rd ops', 'rd data', 'state'])
    osdmap = self.get("osd_map")

    filter_osds = set()
    bucket_filter = None
    if 'bucket' in cmd:
        self.log.debug("Filtering to bucket '{0}'".format(cmd['bucket']))
        bucket_filter = cmd['bucket']
        crush = self.get("osd_map_crush")
        matching = [b for b in crush['buckets']
                    if fnmatch.fnmatch(b['name'], bucket_filter)]
        for matched in matching:
            filter_osds.update(item['id'] for item in matched['items'])
        if not matching:
            msg = "Bucket '{0}' not found".format(bucket_filter)
            return errno.ENOENT, msg, ""

    # Map OSD ID -> stats record
    osd_stats = {o['osd']: o for o in self.get("osd_stats")['osd_stats']}

    for osd in osdmap['osds']:
        osd_id = osd['osd']
        if bucket_filter and osd_id not in filter_osds:
            continue

        osd_name = str(osd_id)
        hostname = ""
        kb_used = 0
        kb_avail = 0
        if osd_id in osd_stats:
            metadata = self.get_metadata('osd', osd_name)
            stats = osd_stats[osd_id]
            hostname = metadata['hostname']
            # Multiplied by 1024 before being handed to format_bytes.
            kb_used = stats['kb_used'] * 1024
            kb_avail = stats['kb_avail'] * 1024

        wr_ops = (self.get_rate("osd", osd_name, "osd.op_w") +
                  self.get_rate("osd", osd_name, "osd.op_rw"))
        wr_data = self.get_rate("osd", osd_name, "osd.op_in_bytes")
        rd_ops = self.get_rate("osd", osd_name, "osd.op_r")
        rd_data = self.get_rate("osd", osd_name, "osd.op_out_bytes")

        osd_table.add_row([
            osd_id,
            hostname,
            mgr_util.format_bytes(kb_used, 5),
            mgr_util.format_bytes(kb_avail, 5),
            mgr_util.format_dimless(wr_ops, 5),
            mgr_util.format_bytes(wr_data, 5),
            mgr_util.format_dimless(rd_ops, 5),
            mgr_util.format_bytes(rd_data, 5),
            ','.join(osd['state']),
        ])

    return 0, osd_table.get_string(), ""
def _list_devices(self, hostname=None, format='plain', refresh=False):
    # type: (Optional[List[str]], str, bool) -> HandleCommandResult
    """
    List the storage devices known for the cluster hosts.

    The inventory is fetched through ``self.get_inventory`` (optionally
    filtered by ``hostname`` and forced to refresh) and rendered either
    as JSON (``format == 'json'``) or as a single borderless table.

    Note: this does not have to be completely synchronous. Slightly out
    of date hardware inventory is fine as long as hardware ultimately
    appears in the output of this command.
    """
    nf = InventoryFilter(hosts=hostname) if hostname else None

    completion = self.get_inventory(host_filter=nf, refresh=refresh)
    self._orchestrator_wait([completion])
    raise_if_exception(completion)

    if format == 'json':
        serialized = json.dumps([n.to_json() for n in completion.result])
        return HandleCommandResult(stdout=serialized)

    table = PrettyTable(
        ['HOST', 'PATH', 'TYPE', 'SIZE', 'DEVICE', 'AVAIL',
         'REJECT REASONS'],
        border=False)
    table.align = 'l'
    table._align['SIZE'] = 'r'
    table.left_padding_width = 0
    table.right_padding_width = 1
    for host_ in completion.result:  # type: InventoryHost
        for dev in host_.devices.devices:  # type: Device
            row = (
                host_.name,
                dev.path,
                dev.human_readable_type,
                format_bytes(dev.sys_api.get('size', 0), 5),
                dev.device_id,
                dev.available,
                ', '.join(dev.rejected_reasons),
            )
            table.add_row(row)

    # One table covers every host, so the output is just that table.
    return HandleCommandResult(stdout='\n'.join([table.get_string()]))
def pretty_print(self, only_header=False):
    """Return one human friendly, newline-terminated line for the device.

    :param only_header: when true, return the column titles instead of
        this device's attribute values.

    Ex:
        > Device Path           Type       Size    Rotates  Available Model
        > /dev/sdc               hdd   50.00 GB       True       True ATA/QEMU
    """
    row_format = " {0:<15} {1:>10} {2:>10} {3:>10} {4:>10} {5:<15}\n"
    if only_header:
        cells = ("Device Path", "Type", "Size", "Rotates", "Available",
                 "Model")
    else:
        cells = (
            self.id,
            self.type,
            format_bytes(self.size, 5, colored=False),
            str(self.rotates),
            str(self.available),
            self.dev_id,
        )
    return row_format.format(*cells)
def _list_devices(self, host=None, format='plain', refresh=False):
    # type: (List[str], str, bool) -> HandleCommandResult
    """
    List the storage devices known for the cluster hosts.

    The inventory is fetched through ``self.get_inventory`` (optionally
    filtered by ``host`` and forced to refresh) and rendered either as
    JSON (``format == 'json'``) or as a "Host <name>:" heading followed
    by one borderless table per inventory node.

    Note: this does not have to be completely synchronous. Slightly out
    of date hardware inventory is fine as long as hardware ultimately
    appears in the output of this command.
    """
    nf = orchestrator.InventoryFilter(nodes=host) if host else None

    completion = self.get_inventory(node_filter=nf, refresh=refresh)
    self._orchestrator_wait([completion])
    orchestrator.raise_if_exception(completion)

    if format == 'json':
        serialized = json.dumps([n.to_json() for n in completion.result])
        return HandleCommandResult(stdout=serialized)

    sections = []
    for node in completion.result:  # type: orchestrator.InventoryNode
        sections.append('Host {}:'.format(node.name))
        table = PrettyTable(
            ['Path', 'Type', 'Size', 'Available', 'Ceph Device ID',
             'Reject Reasons'],
            border=False)
        table._align['Path'] = 'l'
        for dev in node.devices.devices:  # type: Device
            row = (
                dev.path,
                dev.human_readable_type,
                format_bytes(dev.sys_api.get('size', 0), 5, colored=False),
                dev.available,
                dev.device_id,
                ', '.join(dev.rejected_reasons),
            )
            table.add_row(row)
        sections.append(table.get_string())
    return HandleCommandResult(stdout='\n'.join(sections))
def handle_fs_status(self, cmd):
    """Report the status of CephFS filesystems, their MDS daemons and pools.

    :param cmd: command dict.  'format' ('plain' by default, or 'json' /
        'json-pretty') selects the output form; an optional 'fs' entry
        restricts the report to the filesystem with that name.
    :return: a HandleCommandResult carrying the report on stdout, or the
        tuple (errno.EINVAL, "", message) when an 'fs' filter was given
        and nothing was produced for it.
    """
    output = ""
    json_output = defaultdict(list)
    output_format = cmd.get('format', 'plain')
    as_json = output_format in ('json', 'json-pretty')
    fs_filter = cmd.get('fs', None)

    # ceph_version string -> names of the MDS daemons running it
    mds_versions = defaultdict(list)

    fsmap = self.get("fs_map")
    for filesystem in fsmap['filesystems']:
        if fs_filter and filesystem['mdsmap']['fs_name'] != fs_filter:
            continue

        rank_table = PrettyTable(
            ("RANK", "STATE", "MDS", "ACTIVITY", "DNS", "INOS"),
            border=False,
        )
        rank_table.left_padding_width = 0
        rank_table.right_padding_width = 2

        mdsmap = filesystem['mdsmap']

        client_count = 0

        for rank in mdsmap["in"]:
            up = "mds_{0}".format(rank) in mdsmap["up"]
            if up:
                gid = mdsmap['up']["mds_{0}".format(rank)]
                info = mdsmap['info']['gid_{0}'.format(gid)]
                dns = self.get_latest("mds", info['name'], "mds_mem.dn")
                inos = self.get_latest("mds", info['name'], "mds_mem.ino")

                if rank == 0:
                    client_count = self.get_latest(
                        "mds", info['name'], "mds_sessions.session_count")
                elif client_count == 0:
                    # In case rank 0 was down, look at another rank's
                    # sessionmap to get an indication of clients.
                    client_count = self.get_latest(
                        "mds", info['name'], "mds_sessions.session_count")

                laggy = "laggy_since" in info

                state = info['state'].split(":")[1]
                if laggy:
                    state += "(laggy)"
                if state == "active" and not laggy:
                    c_state = mgr_util.colorize(state, mgr_util.GREEN)
                else:
                    c_state = mgr_util.colorize(state, mgr_util.YELLOW)

                # Populate based on context of state, e.g. client
                # ops for an active daemon, replay progress, reconnect
                # progress
                activity = ""

                if state == "active":
                    rate = self.get_rate(
                        "mds", info['name'],
                        "mds_server.handle_client_request")
                    if not as_json:
                        activity = "Reqs: " + mgr_util.format_dimless(
                            rate, 5) + "/s"

                metadata = self.get_metadata('mds', info['name'])
                mds_versions[metadata.get('ceph_version', "unknown")].append(
                    info['name'])
                if as_json:
                    json_output['mdsmap'].append({
                        'rank': rank,
                        'name': info['name'],
                        'state': state,
                        # `rate` is only bound for active daemons; the
                        # fallback is the string "0", kept as-is for
                        # existing consumers of the JSON output.
                        'rate': rate if state == "active" else "0",
                        'dns': dns,
                        'inos': inos
                    })
                else:
                    rank_table.add_row([
                        mgr_util.bold(rank.__str__()), c_state, info['name'],
                        activity,
                        mgr_util.format_dimless(dns, 5),
                        mgr_util.format_dimless(inos, 5)
                    ])
            else:
                if as_json:
                    json_output['mdsmap'].append({
                        'rank': rank,
                        'state': "failed"
                    })
                else:
                    rank_table.add_row([rank, "failed", "", "", "", ""])

        # Find the standby replays
        for daemon_info in mdsmap['info'].values():
            if daemon_info['state'] != "up:standby-replay":
                continue

            inos = self.get_latest("mds", daemon_info['name'], "mds_mem.ino")
            dns = self.get_latest("mds", daemon_info['name'], "mds_mem.dn")

            events = self.get_rate("mds", daemon_info['name'],
                                   "mds_log.replayed")
            if not as_json:
                activity = "Evts: " + mgr_util.format_dimless(events, 5) + "/s"

            metadata = self.get_metadata('mds', daemon_info['name'])
            mds_versions[metadata.get('ceph_version', "unknown")].append(
                daemon_info['name'])

            if as_json:
                json_output['mdsmap'].append({
                    # Report the rank this daemon follows (same field the
                    # plain-text row uses), not the leftover loop variable
                    # from the rank loop above, and the counters actually
                    # fetched rather than the literal 5.
                    'rank': daemon_info['rank'],
                    'name': daemon_info['name'],
                    'state': 'standby-replay',
                    'events': events,
                    'dns': dns,
                    'inos': inos
                })
            else:
                rank_table.add_row([
                    "{0}-s".format(daemon_info['rank']), "standby-replay",
                    daemon_info['name'], activity,
                    mgr_util.format_dimless(dns, 5),
                    mgr_util.format_dimless(inos, 5)
                ])

        df = self.get("df")
        pool_stats = {p['id']: p['stats'] for p in df['pools']}
        osdmap = self.get("osd_map")
        pools = {p['pool']: p for p in osdmap['pools']}
        metadata_pool_id = mdsmap['metadata_pool']
        data_pool_ids = mdsmap['data_pools']

        pools_table = PrettyTable(["POOL", "TYPE", "USED", "AVAIL"],
                                  border=False)
        pools_table.left_padding_width = 0
        pools_table.right_padding_width = 2
        for pool_id in [metadata_pool_id] + data_pool_ids:
            pool_type = "metadata" if pool_id == metadata_pool_id else "data"
            stats = pool_stats[pool_id]

            if as_json:
                json_output['pools'].append({
                    'id': pool_id,
                    'name': pools[pool_id]['pool_name'],
                    'type': pool_type,
                    'used': stats['bytes_used'],
                    'avail': stats['max_avail']
                })
            else:
                pools_table.add_row([
                    pools[pool_id]['pool_name'], pool_type,
                    mgr_util.format_bytes(stats['bytes_used'], 5),
                    mgr_util.format_bytes(stats['max_avail'], 5)
                ])

        if as_json:
            json_output['clients'].append({
                'fs': mdsmap['fs_name'],
                'clients': client_count,
            })
        else:
            output += "{0} - {1} clients\n".format(
                mdsmap['fs_name'], client_count)
            output += "=" * len(mdsmap['fs_name']) + "\n"
            output += rank_table.get_string()
            output += "\n" + pools_table.get_string() + "\n"

    # Nothing was produced for the requested filesystem name.
    if not output and not json_output and fs_filter is not None:
        return errno.EINVAL, "", "Invalid filesystem: " + fs_filter

    standby_table = PrettyTable(["STANDBY MDS"], border=False)
    standby_table.left_padding_width = 0
    standby_table.right_padding_width = 2
    for standby in fsmap['standbys']:
        metadata = self.get_metadata('mds', standby['name'])
        mds_versions[metadata.get('ceph_version', "unknown")].append(
            standby['name'])

        if as_json:
            json_output['mdsmap'].append({
                'name': standby['name'],
                'state': "standby"
            })
        else:
            standby_table.add_row([standby['name']])

    if not as_json:
        output += "\n" + standby_table.get_string() + "\n"

    if len(mds_versions) == 1:
        # Every daemon runs the same version: report it as a scalar.
        if as_json:
            json_output['mds_version'] = list(mds_versions)[0]
        else:
            output += "MDS version: {0}".format(list(mds_versions)[0])
    else:
        version_table = PrettyTable(["VERSION", "DAEMONS"],
                                    border=False)
        version_table.left_padding_width = 0
        version_table.right_padding_width = 2
        for version, daemons in mds_versions.items():
            if as_json:
                json_output['mds_version'].append({
                    'version': version,
                    'daemons': daemons
                })
            else:
                version_table.add_row([
                    version,
                    ", ".join(daemons)
                ])
        if not as_json:
            output += version_table.get_string() + "\n"

    if output_format == "json":
        return HandleCommandResult(
            stdout=json.dumps(json_output, sort_keys=True))
    elif output_format == "json-pretty":
        return HandleCommandResult(stdout=json.dumps(
            json_output, sort_keys=True, indent=4, separators=(',', ': ')))
    else:
        return HandleCommandResult(stdout=output)
def _maybe_adjust(self):
    """Apply or report pg_num changes and publish autoscaler health checks.

    For every pool status returned by ``self._get_pool_status``:

    * mode 'warn' pools that would be adjusted are reported under
      POOL_TOO_FEW_PGS / POOL_TOO_MANY_PGS;
    * mode 'on' pools that would be adjusted get an 'osd pool set ...
      pg_num' mon command (a failure is logged, not raised).

    Per CRUSH root, target_size_ratio / target_size_bytes overcommit is
    reported, but only when at least one pool under that root actually
    has such a target set.  The resulting dict (possibly empty) is passed
    to ``self.set_health_checks``.  Returns None.
    """
    self.log.info('_maybe_adjust')
    osdmap = self.get_osdmap()
    pools = osdmap.get_pools_by_name()
    ps, root_map, pool_root = self._get_pool_status(osdmap, pools)

    # Anyone in 'warn', set the health message for them and then
    # drop them from consideration.
    too_few = []
    too_many = []
    health_checks = {}

    # Per-root accumulators, keyed like root_map.
    total_ratio = {r: 0.0 for r in root_map}
    total_target_ratio = {r: 0.0 for r in root_map}
    target_ratio_pools = {r: [] for r in root_map}

    total_bytes = {r: 0 for r in root_map}
    total_target_bytes = {r: 0.0 for r in root_map}
    target_bytes_pools = {r: [] for r in root_map}

    for p in ps:
        root_id = p['crush_root_id']

        total_ratio[root_id] += max(p['actual_capacity_ratio'],
                                    p['target_ratio'])
        if p['target_ratio'] > 0:
            total_target_ratio[root_id] += p['target_ratio']
            target_ratio_pools[root_id].append(p['pool_name'])

        total_bytes[root_id] += max(
            p['actual_raw_used'],
            p['target_bytes'] * p['raw_used_rate'])
        if p['target_bytes'] > 0:
            total_target_bytes[root_id] += (p['target_bytes'] *
                                            p['raw_used_rate'])
            target_bytes_pools[root_id].append(p['pool_name'])

        if not p['would_adjust']:
            continue

        if p['pg_autoscale_mode'] == 'warn':
            msg = 'Pool %s has %d placement groups, should have %d' % (
                p['pool_name'],
                p['pg_num_target'],
                p['pg_num_final'])
            if p['pg_num_final'] > p['pg_num_target']:
                too_few.append(msg)
            else:
                too_many.append(msg)

        if p['pg_autoscale_mode'] == 'on':
            # Note that setting pg_num actually sets pg_num_target (see
            # OSDMonitor.cc)
            r = self.mon_command({
                'prefix': 'osd pool set',
                'pool': p['pool_name'],
                'var': 'pg_num',
                'val': str(p['pg_num_final'])
            })

            if r[0] != 0:
                # FIXME: this is a serious and unexpected thing,
                # we should expose it as a cluster log error once
                # the hook for doing that from ceph-mgr modules is
                # in.
                self.log.error(
                    "pg_num adjustment on {0} to {1} failed: {2}".format(
                        p['pool_name'], p['pg_num_final'], r))

    if too_few:
        summary = "{0} pools have too few placement groups".format(
            len(too_few))
        health_checks['POOL_TOO_FEW_PGS'] = {
            'severity': 'warning',
            'summary': summary,
            'detail': too_few
        }
    if too_many:
        summary = "{0} pools have too many placement groups".format(
            len(too_many))
        health_checks['POOL_TOO_MANY_PGS'] = {
            'severity': 'warning',
            'summary': summary,
            'detail': too_many
        }

    too_much_target_ratio = []
    for root_id, total in total_ratio.items():
        total_target = total_target_ratio[root_id]
        # Only blame target_size_ratio when some pool under this root
        # sets one; otherwise the message would cite a 0.000 target on
        # an empty pool list.
        if total_target > 0 and total > 1.0:
            too_much_target_ratio.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'target_size_ratio %.03f on pools %s' % (
                    root_map[root_id].pool_names,
                    total,
                    total_target,
                    target_ratio_pools[root_id]
                )
            )
        elif total_target > 1.0:
            too_much_target_ratio.append(
                'Pools %s have collective target_size_ratio %.03f > 1.0' % (
                    root_map[root_id].pool_names,
                    total_target
                )
            )
    if too_much_target_ratio:
        health_checks['POOL_TARGET_SIZE_RATIO_OVERCOMMITTED'] = {
            'severity': 'warning',
            'summary': "%d subtrees have overcommitted pool target_size_ratio" % len(too_much_target_ratio),
            'detail': too_much_target_ratio,
        }

    too_much_target_bytes = []
    for root_id, total in total_bytes.items():
        total_target = total_target_bytes[root_id]
        capacity = root_map[root_id].capacity
        # Same rule as above: no target_size_bytes set, no warning.
        if total_target > 0 and total > capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'target_size_bytes %s on pools %s' % (
                    root_map[root_id].pool_names,
                    total / capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                    target_bytes_pools[root_id]
                )
            )
        elif total_target > capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'collective target_size_bytes of %s' % (
                    root_map[root_id].pool_names,
                    total / capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                )
            )
    if too_much_target_bytes:
        health_checks['POOL_TARGET_SIZE_BYTES_OVERCOMMITTED'] = {
            'severity': 'warning',
            'summary': "%d subtrees have overcommitted pool target_size_bytes" % len(too_much_target_bytes),
            'detail': too_much_target_bytes,
        }

    self.set_health_checks(health_checks)
def _maybe_adjust(self):
    """Apply or report pg_num changes and publish autoscaler health checks.

    Does nothing when the osdmap's required OSD release sorts before
    'nautilus'.  Otherwise, for each pool status:

    * pools whose options set both target_size_ratio and
      target_size_bytes are reported;
    * mode 'warn' pools that would be adjusted are reported as having
      too few / too many PGs;
    * mode 'on' pools that would be adjusted get an 'osd pool set ...
      pg_num' mon command and a PgAdjustmentProgress event that is
      created or reset, then updated with progress 0.0.

    Per-root target_size_bytes overcommit is reported as well.  The
    collected checks are passed to ``self.set_health_checks``.
    """
    self.log.info('_maybe_adjust')
    osdmap = self.get_osdmap()
    if osdmap.get_require_osd_release() < 'nautilus':
        return
    pools = osdmap.get_pools_by_name()
    ps, root_map, pool_root = self._get_pool_status(osdmap, pools)

    # Anyone in 'warn', set the health message for them and then
    # drop them from consideration.
    too_few, too_many, bytes_and_ratio = [], [], []
    health_checks = {}

    # Per-root accumulators, keyed like root_map.
    total_bytes = {r: 0 for r in root_map}
    total_target_bytes = {r: 0.0 for r in root_map}
    target_bytes_pools = {r: [] for r in root_map}

    for p in ps:
        pool_id = p['pool_id']
        name = p['pool_name']
        root = p['crush_root_id']

        pool_opts = pools[name]['options']
        has_ratio = pool_opts.get('target_size_ratio', 0) > 0
        if has_ratio and pool_opts.get('target_size_bytes', 0) > 0:
            bytes_and_ratio.append(
                'Pool %s has target_size_bytes and target_size_ratio set'
                % name)

        target_raw = p['target_bytes'] * p['raw_used_rate']
        total_bytes[root] += max(p['actual_raw_used'], target_raw)
        if p['target_bytes'] > 0:
            total_target_bytes[root] += target_raw
            target_bytes_pools[root].append(name)

        if not p['would_adjust']:
            continue

        mode = p['pg_autoscale_mode']
        if mode == 'warn':
            msg = 'Pool %s has %d placement groups, should have %d' % (
                name, p['pg_num_target'], p['pg_num_final'])
            bucket = (too_few if p['pg_num_final'] > p['pg_num_target']
                      else too_many)
            bucket.append(msg)

        if mode == 'on':
            # Note that setting pg_num actually sets pg_num_target (see
            # OSDMonitor.cc)
            r = self.mon_command({
                'prefix': 'osd pool set',
                'pool': name,
                'var': 'pg_num',
                'val': str(p['pg_num_final'])
            })

            # create new event or update existing one to reflect
            # progress from current state to the new pg_num_target
            pg_num = pools[name]['pg_num']
            new_target = p['pg_num_final']
            if pool_id in self._event:
                self._event[pool_id].reset(pg_num, new_target)
            else:
                self._event[pool_id] = PgAdjustmentProgress(
                    pool_id, pg_num, new_target)
            self._event[pool_id].update(self, 0.0)

            if r[0] != 0:
                # FIXME: this is a serious and unexpected thing,
                # we should expose it as a cluster log error once
                # the hook for doing that from ceph-mgr modules is
                # in.
                self.log.error(
                    "pg_num adjustment on {0} to {1} failed: {2}".format(
                        name, p['pg_num_final'], r))

    if too_few:
        health_checks['POOL_TOO_FEW_PGS'] = {
            'severity': 'warning',
            'summary': "{0} pools have too few placement groups".format(
                len(too_few)),
            'count': len(too_few),
            'detail': too_few
        }
    if too_many:
        health_checks['POOL_TOO_MANY_PGS'] = {
            'severity': 'warning',
            'summary': "{0} pools have too many placement groups".format(
                len(too_many)),
            'count': len(too_many),
            'detail': too_many
        }

    too_much_target_bytes = []
    for root_id, total in total_bytes.items():
        total_target = total_target_bytes[root_id]
        root = root_map[root_id]
        if total_target > 0 and total > root.capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'target_size_bytes %s on pools %s' % (
                    root.pool_names,
                    total / root.capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                    target_bytes_pools[root_id]
                )
            )
        elif total_target > root.capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'collective target_size_bytes of %s' % (
                    root.pool_names,
                    total / root.capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                )
            )
    if too_much_target_bytes:
        health_checks['POOL_TARGET_SIZE_BYTES_OVERCOMMITTED'] = {
            'severity': 'warning',
            'summary': "%d subtrees have overcommitted pool target_size_bytes" % len(too_much_target_bytes),
            'count': len(too_much_target_bytes),
            'detail': too_much_target_bytes,
        }

    if bytes_and_ratio:
        health_checks['POOL_HAS_TARGET_SIZE_BYTES_AND_RATIO'] = {
            'severity': 'warning',
            'summary': "%d pools have both target_size_bytes and target_size_ratio set" % len(bytes_and_ratio),
            'count': len(bytes_and_ratio),
            'detail': bytes_and_ratio,
        }

    self.set_health_checks(health_checks)
def handle_osd_status(self, bucket: Optional[str] = None
                      ) -> Tuple[int, str, str]:
    """
    Show the status of OSDs within a bucket, or all
    """
    # NOTE(review): the docstring above is kept verbatim in case it is
    # surfaced as command help text; details are documented here instead.
    #
    # bucket: optional fnmatch pattern matched against CRUSH bucket names;
    #   when given, only OSDs that are items of a matching bucket are shown.
    # Returns (retval, stdout, stderr): retval is errno.ENOENT when the
    #   pattern matched no bucket, otherwise 0 with the table on stdout.
    columns = [
        ('ID', 'r'),
        ('HOST', 'l'),
        ('USED', 'r'),
        ('AVAIL', 'r'),
        ('WR OPS', 'r'),
        ('WR DATA', 'r'),
        ('RD OPS', 'r'),
        ('RD DATA', 'r'),
        ('STATE', 'l'),
    ]
    osd_table = PrettyTable([title for title, _ in columns], border=False)
    for title, side in columns:
        osd_table.align[title] = side
    osd_table.left_padding_width = 0
    osd_table.right_padding_width = 2
    osdmap = self.get("osd_map")

    filter_osds = set()
    bucket_filter = None
    if bucket is not None:
        self.log.debug(f"Filtering to bucket '{bucket}'")
        bucket_filter = bucket
        crush = self.get("osd_map_crush")
        found = False
        for bucket_ in crush['buckets']:
            if fnmatch.fnmatch(bucket_['name'], bucket_filter):
                found = True
                filter_osds.update(i['id'] for i in bucket_['items'])

        if not found:
            msg = "Bucket '{0}' not found".format(bucket_filter)
            return errno.ENOENT, msg, ""

    # Build dict of OSD ID to stats
    osd_stats = {o['osd']: o for o in self.get("osd_stats")['osd_stats']}

    for osd in osdmap['osds']:
        osd_id = osd['osd']
        if bucket_filter and osd_id not in filter_osds:
            continue

        osd_name = str(osd_id)
        hostname = ""
        kb_used = 0
        kb_avail = 0

        if osd_id in osd_stats:
            metadata = self.get_metadata('osd', osd_name,
                                         default=defaultdict(str))
            stats = osd_stats[osd_id]
            # Narrow away None only.  A bare `assert metadata` would also
            # reject the empty defaultdict passed as `default`, since an
            # empty mapping is falsy; with this check a missing hostname
            # simply renders as ''.
            # NOTE(review): presumes get_metadata returns `default`
            # unchanged when it has no metadata -- confirm.
            assert metadata is not None
            hostname = metadata['hostname']
            # Multiplied by 1024 before being handed to format_bytes.
            kb_used = stats['kb_used'] * 1024
            kb_avail = stats['kb_avail'] * 1024

        wr_ops_rate = (self.get_rate("osd", osd_name, "osd.op_w") +
                       self.get_rate("osd", osd_name, "osd.op_rw"))
        wr_byte_rate = self.get_rate("osd", osd_name, "osd.op_in_bytes")
        rd_ops_rate = self.get_rate("osd", osd_name, "osd.op_r")
        rd_byte_rate = self.get_rate("osd", osd_name, "osd.op_out_bytes")
        osd_table.add_row([
            osd_id,
            hostname,
            mgr_util.format_bytes(kb_used, 5),
            mgr_util.format_bytes(kb_avail, 5),
            mgr_util.format_dimless(wr_ops_rate, 5),
            mgr_util.format_bytes(wr_byte_rate, 5),
            mgr_util.format_dimless(rd_ops_rate, 5),
            mgr_util.format_bytes(rd_byte_rate, 5),
            ','.join(osd['state']),
        ])

    return 0, osd_table.get_string(), ""
def _maybe_adjust(self):
    """Apply or report pg_num changes and publish autoscaler health checks.

    Walks the pool statuses from ``self._get_pool_status``.  Pools in
    'warn' mode that would be adjusted are listed under
    POOL_TOO_FEW_PGS / POOL_TOO_MANY_PGS; pools in 'on' mode that would
    be adjusted get an 'osd pool set ... pg_num' mon command, whose
    failure is logged rather than raised.

    target_size_ratio / target_size_bytes overcommit is reported per
    CRUSH root, and only for roots where at least one pool has such a
    target set.  The collected checks (possibly an empty dict) go to
    ``self.set_health_checks``.  Returns None.
    """
    self.log.info('_maybe_adjust')
    osdmap = self.get_osdmap()
    pools = osdmap.get_pools_by_name()
    ps, root_map, pool_root = self._get_pool_status(osdmap, pools)

    # Anyone in 'warn', set the health message for them and then
    # drop them from consideration.
    too_few = []
    too_many = []
    health_checks = {}

    # Per-root accumulators, keyed like root_map.
    total_ratio = {r: 0.0 for r in root_map}
    total_target_ratio = {r: 0.0 for r in root_map}
    target_ratio_pools = {r: [] for r in root_map}

    total_bytes = {r: 0 for r in root_map}
    total_target_bytes = {r: 0.0 for r in root_map}
    target_bytes_pools = {r: [] for r in root_map}

    for p in ps:
        root_id = p['crush_root_id']
        pool_name = p['pool_name']

        total_ratio[root_id] += max(p['actual_capacity_ratio'],
                                    p['target_ratio'])
        if p['target_ratio'] > 0:
            total_target_ratio[root_id] += p['target_ratio']
            target_ratio_pools[root_id].append(pool_name)

        target_raw = p['target_bytes'] * p['raw_used_rate']
        total_bytes[root_id] += max(p['actual_raw_used'], target_raw)
        if p['target_bytes'] > 0:
            total_target_bytes[root_id] += target_raw
            target_bytes_pools[root_id].append(pool_name)

        if not p['would_adjust']:
            continue

        if p['pg_autoscale_mode'] == 'warn':
            msg = 'Pool %s has %d placement groups, should have %d' % (
                pool_name,
                p['pg_num_target'],
                p['pg_num_final'])
            if p['pg_num_final'] > p['pg_num_target']:
                too_few.append(msg)
            else:
                too_many.append(msg)

        if p['pg_autoscale_mode'] == 'on':
            # Note that setting pg_num actually sets pg_num_target (see
            # OSDMonitor.cc)
            r = self.mon_command({
                'prefix': 'osd pool set',
                'pool': pool_name,
                'var': 'pg_num',
                'val': str(p['pg_num_final'])
            })

            if r[0] != 0:
                # FIXME: this is a serious and unexpected thing,
                # we should expose it as a cluster log error once
                # the hook for doing that from ceph-mgr modules is
                # in.
                self.log.error("pg_num adjustment on {0} to {1} failed: {2}"
                               .format(pool_name, p['pg_num_final'], r))

    if too_few:
        summary = "{0} pools have too few placement groups".format(
            len(too_few))
        health_checks['POOL_TOO_FEW_PGS'] = {
            'severity': 'warning',
            'summary': summary,
            'detail': too_few
        }
    if too_many:
        summary = "{0} pools have too many placement groups".format(
            len(too_many))
        health_checks['POOL_TOO_MANY_PGS'] = {
            'severity': 'warning',
            'summary': summary,
            'detail': too_many
        }

    too_much_target_ratio = []
    for root_id, total in total_ratio.items():
        total_target = total_target_ratio[root_id]
        # Without a target_size_ratio on any pool under this root there
        # is nothing to blame the overcommit on, so stay silent instead
        # of citing a 0.000 target on an empty pool list.
        if total_target > 0 and total > 1.0:
            too_much_target_ratio.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'target_size_ratio %.03f on pools %s' % (
                    root_map[root_id].pool_names,
                    total,
                    total_target,
                    target_ratio_pools[root_id]
                )
            )
        elif total_target > 1.0:
            too_much_target_ratio.append(
                'Pools %s have collective target_size_ratio %.03f > 1.0' % (
                    root_map[root_id].pool_names,
                    total_target
                )
            )
    if too_much_target_ratio:
        health_checks['POOL_TARGET_SIZE_RATIO_OVERCOMMITTED'] = {
            'severity': 'warning',
            'summary': "%d subtrees have overcommitted pool target_size_ratio" % len(too_much_target_ratio),
            'detail': too_much_target_ratio,
        }

    too_much_target_bytes = []
    for root_id, total in total_bytes.items():
        total_target = total_target_bytes[root_id]
        capacity = root_map[root_id].capacity
        # Same rule for target_size_bytes.
        if total_target > 0 and total > capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'target_size_bytes %s on pools %s' % (
                    root_map[root_id].pool_names,
                    total / capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                    target_bytes_pools[root_id]
                )
            )
        elif total_target > capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'collective target_size_bytes of %s' % (
                    root_map[root_id].pool_names,
                    total / capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                )
            )
    if too_much_target_bytes:
        health_checks['POOL_TARGET_SIZE_BYTES_OVERCOMMITTED'] = {
            'severity': 'warning',
            'summary': "%d subtrees have overcommitted pool target_size_bytes" % len(too_much_target_bytes),
            'detail': too_much_target_bytes,
        }

    self.set_health_checks(health_checks)
def _command_autoscale_status(self, format: str = 'plain'
                              ) -> Tuple[int, str, str]:
    """
    report on pool pg_num sizing recommendation and intent
    """
    # format: 'json' / 'json-pretty' dump the pool status list as sorted,
    #   indented JSON; any other value renders a borderless table.
    # Returns (retval, stdout, stderr) with retval 0.
    osdmap = self.get_osdmap()
    pools = osdmap.get_pools_by_name()
    ps, root_map = self._get_pool_status(osdmap, pools)

    if format in ('json', 'json-pretty'):
        return 0, json.dumps(ps, indent=4, sort_keys=True), ''

    # (column title, alignment) in display order; p['pg_num_ideal'] has
    # no column.
    columns = [
        ('POOL', 'l'),
        ('SIZE', 'r'),
        ('TARGET SIZE', 'r'),
        ('RATE', 'r'),
        ('RAW CAPACITY', 'r'),
        ('RATIO', 'r'),
        ('TARGET RATIO', 'r'),
        ('EFFECTIVE RATIO', 'r'),
        ('BIAS', 'r'),
        ('PG_NUM', 'r'),
        ('NEW PG_NUM', 'r'),
        ('AUTOSCALE', 'l'),
        ('BULK', 'l'),
    ]
    table = PrettyTable([title for title, _ in columns], border=False)
    table.left_padding_width = 0
    table.right_padding_width = 2
    for title, side in columns:
        table.align[title] = side

    for p in ps:
        # Optional cells stay blank when the value is unset / unchanged.
        final = str(p['pg_num_final']) if p['would_adjust'] else ''
        ts = (mgr_util.format_bytes(p['target_bytes'], 6)
              if p['target_bytes'] > 0 else '')
        tr = ('%.4f' % p['target_ratio']) if p['target_ratio'] > 0.0 else ''
        etr = (('%.4f' % p['effective_target_ratio'])
               if p['effective_target_ratio'] > 0.0 else '')
        table.add_row([
            p['pool_name'],
            mgr_util.format_bytes(p['logical_used'], 6),
            ts,
            p['raw_used_rate'],
            mgr_util.format_bytes(p['subtree_capacity'], 6),
            '%.4f' % p['capacity_ratio'],
            tr,
            etr,
            p['bias'],
            p['pg_num_target'],
            final,
            p['pg_autoscale_mode'],
            str(p['bulk'])
        ])
    return 0, table.get_string(), ''
def _maybe_adjust(self):
    """Apply or report pg_num changes and publish autoscaler health checks.

    Does nothing when the osdmap's required OSD release sorts before
    'nautilus'.  Otherwise, for each pool status:

    * mode 'warn' pools that would be adjusted are reported as having
      too few / too many PGs;
    * mode 'on' pools that would be adjusted get an 'osd pool set ...
      pg_num' mon command; if the pool has no entry in ``self._event``
      yet, a PgAdjustmentProgress is recorded and announced to the
      'progress' module through ``self.remote``.

    Per-root target_size_ratio / target_size_bytes overcommit is reported
    when a target is set.  The collected checks are passed to
    ``self.set_health_checks``.
    """
    self.log.info('_maybe_adjust')
    osdmap = self.get_osdmap()
    if osdmap.get_require_osd_release() < 'nautilus':
        return
    pools = osdmap.get_pools_by_name()
    ps, root_map, pool_root = self._get_pool_status(osdmap, pools)

    # Anyone in 'warn', set the health message for them and then
    # drop them from consideration.
    too_few, too_many = [], []
    health_checks = {}

    # Per-root accumulators, keyed like root_map.
    total_ratio = {r: 0.0 for r in root_map}
    total_target_ratio = {r: 0.0 for r in root_map}
    target_ratio_pools = {r: [] for r in root_map}

    total_bytes = {r: 0 for r in root_map}
    total_target_bytes = {r: 0.0 for r in root_map}
    target_bytes_pools = {r: [] for r in root_map}

    for p in ps:
        pool_id = str(p['pool_id'])
        name = p['pool_name']
        root = p['crush_root_id']

        total_ratio[root] += max(p['actual_capacity_ratio'],
                                 p['target_ratio'])
        if p['target_ratio'] > 0:
            total_target_ratio[root] += p['target_ratio']
            target_ratio_pools[root].append(name)

        target_raw = p['target_bytes'] * p['raw_used_rate']
        total_bytes[root] += max(p['actual_raw_used'], target_raw)
        if p['target_bytes'] > 0:
            total_target_bytes[root] += target_raw
            target_bytes_pools[root].append(name)

        if not p['would_adjust']:
            continue

        mode = p['pg_autoscale_mode']
        if mode == 'warn':
            msg = 'Pool %s has %d placement groups, should have %d' % (
                name, p['pg_num_target'], p['pg_num_final'])
            bucket = (too_few if p['pg_num_final'] > p['pg_num_target']
                      else too_many)
            bucket.append(msg)

        if mode == 'on':
            # Note that setting pg_num actually sets pg_num_target (see
            # OSDMonitor.cc)
            r = self.mon_command({
                'prefix': 'osd pool set',
                'pool': name,
                'var': 'pg_num',
                'val': str(p['pg_num_final'])
            })

            # Create new event for each pool
            # and update existing events
            # Call Progress Module to create progress event
            if pool_id not in self._event:
                # Re-read the map and look the pool up by numeric id.
                pools_by_id = self.get_osdmap().get_pools()
                pool_data = pools_by_id[int(pool_id)]
                pg_num = pool_data['pg_num']
                pg_num_target = pool_data['pg_num_target']
                ev_id = str(uuid.uuid4())
                direction = ('increasing' if pg_num < pg_num_target
                             else 'decreasing')
                pg_adj_obj = PgAdjustmentProgress(
                    pg_num, pg_num_target, ev_id, direction)
                self._event[pool_id] = pg_adj_obj
                self.remote(
                    'progress', 'update', ev_id,
                    ev_msg="PG autoscaler %s pool %s PGs from %d to %d" %
                    (pg_adj_obj._increase_decrease, pool_id, pg_num,
                     pg_num_target),
                    ev_progress=0.0,
                    refs=[("pool", int(pool_id))])

            if r[0] != 0:
                # FIXME: this is a serious and unexpected thing,
                # we should expose it as a cluster log error once
                # the hook for doing that from ceph-mgr modules is
                # in.
                self.log.error(
                    "pg_num adjustment on {0} to {1} failed: {2}".format(
                        name, p['pg_num_final'], r))

    if too_few:
        health_checks['POOL_TOO_FEW_PGS'] = {
            'severity': 'warning',
            'summary': "{0} pools have too few placement groups".format(
                len(too_few)),
            'count': len(too_few),
            'detail': too_few
        }
    if too_many:
        health_checks['POOL_TOO_MANY_PGS'] = {
            'severity': 'warning',
            'summary': "{0} pools have too many placement groups".format(
                len(too_many)),
            'count': len(too_many),
            'detail': too_many
        }

    too_much_target_ratio = []
    for root_id, total in total_ratio.items():
        total_target = total_target_ratio[root_id]
        names = root_map[root_id].pool_names
        if total_target > 0 and total > 1.0:
            too_much_target_ratio.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'target_size_ratio %.03f on pools %s' % (
                    names, total, total_target,
                    target_ratio_pools[root_id]))
        elif total_target > 1.0:
            too_much_target_ratio.append(
                'Pools %s have collective target_size_ratio %.03f > 1.0' % (
                    names, total_target))
    if too_much_target_ratio:
        health_checks['POOL_TARGET_SIZE_RATIO_OVERCOMMITTED'] = {
            'severity': 'warning',
            'summary': "%d subtrees have overcommitted pool target_size_ratio" % len(too_much_target_ratio),
            'count': len(too_much_target_ratio),
            'detail': too_much_target_ratio,
        }

    too_much_target_bytes = []
    for root_id, total in total_bytes.items():
        total_target = total_target_bytes[root_id]
        root = root_map[root_id]
        if total_target > 0 and total > root.capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'target_size_bytes %s on pools %s' % (
                    root.pool_names,
                    total / root.capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                    target_bytes_pools[root_id]))
        elif total_target > root.capacity:
            too_much_target_bytes.append(
                'Pools %s overcommit available storage by %.03fx due to '
                'collective target_size_bytes of %s' % (
                    root.pool_names,
                    total / root.capacity,
                    mgr_util.format_bytes(total_target, 5, colored=False),
                ))
    if too_much_target_bytes:
        health_checks['POOL_TARGET_SIZE_BYTES_OVERCOMMITTED'] = {
            'severity': 'warning',
            'summary': "%d subtrees have overcommitted pool target_size_bytes" % len(too_much_target_bytes),
            'count': len(too_much_target_bytes),
            'detail': too_much_target_bytes,
        }

    self.set_health_checks(health_checks)
def handle_fs_status(self, cmd):
    """
    Build the plain-text report for the filesystem status command.

    For every filesystem in the FS map (or only the one named by
    ``cmd['fs']`` when that key is set) the report contains a table of
    MDS ranks and standby-replay daemons followed by a table of the
    metadata/data pools.  A table of standby daemons and a summary of
    the MDS versions seen are appended at the end.

    :param cmd: command dictionary; the optional ``'fs'`` entry limits
                the report to the filesystem with that name
    :return: ``(0, report, "")`` on success, or
             ``(errno.EINVAL, "", message)`` when a filter was given
             and nothing was reported for it
    """
    output = ""

    fs_filter = cmd.get('fs', None)

    # version string -> names of the daemons reporting that version
    mds_versions = defaultdict(list)

    # Returned by get_metadata() when it has nothing for a daemon.  The
    # key must be the one that is read below ('ceph_version') so such
    # daemons are grouped under "unknown" instead of under None.
    unknown_metadata = defaultdict(lambda: None,
                                   {'ceph_version': 'unknown'})

    def record_version(daemon_name):
        # File the daemon under the version its metadata reports.
        metadata = self.get_metadata('mds', daemon_name,
                                     default=unknown_metadata)
        mds_versions[metadata['ceph_version']].append(daemon_name)

    fsmap = self.get("fs_map")
    for filesystem in fsmap['filesystems']:
        mdsmap = filesystem['mdsmap']
        if fs_filter and mdsmap['fs_name'] != fs_filter:
            continue

        rank_table = PrettyTable(
            ("Rank", "State", "MDS", "Activity", "dns", "inos"),
            hrules=prettytable.FRAME)

        client_count = 0

        for rank in mdsmap["in"]:
            rank_key = "mds_{0}".format(rank)
            if rank_key not in mdsmap["up"]:
                # Rank is in the map but no daemon currently holds it.
                rank_table.add_row([rank, "failed", "", "", "", ""])
                continue

            gid = mdsmap['up'][rank_key]
            info = mdsmap['info']['gid_{0}'.format(gid)]
            dns = self.get_latest("mds", info['name'], "mds_mem.dn")
            inos = self.get_latest("mds", info['name'], "mds_mem.ino")

            # Rank 0's session count is authoritative.  If rank 0 was
            # down, look at another rank's sessionmap to get an
            # indication of clients.
            if rank == 0 or client_count == 0:
                client_count = self.get_latest(
                    "mds", info['name'], "mds_sessions.session_count")

            laggy = "laggy_since" in info

            state = info['state'].split(":")[1]
            if laggy:
                state += "(laggy)"
            if state == "active" and not laggy:
                c_state = mgr_util.colorize(state, mgr_util.GREEN)
            else:
                c_state = mgr_util.colorize(state, mgr_util.YELLOW)

            # Populate based on context of state, e.g. client
            # ops for an active daemon, replay progress, reconnect
            # progress.  A laggy daemon carries the "(laggy)" suffix
            # and therefore never matches "active" here.
            activity = ""
            if state == "active":
                activity = "Reqs: " + mgr_util.format_dimless(
                    self.get_rate("mds", info['name'],
                                  "mds_server.handle_client_request"),
                    5) + "/s"

            record_version(info['name'])
            rank_table.add_row([
                mgr_util.bold(str(rank)), c_state, info['name'],
                activity,
                mgr_util.format_dimless(dns, 5),
                mgr_util.format_dimless(inos, 5)
            ])

        # Find the standby replays
        for _gid_str, daemon_info in six.iteritems(mdsmap['info']):
            if daemon_info['state'] != "up:standby-replay":
                continue

            inos = self.get_latest("mds", daemon_info['name'],
                                   "mds_mem.ino")
            dns = self.get_latest("mds", daemon_info['name'],
                                  "mds_mem.dn")

            activity = "Evts: " + mgr_util.format_dimless(
                self.get_rate("mds", daemon_info['name'],
                              "mds_log.replayed"),
                5) + "/s"

            record_version(daemon_info['name'])
            rank_table.add_row([
                "{0}-s".format(daemon_info['rank']), "standby-replay",
                daemon_info['name'], activity,
                mgr_util.format_dimless(dns, 5),
                mgr_util.format_dimless(inos, 5)
            ])

        # Index pool statistics and pool descriptions by pool id.
        df = self.get("df")
        pool_stats = {p['id']: p['stats'] for p in df['pools']}
        osdmap = self.get("osd_map")
        pools = {p['pool']: p for p in osdmap['pools']}
        metadata_pool_id = mdsmap['metadata_pool']
        data_pool_ids = mdsmap['data_pools']

        pools_table = PrettyTable(["Pool", "type", "used", "avail"])
        for pool_id in [metadata_pool_id] + data_pool_ids:
            pool_type = ("metadata" if pool_id == metadata_pool_id
                         else "data")
            stats = pool_stats[pool_id]
            pools_table.add_row([
                pools[pool_id]['pool_name'], pool_type,
                mgr_util.format_bytes(stats['bytes_used'], 5),
                mgr_util.format_bytes(stats['max_avail'], 5)
            ])

        output += "{0} - {1} clients\n".format(mdsmap['fs_name'],
                                               client_count)
        output += "=" * len(mdsmap['fs_name']) + "\n"
        output += rank_table.get_string()
        output += "\n" + pools_table.get_string() + "\n"

    # Nothing was emitted although a filter was given: no filesystem
    # matched the requested name.
    if not output and fs_filter is not None:
        return errno.EINVAL, "", "Invalid filesystem: " + fs_filter

    standby_table = PrettyTable(["Standby MDS"])
    for standby in fsmap['standbys']:
        record_version(standby['name'])
        standby_table.add_row([standby['name']])

    output += "\n" + standby_table.get_string() + "\n"

    # A single version is printed as one line; otherwise list which
    # daemons run which version.
    if len(mds_versions) == 1:
        output += "MDS version: {0}".format(list(mds_versions)[0])
    else:
        version_table = PrettyTable(["version", "daemons"])
        for version, daemons in six.iteritems(mds_versions):
            version_table.add_row([version, ", ".join(daemons)])
        output += version_table.get_string() + "\n"

    return 0, output, ""
def handle_fs_status(self, cmd):
    """
    Build the plain-text report for the filesystem status command.

    For every filesystem in the FS map (or only the one named by
    ``cmd['fs']`` when that key is set) the report contains a table of
    MDS ranks and standby-replay daemons followed by a table of the
    metadata/data pools.  A table of standby daemons and a summary of
    the MDS versions seen are appended at the end.

    :param cmd: command dictionary; the optional ``'fs'`` entry limits
                the report to the filesystem with that name
    :return: ``(0, report, "")`` on success, or
             ``(errno.EINVAL, "", message)`` when a filter was given
             and nothing was reported for it
    """
    output = ""

    fs_filter = cmd.get('fs', None)

    # version string -> names of the daemons reporting that version
    mds_versions = defaultdict(list)

    def record_version(daemon_name):
        # File the daemon under the version its metadata reports.  If
        # get_metadata() yields nothing for the daemon, fall back to an
        # empty dict so it is listed as "unknown" instead of the whole
        # command failing on ``None.get``.
        metadata = self.get_metadata('mds', daemon_name) or {}
        version = metadata.get('ceph_version', "unknown")
        mds_versions[version].append(daemon_name)

    fsmap = self.get("fs_map")
    for filesystem in fsmap['filesystems']:
        mdsmap = filesystem['mdsmap']
        if fs_filter and mdsmap['fs_name'] != fs_filter:
            continue

        rank_table = PrettyTable(
            ("Rank", "State", "MDS", "Activity", "dns", "inos"),
            hrules=prettytable.FRAME
        )

        client_count = 0

        for rank in mdsmap["in"]:
            rank_key = "mds_{0}".format(rank)
            if rank_key not in mdsmap["up"]:
                # Rank is in the map but no daemon currently holds it.
                rank_table.add_row([
                    rank, "failed", "", "", "", ""
                ])
                continue

            gid = mdsmap['up'][rank_key]
            info = mdsmap['info']['gid_{0}'.format(gid)]
            dns = self.get_latest("mds", info['name'], "mds_mem.dn")
            inos = self.get_latest("mds", info['name'], "mds_mem.ino")

            # Rank 0's session count is authoritative.  If rank 0 was
            # down, look at another rank's sessionmap to get an
            # indication of clients.
            if rank == 0 or client_count == 0:
                client_count = self.get_latest(
                    "mds", info['name'], "mds_sessions.session_count")

            laggy = "laggy_since" in info

            state = info['state'].split(":")[1]
            if laggy:
                state += "(laggy)"
            if state == "active" and not laggy:
                c_state = self.colorize(state, self.GREEN)
            else:
                c_state = self.colorize(state, self.YELLOW)

            # Populate based on context of state, e.g. client
            # ops for an active daemon, replay progress, reconnect
            # progress.  A laggy daemon carries the "(laggy)" suffix
            # and therefore never matches "active" here.
            activity = ""
            if state == "active":
                activity = "Reqs: " + mgr_util.format_dimless(
                    self.get_rate("mds", info['name'],
                                  "mds_server.handle_client_request"),
                    5
                ) + "/s"

            record_version(info['name'])
            rank_table.add_row([
                self.bold(str(rank)), c_state, info['name'],
                activity,
                mgr_util.format_dimless(dns, 5),
                mgr_util.format_dimless(inos, 5)
            ])

        # Find the standby replays
        for _gid_str, daemon_info in six.iteritems(mdsmap['info']):
            if daemon_info['state'] != "up:standby-replay":
                continue

            inos = self.get_latest("mds", daemon_info['name'],
                                   "mds_mem.ino")
            dns = self.get_latest("mds", daemon_info['name'],
                                  "mds_mem.dn")

            activity = "Evts: " + mgr_util.format_dimless(
                self.get_rate("mds", daemon_info['name'],
                              "mds_log.replayed"),
                5
            ) + "/s"

            record_version(daemon_info['name'])
            rank_table.add_row([
                "{0}-s".format(daemon_info['rank']), "standby-replay",
                daemon_info['name'], activity,
                mgr_util.format_dimless(dns, 5),
                mgr_util.format_dimless(inos, 5)
            ])

        # Index pool statistics and pool descriptions by pool id.
        df = self.get("df")
        pool_stats = {p['id']: p['stats'] for p in df['pools']}
        osdmap = self.get("osd_map")
        pools = {p['pool']: p for p in osdmap['pools']}
        metadata_pool_id = mdsmap['metadata_pool']
        data_pool_ids = mdsmap['data_pools']

        pools_table = PrettyTable(["Pool", "type", "used", "avail"])
        for pool_id in [metadata_pool_id] + data_pool_ids:
            pool_type = ("metadata" if pool_id == metadata_pool_id
                         else "data")
            stats = pool_stats[pool_id]
            pools_table.add_row([
                pools[pool_id]['pool_name'], pool_type,
                mgr_util.format_bytes(stats['bytes_used'], 5),
                mgr_util.format_bytes(stats['max_avail'], 5)
            ])

        output += "{0} - {1} clients\n".format(
            mdsmap['fs_name'], client_count)
        output += "=" * len(mdsmap['fs_name']) + "\n"
        output += rank_table.get_string()
        output += "\n" + pools_table.get_string() + "\n"

    # Nothing was emitted although a filter was given: no filesystem
    # matched the requested name.
    if not output and fs_filter is not None:
        return errno.EINVAL, "", "Invalid filesystem: " + fs_filter

    standby_table = PrettyTable(["Standby MDS"])
    for standby in fsmap['standbys']:
        record_version(standby['name'])
        standby_table.add_row([standby['name']])

    output += "\n" + standby_table.get_string() + "\n"

    # A single version is printed as one line; otherwise list which
    # daemons run which version.
    if len(mds_versions) == 1:
        output += "MDS version: {0}".format(list(mds_versions)[0])
    else:
        version_table = PrettyTable(["version", "daemons"])
        for version, daemons in six.iteritems(mds_versions):
            version_table.add_row([
                version, ", ".join(daemons)
            ])
        output += version_table.get_string() + "\n"

    return 0, output, ""