def _osd(self, osd_id):
    osd_id = int(osd_id)
    osd_map = global_instance().get("osd_map")
    osd = None
    for o in osd_map['osds']:
        if o['osd'] == osd_id:
            osd = o
            break
    assert osd is not None  # TODO 400

    osd_spec = "{0}".format(osd_id)
    osd_metadata = global_instance().get_metadata("osd", osd_spec)

    result = CommandResult("")
    global_instance().send_command(result, "osd", osd_spec, json.dumps({
        "prefix": "perf histogram dump",
    }), "")
    r, outb, outs = result.wait()
    assert r == 0
    histogram = json.loads(outb)

    return {
        "osd": osd,
        "osd_metadata": osd_metadata,
        "osd_histogram": histogram
    }
def handle_command(self, inbuf, cmd):
    self.log.error("handle_command")

    if cmd['prefix'] == 'device query-daemon-health-metrics':
        who = cmd.get('who', '')
        if who[0:4] != 'osd.':
            return (-errno.EINVAL, '', 'not a valid <osd.NNN> id')
        osd_id = who[4:]
        result = CommandResult('')
        self.send_command(result, 'osd', osd_id, json.dumps({
            'prefix': 'smart',
            'format': 'json',
        }), '')
        r, outb, outs = result.wait()
        return (r, outb, outs)
    elif cmd['prefix'] == 'device scrape-daemon-health-metrics':
        who = cmd.get('who', '')
        if who[0:4] != 'osd.':
            return (-errno.EINVAL, '', 'not a valid <osd.NNN> id')
        osd_id = int(who[4:])  # renamed from `id` to avoid shadowing the builtin
        return self.scrape_osd(osd_id)
    elif cmd['prefix'] == 'device scrape-health-metrics':
        if 'devid' in cmd:
            return self.scrape_device(cmd['devid'])
        return self.scrape_all()
    elif cmd['prefix'] == 'device show-health-metrics':
        return self.show_device_metrics(cmd['devid'], cmd.get('sample'))
    else:
        # mgr should respect our self.COMMANDS and not call us for
        # any prefix we don't advertise
        raise NotImplementedError(cmd['prefix'])
def _open_connection(self, pool_name='device_health_metrics'):
    pools = self.module.rados.list_pools()
    is_pool = False
    for pool in pools:
        if pool == pool_name:
            is_pool = True
            break
    if not is_pool:
        self.module.log.debug('create %s pool' % pool_name)
        # create pool
        result = CommandResult('')
        self.module.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd pool create',
            'format': 'json',
            'pool': pool_name,
            'pg_num': 1,
        }), '')
        r, outb, outs = result.wait()
        assert r == 0

        # set pool application
        result = CommandResult('')
        self.module.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd pool application enable',
            'format': 'json',
            'pool': pool_name,
            'app': 'mgr_devicehealth',
        }), '')
        r, outb, outs = result.wait()
        assert r == 0

    ioctx = self.module.rados.open_ioctx(pool_name)
    return ioctx
def get_file_sd_config(self):
    servers = self.list_servers()
    targets = []
    for server in servers:
        hostname = server.get('hostname', '')
        for service in server.get('services', []):
            if service['type'] != 'mgr':
                continue
            id_ = service['id']
            # get port for prometheus module at mgr with id_
            # TODO use get_config_prefix or get_config here once
            # https://github.com/ceph/ceph/pull/20458 is merged
            result = CommandResult("")
            global_instance().send_command(
                result, "mon", '',
                json.dumps({
                    "prefix": "config-key get",
                    'key': "config/mgr/mgr/prometheus/{}/server_port".format(id_),
                }),
                "")
            r, outb, outs = result.wait()
            if r != 0:
                global_instance().log.error("Failed to retrieve port for mgr {}: {}".format(id_, outs))
                targets.append('{}:{}'.format(hostname, DEFAULT_PORT))
            else:
                port = json.loads(outb)
                targets.append('{}:{}'.format(hostname, port))

    ret = [
        {
            "targets": targets,
            "labels": {}
        }
    ]
    return 0, json.dumps(ret), ""
def reset_device_life_expectancy(self, device_id):
    result = CommandResult('')
    self.module.send_command(result, 'mon', '', json.dumps({
        'prefix': 'device rm-life-expectancy',
        'devid': device_id
    }), '')
    ret, outb, outs = result.wait()
    if ret != 0:
        self.module.log.error(
            'failed to reset device life expectancy, %s' % outs)
    return ret
def _get(self):
    mds_spec = "{0}:0".format(self.fscid)
    result = CommandResult("")
    self._module.send_command(result, "mds", mds_spec, json.dumps({
        "prefix": "session ls",
    }), "")
    r, outb, outs = result.wait()
    # TODO handle nonzero returns, e.g. when rank isn't active
    assert r == 0
    return json.loads(outb)
def compat_weight_set_reweight(self, osd, new_weight):
    self.log.debug('ceph osd crush weight-set reweight-compat')
    result = CommandResult('')
    self.send_command(result, 'mon', '', json.dumps({
        'prefix': 'osd crush weight-set reweight-compat',
        'format': 'json',
        'item': 'osd.%d' % osd,
        'weight': [new_weight],
    }), '')
    r, outb, outs = result.wait()
    if r != 0:
        self.log.error('Error setting compat weight-set osd.%d to %f' %
                       (osd, new_weight))
        return
def get_compat_weight_set_weights(self, ms):
    if not CRUSHMap.have_default_choose_args(ms.crush_dump):
        # enable compat weight-set first
        self.log.debug('ceph osd crush weight-set create-compat')
        result = CommandResult('')
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd crush weight-set create-compat',
            'format': 'json',
        }), '')
        r, outb, outs = result.wait()
        if r != 0:
            self.log.error('Error creating compat weight-set')
            return

        result = CommandResult('')
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd crush dump',
            'format': 'json',
        }), '')
        r, outb, outs = result.wait()
        if r != 0:
            self.log.error('Error dumping crush map')
            return
        try:
            crushmap = json.loads(outb)
        except ValueError:  # narrowed from a bare except
            raise RuntimeError('unable to parse crush map')
    else:
        crushmap = ms.crush_dump

    raw = CRUSHMap.get_default_choose_args(crushmap)
    weight_set = {}
    for b in raw:
        bucket = None
        for t in crushmap['buckets']:
            if t['id'] == b['bucket_id']:
                bucket = t
                break
        if not bucket:
            raise RuntimeError('could not find bucket %s' % b['bucket_id'])
        self.log.debug('bucket items %s' % bucket['items'])
        self.log.debug('weight set %s' % b['weight_set'][0])
        if len(bucket['items']) != len(b['weight_set'][0]):
            raise RuntimeError('weight-set size does not match bucket items')
        for pos in range(len(bucket['items'])):
            weight_set[bucket['items'][pos]['id']] = b['weight_set'][0][pos]

    self.log.debug('weight_set weights %s' % weight_set)
    return weight_set
def _config_dump(self):
    """Report cluster configuration

    This report is the standard `config dump` report. It does not
    include configuration defaults; these can be inferred from the
    version number.
    """
    result = CommandResult("")
    args = dict(prefix="config dump", format="json")
    self.send_command(result, "mon", "", json.dumps(args), "")
    ret, outb, outs = result.wait()
    if ret == 0:
        return json.loads(outb), []
    else:
        self.log.warning(
            "send_command 'config dump' failed. ret={}, outs=\"{}\"".format(ret, outs))
        return [], ["Failed to read monitor config dump"]
def do_scrape_osd(self, osd_id, ioctx, devid=''):
    self.log.debug('do_scrape_osd osd.%d' % osd_id)

    # scrape from osd
    result = CommandResult('')
    self.send_command(result, 'osd', str(osd_id), json.dumps({
        'prefix': 'smart',
        'format': 'json',
        'devid': devid,
    }), '')
    r, outb, outs = result.wait()

    try:
        return json.loads(outb)
    except ValueError:  # narrowed from a bare except
        self.log.debug('Failed to parse JSON result from "%s"' % outb)
def handle_command(self, cmd):
    self.log.error("handle_command")

    if cmd['prefix'] == 'osd smart get':
        result = CommandResult('')
        self.send_command(result, 'osd', cmd['osd_id'], json.dumps({
            'prefix': 'smart',
            'format': 'json',
        }), '')
        r, outb, outs = result.wait()
        return (r, outb, outs)
    else:
        # mgr should respect our self.COMMANDS and not call us for
        # any prefix we don't advertise
        raise NotImplementedError(cmd['prefix'])
def get(self, svc_id):
    result = CommandResult('')
    mgr.send_command(result, 'osd', svc_id, json.dumps({
        'prefix': 'perf histogram dump',
    }), '')
    r, outb, outs = result.wait()
    if r != 0:
        logger.warning('Failed to load histogram for OSD %s', svc_id)
        logger.debug(outs)
        histogram = outs
    else:
        histogram = json.loads(outb)
    return {
        'osd_map': self.get_osd_map()[svc_id],
        'osd_metadata': mgr.get_metadata('osd', svc_id),
        'histogram': histogram,
    }
def do_scrape_daemon(self, daemon_type, daemon_id, devid=''):
    """
    :return: a dict, or None if the scrape failed.
    """
    self.log.debug('do_scrape_daemon %s.%s' % (daemon_type, daemon_id))
    result = CommandResult('')
    self.send_command(result, daemon_type, daemon_id, json.dumps({
        'prefix': 'smart',
        'format': 'json',
        'devid': devid,
    }), '')
    r, outb, outs = result.wait()
    try:
        return json.loads(outb)
    except (IndexError, ValueError):
        self.log.error(
            "Failed to parse JSON result from daemon {0}.{1} ({2})".format(
                daemon_type, daemon_id, outb))
def handle_command(self, _, cmd):
    self.log.error("handle_command")

    if cmd['prefix'] == 'device query-daemon-health-metrics':
        who = cmd.get('who', '')
        if not self.is_valid_daemon_name(who):
            return -errno.EINVAL, '', 'not a valid mon or osd daemon name'
        (daemon_type, daemon_id) = cmd.get('who', '').split('.')
        result = CommandResult('')
        self.send_command(result, daemon_type, daemon_id, json.dumps({
            'prefix': 'smart',
            'format': 'json',
        }), '')
        r, outb, outs = result.wait()
        return r, outb, outs
    elif cmd['prefix'] == 'device scrape-daemon-health-metrics':
        who = cmd.get('who', '')
        if not self.is_valid_daemon_name(who):
            return -errno.EINVAL, '', 'not a valid mon or osd daemon name'
        (daemon_type, daemon_id) = cmd.get('who', '').split('.')
        return self.scrape_daemon(daemon_type, daemon_id)
    elif cmd['prefix'] == 'device scrape-health-metrics':
        if 'devid' in cmd:
            return self.scrape_device(cmd['devid'])
        return self.scrape_all()
    elif cmd['prefix'] == 'device get-health-metrics':
        return self.show_device_metrics(cmd['devid'], cmd.get('sample'))
    elif cmd['prefix'] == 'device check-health':
        return self.check_health()
    elif cmd['prefix'] == 'device monitoring on':
        self.set_module_option('enable_monitoring', True)
        self.event.set()
        return 0, '', ''
    elif cmd['prefix'] == 'device monitoring off':
        self.set_module_option('enable_monitoring', False)
        self.set_health_checks({})  # avoid stuck health alerts
        return 0, '', ''
    elif cmd['prefix'] == 'device predict-life-expectancy':
        return self.predict_lift_expectancy(cmd['devid'])
    else:
        # mgr should respect our self.COMMANDS and not call us for
        # any prefix we don't advertise
        raise NotImplementedError(cmd['prefix'])
def run(self, commands):
    """
    Execute the given list of commands in parallel and return the
    list of command results.
    """
    # Fire off all commands without waiting; callers collect results later
    results = []
    for index in range(len(commands)):
        tag = '%s:%d' % (str(self.id), index)

        # Store the result
        result = CommandResult(tag)
        result.command = common.humanify_command(commands[index])
        results.append(result)

        # Run the command
        context.instance.send_command(result, 'mon', '',
                                      json.dumps(commands[index]), tag)

    return results
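# --- Hedged usage sketch (not from the source) -------------------------------
# Assuming a request object exposing run() as above, a caller would queue
# several mon commands at once and then block on each CommandResult. The
# `request` name and the command dicts below are illustrative only:
#
#     results = request.run([
#         {'prefix': 'osd pool create', 'pool': 'foo', 'pg_num': 16},
#         {'prefix': 'osd pool application enable', 'pool': 'foo', 'app': 'rbd'},
#     ])
#     for res in results:
#         r, outb, outs = res.wait()   # blocks until that command completes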
def get_object_pg_info(self, pool_name, object_name):
    result = CommandResult('')
    data_json = {}
    self.module.send_command(
        result, 'mon', '', json.dumps({
            'prefix': 'osd map',
            'format': 'json',
            'pool': pool_name,
            'object': object_name,
        }), '')
    ret, outb, outs = result.wait()
    try:
        if outb:
            data_json = json.loads(outb)
        else:
            self.module.log.error('unable to get %s pg info' % pool_name)
    except Exception as e:
        self.module.log.error(
            'unable to get %s pg, error: %s' % (pool_name, str(e)))
    return data_json
def handle_command(self, _, cmd):
    self.log.error("handle_command")

    if cmd['prefix'] == 'device query-daemon-health-metrics':
        who = cmd.get('who', '')
        if who[0:4] != 'osd.':
            return -errno.EINVAL, '', 'not a valid <osd.NNN> id'
        osd_id = who[4:]
        result = CommandResult('')
        self.send_command(result, 'osd', osd_id, json.dumps({
            'prefix': 'smart',
            'format': 'json',
        }), '')
        r, outb, outs = result.wait()
        return r, outb, outs
    elif cmd['prefix'] == 'device scrape-daemon-health-metrics':
        who = cmd.get('who', '')
        if who[0:4] != 'osd.':
            return -errno.EINVAL, '', 'not a valid <osd.NNN> id'
        osd_id = int(who[4:])
        return self.scrape_osd(osd_id)
    elif cmd['prefix'] == 'device scrape-health-metrics':
        if 'devid' in cmd:
            return self.scrape_device(cmd['devid'])
        return self.scrape_all()
    elif cmd['prefix'] == 'device get-health-metrics':
        return self.show_device_metrics(cmd['devid'], cmd.get('sample'))
    elif cmd['prefix'] == 'device check-health':
        return self.check_health()
    elif cmd['prefix'] == 'device monitoring on':
        self.set_config('enable_monitoring', 'true')
        self.event.set()
        return 0, '', ''
    elif cmd['prefix'] == 'device monitoring off':
        self.set_config('enable_monitoring', 'false')
        self.set_health_checks({})  # avoid stuck health alerts
        return 0, '', ''
    else:
        # mgr should respect our self.COMMANDS and not call us for
        # any prefix we don't advertise
        raise NotImplementedError(cmd['prefix'])
def load_buffer(buf, channel_name):
    # NB: `self` is not a parameter here; this is a nested helper that
    # relies on `self` being captured from the enclosing scope.
    result = CommandResult("")
    self.send_command(result, "mon", "", json.dumps({
        "prefix": "log last",
        "format": "json",
        "channel": channel_name,
        "num": LOG_BUFFER_SIZE
    }), "")
    r, outb, outs = result.wait()
    if r != 0:
        # Oh well.  We won't let this stop us though.
        self.log.error("Error fetching log history (r={0}, \"{1}\")".format(
            r, outs))
    else:
        try:
            lines = json.loads(outb)
        except ValueError:
            self.log.error("Error decoding log history")
        else:
            for l in lines:
                buf.appendleft(l)
def _command_spam(self):
    self.log.info("Starting command_spam workload...")
    while not self._event.is_set():
        osdmap = self.get_osdmap()
        dump = osdmap.dump()
        count = len(dump['osds'])
        i = int(random.random() * count)
        w = random.random()

        result = CommandResult('')
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd reweight',
            'id': i,
            'weight': w
        }), '')

        crush = osdmap.get_crush().dump()
        r, outb, outs = result.wait()

    self._event.clear()
    self.log.info("Ended command_spam workload...")
def set_device_life_expectancy(self, device_id, from_date, to_date=None):
    result = CommandResult('')

    if to_date is None:
        self.module.send_command(result, 'mon', '', json.dumps({
            'prefix': 'device set-life-expectancy',
            'devid': device_id,
            'from': from_date
        }), '')
    else:
        self.module.send_command(result, 'mon', '', json.dumps({
            'prefix': 'device set-life-expectancy',
            'devid': device_id,
            'from': from_date,
            'to': to_date
        }), '')
    ret, outb, outs = result.wait()
    if ret != 0:
        self.module.log.error(
            'failed to set device life expectancy, %s' % outs)
    return ret
def do_scrape_osd(self, osd_id, devid=''):
    """
    :return: a dict, or None if the scrape failed.
    """
    self.log.debug('do_scrape_osd osd.%d' % osd_id)

    # scrape from osd
    result = CommandResult('')
    self.send_command(result, 'osd', str(osd_id), json.dumps({
        'prefix': 'smart',
        'format': 'json',
        'devid': devid,
    }), '')
    r, outb, outs = result.wait()

    try:
        return json.loads(outb)
    except (IndexError, ValueError):
        self.log.error(
            "Failed to parse JSON result from OSD {0} ({1})".format(
                osd_id, outb))
def open_connection(self, create_if_missing=True):
    pools = self.rados.list_pools()
    is_pool = False
    for pool in pools:
        if pool == self.pool_name:
            is_pool = True
            break
    if not is_pool:
        if not create_if_missing:
            return None
        self.log.debug('create %s pool' % self.pool_name)
        # create pool
        result = CommandResult('')
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd pool create',
            'format': 'json',
            'pool': self.pool_name,
            'pg_num': 1,
        }), '')
        r, outb, outs = result.wait()
        assert r == 0

        # set pool application
        result = CommandResult('')
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd pool application enable',
            'format': 'json',
            'pool': self.pool_name,
            'app': 'mgr_devicehealth',
        }), '')
        r, outb, outs = result.wait()
        assert r == 0

    ioctx = self.rados.open_ioctx(self.pool_name)
    return ioctx
def mark_out_etc(self, osd_ids: List[str]) -> None:
    self.log.info('Marking out OSDs: %s' % osd_ids)
    result = CommandResult('')
    self.send_command(result, 'mon', '', json.dumps({
        'prefix': 'osd out',
        'format': 'json',
        'ids': osd_ids,
    }), '')
    r, outb, outs = result.wait()
    if r != 0:
        self.log.warning('Could not mark OSD %s out. r: [%s], outb: [%s], outs: [%s]',
                         osd_ids, r, outb, outs)
    for osd_id in osd_ids:
        result = CommandResult('')
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd primary-affinity',
            'format': 'json',
            'id': int(osd_id),
            'weight': 0.0,
        }), '')
        r, outb, outs = result.wait()
        if r != 0:
            self.log.warning('Could not set osd.%s primary-affinity, '
                             'r: [%s], outb: [%s], outs: [%s]',
                             osd_id, r, outb, outs)
def create_device_pool(self):
    self.log.debug('create %s pool' % self.pool_name)
    # create pool
    result = CommandResult('')
    self.send_command(
        result, 'mon', '', json.dumps({
            'prefix': 'osd pool create',
            'format': 'json',
            'pool': self.pool_name,
            'pg_num': 1,
            'pg_num_min': 1,
        }), '')
    r, outb, outs = result.wait()
    assert r == 0

    # set pool application
    result = CommandResult('')
    self.send_command(
        result, 'mon', '', json.dumps({
            'prefix': 'osd pool application enable',
            'format': 'json',
            'pool': self.pool_name,
            'app': 'mgr_devicehealth',
        }), '')
    r, outb, outs = result.wait()
    assert r == 0
def mark_out_etc(self, osd_ids):
    self.log.info('Marking out OSDs: %s' % osd_ids)
    result = CommandResult('')
    self.send_command(result, 'mon', '', json.dumps({
        'prefix': 'osd out',
        'format': 'json',
        'ids': osd_ids,
    }), '')
    r, outb, outs = result.wait()
    if r != 0:
        self.log.warning('Could not mark OSD %s out. r: [%s], outb: [%s], outs: [%s]' %
                         (osd_ids, r, outb, outs))
    for osd_id in osd_ids:
        result = CommandResult('')
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'osd primary-affinity',
            'format': 'json',
            'id': int(osd_id),
            'weight': 0.0,
        }), '')
        r, outb, outs = result.wait()
        if r != 0:
            # log `outs` to match the label (the original passed `outb` here)
            self.log.warning('Could not set osd.%s primary-affinity, r: [%s], outs: [%s]' %
                             (osd_id, r, outs))
def run(self):
    cmd = self._commands[0]
    self._commands = self._commands[1:]
    self.result = CommandResult(self._tag)
    log.debug("cmd={0}".format(cmd))

    # Commands come in as a 2-tuple of (prefix, args); convert them
    # to the form that send_command uses
    command = cmd[1]
    command['prefix'] = cmd[0]
    rest_plugin().send_command(self.result, json.dumps(command), self._tag)
def send_command(srv_type, prefix, srv_spec='', **kwargs):
    # type: (str, str, Optional[str], Any) -> Any
    """
    :type prefix: str
    :param srv_type: mon |
    :param kwargs: will be added to argdict
    :param srv_spec: typically empty. or something like "<fs_id>:0"

    :raises PermissionError: See rados.make_ex
    :raises ObjectNotFound: See rados.make_ex
    :raises IOError: See rados.make_ex
    :raises NoSpace: See rados.make_ex
    :raises ObjectExists: See rados.make_ex
    :raises ObjectBusy: See rados.make_ex
    :raises NoData: See rados.make_ex
    :raises InterruptedOrTimeoutError: See rados.make_ex
    :raises TimedOut: See rados.make_ex
    :raises SendCommandError: return code != 0
    """
    argdict = {
        "prefix": prefix,
        "format": "json",
    }
    argdict.update({k: v for k, v in kwargs.items() if v is not None})
    result = CommandResult("")
    mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "")
    r, outb, outs = result.wait()
    if r != 0:
        logger.error("send_command '%s' failed. (r=%s, outs=\"%s\", kwargs=%s)",
                     prefix, r, outs, kwargs)
        raise SendCommandError(outs, prefix, argdict, r)

    try:
        return json.loads(outb or outs)
    except Exception:  # pylint: disable=broad-except
        return outb
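# --- Hedged usage sketch (not from the source) -------------------------------
# The wrapper above turns the asynchronous CommandResult round-trip into a
# synchronous call that returns parsed JSON. Assuming it is importable inside
# the same mgr runtime, a call site could look like this ('osd dump' is a
# standard mon command; a nonzero return code surfaces as SendCommandError):

def example_list_osds():
    osd_dump = send_command('mon', 'osd dump')
    return [o['osd'] for o in osd_dump.get('osds', [])]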
def load_buffer(self, buf, channel_name):
    result = CommandResult("")
    mgr.send_command(
        result, "mon", "", json.dumps({
            "prefix": "log last",
            "format": "json",
            "channel": channel_name,
            "num": LOG_BUFFER_SIZE
        }), "")
    r, outb, outs = result.wait()
    if r != 0:
        # Oh well.  We won't let this stop us though.
        self.log.error(
            "Error fetching log history (r={0}, \"{1}\")".format(r, outs))
    else:
        try:
            lines = json.loads(outb)
        except ValueError:
            self.log.error("Error decoding log history")
        else:
            for l in lines:
                buf.appendleft(l)
def do_scrape_daemon(self,
                     daemon_type: str,
                     daemon_id: str,
                     devid: str = '') -> Optional[Dict[str, Any]]:
    """
    :return: a dict, or None if the scrape failed.
    """
    self.log.debug('do_scrape_daemon %s.%s' % (daemon_type, daemon_id))
    result = CommandResult('')
    self.send_command(result, daemon_type, daemon_id, json.dumps({
        'prefix': 'smart',
        'format': 'json',
        'devid': devid,
    }), '')
    r, outb, outs = result.wait()
    try:
        return json.loads(outb)
    except (IndexError, ValueError):
        self.log.error(
            "Failed to parse JSON result from daemon {0}.{1} ({2})".format(
                daemon_type, daemon_id, outb))
        return None
def do_scrape_osd(self, osd_id, devid=''):
    """
    :return: a dict, or None if the scrape failed.
    """
    self.log.debug('do_scrape_osd osd.%d' % osd_id)

    # scrape from osd
    result = CommandResult('')
    self.send_command(
        result, 'osd', str(osd_id), json.dumps({
            'prefix': 'smart',
            'format': 'json',
            'devid': devid,
        }), '')
    r, outb, outs = result.wait()

    try:
        return json.loads(outb)
    except (IndexError, ValueError):
        self.log.error(
            "Failed to parse JSON result from OSD {0} ({1})".format(
                osd_id, outb))
def _command_spam(self):
    self.log.info("Starting command_spam workload...")
    while not self._event.is_set():
        osdmap = self.get_osdmap()
        dump = osdmap.dump()
        count = len(dump['osds'])
        i = int(random.random() * count)
        w = random.random()

        result = CommandResult('')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd reweight',
                'id': i,
                'weight': w
            }), '')

        crush = osdmap.get_crush().dump()
        r, outb, outs = result.wait()

    self._event.clear()
    self.log.info("Ended command_spam workload...")
def _osd(self, osd_id):
    osd_map = global_instance().get("osd_map")
    osd = None
    for o in osd_map['osds']:
        if o['osd'] == osd_id:
            osd = o
            break
    if osd is None:
        raise cherrypy.HTTPError(404, "No OSD with id {0}".format(osd_id))

    osd_spec = "{0}".format(osd_id)
    osd_metadata = global_instance().get_metadata("osd", osd_spec)

    result = CommandResult("")
    global_instance().send_command(result, "osd", osd_spec, json.dumps({
        "prefix": "perf histogram dump",
    }), "")
    r, outb, outs = result.wait()
    if r != 0:
        histogram = None
        global_instance().log.error(
            "Failed to load histogram for OSD {}".format(osd_id))
    else:
        histogram = json.loads(outb)

    # TODO(chang liu): use to_sorted_array to simplify javascript code
    return {
        "osd": osd,
        "osd_metadata": osd_metadata,
        "osd_histogram": histogram,
        "url_perf": "/perf_counters/osd/" + str(osd_id)
    }
def send_command(cls, srv_type, prefix, srv_spec='', **kwargs):
    """
    :type prefix: str
    :param srv_type: mon |
    :param kwargs: will be added to argdict
    :param srv_spec: typically empty. or something like "<fs_id>:0"

    :raises PermissionError: See rados.make_ex
    :raises ObjectNotFound: See rados.make_ex
    :raises IOError: See rados.make_ex
    :raises NoSpace: See rados.make_ex
    :raises ObjectExists: See rados.make_ex
    :raises ObjectBusy: See rados.make_ex
    :raises NoData: See rados.make_ex
    :raises InterruptedOrTimeoutError: See rados.make_ex
    :raises TimedOut: See rados.make_ex
    :raises SendCommandError: return code != 0
    """
    argdict = {
        "prefix": prefix,
        "format": "json",
    }
    argdict.update({k: v for k, v in kwargs.items() if v})
    result = CommandResult("")
    mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "")
    r, outb, outs = result.wait()
    if r != 0:
        msg = "send_command '{}' failed. (r={}, outs=\"{}\", kwargs={})".format(
            prefix, r, outs, kwargs)
        logger.error(msg)
        raise SendCommandError(outs, prefix, argdict, r)
    else:
        try:
            return json.loads(outb)
        except Exception:  # pylint: disable=broad-except
            return outb
def send_command(cls, srv_type, prefix, srv_spec='', **kwargs):
    """
    :type prefix: str
    :param srv_type: mon |
    :param kwargs: will be added to argdict
    :param srv_spec: typically empty. or something like "<fs_id>:0"

    :raises PermissionError: See rados.make_ex
    :raises ObjectNotFound: See rados.make_ex
    :raises IOError: See rados.make_ex
    :raises NoSpace: See rados.make_ex
    :raises ObjectExists: See rados.make_ex
    :raises ObjectBusy: See rados.make_ex
    :raises NoData: See rados.make_ex
    :raises InterruptedOrTimeoutError: See rados.make_ex
    :raises TimedOut: See rados.make_ex
    :raises ValueError: return code != 0
    """
    argdict = {
        "prefix": prefix,
        "format": "json",
    }
    argdict.update({k: v for k, v in kwargs.items() if v})
    result = CommandResult("")
    mgr.send_command(result, srv_type, srv_spec, json.dumps(argdict), "")
    r, outb, outs = result.wait()
    if r != 0:
        msg = "send_command '{}' failed. (r={}, outs=\"{}\", kwargs={})".format(
            prefix, r, outs, kwargs)
        logger.error(msg)
        raise ValueError(msg)
    else:
        try:
            return json.loads(outb)
        except Exception:  # pylint: disable=broad-except
            return outb
def handle_osd_map(self):
    """
    Check pools on each OSDMap change
    """
    subtree_type = self.get_config('subtree') or 'rack'
    failure_domain = self.get_config('failure_domain') or 'host'
    pg_num = self.get_config('pg_num') or '128'
    num_rep = self.get_config('num_rep') or '2'
    prefix = self.get_config('prefix') or 'by-' + subtree_type + '-'

    osdmap = self.get("osd_map")
    lpools = []
    for pool in osdmap['pools']:
        if pool['pool_name'].find(prefix) == 0:
            lpools.append(pool['pool_name'])

    self.log.debug('localized pools = %s', lpools)
    subtrees = []
    tree = self.get('osd_map_tree')
    for node in tree['nodes']:
        if node['type'] == subtree_type:
            subtrees.append(node['name'])
            pool_name = prefix + node['name']
            if pool_name not in lpools:
                self.log.info('Creating localized pool %s', pool_name)
                #
                result = CommandResult("")
                self.send_command(result, "mon", "", json.dumps({
                    "prefix": "osd crush rule create-replicated",
                    "format": "json",
                    "name": pool_name,
                    "root": node['name'],
                    "type": failure_domain,
                }), "")
                r, outb, outs = result.wait()

                result = CommandResult("")
                self.send_command(result, "mon", "", json.dumps({
                    "prefix": "osd pool create",
                    "format": "json",
                    "pool": pool_name,
                    'rule': pool_name,
                    "pool_type": 'replicated',
                    'pg_num': str(pg_num),
                }), "")
                r, outb, outs = result.wait()

                result = CommandResult("")
                self.send_command(result, "mon", "", json.dumps({
                    "prefix": "osd pool set",
                    "format": "json",
                    "pool": pool_name,
                    'var': 'size',
                    "val": str(num_rep),
                }), "")
                r, outb, outs = result.wait()
def _osd(self, osd_id):
    osd_map = global_instance().get("osd_map")
    osd = None
    for o in osd_map['osds']:
        if o['osd'] == osd_id:
            osd = o
            break
    if osd is None:
        raise cherrypy.HTTPError(404, "No OSD with id {0}".format(osd_id))

    osd_spec = "{0}".format(osd_id)
    osd_metadata = global_instance().get_metadata("osd", osd_spec)

    result = CommandResult("")
    global_instance().send_command(result, "osd", osd_spec, json.dumps({
        "prefix": "perf histogram dump",
    }), "")
    r, outb, outs = result.wait()
    if r != 0:
        histogram = None
        global_instance().log.error(
            "Failed to load histogram for OSD {}".format(osd_id))
    else:
        histogram = json.loads(outb)

    return {
        "osd": osd,
        "osd_metadata": osd_metadata,
        "osd_histogram": histogram
    }
def _set_device_life_expectancy(self, device_id: str,
                                from_date: str,
                                to_date: Optional[str] = None) -> int:
    result = CommandResult('')

    if to_date is None:
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'device set-life-expectancy',
            'devid': device_id,
            'from': from_date
        }), '')
    else:
        self.send_command(result, 'mon', '', json.dumps({
            'prefix': 'device set-life-expectancy',
            'devid': device_id,
            'from': from_date,
            'to': to_date
        }), '')
    ret, _, outs = result.wait()
    if ret != 0:
        self.log.error(
            'failed to set device life expectancy, %s' % outs)
    return ret
def get_file_sd_config(self):
    servers = self.list_servers()
    targets = []
    for server in servers:
        hostname = server.get('hostname', '')
        for service in server.get('services', []):
            if service['type'] != 'mgr':
                continue
            id_ = service['id']
            # get port for prometheus module at mgr with id_
            # TODO use get_config_prefix or get_config here once
            # https://github.com/ceph/ceph/pull/20458 is merged
            result = CommandResult("")
            assert isinstance(_global_instance, Module)
            _global_instance.send_command(
                result, "mon", '',
                json.dumps({
                    "prefix": "config-key get",
                    'key': "config/mgr/mgr/prometheus/{}/server_port".format(id_),
                }),
                "")
            r, outb, outs = result.wait()
            if r != 0:
                _global_instance.log.error("Failed to retrieve port for mgr {}: {}".format(id_, outs))
                targets.append('{}:{}'.format(hostname, DEFAULT_PORT))
            else:
                port = json.loads(outb)
                targets.append('{}:{}'.format(hostname, port))

    ret = [
        {
            "targets": targets,
            "labels": {}
        }
    ]
    return 0, json.dumps(ret), ""
def get_compat_weight_set_weights(self, ms):
    if '-1' not in ms.crush_dump.get('choose_args', {}):
        # enable compat weight-set first
        self.log.debug('ceph osd crush weight-set create-compat')
        result = CommandResult('')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd crush weight-set create-compat',
                'format': 'json',
            }), '')
        r, outb, outs = result.wait()
        if r != 0:
            self.log.error('Error creating compat weight-set')
            return

        result = CommandResult('')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd crush dump',
                'format': 'json',
            }), '')
        r, outb, outs = result.wait()
        if r != 0:
            self.log.error('Error dumping crush map')
            return
        try:
            crushmap = json.loads(outb)
        except ValueError:  # narrowed from a bare except
            raise RuntimeError('unable to parse crush map')
    else:
        crushmap = ms.crush_dump

    raw = crushmap.get('choose_args', {}).get('-1', [])
    weight_set = {}
    for b in raw:
        bucket = None
        for t in crushmap['buckets']:
            if t['id'] == b['bucket_id']:
                bucket = t
                break
        if not bucket:
            raise RuntimeError('could not find bucket %s' % b['bucket_id'])
        self.log.debug('bucket items %s' % bucket['items'])
        self.log.debug('weight set %s' % b['weight_set'][0])
        if len(bucket['items']) != len(b['weight_set'][0]):
            raise RuntimeError(
                'weight-set size does not match bucket items')
        for pos in range(len(bucket['items'])):
            weight_set[bucket['items'][pos]['id']] = b['weight_set'][0][pos]

    self.log.debug('weight_set weights %s' % weight_set)
    return weight_set
def update_client_meta(self, rank_set):
    new_updates = {}
    pending_updates = [v[0] for v in self.client_metadata['in_progress'].values()]
    with self.meta_lock:
        for rank in rank_set:
            if rank in pending_updates:
                continue
            tag = str(uuid.uuid4())
            result = CommandResult(tag)
            new_updates[tag] = (rank, result)
        self.client_metadata['in_progress'].update(new_updates)

    self.log.debug("updating client metadata from {0}".format(new_updates))

    cmd_dict = {'prefix': 'client ls'}
    for tag, val in new_updates.items():
        self.module.send_command(val[1], "mds", str(val[0]),
                                 json.dumps(cmd_dict), tag)
def update_client_meta(self):
    new_updates = {}
    pending_updates = [
        v[0] for v in self.client_metadata['in_progress'].values()
    ]
    with self.meta_lock:
        fsmap = self.module.get('fs_map')
        for fs in fsmap['filesystems']:
            mdsmap = fs['mdsmap']
            gid = mdsmap['up']["mds_0"]
            if gid in pending_updates:
                continue
            tag = str(uuid.uuid4())
            result = CommandResult(tag)
            new_updates[tag] = (gid, result)
        self.client_metadata['in_progress'].update(new_updates)

    self.log.debug(f"updating client metadata from {new_updates}")

    cmd_dict = {'prefix': 'client ls'}
    for tag, val in new_updates.items():
        self.module.send_command(val[1], "mds", str(val[0]),
                                 json.dumps(cmd_dict), tag)
class RadosCommands(object):
    def __init__(self, tag, commands):
        self.result = None
        self._tag = tag
        self._commands = commands

        self.r = None
        self.outs = None
        self.outb = None

    def run(self):
        cmd = self._commands[0]
        self._commands = self._commands[1:]
        self.result = CommandResult(self._tag)
        log.debug("cmd={0}".format(cmd))

        # Commands come in as a 2-tuple of (prefix, args); convert them
        # to the form that send_command uses
        command = cmd[1]
        command['prefix'] = cmd[0]
        rest_plugin().send_command(self.result, json.dumps(command), self._tag)

    def is_complete(self):
        return self.result is None and not self._commands

    def advance(self):
        self.r, self.outb, self.outs = self.result.wait()
        self.result = None
        if self.r == 0:
            if self._commands:
                self.run()
        else:
            # Stop on errors
            self._commands = []
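# --- Hedged lifecycle sketch (tags and commands are illustrative) ------------
# run() issues the head of the queue, the completion/notify path is expected
# to call advance(), and is_complete() reports when every queued command has
# either finished or been abandoned after an error:
#
#     rc = RadosCommands('tag-123', [
#         ('osd pool create', {'pool': 'foo', 'pg_num': 16}),
#         ('osd pool application enable', {'pool': 'foo', 'app': 'rbd'}),
#     ])
#     rc.run()
#     # ...later, from the completion/notify path:
#     rc.advance()
#     if rc.is_complete():
#         print(rc.r, rc.outb, rc.outs)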
def _open_connection(self, pool_name='device_health_metrics'):
    pools = self.module.rados.list_pools()
    is_pool = False
    for pool in pools:
        if pool == pool_name:
            is_pool = True
            break
    if not is_pool:
        self.module.log.debug('create %s pool' % pool_name)
        # create pool
        result = CommandResult('')
        self.module.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd pool create',
                'format': 'json',
                'pool': pool_name,
                'pg_num': 1,
            }), '')
        r, outb, outs = result.wait()
        assert r == 0

        # set pool application
        result = CommandResult('')
        self.module.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd pool application enable',
                'format': 'json',
                'pool': pool_name,
                'app': 'mgr_devicehealth',
            }), '')
        r, outb, outs = result.wait()
        assert r == 0

    ioctx = self.module.rados.open_ioctx(pool_name)
    return ioctx
def serve(self):
    current_dir = os.path.dirname(os.path.abspath(__file__))
    jinja_loader = jinja2.FileSystemLoader(current_dir)
    env = jinja2.Environment(loader=jinja_loader)

    result = CommandResult("")
    self.send_command(result, "mon", "", json.dumps({
        "prefix": "log last",
        "format": "json"
    }), "")
    r, outb, outs = result.wait()
    if r != 0:
        # Oh well.  We won't let this stop us though.
        self.log.error(
            "Error fetching log history (r={0}, \"{1}\")".format(r, outs))
    else:
        try:
            lines = json.loads(outb)
        except ValueError:
            self.log.error("Error decoding log history")
        else:
            for l in lines:
                if l['channel'] == 'audit':
                    self.audit_buffer.appendleft(l)
                else:
                    self.log_buffer.appendleft(l)

    self.log_primed = True

    class Root(object):
        def _toplevel_data(self):
            """
            Data consumed by the base.html template
            """
            status, data = global_instance().rbd_pool_ls.get()
            if data is None:
                log.warning("Failed to get RBD pool list")
                data = []

            rbd_pools = sorted([{
                "name": name,
                "url": "/rbd/{0}/".format(name)
            } for name in data], key=lambda k: k['name'])

            fsmap = global_instance().get_sync_object(FsMap)
            filesystems = [{
                "id": f['id'],
                "name": f['mdsmap']['fs_name'],
                "url": "/filesystem/{0}/".format(f['id'])
            } for f in fsmap.data['filesystems']]

            return {
                'health': global_instance().get_sync_object(Health).data,
                'rbd_pools': rbd_pools,
                'filesystems': filesystems
            }

        @cherrypy.expose
        def filesystem(self, fs_id):
            template = env.get_template("filesystem.html")
            toplevel_data = self._toplevel_data()
            content_data = {
                "fs_status": global_instance().fs_status(int(fs_id))
            }
            return template.render(ceph_version=global_instance().version,
                                   toplevel_data=json.dumps(toplevel_data, indent=2),
                                   content_data=json.dumps(content_data, indent=2))

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def filesystem_data(self, fs_id):
            return global_instance().fs_status(int(fs_id))

        def _osd(self, osd_id):
            #global_instance().fs_status(int(fs_id))
            osd_id = int(osd_id)
            osd_map = global_instance().get("osd_map")
            osd = None
            for o in osd_map['osds']:
                if o['osd'] == osd_id:
                    osd = o
                    break
            assert osd is not None  # TODO 400

            osd_spec = "{0}".format(osd_id)
            osd_metadata = global_instance().get_metadata("osd", osd_spec)

            result = CommandResult("")
            global_instance().send_command(
                result, "osd", osd_spec, json.dumps({
                    "prefix": "perf histogram dump",
                }), "")
            r, outb, outs = result.wait()
            assert r == 0
            histogram = json.loads(outb)

            return {
                "osd": osd,
                "osd_metadata": osd_metadata,
                "osd_histogram": histogram
            }

        @cherrypy.expose
        def osd_perf(self, osd_id):
            template = env.get_template("osd_perf.html")
            toplevel_data = self._toplevel_data()
            return template.render(ceph_version=global_instance().version,
                                   toplevel_data=json.dumps(toplevel_data, indent=2),
                                   content_data=json.dumps(
                                       self._osd(osd_id), indent=2))

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def osd_perf_data(self, osd_id):
            return self._osd(osd_id)

        def _clients(self, fs_id):
            cephfs_clients = global_instance().cephfs_clients.get(fs_id, None)
            if cephfs_clients is None:
                cephfs_clients = CephFSClients(global_instance(), fs_id)
                global_instance().cephfs_clients[fs_id] = cephfs_clients

            status, clients = cephfs_clients.get()
            # TODO do something sensible with status

            # Decorate the metadata with some fields that will be
            # independent of whether it's a kernel or userspace
            # client, so that the javascript doesn't have to grok that.
            for client in clients:
                if "ceph_version" in client['client_metadata']:
                    client['type'] = "userspace"
                    client['version'] = client['client_metadata']['ceph_version']
                    client['hostname'] = client['client_metadata']['hostname']
                elif "kernel_version" in client['client_metadata']:
                    client['type'] = "kernel"
                    client['version'] = client['client_metadata']['kernel_version']
                    client['hostname'] = client['client_metadata']['hostname']
                else:
                    client['type'] = "unknown"
                    client['version'] = ""
                    client['hostname'] = ""

            return clients

        @cherrypy.expose
        def clients(self, fs_id):
            template = env.get_template("clients.html")
            toplevel_data = self._toplevel_data()

            clients = self._clients(int(fs_id))
            global_instance().log.debug(json.dumps(clients, indent=2))
            content_data = {"clients": clients, "fscid": fs_id}

            return template.render(ceph_version=global_instance().version,
                                   toplevel_data=json.dumps(toplevel_data, indent=2),
                                   content_data=json.dumps(content_data, indent=2))

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def clients_data(self, fs_id):
            return self._clients(int(fs_id))

        def _rbd(self, pool_name):
            rbd_ls = global_instance().rbd_ls.get(pool_name, None)
            if rbd_ls is None:
                rbd_ls = RbdLs(global_instance(), pool_name)
                global_instance().rbd_ls[pool_name] = rbd_ls

            status, value = rbd_ls.get()

            interval = 5
            wait = interval - rbd_ls.latency

            def wait_and_load():
                time.sleep(wait)
                rbd_ls.get()

            threading.Thread(target=wait_and_load).start()

            assert status != RbdLs.VALUE_NONE  # FIXME bubble status up to UI
            return value

        @cherrypy.expose
        def rbd(self, pool_name):
            template = env.get_template("rbd.html")
            toplevel_data = self._toplevel_data()

            images = self._rbd(pool_name)
            content_data = {"images": images, "pool_name": pool_name}

            return template.render(ceph_version=global_instance().version,
                                   toplevel_data=json.dumps(toplevel_data, indent=2),
                                   content_data=json.dumps(content_data, indent=2))

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def rbd_data(self, pool_name):
            return self._rbd(pool_name)

        @cherrypy.expose
        def health(self):
            template = env.get_template("health.html")
            return template.render(ceph_version=global_instance().version,
                                   toplevel_data=json.dumps(
                                       self._toplevel_data(), indent=2),
                                   content_data=json.dumps(self._health(), indent=2))

        @cherrypy.expose
        def servers(self):
            template = env.get_template("servers.html")
            return template.render(ceph_version=global_instance().version,
                                   toplevel_data=json.dumps(
                                       self._toplevel_data(), indent=2),
                                   content_data=json.dumps(self._servers(), indent=2))

        def _servers(self):
            # (a redundant unused list_servers() call was dropped here)
            return {'servers': global_instance().list_servers()}

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def servers_data(self):
            return self._servers()

        def _health(self):
            # Fuse osdmap with pg_summary to get description of pools
            # including their PG states
            osd_map = global_instance().get_sync_object(OsdMap).data
            pg_summary = global_instance().get_sync_object(PgSummary).data
            pools = []

            if len(global_instance().pool_stats) == 0:
                global_instance().update_pool_stats()

            for pool in osd_map['pools']:
                pool['pg_status'] = pg_summary['by_pool'][pool['pool'].__str__()]
                stats = global_instance().pool_stats[pool['pool']]
                s = {}

                def get_rate(series):
                    if len(series) >= 2:
                        return (float(series[0][1]) - float(series[1][1])) / \
                            (float(series[0][0]) - float(series[1][0]))
                    else:
                        return 0

                for stat_name, stat_series in stats.items():
                    s[stat_name] = {
                        'latest': stat_series[0][1],
                        'rate': get_rate(stat_series),
                        'series': [i for i in stat_series]
                    }
                pool['stats'] = s
                pools.append(pool)

            # Not needed, skip the effort of transmitting this to UI
            del osd_map['pg_temp']

            return {
                "health": global_instance().get_sync_object(Health).data,
                "mon_status": global_instance().get_sync_object(MonStatus).data,
                "osd_map": osd_map,
                "clog": list(global_instance().log_buffer),
                "audit_log": list(global_instance().audit_buffer),
                "pools": pools
            }

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def health_data(self):
            return self._health()

        @cherrypy.expose
        def index(self):
            return self.health()

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def toplevel_data(self):
            return self._toplevel_data()

        def _get_mds_names(self, filesystem_id=None):
            names = []
            fsmap = global_instance().get("fs_map")
            for fs in fsmap['filesystems']:
                if filesystem_id is not None and fs['id'] != filesystem_id:
                    continue
                names.extend([info['name'] for _, info
                              in fs['mdsmap']['info'].items()])
            if filesystem_id is None:
                names.extend(info['name'] for info in fsmap['standbys'])
            return names

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def mds_counters(self, fs_id):
            """
            Result format: map of daemon name to map of counter to list of datapoints
            """
            # Opinionated list of interesting performance counters for the GUI --
            # if you need something else just add it.  See how simple life is
            # when you don't have to write general purpose APIs?
            counters = [
                "mds_server.handle_client_request",
                "mds_log.ev",
                "mds_cache.num_strays",
                "mds.exported",
                "mds.exported_inodes",
                "mds.imported",
                "mds.imported_inodes",
                "mds.inodes",
                "mds.caps",
                "mds.subtrees"
            ]

            result = {}
            mds_names = self._get_mds_names(int(fs_id))

            for mds_name in mds_names:
                result[mds_name] = {}
                for counter in counters:
                    data = global_instance().get_counter("mds", mds_name, counter)
                    if data is not None:
                        result[mds_name][counter] = data[counter]
                    else:
                        result[mds_name][counter] = []

            return dict(result)

    server_addr = self.get_localized_config('server_addr')
    server_port = self.get_localized_config('server_port') or '7000'
    if server_addr is None:
        raise RuntimeError(
            'no server_addr configured; try "ceph config-key put mgr/dashboard/server_addr <ip>"')
    log.info("server_addr: %s server_port: %s" % (server_addr, server_port))
    cherrypy.config.update({
        'server.socket_host': server_addr,
        'server.socket_port': int(server_port),
        'engine.autoreload.on': False
    })

    static_dir = os.path.join(current_dir, 'static')
    conf = {
        "/static": {
            "tools.staticdir.on": True,
            'tools.staticdir.dir': static_dir
        }
    }
    log.info("Serving static from {0}".format(static_dir))
    cherrypy.tree.mount(Root(), "/", conf)

    log.info("Starting engine...")
    cherrypy.engine.start()
    log.info("Waiting for engine...")
    cherrypy.engine.block()
    log.info("Engine done.")
def serve(self):
    current_dir = os.path.dirname(os.path.abspath(__file__))
    jinja_loader = jinja2.FileSystemLoader(current_dir)
    env = jinja2.Environment(loader=jinja_loader)

    result = CommandResult("")
    self.send_command(result, "mon", "", json.dumps({
        "prefix": "log last",
        "format": "json"
    }), "")
    r, outb, outs = result.wait()
    if r != 0:
        # Oh well.  We won't let this stop us though.
        self.log.error("Error fetching log history (r={0}, \"{1}\")".format(
            r, outs))
    else:
        try:
            lines = json.loads(outb)
        except ValueError:
            self.log.error("Error decoding log history")
        else:
            for l in lines:
                if l['channel'] == 'audit':
                    self.audit_buffer.appendleft(l)
                else:
                    self.log_buffer.appendleft(l)

    self.log_primed = True

    class EndPoint(object):
        def _health_data(self):
            health = global_instance().get_sync_object(Health).data
            # Transform the `checks` dict into a list for the convenience
            # of rendering from javascript.
            checks = []
            for k, v in health['checks'].iteritems():
                v['type'] = k
                checks.append(v)

            checks = sorted(checks, cmp=lambda a, b: a['severity'] > b['severity'])

            health['checks'] = checks

            return health

        def _toplevel_data(self):
            """
            Data consumed by the base.html template
            """
            status, data = global_instance().rbd_pool_ls.get()
            if data is None:
                log.warning("Failed to get RBD pool list")
                data = []

            rbd_pools = sorted([
                {
                    "name": name,
                    "url": get_prefixed_url("/rbd_pool/{0}/".format(name))
                } for name in data
            ], key=lambda k: k['name'])

            status, rbd_mirroring = global_instance().rbd_mirroring.toplevel.get()
            if rbd_mirroring is None:
                log.warning("Failed to get RBD mirroring summary")
                rbd_mirroring = {}

            fsmap = global_instance().get_sync_object(FsMap)
            filesystems = [
                {
                    "id": f['id'],
                    "name": f['mdsmap']['fs_name'],
                    "url": get_prefixed_url("/filesystem/{0}/".format(f['id']))
                } for f in fsmap.data['filesystems']
            ]

            return {
                'rbd_pools': rbd_pools,
                'rbd_mirroring': rbd_mirroring,
                'health_status': self._health_data()['status'],
                'filesystems': filesystems
            }

    class Root(EndPoint):
        @cherrypy.expose
        def filesystem(self, fs_id):
            template = env.get_template("filesystem.html")
            toplevel_data = self._toplevel_data()
            content_data = {
                "fs_status": global_instance().fs_status(int(fs_id))
            }
            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info=cherrypy.request.path_info,
                toplevel_data=json.dumps(toplevel_data, indent=2),
                content_data=json.dumps(content_data, indent=2)
            )

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def filesystem_data(self, fs_id):
            return global_instance().fs_status(int(fs_id))

        def _clients(self, fs_id):
            cephfs_clients = global_instance().cephfs_clients.get(fs_id, None)
            if cephfs_clients is None:
                cephfs_clients = CephFSClients(global_instance(), fs_id)
                global_instance().cephfs_clients[fs_id] = cephfs_clients

            status, clients = cephfs_clients.get()
            # TODO do something sensible with status

            # Decorate the metadata with some fields that will be
            # independent of whether it's a kernel or userspace
            # client, so that the javascript doesn't have to grok that.
            for client in clients:
                if "ceph_version" in client['client_metadata']:
                    client['type'] = "userspace"
                    client['version'] = client['client_metadata']['ceph_version']
                    client['hostname'] = client['client_metadata']['hostname']
                elif "kernel_version" in client['client_metadata']:
                    client['type'] = "kernel"
                    client['version'] = client['client_metadata']['kernel_version']
                    client['hostname'] = client['client_metadata']['hostname']
                else:
                    client['type'] = "unknown"
                    client['version'] = ""
                    client['hostname'] = ""

            return clients

        @cherrypy.expose
        def clients(self, fscid_str):
            try:
                fscid = int(fscid_str)
            except ValueError:
                raise cherrypy.HTTPError(
                    400, "Invalid filesystem id {0}".format(fscid_str))

            try:
                fs_name = FsMap(global_instance().get(
                    "fs_map")).get_filesystem(fscid)['mdsmap']['fs_name']
            except NotFound:
                log.warning("Missing FSCID, dumping fsmap:\n{0}".format(
                    json.dumps(global_instance().get("fs_map"), indent=2)
                ))
                raise cherrypy.HTTPError(
                    404, "No filesystem with id {0}".format(fscid))

            clients = self._clients(fscid)
            global_instance().log.debug(json.dumps(clients, indent=2))
            content_data = {
                "clients": clients,
                "fs_name": fs_name,
                "fscid": fscid,
                "fs_url": get_prefixed_url("/filesystem/" + fscid_str + "/")
            }

            template = env.get_template("clients.html")
            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info=cherrypy.request.path_info,
                toplevel_data=json.dumps(self._toplevel_data(), indent=2),
                content_data=json.dumps(content_data, indent=2)
            )

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def clients_data(self, fs_id):
            return self._clients(int(fs_id))

        def _rbd_pool(self, pool_name):
            rbd_ls = global_instance().rbd_ls.get(pool_name, None)
            if rbd_ls is None:
                rbd_ls = RbdLs(global_instance(), pool_name)
                global_instance().rbd_ls[pool_name] = rbd_ls

            status, value = rbd_ls.get()

            interval = 5
            wait = interval - rbd_ls.latency

            def wait_and_load():
                time.sleep(wait)
                rbd_ls.get()

            threading.Thread(target=wait_and_load).start()

            assert status != RbdLs.VALUE_NONE  # FIXME bubble status up to UI
            return value

        @cherrypy.expose
        def rbd_pool(self, pool_name):
            template = env.get_template("rbd_pool.html")
            toplevel_data = self._toplevel_data()

            images = self._rbd_pool(pool_name)
            content_data = {
                "images": images,
                "pool_name": pool_name
            }

            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info=cherrypy.request.path_info,
                toplevel_data=json.dumps(toplevel_data, indent=2),
                content_data=json.dumps(content_data, indent=2)
            )

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def rbd_pool_data(self, pool_name):
            return self._rbd_pool(pool_name)

        def _rbd_mirroring(self):
            status, data = global_instance().rbd_mirroring.content_data.get()
            if data is None:
                log.warning("Failed to get RBD mirroring status")
                return {}
            return data

        @cherrypy.expose
        def rbd_mirroring(self):
            template = env.get_template("rbd_mirroring.html")
            toplevel_data = self._toplevel_data()
            content_data = self._rbd_mirroring()
            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info=cherrypy.request.path_info,
                toplevel_data=json.dumps(toplevel_data, indent=2),
                content_data=json.dumps(content_data, indent=2)
            )

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def rbd_mirroring_data(self):
            return self._rbd_mirroring()

        def _rbd_iscsi(self):
            status, data = global_instance().rbd_iscsi.content_data.get()
            if data is None:
                log.warning("Failed to get RBD iSCSI status")
                return {}
            return data

        @cherrypy.expose
        def rbd_iscsi(self):
            template = env.get_template("rbd_iscsi.html")
            toplevel_data = self._toplevel_data()
            content_data = self._rbd_iscsi()
            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info=cherrypy.request.path_info,
                toplevel_data=json.dumps(toplevel_data, indent=2),
                content_data=json.dumps(content_data, indent=2)
            )

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def rbd_iscsi_data(self):
            return self._rbd_iscsi()

        @cherrypy.expose
        def health(self):
            template = env.get_template("health.html")
            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info=cherrypy.request.path_info,
                toplevel_data=json.dumps(self._toplevel_data(), indent=2),
                content_data=json.dumps(self._health(), indent=2)
            )

        @cherrypy.expose
        def servers(self):
            template = env.get_template("servers.html")
            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info=cherrypy.request.path_info,
                toplevel_data=json.dumps(self._toplevel_data(), indent=2),
                content_data=json.dumps(self._servers(), indent=2)
            )

        def _servers(self):
            return {
                'servers': global_instance().list_servers()
            }

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def servers_data(self):
            return self._servers()

        def _health(self):
            # Fuse osdmap with pg_summary to get description of pools
            # including their PG states
            osd_map = global_instance().get_sync_object(OsdMap).data
            pg_summary = global_instance().get_sync_object(PgSummary).data
            pools = []

            if len(global_instance().pool_stats) == 0:
                global_instance().update_pool_stats()

            for pool in osd_map['pools']:
                pool['pg_status'] = pg_summary['by_pool'][pool['pool'].__str__()]
                stats = global_instance().pool_stats[pool['pool']]
                s = {}

                def get_rate(series):
                    if len(series) >= 2:
                        return (float(series[0][1]) - float(series[1][1])) / \
                            (float(series[0][0]) - float(series[1][0]))
                    else:
                        return 0

                for stat_name, stat_series in stats.items():
                    s[stat_name] = {
                        'latest': stat_series[0][1],
                        'rate': get_rate(stat_series),
                        'series': [i for i in stat_series]
                    }
                pool['stats'] = s
                pools.append(pool)

            # Not needed, skip the effort of transmitting this
            # to UI
            del osd_map['pg_temp']

            df = global_instance().get("df")
            df['stats']['total_objects'] = sum(
                [p['stats']['objects'] for p in df['pools']])

            return {
                "health": self._health_data(),
                "mon_status": global_instance().get_sync_object(MonStatus).data,
                "fs_map": global_instance().get_sync_object(FsMap).data,
                "osd_map": osd_map,
                "clog": list(global_instance().log_buffer),
                "audit_log": list(global_instance().audit_buffer),
                "pools": pools,
                "mgr_map": global_instance().get("mgr_map"),
                "df": df
            }

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def health_data(self):
            return self._health()

        @cherrypy.expose
        def index(self):
            return self.health()

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def toplevel_data(self):
            return self._toplevel_data()

        def _get_mds_names(self, filesystem_id=None):
            names = []
            fsmap = global_instance().get("fs_map")
            for fs in fsmap['filesystems']:
                if filesystem_id is not None and fs['id'] != filesystem_id:
                    continue
                names.extend([info['name'] for _, info
                              in fs['mdsmap']['info'].items()])
            if filesystem_id is None:
                names.extend(info['name'] for info in fsmap['standbys'])
            return names

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def mds_counters(self, fs_id):
            """
            Result format: map of daemon name to map of counter to list of datapoints
            """
            # Opinionated list of interesting performance counters for the GUI --
            # if you need something else just add it.  See how simple life is
            # when you don't have to write general purpose APIs?
            counters = [
                "mds_server.handle_client_request",
                "mds_log.ev",
                "mds_cache.num_strays",
                "mds.exported",
                "mds.exported_inodes",
                "mds.imported",
                "mds.imported_inodes",
                "mds.inodes",
                "mds.caps",
                "mds.subtrees"
            ]

            result = {}
            mds_names = self._get_mds_names(int(fs_id))

            for mds_name in mds_names:
                result[mds_name] = {}
                for counter in counters:
                    data = global_instance().get_counter("mds", mds_name, counter)
                    if data is not None:
                        result[mds_name][counter] = data[counter]
                    else:
                        result[mds_name][counter] = []

            return dict(result)

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def get_counter(self, type, id, path):
            return global_instance().get_counter(type, id, path)

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def get_perf_schema(self, **args):
            type = args.get('type', '')
            id = args.get('id', '')
            schema = global_instance().get_perf_schema(type, id)

            ret = dict()
            for k1 in schema.keys():  # 'perf_schema'
                ret[k1] = collections.OrderedDict()
                for k2 in sorted(schema[k1].keys()):
                    sorted_dict = collections.OrderedDict(
                        sorted(schema[k1][k2].items(), key=lambda i: i[0])
                    )
                    ret[k1][k2] = sorted_dict
            return ret

    url_prefix = self.get_config('url_prefix')
    if url_prefix is None:
        url_prefix = ''
    else:
        if len(url_prefix) != 0:
            if url_prefix[0] != '/':
                url_prefix = '/' + url_prefix
            if url_prefix[-1] == '/':
                url_prefix = url_prefix[:-1]
    self.url_prefix = url_prefix

    server_addr = self.get_localized_config('server_addr', '::')
    server_port = self.get_localized_config('server_port', '7000')
    if server_addr is None:
        raise RuntimeError(
            'no server_addr configured; try "ceph config-key set mgr/dashboard/server_addr <ip>"')
    log.info("server_addr: %s server_port: %s" % (server_addr, server_port))
    cherrypy.config.update({
        'server.socket_host': server_addr,
        'server.socket_port': int(server_port),
        'engine.autoreload.on': False
    })

    static_dir = os.path.join(current_dir, 'static')
    conf = {
        "/static": {
            "tools.staticdir.on": True,
            'tools.staticdir.dir': static_dir
        }
    }
    log.info("Serving static from {0}".format(static_dir))

    class OSDEndpoint(EndPoint):
        def _osd(self, osd_id):
            osd_id = int(osd_id)
            osd_map = global_instance().get("osd_map")
            osd = None
            for o in osd_map['osds']:
                if o['osd'] == osd_id:
                    osd = o
                    break
            assert osd is not None  # TODO 400

            osd_spec = "{0}".format(osd_id)
            osd_metadata = global_instance().get_metadata("osd", osd_spec)

            result = CommandResult("")
            global_instance().send_command(result, "osd", osd_spec, json.dumps({
                "prefix": "perf histogram dump",
            }), "")
            r, outb, outs = result.wait()
            assert r == 0
            histogram = json.loads(outb)

            return {
                "osd": osd,
                "osd_metadata": osd_metadata,
                "osd_histogram": histogram
            }

        @cherrypy.expose
        def perf(self, osd_id):
            template = env.get_template("osd_perf.html")
            toplevel_data = self._toplevel_data()

            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info='/osd' + cherrypy.request.path_info,
                toplevel_data=json.dumps(toplevel_data, indent=2),
                content_data=json.dumps(self._osd(osd_id), indent=2)
            )

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def perf_data(self, osd_id):
            return self._osd(osd_id)

        @cherrypy.expose
        @cherrypy.tools.json_out()
        def list_data(self):
            return self._osds_by_server()

        def _osd_summary(self, osd_id, osd_info):
            """
            The info used for displaying an OSD in a table
            """
            osd_spec = "{0}".format(osd_id)

            result = {}
            result['id'] = osd_id
            result['stats'] = {}
            result['stats_history'] = {}

            # Counter stats
            for s in ['osd.op_w', 'osd.op_in_bytes', 'osd.op_r', 'osd.op_out_bytes']:
                result['stats'][s.split(".")[1]] = \
                    global_instance().get_rate('osd', osd_spec, s)
                result['stats_history'][s.split(".")[1]] = \
                    global_instance().get_counter('osd', osd_spec, s)[s]

            # Gauge stats
            for s in ["osd.numpg", "osd.stat_bytes", "osd.stat_bytes_used"]:
                result['stats'][s.split(".")[1]] = \
                    global_instance().get_latest('osd', osd_spec, s)

            result['up'] = osd_info['up']
            result['in'] = osd_info['in']

            result['url'] = get_prefixed_url("/osd/perf/{0}".format(osd_id))

            return result

        def _osds_by_server(self):
            result = defaultdict(list)
            servers = global_instance().list_servers()

            osd_map = global_instance().get_sync_object(OsdMap)

            for server in servers:
                hostname = server['hostname']
                services = server['services']
                first = True
                for s in services:
                    if s["type"] == "osd":
                        osd_id = int(s["id"])
                        # If metadata doesn't tally with osdmap, drop it.
                        if osd_id not in osd_map.osds_by_id:
                            global_instance().log.warn(
                                "OSD service {0} missing in OSDMap, stale metadata?".format(osd_id))
                            continue
                        summary = self._osd_summary(osd_id,
                                                    osd_map.osds_by_id[osd_id])

                        if first:
                            # A little helper for rendering
                            summary['first'] = True
                            first = False

                        result[hostname].append(summary)

            global_instance().log.warn("result.size {0} servers.size {1}".format(
                len(result), len(servers)
            ))

            # Return list form for convenience of rendering
            return result.items()

        @cherrypy.expose
        def index(self):
            """
            List of all OSDS grouped by host
            :return:
            """
            template = env.get_template("osds.html")
            toplevel_data = self._toplevel_data()
            content_data = {
                "osds_by_server": self._osds_by_server()
            }

            return template.render(
                url_prefix=global_instance().url_prefix,
                ceph_version=global_instance().version,
                path_info='/osd' + cherrypy.request.path_info,
                toplevel_data=json.dumps(toplevel_data, indent=2),
                content_data=json.dumps(content_data, indent=2)
            )

    cherrypy.tree.mount(Root(), get_prefixed_url("/"), conf)
    cherrypy.tree.mount(OSDEndpoint(), get_prefixed_url("/osd"), conf)

    log.info("Starting engine...")
    cherrypy.engine.start()
    log.info("Waiting for engine...")
    cherrypy.engine.block()
    log.info("Engine done.")
def handle_osd_map(self):
    """
    Check pools on each OSDMap change
    """
    subtree_type = self.get_config('subtree') or 'rack'
    failure_domain = self.get_config('failure_domain') or 'host'
    pg_num = self.get_config('pg_num') or '128'
    num_rep = self.get_config('num_rep') or '3'
    min_size = self.get_config('min_size')
    prefix = self.get_config('prefix') or 'by-' + subtree_type + '-'

    osdmap = self.get("osd_map")
    lpools = []
    for pool in osdmap['pools']:
        if pool['pool_name'].find(prefix) == 0:
            lpools.append(pool['pool_name'])

    self.log.debug('localized pools = %s', lpools)
    subtrees = []
    tree = self.get('osd_map_tree')
    for node in tree['nodes']:
        if node['type'] == subtree_type:
            subtrees.append(node['name'])
            pool_name = prefix + node['name']
            if pool_name not in lpools:
                self.log.info('Creating localized pool %s', pool_name)
                #
                result = CommandResult("")
                self.send_command(
                    result, "mon", "", json.dumps({
                        "prefix": "osd crush rule create-replicated",
                        "format": "json",
                        "name": pool_name,
                        "root": node['name'],
                        "type": failure_domain,
                    }), "")
                r, outb, outs = result.wait()

                result = CommandResult("")
                self.send_command(
                    result, "mon", "", json.dumps({
                        "prefix": "osd pool create",
                        "format": "json",
                        "pool": pool_name,
                        'rule': pool_name,
                        "pool_type": 'replicated',
                        'pg_num': int(pg_num),
                    }), "")
                r, outb, outs = result.wait()

                result = CommandResult("")
                self.send_command(
                    result, "mon", "", json.dumps({
                        "prefix": "osd pool set",
                        "format": "json",
                        "pool": pool_name,
                        'var': 'size',
                        "val": str(num_rep),
                    }), "")
                r, outb, outs = result.wait()

                if min_size:
                    result = CommandResult("")
                    self.send_command(
                        result, "mon", "", json.dumps({
                            "prefix": "osd pool set",
                            "format": "json",
                            "pool": pool_name,
                            'var': 'min_size',
                            "val": str(min_size),
                        }), "")
                    r, outb, outs = result.wait()
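# For reference, the mon commands issued above correspond roughly to this
# CLI sequence (pool and bucket names illustrative, assuming a 'rack-a'
# subtree; exact argument order may vary by Ceph release):
#
#     ceph osd crush rule create-replicated by-rack-rack-a rack-a host
#     ceph osd pool create by-rack-rack-a 128 128 replicated by-rack-rack-a
#     ceph osd pool set by-rack-rack-a size 3
#     ceph osd pool set by-rack-rack-a min_size <min_size>   # only if configured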
def run_command(self, command):
    # tag with 'seq' so that we can ignore these in notify function
    result = CommandResult('seq')
    self.send_command(result, 'mon', '', json.dumps(command), 'seq')
    return result.wait()
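# --- Hedged usage sketch (not from the source) -------------------------------
# Because run_command() blocks on result.wait(), callers can treat it as a
# synchronous JSON query. The example method below is hypothetical and would
# live on the same module class ('osd dump' is a standard mon command):

def example_osd_dump(self):
    r, outb, outs = self.run_command({'prefix': 'osd dump', 'format': 'json'})
    return json.loads(outb) if r == 0 else None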
def execute(self, plan):
    self.log.info('Executing plan %s' % plan.name)

    commands = []

    # compat weight-set
    if len(plan.compat_ws) and \
       '-1' not in plan.initial.crush_dump.get('choose_args', {}):
        self.log.debug('ceph osd crush weight-set create-compat')
        result = CommandResult('')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd crush weight-set create-compat',
                'format': 'json',
            }), '')
        r, outb, outs = result.wait()
        if r != 0:
            self.log.error('Error creating compat weight-set')
            return r, outs

    for osd, weight in six.iteritems(plan.compat_ws):
        self.log.info('ceph osd crush weight-set reweight-compat osd.%d %f',
                      osd, weight)
        result = CommandResult('')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd crush weight-set reweight-compat',
                'format': 'json',
                'item': 'osd.%d' % osd,
                'weight': [weight],
            }), '')
        commands.append(result)

    # new_weight
    reweightn = {}
    for osd, weight in six.iteritems(plan.osd_weights):
        reweightn[str(osd)] = str(int(weight * float(0x10000)))
    if len(reweightn):
        self.log.info('ceph osd reweightn %s', reweightn)
        result = CommandResult('')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd reweightn',
                'format': 'json',
                'weights': json.dumps(reweightn),
            }), '')
        commands.append(result)

    # upmap
    incdump = plan.inc.dump()
    for pgid in incdump.get('old_pg_upmap_items', []):
        self.log.info('ceph osd rm-pg-upmap-items %s', pgid)
        result = CommandResult('foo')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd rm-pg-upmap-items',
                'format': 'json',
                'pgid': pgid,
            }), 'foo')
        commands.append(result)

    for item in incdump.get('new_pg_upmap_items', []):
        self.log.info('ceph osd pg-upmap-items %s mappings %s', item['pgid'],
                      item['mappings'])
        osdlist = []
        for m in item['mappings']:
            osdlist += [m['from'], m['to']]
        result = CommandResult('foo')
        self.send_command(
            result, 'mon', '', json.dumps({
                'prefix': 'osd pg-upmap-items',
                'format': 'json',
                'pgid': item['pgid'],
                'id': osdlist,
            }), 'foo')
        commands.append(result)

    # wait for commands
    self.log.debug('commands %s' % commands)
    for result in commands:
        r, outb, outs = result.wait()
        if r != 0:
            self.log.error('execute error: r = %d, detail = %s' % (r, outs))
            return r, outs
    self.log.debug('done')
    return 0, ''