def get_fs(self):
    """Publish CephFS filesystem metadata and MDS daemon metadata.

    Reads the ``fs_map`` structure through ``self.get`` and the
    ``(service_id, service_type) -> (hostname, version)`` mapping returned
    by ``self.get_service_list()``, then records:

    * ``mds_metadata`` for every standby MDS, using ``'-1'`` as the
      filesystem id because a standby is not attached to a filesystem;
    * ``fs_metadata`` for every filesystem (comma-joined data pools, fs id,
      metadata pool, fs name);
    * ``mds_metadata`` for every MDS listed in the filesystem's
      ``mdsmap['info']``.

    ``mds_metadata`` label order: daemon name, fs id, hostname, address,
    rank, version.  Hostname and version fall back to empty strings when
    the daemon is not present in the service list.
    """
    fs_map = self.get('fs_map')
    servers = self.get_service_list()
    self.log.debug('standbys: {}'.format(fs_map['standbys']))

    # export standby mds metadata, default standby fs_id is '-1'
    for standby in fs_map['standbys']:
        id_ = standby['name']
        host_version = servers.get((id_, 'mds'), ('', ''))
        self.metrics['mds_metadata'].set(1, (
            'mds.{}'.format(id_), '-1', host_version[0],
            standby['addr'], standby['rank'], host_version[1]
        ))

    for fs in fs_map['filesystems']:
        mdsmap = fs['mdsmap']

        # collect fs metadata
        data_pools = ",".join(str(pool) for pool in mdsmap['data_pools'])
        self.metrics['fs_metadata'].set(1, (
            data_pools,
            fs['id'],
            mdsmap['metadata_pool'],
            mdsmap['fs_name']
        ))
        self.log.debug('mdsmap: {}'.format(mdsmap))

        # Only the daemon records are needed; the keys of 'info' are unused.
        for daemon in mdsmap['info'].values():
            id_ = daemon['name']
            host_version = servers.get((id_, 'mds'), ('', ''))
            self.metrics['mds_metadata'].set(1, (
                'mds.{}'.format(id_), fs['id'], host_version[0],
                daemon['addr'], daemon['rank'], host_version[1]
            ))
def get_quorum_status(self):
    """Publish monitor metadata and per-monitor quorum membership.

    Decodes the JSON document stored under ``self.get('mon_status')['json']``
    and, for each monitor in its ``monmap``:

    * sets ``mon_metadata`` to 1 with the labels daemon name, hostname,
      public address (port stripped), rank and version;
    * sets ``mon_quorum_status`` to 1 when the monitor's rank is listed in
      ``quorum`` and to 0 otherwise.

    Hostname and version come from ``self.get_service_list()`` and default
    to empty strings when the monitor is not listed there.
    """
    mon_status = json.loads(self.get('mon_status')['json'])
    servers = self.get_service_list()
    for mon in mon_status['monmap']['mons']:
        rank = mon['rank']
        id_ = mon['name']
        daemon = 'mon.{}'.format(id_)
        host_version = servers.get((id_, 'mon'), ('', ''))
        # Split on the LAST colon only: a bracketed IPv6 address such as
        # "[2001:db8::1]:6789/0" contains colons itself, and splitting on
        # the first one would truncate it to "[2001".
        # NOTE(review): assumes addresses look like "<host>:<port>/<nonce>".
        public_ip = mon['public_addr'].rsplit(':', 1)[0]
        self.metrics['mon_metadata'].set(
            1, (daemon, host_version[0], public_ip, rank, host_version[1]))
        in_quorum = int(rank in mon_status['quorum'])
        self.metrics['mon_quorum_status'].set(in_quorum, (daemon, ))
def get_quorum_status(self):
    """Export metadata and quorum state for every monitor in the monmap.

    The monitor status is read as a JSON string from
    ``self.get('mon_status')['json']``.  For each entry of
    ``monmap['mons']`` two metrics are written:

    * ``mon_metadata`` = 1, labelled with daemon name, hostname, public
      address without the port, rank and version;
    * ``mon_quorum_status`` = 1 if the rank appears in ``quorum``, else 0,
      labelled with the daemon name.

    Hostname and version are looked up in ``self.get_service_list()`` under
    ``(name, 'mon')`` and are empty strings when the lookup misses.
    """
    mon_status = json.loads(self.get('mon_status')['json'])
    servers = self.get_service_list()
    for mon in mon_status['monmap']['mons']:
        rank = mon['rank']
        id_ = mon['name']
        host_version = servers.get((id_, 'mon'), ('', ''))
        # rsplit(':', 1) removes just the trailing ":port/nonce".  Using
        # split(':')[0] would cut a bracketed IPv6 address at its first
        # internal colon.
        # NOTE(review): assumes addresses look like "<host>:<port>/<nonce>".
        address = mon['public_addr'].rsplit(':', 1)[0]
        self.metrics['mon_metadata'].set(1, (
            'mon.{}'.format(id_), host_version[0],
            address, rank,
            host_version[1]
        ))
        in_quorum = int(rank in mon_status['quorum'])
        self.metrics['mon_quorum_status'].set(in_quorum, (
            'mon.{}'.format(id_),
        ))
def get_mgr_status(self):
    """Publish manager daemon metadata, active/standby state and module state.

    Reads ``mgr_map`` through ``self.get`` and the
    ``(service_id, service_type) -> (hostname, version)`` mapping from
    ``self.get_service_list()``, then records:

    * ``mgr_metadata`` = 1 for every mgr (standbys first, active last),
      labelled with daemon name, hostname and version;
    * ``mgr_status`` = 1 for the active mgr and 0 for standbys;
    * ``mgr_module_status`` for every available module: 2 when it is an
      always-on module of the active mgr's release, 1 when it is listed in
      ``mgr_map['modules']``, 0 otherwise;
    * ``mgr_module_can_run`` = 1 or 0 from the module's ``can_run`` flag.

    The release name is taken as the second-to-last word of the active
    mgr's version string.  When that string is unavailable or too short,
    the release stays ``None`` and no module is treated as always-on.
    """
    mgr_map = self.get('mgr_map')
    servers = self.get_service_list()

    active = mgr_map['active_name']
    standbys = [s.get('name') for s in mgr_map['standbys']]

    all_mgrs = list(standbys)
    all_mgrs.append(active)

    all_modules = {module.get('name'): module.get('can_run')
                   for module in mgr_map['available_modules']}

    ceph_release = None
    for mgr in all_mgrs:
        host_version = servers.get((mgr, 'mgr'), ('', ''))
        if mgr == active:
            _state = 1
            # The version defaults to '' when the active mgr is missing
            # from the service list; indexing [-2] on the empty split
            # result would raise IndexError, so only read the release when
            # there are enough words.
            version_words = host_version[1].split()
            if len(version_words) >= 2:
                ceph_release = version_words[-2]  # e.g. nautilus
        else:
            _state = 0

        self.metrics['mgr_metadata'].set(1, (
            'mgr.{}'.format(mgr), host_version[0],
            host_version[1]
        ))
        self.metrics['mgr_status'].set(_state, (
            'mgr.{}'.format(mgr),
        ))

    always_on_modules = mgr_map['always_on_modules'].get(ceph_release, [])
    active_modules = list(always_on_modules)
    active_modules.extend(mgr_map['modules'])

    for mod_name, can_run in all_modules.items():
        if mod_name in always_on_modules:
            _state = 2
        elif mod_name in active_modules:
            _state = 1
        else:
            _state = 0

        _can_run = 1 if can_run else 0
        self.metrics['mgr_module_status'].set(_state, (mod_name,))
        self.metrics['mgr_module_can_run'].set(_can_run, (mod_name,))
def get_fs(self):
    """Publish CephFS filesystem metadata and the metadata of its MDS daemons.

    Reads ``fs_map`` through ``self.get`` and, for every filesystem in it:

    * sets ``fs_metadata`` to 1, labelled with the comma-joined data pool
      ids, the filesystem id, the metadata pool and the filesystem name;
    * sets ``mds_metadata`` to 1 for each daemon in ``mdsmap['info']``,
      labelled with daemon name, filesystem id, hostname, address, rank
      and version.

    Hostname and version are looked up in ``self.get_service_list()`` under
    ``(name, 'mds')`` and default to empty strings when the daemon is not
    listed.
    """
    fs_map = self.get('fs_map')
    servers = self.get_service_list()

    for fs in fs_map['filesystems']:
        mdsmap = fs['mdsmap']

        # collect fs metadata
        data_pools = ",".join(str(pool) for pool in mdsmap['data_pools'])
        self.metrics['fs_metadata'].set(1, (
            data_pools,
            fs['id'],
            mdsmap['metadata_pool'],
            mdsmap['fs_name']
        ))
        self.log.debug('mdsmap: {}'.format(mdsmap))

        # Only the daemon records are needed; the keys of 'info' are unused.
        for daemon in mdsmap['info'].values():
            id_ = daemon['name']
            host_version = servers.get((id_, 'mds'), ('', ''))
            self.metrics['mds_metadata'].set(1, (
                'mds.{}'.format(id_), fs['id'], host_version[0],
                daemon['addr'], daemon['rank'], host_version[1]
            ))
def get_metadata_and_osd_status(self):
    """Publish OSD flags, OSD metadata/status, disk occupation and more.

    In order, this method:

    1. sets one ``osd_flag_<flag>`` metric per entry of ``OSD_FLAGS`` to 1
       or 0 depending on whether the flag is present in the comma-separated
       ``osd_map['flags']`` string;
    2. for every OSD in ``osd_map['osds']`` records ``osd_metadata``
       (labels: back iface, daemon name, cluster address, device class,
       front iface, hostname, object store, public address, version), one
       ``osd_<state>`` metric per entry of ``OSD_STATUS``, and, when both a
       backing device node and a hostname are known, ``disk_occupation``
       (labels: daemon name, device, db device, wal device, hostname);
    3. records ``pool_metadata`` for every pool in ``osd_map['pools']``;
    4. records ``rgw_metadata`` and ``rbd_mirror_metadata`` for the matching
       entries of ``self.get_service_list()``.

    An OSD is skipped entirely when its public or cluster address is
    ``"-"``, when it has no entry in the CRUSH device list, or when
    ``self.get_metadata`` returns ``None`` for it.
    """
    osd_map = self.get('osd_map')
    osd_flags = osd_map['flags'].split(',')
    for flag in OSD_FLAGS:
        self.metrics['osd_flag_{}'.format(flag)].set(
            int(flag in osd_flags)
        )

    osd_devices = self.get('osd_map_crush')['devices']
    servers = self.get_service_list()
    for osd in osd_map['osds']:
        # id can be used to link osd metrics and metadata
        id_ = osd['osd']
        # collect osd metadata
        # Strip only the trailing ":port/nonce"; splitting on the first
        # colon would truncate bracketed IPv6 addresses to "[2001".
        # NOTE(review): assumes addresses look like "<host>:<port>/<nonce>".
        p_addr = osd['public_addr'].rsplit(':', 1)[0]
        c_addr = osd['cluster_addr'].rsplit(':', 1)[0]
        if p_addr == "-" or c_addr == "-":
            self.log.info(
                "Missing address metadata for osd {0}, skipping occupation"
                " and metadata records for this osd".format(id_)
            )
            continue

        # None means "not in the CRUSH map"; '' means "present, no class".
        dev_class = None
        for osd_device in osd_devices:
            if osd_device['id'] == id_:
                dev_class = osd_device.get('class', '')
                break

        if dev_class is None:
            self.log.info("OSD {0} is missing from CRUSH map, "
                          "skipping output".format(id_))
            continue

        host_version = servers.get((str(id_), 'osd'), ('', ''))

        # collect disk occupation metadata
        osd_metadata = self.get_metadata("osd", str(id_))
        if osd_metadata is None:
            continue

        obj_store = osd_metadata.get('osd_objectstore', '')
        f_iface = osd_metadata.get('front_iface', '')
        b_iface = osd_metadata.get('back_iface', '')

        self.metrics['osd_metadata'].set(1, (
            b_iface,
            'osd.{}'.format(id_),
            c_addr,
            dev_class,
            f_iface,
            host_version[0],
            obj_store,
            p_addr,
            host_version[1]
        ))

        # collect osd status
        for state in OSD_STATUS:
            status = osd[state]
            self.metrics['osd_{}'.format(state)].set(status, (
                'osd.{}'.format(id_),
            ))

        # Bind all three device names up front so an unrecognised object
        # store cannot leave any of them undefined.
        osd_dev_node = None
        osd_wal_dev_node = ''
        osd_db_dev_node = ''
        if obj_store == "filestore":
            # collect filestore backend device
            osd_dev_node = osd_metadata.get(
                'backend_filestore_dev_node', None)
            # collect filestore journal device
            osd_wal_dev_node = osd_metadata.get('osd_journal', '')
        elif obj_store == "bluestore":
            # collect bluestore backend device
            osd_dev_node = osd_metadata.get(
                'bluestore_bdev_dev_node', None)
            # collect bluestore wal backend
            osd_wal_dev_node = osd_metadata.get('bluefs_wal_dev_node', '')
            # collect bluestore db backend
            osd_db_dev_node = osd_metadata.get('bluefs_db_dev_node', '')
        if osd_dev_node == "unknown":
            osd_dev_node = None

        osd_hostname = osd_metadata.get('hostname', None)
        if osd_dev_node and osd_hostname:
            self.log.debug("Got dev for osd {0}: {1}/{2}".format(
                id_, osd_hostname, osd_dev_node))
            self.metrics['disk_occupation'].set(1, (
                "osd.{0}".format(id_),
                osd_dev_node,
                osd_db_dev_node,
                osd_wal_dev_node,
                osd_hostname
            ))
        else:
            self.log.info("Missing dev node metadata for osd {0}, skipping "
                          "occupation record for this osd".format(id_))

    for pool in osd_map['pools']:
        self.metrics['pool_metadata'].set(
            1, (pool['pool'], pool['pool_name']))

    # Populate other servers metadata
    for key, value in servers.items():
        service_id, service_type = key
        if service_type == 'rgw':
            hostname, version = value
            self.metrics['rgw_metadata'].set(
                1,
                ('{}.{}'.format(service_type, service_id), hostname, version)
            )
        elif service_type == 'rbd-mirror':
            mirror_metadata = self.get_metadata('rbd-mirror', service_id)
            if mirror_metadata is None:
                continue
            mirror_metadata['ceph_daemon'] = '{}.{}'.format(service_type,
                                                            service_id)
            # Pass a real tuple like every other metric here: a bare
            # generator can be consumed only once and compares by identity.
            self.metrics['rbd_mirror_metadata'].set(
                1, tuple(mirror_metadata.get(k, '')
                         for k in RBD_MIRROR_METADATA)
            )
def get_metadata_and_osd_status(self):
    """Export OSD flags, per-OSD metadata and status, pools and other daemons.

    Steps performed:

    1. For each flag in ``OSD_FLAGS``, set ``osd_flag_<flag>`` to 1 when the
       flag appears in the comma-separated ``osd_map['flags']``, else 0.
    2. For each OSD in ``osd_map['osds']``:

       * skip it when its public or cluster address is ``"-"``, when it is
         absent from the CRUSH device list, or when ``self.get_metadata``
         returns ``None``;
       * set ``osd_metadata`` to 1 (labels: back iface, daemon name,
         cluster address, device class, front iface, hostname, object
         store, public address, version);
       * set one ``osd_<state>`` metric per entry of ``OSD_STATUS``;
       * set ``disk_occupation`` to 1 (labels: daemon name, device, db
         device, wal device, hostname) when a backing device node and a
         hostname are both known, otherwise log that it was skipped.
    3. Set ``pool_metadata`` to 1 for each pool in ``osd_map['pools']``.
    4. Set ``rgw_metadata`` / ``rbd_mirror_metadata`` for matching entries
       of ``self.get_service_list()``.
    """
    osd_map = self.get('osd_map')
    osd_flags = osd_map['flags'].split(',')
    for flag in OSD_FLAGS:
        self.metrics['osd_flag_{}'.format(flag)].set(
            int(flag in osd_flags)
        )

    osd_devices = self.get('osd_map_crush')['devices']
    servers = self.get_service_list()
    for osd in osd_map['osds']:
        # id can be used to link osd metrics and metadata
        id_ = osd['osd']
        # collect osd metadata
        # Drop only the trailing ":port/nonce".  split(':')[0] would cut a
        # bracketed IPv6 address at its first internal colon.
        # NOTE(review): assumes addresses look like "<host>:<port>/<nonce>".
        p_addr = osd['public_addr'].rsplit(':', 1)[0]
        c_addr = osd['cluster_addr'].rsplit(':', 1)[0]
        if p_addr == "-" or c_addr == "-":
            self.log.info(
                "Missing address metadata for osd {0}, skipping occupation"
                " and metadata records for this osd".format(id_)
            )
            continue

        # None means "not in the CRUSH map"; '' means "present, no class".
        dev_class = None
        for osd_device in osd_devices:
            if osd_device['id'] == id_:
                dev_class = osd_device.get('class', '')
                break

        if dev_class is None:
            self.log.info(
                "OSD {0} is missing from CRUSH map, skipping output".format(
                    id_))
            continue

        host_version = servers.get((str(id_), 'osd'), ('', ''))

        # collect disk occupation metadata
        osd_metadata = self.get_metadata("osd", str(id_))
        if osd_metadata is None:
            continue

        obj_store = osd_metadata.get('osd_objectstore', '')
        f_iface = osd_metadata.get('front_iface', '')
        b_iface = osd_metadata.get('back_iface', '')

        self.metrics['osd_metadata'].set(1, (
            b_iface,
            'osd.{}'.format(id_),
            c_addr,
            dev_class,
            f_iface,
            host_version[0],
            obj_store,
            p_addr,
            host_version[1]
        ))

        # collect osd status
        for state in OSD_STATUS:
            status = osd[state]
            self.metrics['osd_{}'.format(state)].set(status, (
                'osd.{}'.format(id_),
            ))

        # Reset the device names for every OSD.  Without this, an object
        # store other than filestore/bluestore would either raise
        # UnboundLocalError (first OSD) or silently reuse the previous
        # OSD's devices in the disk_occupation record.
        osd_dev_node = None
        osd_wal_dev_node = ''
        osd_db_dev_node = ''
        if obj_store == "filestore":
            # collect filestore backend device
            osd_dev_node = osd_metadata.get(
                'backend_filestore_dev_node', None)
            # collect filestore journal device
            osd_wal_dev_node = osd_metadata.get('osd_journal', '')
        elif obj_store == "bluestore":
            # collect bluestore backend device
            osd_dev_node = osd_metadata.get(
                'bluestore_bdev_dev_node', None)
            # collect bluestore wal backend
            osd_wal_dev_node = osd_metadata.get('bluefs_wal_dev_node', '')
            # collect bluestore db backend
            osd_db_dev_node = osd_metadata.get('bluefs_db_dev_node', '')
        if osd_dev_node == "unknown":
            osd_dev_node = None

        osd_hostname = osd_metadata.get('hostname', None)
        if osd_dev_node and osd_hostname:
            self.log.debug("Got dev for osd {0}: {1}/{2}".format(
                id_, osd_hostname, osd_dev_node))
            self.metrics['disk_occupation'].set(1, (
                "osd.{0}".format(id_),
                osd_dev_node,
                osd_db_dev_node,
                osd_wal_dev_node,
                osd_hostname
            ))
        else:
            self.log.info("Missing dev node metadata for osd {0}, skipping "
                          "occupation record for this osd".format(id_))

    for pool in osd_map['pools']:
        self.metrics['pool_metadata'].set(
            1, (pool['pool'], pool['pool_name']))

    # Populate other servers metadata
    for key, value in servers.items():
        service_id, service_type = key
        if service_type == 'rgw':
            hostname, version = value
            self.metrics['rgw_metadata'].set(
                1,
                ('{}.{}'.format(service_type, service_id), hostname, version)
            )
        elif service_type == 'rbd-mirror':
            mirror_metadata = self.get_metadata('rbd-mirror', service_id)
            if mirror_metadata is None:
                continue
            mirror_metadata['ceph_daemon'] = '{}.{}'.format(service_type,
                                                            service_id)
            # Materialise the labels as a tuple, matching every other
            # metric in this method; a generator is single-use and is
            # compared by identity rather than by content.
            self.metrics['rbd_mirror_metadata'].set(
                1, tuple(mirror_metadata.get(k, '')
                         for k in RBD_MIRROR_METADATA)
            )