Example #1
0
 def get_fs(self):
     """Record metadata metrics for filesystems and their MDS daemons.

     Reads ``fs_map`` and the service list, then emits:

     * one ``mds_metadata`` sample per standby MDS, labelled with the
       placeholder filesystem id ``'-1'``;
     * one ``fs_metadata`` sample per filesystem;
     * one ``mds_metadata`` sample per daemon listed in the filesystem's
       ``mdsmap['info']``.

     Host and version labels fall back to empty strings when the daemon
     is not present in the service list.
     """
     fs_map = self.get('fs_map')
     service_map = self.get_service_list()
     self.log.debug('standbys: {}'.format(fs_map['standbys']))

     # Standby daemons do not belong to a filesystem yet, hence fs id '-1'.
     for standby_mds in fs_map['standbys']:
         mds_name = standby_mds['name']
         hostname, version = service_map.get((mds_name, 'mds'), ('', ''))
         standby_labels = (
             'mds.{}'.format(mds_name), '-1',
             hostname, standby_mds['addr'],
             standby_mds['rank'], version,
         )
         self.metrics['mds_metadata'].set(1, standby_labels)

     for filesystem in fs_map['filesystems']:
         # Filesystem-level metadata: pools and name.
         pool_ids = ",".join(map(str, filesystem['mdsmap']['data_pools']))
         fs_labels = (
             pool_ids,
             filesystem['id'],
             filesystem['mdsmap']['metadata_pool'],
             filesystem['mdsmap']['fs_name'],
         )
         self.metrics['fs_metadata'].set(1, fs_labels)
         self.log.debug('mdsmap: {}'.format(filesystem['mdsmap']))

         # Daemon-level metadata; the info key (gid) is not needed here.
         for _gid, mds_daemon in filesystem['mdsmap']['info'].items():
             mds_name = mds_daemon['name']
             hostname, version = service_map.get(
                 (mds_name, 'mds'), ('', ''))
             daemon_labels = (
                 'mds.{}'.format(mds_name), filesystem['id'],
                 hostname, mds_daemon['addr'],
                 mds_daemon['rank'], version,
             )
             self.metrics['mds_metadata'].set(1, daemon_labels)
Example #2
0
 def get_quorum_status(self):
     """Record per-monitor metadata and quorum membership metrics.

     Parses the JSON document stored under ``mon_status`` and, for every
     monitor in its monmap, emits:

     * ``mon_metadata`` = 1 with labels (daemon name, hostname,
       public address without port, rank, version);
     * ``mon_quorum_status`` = 1 if the monitor's rank is listed in
       ``mon_status['quorum']``, otherwise 0.

     Hostname and version fall back to empty strings when the monitor is
     not present in the service list.
     """
     mon_status = json.loads(self.get('mon_status')['json'])
     servers = self.get_service_list()
     for mon in mon_status['monmap']['mons']:
         rank = mon['rank']
         id_ = mon['name']
         host_version = servers.get((id_, 'mon'), ('', ''))
         # Strip only the trailing ":port..." part. Splitting on the first
         # ':' would reduce a bracketed IPv6 address such as
         # "[2001:db8::1]:6789/0" to "[".
         public_ip = mon['public_addr'].rsplit(':', 1)[0]
         self.metrics['mon_metadata'].set(
             1, ('mon.{}'.format(id_), host_version[0],
                 public_ip, rank, host_version[1]))
         in_quorum = int(rank in mon_status['quorum'])
         self.metrics['mon_quorum_status'].set(in_quorum,
                                               ('mon.{}'.format(id_), ))
Example #3
0
 def get_quorum_status(self):
     """Export monitor metadata and whether each monitor is in quorum.

     The ``mon_status`` entry is expected to carry a JSON string under
     its ``'json'`` key. For each monitor in the decoded monmap this sets:

     * ``mon_metadata`` to 1, labelled with the daemon name, hostname,
       public address (port stripped), rank and version;
     * ``mon_quorum_status`` to 1 when the monitor's rank appears in
       ``mon_status['quorum']`` and to 0 otherwise.

     Monitors missing from the service list get empty hostname/version
     labels.
     """
     mon_status = json.loads(self.get('mon_status')['json'])
     servers = self.get_service_list()
     for mon in mon_status['monmap']['mons']:
         rank = mon['rank']
         id_ = mon['name']
         host_version = servers.get((id_, 'mon'), ('', ''))
         # Split on the LAST ':' so that the colons inside an IPv6
         # literal (e.g. "[2001:db8::1]:6789/0") are preserved; the
         # previous split(':')[0] returned just "[" for such addresses.
         public_ip = mon['public_addr'].rsplit(':', 1)[0]
         self.metrics['mon_metadata'].set(1, (
             'mon.{}'.format(id_), host_version[0],
             public_ip, rank,
             host_version[1]
         ))
         in_quorum = int(rank in mon_status['quorum'])
         self.metrics['mon_quorum_status'].set(in_quorum, (
             'mon.{}'.format(id_),
         ))
Example #4
0
    def get_mgr_status(self):
        """Record manager daemon and manager module status metrics.

        For every manager named in ``mgr_map`` (standbys first, then the
        active one) this sets:

        * ``mgr_metadata`` = 1 with labels (daemon name, hostname, version);
        * ``mgr_status`` = 1 for the active manager, 0 for standbys.

        For every entry of ``mgr_map['available_modules']`` it sets:

        * ``mgr_module_status`` = 2 if the module is always-on for the
          active manager's release, 1 if it is otherwise enabled
          (listed in ``mgr_map['modules']``), else 0;
        * ``mgr_module_can_run`` = 1 if the module's ``can_run`` value is
          truthy, else 0.

        The release name is taken as the second-to-last word of the active
        manager's version string. When that string is unavailable or too
        short, no release is assumed and no module is treated as always-on
        (previously this case raised ``IndexError``).
        """
        mgr_map = self.get('mgr_map')
        servers = self.get_service_list()

        active = mgr_map['active_name']
        # Standbys first, active manager last.
        all_mgrs = [s.get('name') for s in mgr_map['standbys']]
        all_mgrs.append(active)

        all_modules = {
            module.get('name'): module.get('can_run')
            for module in mgr_map['available_modules']
        }

        ceph_release = None
        for mgr in all_mgrs:
            host_version = servers.get((mgr, 'mgr'), ('', ''))
            if mgr == active:
                _state = 1
                # The version may be '' (manager missing from the service
                # list), so only index into it when enough words exist.
                version_words = host_version[1].split()
                if len(version_words) >= 2:
                    ceph_release = version_words[-2]  # e.g. nautilus
            else:
                _state = 0

            self.metrics['mgr_metadata'].set(1, (
                'mgr.{}'.format(mgr), host_version[0],
                host_version[1]
            ))
            self.metrics['mgr_status'].set(_state, (
                'mgr.{}'.format(mgr),
            ))

        always_on_modules = mgr_map['always_on_modules'].get(ceph_release, [])
        active_modules = list(always_on_modules)
        active_modules.extend(mgr_map['modules'])

        for mod_name, can_run in all_modules.items():
            if mod_name in always_on_modules:
                _state = 2
            elif mod_name in active_modules:
                _state = 1
            else:
                _state = 0

            _can_run = 1 if can_run else 0
            self.metrics['mgr_module_status'].set(_state, (mod_name,))
            self.metrics['mgr_module_can_run'].set(_can_run, (mod_name,))
Example #5
0
 def get_fs(self):
     """Export filesystem metadata and the metadata of their MDS daemons.

     For each filesystem in ``fs_map`` one ``fs_metadata`` sample is set
     (labels: comma-joined data pool ids, filesystem id, metadata pool,
     filesystem name), followed by one ``mds_metadata`` sample for every
     daemon in that filesystem's ``mdsmap['info']``. Daemons missing from
     the service list get empty hostname/version labels.
     """
     fs_map = self.get('fs_map')
     known_services = self.get_service_list()

     for filesystem in fs_map['filesystems']:
         # Filesystem-level labels.
         joined_pools = ",".join(
             map(str, filesystem['mdsmap']['data_pools']))
         self.metrics['fs_metadata'].set(1, (
             joined_pools,
             filesystem['id'],
             filesystem['mdsmap']['metadata_pool'],
             filesystem['mdsmap']['fs_name'],
         ))
         self.log.debug('mdsmap: {}'.format(filesystem['mdsmap']))

         # Per-daemon labels; the info key (gid) itself is not used.
         for _gid, mds in filesystem['mdsmap']['info'].items():
             daemon_name = mds['name']
             hostname, version = known_services.get(
                 (daemon_name, 'mds'), ('', ''))
             mds_labels = (
                 'mds.{}'.format(daemon_name), filesystem['id'],
                 hostname, mds['addr'],
                 mds['rank'], version,
             )
             self.metrics['mds_metadata'].set(1, mds_labels)
Example #6
0
    def get_metadata_and_osd_status(self):
        """Record OSD flags, OSD metadata/status, disk occupation, pool
        metadata and metadata of rgw / rbd-mirror services.

        Steps, in order:

        1. One ``osd_flag_<flag>`` metric per entry of ``OSD_FLAGS``, set
           to 1 when the flag is present in the OSD map's flag list.
        2. For each OSD: ``osd_metadata``, one ``osd_<state>`` metric per
           entry of ``OSD_STATUS`` and, when both a backing device node
           and a hostname are known, ``disk_occupation``. An OSD is
           skipped entirely when its public or cluster address is "-",
           when it is absent from the CRUSH device list, or when no
           metadata is available for it.
        3. ``pool_metadata`` for each pool in the OSD map.
        4. ``rgw_metadata`` / ``rbd_mirror_metadata`` for the matching
           entries of the service list.
        """
        osd_map = self.get('osd_map')
        osd_flags = osd_map['flags'].split(',')
        for flag in OSD_FLAGS:
            self.metrics['osd_flag_{}'.format(flag)].set(
                int(flag in osd_flags)
            )

        osd_devices = self.get('osd_map_crush')['devices']
        servers = self.get_service_list()
        for osd in osd_map['osds']:
            # id can be used to link osd metrics and metadata
            id_ = osd['osd']
            # collect osd metadata
            # Strip only the trailing ":port..." part: splitting on the
            # first ':' would reduce a bracketed IPv6 address to "[".
            p_addr = osd['public_addr'].rsplit(':', 1)[0]
            c_addr = osd['cluster_addr'].rsplit(':', 1)[0]
            if p_addr == "-" or c_addr == "-":
                self.log.info(
                    "Missing address metadata for osd {0}, skipping occupation"
                    " and metadata records for this osd".format(id_)
                )
                continue

            # Device class of the first CRUSH device with this id; None
            # means the OSD is not in the CRUSH map at all.
            dev_class = next(
                (device.get('class', '') for device in osd_devices
                 if device['id'] == id_),
                None)
            if dev_class is None:
                self.log.info("OSD {0} is missing from CRUSH map, "
                              "skipping output".format(id_))
                continue

            host_version = servers.get((str(id_), 'osd'), ('', ''))

            # collect disk occupation metadata
            osd_metadata = self.get_metadata("osd", str(id_))
            if osd_metadata is None:
                continue

            obj_store = osd_metadata.get('osd_objectstore', '')
            f_iface = osd_metadata.get('front_iface', '')
            b_iface = osd_metadata.get('back_iface', '')

            self.metrics['osd_metadata'].set(1, (
                b_iface,
                'osd.{}'.format(id_),
                c_addr,
                dev_class,
                f_iface,
                host_version[0],
                obj_store,
                p_addr,
                host_version[1]
            ))

            # collect osd status
            for state in OSD_STATUS:
                status = osd[state]
                self.metrics['osd_{}'.format(state)].set(status, (
                    'osd.{}'.format(id_),
                ))

            # Reset per OSD so nothing carries over from the previous
            # iteration when the object store is neither of the two
            # handled below.
            osd_dev_node = None
            osd_wal_dev_node = ''
            osd_db_dev_node = ''
            if obj_store == "filestore":
                # collect filestore backend device
                osd_dev_node = osd_metadata.get(
                    'backend_filestore_dev_node', None)
                # collect filestore journal device
                osd_wal_dev_node = osd_metadata.get('osd_journal', '')
            elif obj_store == "bluestore":
                # collect bluestore backend device
                osd_dev_node = osd_metadata.get(
                    'bluestore_bdev_dev_node', None)
                # collect bluestore wal backend
                osd_wal_dev_node = osd_metadata.get('bluefs_wal_dev_node', '')
                # collect bluestore db backend
                osd_db_dev_node = osd_metadata.get('bluefs_db_dev_node', '')
            if osd_dev_node == "unknown":
                osd_dev_node = None

            osd_hostname = osd_metadata.get('hostname', None)
            if osd_dev_node and osd_hostname:
                self.log.debug("Got dev for osd {0}: {1}/{2}".format(
                    id_, osd_hostname, osd_dev_node))
                self.metrics['disk_occupation'].set(1, (
                    "osd.{0}".format(id_),
                    osd_dev_node,
                    osd_db_dev_node,
                    osd_wal_dev_node,
                    osd_hostname
                ))
            else:
                self.log.info("Missing dev node metadata for osd {0}, skipping "
                              "occupation record for this osd".format(id_))

        for pool in osd_map['pools']:
            self.metrics['pool_metadata'].set(
                1, (pool['pool'], pool['pool_name']))

        # Populate other servers metadata
        for key, value in servers.items():
            service_id, service_type = key
            if service_type == 'rgw':
                hostname, version = value
                self.metrics['rgw_metadata'].set(
                    1,
                    ('{}.{}'.format(service_type, service_id),
                     hostname, version)
                )
            elif service_type == 'rbd-mirror':
                mirror_metadata = self.get_metadata('rbd-mirror', service_id)
                if mirror_metadata is None:
                    continue
                mirror_metadata['ceph_daemon'] = '{}.{}'.format(service_type,
                                                                service_id)
                # Pass a real tuple like every other metric here: a
                # generator can be consumed only once and hashes by
                # identity rather than by its label values.
                self.metrics['rbd_mirror_metadata'].set(
                    1, tuple(mirror_metadata.get(k, '')
                             for k in RBD_MIRROR_METADATA)
                )
Example #7
0
    def get_metadata_and_osd_status(self):
        """Export OSD flag, metadata, status and disk-occupation metrics,
        followed by pool, rgw and rbd-mirror metadata.

        * ``osd_flag_<flag>`` is set for every entry of ``OSD_FLAGS``
          (1 when the flag appears in the OSD map's flag list, else 0).
        * Each OSD yields ``osd_metadata`` and one ``osd_<state>`` metric
          per entry of ``OSD_STATUS``. ``disk_occupation`` is added only
          when a backing device node and a hostname are both known.
          OSDs with a "-" public/cluster address, OSDs missing from the
          CRUSH device list and OSDs without metadata are skipped.
        * ``pool_metadata`` is set for every pool in the OSD map.
        * ``rgw_metadata`` / ``rbd_mirror_metadata`` are set for matching
          entries of the service list.

        The device-node variables are reset for every OSD. Previously an
        object store other than filestore/bluestore raised
        ``UnboundLocalError`` on the first OSD, or silently reused the
        device nodes of the previously processed OSD.
        """
        osd_map = self.get('osd_map')
        osd_flags = osd_map['flags'].split(',')
        for flag in OSD_FLAGS:
            self.metrics['osd_flag_{}'.format(flag)].set(
                int(flag in osd_flags)
            )

        osd_devices = self.get('osd_map_crush')['devices']
        servers = self.get_service_list()
        for osd in osd_map['osds']:
            # id can be used to link osd metrics and metadata
            id_ = osd['osd']
            # collect osd metadata
            # Split on the LAST ':' so the colons inside an IPv6 literal
            # (e.g. "[2001:db8::1]:6800/0") are preserved.
            p_addr = osd['public_addr'].rsplit(':', 1)[0]
            c_addr = osd['cluster_addr'].rsplit(':', 1)[0]
            if p_addr == "-" or c_addr == "-":
                self.log.info(
                    "Missing address metadata for osd {0}, skipping occupation"
                    " and metadata records for this osd".format(id_)
                )
                continue

            # None distinguishes "not in CRUSH map" from "in CRUSH map but
            # without a device class" (which yields '').
            dev_class = None
            for osd_device in osd_devices:
                if osd_device['id'] == id_:
                    dev_class = osd_device.get('class', '')
                    break

            if dev_class is None:
                self.log.info(
                    "OSD {0} is missing from CRUSH map, skipping output".format(
                        id_))
                continue

            host_version = servers.get((str(id_), 'osd'), ('', ''))

            # collect disk occupation metadata
            osd_metadata = self.get_metadata("osd", str(id_))
            if osd_metadata is None:
                continue

            obj_store = osd_metadata.get('osd_objectstore', '')
            f_iface = osd_metadata.get('front_iface', '')
            b_iface = osd_metadata.get('back_iface', '')

            self.metrics['osd_metadata'].set(1, (
                b_iface,
                'osd.{}'.format(id_),
                c_addr,
                dev_class,
                f_iface,
                host_version[0],
                obj_store,
                p_addr,
                host_version[1]
            ))

            # collect osd status
            for state in OSD_STATUS:
                status = osd[state]
                self.metrics['osd_{}'.format(state)].set(status, (
                    'osd.{}'.format(id_),
                ))

            # Defaults for object stores other than filestore/bluestore;
            # must be (re)assigned on every iteration.
            osd_dev_node = None
            osd_wal_dev_node = ''
            osd_db_dev_node = ''
            if obj_store == "filestore":
                # collect filestore backend device
                osd_dev_node = osd_metadata.get(
                    'backend_filestore_dev_node', None)
                # collect filestore journal device
                osd_wal_dev_node = osd_metadata.get('osd_journal', '')
            elif obj_store == "bluestore":
                # collect bluestore backend device
                osd_dev_node = osd_metadata.get(
                    'bluestore_bdev_dev_node', None)
                # collect bluestore wal backend
                osd_wal_dev_node = osd_metadata.get('bluefs_wal_dev_node', '')
                # collect bluestore db backend
                osd_db_dev_node = osd_metadata.get('bluefs_db_dev_node', '')
            if osd_dev_node == "unknown":
                osd_dev_node = None

            osd_hostname = osd_metadata.get('hostname', None)
            if osd_dev_node and osd_hostname:
                self.log.debug("Got dev for osd {0}: {1}/{2}".format(
                    id_, osd_hostname, osd_dev_node))
                self.metrics['disk_occupation'].set(1, (
                    "osd.{0}".format(id_),
                    osd_dev_node,
                    osd_db_dev_node,
                    osd_wal_dev_node,
                    osd_hostname
                ))
            else:
                self.log.info("Missing dev node metadata for osd {0}, skipping "
                              "occupation record for this osd".format(id_))

        for pool in osd_map['pools']:
            self.metrics['pool_metadata'].set(
                1, (pool['pool'], pool['pool_name']))

        # Populate other servers metadata
        for key, value in servers.items():
            service_id, service_type = key
            if service_type == 'rgw':
                hostname, version = value
                self.metrics['rgw_metadata'].set(
                    1,
                    ('{}.{}'.format(service_type, service_id), hostname, version)
                )
            elif service_type == 'rbd-mirror':
                mirror_metadata = self.get_metadata('rbd-mirror', service_id)
                if mirror_metadata is None:
                    continue
                mirror_metadata['ceph_daemon'] = '{}.{}'.format(service_type,
                                                                service_id)
                # Materialise the label values: a generator is single-use
                # and hashes by identity, unlike the tuples used for every
                # other metric in this method.
                self.metrics['rbd_mirror_metadata'].set(
                    1, tuple(mirror_metadata.get(k, '')
                             for k in RBD_MIRROR_METADATA)
                )