Exemple #1
0
 def get_mon_status_wise_counts(self, cluster_id):
     """Count a cluster's monitors and those reported outside quorum.

     Reads the ``mon_map`` and ``mon_status`` documents of the cluster via
     ``etcd_read_key`` and decodes their ``data`` field as JSON.

     :param cluster_id: id interpolated into the ``/clusters/<id>/maps``
         keys.
     :returns: dict with the integer counters ``total`` and
         ``outside_quorum``. A counter keeps its initial 0 when the
         corresponding document is missing or cannot be decoded.
     """
     mon_status_wise_counts = {
         'outside_quorum': 0,
         'total': 0
     }
     try:
         mon_map = etcd_read_key(
             '/clusters/%s/maps/mon_map/data' % cluster_id
         )
         mon_map = json.loads(mon_map.get('data', '{}'))
         # The '{}' fallback above produces a map without 'mons'; indexing
         # it raised a KeyError that none of the handlers below catches.
         mon_status_wise_counts['total'] = len(mon_map.get('mons') or [])
         mon_status = etcd_read_key(
             '/clusters/%s/maps/mon_status/data' % cluster_id
         )
         mon_status = json.loads(mon_status.get('data', '{}'))
         mon_status_wise_counts['outside_quorum'] = len(
             mon_status.get('outside_quorum', [])
         )
     except EtcdKeyNotFound:
         # A missing key is not reported; keep the counters gathered so far.
         pass
     except (ValueError, TendrlPerformanceMonitoringException) as ex:
         Event(
             ExceptionMessage(
                 priority="debug",
                 publisher=NS.publisher_id,
                 payload={
                     "message": "Exception caught computing mon status "
                     "wise counts",
                     "exception": ex
                 }
             )
         )
     return mon_status_wise_counts
 def get_node_osd_status_wise_counts(self, node_id):
     """Count the OSDs hosted on a node, how many are down, and alerts.

     The node's IP is looked up in the ``/indexes/ip`` mapping (ip ->
     node id). An OSD from the cluster's OSD map is attributed to the node
     when that IP occurs in the OSD's ``cluster_addr`` or ``public_addr``.
     Critical and warning 'osd' alerts of the cluster are counted when the
     OSD number encoded in their ``plugin_instance`` tag (``osd_<n>``)
     belongs to one of those OSDs.

     :param node_id: id of the node to report on.
     :returns: dict with the counters ``total``, ``down``,
         ``pm_consts.CRITICAL_ALERTS`` and ``pm_consts.WARNING_ALERTS``.
         Counters not reached before a failure keep their initial 0; the
         failure is published as a debug event.
     """
     osds_in_node = []
     osd_status_wise_counts = {
         'total': 0,
         'down': 0,
         pm_consts.CRITICAL_ALERTS: 0,
         pm_consts.WARNING_ALERTS: 0
     }
     cluster_id = central_store_util.get_node_cluster_id(node_id)
     node_ip = ''
     ip_indexes = etcd_read_key('/indexes/ip')
     for ip, indexed_node_id in ip_indexes.iteritems():
         if node_id == indexed_node_id:
             node_ip = ip

     def alert_is_on_node(alert):
         # plugin_instance looks like 'osd_<n>'; compare <n> with the
         # OSD numbers collected for this node.
         plugin_instance = alert['tags'].get('plugin_instance', '')
         return int(plugin_instance[len('osd_'):]) in osds_in_node

     try:
         osds = etcd_read_key('/clusters/%s/maps/osd_map/data/osds' %
                              cluster_id)
         osds = ast.literal_eval(osds.get('osds', '[]'))
         for osd in osds:
             # Without the node_ip guard an unresolved IP ('') is a
             # substring of every address and would claim every OSD of
             # the cluster for this node.
             # NOTE(review): substring matching also lets '10.0.0.1'
             # match '10.0.0.10' - confirm the address format before
             # tightening this comparison.
             if node_ip and (node_ip in osd.get('cluster_addr', '')
                             or node_ip in osd.get('public_addr', '')):
                 osds_in_node.append(osd.get('osd'))
                 if 'up' not in osd.get('state'):
                     osd_status_wise_counts['down'] += 1
                 osd_status_wise_counts['total'] += 1
         crit_alerts, warn_alerts = parse_resource_alerts(
             'osd', pm_consts.CLUSTER, cluster_id=cluster_id)
         osd_status_wise_counts[pm_consts.CRITICAL_ALERTS] = sum(
             1 for alert in crit_alerts if alert_is_on_node(alert))
         osd_status_wise_counts[pm_consts.WARNING_ALERTS] = sum(
             1 for alert in warn_alerts if alert_is_on_node(alert))
     except Exception as ex:
         # Best effort: publish the failure and return what was counted.
         Event(
             ExceptionMessage(priority="debug",
                              publisher=NS.publisher_id,
                              payload={
                                  "message":
                                  "Exception caught computing node osd "
                                  "counts",
                                  "exception":
                                  ex
                              }))
     return osd_status_wise_counts
 def get_mon_status_wise_counts(self, cluster_id):
     """Count a cluster's monitors and those reported outside quorum.

     :param cluster_id: id interpolated into the ``/clusters/<id>/maps``
         keys read through ``etcd_read_key``.
     :returns: dict with the integer counters ``total`` and
         ``outside_quorum``; a counter keeps its initial 0 when its
         source key is not found.
     """
     mon_status_wise_counts = {'outside_quorum': 0, 'total': 0}
     try:
         mon_map = etcd_read_key('/clusters/%s/maps/mon_map/data' % cluster_id)
         mon_map = json.loads(mon_map.get('data', '{}'))
         # The '{}' fallback above has no 'mons' entry; indexing it raised
         # a KeyError that the handler below does not catch.
         mon_status_wise_counts['total'] = len(mon_map.get('mons') or [])
         mon_status = etcd_read_key(
             '/clusters/%s/maps/mon_status/data' % cluster_id)
         mon_status = json.loads(mon_status.get('data', '{}'))
         mon_status_wise_counts['outside_quorum'] = len(
             mon_status.get('outside_quorum', []))
     except EtcdKeyNotFound:
         # A missing key is not reported; keep the counters gathered so far.
         pass
     return mon_status_wise_counts
 def get_nw_node_interfaces(self, node_id, nw_type, cluster_id):
     """List a node's interfaces on one of the cluster's networks.

     The subnet is taken from entry ``nw_type`` of the cluster's config
     map; the interfaces come from ``/networks/<subnet>/<node_id>`` with
     '/' in the subnet replaced by '_'.

     :param node_id: node whose interfaces are wanted.
     :param nw_type: key of the subnet inside the cluster config map.
     :param cluster_id: cluster owning the config map.
     :returns: list of the ``interface`` values found; any failure is
         swallowed and the entries collected so far are returned.
     """
     interfaces = []
     try:
         cluster_config = etcd_read_key(
             '/clusters/%s/maps/config/data' % (cluster_id))
         subnet = json.loads(
             cluster_config.get('data', '{}')).get(nw_type, '')
         if subnet:
             node_networks = etcd_read_key(
                 '/networks/%s/%s' % (subnet.replace('/', '_'), node_id))
             # Appending one by one keeps partial results on a failure.
             for _key, details in node_networks.iteritems():
                 interfaces.append(details.get('interface'))
     except Exception:
         pass
     return interfaces
 def get_cluster_osds(self, cluster_id):
     """Return the ``osds`` list stored in a cluster's OSD map.

     :param cluster_id: id interpolated into
         ``/clusters/<id>/maps/osd_map``.
     :returns: the decoded ``osds`` entry, or an empty list when the key
         is not found or the map carries no (or an empty) ``osds`` entry.
     """
     try:
         osd_data = etcd_read_key('/clusters/%s/maps/osd_map' % cluster_id)
         osd_data = json.loads(osd_data['data'])
     except EtcdKeyNotFound:
         return []
     # A map without 'osds' used to leak None to callers expecting a list.
     return osd_data.get('osds') or []
 def get_cluster_pools(self, cluster_id):
     """Collect the pool documents of a cluster, keyed by pool id.

     :param cluster_id: cluster whose pools are read.
     :returns: dict mapping pool id to the value read from
         ``/clusters/<cluster_id>/Pools/<pool_id>``. Reading stops at the
         first key that is not found; pools read before it are returned.
     """
     pools_by_id = {}
     try:
         for pid in self.get_cluster_pool_ids(cluster_id):
             pool_key = '/clusters/%s/Pools/%s' % (cluster_id, pid)
             pools_by_id[pid] = etcd_read_key(pool_key)
     except EtcdKeyNotFound:
         pass
     return pools_by_id
Exemple #7
0
 def get_cluster_osds(self, cluster_id):
     """Return the ``osds`` list stored in a cluster's OSD map.

     :param cluster_id: id interpolated into
         ``/clusters/<id>/maps/osd_map``.
     :returns: the decoded ``osds`` entry; an empty list when that entry
         is missing or empty, or when the read raises ``EtcdKeyNotFound``
         or ``TendrlPerformanceMonitoringException``.
     """
     try:
         raw_map = etcd_read_key('/clusters/%s/maps/osd_map' % cluster_id)
         return json.loads(raw_map['data']).get('osds') or []
     except (EtcdKeyNotFound, TendrlPerformanceMonitoringException):
         return []
Exemple #8
0
 def get_nw_node_interfaces(self, node_id, nw_type, cluster_id):
     """List a node's interfaces on one of the cluster's networks.

     The subnet is taken from entry ``nw_type`` of the cluster's config
     map; the interfaces come from ``/networks/<subnet>/<node_id>`` with
     '/' in the subnet replaced by '_'.

     :param node_id: node whose interfaces are wanted.
     :param nw_type: key of the subnet inside the cluster config map.
     :param cluster_id: cluster owning the config map.
     :returns: list of the ``interface`` values found. Missing keys and
         malformed data are ignored silently; a
         ``TendrlPerformanceMonitoringException`` is published as a debug
         event. In both cases the entries collected so far are returned.
     """
     interfaces = []
     try:
         cluster_config = etcd_read_key(
             '/clusters/%s/maps/config/data' % (cluster_id))
         subnet = json.loads(
             cluster_config.get('data', '{}')).get(nw_type, '')
         if subnet:
             node_networks = etcd_read_key(
                 '/networks/%s/%s' % (subnet.replace('/', '_'), node_id))
             # Appending one by one keeps partial results on a failure.
             for _key, details in node_networks.iteritems():
                 interfaces.append(details.get('interface'))
     except (EtcdKeyNotFound, ValueError, TypeError, AttributeError):
         pass
     except TendrlPerformanceMonitoringException as ex:
         debug_payload = {
             "message": "Error fetching %s n/w info for node %s" % (
                 nw_type, node_id),
             "exception": ex
         }
         Event(
             ExceptionMessage(
                 priority="debug",
                 publisher=NS.publisher_id,
                 payload=debug_payload
             )
         )
     return interfaces
 def get_pg_counts(self, cluster_id):
     """Compute PG counters from the ``all`` entry of the PG summary.

     :param cluster_id: id interpolated into
         ``/clusters/<id>/maps/pg_summary/data``.
     :returns: result of ``_calculate_pg_counters`` on the ``all`` entry
         (parsed with ``ast.literal_eval`` first when it is a string), or
         an empty dict when the entry or the key is missing.
     """
     try:
         raw_summary = etcd_read_key('/clusters/%s/maps/pg_summary/data' %
                                     cluster_id)
         summary = json.loads(raw_summary.get('data', '{}'))
         if 'all' in summary:
             all_pgs = summary['all']
             if isinstance(all_pgs, basestring):
                 all_pgs = ast.literal_eval(all_pgs)
             return _calculate_pg_counters(all_pgs)
         return {}
     except EtcdKeyNotFound:
         return {}
Exemple #10
0
 def get_cluster_volumes(self, cluster_id):
     """Collect the volume documents of a cluster, keyed by volume id.

     :param cluster_id: cluster whose volumes are read.
     :returns: dict mapping volume id to the value read from
         ``/clusters/<cluster_id>/Volumes/<volume_id>``. Volumes whose key
         is not found are skipped; an empty dict is returned when the id
         listing itself is not found.
     """
     try:
         ids = self.get_cluster_volume_ids(cluster_id)
     except EtcdKeyNotFound:
         return {}
     volumes_by_id = {}
     for vol_id in ids:
         vol_key = '/clusters/%s/Volumes/%s' % (cluster_id, vol_id)
         try:
             volumes_by_id[vol_id] = etcd_read_key(vol_key)
         except EtcdKeyNotFound:
             pass
     return volumes_by_id
Exemple #11
0
 def get_cluster_bricks(self, cluster_id):
     """Collect the brick documents of a cluster.

     Every leaf below ``/clusters/<cluster_id>/Bricks/all`` is read again
     through ``etcd_read_key``. Bricks without a ``vol_id`` are skipped.
     When a brick has both ``utilization`` and ``brick_path``, its
     utilization dict is enriched with the volume name, cluster name,
     brick path and hostname.

     :param cluster_id: cluster whose bricks are read.
     :returns: dict keyed by the sixth '/'-separated component of each
         leaf key (presumably the brick name - verify against the key
         layout). Bricks whose lookup raises ``EtcdKeyNotFound`` are
         reported as a debug event and left out.
     """
     bricks = {}
     try:
         brick_dir = central_store_util.read_key(
             '/clusters/%s/Bricks/all' % cluster_id
         )
     except EtcdKeyNotFound:
         return bricks
     for leaf in brick_dir.leaves:
         try:
             key_parts = leaf.key.split('/')
             brick = etcd_read_key(
                 '/clusters/%s/Bricks/all/%s' % (cluster_id, key_parts[5])
             )
             if 'vol_id' in brick:
                 if 'utilization' in brick and 'brick_path' in brick:
                     # Assignments stay sequential so a failing lookup
                     # leaves the same partial state as before.
                     brick['utilization']['vol_name'] = \
                         central_store_util.get_volume_name(
                             cluster_id, brick['vol_id'])
                     brick['utilization']['cluster_name'] = \
                         central_store_util.get_cluster_name(cluster_id)
                     brick['utilization']['brick_path'] = \
                         brick['brick_path']
                     brick['utilization']['hostname'] = brick['hostname']
                 bricks[key_parts[5]] = brick
         except EtcdKeyNotFound as ex:
             debug_payload = {
                 "message": "Error fetching details for %s brick" % leaf.key,
                 "exception": ex
             }
             Event(
                 ExceptionMessage(
                     priority="debug",
                     publisher=NS.publisher_id,
                     payload=debug_payload
                 )
             )
     return bricks
Exemple #12
0
 def get_node_brick_status_counts(self, node_id):
     """Count a node's bricks, its stopped bricks and its brick alerts.

     Bricks are taken from the ``Bricks`` entry of every volume of the
     node's cluster and attributed to the node when their ``hostname``
     equals the node's name or its IP from ``/indexes/ip``.

     :param node_id: id of the node to report on.
     :returns: dict with the counters ``stopped``, ``total``,
         ``pm_consts.WARNING_ALERTS`` and ``pm_consts.CRITICAL_ALERTS``.
         A failure inside the counting section is published as an info
         event and the counters reached so far are returned.
     """
     node_name = central_store_util.get_node_name_from_id(node_id)
     node_ip = ''
     # No early exit: when several IPs map to the node the last one wins.
     for candidate_ip, owner_id in etcd_read_key('/indexes/ip').iteritems():
         if owner_id == node_id:
             node_ip = candidate_ip
     counts = {
         'stopped': 0,
         'total': 0,
         pm_consts.WARNING_ALERTS: 0,
         pm_consts.CRITICAL_ALERTS: 0
     }
     try:
         cluster_id = central_store_util.get_node_cluster_id(node_id)
         if cluster_id:
             volumes = self.get_cluster_volumes(cluster_id)
             for _vol_id, volume in volumes.iteritems():
                 volume_bricks = volume.get('Bricks', {})
                 for _path, brick in volume_bricks.iteritems():
                     if (brick['hostname'] != node_name
                             and brick['hostname'] != node_ip):
                         continue
                     if brick['status'] == 'Stopped':
                         counts['stopped'] += 1
                     counts['total'] += 1
         crit_alerts, warn_alerts = parse_resource_alerts(
             'brick', pm_consts.CLUSTER, cluster_id=cluster_id)
         counts[pm_consts.CRITICAL_ALERTS] = sum(
             1 for alert in crit_alerts if alert['node_id'] == node_id)
         counts[pm_consts.WARNING_ALERTS] = sum(
             1 for alert in warn_alerts if alert['node_id'] == node_id)
     except Exception as ex:
         info_payload = {
             "message": "Exception caught fetching node brick"
             " status wise counts",
             "exception": ex
         }
         Event(
             Message(priority="info",
                     publisher=NS.publisher_id,
                     payload=info_payload))
     return counts
Exemple #13
0
 def get_cluster_pools(self, cluster_id):
     """Collect the pool documents of a cluster, keyed by pool id.

     :param cluster_id: cluster whose pools are read.
     :returns: dict mapping pool id to the value read from
         ``/clusters/<cluster_id>/Pools/<pool_id>``. Reading stops at the
         first ``EtcdKeyNotFound`` or
         ``TendrlPerformanceMonitoringException``; pools read before it
         are returned.
     """
     pools_by_id = {}
     try:
         for pid in self.get_cluster_pool_ids(cluster_id):
             pool_key = '/clusters/%s/Pools/%s' % (cluster_id, pid)
             pools_by_id[pid] = etcd_read_key(pool_key)
     except (EtcdKeyNotFound, TendrlPerformanceMonitoringException):
         pass
     return pools_by_id
 def get_rbds(self, cluster_id, pools):
     """Collect the RBD documents of the given pools.

     :param cluster_id: cluster owning the pools.
     :param pools: mapping whose keys are the pool ids to inspect.
     :returns: list of the values read from
         ``/clusters/<cluster_id>/Pools/<pool_id>/Rbds/<rbd>`` for every
         name reported by ``get_rbd_names``. RBDs whose key is not found
         are skipped; an empty list is returned when the name listing
         itself is not found.
     """
     rbds = []
     try:
         rbd_names = self.get_rbd_names(cluster_id, list(pools))
     except EtcdKeyNotFound:
         # Falling through here left rbd_names unbound and crashed the
         # loop below with UnboundLocalError.
         return rbds
     for pool_id, pool_rbds in rbd_names.iteritems():
         for rbd in pool_rbds:
             try:
                 rbd_dict = etcd_read_key('/clusters/%s/Pools/%s/Rbds/%s' %
                                          (cluster_id, pool_id, rbd))
             except EtcdKeyNotFound:
                 continue
             rbds.append(rbd_dict)
     return rbds
Exemple #15
0
 def configure_monitoring(self, sds_tendrl_context):
     """Build the monitoring plugin configurations for a cluster's nodes.

     For every node of the cluster one entry is produced per plugin listed
     under ``self.name`` in the configured thresholds, followed by one
     peer network throughput entry.

     :param sds_tendrl_context: mapping providing ``integration_id`` and
         ``cluster_name``.
     :returns: list of dicts with the keys ``plugin``, ``plugin_conf``,
         ``node_id`` and ``fqdn``.
     """
     integration_id = sds_tendrl_context['integration_id']
     configs = []
     for node_id in central_store_util.get_cluster_node_ids(integration_id):
         node_context = etcd_read_key(
             '/clusters/%s/nodes/%s/NodeContext' % (integration_id, node_id)
         )
         thresholds = NS.performance_monitoring.config.data['thresholds']
         # Thresholds may be stored as the string form of a dict.
         if isinstance(thresholds, basestring):
             thresholds = ast.literal_eval(
                 thresholds.encode('ascii', 'ignore')
             )
         for plugin, plugin_config in thresholds[self.name].iteritems():
             if isinstance(plugin_config, basestring):
                 plugin_config = ast.literal_eval(
                     plugin_config.encode('ascii', 'ignore')
                 )
             # Work on a copy so the shared thresholds stay untouched.
             node_plugin_conf = copy.deepcopy(plugin_config)
             node_plugin_conf['cluster_id'] = integration_id
             node_plugin_conf['cluster_name'] = \
                 sds_tendrl_context['cluster_name']
             plugin_entry = {
                 'plugin': "tendrl_%sfs_%s" % (self.name, plugin),
                 'plugin_conf': node_plugin_conf,
                 'node_id': node_id,
                 'fqdn': node_context['fqdn']
             }
             configs.append(plugin_entry)
         peer_entry = {
             'plugin': "tendrl_%sfs_peer_network_throughput" % (self.name),
             'plugin_conf': {'peer_name': node_context['fqdn']},
             'node_id': node_id,
             'fqdn': node_context['fqdn']
         }
         configs.append(peer_entry)
     return configs
 def configure_monitoring(self, sds_tendrl_context):
     """Build the not-yet-issued plugin configurations for cluster nodes.

     ``self.configured_nodes`` maps node id to the list of plugin names
     already handed out; only plugins missing from that list produce an
     entry, and they are recorded there as a side effect.

     * Nodes whose NodeContext ``tags`` contain 'mon' get one entry per
       plugin listed under ``self.name`` in the configured thresholds,
       plus a ``tendrl_ceph_cluster_iops`` entry.
     * Every node gets a node network throughput entry once interfaces
       are known for both its cluster and its public network.

     :param sds_tendrl_context: mapping providing ``integration_id`` and
         ``cluster_name``.
     :returns: list of dicts with the keys ``plugin``, ``plugin_conf``,
         ``node_id`` and ``fqdn``.
     """
     configs = []
     cluster_node_ids = central_store_util.get_cluster_node_ids(
         sds_tendrl_context['integration_id'])
     for node_id in cluster_node_ids:
         sds_node_context = etcd_read_key(
             '/clusters/%s/nodes/%s/NodeContext' %
             (sds_tendrl_context['integration_id'], node_id))
         if 'mon' in sds_node_context['tags']:
             config = NS.performance_monitoring.config.data['thresholds']
             # Thresholds may be stored as the string form of a dict.
             if isinstance(config, basestring):
                 config = ast.literal_eval(config.encode('ascii', 'ignore'))
             for plugin, plugin_config in config[self.name].iteritems():
                 if isinstance(plugin_config, basestring):
                     plugin_config = ast.literal_eval(
                         plugin_config.encode('ascii', 'ignore'))
                 plugin_name = "tendrl_%s_%s" % (self.name, plugin)
                 node_plugins = self.configured_nodes.setdefault(node_id, [])
                 if plugin_name in node_plugins:
                     continue
                 node_plugins.append(plugin_name)
                 # Copy before adding the cluster fields: writing them into
                 # the shared threshold dict aliased it across every
                 # returned config and across calls for other clusters.
                 plugin_conf = dict(plugin_config)
                 plugin_conf['cluster_id'] = \
                     sds_tendrl_context['integration_id']
                 plugin_conf['cluster_name'] = \
                     sds_tendrl_context['cluster_name']
                 configs.append({
                     'plugin': plugin_name,
                     'plugin_conf': plugin_conf,
                     'node_id': node_id,
                     'fqdn': sds_node_context['fqdn']
                 })
             node_plugins = self.configured_nodes.setdefault(node_id, [])
             if "tendrl_ceph_cluster_iops" not in node_plugins:
                 node_plugins.append("tendrl_ceph_cluster_iops")
                 configs.append({
                     'plugin': "tendrl_ceph_cluster_iops",
                     'plugin_conf': {
                         'cluster_id': sds_tendrl_context['integration_id'],
                         'cluster_name': sds_tendrl_context['cluster_name']
                     },
                     'node_id': node_id,
                     'fqdn': sds_node_context['fqdn']
                 })
         if ("tendrl_ceph_node_network_throughput"
                 not in self.configured_nodes.get(node_id, [])):
             plugin_config = {}
             plugin_config['cluster_network'] = ' '.join(
                 self.get_nw_node_interfaces(
                     node_id, 'cluster_network',
                     sds_tendrl_context['integration_id']))
             plugin_config['public_network'] = ' '.join(
                 self.get_nw_node_interfaces(
                     node_id, 'public_network',
                     sds_tendrl_context['integration_id']))
             # Only record the plugin once both networks resolved, so the
             # node is retried on a later call otherwise.
             if (plugin_config['cluster_network']
                     and plugin_config['public_network']):
                 # NOTE(review): the tracked name is hard-coded to 'ceph'
                 # while the emitted name uses self.name - they agree only
                 # when self.name == 'ceph'.
                 self.configured_nodes.setdefault(node_id, []).append(
                     "tendrl_ceph_node_network_throughput")
                 configs.append({
                     'plugin':
                     "tendrl_%s_node_network_throughput" % (self.name),
                     'plugin_conf': plugin_config,
                     'node_id': node_id,
                     'fqdn': sds_node_context['fqdn']
                 })
     return configs
Exemple #17
0
 def get_node_brick_status_counts(self, node_id):
     """Count a node's bricks, its stopped bricks and its brick alerts.

     Bricks come from ``get_cluster_bricks`` for the node's cluster and are
     attributed to the node when their ``hostname`` equals the node's name
     or its IP from ``/indexes/ip``.

     :param node_id: id of the node to report on.
     :returns: dict with the counters ``stopped``, ``total``,
         ``pm_consts.WARNING_ALERTS`` and ``pm_consts.CRITICAL_ALERTS``.
         All counters are 0 when the node name or the IP index cannot be
         read (published as an error event); a failure while counting is
         published as an info event and the counters reached so far are
         returned.
     """
     counts = {
         'stopped': 0,
         'total': 0,
         pm_consts.WARNING_ALERTS: 0,
         pm_consts.CRITICAL_ALERTS: 0
     }
     try:
         node_name = central_store_util.get_node_name_from_id(node_id)
     except EtcdKeyNotFound as ex:
         Event(
             ExceptionMessage(
                 priority="error",
                 publisher=NS.publisher_id,
                 payload={
                     "message":
                     "Error fetching node name for node %s" % node_id,
                     "exception": ex
                 }
             )
         )
         return counts
     try:
         ip_indexes = etcd_read_key('/indexes/ip')
     except EtcdKeyNotFound as ex:
         Event(
             ExceptionMessage(
                 priority="error",
                 publisher=NS.publisher_id,
                 payload={
                     "message": "Error fetching ip indexes",
                     "exception": ex
                 }
             )
         )
         return counts
     # First IP indexed for this node, '' when there is none.
     node_ip = next(
         (ip for ip, owner_id in ip_indexes.iteritems()
          if node_id == owner_id),
         ''
     )
     try:
         cluster_id = central_store_util.get_node_cluster_id(node_id)
         if cluster_id:
             cluster_bricks = self.get_cluster_bricks(cluster_id)
             for _brick_key, brick in cluster_bricks.iteritems():
                 if (brick['hostname'] != node_name
                         and brick['hostname'] != node_ip):
                     continue
                 if 'status' in brick and brick['status'] == 'Stopped':
                     counts['stopped'] += 1
                 counts['total'] += 1
         crit_alerts, warn_alerts = parse_resource_alerts(
             'brick', pm_consts.CLUSTER, cluster_id=cluster_id)
         counts[pm_consts.CRITICAL_ALERTS] = sum(
             1 for alert in crit_alerts if alert['node_id'] == node_id)
         counts[pm_consts.WARNING_ALERTS] = sum(
             1 for alert in warn_alerts if alert['node_id'] == node_id)
     except (TendrlPerformanceMonitoringException, AttributeError,
             ValueError, KeyError) as ex:
         info_payload = {
             "message": "Exception caught fetching node brick"
             " status wise counts",
             "exception": ex
         }
         Event(
             Message(
                 priority="info",
                 publisher=NS.publisher_id,
                 payload=info_payload
             )
         )
     return counts
Exemple #18
0
 def configure_monitoring(self, sds_tendrl_context):
     """Build the not-yet-issued plugin configurations for cluster nodes.

     ``self.configured_nodes`` maps node id to the list of plugin names
     already handed out; only plugins missing from that list produce an
     entry, and they are recorded there as a side effect. Every node gets
     one entry per plugin listed under ``self.name`` in the configured
     thresholds plus one peer network throughput entry.

     :param sds_tendrl_context: mapping providing ``integration_id`` and
         ``cluster_name``.
     :returns: list of dicts with the keys ``plugin``, ``plugin_conf``,
         ``node_id`` and ``fqdn``.
     """
     configs = []
     cluster_node_ids = central_store_util.get_cluster_node_ids(
         sds_tendrl_context['integration_id'])
     for node_id in cluster_node_ids:
         sds_node_context = etcd_read_key(
             '/clusters/%s/nodes/%s/NodeContext' %
             (sds_tendrl_context['integration_id'], node_id))
         config = NS.performance_monitoring.config.data['thresholds']
         # Thresholds may be stored as the string form of a dict.
         if isinstance(config, basestring):
             config = ast.literal_eval(config.encode('ascii', 'ignore'))
         plugin_thresholds = config[self.name]
         # Seed the node with an empty list: the old seed stored the bare
         # plugin name (never matched by the prefixed checks), and a node
         # with no thresholds hit a KeyError at the peer check below.
         node_plugins = self.configured_nodes.setdefault(node_id, [])
         for plugin, plugin_config in plugin_thresholds.iteritems():
             if isinstance(plugin_config, basestring):
                 plugin_config = ast.literal_eval(
                     plugin_config.encode('ascii', 'ignore'))
             plugin_name = "tendrl_%sfs_%s" % (self.name, plugin)
             if plugin_name in node_plugins:
                 continue
             node_plugins.append(plugin_name)
             # Copy before adding the cluster fields: writing them into
             # the shared threshold dict aliased it across every returned
             # config and across calls for other clusters.
             plugin_conf = dict(plugin_config)
             plugin_conf['cluster_id'] = \
                 sds_tendrl_context['integration_id']
             plugin_conf['cluster_name'] = \
                 sds_tendrl_context['cluster_name']
             configs.append({
                 'plugin': plugin_name,
                 'plugin_conf': plugin_conf,
                 'node_id': node_id,
                 'fqdn': sds_node_context['fqdn']
             })
         # NOTE(review): the peer plugin is tracked without the 'tendrl_'
         # prefix its emitted name carries; kept as is so existing
         # configured_nodes content still matches.
         peer_plugin = "%sfs_peer_network_throughput" % (self.name)
         if peer_plugin not in node_plugins:
             node_plugins.append(peer_plugin)
             configs.append({
                 'plugin':
                 "tendrl_%sfs_peer_network_throughput" % (self.name),
                 'plugin_conf': {
                     'peer_name': sds_node_context['fqdn']
                 },
                 'node_id': node_id,
                 'fqdn': sds_node_context['fqdn']
             })
     return configs