def get_brick_utilization(self):
    self.brick_utilizations = {}
    volumes = self.CLUSTER_TOPOLOGY.get('volumes', [])
    threads = []
    for volume in volumes:
        for sub_volume_index, sub_volume_bricks in volume.get(
                'bricks', {}).iteritems():
            for brick in sub_volume_bricks:
                # Only bricks hosted on this node are processed;
                # utilization of bricks on other hosts can't be
                # computed locally.
                brick_hostname = tendrl_glusterfs_utils.find_brick_host(
                    self.etcd_client,
                    self.CONFIG['integration_id'],
                    brick['hostname']
                )
                if brick_hostname:
                    brick_ip = socket.gethostbyname(brick_hostname)
                    if (
                        brick_ip == socket.gethostbyname(
                            self.CONFIG['peer_name']) or
                        brick_hostname == self.CONFIG['peer_name']
                    ):
                        thread = threading.Thread(
                            target=self.calc_brick_utilization,
                            args=(
                                volume['name'],
                                brick,
                            )
                        )
                        thread.start()
                        threads.append(thread)
    for thread in threads:
        thread.join(1)
    for thread in threads:
        del thread
    return self.brick_utilizations
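
# Illustrative sketch only (not part of the plugins): the locality test used
# above is repeated by several collectors, so it is shown here factored into
# a small helper. The helper name `_brick_is_local` and its standalone form
# are assumptions for illustration; the real collectors keep the check inline.
import socket


def _brick_is_local(brick_hostname, peer_name):
    """Return True if `brick_hostname` refers to this peer.

    A brick is considered local when its resolved IP matches the peer's
    resolved IP, or when the hostnames match exactly.
    """
    try:
        return (
            socket.gethostbyname(brick_hostname) ==
            socket.gethostbyname(peer_name) or
            brick_hostname == peer_name
        )
    except socket.gaierror:
        # Name resolution failed; treat the brick as non-local.
        return False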
def get_metrics(self):
    self.initial_io_stats = psutil.disk_io_counters(perdisk=True)
    curr_host_ip = socket.gethostbyname(self.CONFIG['peer_name'])
    time.sleep(self.STAT_INTERVAL_FOR_PER_SEC_COUNTER)
    self.current_io_stats = psutil.disk_io_counters(perdisk=True)
    threads = []
    for volume in self.CLUSTER_TOPOLOGY.get('volumes', []):
        # Default must be a dict (not a list) because iteritems() is called
        for sub_volume_index, sub_volume_bricks in volume.get(
                'bricks', {}).iteritems():
            for brick in sub_volume_bricks:
                brick_hostname = gluster_utils.find_brick_host(
                    self.etcd_client,
                    self.CONFIG['integration_id'],
                    brick['hostname']
                )
                if brick_hostname:
                    brick_ip = socket.gethostbyname(brick_hostname)
                    if (
                        brick_ip == curr_host_ip or
                        brick_hostname == self.CONFIG['peer_name']
                    ):
                        thread = threading.Thread(
                            target=self.populate_disk_details,
                            args=(
                                volume['name'],
                                self.CONFIG['peer_name'],
                                brick['path'],
                            )
                        )
                        thread.start()
                        threads.append(thread)
    for thread in threads:
        thread.join(1)
    for thread in threads:
        del thread
    return self.brick_details
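
# Illustrative sketch only: `populate_disk_details` (not shown here) consumes
# the two psutil snapshots taken above. The per-second rate arithmetic below
# is an assumption about how such counters are typically turned into gauges;
# names like `interval` and `rates` are hypothetical.
import time

import psutil

interval = 1  # seconds; stands in for STAT_INTERVAL_FOR_PER_SEC_COUNTER
initial = psutil.disk_io_counters(perdisk=True)
time.sleep(interval)
current = psutil.disk_io_counters(perdisk=True)

rates = {}
for disk, stats in current.items():
    if disk not in initial:
        continue
    # rate = (counter now - counter at start of interval) / elapsed seconds
    rates[disk] = {
        'read_bytes_per_sec': (
            stats.read_bytes - initial[disk].read_bytes) / float(interval),
        'write_bytes_per_sec': (
            stats.write_bytes - initial[disk].write_bytes) / float(interval),
    }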
def _parse_heal_info_stats(tree, integration_id, etcd_client):
    bricks_dict = {}
    for brick in tree.findall("healInfo/bricks/brick"):
        brick_name = brick.find("name").text
        brick_host = brick_name.split(":")[0]
        brick_path = brick_name.split(":")[1]
        brick_host = tendrl_glusterfs_utils.find_brick_host(
            etcd_client,
            integration_id,
            brick_host
        )
        try:
            no_of_entries = int(brick.find("numberOfEntries").text)
        except ValueError:
            no_of_entries = 0
        bricks_dict["%s:%s" % (brick_host, brick_path)] = no_of_entries
    return bricks_dict
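
# Illustrative sketch only: the XML layout `_parse_heal_info_stats` walks is
# roughly the one produced by `gluster volume heal <vol> info --xml`. The
# sample document below is hypothetical and trimmed to the elements the
# parser actually reads (healInfo/bricks/brick, name, numberOfEntries); the
# real function additionally maps the host through find_brick_host via etcd.
from xml.etree import ElementTree

sample = """
<cliOutput>
  <healInfo>
    <bricks>
      <brick>
        <name>host1.example.com:/gluster/brick1</name>
        <status>Connected</status>
        <numberOfEntries>0</numberOfEntries>
      </brick>
    </bricks>
  </healInfo>
</cliOutput>
"""

root = ElementTree.fromstring(sample)
for brick in root.findall("healInfo/bricks/brick"):
    name = brick.find("name").text
    entries = int(brick.find("numberOfEntries").text)
    # -> host1.example.com:/gluster/brick1 has 0 pending heal entries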
def process_volume_profile_info(self, volume):
    volName = volume['name']
    volId = volume['id']
    try:
        volData = self.etcd_client.read(
            '/clusters/%s/Volumes/%s/data' % (
                self.CONFIG['integration_id'], volId)).value
        if not volData:
            return
        else:
            profiling_enabled = json.loads(volData).get(
                'profiling_enabled')
            if profiling_enabled and profiling_enabled != 'yes':
                return
    except etcd.EtcdKeyNotFound:
        # Volume key not found, nothing to report
        return
    vol_iops = self.get_volume_profile_info(
        volName,
        self.CONFIG['integration_id']
    )
    if not vol_iops:
        return
    read_write_hits = 0
    inode_hits = 0
    entry_hits = 0
    lock_hits = 0
    for brick_det in vol_iops.get('bricks', {}):
        brickName = brick_det.get('brick', '')
        brick_host = brick_det.get('brick', '').split(':')[0]
        brick_host = tendrl_glusterfs_utils.find_brick_host(
            self.etcd_client,
            self.CONFIG['integration_id'],
            brick_host
        )
        if not brick_host:
            continue
        if int(brick_det.get('intervalStats').get('duration')) > 0:
            # A sample output from the command
            # `gluster v profile {vol} info --xml` is as below
            #
            # <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
            # <cliOutput>
            #   <opRet>0</opRet>
            #   <opErrno>0</opErrno>
            #   <opErrstr/>
            #   <volProfile>
            #     <volname>vol1</volname>
            #     <profileOp>3</profileOp>
            #     <brickCount>3</brickCount>
            #     <brick>
            #       <brickName>
            #         dhcp42-80.lab.eng.blr.redhat.com:/root/gluster_bricks/vol1_b3
            #       </brickName>
            #       ...........
            #       <intervalStats>
            #         <blockStats>
            #           <block>
            #             <size>1</size>
            #             <reads>0</reads>
            #             <writes>512</writes>
            #           </block>
            #           <block>
            #             <size>2</size>
            #             <reads>0</reads>
            #             <writes>124</writes>
            #           </block>
            #           <block>
            #             <size>4</size>
            #             <reads>0</reads>
            #             <writes>100</writes>
            #           </block>
            #           .............
            #         </blockStats>
            #       </intervalStats>
            #     </brick>
            #   </volProfile>
            # </cliOutput>
            #
            # For calculating iops values we aggregate the reads
            # and writes across all blocks and then divide them by the
            # `duration` value. These values are finally saved under the
            # `gauge-read` and `gauge-write` fields in graphite
            total_reads = 0
            for entry in brick_det.get('intervalStats').get('blockStats'):
                total_reads += int(entry['read'])
            total_writes = 0
            for entry in brick_det.get('intervalStats').get('blockStats'):
                total_writes += int(entry['write'])
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.iops." \
                "gauge-read"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator)
                )
            ] = math.ceil(
                total_reads / float(
                    brick_det.get('intervalStats').get('duration'))
            )
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.iops." \
                "gauge-write"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator)
                )
            ] = math.ceil(
                total_writes / float(
                    brick_det.get('intervalStats').get('duration'))
            )
            t_name = "clusters.%s.nodes.%s.bricks.%s.iops." \
                "gauge-read"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator)
                )
            ] = math.ceil(
                total_reads / float(
                    brick_det.get('intervalStats').get('duration'))
            )
            t_name = "clusters.%s.nodes.%s.bricks.%s.iops." \
                "gauge-write"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator)
                )
            ] = math.ceil(
                total_writes / float(
                    brick_det.get('intervalStats').get('duration'))
            )
        fopIntervalStats = brick_det.get('intervalStats').get('fopStats')
        for fopStat in fopIntervalStats:
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.hits"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('hits'))
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyAvg"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyAvg'))
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMin"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMin'))
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMax"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMax'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.hits"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('hits'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyAvg"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyAvg'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMin"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMin'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMax"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace(
                        '/', self.brick_path_separator),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMax'))
            if fopStat.get('name') in READ_WRITE_OPS:
                read_write_hits = read_write_hits + float(
                    fopStat.get('hits'))
            if fopStat.get('name') in LOCK_OPS:
                lock_hits = lock_hits + float(fopStat.get('hits'))
            if fopStat.get('name') in INODE_OPS:
                inode_hits = inode_hits + float(fopStat.get('hits'))
            if fopStat.get('name') in ENTRY_OPS:
                entry_hits = entry_hits + float(fopStat.get('hits'))
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "read_write_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = read_write_hits
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "lock_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = lock_hits
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "inode_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = inode_hits
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "entry_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = entry_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "read_write_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = read_write_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "lock_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = lock_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "inode_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = inode_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "entry_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace(
                    '/', self.brick_path_separator)
            )
        ] = entry_hits
def get_metrics(self):
    try:
        ret_val = {}
        volumes = self.CLUSTER_TOPOLOGY.get('volumes', [])
        # Push brick level connections count
        volumes_list = []
        for volume in volumes:
            brick_found_for_curr_node = False
            for sub_volume_index, sub_volume_bricks in volume.get(
                    'bricks', {}).iteritems():
                for brick in sub_volume_bricks:
                    brick_hostname = \
                        tendrl_glusterfs_utils.find_brick_host(
                            self.etcd_client,
                            self.CONFIG['integration_id'],
                            brick.get('hostname')
                        )
                    if brick_hostname:
                        brick_ip = socket.gethostbyname(brick_hostname)
                        if (
                            brick_ip == socket.gethostbyname(
                                self.CONFIG['peer_name']) or
                            brick_hostname == self.CONFIG['peer_name']
                        ):
                            brick_found_for_curr_node = True
                            # Push brick client connections
                            ret_val[
                                'clusters.%s.volumes.%s.nodes.%s.'
                                'bricks.%s.connections_count' % (
                                    self.CONFIG['integration_id'],
                                    volume.get('name', ''),
                                    self.CONFIG['peer_name'].replace(
                                        '.', '_'),
                                    brick['path'].replace('/', '|')
                                )
                            ] = brick['connections_count']
            if brick_found_for_curr_node:
                # Update rebalance info only for these volumes
                volumes_list.append(volume.get('name', ''))
        # Push rebalance info
        rebalance_info = self._get_rebalance_info()
        for vol_name in rebalance_info:
            if vol_name in volumes_list:
                # Push volume wise snap counts
                ret_val[
                    'clusters.%s.volumes.%s.snap_count' % (
                        self.CONFIG['integration_id'],
                        vol_name
                    )
                ] = rebalance_info[vol_name]['snap_count']
                # Push rebalance bytes progress
                ret_val[
                    'clusters.%s.volumes.%s.nodes.%s.rebalance_bytes' % (
                        self.CONFIG['integration_id'],
                        vol_name,
                        self.CONFIG['peer_name'].replace('.', '_')
                    )
                ] = rebalance_info[vol_name]['rebalance_data']
                # Push rebalance files progress
                ret_val[
                    'clusters.%s.volumes.%s.nodes.%s.rebalance_files' % (
                        self.CONFIG['integration_id'],
                        vol_name,
                        self.CONFIG['peer_name'].replace('.', '_')
                    )
                ] = rebalance_info[vol_name]['rebalance_files']
                # Push rebalance failures
                ret_val[
                    'clusters.%s.volumes.%s.nodes.%s.'
                    'rebalance_failures' % (
                        self.CONFIG['integration_id'],
                        vol_name,
                        self.CONFIG['peer_name'].replace('.', '_')
                    )
                ] = rebalance_info[vol_name]['rebalance_failures']
                # Push rebalance skipped
                ret_val[
                    'clusters.%s.volumes.%s.nodes.%s.'
                    'rebalance_skipped' % (
                        self.CONFIG['integration_id'],
                        vol_name,
                        self.CONFIG['peer_name'].replace('.', '_')
                    )
                ] = rebalance_info[vol_name]['rebalance_skipped']
        return ret_val
    except (AttributeError, KeyError, ValueError):
        collectd.error(
            'Failed to fetch counters. Error %s\n\n' % (
                traceback.format_exc()
            )
        )
        return {}
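
# Illustrative sketch only: every collector above builds flat Graphite metric
# paths with the same escaping rules, so they are shown once here. The helper
# name `_metric_path` is hypothetical; the escaping itself (dots in hostnames
# become underscores, slashes in brick paths become '|') matches the string
# handling in the collectors.
def _metric_path(integration_id, volume, peer_name, brick_path, metric):
    return 'clusters.%s.volumes.%s.nodes.%s.bricks.%s.%s' % (
        integration_id,
        volume,
        peer_name.replace('.', '_'),    # host FQDN: dots -> underscores
        brick_path.replace('/', '|'),   # brick path: slashes -> pipes
        metric,
    )


# e.g. _metric_path('ab12', 'vol1', 'node1.example.com', '/bricks/b1',
#                   'connections_count')
# -> 'clusters.ab12.volumes.vol1.nodes.node1_example_com.'
#    'bricks.|bricks|b1.connections_count'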
def process_volume_profile_info(self, volume):
    volName = volume['name']
    vol_iops = self.get_volume_profile_info(
        volName,
        self.CONFIG['integration_id']
    )
    if not vol_iops:
        return
    read_write_hits = 0
    inode_hits = 0
    entry_hits = 0
    lock_hits = 0
    for brick_det in vol_iops.get('bricks', {}):
        brickName = brick_det.get('brick', '')
        brick_host = brick_det.get('brick', '').split(':')[0]
        brick_host = tendrl_glusterfs_utils.find_brick_host(
            self.etcd_client,
            self.CONFIG['integration_id'],
            brick_host
        )
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.iops." \
            "gauge-read"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = brick_det.get('intervalStats').get('totalRead')
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.iops." \
            "gauge-write"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = brick_det.get('intervalStats').get('totalWrite')
        t_name = "clusters.%s.nodes.%s.bricks.%s.iops." \
            "gauge-read"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = brick_det.get('intervalStats').get('totalRead')
        t_name = "clusters.%s.nodes.%s.bricks.%s.iops." \
            "gauge-write"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = brick_det.get('intervalStats').get('totalWrite')
        fopIntervalStats = brick_det.get(
            'intervalStats'
        ).get('fopStats')
        for fopStat in fopIntervalStats:
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.hits"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('hits'))
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyAvg"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyAvg'))
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMin"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMin'))
            t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMax"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    volName,
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMax'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.hits"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('hits'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyAvg"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyAvg'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMin"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMin'))
            t_name = "clusters.%s.nodes.%s.bricks.%s.fop." \
                "%s.latencyMax"
            self.profile_info[
                t_name % (
                    self.CONFIG['integration_id'],
                    brick_host.replace('.', '_'),
                    brickName.split(':')[1].replace('/', '|'),
                    fopStat.get('name')
                )
            ] = float(fopStat.get('latencyMax'))
            if fopStat.get('name') in READ_WRITE_OPS:
                read_write_hits = read_write_hits + float(
                    fopStat.get('hits')
                )
            if fopStat.get('name') in LOCK_OPS:
                lock_hits = lock_hits + float(fopStat.get('hits'))
            if fopStat.get('name') in INODE_OPS:
                inode_hits = inode_hits + float(fopStat.get('hits'))
            if fopStat.get('name') in ENTRY_OPS:
                entry_hits = entry_hits + float(fopStat.get('hits'))
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "read_write_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = read_write_hits
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "lock_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = lock_hits
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "inode_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = inode_hits
        t_name = "clusters.%s.volumes.%s.nodes.%s.bricks.%s." \
            "entry_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                volName,
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = entry_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "read_write_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = read_write_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "lock_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = lock_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "inode_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = inode_hits
        t_name = "clusters.%s.nodes.%s.bricks.%s." \
            "entry_ops"
        self.profile_info[
            t_name % (
                self.CONFIG['integration_id'],
                brick_host.replace('.', '_'),
                brickName.split(':')[1].replace('/', '|')
            )
        ] = entry_hits