def __init__(self, approot, interval):
    """Set up the metrics cache and start reading.

    :param approot:
        Treadmill application root; used to build the app environment,
        the system services list and the system device identifiers.
    :param interval:
        Refresh interval. A non-positive value triggers a single
        ``self._read()``; any other value hands control to
        ``self._loop()``.
    """
    self.cache = {
        'treadmill': {},
        'core': {},
        'app': {},
    }
    self._interval = interval

    self._tm_env = appenv.AppEnvironment(root=approot)
    self._sys_svcs = _sys_svcs(approot)

    # TODO: sys_maj_min will be used changing treadmill.metrics.app_metrics
    maj_min_for_label = fs_linux.maj_min_from_path(approot)
    self._sys_maj_min = '{}:{}'.format(*maj_min_for_label)

    maj_min_for_dev = fs_linux.maj_min_from_path(approot)
    self._sys_block_dev = fs_linux.maj_min_to_blk(*maj_min_for_dev)

    # A non-positive interval means "read once and stop".
    if interval > 0:
        self._loop()
        return

    self._read()
def benchmark(benchmark_publish_file, vg_name, underlying_device_name,
              underlying_image_path, benchmark_volume, rw_type, job_number,
              thread_number, iops_block_size, bps_block_size, max_seconds):
    """Benchmark node IO performance"""
    # Two runs are made with identical settings except for the block size:
    # one with ``iops_block_size`` (its read/write IOPS are published) and
    # one with ``bps_block_size`` (its read/write BPS are published).
    #
    # * ``underlying_device_name`` given: the volume group ``vg_name`` is
    #   benchmarked directly.
    # * otherwise ``underlying_image_path`` given: a ``benchmark`` directory
    #   under that path is benchmarked and removed afterwards.
    # * neither given: an error is logged and nothing is published.
    #
    # Results are written to ``benchmark_publish_file`` keyed by the UUID of
    # the underlying block device. ``subproc.CommandAliasError`` is logged
    # rather than propagated.
    try:
        if underlying_device_name is not None:
            # LVM is based on physical device,
            # benchmark VG directly
            underlying_device_uuid = fs_linux.blk_uuid(
                underlying_device_name
            )
            max_iops_result = diskbenchmark.benchmark_vg(
                vg_name, benchmark_volume, rw_type, job_number,
                thread_number, iops_block_size, max_seconds
            )
            max_bps_result = diskbenchmark.benchmark_vg(
                vg_name, benchmark_volume, rw_type, job_number,
                thread_number, bps_block_size, max_seconds
            )
        elif underlying_image_path is not None:
            # LVM is based on loop device,
            # benchmark underlying physical device of image file
            underlying_device_uuid = fs_linux.blk_uuid(
                fs_linux.maj_min_to_blk(
                    *fs_linux.maj_min_from_path(underlying_image_path)
                )
            )
            benchmark_path = os.path.join(underlying_image_path, 'benchmark')
            try:
                max_iops_result = diskbenchmark.benchmark(
                    benchmark_path, benchmark_volume, rw_type, job_number,
                    thread_number, iops_block_size, max_seconds
                )
                max_bps_result = diskbenchmark.benchmark(
                    benchmark_path, benchmark_volume, rw_type, job_number,
                    thread_number, bps_block_size, max_seconds
                )
            finally:
                # Always drop the scratch directory, even when a run fails,
                # so a failed benchmark does not leave data behind next to
                # the image.
                if os.path.isdir(benchmark_path):
                    shutil.rmtree(benchmark_path)
        else:
            _LOGGER.error('No underlying device, please specify '
                          '--underlying-device-name/'
                          '--underlying-image-path')
            return

        diskbenchmark.write(
            benchmark_publish_file,
            {
                underlying_device_uuid: {
                    'read_bps': max_bps_result['read_bps'],
                    'write_bps': max_bps_result['write_bps'],
                    'read_iops': max_iops_result['read_iops'],
                    'write_iops': max_iops_result['write_iops']
                }
            }
        )
    except subproc.CommandAliasError:
        _LOGGER.error(_ALIAS_ERROR_MESSAGE)
def localdisk(img_location, img_size, block_dev, vg_name,
              block_dev_configuration,
              block_dev_read_bps, block_dev_write_bps,
              block_dev_read_iops, block_dev_write_iops,
              default_read_bps, default_write_bps,
              default_read_iops, default_write_iops):
    """Runs localdisk service."""
    # Block device limits are resolved from exactly one of two sources:
    #   * the benchmark file ``block_dev_configuration`` (only when none of
    #     the four ``block_dev_*`` limits is given), or
    #   * the four ``block_dev_*`` limits (only when all are given and no
    #     benchmark file is given).
    # If any limit is still unset afterwards, an error is logged and
    # hard-coded fallback values are used for all four.
    root_dir = local_ctx['root-dir']
    watchdogs_dir = local_ctx['watchdogs-dir']

    svc = services.ResourceService(
        service_dir=os.path.join(root_dir, 'localdisk_svc'),
        impl='localdisk'
    )

    explicit_limits = (
        block_dev_read_bps,
        block_dev_write_bps,
        block_dev_read_iops,
        block_dev_write_iops,
    )
    limit_given = [limit is not None for limit in explicit_limits]
    has_config_file = block_dev_configuration is not None

    if img_location is None:
        img_location = root_dir

    # prepare block device
    if block_dev is None:
        # No device supplied: identify the device backing the image
        # location, then create the image-based block device.
        underlying_device_uuid = fs_linux.blk_uuid(
            fs_linux.maj_min_to_blk(
                *fs_linux.maj_min_from_path(img_location)
            )
        )
        block_dev = localdiskutils.init_block_dev(
            localdiskutils.TREADMILL_IMG, img_location, img_size
        )
    else:
        underlying_device_uuid = fs_linux.blk_uuid(block_dev)

    # prepare block device configuration
    read_bps = write_bps = read_iops = write_iops = None

    if has_config_file and not any(limit_given):
        # use block device config file
        try:
            benchmarks = diskbenchmark.read(block_dev_configuration)
            device_benchmark = benchmarks[underlying_device_uuid]
            read_bps = device_benchmark['read_bps']
            write_bps = device_benchmark['write_bps']
            read_iops = int(device_benchmark['read_iops'])
            write_iops = int(device_benchmark['write_iops'])
        except IOError:
            _LOGGER.error('No benchmark found : %s', block_dev_configuration)
        except (KeyError, ValueError):
            _LOGGER.error('Incorrect disk benchmark for device %s in %s',
                          underlying_device_uuid, block_dev_configuration)
    elif all(limit_given) and not has_config_file:
        # use block device config parameters
        read_bps, write_bps, read_iops, write_iops = explicit_limits

    if None in (read_bps, write_bps, read_iops, write_iops):
        # A partially resolved configuration is discarded as a whole.
        _LOGGER.error('Bad block dev configuration')
        read_bps = write_bps = '200M'
        read_iops = write_iops = 3000

    svc.run(
        watchdogs_dir=os.path.join(root_dir, watchdogs_dir),
        block_dev=block_dev,
        vg_name=vg_name,
        read_bps=read_bps,
        write_bps=write_bps,
        read_iops=read_iops,
        write_iops=write_iops,
        default_read_bps=default_read_bps,
        default_write_bps=default_write_bps,
        default_read_iops=default_read_iops,
        default_write_iops=default_write_iops,
    )
def metrics(step, approot):
    """Collect node and container metrics."""
    # Every ``step`` seconds (minus the time the previous pass took) the
    # 'nodeinfo' tcp endpoint is looked up, cgroup data is fetched from it
    # over HTTP and the core, service and app RRDs are updated. RRD files of
    # apps missing from the latest app listing are finished.
    tm_env = appenv.AppEnvironment(root=approot)
    endpoints_mgr = endpoints.EndpointsMgr(tm_env.endpoints_dir)

    app_metrics_dir = os.path.join(tm_env.metrics_dir, 'apps')
    core_metrics_dir = os.path.join(tm_env.metrics_dir, 'core')
    for metrics_dir in (app_metrics_dir, core_metrics_dir):
        fs.mkdir_safe(metrics_dir)

    # Initiate the list for monitored applications
    rrd_suffix = '.rrd'
    monitored_apps = set()
    for rrd_path in glob.glob('%s/*' % app_metrics_dir):
        if rrd_path.endswith(rrd_suffix):
            monitored_apps.add(
                os.path.basename(rrd_path)[:-len(rrd_suffix)]
            )

    sys_maj_min = '{}:{}'.format(*fs_linux.maj_min_from_path(approot))
    _LOGGER.info('Device sys maj:min = %s for approot: %s',
                 sys_maj_min, approot)

    _LOGGER.info('Loading rrd client')
    rrd_loader = RRDClientLoader()

    def _fetch(base, path):
        # One GET against the cgroup REST api, decoded from JSON.
        return restclient.get(base, path, auth=None).json()

    second_used = 0
    while True:
        # Sleep only for what is left of the step after the last pass.
        if step > second_used:
            time.sleep(step - second_used)

        spec = endpoints_mgr.get_spec(proto='tcp', endpoint='nodeinfo')
        if spec is None:
            second_used = 0
            _LOGGER.warning('Cgroup REST api port not found.')
            continue

        # appname = 'root.{hostname}#{pid}'
        appname = spec[0]
        host = appname.partition('#')[0][len('root.'):]
        port = int(spec[-1])
        remote = 'http://{0}:{1}'.format(host, port)
        _LOGGER.info('remote cgroup API address: %s', remote)

        starttime_sec = time.time()
        count = 0

        # aggregated cgroup values of `treadmill.core` and `treadmill.apps`
        core_data = _fetch(remote, '/cgroup/treadmill/*/')
        core_data['treadmill'] = _fetch(remote, '/cgroup/treadmill')
        count += _update_core_rrds(core_data, core_metrics_dir,
                                   rrd_loader.client, step, sys_maj_min)

        service_data = _fetch(remote, '/cgroup/treadmill/core/*/?detail=true')
        count += _update_service_rrds(service_data, core_metrics_dir,
                                      rrd_loader.client, step, sys_maj_min)

        app_data = _fetch(remote, '/cgroup/treadmill/apps/*/?detail=true')
        count += _update_app_rrds(app_data, app_metrics_dir,
                                  rrd_loader.client, step, tm_env)

        # Remove metrics for apps that are not present anymore
        seen_apps = set(app_data)
        for app_unique_name in monitored_apps.difference(seen_apps):
            rrdfile = os.path.join(
                app_metrics_dir, '{app}.rrd'.format(app=app_unique_name)
            )
            _LOGGER.info('removing %r', rrdfile)
            rrd.finish(rrd_loader.client, rrdfile)

        monitored_apps = seen_apps

        second_used = time.time() - starttime_sec
        _LOGGER.info('Got %d cgroups metrics in %.3f seconds',
                     count, second_used)

    # Graceful shutdown.
    # NOTE(review): the loop above has no ``break``, so this statement looks
    # unreachable as written -- confirm.
    _LOGGER.info('service shutdown.')
def _sys_block_dev(self):
    """Return the lazily computed ``_sys_block_dev`` value.

    On first use the value is obtained by passing the major/minor pair of
    ``self._approot`` to ``fs_linux.maj_min_to_blk`` and is stored in
    ``self._initialized``; later calls return the stored value.
    """
    cache = self._initialized
    key = '_sys_block_dev'

    if key in cache:
        return cache[key]

    maj_min = fs_linux.maj_min_from_path(self._approot)
    cache[key] = fs_linux.maj_min_to_blk(*maj_min)
    return cache[key]
def _sys_maj_min(self):
    """Return the lazily computed ``'<major>:<minor>'`` string.

    On first use the string is formatted from
    ``fs_linux.maj_min_from_path(self._approot)`` and stored in
    ``self._initialized``; later calls return the stored value.
    """
    # TODO: sys_maj_min will be used changing treadmill.metrics.app_metrics
    cache = self._initialized
    key = '_sys_maj_min'

    if key in cache:
        return cache[key]

    maj_min = fs_linux.maj_min_from_path(self._approot)
    cache[key] = '{}:{}'.format(*maj_min)
    return cache[key]
def metrics(step, approot, api_socket):
    """Collect node and container metrics."""
    # Every ``step`` seconds (minus the time the previous pass took) the
    # '/cgroup/_bulk' resource is fetched over the unix socket
    # ``api_socket`` and the core, service and app RRDs are updated from its
    # 'treadmill', 'core' and 'app' sections. RRD files of apps missing from
    # the latest 'app' section are finished.
    remote = 'http+unix://{}'.format(urllib_parse.quote_plus(api_socket))
    _LOGGER.info('remote cgroup API address %s', remote)

    tm_env = appenv.AppEnvironment(root=approot)
    app_metrics_dir = os.path.join(tm_env.metrics_dir, 'apps')
    core_metrics_dir = os.path.join(tm_env.metrics_dir, 'core')
    for metrics_dir in (app_metrics_dir, core_metrics_dir):
        fs.mkdir_safe(metrics_dir)

    # Initiate the list for monitored applications
    rrd_suffix = '.rrd'
    monitored_apps = set()
    for rrd_path in glob.glob('%s/*' % app_metrics_dir):
        if rrd_path.endswith(rrd_suffix):
            monitored_apps.add(
                os.path.basename(rrd_path)[:-len(rrd_suffix)]
            )

    sys_maj_min = '{}:{}'.format(*fs_linux.maj_min_from_path(approot))
    _LOGGER.info('Device sys maj:min = %s for approot: %s',
                 sys_maj_min, approot)

    _LOGGER.info('Loading rrd client')
    rrd_loader = RRDClientLoader()

    second_used = 0
    while True:
        # Sleep only for what is left of the step after the last pass.
        if step > second_used:
            time.sleep(step - second_used)

        starttime_sec = time.time()
        count = 0

        data = restclient.get(remote, '/cgroup/_bulk', auth=None).json()

        count += _update_core_rrds(
            data['treadmill'], core_metrics_dir, rrd_loader.client,
            step, sys_maj_min
        )
        count += _update_service_rrds(
            data['core'], core_metrics_dir, rrd_loader.client,
            step, sys_maj_min
        )
        count += _update_app_rrds(
            data['app'], app_metrics_dir, rrd_loader.client,
            step, tm_env
        )

        # Remove metrics for apps that are not present anymore
        seen_apps = set(data['app'].keys())
        for app_unique_name in monitored_apps.difference(seen_apps):
            rrdfile = os.path.join(
                app_metrics_dir, '{app}.rrd'.format(app=app_unique_name)
            )
            _LOGGER.info('removing %r', rrdfile)
            rrd.finish(rrd_loader.client, rrdfile)

        monitored_apps = seen_apps

        second_used = time.time() - starttime_sec
        _LOGGER.info('Got %d cgroups metrics in %.3f seconds',
                     count, second_used)

    # Graceful shutdown.
    # NOTE(review): the loop above has no ``break``, so this statement looks
    # unreachable as written -- confirm.
    _LOGGER.info('service shutdown.')