def fetch(self):
    """Download this client's configuration from the server and apply it.

    Basic system information (architecture, distribution, CPU brand and core
    count) is sent as query parameters. While the server reports the client
    as not enabled, the method sleeps ``polling_time`` minutes and asks
    again. On success ``send_throttle`` and ``monitor_enabled`` are stored on
    the instance.

    Raises:
        SystemExit: with status 1 when the server cannot be reached or
            answers with a non-200 status code.
    """
    # NOTE(review): the secret is part of the URL and therefore ends up in
    # the debug log below -- confirm this is acceptable.
    configpath = "http://{}/client/config/{}/{}".format(self.server, self.fqdn, self.secret)

    cpu_info = cpuinfo.get_cpu_info()

    # platform.linux_distribution() was removed in Python 3.8; fall back to
    # the generic platform string instead of failing with AttributeError.
    linux_distribution = getattr(platform, "linux_distribution", None)
    if linux_distribution is not None:
        dist = " ".join(list(linux_distribution()))
    else:
        dist = platform.platform()

    sysinfo = {
        "arch": platform.machine(),
        "dist": dist,
        "cpu": cpu_info["brand"],
        "cores": cpu_info["count"],
    }

    # Poll in a loop rather than recursing: a client that stays unaccepted
    # for a long time would otherwise grow the call stack on every retry.
    while True:
        self.logger.debug("Fetching {}".format(configpath))
        try:
            response = requests.get(configpath, params=sysinfo)
        except requests.exceptions.ConnectionError:
            self.logger.error("Cannot connect to the server at http://{}".format(self.server))
            raise SystemExit(1)

        if response.status_code != 200:
            self.logger.error("Server error {} at http://{}".format(response.status_code, self.server))
            raise SystemExit(1)

        server_config = response.json()
        if server_config["enabled"]:
            break

        self.logger.error("This client hasn't been accepted on the server yet.")
        self.logger.info("Retrying in {} minutes".format(server_config["polling_time"]))
        sleep(60 * server_config["polling_time"])

    self.logger.info("Config download successful")
    self.send_throttle = server_config["send_throttle"]
    self.monitor_enabled = server_config["monitor_enabled"]
def update(self):
    """Update quicklook stats using the input method."""
    # Start from a fresh stats container
    quicklook = self.get_init_value()

    # CPU, MEM and SWAP are only collected locally; SNMP is not available
    if self.input_method == 'local':
        # Latest CPU percent values (global and per core)
        quicklook['cpu'] = cpu_percent.get()
        quicklook['percpu'] = cpu_percent.get(percpu=True)
        # Virtual and swap memory usage come from the psutil lib
        quicklook['mem'] = psutil.virtual_memory().percent
        quicklook['swap'] = psutil.swap_memory().percent

    # Optionally, get the CPU name/frequency
    # thanks to the cpuinfo lib: https://github.com/workhorsy/py-cpuinfo
    if cpuinfo_tag:
        cpu_info = cpuinfo.get_cpu_info()
        # Check cpu_info (issue #881)
        if cpu_info is not None:
            quicklook['cpu_name'] = cpu_info.get('brand', 'CPU')
            frequency_fields = (
                ('cpu_hz_current', 'hz_actual_raw'),
                ('cpu_hz', 'hz_advertised_raw'),
            )
            for stat_key, info_key in frequency_fields:
                if info_key in cpu_info:
                    quicklook[stat_key] = cpu_info[info_key][0]

    # Publish the new stats
    self.stats = quicklook

    return self.stats
def update(self):
    """Update quicklook stats using the input method.

    Resets ``self.stats``, fills in CPU/MEM/SWAP percentages when the input
    method is ``'local'``, optionally adds the CPU name and frequencies from
    the cpuinfo lib, refreshes the views and returns ``self.stats``.
    """
    # Reset stats
    self.reset()

    # Grab quicklook stats: CPU, MEM and SWAP
    if self.input_method == 'local':
        # Get the latest CPU percent value
        self.stats['cpu'] = cpu_percent.get()
        self.stats['percpu'] = cpu_percent.get(percpu=True)
        # Use the psutil lib for the memory (virtual and swap)
        self.stats['mem'] = psutil.virtual_memory().percent
        self.stats['swap'] = psutil.swap_memory().percent
    elif self.input_method == 'snmp':
        # Not available
        pass

    # Optionally, get the CPU name/frequency
    # thanks to the cpuinfo lib: https://github.com/workhorsy/py-cpuinfo
    if cpuinfo_tag:
        cpu_info = cpuinfo.get_cpu_info()
        # The lib may return nothing, or omit some fields, on some
        # platforms; only record what is actually present instead of
        # failing with TypeError/KeyError.
        if cpu_info is not None:
            self.stats['cpu_name'] = cpu_info.get('brand', 'CPU')
            if 'hz_actual_raw' in cpu_info:
                self.stats['cpu_hz_current'] = cpu_info['hz_actual_raw'][0]
            if 'hz_advertised_raw' in cpu_info:
                self.stats['cpu_hz'] = cpu_info['hz_advertised_raw'][0]

    # Update the view
    self.update_views()

    return self.stats
def update(self):
    """Refresh the CPU info and derive the state from the actual frequency.

    The state is the first element of the ``HZ_ACTUAL_RAW`` entry divided by
    10 ** 9 and rounded to two decimals, or ``None`` when that entry is
    missing.
    """
    from cpuinfo import cpuinfo

    self.info = cpuinfo.get_cpu_info()

    if HZ_ACTUAL_RAW not in self.info:
        self._state = None
        return

    raw_hz = float(self.info[HZ_ACTUAL_RAW][0])
    self._state = round(raw_hz / 10 ** 9, 2)
def nextRun( self ):
    '''
    Decide which run should take place next.
    Return a (#workers,#battles,run #) tuple.

    Every (workers, multiplier) combination up to 1.5x the CPU count and
    self.mmax is considered; one of the combinations with the fewest
    recorded runs for this CPU is picked at random.

    Raises StopIteration once every combination has at least
    self.desired_runs runs.
    '''
    # get_cpu_info() is slow, so query it once and reuse the result.
    cpu_info = cpuinfo.get_cpu_info()
    this_cpu = cpu_info['brand']
    max_cpu = int(cpu_info['count'] * 1.5)

    # Start every possible combination at zero runs ...
    already_run = {
        '{0:04d}_{1:04d}'.format(w, m): 0
        for w, m in itertools.product(range(1, max_cpu+1),
                                      range(1, self.mmax+1))
    }
    # ... then overwrite with the counts recorded for this CPU.
    for record in self.conn.execute(''' SELECT Workers,BattleMultiplier,COUNT(RunID) FROM Runs WHERE CPUInfo=? GROUP BY Workers,BattleMultiplier ''',[this_cpu]):
        key = '{0:04d}_{1:04d}'.format(record['Workers'],
                                       record['BattleMultiplier'])
        already_run[key] = record['COUNT(RunID)']

    fewest_runs = min(already_run.values())
    # ">=" rather than "==": if the database already holds more runs than
    # desired, we must still stop instead of scheduling runs forever.
    if fewest_runs >= self.desired_runs:
        raise StopIteration()

    # Choose a random scenario among those with the fewest runs. This list
    # is never empty because fewest_runs is the minimum of the values.
    need_run = [k for k, count in already_run.items() if count == fewest_runs]
    c = random.choice(need_run)
    return tuple(int(part) for part in c.split('_')) + (fewest_runs+1,)
def __init__(self, model, dataset, parameters):
    """Record the experiment inputs and machine details, then report the start.

    ``parameters`` and the machine description (node, hostname, CPU info)
    are stored as JSON strings. The start is reported through rollbar
    before the MySQL handle is created.
    """
    cpu_details = cpuinfo.get_cpu_info()

    self.model = model
    self.dataset = dataset
    self.parameters = json.dumps(parameters)

    machine_description = {
        'node': platform.node(),
        'hostname': socket.gethostname(),
        'cpu': cpu_details,
    }
    self.machine = json.dumps(machine_description)

    self.start = timeit.default_timer()
    self.experimentId = uuid.uuid4()

    rollbar.report_message(
        'Starting', 'info', None, extra_data=self.extra_data())

    self.io = MySQL()
def get_sys_conf(map):
    """Store CPU and memory facts about this machine in ``map``.

    CPU fields come from ``cpuinfo.get_cpu_info()`` and memory fields from
    ``psutil.virtual_memory()``; all keys are prefixed with ``SYS_``.
    """
    cpu = cpuinfo.get_cpu_info()
    cpu_fields = (
        ('SYS_hz_actual', 'hz_actual'),
        ('SYS_raw_arch_string', 'raw_arch_string'),
        ('SYS_l2_cache_size', 'l2_cache_size'),
        ('SYS_brand', 'brand'),
        ('SYS_cpu_count', 'count'),
    )
    for target_key, source_key in cpu_fields:
        map[target_key] = cpu[source_key]

    memory = psutil.virtual_memory()
    map['SYS_total_mem'] = float(memory.total)
    # NOTE(review): psutil documents ``percent`` as the percentage of memory
    # in use, yet it is stored under a "percent_free" key -- confirm intent.
    map['SYS_percent_free'] = memory.percent
def run_job(queue, job):
    """Run ``job.command`` in a subprocess and report progress on ``queue``.

    A header (experiment, start time, CPU, affinity, command) and then the
    subprocess output are written to a temporary file that is kept on disk.
    Two tuples are put on ``queue``:

    * ``("start", job, output_file_name)`` before the process is launched;
    * ``("end", job, seconds, timed_out, output_text)`` once it has finished
      or has been killed after ``job.timeout`` seconds.
    """
    # multiprocessing: don't capture ctrl+c
    signal.signal(signal.SIGINT, signal.SIG_IGN)

    output_file = tempfile.NamedTemporaryFile(bufsize=8, delete=False)
    output_file.write("experiment: %s\n" % " ".join(sys.argv))
    output_file.write("start: %s\n" % datetime.datetime.now())
    output_file.write("cpu: %s\n" % cpuinfo.get_cpu_info()['brand'])
    if sys.platform.startswith("linux"):
        output_file.write("affinity: %s\n" % psutil.Process().cpu_affinity())
    output_file.write("\n")
    output_file.write("cmd: %s\n" % " ".join(job.command))
    output_file.write("\n")
    output_file.flush()

    queue.put(("start", job, output_file.name))

    t = time.time()
    proc = subprocess.Popen(job.command, stderr=subprocess.STDOUT, stdout=output_file)
    timed_out = False
    try:
        # Output goes to output_file, so the return value carries nothing.
        proc.communicate(timeout=job.timeout)
        t = time.time() - t
    except subprocess.TimeoutExpired:
        # Kill the whole process tree. Everything here is best effort: any
        # of these processes may already have exited on its own.
        try:
            children = psutil.Process(proc.pid).children(recursive=True)
        except psutil.Error:
            children = []
        for c in children:
            try:
                c.kill()
            except (psutil.Error, OSError):
                pass
        try:
            proc.kill()
        except OSError:
            pass
        proc.communicate()
        t = job.timeout
        timed_out = True

    # Re-read the complete log (header plus process output) from disk.
    with open(output_file.name) as f:
        out = f.read()
    output_file.close()

    queue.put(("end", job, t, timed_out, out))
def commitRun( self, run_data ):
    '''
    Save the run data into the database.

    The row combines the CPU brand and count, this host's name, and the
    Workers, BattleMultiplier, RunStarted, RunFinished and DBFile entries
    of run_data.
    '''
    cpu = cpuinfo.get_cpu_info()

    row = [cpu['brand'], cpu['count'], socket.gethostname()]
    for column in ('Workers', 'BattleMultiplier',
                   'RunStarted', 'RunFinished', 'DBFile'):
        row.append(run_data[column])

    self.conn.execute(''' INSERT INTO Runs (CPUInfo,CPUCount,Hostname,Workers,BattleMultiplier, RunStarted,RunFinished,DBFile) VALUES (?,?,?,?,?,?,?,?) ''', row)
def generate_makefile(self, writer, compiler, native, compiler_flags, linker_flags, nb_threads):
    """Render the build file for the standalone project through ``writer``.

    For the ``'msvc'`` compiler a Visual Studio makefile (``win_makefile``)
    is written, optionally with an ``/arch`` flag derived from the CPU flags
    when ``native`` is set and ``/openmp`` when ``nb_threads`` is above 1.
    For any other compiler a regular ``makefile`` is written.
    """
    templater = CPPStandaloneCodeObject.templater

    if compiler != 'msvc':
        # Generate the makefile
        rm_cmd = ('del *.o /s\n\tdel main.exe $(DEPS)' if os.name == 'nt'
                  else 'rm $(OBJS) $(PROGRAM) $(DEPS)')
        makefile_tmp = templater.makefile(
            None, None,
            source_files=' '.join(writer.source_files),
            header_files=' '.join(writer.header_files),
            compiler_flags=compiler_flags,
            linker_flags=linker_flags,
            rm_cmd=rm_cmd)
        writer.write('makefile', makefile_tmp)
        return

    if native:
        arch_flag = ''
        try:
            from cpuinfo import cpuinfo
            cpu_flags = cpuinfo.get_cpu_info()['flags']
            # Later entries win, so SSE2 takes precedence over SSE
            for cpu_flag, msvc_option in (('sse', '/arch:SSE'),
                                          ('sse2', '/arch:SSE2')):
                if cpu_flag in cpu_flags:
                    arch_flag = msvc_option
        except ImportError:
            logger.warn('Native flag for MSVC compiler requires installation of the py-cpuinfo module')
        compiler_flags += ' '+arch_flag

    openmp_flag = '/openmp' if nb_threads > 1 else ''

    # Generate the visual studio makefile
    source_bases = []
    for fname in writer.source_files:
        source_bases.append(fname.replace('.cpp', '').replace('/', '\\'))
    win_makefile_tmp = templater.win_makefile(
        None, None,
        source_bases=source_bases,
        compiler_flags=compiler_flags,
        linker_flags=linker_flags,
        openmp_flag=openmp_flag,
    )
    writer.write('win_makefile', win_makefile_tmp)
def post_process_perf_results(settings, results_location, workitem_dir):
    """Convert the perf result xml to csv and json and upload both.

    Steps, in order:
      1. run xunit.performance.analysis.exe from the correlation payload to
         write ``results.csv`` into ``workitem_dir``;
      2. upload ``results.csv`` with ``settings``;
      3. read ``xunitrunner-perf.json`` and run ``csvjsonconvertor.py`` with
         job, build and machine details to produce the results json;
      4. upload the json twice: with ``settings`` and with a copy of
         ``settings`` redirected to the perf-specific container.

    Raises:
        Exception: when either conversion command returns a non-zero code.
    """
    # Use the xunit perf analysis exe from nuget package here
    log.info('Converting xml to csv')
    payload_dir = fix_path(os.getenv('HELIX_CORRELATION_PAYLOAD'))
    # The first sub-directory of the package folder is taken as the version
    perf_analysis_version = (next(os.walk(os.path.join(payload_dir, 'Microsoft.DotNet.xunit.performance.analysis')))[1])[0]
    xmlconvertorpath = os.path.join(*[payload_dir, 'Microsoft.DotNet.xunit.performance.analysis', perf_analysis_version, 'tools', 'xunit.performance.analysis.exe'])
    # NOTE(review): the command is built as one string and split on spaces,
    # so any path containing a space would be broken into several arguments.
    xmlCmd = xmlconvertorpath+' -csv '+os.path.join(workitem_dir, 'results.csv')+' '+results_location
    if (helix.proc.run_and_log_output(xmlCmd.split(' '))) != 0:
        raise Exception('Failed to generate csv from result xml')

    log.info('Uploading the results.csv file')
    _write_output_path(os.path.join(workitem_dir, 'results.csv'), settings)

    perfscriptsdir = os.path.join(*[payload_dir, 'RunnerScripts', 'xunitrunner-perf'])
    perfsettingsjson = ''
    # The name is first bound to the open file, then rebound to the parsed
    # settings dict inside the with-block.
    with open(os.path.join(perfscriptsdir, 'xunitrunner-perf.json'), 'rb') as perfsettingsjson:
        # read the perf-specific settings
        perfsettingsjson = json.loads(perfsettingsjson.read())

    # need to extract more properties from settings to pass to csvtojsonconvertor.py
    jsonFileName = perfsettingsjson['TestProduct']+'-'+settings.workitem_id+'.json'
    jsonPath = os.path.join(workitem_dir, jsonFileName)

    # Maps each command-line switch of the converter script to its value
    jsonArgsDict = dict()
    jsonArgsDict['--csvFile'] = os.path.join(workitem_dir, 'results.csv')
    jsonArgsDict['--jsonFile'] = jsonPath
    jsonArgsDict['--jobName'] = settings.correlation_id
    jsonArgsDict['--jobDescription'] = '...'
    jsonArgsDict['--configName'] = perfsettingsjson['TargetQueue']
    jsonArgsDict['--jobGroupName'] = perfsettingsjson['Creator']+'-'+perfsettingsjson['TestProduct']+'-'+perfsettingsjson['Branch']+'-Perf'
    jsonArgsDict['--jobTypeName'] = 'Private'
    jsonArgsDict['--username'] = perfsettingsjson['Creator']
    jsonArgsDict['--userAlias'] = perfsettingsjson['Creator']
    # NOTE(review): '--branch' is filled from 'TestProduct', not 'Branch' -- confirm
    jsonArgsDict['--branch'] = perfsettingsjson['TestProduct']
    jsonArgsDict['--buildInfoName'] = perfsettingsjson['BuildMoniker']

    # extract build number from buildmoniker if official build
    # (monikers with fewer than three '-'-separated tokens are used as-is;
    # otherwise the last two tokens are joined with a dot)
    buildtokens = perfsettingsjson['BuildMoniker'].split('-')
    if len(buildtokens) < 3:
        jsonArgsDict['--buildNumber'] = perfsettingsjson['BuildMoniker']
    else:
        jsonArgsDict['--buildNumber'] = buildtokens[-2] +'.'+buildtokens[-1]

    jsonArgsDict['--machinepoolName'] = perfsettingsjson['TargetQueue']
    jsonArgsDict['--machinepoolDescription'] = '...'
    jsonArgsDict['--microarchName'] = 'SSE2' # cannot be obtained by cpu-info; need to figure out some other way
    jsonArgsDict['--numberOfCores'] = psutil.cpu_count(logical=False)
    jsonArgsDict['--numberOfLogicalProcessors'] = psutil.cpu_count(logical=True)
    # psutil returns mem in bytes
    # NOTE(review): dividing bytes by 1024 yields KiB, although the original
    # comment said the value is converted to MB -- confirm the intended unit.
    jsonArgsDict['--totalPhysicalMemory'] = psutil.virtual_memory().total/1024
    jsonArgsDict['--osInfoName'] = platform.system()
    jsonArgsDict['--osVersion'] = platform.version()
    jsonArgsDict['--machineName'] = platform.node()

    info = cpuinfo.get_cpu_info()
    jsonArgsDict['--architectureName'] = format(info['arch'])
    jsonArgsDict['--machineDescription'] = format(info['brand'])
    jsonArgsDict['--manufacturerName'] = format(info['vendor_id'])

    # Flatten the dict into "switch value switch value ..." arguments
    jsonArgs = [sys.executable, os.path.join(perfscriptsdir, 'csvjsonconvertor.py')]
    for key, value in jsonArgsDict.iteritems():
        jsonArgs.append(key)
        jsonArgs.append(str(value))

    if (helix.proc.run_and_log_output(jsonArgs)) != 0:
        raise Exception('Failed to generate json from csv file')

    # set info to upload result to perf-specific json container
    log.info('Uploading the results json')
    # Work on a deep copy so the caller's settings object is left untouched
    perfsettings = copy.deepcopy(settings)
    perfsettings.output_uri = perfsettingsjson['RootURI']
    perfsettings.output_write_token = perfsettingsjson['WriteToken']
    perfsettings.output_read_token = perfsettingsjson['ReadToken']
    jsonPath = str(jsonPath)
    # Upload json with rest of the results
    _write_output_path(jsonPath, settings)
    # Upload json to the perf specific container
    _write_output_path(jsonPath, perfsettings)
# coding: UTF-8 from statistics import mean from typing import Iterable from itertools import islice from cpuinfo import cpuinfo from ..utils.machine_type import MachineChecker, NodeType NODE_TYPE = MachineChecker.get_node_type() # LLC_SIZE = int(cpuinfo.get_cpu_info()['l3_cache_size'].split()[0]) * 1024 # Xeon Server (BC5) LLC (L3 Cache) if NODE_TYPE == NodeType.IntegratedGPU: LLC_SIZE = int(cpuinfo.get_cpu_info()['l2_cache_size'].split() [0]) * 1024 # JETSON TX2 LLC (L2Cache) elif NODE_TYPE == NodeType.CPU: LLC_SIZE = int(cpuinfo.get_cpu_info()['l3_cache_size'].split() [0]) * 1024 # Desktop (SDC) LLC (L3Cache) class BasicMetric: def __init__(self, llc_references, llc_misses, inst, cycles, gpu_core_util, gpu_core_freq, gpu_emc_util, gpu_emc_freq, interval): self._llc_references = llc_references self._llc_misses = llc_misses self._instructions = inst self._cycles = cycles self._gpu_core_util = gpu_core_util self._gpu_core_freq = gpu_core_freq self._gpu_emc_util = gpu_emc_util self._gpu_emc_freq = gpu_emc_freq
def __init__(self, info=None):
    """Store the given cpu info, or load it from localhost when it is empty.

    The cpu id and advertised frequency are then derived through the
    ``_loadId`` and ``_loadAdvertisedHz`` helpers.
    """
    from cpuinfo import cpuinfo

    if not info:
        info = cpuinfo.get_cpu_info()
    self.info = info

    self.cpuId = self._loadId()
    self.advertisedHz = self._loadAdvertisedHz()
def main(self, args):
    """Entry point of the ``run`` command: create a job and start it.

    Parses ``args``, merges them with the project configuration, adds the
    job files to the index, creates the job in the local Git, optionally
    connects to the server and uploads the job data, and -- with
    ``--local`` -- starts the command on this machine.

    Exits with status 2 when the config file or the model is missing and
    with status 1 on invalid option combinations or a missing command.
    Raises ``Exception`` when a ``--param`` value has no ``=``.
    """
    import aetros.const

    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        prog=aetros.const.__prog__ + ' run')
    parser.add_argument(
        'command', nargs='?',
        help="The command to run. Default read in configuration file")
    parser.add_argument(
        '-i', '--image',
        help="Which Docker image to use for the command. Default read in configuration file. If not specified, command is executed on the host.")
    parser.add_argument(
        '--no-image', action='store_true',
        help="Forces not to use docker, even when image is defined in the configuration file.")
    parser.add_argument(
        '-s', '--server', action='append',
        help="Limits the server pool to this server. Default not limitation or read in configuration file. Multiple --server allowed.")
    parser.add_argument(
        '-m', '--model',
        help="Under which model this job should be listed. Default read in configuration file")
    parser.add_argument(
        '-l', '--local', action='store_true',
        help="Start the job immediately on the current machine.")
    parser.add_argument(
        '-c', '--config',
        help="Default aetros.yml in current working directory.")
    parser.add_argument(
        '--priority',
        help="Increases or decreases priority. Default is 0.")
    parser.add_argument(
        '--cpu',
        help="How many CPU cores should be assigned to job. Docker only.")
    parser.add_argument(
        '--memory',
        help="How much memory should be assigned to job. Docker only.")
    parser.add_argument(
        '--gpu',
        help="How many GPU cards should be assigned to job. Docker only.")
    parser.add_argument(
        '--gpu_memory',
        help="Memory requirement for the GPU. Docker only.")
    parser.add_argument(
        '--offline', '-o', action='store_true',
        help="Whether the execution should happen offline.")
    parser.add_argument(
        '--rebuild-image', action='store_true',
        help="Makes sure the Docker image is re-built without cache.")
    parser.add_argument(
        '--max-time',
        help="Limit execution time in seconds. Sends SIGINT to the process group when reached.")
    parser.add_argument(
        '--max-epochs',
        help="Limit execution epochs. Sends SIGINT to the process group when reached.")
    parser.add_argument(
        '--gpu-device', action='append',
        help="Which device id should be mapped into the NVIDIA docker container. Only when --local")
    parser.add_argument(
        '--volume', '-v', action='append',
        help="Volume into docker. Only when --local")
    parser.add_argument(
        '-e', action='append',
        help="Sets additional environment variables. '-e name=value' to set value, or '-e name' to read from current env")
    parser.add_argument(
        '-p', '--param', action='append',
        help="Sets a hyperparameter, example '--param name=value'. Multiple --param allowed.")

    parsed_args = parser.parse_args(args)

    if parsed_args.config and not os.path.exists(parsed_args.config):
        self.logger.error("fatal: file %s does not exist." % (parsed_args.config, ))
        sys.exit(2)

    config = find_config(parsed_args.config)
    home_config = read_home_config()

    if config['model'] and not parsed_args.model:
        parsed_args.model = config['model']

    if not parsed_args.model:
        print("fatal: no model defined. Use --model or switch into a directory where you executed 'aetros init model-name'.")
        sys.exit(2)

    if not parsed_args.local and parsed_args.volume:
        print("fatal: can not use volume with jobs on the cluster. Use datasets instead.")
        sys.exit(1)

    if parsed_args.local and parsed_args.priority:
        print("fatal: the priority can only be set for jobs in the cluster.")
        sys.exit(1)

    if config['image']:
        ensure_docker_installed(self.logger)

    # Environment variables: "-e name=value" sets a value, "-e name" copies
    # the value from the current environment.
    env = {}
    if parsed_args.e:
        for item in parsed_args.e:
            if '=' in item:
                # Split on the first '=' only, so values that themselves
                # contain '=' (e.g. "OPTS=a=b") are accepted.
                k, v = item.split('=', 1)
            else:
                k = item
                v = os.getenv(k)
            env[k] = v

    if ('command' not in config or not config['command']) and not parsed_args.command:
        self.logger.error('No command given. Define the command in aetros.yml or use command argument.')
        sys.exit(1)

    job_backend = JobBackend(parsed_args.model, self.logger)

    ignore = []
    if 'ignore' in config:
        ignore = config['ignore']
    job_backend.job = {'config': {'ignore': ignore}}

    adding_files = loading_text("- Adding job files to index ... ")
    files_added, size_added = job_backend.add_files(config['root'], report=False)
    adding_files("done with %d file%s added (%s)."
                 % (files_added, 's' if files_added != 1 else '', human_size(size_added, 2)))

    # Note: create_info['config'] is the same dict object as ``config``.
    create_info = {'type': 'custom', 'config': config}

    incoming_hyperparameter = {}
    if parsed_args.param:
        for param in parsed_args.param:
            if '=' not in param:
                raise Exception('--param ' + param + ' does not contain a `=`. Please use "--param name=value"')
            # Split on the first '=' only, so the value may contain '='.
            name, value = param.split('=', 1)
            incoming_hyperparameter[name] = value

    # first transform simple format in the full definition with parameter types
    # (string, number, group, choice_group, etc)
    full_hyperparameters = lose_parameters_to_full(config['parameters'])

    # now extract hyperparameters from full definition, and overwrite stuff using
    # incoming_hyperparameter if available
    hyperparameter = extract_parameters(full_hyperparameters, incoming_hyperparameter)
    create_info['config']['parameters'] = hyperparameter

    if parsed_args.rebuild_image:
        create_info['config']['rebuild_image'] = True

    if parsed_args.max_epochs:
        create_info['config']['maxEpochs'] = int(parsed_args.max_epochs)

    create_info['config']['priority'] = 0
    if parsed_args.priority:
        create_info['config']['priority'] = float(parsed_args.priority)

    if parsed_args.max_time:
        create_info['config']['maxTime'] = float(parsed_args.max_time)

    if parsed_args.command:
        create_info['config']['command'] = parsed_args.command

    if parsed_args.image:
        # reset install options, since we can't make sure if the base image still fits
        if 'image' in config and config['image'] and config['image'] != parsed_args.image:
            create_info['config']['install'] = None

        # reset dockerfile, since we specified manually an image
        create_info['config']['dockerfile'] = None
        create_info['config']['image'] = parsed_args.image

    if parsed_args.no_image:
        create_info['config']['image'] = None

    if parsed_args.server:
        create_info['config']['servers'] = []
        for name in parsed_args.server:
            create_info['config']['servers'].append(name)

    create_info['config']['resources'] = create_info['config'].get('resources', {})
    resources = create_info['config']['resources']

    # Command-line values win over configured ones; without an image the
    # cpu/memory defaults are 0.
    default_cpu_and_memory = 1 if create_info['config']['image'] else 0
    resources['cpu'] = int(parsed_args.cpu or resources.get('cpu', default_cpu_and_memory))
    resources['memory'] = int(parsed_args.memory or resources.get('memory', default_cpu_and_memory))
    resources['gpu'] = int(parsed_args.gpu or resources.get('gpu', 0))
    resources['gpu_memory'] = int(parsed_args.gpu_memory or resources.get('gpu_memory', 0))

    if parsed_args.local:
        create_info['server'] = 'local'

        # make sure we do not limit the resources to something that is not available on the local machine
        warning = []
        cpu = cpuinfo.get_cpu_info()
        mem = psutil.virtual_memory().total
        gpu = 0
        try:
            gpu = len(get_ordered_devices())
        except CudaNotImplementedException:
            pass

        if not create_info['config']['image'] and not all([x == 0 for x in six.itervalues(resources)]):
            self.logger.warning("! No Docker virtualization since no `image` defined, resources limitation ignored.")

        if create_info['config']['image'] and resources['gpu'] > 0:
            if not (sys.platform == "linux" or sys.platform == "linux2"):
                self.logger.warning("! Your operating system does not support GPU allocation for "
                                    "Docker virtualization. "
                                    "NVIDIA-Docker2 is only supported on Linux.")

        local_max_resources = {
            'cpu': cpu['count'],
            'memory': ceil(mem / 1024 / 1024 / 1024),
            'gpu': gpu
        }

        if create_info['config']['image']:
            # read max hardware within Docker
            out = docker_call(['run', 'alpine', 'sh', '-c', 'nproc && cat /proc/meminfo | grep MemTotal'])
            cpus, memory = out.decode('utf-8').strip().split('\n')
            local_max_resources['cpu'] = int(cpus)

            memory = memory.replace('MemTotal:', '').replace('kB', '').strip()
            local_max_resources['memory'] = ceil(int(memory) / 1024 / 1024)

        if local_max_resources['cpu'] < resources['cpu']:
            warning.append('CPU cores %d -> %d' % (resources['cpu'], local_max_resources['cpu']))
            resources['cpu'] = local_max_resources['cpu']

        if local_max_resources['memory'] < resources['memory']:
            warning.append('memory %dGB -> %dGB' % (resources['memory'], local_max_resources['memory']))
            resources['memory'] = local_max_resources['memory']

        if local_max_resources['gpu'] < resources['gpu']:
            warning.append('GPU cards %d -> %d' % (resources['gpu'], local_max_resources['gpu']))
            resources['gpu'] = local_max_resources['gpu']

        if warning:
            self.logger.warning("! Resources downgrade due to missing hardware: %s." % (', '.join(warning), ))

    if parsed_args.config and not create_info['config']['configPath']:
        create_info['config']['configPath'] = parsed_args.config

    create_info['config']['sourcesAttached'] = True

    creating_git_job = loading_text("- Create job in local Git ... ")
    if aetros.utils.git.get_current_commit_hash():
        create_info['origin_git_source'] = {
            'origin': aetros.utils.git.get_current_remote_url(),
            'author': aetros.utils.git.get_current_commit_author(),
            'message': aetros.utils.git.get_current_commit_message(),
            'branch': aetros.utils.git.get_current_branch(),
            'commit': aetros.utils.git.get_current_commit_hash(),
        }

    job_backend.create(create_info=create_info, server=None)
    creating_git_job("created %s in %s." % (job_backend.job_id[0:9], job_backend.model_name))

    summary = "➤ Summary: Job running "
    if parsed_args.local:
        summary += 'locally'
    else:
        summary += 'on the cluster'

    if create_info['config']['image']:
        summary += ' in Docker using image %s with %d CPU cores, %dGB memory and %d GPUs.' \
                   % (create_info['config']['image'], resources['cpu'], resources['memory'], resources['gpu'])
    else:
        summary += ' on host using all available resources.'

    print(summary)

    if parsed_args.offline:
        if not parsed_args.local:
            self.logger.warning("Can not create a remote job in offline mode.")
            sys.exit(1)

        self.logger.warning("Execution started offline.")
    else:
        adding_files = loading_text("- Connecting to " + home_config['host'] + " ... ")
        if job_backend.connect():
            adding_files("connected.")
        else:
            # Fall back to offline mode when the server is unreachable
            parsed_args.offline = True
            adding_files("failed. Continue in offline mode.")

    if not parsed_args.offline:
        sys.stdout.write("- Uploading job data ... ")
        job_backend.git.push()
        job_backend.client.wait_until_queue_empty(['files'], clear_end=False)

        sys.stdout.write(" done.\n")

        link = "%s/model/%s/job/%s" % (home_config['url'], job_backend.model_name, job_backend.job_id)
        sys.__stdout__.write(u"➤ Monitor job at %s\n" % (link))

    if parsed_args.local:
        job_backend.start(collect_system=False, offline=parsed_args.offline, push=False)

        if not parsed_args.offline:
            job_backend.git.start_push_sync()

        cpus = create_info['config']['resources']['cpu']
        memory = create_info['config']['resources']['memory']

        if not parsed_args.gpu_device and create_info['config']['resources']['gpu'] > 0:
            # if requested 2 GPUs and we have 3 GPUs with id [0,1,2], gpus should be [0,1]
            parsed_args.gpu_device = []
            for i in range(0, create_info['config']['resources']['gpu']):
                parsed_args.gpu_device.append(i)

        start_command(self.logger, job_backend, env, parsed_args.volume,
                      cpus=cpus, memory=memory,
                      gpu_devices=parsed_args.gpu_device,
                      offline=parsed_args.offline)
def samArch():
    """Return the micro-architecture name for the local CPU, if known.

    Only the Xeon "E5-2620 v3" is recognised (reported as ``"haswell"``);
    the model and revision are expected as the 4th and 5th whitespace-
    separated tokens of the CPU brand string. Returns ``None`` for any other
    CPU, including brand strings too short to hold those tokens.
    """
    brand_tokens = cpuinfo.get_cpu_info()['brand'].split()
    # Comparing the slice as a whole avoids an IndexError on short brands.
    if brand_tokens[3:5] == ["E5-2620", "v3"]:
        return "haswell"
    return None
def execute_jobs(jobs, threads, job_name, output_dir, plot_file, args):
    """Run ``jobs`` in worker processes, reusing cached results when allowed.

    Results are appended to ``<job_name>.out.csv`` and ``<job_name>.out.txt``
    in ``output_dir`` and each finished job is cached as json under
    ``output_dir/cache``. ``jobs`` is modified in place while cached entries
    are filtered out. With ``args.dry_run`` the plan is printed and the
    process exits. Afterwards the optional ``plot_file`` script is run.

    Raises:
        Exception: when a job needs more threads than are available.
    """
    # set up the cache
    cache_dir = os.path.join(output_dir, "cache")
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    # find things in the cache if we're allowed
    cached = []
    if not args.force:
        # iterate over a copy because cached jobs are removed from ``jobs``
        for job in jobs[:]:
            key = cache_key(job)
            path = os.path.join(cache_dir, key + ".json")
            if os.path.exists(path):
                cached.append((job, path))
                jobs.remove(job)

    # dry run: only list what is cached and what would be run, then exit
    if args.dry_run:
        if cached:
            print "in cache:"
            for i, (job, path) in enumerate(cached):
                print " %d: %s (%s)" % (i, " ".join(
                    job.command), cache_key(job))
            if jobs:
                print ""
        if jobs:
            print "to run:"
            for i, job in enumerate(jobs):
                print " %d: %s" % (i, " ".join(job.command))
        sys.exit(0)

    # do we have enough actual threads to run every remaining job?
    if threads == -1:
        threads = psutil.cpu_count()
    else:
        threads = min(psutil.cpu_count(), threads)
    # the leading [0] keeps max() valid when no jobs remain
    needed_threads = max([0] + [job.threads for job in jobs])
    if needed_threads > threads:
        raise Exception("not enough threads: need %d, have %d" %
                        (needed_threads, threads))

    # compile benchmark runner if necessary
    if jobs:
        compile_runner()

    # output files
    data_file = open(os.path.join(output_dir, "%s.out.csv" % job_name), "w")
    output_file = open(os.path.join(output_dir, "%s.out.txt" % job_name), "w")
    output_file.write("experiment: %s\n" % " ".join(sys.argv))
    output_file.write("start: %s\n" % datetime.datetime.now())
    output_file.write("cpu: %s\n" % cpuinfo.get_cpu_info()['brand'])

    # read by on_job_complete through the closure; this function sets it to
    # True after the first result so the csv header is written only once
    header_printed = False

    def on_job_complete(job, t, timed_out, out):
        # write to data file
        if not header_printed:
            keys = sorted(job.ident) + ["time", "timeout"]
            data_file.write(",".join("\"%s\"" % k for k in keys) + "\n")
        for k in sorted(job.ident):
            data_file.write("\"%s\"," % job.ident[k])
        data_file.write("%.3f,%s\n" % (t, timed_out))
        data_file.flush()
        # write to output file
        output_file.write("*** %s\n" % job.ident)
        output_file.write(out.encode('utf8', 'replace'))
        output_file.write("*** %s\n" % ("timeout (%s)" % t if timed_out else t))
        output_file.flush()

    # first process all results that are cached
    for job, path in cached:
        with open(path) as f:
            data = json.load(f)
        cmd = " ".join(job.command)
        print "cached: %s\n (%s)" % (cmd, cache_key(job))
        on_job_complete(job, data["time"], data["timed_out"], data["out"])
        header_printed = True

    total_jobs = len(jobs)
    running_jobs = {}  # job.id -> (process, cpus assigned to it)
    threads_used = 0
    cpus = range(psutil.cpu_count())  # cpu ids currently not assigned
    queue = multiprocessing.Queue()  # "start"/"end" events from run_job

    # schedule jobs in descending order of threads so we can use smaller jobs to
    # fill gaps
    jobs = sorted(jobs, key=lambda j: j.threads, reverse=True)

    # spawn jobs that use at most a given number of threads on the given cpus
    # (returns the number of threads newly used and the cpus still free)
    def launch_more_jobs(threads, cpus):
        threads_used = 0
        cpus_available = cpus
        for job in jobs[:]:
            if threads_used + job.threads <= threads:
                my_cpus = cpus_available[:job.threads]
                cpus_available = cpus_available[job.threads:]
                p = multiprocessing.Process(target=run_job, args=(queue, job))
                p.start()
                # cpu pinning is only attempted on Linux
                if sys.platform.startswith("linux"):
                    pp = psutil.Process(p.pid)
                    pp.cpu_affinity(my_cpus)
                running_jobs[job.id] = (p, my_cpus)
                threads_used += job.threads
                jobs.remove(job)
                # sequential mode: launch a single job per call
                if args.sequential:
                    break
        return threads_used, cpus_available

    # spawn initial threads
    threads_used, cpus = launch_more_jobs(threads, cpus)

    # event loop: runs until no job is left running
    while running_jobs:
        evt = queue.get()
        if evt[0] == "start":
            start, job, output_name = evt
            cmd = " ".join(job.command)
            status = "[%d jobs: %d complete, %d running, %d remaining]" % (
                total_jobs, total_jobs - len(running_jobs) - len(jobs),
                len(running_jobs), len(jobs))
            print "starting: %s\n --> %s\n %s" % (cmd, output_name, status)
        elif evt[0] == "end":
            end, job, t, timed_out, out = evt
            # write to data file
            on_job_complete(job, t, timed_out, out)
            header_printed = True
            # write to cache
            key = cache_key(job)
            path = os.path.join(cache_dir, key + ".json")
            with open(path, "w") as f:
                json.dump(
                    {
                        "command": job.command,
                        "time": t,
                        "timed_out": timed_out,
                        "out": out
                    },
                    f,
                    indent=2)
            # reap the worker and hand its threads and cpus back
            p, c = running_jobs[job.id]
            p.join()
            del running_jobs[job.id]
            threads_used -= job.threads
            cpus = sorted(cpus + c)
            status = "[%d jobs: %d complete, %d running, %d remaining]" % (
                total_jobs, total_jobs - len(running_jobs) - len(jobs),
                len(running_jobs), len(jobs))
            print "finished: %s\n %s" % (" ".join(job.command), status)
            # fill the freed capacity with waiting jobs
            t, c = launch_more_jobs(threads - threads_used, cpus)
            threads_used += t
            cpus = c

    output_file.close()
    data_file.close()

    # optional post-processing script, resolved relative to the current
    # working directory and run inside output_dir
    if plot_file and not args.no_post_process:
        path = os.path.join(os.getcwd(), "experiments/plots/" + plot_file)
        if not os.path.exists(path):
            print "plot file not found: %s" % plot_file
        else:
            print "running post-process file %s..." % plot_file
            subprocess.check_call(["python", path, job_name],
                                  cwd=output_dir,
                                  stderr=subprocess.STDOUT)
import distro

# Colors! Use the ANSI codes found here https://github.com/dslackw/colored
FontColor = 166
AtColor = 15
TitleColor = 15

# ubuntu
DarkColor = 1
MedColor = 166
LightColor = 11

# You shouldn't need to touch anything below here!
cpu = cpuinfo.get_cpu_info()
RAM = psutil.virtual_memory()
Disk = psutil.disk_usage('/')

# Disk figures in (decimal) gigabytes, rounded to two decimals
DiskTotal = round(Disk.total / 1000000000, 2)
DiskTaken = round(Disk.used / 1000000000, 2)
DiskFree = round(Disk.free / 1000000000, 2)
DiskPercent = Disk.percent

# RAM figures in (decimal) gigabytes, rounded to two decimals.
# Fields are read by name: positional index 5 of psutil's result is the
# "active" field (and does not exist on every platform), not used memory.
RamTotal = round(RAM.total / 1000000000, 2)
RamTaken = round(RAM.used / 1000000000, 2)
RamFree = round(RAM.available / 1000000000, 2)
RamPercent = RAM.percent
def __init__(self, lxd):
    """Keep the given ``lxd`` object and record the host CPU information."""
    self.lxd = lxd
    host_cpu = cpuinfo.get_cpu_info()
    self.host_cpu_info = host_cpu
def send_config(config_file):
    '''
    Sends the registration of this machine to the OpenSubmit web server.

    Collects operating system, CPU, compiler, environment and custom
    variable information and POSTs it to ``<Server.url>/machines/``.

    config_file -- path handed to read_config(); the resulting config must
                   provide Server/url, Server/uuid, Server/secret and
                   Execution/custom_variables.

    Returns False when the environment information (environment.json and
    the commands it lists) cannot be collected. In every other case the
    function returns None; failures while loading the custom variables or
    while contacting the server are only logged.
    '''
    config = read_config(config_file)
    conf = platform.uname()
    output = []
    output.append(["Operating system",
                   "%s %s %s (%s)" % (conf[0], conf[2], conf[3], conf[4])])

    try:
        from cpuinfo import cpuinfo
        cpu = cpuinfo.get_cpu_info()
        # py-cpuinfo 5.x renamed "brand" / "vendor_id" to "brand_raw" /
        # "vendor_id_raw"; accept both so the detailed description is not
        # silently replaced by the platform.processor() fallback below.
        brand = cpu["brand"] if "brand" in cpu else cpu["brand_raw"]
        vendor = cpu["vendor_id"] if "vendor_id" in cpu else cpu["vendor_id_raw"]
        conf = "%s, %s, %s Family %d Model %d Stepping %d #%d" % (
            brand, vendor, cpu["arch"], cpu['family'], cpu['model'],
            cpu['stepping'], cpu["count"])
    except Exception:
        # Best effort: cpuinfo may be missing or may not report every field.
        # (Exception rather than a bare except, so Ctrl-C / SystemExit still
        # propagate.)
        conf = platform.processor()  # may be empty on Linux because of partial implementation in platform
    output.append(["CPUID information", conf])

    if platform_sys == "Windows":
        conf = _infos_cmd("cl.exe|@echo off", "")  # force returncode 0
        conf = conf.split("\n")[0]  # extract version info
    else:
        conf = _infos_cmd("cc -v")

    try:
        # read keys and values from JSON file
        import simplejson
        with open('environment.json', 'rb') as f:
            environ_vars = simplejson.loads(f.read())
        output.append(["CC information", conf])
        # .items() works on both Python 2 and Python 3 dictionaries
        for k, v in environ_vars.items():
            output.append([k, _infos_cmd(v)])
        output.append(["OpenCL Details", _infos_opencl()])
    except Exception as e:
        logger.error("ERROR Collecting environment information (%s)" % (str(e)))
        return False

    # separate distro and custom environment values for reliability
    try:
        import simplejson
        custom_vars_file = config.get("Execution", "custom_variables")
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Using custom variables from %s" % (custom_vars_file))
        with open(custom_vars_file, 'rb') as f:
            custom_vars = simplejson.loads(f.read())
        for k, v in custom_vars.items():
            output.append([k + " information", _infos_cmd(v)])
    except Exception as e:
        # Custom variables are optional: log and carry on with what we have.
        logger.error("ERROR loading custom variables (%s)" % (str(e)))

    try:
        logger.debug("Sending config data: " + str(output))
        post_data = [("Config", json.dumps(output)),
                     ("UUID", config.get("Server", "uuid")),
                     ("Address", _infos_host()),
                     ("Secret", config.get("Server", "secret"))
                     ]

        post_data = urlencode(post_data)
        post_data = post_data.encode("utf-8", errors="ignore")

        urlopen("%s/machines/" % config.get("Server", "url"), post_data)
    except Exception as e:
        logger.error("ERROR Could not contact OpenSubmit web server at %s (%s)" % (config.get("Server", "url"), str(e)))
async def async_get_config_entry_diagnostics(
    hass: HomeAssistant, entry: ConfigEntry
) -> dict[str, Any]:
    """Return diagnostics for a config entry.

    The diagnostics payload is the dictionary produced by
    ``cpuinfo.get_cpu_info()``; *entry* is not consulted.
    """
    # get_cpu_info() is a synchronous call that probes the host system, so it
    # is handed to the executor instead of being run on the event loop.
    info: dict[str, Any] = await hass.async_add_executor_job(cpuinfo.get_cpu_info)
    return info
def generate_json(opts, csvdicts):
    """Convert the parsed csv results into the serialobj JSON report.

    opts     -- option mapping; reads '--perfSettingsJson' (path of the perf
                settings file), '--jsonFile' (output path) and '--jobName'.
    csvdicts -- mapping of metric name to the csv dictionary for that metric;
                each entry becomes one root test via generate_test_object().

    The report is written to opts['--jsonFile']; nothing is returned.
    """
    # read the perf-specific settings (a separate name is used for the file
    # handle so it is not confused with the parsed settings)
    with open(opts['--perfSettingsJson']) as settings_file:
        perfsettingsjson = json.load(settings_file)

    jsonFilePath = opts['--jsonFile']
    log.info('Attempting to generate ' + jsonFilePath)
    rootTests = list()

    info = cpuinfo.get_cpu_info()

    # .items() works on both Python 2 and Python 3 dictionaries
    for metric, currdict in csvdicts.items():
        # recursively build nodes from the csvdict
        rootTest = generate_test_object(
            currdict,
            perfsettingsjson['TestProduct'] + ' Perf Test Results',
            info,
            metric)
        rootTests.append(rootTest)

    # populate the root level meta info
    run = serialobj.Run()
    run.testList = rootTests

    machinepool = serialobj.MachinePool()

    architecture = serialobj.Architecture()
    architecture.architectureName = format(info['arch'])
    machinepool.architecture = architecture

    manufacturer = serialobj.Manufacturer()
    # py-cpuinfo 5.x renamed 'vendor_id' to 'vendor_id_raw'; accept both.
    vendor_id = info['vendor_id'] if 'vendor_id' in info else info['vendor_id_raw']
    manufacturer.manufacturerName = format(vendor_id)
    machinepool.manufacturer = manufacturer

    microarch = serialobj.MicroArch()
    microarch.microarchName = 'SSE2'  # cannot be obtained by cpu-info; need to figure out some other way

    osInfo = serialobj.OSInfo()
    osInfo.osInfoName = platform.system()
    osInfo.osVersion = platform.version()

    machinepool.osInfo = osInfo
    machinepool.microarch = microarch
    machinepool.NumberOfCores = psutil.cpu_count(logical=False)
    machinepool.NumberOfLogicalProcessors = psutil.cpu_count(logical=True)
    machinepool.TotalPhysicalMemory = psutil.virtual_memory().total / 1024
    machinepool.machinepoolName = perfsettingsjson['TargetQueue']
    machinepool.machinepoolDescription = '...'
    run.machinepool = machinepool

    config = serialobj.Config()
    config.configName = perfsettingsjson['TargetQueue']
    run.config = config

    runs = list()
    runs.append(run)

    job = serialobj.Job()
    job.Runs = runs

    user = serialobj.User()
    user.userName = perfsettingsjson['Creator']
    user.userAlias = perfsettingsjson['Creator']
    job.user = user

    # extract build number from buildmoniker if official build: monikers with
    # at least three '-' separated tokens contribute their last two tokens,
    # anything shorter is used verbatim
    buildtokens = perfsettingsjson['BuildMoniker'].split('-')
    if len(buildtokens) < 3:
        buildNumber = perfsettingsjson['BuildMoniker']
    else:
        buildNumber = buildtokens[-2] + '.' + buildtokens[-1]

    buildInfo = serialobj.BuildInfo()
    buildInfo.buildInfoName = perfsettingsjson['BuildMoniker']
    buildInfo.buildNumber = buildNumber
    buildInfo.branch = perfsettingsjson['TestProduct']
    job.buildInfo = buildInfo

    jobType = serialobj.JobType()
    jobType.jobTypeName = 'Private'
    job.jobType = jobType

    jobGroup = serialobj.JobGroup()
    jobGroup.jobGroupName = perfsettingsjson[
        'Creator'] + '-' + perfsettingsjson[
            'TestProduct'] + '-' + perfsettingsjson['Branch'] + '-Perf'
    job.jobGroup = jobGroup

    job.jobDescription = '...'
    job.jobName = opts['--jobName']

    root = serialobj.Root()
    root.job = job
    jsonOutput = serialobj.JsonOutput()
    jsonOutput.roots.append(root)

    # The with-block flushes and closes the file on exit, so no explicit
    # flush()/close() is needed.
    with open(jsonFilePath, 'w+') as opfile:
        opfile.write(jsonOutput.to_JSON())

    log.info('Conversion of csv to json successful')
def getCPU(self):
    """Return the formatted "[CPU]" line carrying the processor model name.

    Raises KeyError when py-cpuinfo reports no model name under either of
    the known keys.
    """
    info = cpuinfo.get_cpu_info()
    # py-cpuinfo 5.x renamed the 'brand' field to 'brand_raw'; accept both so
    # the lookup works regardless of the installed release.
    brand = info['brand'] if 'brand' in info else info['brand_raw']
    return PREFIX + "[CPU] " + SUFFIX + brand + ";"
def getCpuInfo():
    """Return the full py-cpuinfo report for the machine running this code."""
    # Imported lazily, only when the information is actually requested.
    from cpuinfo import cpuinfo

    return cpuinfo.get_cpu_info()
def __init__(self):
    """Record the CPU model name, the CPU count and an initial utilization.

    Raises KeyError when py-cpuinfo reports no model name under either of
    the known keys.
    """
    info = cpuinfo.get_cpu_info()
    # py-cpuinfo 5.x renamed the 'brand' field to 'brand_raw'; accept both so
    # construction does not fail depending on the installed release.
    self.name = info["brand"] if "brand" in info else info["brand_raw"]
    self.num_proccess = psutil.cpu_count()
    self.utilization = self.utilization_update()
def update(self):
    """Get the latest data and updates the state.

    The state is the current CPU frequency in GHz, rounded to two decimals.
    """
    from cpuinfo import cpuinfo

    self.info = cpuinfo.get_cpu_info()
    # Legacy py-cpuinfo exposes the (hz, scale) tuple as 'hz_actual_raw' and
    # uses 'hz_actual' for a display string; py-cpuinfo 5.x dropped the
    # '_raw' key and stores the tuple under 'hz_actual'. The legacy key must
    # therefore be checked first.
    if 'hz_actual_raw' in self.info:
        hz_actual = self.info['hz_actual_raw']
    else:
        hz_actual = self.info['hz_actual']
    self._state = round(float(hz_actual[0])/10**9, 2)
def execute_jobs(jobs, threads, job_name, output_dir, plot_file, args):
    """Run benchmark jobs in parallel, caching results and logging output.

    Written for Python 2 (print statements, list-returning ``range``).

    jobs       -- list of job objects (the code reads ``command``,
                  ``threads``, ``ident`` and ``id``). Jobs whose result is
                  found in the cache are removed from this list in place.
    threads    -- maximum number of threads to use at once; -1 means all
                  CPUs reported by psutil. Capped at the CPU count.
    job_name   -- base name of the "<job_name>.out.csv" / ".out.txt" files;
                  also passed to the post-process script.
    output_dir -- directory holding the output files and the "cache"
                  sub-directory.
    plot_file  -- optional post-process script name, looked up under
                  "experiments/plots/" relative to the working directory.
    args       -- options object; reads ``force``, ``dry_run``,
                  ``sequential`` and ``no_post_process``.

    Raises Exception when some job needs more threads than are available.
    With ``args.dry_run`` the plan is printed and the process exits (status
    0) before anything runs.
    """
    # set up the cache
    cache_dir = os.path.join(output_dir, "cache")
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    # find things in the cache if we're allowed
    cached = []
    if not args.force:
        # iterate over a copy because cached jobs are removed from `jobs`
        for job in jobs[:]:
            key = cache_key(job)
            path = os.path.join(cache_dir, key + ".json")
            if os.path.exists(path):
                cached.append((job, path))
                jobs.remove(job)

    if args.dry_run:
        if cached:
            print "in cache:"
            for i, (job, path) in enumerate(cached):
                print " %d: %s (%s)" % (i, " ".join(job.command), cache_key(job))
            if jobs:
                print ""
        if jobs:
            print "to run:"
            for i, job in enumerate(jobs):
                print " %d: %s" % (i, " ".join(job.command))
        sys.exit(0)

    # do we have enough actual threads to run every remaining job?
    if threads == -1:
        threads = psutil.cpu_count()
    else:
        threads = min(psutil.cpu_count(), threads)
    # the leading [0] keeps max() valid when no jobs remain
    needed_threads = max([0] + [job.threads for job in jobs])
    if needed_threads > threads:
        raise Exception("not enough threads: need %d, have %d" % (
            needed_threads, threads))

    # compile benchmark runner if necessary
    if jobs:
        compile_runner()

    # output files
    data_file = open(os.path.join(output_dir, "%s.out.csv" % job_name), "w")
    output_file = open(os.path.join(output_dir, "%s.out.txt" % job_name), "w")
    output_file.write("experiment: %s\n" % " ".join(sys.argv))
    output_file.write("start: %s\n" % datetime.datetime.now())
    # NOTE(review): 'brand' is the legacy py-cpuinfo key name; newer releases
    # appear to call it 'brand_raw' -- confirm against the installed version.
    output_file.write("cpu: %s\n" % cpuinfo.get_cpu_info()['brand'])

    # Read by on_job_complete() through its closure; the callers below set it
    # to True after the first completed job so the CSV header is written once.
    header_printed = False

    def on_job_complete(job, t, timed_out, out):
        # write to data file
        if not header_printed:
            keys = sorted(job.ident) + ["time", "timeout"]
            data_file.write(",".join("\"%s\"" % k for k in keys) + "\n")
        for k in sorted(job.ident):
            data_file.write("\"%s\"," % job.ident[k])
        data_file.write("%.3f,%s\n" % (t, timed_out))
        data_file.flush()

        # write to output file
        output_file.write("*** %s\n" % job.ident)
        output_file.write(out.encode('utf8', 'replace'))
        output_file.write("*** %s\n" % (
            "timeout (%s)" % t if timed_out else t))
        output_file.flush()

    # first process all results that are cached
    for job, path in cached:
        with open(path) as f:
            data = json.load(f)
        cmd = " ".join(job.command)
        print "cached: %s\n (%s)" % (cmd, cache_key(job))
        on_job_complete(job, data["time"], data["timed_out"], data["out"])
        header_printed = True

    total_jobs = len(jobs)
    running_jobs = {}  # job.id -> (process, cpus assigned to it)
    threads_used = 0
    cpus = range(psutil.cpu_count())  # CPU indices not currently assigned
    queue = multiprocessing.Queue()  # events reported back by run_job

    # schedule jobs in descending order of threads so we can use smaller jobs to
    # fill gaps
    jobs = sorted(jobs, key=lambda j: j.threads, reverse=True)

    # spawn jobs that use at most a given number of threads on the given cpus
    def launch_more_jobs(threads, cpus):
        # these two are local to this call; the return value reports how many
        # threads were newly taken and which cpus are still free
        threads_used = 0
        cpus_available = cpus
        for job in jobs[:]:
            if threads_used + job.threads <= threads:
                my_cpus = cpus_available[:job.threads]
                cpus_available = cpus_available[job.threads:]
                p = multiprocessing.Process(target=run_job, args=(queue, job))
                p.start()
                # the new process is pinned to its cpus only on Linux
                if sys.platform.startswith("linux"):
                    pp = psutil.Process(p.pid)
                    pp.cpu_affinity(my_cpus)
                running_jobs[job.id] = (p, my_cpus)
                threads_used += job.threads
                jobs.remove(job)
                # sequential mode: start at most one job per call
                if args.sequential:
                    break
        return threads_used, cpus_available

    # spawn initial threads
    threads_used, cpus = launch_more_jobs(threads, cpus)

    # event loop: each queue item is a tuple tagged "start" or "end"
    while running_jobs:
        evt = queue.get()

        if evt[0] == "start":
            start, job, output_name = evt
            cmd = " ".join(job.command)
            status = "[%d jobs: %d complete, %d running, %d remaining]" % (
                total_jobs,
                total_jobs - len(running_jobs) - len(jobs),
                len(running_jobs),
                len(jobs))
            print "starting: %s\n --> %s\n %s" % (cmd, output_name, status)

        elif evt[0] == "end":
            end, job, t, timed_out, out = evt

            # write to data file
            on_job_complete(job, t, timed_out, out)
            header_printed = True

            # write to cache
            key = cache_key(job)
            path = os.path.join(cache_dir, key + ".json")
            with open(path, "w") as f:
                json.dump({"command": job.command, "time": t, "timed_out": timed_out, "out": out}, f, indent=2)

            # reap the finished process and give its threads/cpus back
            p, c = running_jobs[job.id]
            p.join()
            del running_jobs[job.id]
            threads_used -= job.threads
            cpus = sorted(cpus + c)

            status = "[%d jobs: %d complete, %d running, %d remaining]" % (
                total_jobs,
                total_jobs - len(running_jobs) - len(jobs),
                len(running_jobs),
                len(jobs))
            print "finished: %s\n %s" % (" ".join(job.command), status)

            # fill the freed capacity with waiting jobs
            t, c = launch_more_jobs(threads - threads_used, cpus)
            threads_used += t
            cpus = c

    output_file.close()
    data_file.close()

    # optional post-processing: run the plot script inside output_dir
    if plot_file and not args.no_post_process:
        path = os.path.join(os.getcwd(), "experiments/plots/" + plot_file)
        if not os.path.exists(path):
            print "plot file not found: %s" % plot_file
        else:
            print "running post-process file %s..." % plot_file
            subprocess.check_call(["python", path, job_name],
                cwd=output_dir, stderr=subprocess.STDOUT)
# Colors! Use the ANSI codes found here https://github.com/dslackw/colored
FontColor = 166
AtColor = 15
TitleColor = 15

# ubuntu
DarkColor = 1
MedColor = 166
LightColor = 11

# You shouldn't need to touch anything below here!
cpu = cpuinfo.get_cpu_info()
RAM = psutil.virtual_memory()
Disk = psutil.disk_usage('/')

# Disk figures for the root filesystem, in decimal gigabytes (10**9 bytes).
DiskTotal = round(Disk.total / 1000000000, 2)
DiskTaken = round(Disk.used / 1000000000, 2)
DiskFree = round(Disk.free / 1000000000, 2)
DiskPercent = Disk.percent

# RAM figures, in decimal gigabytes. Named fields are used instead of tuple
# indices because only the first five fields (total, available, percent,
# used, free) are the same on every platform: index 5 is the "active" counter
# on Linux/macOS and does not exist at all on Windows, so it must not be used
# as the amount of memory taken.
RamTotal = round(RAM.total / 1000000000, 2)
RamTaken = round(RAM.used / 1000000000, 2)
RamFree = round(RAM.available / 1000000000, 2)
RamPercent = RAM.percent
def update(self):
    """Get the latest data and updates the state.

    The state is the current CPU frequency in GHz, rounded to two decimals.
    """
    from cpuinfo import cpuinfo

    self.info = cpuinfo.get_cpu_info()
    # Legacy py-cpuinfo exposes the (hz, scale) tuple as 'hz_actual_raw' and
    # uses 'hz_actual' for a display string; py-cpuinfo 5.x dropped the
    # '_raw' key and stores the tuple under 'hz_actual'. The legacy key must
    # therefore be checked first.
    if 'hz_actual_raw' in self.info:
        hz_actual = self.info['hz_actual_raw']
    else:
        hz_actual = self.info['hz_actual']
    self._state = round(float(hz_actual[0]) / 10**9, 2)
def build(self, directory='output',
          compile=True, run=True, debug=False, clean=True,
          with_output=True, native=True,
          additional_source_files=None, additional_header_files=None,
          main_includes=None, run_includes=None,
          run_args=None, **kwds):
    '''
    Build the project

    Writes the C++ standalone project (static arrays, global objects, code
    objects, ``main.cpp``, network/synapses/run sources, the ``brianlib``
    support files and a makefile) into `directory`, then optionally
    compiles and runs it.

    Parameters
    ----------
    directory : str
        The output directory to write the project to, any existing files
        will be overwritten.
    compile : bool
        Whether or not to attempt to compile the project
    run : bool
        Whether or not to attempt to run the built project if it
        successfully builds.
    debug : bool
        Whether to compile in debug mode.
    with_output : bool
        Whether or not to show the ``stdout`` of the built program when run.
    native : bool
        Whether or not to compile for the current machine's architecture
        (best for speed, but not portable)
    clean : bool
        Whether or not to clean the project before building
    additional_source_files : list of str
        A list of additional ``.cpp`` files to include in the build.
    additional_header_files : list of str
        A list of additional ``.h`` files to include in the build.
    main_includes : list of str
        A list of additional header files to include in ``main.cpp``.
    run_includes : list of str
        A list of additional header files to include in ``run.cpp``.
    run_args : list of str
        Extra command line arguments appended when the built program is
        executed.
    kwds
        Not accepted; only present so that old or misspelled keyword
        names produce a descriptive error.

    Raises
    ------
    TypeError
        If any extra keyword argument is given (renamed or unknown).
    ValueError
        If the configured number of OpenMP threads is negative.
    RuntimeError
        If OpenMP is enabled and a code object uses a template without
        the ``IS_OPENMP_COMPATIBLE`` marker, if compilation fails, or if
        the built program returns a non-zero value.
    NotImplementedError
        If a synaptic pathway uses a scalar delay, or if the main queue
        contains an unknown function type.
    IOError
        If ``vcvarsall.bat`` cannot be located for an MSVC build.
    '''
    # Old keyword names, mapped to their current names for the error message
    renames = {'project_dir': 'directory',
               'compile_project': 'compile',
               'run_project': 'run'}
    if len(kwds):
        msg = ''
        for kwd in kwds:
            if kwd in renames:
                msg += ("Keyword argument '%s' has been renamed to "
                        "'%s'. ") % (kwd, renames[kwd])
            else:
                msg += "Unknown keyword argument '%s'. " % kwd
        raise TypeError(msg)

    # None defaults stand in for "empty list" (avoids mutable defaults)
    if additional_source_files is None:
        additional_source_files = []
    if additional_header_files is None:
        additional_header_files = []
    if main_includes is None:
        main_includes = []
    if run_includes is None:
        run_includes = []
    if run_args is None:
        run_args = []
    self.project_dir = directory
    ensure_directory(directory)

    compiler, extra_compile_args = get_compiler_and_args()
    compiler_flags = ' '.join(extra_compile_args)

    for d in ['code_objects', 'results', 'static_arrays']:
        ensure_directory(os.path.join(directory, d))

    writer = CPPWriter(directory)

    # Get the number of threads if specified in an openmp context
    nb_threads = prefs.devices.cpp_standalone.openmp_threads
    # If the number is negative, we need to throw an error
    if (nb_threads < 0):
        raise ValueError('The number of OpenMP threads can not be negative !')

    logger.debug("Writing C++ standalone project to directory "+os.path.normpath(directory))
    if nb_threads > 0:
        logger.warn("OpenMP code is not yet well tested, and may be inaccurate.", "openmp", once=True)
        logger.debug("Using OpenMP with %d threads " % nb_threads)
        # Every template in use must declare itself OpenMP compatible
        for codeobj in self.code_objects.itervalues():
            if not 'IS_OPENMP_COMPATIBLE' in codeobj.template_source:
                raise RuntimeError(("Code object '%s' uses the template %s "
                                    "which is not compatible with "
                                    "OpenMP.") % (codeobj.name,
                                                  codeobj.template_name))
    # (variable, start) pairs ordered by variable name
    arange_arrays = sorted([(var, start)
                            for var, start in self.arange_arrays.iteritems()],
                           key=lambda (var, start): var.name)

    # # Find np arrays in the namespaces and convert them into static
    # # arrays. Hopefully they are correctly used in the code: For example,
    # # this works for the namespaces for functions with C++ (e.g. TimedArray
    # # treats it as a C array) but does not work in places that are
    # # implicitly vectorized (state updaters, resets, etc.). But arrays
    # # shouldn't be used there anyway.
    for code_object in self.code_objects.itervalues():
        for name, value in code_object.variables.iteritems():
            if isinstance(value, np.ndarray):
                self.static_arrays[name] = value

    # write the static arrays
    logger.debug("static arrays: "+str(sorted(self.static_arrays.keys())))
    static_array_specs = []
    for name, arr in sorted(self.static_arrays.items()):
        arr.tofile(os.path.join(directory, 'static_arrays', name))
        static_array_specs.append((name, c_data_type(arr.dtype), arr.size, name))

    # Write the global objects
    networks = [net() for net in Network.__instances__()
                if net().name != '_fake_network']
    synapses = []
    for net in networks:
        net_synapses = [s for s in net.objects if isinstance(s, Synapses)]
        synapses.extend(net_synapses)
        # We don't currently support pathways with scalar delays
        for synapse_obj in net_synapses:
            for pathway in synapse_obj._pathways:
                if not isinstance(pathway.variables['delay'],
                                  DynamicArrayVariable):
                    error_msg = ('The "%s" pathway uses a scalar '
                                 'delay (instead of a delay per synapse). '
                                 'This is not yet supported. Do not '
                                 'specify a delay in the Synapses(...) '
                                 'call but instead set its delay attribute '
                                 'afterwards.') % (pathway.name)
                    raise NotImplementedError(error_msg)

    # Not sure what the best place is to call Network.after_run -- at the
    # moment the only important thing it does is to clear the objects stored
    # in magic_network. If this is not done, this might lead to problems
    # for repeated runs of standalone (e.g. in the test suite).
    for net in networks:
        net.after_run()

    arr_tmp = CPPStandaloneCodeObject.templater.objects(
        None, None,
        array_specs=self.arrays,
        dynamic_array_specs=self.dynamic_arrays,
        dynamic_array_2d_specs=self.dynamic_arrays_2d,
        zero_arrays=self.zero_arrays,
        arange_arrays=arange_arrays,
        synapses=synapses,
        clocks=self.clocks,
        static_array_specs=static_array_specs,
        networks=networks)
    writer.write('objects.*', arr_tmp)

    # Translate the queued operations into C++ statements. `procedures` is
    # a stack of (name, lines) pairs: 'start_run_func' pushes a fresh line
    # list and 'end_run_func' pops it into `runfuncs`, after which
    # `main_lines` points at the enclosing procedure's list again.
    main_lines = []
    procedures = [('', main_lines)]
    runfuncs = {}
    for func, args in self.main_queue:
        if func=='run_code_object':
            codeobj, = args
            main_lines.append('_run_%s();' % codeobj.name)
        elif func=='run_network':
            net, netcode = args
            main_lines.extend(netcode)
        elif func=='set_by_array':
            arrayname, staticarrayname = args
            code = ''' {pragma} for(int i=0; i<_num_{staticarrayname}; i++) {{ {arrayname}[i] = {staticarrayname}[i]; }} '''.format(arrayname=arrayname, staticarrayname=staticarrayname, pragma=openmp_pragma('static'))
            main_lines.extend(code.split('\n'))
        elif func=='set_by_single_value':
            arrayname, item, value = args
            code = '{arrayname}[{item}] = {value};'.format(arrayname=arrayname,
                                                           item=item,
                                                           value=value)
            main_lines.extend([code])
        elif func=='set_array_by_array':
            arrayname, staticarrayname_index, staticarrayname_value = args
            code = ''' {pragma} for(int i=0; i<_num_{staticarrayname_index}; i++) {{ {arrayname}[{staticarrayname_index}[i]] = {staticarrayname_value}[i]; }} '''.format(arrayname=arrayname, staticarrayname_index=staticarrayname_index, staticarrayname_value=staticarrayname_value, pragma=openmp_pragma('static'))
            main_lines.extend(code.split('\n'))
        elif func=='insert_code':
            main_lines.append(args)
        elif func=='start_run_func':
            name, include_in_parent = args
            if include_in_parent:
                main_lines.append('%s();' % name)
            main_lines = []
            procedures.append((name, main_lines))
        elif func=='end_run_func':
            name, include_in_parent = args
            name, main_lines = procedures.pop(-1)
            runfuncs[name] = main_lines
            name, main_lines = procedures[-1]
        else:
            raise NotImplementedError("Unknown main queue function type "+func)

    # generate the finalisations
    for codeobj in self.code_objects.itervalues():
        if hasattr(codeobj.code, 'main_finalise'):
            main_lines.append(codeobj.code.main_finalise)

    # Generate data for non-constant values
    code_object_defs = defaultdict(list)
    for codeobj in self.code_objects.itervalues():
        lines = []
        for k, v in codeobj.variables.iteritems():
            if isinstance(v, AttributeVariable):
                # We assume all attributes are implemented as property-like methods
                line = 'const {c_type} {varname} = {objname}.{attrname}();'
                lines.append(line.format(c_type=c_data_type(v.dtype), varname=k, objname=v.obj.name,
                                         attrname=v.attribute))
            elif isinstance(v, ArrayVariable):
                try:
                    if isinstance(v, DynamicArrayVariable):
                        # only 1-dimensional dynamic arrays get a pointer
                        # and a size definition
                        if v.dimensions == 1:
                            dyn_array_name = self.dynamic_arrays[v]
                            array_name = self.arrays[v]
                            line = '{c_type}* const {array_name} = &{dyn_array_name}[0];'
                            line = line.format(c_type=c_data_type(v.dtype), array_name=array_name,
                                               dyn_array_name=dyn_array_name)
                            lines.append(line)
                            line = 'const int _num{k} = {dyn_array_name}.size();'
                            line = line.format(k=k, dyn_array_name=dyn_array_name)
                            lines.append(line)
                    else:
                        lines.append('const int _num%s = %s;' % (k, v.size))
                except TypeError:
                    # a TypeError here is deliberately ignored: no
                    # definition is generated for that variable
                    pass
        for line in lines:
            # Sometimes an array is referred to by to different keys in our
            # dictionary -- make sure to never add a line twice
            if not line in code_object_defs[codeobj.name]:
                code_object_defs[codeobj.name].append(line)

    # Generate the code objects
    for codeobj in self.code_objects.itervalues():
        ns = codeobj.variables
        # TODO: fix these freeze/CONSTANTS hacks somehow - they work but not elegant.
        code = freeze(codeobj.code.cpp_file, ns)
        code = code.replace('%CONSTANTS%', '\n'.join(code_object_defs[codeobj.name]))
        code = '#include "objects.h"\n'+code

        writer.write('code_objects/'+codeobj.name+'.cpp', code)
        writer.write('code_objects/'+codeobj.name+'.h', codeobj.code.h_file)

    # The code_objects are passed in the right order to run them because they were
    # sorted by the Network object. To support multiple clocks we'll need to be
    # smarter about that.
    main_tmp = CPPStandaloneCodeObject.templater.main(None, None,
                                                      main_lines=main_lines,
                                                      code_objects=self.code_objects.values(),
                                                      report_func=self.report_func,
                                                      dt=float(defaultclock.dt),
                                                      additional_headers=main_includes,
                                                      )
    writer.write('main.cpp', main_tmp)

    # the network template receives 'std::move' only for MSVC
    if compiler=='msvc':
        std_move = 'std::move'
    else:
        std_move = ''
    network_tmp = CPPStandaloneCodeObject.templater.network(None, None,
                                                            std_move=std_move)
    writer.write('network.*', network_tmp)

    synapses_classes_tmp = CPPStandaloneCodeObject.templater.synapses_classes(None, None)
    writer.write('synapses_classes.*', synapses_classes_tmp)

    # Generate the run functions
    run_tmp = CPPStandaloneCodeObject.templater.run(None, None, run_funcs=runfuncs,
                                                    code_objects=self.code_objects.values(),
                                                    additional_headers=run_includes,
                                                    )
    writer.write('run.*', run_tmp)

    # Copy the brianlib directory
    brianlib_dir = os.path.join(os.path.split(inspect.getsourcefile(CPPStandaloneCodeObject))[0],
                                'brianlib')
    brianlib_files = copy_directory(brianlib_dir, os.path.join(directory, 'brianlib'))
    for file in brianlib_files:
        if file.lower().endswith('.cpp'):
            writer.source_files.append('brianlib/'+file)
        elif file.lower().endswith('.h'):
            writer.header_files.append('brianlib/'+file)

    # Copy the CSpikeQueue implementation
    # (cspikequeue.cpp is written into the project as the header
    # brianlib/spikequeue.h)
    shutil.copy2(os.path.join(os.path.split(inspect.getsourcefile(Synapses))[0], 'cspikequeue.cpp'),
                 os.path.join(directory, 'brianlib', 'spikequeue.h'))
    shutil.copy2(os.path.join(os.path.split(inspect.getsourcefile(Synapses))[0], 'stdint_compat.h'),
                 os.path.join(directory, 'brianlib', 'stdint_compat.h'))

    writer.source_files.extend(additional_source_files)
    writer.header_files.extend(additional_header_files)

    if compiler=='msvc':
        if native:
            # pick the /arch flag from the CPU flags reported by py-cpuinfo;
            # sse2 is tested last so it takes precedence over sse
            arch_flag = ''
            try:
                from cpuinfo import cpuinfo
                res = cpuinfo.get_cpu_info()
                if 'sse' in res['flags']:
                    arch_flag = '/arch:SSE'
                if 'sse2' in res['flags']:
                    arch_flag = '/arch:SSE2'
            except ImportError:
                logger.warn('Native flag for MSVC compiler requires installation of the py-cpuinfo module')
            compiler_flags += ' '+arch_flag

        if nb_threads>1:
            openmp_flag = '/openmp'
        else:
            openmp_flag = ''
        # Generate the visual studio makefile
        source_bases = [fname.replace('.cpp', '').replace('/', '\\') for fname in writer.source_files]
        win_makefile_tmp = CPPStandaloneCodeObject.templater.win_makefile(
            None, None,
            source_bases=source_bases,
            compiler_flags=compiler_flags,
            openmp_flag=openmp_flag,
            )
        writer.write('win_makefile', win_makefile_tmp)
    else:
        # Generate the makefile
        if os.name=='nt':
            rm_cmd = 'del *.o /s\n\tdel main.exe $(DEPS)'
        else:
            rm_cmd = 'rm $(OBJS) $(PROGRAM) $(DEPS)'
        makefile_tmp = CPPStandaloneCodeObject.templater.makefile(None, None,
            source_files=' '.join(writer.source_files),
            header_files=' '.join(writer.header_files),
            compiler_flags=compiler_flags,
            rm_cmd=rm_cmd)
        writer.write('makefile', makefile_tmp)

    # build the project
    if compile:
        with in_directory(directory):
            if compiler=='msvc':
                # TODO: handle debug
                if debug:
                    logger.warn('Debug flag currently ignored for MSVC')
                vcvars_search_paths = [
                    # futureproofing!
                    r'c:\Program Files\Microsoft Visual Studio 15.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 15.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 14.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 13.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 13.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 12.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 11.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\vcvarsall.bat',
                    r'c:\Program Files\Microsoft Visual Studio 10.0\VC\vcvarsall.bat',
                    r'c:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\vcvarsall.bat',
                    ]
                # an explicit preference wins; otherwise use the first
                # search path that exists
                vcvars_loc = prefs['codegen.cpp.msvc_vars_location']
                if vcvars_loc=='':
                    for fname in vcvars_search_paths:
                        if os.path.exists(fname):
                            vcvars_loc = fname
                            break
                if vcvars_loc=='':
                    raise IOError("Cannot find vcvarsall.bat on standard search path.")
                # TODO: copy vcvars and make replacements for 64 bit automatically
                arch_name = prefs['codegen.cpp.msvc_architecture']
                if arch_name=='':
                    mach = platform.machine()
                    if mach=='AMD64':
                        arch_name = 'x86_amd64'
                    else:
                        arch_name = 'x86'

                vcvars_cmd = '"{vcvars_loc}" {arch_name}'.format(
                    vcvars_loc=vcvars_loc, arch_name=arch_name)
                make_cmd = 'nmake /f win_makefile'
                # start from a fresh log; the commands below append to it
                if os.path.exists('winmake.log'):
                    os.remove('winmake.log')
                with std_silent(debug):
                    if clean:
                        os.system('%s >>winmake.log 2>&1 && %s clean >>winmake.log 2>&1' % (vcvars_cmd, make_cmd))
                    x = os.system('%s >>winmake.log 2>&1 && %s >>winmake.log 2>&1' % (vcvars_cmd, make_cmd))
                    if x!=0:
                        raise RuntimeError("Project compilation failed")
            else:
                with std_silent(debug):
                    if clean:
                        os.system('make clean')
                    # debug takes precedence over native
                    if debug:
                        x = os.system('make debug')
                    elif native:
                        x = os.system('make native')
                    else:
                        x = os.system('make')
                    if x!=0:
                        raise RuntimeError("Project compilation failed")
            if run:
                # with_output=False sends the program's stdout to the null
                # device
                if not with_output:
                    stdout = open(os.devnull, 'w')
                else:
                    stdout = None
                if os.name=='nt':
                    x = subprocess.call(['main'] + run_args, stdout=stdout)
                else:
                    x = subprocess.call(['./main'] + run_args, stdout=stdout)
                if x:
                    raise RuntimeError("Project run failed")
                self.has_been_run = True
def get_server_usage_stats(agent): yield From(agent.run_event.wait()) config = agent.config['metrics'] logger.info('starting "get_server_usage" task for "%s"', hostname) prev_stats = psutil.disk_io_counters(perdisk=True) prev_io_counters = psutil.net_io_counters(pernic=True) disk_partitions = psutil.disk_partitions() partition_mountpoint = dict() partitions = set() included_partitions = [part.device for part in disk_partitions] for dp in disk_partitions: if included_partitions and dp.device not in included_partitions: continue partitions.add(dp.device) partition_mountpoint[dp.device] = dp.mountpoint while agent.run_event.is_set(): yield From(asyncio.sleep(frequency)) try: #OpenStack processes the_processes = [ 'nova-novncproxy', 'nova-cert', 'nova-compute', 'nova-conductor', 'nova-api', 'neutron-openvswitch-agent', 'neutron-l3-agent', 'neutron-dhcp-agent', 'nova-scheduler', 'neutron-server', 'neutron-metadata-agent' ] the_processes_to_check = [ 'nova-novncproxy', 'nova-cert', 'nova-compute', 'nova-conductor', 'nova-api', 'neutron-openvswitch-agent', 'neutron-l3-agent', 'neutron-dhcp-agent', 'nova-scheduler', 'neutron-server', 'neutron-metadata-agent' ] cpu_percent = psutil.cpu_percent(interval=None) memory = psutil.virtual_memory() swap = psutil.swap_memory() loadavg = os.getloadavg() data = { 'server_name': hostname, 'measurements': [], 'stats': { 'platform': {}, 'hardware': {}, 'openstack': {} } } #data['measurements'].append({'name': 'cpu.total', 'value': psutil.cpu_count()}) data['measurements'].append({ 'name': 'cpu.percent', 'value': cpu_percent }) #data['measurements'].append({'name': 'memory.total', 'value': memory.total}) data['measurements'].append({ 'name': 'memory.available', 'value': memory.available }) data['measurements'].append({ 'name': 'memory.percent', 'value': memory.percent }) data['measurements'].append({ 'name': 'memory.used', 'value': memory.used }) data['measurements'].append({ 'name': 'memory.free', 'value': memory.free }) 
#data['measurements'].append({'name': 'swap.total', 'value': swap.total}) data['measurements'].append({ 'name': 'swap.used', 'value': swap.used }) data['measurements'].append({ 'name': 'swap.free', 'value': swap.free }) data['measurements'].append({ 'name': 'swap.percent', 'value': swap.percent }) data['measurements'].append({ 'name': 'loadavg.1', 'value': loadavg[0] }) data['measurements'].append({ 'name': 'loadavg.5', 'value': loadavg[1] }) data['measurements'].append({ 'name': 'loadavg.15', 'value': loadavg[2] }) curr_stats = psutil.disk_io_counters(perdisk=True) include_disks = None include_disks = [part.device for part in disk_partitions] for disk in curr_stats: if include_disks and disk not in include_disks: continue curr = curr_stats[disk] prev = prev_stats[disk] data['measurements'].append({ 'name': 'disk_io.read_count', 'tags': { 'disk': disk }, 'value': curr.read_count - prev.read_count }) data['measurements'].append({ 'name': 'disk_io.write_count', 'tags': { 'disk': disk }, 'value': curr.write_count - prev.write_count }) data['measurements'].append({ 'name': 'disk_io.read_bytes', 'tags': { 'disk': disk }, 'value': curr.read_bytes - prev.read_bytes }) data['measurements'].append({ 'name': 'disk_io.write_bytes', 'tags': { 'disk': disk }, 'value': curr.write_bytes - prev.write_bytes }) data['measurements'].append({ 'name': 'disk_io.read_time', 'tags': { 'disk': disk }, 'value': curr.read_time - prev.read_time }) data['measurements'].append({ 'name': 'disk_io.write_time', 'tags': { 'disk': disk }, 'value': curr.write_time - prev.write_time }) prev_stats = curr_stats for partition in partitions: disk_data = psutil.disk_usage(partition_mountpoint[partition]) data['measurements'].append({ 'name': 'partition_usage.total', 'tags': { 'partition': partition }, 'value': disk_data.total }) data['measurements'].append({ 'name': 'partition_usage.used', 'tags': { 'partition': partition }, 'value': disk_data.used }) data['measurements'].append({ 'name': 
'partition_usage.free', 'tags': { 'partition': partition }, 'value': disk_data.free }) data['measurements'].append({ 'name': 'partition_usage.percent', 'tags': { 'partition': partition }, 'value': disk_data.percent }) curr_io_counters = psutil.net_io_counters(pernic=True) for interface in curr_io_counters: try: curr = curr_io_counters[interface] except: curr = 0 try: prev = prev_io_counters[interface] except: prev = 0 if curr and hasattr(curr, 'bytes_sent'): curr_bytes_sent = curr.bytes_sent else: curr_bytes_sent = 0 if curr and hasattr(curr, 'bytes_recv'): curr_bytes_recv = curr.bytes_recv else: curr_bytes_recv = 0 if prev and hasattr(prev, 'bytes_sent'): prev_bytes_sent = prev.bytes_sent else: prev_bytes_sent = 0 if prev and hasattr(prev, 'bytes_recv'): prev_bytes_recv = prev.bytes_recv else: prev_bytes_recv = 0 if curr and hasattr(curr, 'packets_sent'): curr_packets_sent = curr.packets_sent else: curr_packets_sent = 0 if curr and hasattr(curr, 'packets_recv'): curr_packets_recv = curr.packets_recv else: curr_packets_recv = 0 if prev and hasattr(prev, 'packets_sent'): prev_packets_sent = prev.packets_sent else: prev_packets_sent = 0 if prev and hasattr(prev, 'packets_recv'): prev_packets_recv = prev.packets_recv else: prev_packets_recv = 0 if curr and hasattr(curr, 'errin'): curr_errin = curr.errin else: curr_errin = 0 if curr and hasattr(curr, 'errout'): curr_errout = curr.errout else: curr_errout = 0 if prev and hasattr(prev, 'errin'): prev_errin = prev.errin else: prev_errin = 0 if prev and hasattr(prev, 'errout'): prev_errout = prev.errout else: prev_errout = 0 if curr and hasattr(curr, 'dropin'): curr_dropin = curr.dropin else: curr_dropin = 0 if curr and hasattr(curr, 'dropout'): curr_dropout = curr.dropout else: curr_dropout = 0 if prev and hasattr(prev, 'dropin'): prev_dropin = prev.dropin else: prev_dropin = 0 if prev and hasattr(prev, 'dropout'): prev_dropout = prev.dropout else: prev_dropout = 0 data['measurements'].append({ 'name': 
'net_io.bytes_sent', 'tags': { 'interface': interface }, 'value': curr_bytes_sent - prev_bytes_sent }) data['measurements'].append({ 'name': 'net_io.bytes_recv', 'tags': { 'interface': interface }, 'value': curr_bytes_recv - prev_bytes_recv }) data['measurements'].append({ 'name': 'net_io.packets_sent', 'tags': { 'interface': interface }, 'value': curr_packets_sent - prev_packets_sent }) data['measurements'].append({ 'name': 'net_io.packets_recv', 'tags': { 'interface': interface }, 'value': curr_packets_recv - prev_packets_recv }) data['measurements'].append({ 'name': 'net_io.errin', 'tags': { 'interface': interface }, 'value': curr_errin - prev_errin }) data['measurements'].append({ 'name': 'net_io.errout', 'tags': { 'interface': interface }, 'value': curr_errout - prev_errout }) data['measurements'].append({ 'name': 'net_io.dropin', 'tags': { 'interface': interface }, 'value': curr_dropin - prev_dropin }) data['measurements'].append({ 'name': 'net_io.dropout', 'tags': { 'interface': interface }, 'value': curr_dropout - prev_dropout }) server_stats = platform.dist() data['stats']['platform']['dist'] = platform.dist( )[0] + ' ' + platform.dist()[1] + ' ' + platform.dist()[2] data['stats']['platform']['kernel'] = platform.uname()[2] data['stats']['platform']['architecture'] = platform.architecture( )[0] cpu_hw = cpuinfo.get_cpu_info() data['stats']['platform']['processor'] = {} data['stats']['platform']['processor']['qty'] = psutil.cpu_count() data['stats']['platform']['processor'][ 'brand'] = cpuinfo.get_cpu_info()['brand'] data['stats']['platform']['processor'][ 'count'] = cpuinfo.get_cpu_info()['count'] data['stats']['platform']['processor'][ 'flags'] = cpuinfo.get_cpu_info()['flags'] data['stats']['platform']['memory'] = {} data['stats']['platform']['memory']['total'] = memory.total data['stats']['platform']['swap'] = {} data['stats']['platform']['swap']['total'] = swap.total processes = the_processes processes_to_check = the_processes_to_check try: p = 
subprocess.Popen(["nova-manage", "version"], stdout=subprocess.PIPE) version = p.communicate()[0] if version: data['stats']['openstack']['version'] = version.strip() except Exception, e: pass for p in psutil.process_iter(): try: try: process_name = p.name() except: process_name = p.name process_found = False result = False result = [ process_found for process_found in processes_to_check if process_name in process_found ] if len(result) > 0: process_name = result[0] if p.is_running(): is_running = 1 else: is_running = 0 data['measurements'].append({ 'name': 'openstack.processes.' + process_name + '.' + 'cpu_percent', 'value': p.cpu_percent() }) data['measurements'].append({ 'name': 'openstack.processes.' + process_name + '.' + 'memory_percent', 'value': p.memory_percent() }) data['measurements'].append({ 'name': 'openstack.processes.' + process_name + '.' + 'num_threads', 'value': p.num_threads() }) data['measurements'].append({ 'name': 'openstack.processes.' + process_name + '.' + 'is_running', 'value': is_running }) #data['measurements'].append({'name': 'openstack.processes.'+process_name+'.'+'create_time', 'value': create_time}) if p.is_running() and p.status != psutil.STATUS_ZOMBIE: process_status = 1 else: process_status = 0 data['measurements'].append({ 'name': 'openstack.processes.' + process_name + '.' + 'up', 'tags': { 'status': p.status() }, 'value': process_status }) logger.info( 'OUTPUT - %s', 'openstack.processes.' + process_name + '.' + 'up') processes_to_check.remove(process_name) except psutil.Error: pass for process in processes_to_check: data['measurements'].append({ 'name': 'openstack.processes.' + process + '.' 
+ 'up', 'tags': { 'status': 'down' }, 'value': 0 }) # OpenStack Nova API if agent._config['openstack_credentials']['user'] and agent._config[ 'openstack_credentials']['password'] and agent._config[ 'openstack_credentials']['project'] and agent._config[ 'openstack_credentials']['auth_url']: nova = client.Client( agent.nova_api_version, agent._config['openstack_credentials']['user'], agent._config['openstack_credentials']['password'], agent._config['openstack_credentials']['project'], agent._config['openstack_credentials']['auth_url']) if nova: #nova services try: services = nova.services.list() for service in services: status = 1 if service.state == 'up' else 0 data['measurements'].append({ 'name': 'openstack.nova-api.services.' + service.binary + '.' + 'status', 'value': status }) except Exception, e: pass #availability zones try: avalability_zones = nova.availability_zones.list(True) for availability_zone in avalability_zones: status = 1 if availability_zone.zoneState[ 'available'] is True else 0 data['measurements'].append({ 'name': 'openstack.nova-api.avalability_zones.status', 'tags': { 'availability_zone': availability_zone.zoneName }, 'value': status }) except Exception, e: pass #hypervisor stats hypervisor_keys = { 'count', 'current_workload', 'disk_available_least', 'free_disk_gb', 'free_ram_mb', 'local_gb', 'local_gb_used', 'memory_mb', 'memory_mb_used', 'running_vms', 'vcpus', 'vcpus_used' } try: stats = nova.hypervisor_stats.statistics() for key_name in hypervisor_keys: if hasattr(stats, key_name): data['measurements'].append({ 'name': 'openstack.nova-api.hypervisor_total.' 
+ key_name, 'value': getattr(stats, key_name) }) if hasattr(stats, 'vcpus') and hasattr( stats, 'vcpus_used'): data['measurements'].append({ 'name': 'openstack.nova-api.hypervisor_total.vcpus_percent', 'value': round( 100 * float(stats.vcpus_used) / float(stats.vcpus), 2) }) if hasattr(stats, 'local_gb') and hasattr( stats, 'local_gb_used'): data['measurements'].append({ 'name': 'openstack.nova-api.hypervisor_total.local_gb_percent', 'value': round( 100 * float(stats.local_gb_used) / float(stats.local_gb), 2) }) if hasattr(stats, 'memory_mb') and hasattr( stats, 'memory_mb_used'): data['measurements'].append({ 'name': 'openstack.nova-api.hypervisor_total.memory_mb_percent', 'value': round( 100 * float(stats.memory_mb_used) / float(stats.memory_mb), 2) }) except Exception, e: pass #server stats try: servers = nova.servers.list() for server in servers: value = agent.openstack_status[server.status] if not value: value = -1 data['measurements'].append({ 'name': 'openstack.nova-api.servers', 'tags': { 'name': server.name, 'id': server.id, 'tenant_id': server.tenant_id, 'status': server.status }, 'value': value }) except Exception, e: pass
def main(self, args):
    """Handle the ``run`` sub-command: create a job and optionally start it.

    The method parses ``args``, merges them with the project configuration
    returned by ``find_config``, indexes the job files, creates the job in
    the local Git store, pushes it to the server unless running offline and,
    with ``--local``, starts the command on this machine.

    :param args: list of command line arguments (without the program name).

    Exits the process with status 2 when the given config file does not exist
    or no model is defined, and with status 1 for invalid option combinations
    or a missing command.  Raises ``Exception`` for a ``--param`` without
    ``=``.
    """
    import aetros.const

    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
                                     prog=aetros.const.__prog__ + ' run')
    parser.add_argument('command', nargs='?', help="The command to run. Default read in configuration file")
    parser.add_argument('-i', '--image', help="Which Docker image to use for the command. Default read in configuration file. If not specified, command is executed on the host.")
    parser.add_argument('--no-image', action='store_true', help="Forces not to use docker, even when image is defined in the configuration file.")
    parser.add_argument('-s', '--server', action='append', help="Limits the server pool to this server. Default not limitation or read in configuration file. Multiple --server allowed.")
    parser.add_argument('-m', '--model', help="Under which model this job should be listed. Default read in configuration file")
    parser.add_argument('-l', '--local', action='store_true', help="Start the job immediately on the current machine.")
    parser.add_argument('-c', '--config', help="Default aetros.yml in current working directory.")
    parser.add_argument('--priority', help="Increases or decreases priority. Default is 0.")
    parser.add_argument('--cpu', help="How many CPU cores should be assigned to job. Docker only.")
    parser.add_argument('--memory', help="How much memory should be assigned to job. Docker only.")
    parser.add_argument('--gpu', help="How many GPU cards should be assigned to job. Docker only.")
    parser.add_argument('--gpu_memory', help="Memory requirement for the GPU. Docker only.")
    parser.add_argument('--offline', '-o', action='store_true', help="Whether the execution should happen offline.")
    parser.add_argument('--rebuild-image', action='store_true', help="Makes sure the Docker image is re-built without cache.")
    parser.add_argument('--max-time', help="Limit execution time in seconds. Sends SIGINT to the process group when reached.")
    parser.add_argument('--max-epochs', help="Limit execution epochs. Sends SIGINT to the process group when reached.")
    parser.add_argument('--gpu-device', action='append', help="Which device id should be mapped into the NVIDIA docker container. Only when --local")
    parser.add_argument('--volume', '-v', action='append', help="Volume into docker. Only when --local")
    parser.add_argument('-e', action='append', help="Sets additional environment variables. '-e name=value' to set value, or '-e name' to read from current env")
    parser.add_argument('-p', '--param', action='append', help="Sets a hyperparameter, example '--param name=value'. Multiple --param allowed.")

    parsed_args = parser.parse_args(args)

    if parsed_args.config and not os.path.exists(parsed_args.config):
        self.logger.error("fatal: file %s does not exist." % (parsed_args.config,))
        sys.exit(2)

    config = find_config(parsed_args.config)
    home_config = read_home_config()

    # The configuration file provides the model unless --model overrides it.
    if config['model'] and not parsed_args.model:
        parsed_args.model = config['model']

    if not parsed_args.model:
        print("fatal: no model defined. Use --model or switch into a directory where you executed 'aetros init model-name'.")
        sys.exit(2)

    if not parsed_args.local and parsed_args.volume:
        print("fatal: can not use volume with jobs on the cluster. Use datasets instead.")
        sys.exit(1)

    if parsed_args.local and parsed_args.priority:
        print("fatal: the priority can only be set for jobs in the cluster.")
        sys.exit(1)

    if config['image']:
        ensure_docker_installed(self.logger)

    # Collect extra environment variables: '-e name=value' sets a value,
    # '-e name' copies the value from the current environment.
    env = {}
    for item in parsed_args.e or []:
        if '=' in item:
            # Split on the first '=' only, so values may contain '=' themselves.
            k, v = item.split('=', 1)
        else:
            k = item
            v = os.getenv(k)
        env[k] = v

    if ('command' not in config or not config['command']) and not parsed_args.command:
        self.logger.error('No command given. Define the command in aetros.yml or use command argument.')
        sys.exit(1)

    job_backend = JobBackend(parsed_args.model, self.logger)

    ignore = []
    if 'ignore' in config:
        ignore = config['ignore']
    job_backend.job = {'config': {'ignore': ignore}}

    adding_files = loading_text("- Adding job files to index ... ")
    files_added, size_added = job_backend.add_files(config['root'], report=False)
    adding_files("done with %d file%s added (%s)." % (files_added, 's' if files_added != 1 else '', human_size(size_added, 2)))

    # NOTE: create_info['config'] is the very same dict object as `config`.
    create_info = {
        'type': 'custom',
        'config': config
    }

    incoming_hyperparameter = {}
    for param in parsed_args.param or []:
        if '=' not in param:
            raise Exception('--param ' + param + ' does not contain a `=`. Please use "--param name=value"')

        # Split on the first '=' only, so values may contain '=' themselves.
        name, value = param.split('=', 1)
        incoming_hyperparameter[name] = value

    # first transform simple format in the full definition with parameter types
    # (string, number, group, choice_group, etc)
    full_hyperparameters = lose_parameters_to_full(config['parameters'])

    # now extract hyperparameters from full definition, and overwrite stuff using
    # incoming_hyperparameter if available
    hyperparameter = extract_parameters(full_hyperparameters, incoming_hyperparameter)
    create_info['config']['parameters'] = hyperparameter

    if parsed_args.rebuild_image:
        create_info['config']['rebuild_image'] = True

    if parsed_args.max_epochs:
        create_info['config']['maxEpochs'] = int(parsed_args.max_epochs)

    create_info['config']['priority'] = 0
    if parsed_args.priority:
        create_info['config']['priority'] = float(parsed_args.priority)

    if parsed_args.max_time:
        create_info['config']['maxTime'] = float(parsed_args.max_time)

    if parsed_args.command:
        create_info['config']['command'] = parsed_args.command

    if parsed_args.image:
        # reset install options, since we can't make sure if the base image still fits
        if 'image' in config and config['image'] and config['image'] != parsed_args.image:
            create_info['config']['install'] = None

        # reset dockerfile, since we specified manually an image
        create_info['config']['dockerfile'] = None
        create_info['config']['image'] = parsed_args.image

    if parsed_args.no_image:
        create_info['config']['image'] = None

    if parsed_args.server:
        create_info['config']['servers'] = list(parsed_args.server)

    create_info['config']['resources'] = create_info['config'].get('resources', {})
    resources = create_info['config']['resources']

    # Command line values win over the configuration file; with an image the
    # default is one CPU core and one GB of memory, otherwise zero.
    default_cpu_and_memory = 1 if create_info['config']['image'] else 0
    resources['cpu'] = int(parsed_args.cpu or resources.get('cpu', default_cpu_and_memory))
    resources['memory'] = int(parsed_args.memory or resources.get('memory', default_cpu_and_memory))
    resources['gpu'] = int(parsed_args.gpu or resources.get('gpu', 0))
    resources['gpu_memory'] = int(parsed_args.gpu_memory or resources.get('gpu_memory', 0))

    if parsed_args.local:
        create_info['server'] = 'local'

        # make sure we do not limit the resources to something that is not available on the local machine
        warning = []
        cpu = cpuinfo.get_cpu_info()
        mem = psutil.virtual_memory().total
        gpu = 0
        try:
            gpu = len(get_ordered_devices())
        except CudaNotImplementedException:
            # No CUDA support available: treat the machine as having no GPU.
            pass

        if not create_info['config']['image'] and not all([x == 0 for x in six.itervalues(resources)]):
            self.logger.warning("! No Docker virtualization since no `image` defined, resources limitation ignored.")

        if create_info['config']['image'] and resources['gpu'] > 0:
            if not (sys.platform == "linux" or sys.platform == "linux2"):
                self.logger.warning("! Your operating system does not support GPU allocation for "
                                    "Docker virtualization. "
                                    "NVIDIA-Docker2 is only supported on Linux.")

        local_max_resources = {'cpu': cpu['count'], 'memory': ceil(mem / 1024 / 1024 / 1024), 'gpu': gpu}

        if create_info['config']['image']:
            # read max hardware within Docker
            out = docker_call(['run', 'alpine', 'sh', '-c', 'nproc && cat /proc/meminfo | grep MemTotal'])
            cpus, memory = out.decode('utf-8').strip().split('\n')
            local_max_resources['cpu'] = int(cpus)

            # MemTotal is reported in kB; convert to GB.
            memory = memory.replace('MemTotal:', '').replace('kB', '').strip()
            local_max_resources['memory'] = ceil(int(memory) / 1024 / 1024)

        if local_max_resources['cpu'] < resources['cpu']:
            warning.append('CPU cores %d -> %d' % (resources['cpu'], local_max_resources['cpu']))
            resources['cpu'] = local_max_resources['cpu']

        if local_max_resources['memory'] < resources['memory']:
            warning.append('memory %dGB -> %dGB' % (resources['memory'], local_max_resources['memory']))
            resources['memory'] = local_max_resources['memory']

        if local_max_resources['gpu'] < resources['gpu']:
            warning.append('GPU cards %d -> %d' % (resources['gpu'], local_max_resources['gpu']))
            resources['gpu'] = local_max_resources['gpu']

        if warning:
            self.logger.warning("! Resources downgrade due to missing hardware: %s." % (', '.join(warning),))

    if parsed_args.config and not create_info['config']['configPath']:
        create_info['config']['configPath'] = parsed_args.config

    create_info['config']['sourcesAttached'] = True

    creating_git_job = loading_text("- Create job in local Git ... ")
    if aetros.utils.git.get_current_commit_hash():
        create_info['origin_git_source'] = {
            'origin': aetros.utils.git.get_current_remote_url(),
            'author': aetros.utils.git.get_current_commit_author(),
            'message': aetros.utils.git.get_current_commit_message(),
            'branch': aetros.utils.git.get_current_branch(),
            'commit': aetros.utils.git.get_current_commit_hash(),
        }

    job_backend.create(create_info=create_info, server=None)
    creating_git_job("created %s in %s." % (job_backend.job_id[0:9], job_backend.model_name))

    summary = "➤ Summary: Job running "
    if parsed_args.local:
        summary += 'locally'
    else:
        summary += 'on the cluster'

    if create_info['config']['image']:
        summary += ' in Docker using image %s with %d CPU cores, %dGB memory and %d GPUs.' \
                   % (create_info['config']['image'], resources['cpu'], resources['memory'], resources['gpu'])
    else:
        summary += ' on host using all available resources.'

    print(summary)

    if parsed_args.offline:
        if not parsed_args.local:
            self.logger.warning("Can not create a remote job in offline mode.")
            sys.exit(1)

        self.logger.warning("Execution started offline.")
    else:
        adding_files = loading_text("- Connecting to "+home_config['host']+" ... ")
        if job_backend.connect():
            adding_files("connected.")
        else:
            # Fall back to offline mode when the server cannot be reached.
            parsed_args.offline = True
            adding_files("failed. Continue in offline mode.")

    if not parsed_args.offline:
        sys.stdout.write("- Uploading job data ... ")
        job_backend.git.push()
        job_backend.client.wait_until_queue_empty(['files'], clear_end=False)

        sys.stdout.write(" done.\n")

        link = "%smodel/%s/job/%s" % (home_config['url'], job_backend.model_name, job_backend.job_id)
        sys.__stdout__.write(u"➤ Monitor job at %s\n" % (link))

    if parsed_args.local:
        job_backend.start(collect_system=False, offline=parsed_args.offline, push=False)

        if not parsed_args.offline:
            job_backend.git.start_push_sync()

        cpus = create_info['config']['resources']['cpu']
        memory = create_info['config']['resources']['memory']

        if not parsed_args.gpu_device and create_info['config']['resources']['gpu'] > 0:
            # if requested 2 GPUs and we have 3 GPUs with id [0,1,2], gpus should be [0,1]
            parsed_args.gpu_device = list(range(0, create_info['config']['resources']['gpu']))

        start_command(self.logger, job_backend, env, parsed_args.volume, cpus=cpus, memory=memory,
                      gpu_devices=parsed_args.gpu_device, offline=parsed_args.offline)
def post_process_perf_results(settings, results_location, workitem_dir):
    """Convert the perf result XML to CSV and JSON and upload both files.

    Steps, in order:

    1. Run ``xunit.performance.analysis.exe`` from the correlation payload to
       turn ``results_location`` into ``<workitem_dir>/results.csv``.
    2. Upload ``results.csv`` using ``settings``.
    3. Read ``xunitrunner-perf.json`` and run ``csvjsonconvertor.py`` with
       job, build and machine information to produce
       ``<workitem_dir>/<workitem_id>.json``.
    4. Upload the JSON using a copy of ``settings`` whose output URI and
       tokens are taken from ``xunitrunner-perf.json``.

    :param settings: work item settings; ``workitem_id`` and
        ``correlation_id`` are read, and a deep copy is made for the upload.
    :param results_location: path of the result XML to convert.
    :param workitem_dir: directory receiving ``results.csv`` and the JSON.
    :raises Exception: when either converter exits with a non-zero status.
    """
    # Use the xunit perf analysis exe from nuget package here
    log.info('Converting xml to csv')
    payload_dir = fix_path(os.getenv('HELIX_CORRELATION_PAYLOAD'))
    csv_path = os.path.join(workitem_dir, 'results.csv')
    xmlconvertorpath = os.path.join(payload_dir,
                                    'Microsoft.DotNet.xunit.performance.analysis',
                                    '1.0.0-alpha-build0028',
                                    'tools',
                                    'xunit.performance.analysis.exe')
    # Pass the arguments as a list instead of splitting a joined string on
    # spaces, so paths that contain spaces stay intact.
    xml_cmd = [xmlconvertorpath, '-csv', csv_path, results_location]
    if helix.proc.run_and_log_output(xml_cmd) != 0:
        raise Exception('Failed to generate csv from result xml')

    perfscriptsdir = os.path.join(payload_dir, 'RunnerScripts', 'xunitrunner-perf')
    # need to extract more properties from settings to pass to csvtojsonconvertor.py
    jsonPath = os.path.join(workitem_dir, settings.workitem_id+'.json')

    log.info('Uploading the results.csv file')
    _write_output_path(csv_path, settings)

    # read the perf-specific settings
    with open(os.path.join(perfscriptsdir, 'xunitrunner-perf.json'), 'rb') as settings_file:
        perfsettingsjson = json.loads(settings_file.read())

    # extract build number from buildmoniker if official build
    buildtokens = perfsettingsjson['BuildMoniker'].split('-')
    if len(buildtokens) < 3:
        build_number = perfsettingsjson['BuildMoniker']
    else:
        build_number = buildtokens[-2] + '.' + buildtokens[-1]

    info = cpuinfo.get_cpu_info()

    jsonArgsDict = {
        '--csvFile': csv_path,
        '--jsonFile': jsonPath,
        '--jobName': settings.correlation_id,
        '--jobDescription': '...',
        '--configName': perfsettingsjson['TargetQueue'],
        '--jobGroupName': perfsettingsjson['Creator']+'-'+perfsettingsjson['TestProduct']+'-'+perfsettingsjson['Branch']+'-Perf',
        '--jobTypeName': 'Private',
        '--username': perfsettingsjson['Creator'],
        '--userAlias': perfsettingsjson['Creator'],
        '--branch': perfsettingsjson['TestProduct'],
        '--buildInfoName': perfsettingsjson['BuildMoniker'],
        '--buildNumber': build_number,
        '--machinepoolName': perfsettingsjson['TargetQueue'],
        '--machinepoolDescription': '...',
        # cannot be obtained by cpu-info; need to figure out some other way
        '--microarchName': 'SSE2',
        '--numberOfCores': psutil.cpu_count(logical=False),
        '--numberOfLogicalProcessors': psutil.cpu_count(logical=True),
        # psutil returns bytes; dividing by 1024 gives KiB.  The value is
        # left unchanged so consumers of the JSON keep seeing the same unit.
        '--totalPhysicalMemory': psutil.virtual_memory().total/1024,
        '--osInfoName': platform.system(),
        '--osVersion': platform.version(),
        '--machineName': platform.node(),
        '--architectureName': format(info['arch']),
        '--machineDescription': format(info['brand']),
        '--manufacturerName': format(info['vendor_id']),
    }

    jsonArgs = [sys.executable, os.path.join(perfscriptsdir, 'csvjsonconvertor.py')]
    # items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
    for key, value in jsonArgsDict.items():
        jsonArgs.append(key)
        jsonArgs.append(str(value))

    if helix.proc.run_and_log_output(jsonArgs) != 0:
        raise Exception('Failed to generate json from csv file')

    # set info to upload result to perf-specific json container
    log.info('Uploading the results json')
    perfsettings = copy.deepcopy(settings)
    perfsettings.output_uri = perfsettingsjson['RootURI']
    perfsettings.output_write_token = perfsettingsjson['WriteToken']
    perfsettings.output_read_token = perfsettingsjson['ReadToken']
    _write_output_path(jsonPath, perfsettings)
def run_simulations(args):
    """Configure and start a SMAC run for the given command line arguments.

    The function sets the module level globals ``instance_names``, ``cmd``,
    ``parameter_string_template`` and ``cmd_builder_script``, splits the
    instances into a training and a test part, and calls ``smac.minimize``.
    For the special seed 0 it additionally writes ``spysmac.meta`` into the
    output directory and reduces repetitions and budget to 1.

    :param args: arguments forwarded to ``parse_args``.

    Exits the process with status 2 when the training or the test set would
    be empty.
    """
    global instance_names
    global cmd
    global parameter_string_template
    global cmd_builder_script

    # parse the arguments, find the instances and read the pcs file
    options = parse_args(args)
    cmd_builder_script = options['cmd_builder_script']

    logging.info("Setting up simulations in '%s'" % options['outputdir'])

    # Look the training instances up once; both branches below reuse the list.
    instance_names = find_instances(options['training_instances'])

    if options['validation_instances'] is None:
        # in case the validation-fraction option is used
        random.shuffle(instance_names)
        num_test_instances = int(len(instance_names) * options['validation_fraction'])
        num_train_instances = len(instance_names) - num_test_instances
    else:
        # if a specific set of validation instances was specified
        test_instances = find_instances(options['validation_instances'])
        num_train_instances = len(instance_names)
        num_test_instances = len(test_instances)
        instance_names = instance_names + test_instances

    if num_train_instances < 1 or num_test_instances < 1:
        logging.error("Unable to create a training-test split from "
                      "the instances you provided!")
        sys.exit(2)

    param_dict, conditions, forbiddens = pysmac.utils.read_pcs(options['pcs'])

    logging.debug("Params: %s" % (str(list(param_dict.keys()))))
    logging.debug("Params: %s" % (str(param_dict)))

    parameter_string_template = ("%s" % options['prefix']) + '%s' + ('%s' % options['separator']) + '%s'
    cmd = "%s %s" % (options['binary'], options['callstring'])

    smac_debug = options['verbosity'] == 'DEBUG'

    # for the special seed 0, only record meta information and validate
    if options['seed'] == 0:
        # make sure that the output directory exists
        try:
            os.makedirs(options['outputdir'])
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

        # store meta information in a file for the report
        with open(os.path.join(options['outputdir'], 'spysmac.meta'), 'w') as fh:
            # os information
            fh.write("os system = {}\n".format(platform.system()))
            fh.write("os release = {}\n".format(platform.release()))

            # cpu info
            info = get_cpu_info()
            fh.write("cpu vendor = {}\n".format(info['vendor_id']))
            fh.write("cpu brand = {}\n".format(info['brand']))
            fh.write("cpu hz = {}\n".format(info['hz_advertised']))
            fh.write("cpu arch = {}\n".format(info['arch']))
            fh.write("cpu count = {}\n".format(info['count']))

            # spysmacs run options
            for (k, v) in list(options.items()):
                fh.write("{} = {}\n".format(k, v))

            # pcs information
            for key in param_dict.keys():
                fh.write("%s = %s\n" % (key, param_dict[key][1]))

        # make sure no time is wasted and go directly to the validation
        options['repetitions'] = 1
        options['budget'] = 1

    smac = pysmac.optimizer.SMAC_optimizer(deterministic=False,
                                           t_limit_total_s=options['budget'],
                                           mem_limit_smac_mb=1024,
                                           working_directory=options['outputdir'],
                                           persistent_files=True,
                                           debug=smac_debug)

    # adjust some advanced SMAC options here
    smac.smac_options['wallclock-limit'] = options['budget']
    smac.smac_options['run-obj'] = 'RUNTIME'
    smac.smac_options['overall_obj'] = 'MEAN10'

    if options['seed'] == 0:
        smac.smac_options['scenario_fn'] = 'default_validation_scenario.dat'

    with open(os.path.join(options['outputdir'], 'shuffled_instances.txt'), 'w') as fh:
        fh.write('\n'.join(instance_names))

    logging.info('Starting SMAC...')

    # Seed 0 performs a single evaluation; any other seed is effectively
    # limited by the budget only.
    num_evals = 2**31-1 if options['seed'] > 0 else 1

    smac.minimize(sat_function, num_evals, param_dict, conditions, forbiddens,
                  num_instances=num_train_instances,
                  num_test_instances=num_train_instances+num_test_instances,
                  seed=options['seed'],
                  num_procs=options['num_procs'],
                  num_runs=options['repetitions'],
                  mem_limit_function_mb=options['memory'],
                  t_limit_function_s=options['cutoff'])
def cpu_info():
    """Show instruction type, arch, technology and more info.

    Returns the result of ``jsonify`` for the dictionary reported by
    ``cpuinfo.get_cpu_info()``, stored under the ``cpuinfo`` key.
    """
    details = cpuinfo.get_cpu_info()
    return jsonify(cpuinfo=details)
def get_system_processor(self):
    """Return the processor brand string reported by ``cpuinfo``.

    Older py-cpuinfo releases expose the value as ``'brand'``, newer ones as
    ``'brand_raw'``; the old key is preferred and the new one is used as a
    fallback.

    :returns: the brand converted with ``str``.
    :raises KeyError: when neither key is present.
    """
    info = cpuinfo.get_cpu_info()
    brand = info['brand'] if 'brand' in info else info['brand_raw']
    return str(brand)
# coding: UTF-8 from statistics import mean from typing import Iterable from cpuinfo import cpuinfo LLC_SIZE = int(cpuinfo.get_cpu_info()['l3_cache_size'].split()[0]) * 1024 class BasicMetric: def __init__(self, l2miss, l3miss, inst, cycles, stall_cycles, wall_cycles, intra_coh, inter_coh, llc_size, local_mem, remote_mem, interval): self._l2miss = l2miss self._l3miss = l3miss self._instructions = inst self._cycles = cycles self._stall_cycles = stall_cycles self._wall_cycles = wall_cycles self._intra_coh = intra_coh self._inter_coh = inter_coh self._llc_size = llc_size self._local_mem = local_mem self._remote_mem = remote_mem self._interval = interval @classmethod def calc_avg(cls, metrics: Iterable['BasicMetric']) -> 'BasicMetric': return BasicMetric( mean(metric._l2miss for metric in metrics), mean(metric._l3miss for metric in metrics),
def generate_json(opts, csvdicts):
    """Build the perf result JSON document and write it to disk.

    :param opts: option mapping; the keys ``--perfSettingsJson`` (path of the
        perf settings file), ``--jsonFile`` (output path) and ``--jobName``
        are read.
    :param csvdicts: mapping of metric name to the parsed CSV dictionary;
        each entry becomes one root test via ``generate_test_object``.

    The document contains one job with a single run, described by machine,
    OS, build and user information taken from the local machine and from the
    perf settings file.
    """
    # read the perf-specific settings
    with open(opts['--perfSettingsJson']) as settings_file:
        perfsettingsjson = json.loads(settings_file.read())

    jsonFilePath = opts['--jsonFile']
    log.info('Attempting to generate '+jsonFilePath)

    info = cpuinfo.get_cpu_info()

    rootTests = list()
    # items() works on both Python 2 and Python 3 (iteritems() is Python 2 only).
    for metric, currdict in csvdicts.items():
        # recursively build nodes from the csvdict
        rootTest = generate_test_object(currdict, perfsettingsjson['TestProduct']+' Perf Test Results', info, metric)
        rootTests.append(rootTest)

    # populate the root level meta info
    run = serialobj.Run()
    run.testList = rootTests

    machinepool = serialobj.MachinePool()

    architecture = serialobj.Architecture()
    architecture.architectureName = format(info['arch'])
    machinepool.architecture = architecture

    manufacturer = serialobj.Manufacturer()
    manufacturer.manufacturerName = format(info['vendor_id'])
    machinepool.manufacturer = manufacturer

    microarch = serialobj.MicroArch()
    # cannot be obtained by cpu-info; need to figure out some other way
    microarch.microarchName = 'SSE2'

    osInfo = serialobj.OSInfo()
    osInfo.osInfoName = platform.system()
    osInfo.osVersion = platform.version()

    machinepool.osInfo = osInfo
    machinepool.microarch = microarch
    machinepool.NumberOfCores = psutil.cpu_count(logical=False)
    machinepool.NumberOfLogicalProcessors = psutil.cpu_count(logical=True)
    # psutil reports bytes; dividing by 1024 gives KiB.
    machinepool.TotalPhysicalMemory = psutil.virtual_memory().total/1024
    machinepool.machinepoolName = perfsettingsjson['TargetQueue']
    machinepool.machinepoolDescription = '...'
    run.machinepool = machinepool

    config = serialobj.Config()
    config.configName = perfsettingsjson['TargetQueue']
    run.config = config

    runs = list()
    runs.append(run)

    job = serialobj.Job()
    job.Runs = runs

    user = serialobj.User()
    user.userName = perfsettingsjson['Creator']
    user.userAlias = perfsettingsjson['Creator']
    job.user = user

    # extract build number from buildmoniker if official build
    buildtokens = perfsettingsjson['BuildMoniker'].split('-')
    if len(buildtokens) < 3:
        buildNumber = perfsettingsjson['BuildMoniker']
    else:
        buildNumber = buildtokens[-2] + '.' + buildtokens[-1]

    buildInfo = serialobj.BuildInfo()
    buildInfo.buildInfoName = perfsettingsjson['BuildMoniker']
    buildInfo.buildNumber = buildNumber
    buildInfo.branch = perfsettingsjson['TestProduct']
    job.buildInfo = buildInfo

    jobType = serialobj.JobType()
    jobType.jobTypeName = 'Private'
    job.jobType = jobType

    jobGroup = serialobj.JobGroup()
    jobGroup.jobGroupName = perfsettingsjson['Creator']+'-'+perfsettingsjson['TestProduct']+'-'+perfsettingsjson['Branch']+'-Perf'
    job.jobGroup = jobGroup

    job.jobDescription = '...'
    job.jobName = opts['--jobName']

    root = serialobj.Root()
    root.job = job

    jsonOutput = serialobj.JsonOutput()
    jsonOutput.roots.append(root)

    # The with-statement flushes and closes the file on exit.
    with open(jsonFilePath, 'w+') as opfile:
        opfile.write(jsonOutput.to_JSON())

    log.info('Conversion of csv to json successful')
.. document_brian_prefs:: codegen.cpp ''' from distutils.ccompiler import get_default_compiler from cpuinfo import cpuinfo from brian2.core.preferences import prefs, BrianPreference from .codeobject import sys_info __all__ = ['get_compiler_and_args'] # Try to get architecture information to get the best compiler setting for # Windows msvc_arch_flag = '' res = cpuinfo.get_cpu_info() # Note that this overwrites the arch_flag, i.e. only the best option will # be used if 'sse' in res['flags']: msvc_arch_flag = '/arch:SSE' if 'sse2' in res['flags']: msvc_arch_flag = '/arch:SSE2' if 'avx' in res['flags']: msvc_arch_flag = '/arch:AVX' if 'avx2' in res['flags']: msvc_arch_flag = '/arch:AVX2' # Preferences prefs.register_preferences( 'codegen.cpp', 'C++ compilation preferences',
def get_node_info_for_current_machine():
    """Build a NodeInfo from the local CPU brand string and a fixed size label.

    The CPU name is taken from the dict returned by
    ``cpuinfo.get_cpu_info()``.  It is read from ``'brand_raw'`` when that
    key is present and from the legacy ``'brand'`` key otherwise, so the
    lookup works with either key layout.

    Returns:
        NodeInfo constructed with the CPU brand string and the literal
        ``'4 GB'``.

    Raises:
        KeyError: if the CPU info dict contains neither brand key.
    """
    info = cpuinfo.get_cpu_info()
    # NOTE(review): newer py-cpuinfo releases appear to expose the brand
    # under 'brand_raw' instead of 'brand' -- confirm against the pinned
    # version.  Falling back keeps older installs working unchanged.
    brand = info['brand_raw'] if 'brand_raw' in info else info['brand']
    # TODO: '4 GB' is a hard-coded placeholder (presumably the memory size);
    # replace it with a value detected at runtime.
    return NodeInfo(brand, '4 GB')
def get_server_usage_stats(agent): yield From(agent.run_event.wait()) config = agent.config['metrics'] logger.info('starting "get_server_usage" task for "%s"', hostname) prev_stats = psutil.disk_io_counters(perdisk=True) prev_io_counters = psutil.net_io_counters(pernic=True) disk_partitions = psutil.disk_partitions() partition_mountpoint = dict() partitions = set() included_partitions = [part.device for part in disk_partitions] for dp in disk_partitions: if included_partitions and dp.device not in included_partitions: continue partitions.add(dp.device) partition_mountpoint[dp.device] = dp.mountpoint while agent.run_event.is_set(): yield From(asyncio.sleep(frequency)) try: #OpenStack processes the_processes = ['nova-novncproxy', 'nova-cert', 'nova-compute', 'nova-conductor', 'nova-api', 'neutron-openvswitch-agent', 'neutron-l3-agent', 'neutron-dhcp-agent', 'nova-scheduler', 'neutron-server', 'neutron-metadata-agent'] the_processes_to_check = ['nova-novncproxy', 'nova-cert', 'nova-compute', 'nova-conductor', 'nova-api', 'neutron-openvswitch-agent', 'neutron-l3-agent', 'neutron-dhcp-agent', 'nova-scheduler', 'neutron-server', 'neutron-metadata-agent'] cpu_percent = psutil.cpu_percent(interval=None) memory = psutil.virtual_memory() swap = psutil.swap_memory() loadavg = os.getloadavg() data = {'server_name': hostname, 'measurements': [], 'stats':{'platform': {}, 'hardware': {}, 'openstack': {}}} #data['measurements'].append({'name': 'cpu.total', 'value': psutil.cpu_count()}) data['measurements'].append({'name': 'cpu.percent', 'value': cpu_percent}) #data['measurements'].append({'name': 'memory.total', 'value': memory.total}) data['measurements'].append({'name': 'memory.available', 'value': memory.available}) data['measurements'].append({'name': 'memory.percent', 'value': memory.percent}) data['measurements'].append({'name': 'memory.used', 'value': memory.used}) data['measurements'].append({'name': 'memory.free', 'value': memory.free}) 
#data['measurements'].append({'name': 'swap.total', 'value': swap.total}) data['measurements'].append({'name': 'swap.used', 'value': swap.used}) data['measurements'].append({'name': 'swap.free', 'value': swap.free}) data['measurements'].append({'name': 'swap.percent', 'value': swap.percent}) data['measurements'].append({'name': 'loadavg.1', 'value': loadavg[0]}) data['measurements'].append({'name': 'loadavg.5', 'value': loadavg[1]}) data['measurements'].append({'name': 'loadavg.15', 'value': loadavg[2]}) curr_stats = psutil.disk_io_counters(perdisk=True) include_disks = None include_disks = [part.device for part in disk_partitions] for disk in curr_stats: if include_disks and disk not in include_disks: continue curr = curr_stats[disk] prev = prev_stats[disk] data['measurements'].append({'name': 'disk_io.read_count', 'tags': {'disk': disk}, 'value': curr.read_count - prev.read_count}) data['measurements'].append({'name': 'disk_io.write_count', 'tags': {'disk': disk}, 'value': curr.write_count - prev.write_count}) data['measurements'].append({'name': 'disk_io.read_bytes', 'tags': {'disk': disk}, 'value': curr.read_bytes - prev.read_bytes}) data['measurements'].append({'name': 'disk_io.write_bytes', 'tags': {'disk': disk}, 'value': curr.write_bytes - prev.write_bytes}) data['measurements'].append({'name': 'disk_io.read_time', 'tags': {'disk': disk}, 'value': curr.read_time - prev.read_time}) data['measurements'].append({'name': 'disk_io.write_time', 'tags': {'disk': disk}, 'value': curr.write_time - prev.write_time}) prev_stats = curr_stats for partition in partitions: disk_data = psutil.disk_usage(partition_mountpoint[partition]) data['measurements'].append({'name': 'partition_usage.total', 'tags': {'partition': partition}, 'value': disk_data.total}) data['measurements'].append({'name': 'partition_usage.used', 'tags': {'partition': partition}, 'value': disk_data.used}) data['measurements'].append({'name': 'partition_usage.free', 'tags': {'partition': partition}, 
'value': disk_data.free}) data['measurements'].append({'name': 'partition_usage.percent', 'tags': {'partition': partition}, 'value': disk_data.percent}) curr_io_counters = psutil.net_io_counters(pernic=True) for interface in curr_io_counters: try: curr = curr_io_counters[interface] except: curr = 0 try: prev = prev_io_counters[interface] except: prev = 0 if curr and hasattr(curr, 'bytes_sent'): curr_bytes_sent = curr.bytes_sent else: curr_bytes_sent = 0 if curr and hasattr(curr, 'bytes_recv'): curr_bytes_recv = curr.bytes_recv else: curr_bytes_recv = 0 if prev and hasattr(prev, 'bytes_sent'): prev_bytes_sent = prev.bytes_sent else: prev_bytes_sent = 0 if prev and hasattr(prev, 'bytes_recv'): prev_bytes_recv = prev.bytes_recv else: prev_bytes_recv = 0 if curr and hasattr(curr, 'packets_sent'): curr_packets_sent = curr.packets_sent else: curr_packets_sent = 0 if curr and hasattr(curr, 'packets_recv'): curr_packets_recv = curr.packets_recv else: curr_packets_recv = 0 if prev and hasattr(prev, 'packets_sent'): prev_packets_sent = prev.packets_sent else: prev_packets_sent = 0 if prev and hasattr(prev, 'packets_recv'): prev_packets_recv = prev.packets_recv else: prev_packets_recv = 0 if curr and hasattr(curr, 'errin'): curr_errin = curr.errin else: curr_errin = 0 if curr and hasattr(curr, 'errout'): curr_errout = curr.errout else: curr_errout = 0 if prev and hasattr(prev, 'errin'): prev_errin = prev.errin else: prev_errin = 0 if prev and hasattr(prev, 'errout'): prev_errout = prev.errout else: prev_errout = 0 if curr and hasattr(curr, 'dropin'): curr_dropin = curr.dropin else: curr_dropin = 0 if curr and hasattr(curr, 'dropout'): curr_dropout = curr.dropout else: curr_dropout = 0 if prev and hasattr(prev, 'dropin'): prev_dropin = prev.dropin else: prev_dropin = 0 if prev and hasattr(prev, 'dropout'): prev_dropout = prev.dropout else: prev_dropout = 0 data['measurements'].append({'name': 'net_io.bytes_sent', 'tags': {'interface': interface}, 'value': curr_bytes_sent - 
prev_bytes_sent}) data['measurements'].append({'name': 'net_io.bytes_recv', 'tags': {'interface': interface}, 'value': curr_bytes_recv - prev_bytes_recv}) data['measurements'].append({'name': 'net_io.packets_sent', 'tags': {'interface': interface}, 'value': curr_packets_sent - prev_packets_sent}) data['measurements'].append({'name': 'net_io.packets_recv', 'tags': {'interface': interface}, 'value': curr_packets_recv - prev_packets_recv}) data['measurements'].append({'name': 'net_io.errin', 'tags': {'interface': interface}, 'value': curr_errin - prev_errin}) data['measurements'].append({'name': 'net_io.errout', 'tags': {'interface': interface}, 'value': curr_errout - prev_errout}) data['measurements'].append({'name': 'net_io.dropin', 'tags': {'interface': interface}, 'value': curr_dropin - prev_dropin}) data['measurements'].append({'name': 'net_io.dropout', 'tags': {'interface': interface}, 'value': curr_dropout - prev_dropout}) server_stats = platform.dist() data['stats']['platform']['dist'] = platform.dist()[0] + ' ' + platform.dist()[1] + ' ' + platform.dist()[2] data['stats']['platform']['kernel'] = platform.uname()[2] data['stats']['platform']['architecture'] = platform.architecture()[0] cpu_hw = cpuinfo.get_cpu_info() data['stats']['platform']['processor'] = {} data['stats']['platform']['processor']['qty'] = psutil.cpu_count() data['stats']['platform']['processor']['brand'] = cpuinfo.get_cpu_info()['brand'] data['stats']['platform']['processor']['count'] = cpuinfo.get_cpu_info()['count'] data['stats']['platform']['processor']['flags'] = cpuinfo.get_cpu_info()['flags'] data['stats']['platform']['memory'] = {} data['stats']['platform']['memory']['total'] = memory.total data['stats']['platform']['swap'] = {} data['stats']['platform']['swap']['total'] = swap.total processes = the_processes processes_to_check = the_processes_to_check try: p = subprocess.Popen(["nova-manage", "version"], stdout=subprocess.PIPE) version = p.communicate()[0] if version: 
data['stats']['openstack']['version'] = version.strip() except Exception, e: pass for p in psutil.process_iter(): try: try: process_name = p.name() except: process_name = p.name process_found = False result = False result = [process_found for process_found in processes_to_check if process_name in process_found] if len(result) > 0: process_name = result[0] if p.is_running(): is_running = 1 else: is_running = 0 data['measurements'].append({'name': 'openstack.processes.'+process_name+'.'+'cpu_percent', 'value': p.cpu_percent()}) data['measurements'].append({'name': 'openstack.processes.'+process_name+'.'+'memory_percent', 'value': p.memory_percent()}) data['measurements'].append({'name': 'openstack.processes.'+process_name+'.'+'num_threads', 'value': p.num_threads()}) data['measurements'].append({'name': 'openstack.processes.'+process_name+'.'+'is_running', 'value': is_running}) #data['measurements'].append({'name': 'openstack.processes.'+process_name+'.'+'create_time', 'value': create_time}) if p.is_running() and p.status != psutil.STATUS_ZOMBIE: process_status = 1 else: process_status = 0 data['measurements'].append({'name': 'openstack.processes.'+process_name+'.'+'up', 'tags': {'status': p.status()} ,'value': process_status}) logger.info('OUTPUT - %s', 'openstack.processes.'+process_name+'.'+'up') processes_to_check.remove(process_name) except psutil.Error: pass for process in processes_to_check: data['measurements'].append({'name': 'openstack.processes.'+process+'.'+'up', 'tags': {'status': 'down'}, 'value': 0}) # OpenStack Nova API if agent._config['openstack_credentials']['user'] and agent._config['openstack_credentials']['password'] and agent._config['openstack_credentials']['project'] and agent._config['openstack_credentials']['auth_url']: nova = client.Client(agent.nova_api_version, agent._config['openstack_credentials']['user'], agent._config['openstack_credentials']['password'], agent._config['openstack_credentials']['project'], 
agent._config['openstack_credentials']['auth_url']) if nova: #nova services try: services = nova.services.list() for service in services: status = 1 if service.state == 'up' else 0 data['measurements'].append({'name': 'openstack.nova-api.services.'+service.binary+'.'+'status', 'value': status}) except Exception, e: pass #availability zones try: avalability_zones = nova.availability_zones.list(True) for availability_zone in avalability_zones: status = 1 if availability_zone.zoneState['available'] is True else 0 data['measurements'].append({'name': 'openstack.nova-api.avalability_zones.status', 'tags': {'availability_zone': availability_zone.zoneName}, 'value': status}) except Exception, e: pass #hypervisor stats hypervisor_keys = {'count', 'current_workload', 'disk_available_least', 'free_disk_gb', 'free_ram_mb', 'local_gb', 'local_gb_used', 'memory_mb', 'memory_mb_used', 'running_vms', 'vcpus', 'vcpus_used'} try: stats = nova.hypervisor_stats.statistics() for key_name in hypervisor_keys: if hasattr(stats, key_name): data['measurements'].append({'name': 'openstack.nova-api.hypervisor_total.'+key_name, 'value': getattr(stats, key_name)}) if hasattr(stats, 'vcpus') and hasattr(stats, 'vcpus_used'): data['measurements'].append({'name': 'openstack.nova-api.hypervisor_total.vcpus_percent', 'value': round(100 * float(stats.vcpus_used)/float(stats.vcpus), 2)}) if hasattr(stats, 'local_gb') and hasattr(stats, 'local_gb_used'): data['measurements'].append({'name': 'openstack.nova-api.hypervisor_total.local_gb_percent', 'value': round(100 * float(stats.local_gb_used)/float(stats.local_gb), 2)}) if hasattr(stats, 'memory_mb') and hasattr(stats, 'memory_mb_used'): data['measurements'].append({'name': 'openstack.nova-api.hypervisor_total.memory_mb_percent', 'value': round(100 * float(stats.memory_mb_used)/float(stats.memory_mb), 2)}) except Exception, e: pass #server stats try: servers = nova.servers.list() for server in servers: value = agent.openstack_status[server.status] if 
not value: value = -1 data['measurements'].append({'name': 'openstack.nova-api.servers', 'tags': {'name': server.name, 'id': server.id, 'tenant_id': server.tenant_id, 'status': server.status}, 'value': value}) except Exception, e: pass
def cpu_info():
    """Return the ``'brand_raw'`` entry of ``cpuinfo.get_cpu_info()``.

    Raises:
        KeyError: if the returned dict has no ``'brand_raw'`` key.
    """
    return cpuinfo.get_cpu_info()['brand_raw']