def translate_wlist_to_scheduler_wlist(self, w_list_arg):
    """ Translate ubench custom node list format to scheduler custom node list format

    Args:
        w_list_arg: list of elements that are either node counts (digit strings)
            or scheduler node list strings
    """
    try:
        scheduler_interface = slurmi.SlurmInterface()
    except:  # pylint:disable=bare-except
        print("Error!! Unable to load slurm module")
        scheduler_interface = None
        return None

    w_list = []
    for element in w_list_arg:
        if element.isdigit():
            elem_tuple = (int(element), None)
        else:
            elem_tuple = (scheduler_interface.get_nnodes_from_string(element),
                          element)
        w_list.append(elem_tuple)

    return w_list
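# A hedged usage sketch for the translator above (values are illustrative,
# not from the source): assuming get_nnodes_from_string('cn[10-12]') counts
# 3 nodes, a mixed list of counts and node expressions becomes a list of
# (nnodes, nodes) tuples:
#
#   self.translate_wlist_to_scheduler_wlist(['2', 'cn[10-12]'])
#   # -> [(2, None), (3, 'cn[10-12]')]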
def test_available_nodes(mocker):
    """ Check that get_available_nodes returns slices of idle nodes. """
    mock_popen = mocker.patch(".".join(MOCK_UTILS + ["Popen"]),
                              side_effect=mockpopen)
    interface = slurm_i.SlurmInterface()
    node_list = interface.get_available_nodes(5)
    assert interface.get_available_nodes()
    assert len(node_list) == 3
    assert node_list[0] == "node[0006-0008,0020-0021]"
def test_job_status(mocker):
    """ Check that get_jobs_state maps job ids to their Slurm state. """
    mock_popen = mocker.patch(
        "ubench.scheduler_interfaces.slurm_interface.Popen",
        side_effect=mockpopen)
    interface = slurm_i.SlurmInterface()
    assert interface.get_jobs_state(['111', '222']) == {
        '175757': 'RUNNING',
        '26382': 'COMPLETED',
        '26938': 'COMPLETED'
    }
def test_job_status_cache(pytestconfig, mocker):
    """ Test the memoize_disk decorator. """
    mock_popen = mocker.patch(
        "ubench.scheduler_interfaces.slurm_interface.Popen",
        side_effect=mockpopen)

    # cache files have a postfix based on the arguments used in the method's call
    md5_id = hashlib.md5(''.join(['222']).encode('utf-8')).hexdigest()
    cache_file = '/tmp/ubench_cache-{}-{}'.format(ubench.config.USER, md5_id)

    # the cache file exists if a previous test run failed, so we clean it up
    if os.path.isfile(cache_file):
        os.remove(cache_file)

    # we create a fake cache file:
    expected_results = {
        "175757": "RUNNING",
        "26382": "COMPLETED",
        "26938": "COMPLETED"
    }
    cache_contents = {'date': time.time(), 'data': expected_results}
    with open(cache_file, 'w') as cachef:
        json.dump(cache_contents, cachef)

    interface = slurm_i.SlurmInterface()

    # cached values are used: no slurm command is executed
    jobs_info = interface.get_jobs_state(['222'])
    assert not mock_popen.called
    assert jobs_info == expected_results

    # we invalidate the cache
    cache = {'date': time.time() - 1000}
    with open(cache_file, 'w') as cachef:
        json.dump(cache, cachef)

    jobs_info = interface.get_jobs_state(['111', '222'])
    # the slurm command has been executed this time
    assert mock_popen.called
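# The decorator exercised by the test above is ubench's memoize_disk. The
# sketch below is a minimal re-implementation of the caching contract the
# test relies on (JSON files named after an md5 of the call arguments,
# holding {'date': ..., 'data': ...}); the name, expiry value and USER
# lookup are assumptions, not the actual ubench implementation.
import functools
import hashlib
import json
import os
import time


def memoize_disk_sketch(expiry=600, cache_dir='/tmp'):
    """Hypothetical re-implementation of the disk cache, for illustration."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, args):
            # Cache file name is derived from the joined call arguments
            md5_id = hashlib.md5(''.join(args).encode('utf-8')).hexdigest()
            cache_file = os.path.join(
                cache_dir,
                'ubench_cache-{}-{}'.format(os.environ.get('USER', 'nouser'),
                                            md5_id))
            if os.path.isfile(cache_file):
                with open(cache_file) as cachef:
                    cached = json.load(cachef)
                # Serve fresh entries from disk: no scheduler call is made
                if time.time() - cached['date'] < expiry:
                    return cached['data']
            # Stale or missing entry: call through and refresh the cache
            data = func(self, args)
            with open(cache_file, 'w') as cachef:
                json.dump({'date': time.time(), 'data': data}, cachef)
            return data
        return wrapper
    return decorator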
def write_bench_data(self, benchmark_id):
    """ Write benchmark results data to a bench_results.yaml file.

    Args:
        benchmark_id (int): benchmark number
    """
    # pylint: disable=too-many-locals, too-many-branches, too-many-statements
    try:
        scheduler_interface = slurmi.SlurmInterface()
    except:  # pylint: disable=bare-except
        print('Warning!! Unable to load Slurm module')  # pylint: disable=superfluous-parens
        scheduler_interface = None

    os.chdir(self.benchmark_path)
    output_dir = self.jube_xml_files.get_bench_outputdir()
    benchmark_rundir = self.get_bench_rundir(benchmark_id)
    jube_cmd = 'jube info ./{0} --id {1} --step execute'.format(
        output_dir, benchmark_id)

    cmd_output = tempfile.TemporaryFile()
    result_from_jube = Popen(jube_cmd, cwd=os.getcwd(), shell=True,
                             stdout=cmd_output, universal_newlines=True)
    ret_code = result_from_jube.wait()  # pylint: disable=unused-variable

    cmd_output.flush()
    cmd_output.seek(0)

    results = {}
    workpackages = re.findall(r'Workpackages(.*?)\n{2,}',
                              cmd_output.read().decode('utf-8'),
                              re.DOTALL)[0]
    workdirs = {}
    regex_workdir = r'^\s+(\d+).*(' + re.escape(output_dir) + r'.*work).*'
    for package in workpackages.split('\n'):
        temp_match = re.match(regex_workdir, package)
        if temp_match:
            id_workpackage = temp_match.group(1)
            path_workpackage = temp_match.group(2)
            workdirs[id_workpackage] = path_workpackage

    cmd_output.seek(0)
    parameterization = re.findall(r'ID:(.*?)(?=\n{3,}|\sID)',
                                  cmd_output.read().decode('utf-8') + '\n',
                                  re.DOTALL)
    for execution_step in parameterization:
        id_step = [x.strip() for x in execution_step.split('\n')][0]
        param_step = [x.strip() for x in execution_step.split('\n')][1:]
        results[id_step] = {}
        for parameter in param_step:
            temp_match = re.match(r'^\S+:', parameter)
            if temp_match:
                value = parameter.replace(temp_match.group(0), '')
                param = temp_match.group(0).replace(':', '')
                results[id_step][param] = value.strip()

    cmd_output.close()

    for key, value in list(results.items()):
        result_file_path = os.path.join(benchmark_rundir,
                                        'result/ubench_results.dat')

        # We add the part of results which corresponds to a given execute
        with open(result_file_path) as csvfile:
            reader = csv.DictReader(csvfile)
            field_names = reader.fieldnames
            common_fields = list(set(value.keys()) & set(field_names))
            result_fields = list(set(field_names) - set(common_fields))
            temp_hash = {}
            for field in result_fields:
                temp_hash[field] = []
            for row in reader:
                add_to_results = True
                for field in common_fields:
                    if value[field] != row[field]:
                        add_to_results = False
                        break
                if add_to_results:
                    for field in result_fields:
                        temp_hash[field].append(row[field])

        # When there is just one value we replace the array by that value
        for field in result_fields:
            if len(temp_hash[field]) == 1:
                temp_hash[field] = temp_hash[field][0]

        results[key]['results_bench'] = temp_hash
        results[key]['context_fields'] = common_fields

        # Add job information to step execute
        job_file_path = os.path.join(workdirs[key], 'stdout')
        job_id = 0
        with open(job_file_path, 'r') as job_file:
            for line in job_file:
                re_result = re.findall(r'\d+', line)
                if re_result:
                    job_id = re_result[0]
                    value['job_id_ubench'] = job_id
                    if scheduler_interface:
                        job_info = scheduler_interface.get_job_info(job_id)
                        if job_info:
                            value.update(job_info[-1])
                    results[key].update(value)
                    break

    # Add metadata present in ubench.log
    field_pattern = re.compile('(.*) : (.*)')
    try:
        log_file = open(os.path.join(benchmark_rundir, 'ubench.log'), 'r')
    except IOError:
        print('Warning!! file ubench.log was not found. '
              'Benchmark result data could not be created.')
        return

    metadata = {}
    fields = field_pattern.findall(log_file.read())
    for field in fields:
        metadata[field[0].strip()] = field[1].strip()

    bench_data = data_store_yaml.DataStoreYAML()
    bench_data.write(metadata, results,
                     os.path.join(benchmark_rundir, 'bench_results.yaml'))
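# The field_pattern regex '(.*) : (.*)' above matches the ' : '-separated
# lines that run() writes to ubench.log; for instance (hypothetical values):
#
#   Benchmark_name : stream
#   Platform : cluster-a
#
# would yield metadata == {'Benchmark_name': 'stream', 'Platform': 'cluster-a'}.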
def translate_wlist_to_scheduler_wlist(self, w_list_arg):
    """ Translate ubench custom node list format to scheduler custom node list format

    TODO determine scheduler_interface from platform data.
    """
    try:
        scheduler_interface = slurmi.SlurmInterface()
    except:  # pylint: disable=bare-except
        print('Warning!! Unable to load slurm module')
        scheduler_interface = None
        return

    w_list = list(w_list_arg)
    for sub_wlist in w_list:
        sub_wlist_temp = list(sub_wlist)
        stride = 0
        for idx, welem in enumerate(sub_wlist_temp):
            # Manage the "all" keyword that is meant to launch benchmarks
            # on every idle node
            catch = re.search(r'^all(\d+)$', str(welem))
            idxn = idx + stride
            if catch:
                slice_size = int(catch.group(1))
                available_nodes_list = scheduler_interface.get_available_nodes(
                    slice_size)
                njobs = len(available_nodes_list)
                sub_wlist[idxn:idxn + 1] = zip([slice_size] * njobs,
                                               available_nodes_list)
                stride += njobs - 1
            else:
                # Manage the cn[10,13-17] notation
                catch = re.search(r'^(\D+.*)$', str(welem))
                if catch:
                    nnodes_list = [
                        scheduler_interface.get_nnodes_from_string(catch.group(1))
                    ]
                    nodes_list = [catch.group(1)]
                    sub_wlist[idxn:idxn + 1] = zip(nnodes_list, nodes_list)
                else:
                    # Manage the 2,4 notation that is needed to launch jobs
                    # without defined node targets.
                    catch = re.search(r'^([\d+,]*)([\d]+)$', str(welem))
                    if catch:
                        nnodes_list = [int(x) for x in re.split(',', str(welem))]
                        sub_wlist[idxn:idxn + 1] = zip(nnodes_list,
                                                       [None] * len(nnodes_list))
                        stride += len(nnodes_list) - 1
                    else:
                        # Manage the 2,4,cn[200-205] notation that is used
                        # to get cn[200-201] cn[200-203]
                        catch = re.search(r'^([\d+,]*[\d+]),(.*)$', str(welem))
                        if catch:
                            nnodes_list = [
                                int(x) for x in re.split(',', catch.group(1))
                            ]
                            nodes_list = str(catch.group(2))
                            sub_wlist[idxn:idxn + 1] = zip(
                                nnodes_list,
                                scheduler_interface.get_truncated_nodes_lists(
                                    nnodes_list, nodes_list))
                            stride += len(nnodes_list) - 1
                        else:
                            raise Exception(str(welem) + ' format is not correct')

    # Flatten the w_list
    w_list = [item for sublist in w_list for item in sublist]
    return w_list
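# Hedged examples of the notations handled above (node names illustrative):
#
#   'all4'            -> one (4, nodes) tuple per idle 4-node slice returned
#                        by get_available_nodes(4)
#   'cn[10,13-17]'    -> [(6, 'cn[10,13-17]')]  (count via get_nnodes_from_string)
#   '2,4'             -> [(2, None), (4, None)]  (no node targets)
#   '2,4,cn[200-205]' -> [(2, 'cn[200-201]'), (4, 'cn[200-203]')]
#                        (via get_truncated_nodes_lists)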
def test_emptylist():
    """ Check that get_available_nodes returns an empty result by default. """
    interface = slurm_i.SlurmInterface()
    assert not interface.get_available_nodes()
def run(self, opts):  # pylint: disable=arguments-differ
    """ Run benchmark on a given platform and write a ubench.log file
    in the benchmark run directory.

    Args:
        opts (dict): dictionary with the options sent to unclebench
    """
    # pylint: disable=dangerous-default-value, too-many-locals, too-many-branches
    self._init_run_dir(self.platform)

    if not opts['foreground']:
        print('---- Launching benchmark in background')

    try:
        # run_dir, ID, updated_params = self.benchmarking_api.run(opts)
        j_job, updated_params = self.benchmarking_api.run(opts)
    except (RuntimeError, OSError) as rerror:
        print('---- Error launching benchmark :')
        print(str(rerror))
        raise

    for name, old_value, new_value in updated_params:
        print('---- {0} parameter was modified from {1} to {2} for this run'.format(
            name, old_value, new_value))

    print("---- benchmark run directory: {}".format(j_job.result_path))

    logfile_path = os.path.join(j_job.result_path, 'ubench.log')
    date = time.strftime("%c")
    with open(logfile_path, 'w') as logfile:
        logfile.write('Benchmark_name : {0} \n'.format(self.benchmark))
        logfile.write('Platform : {0} \n'.format(self.platform))
        logfile.write('ID : {0} \n'.format(j_job.jubeid))
        logfile.write('Date : {0} \n'.format(date))
        logfile.write('Run_directory : {0} \n'.format(j_job.result_path))
        if 'raw_cli' in opts:
            logfile.write('cmdline : {0} \n'.format(' '.join(opts['raw_cli'])))

    print("---- Use the following command to follow benchmark progress: "
          "ubench log -p {0} -b {1} -i {2}".format(self.platform,
                                                   self.benchmark,
                                                   j_job.jubeid))

    if opts['foreground']:
        print('---- Waiting for benchmark to finish running')

        # waiting for compilation
        while j_job.jube_returncode is None:
            time.sleep(5)
            print('---- Waiting for benchmark compilation')

        # waiting for jobs execution
        job_ids = j_job.job_ids
        if not job_ids:
            print("Error: No job ids found")

        scheduler_interface = slurmi.SlurmInterface()
        job_states = ['RUNNING']
        finish_states = ['COMPLETED', 'FAILED', 'CANCELLED']
        while job_states:
            job_req = scheduler_interface.get_jobs_state(job_ids)
            # report failing jobs
            failed = [job_n for job_n, job_s in job_req.items()
                      if job_s == 'FAILED']
            if failed:
                for job_n in failed:
                    print("Job {} has failed".format(job_n))
            job_states = [job_s for job_s in job_req.values()
                          if job_s not in finish_states]
            if job_states:
                print("Waiting for jobs id: {}".format(",".join(job_req.keys())))
                time.sleep(60)

        print('---- All jobs or processes in background have finished')
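# For example (hypothetical values), if get_jobs_state() returns
#   {'1001': 'RUNNING', '1002': 'COMPLETED'}
# the loop above keeps polling (job_states == ['RUNNING']); once every state
# is in finish_states the comprehension yields [] and the loop exits.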
def _write_bench_data(self, benchmark_id):  # pylint: disable=too-many-locals
    ''' Generates benchmark results data and writes bench_results.yaml

    Args:
        benchmark_id (int): id of the benchmark

    Returns:
        (dict) mapping between Jube execution directories and result values
    '''
    outpath = self.jube_files.get_bench_outputdir()
    benchmark_rundir = self.get_bench_rundir(benchmark_id, outpath)
    context_names, context = self._get_execution_context(benchmark_id)
    results, field_names = self._get_results(benchmark_rundir, context_names)
    scheduler_interface = slurmi.SlurmInterface()
    common_fields = [n for n in context_names if n in field_names]
    map_dir = {}

    for exec_id, values in context.items():
        key_results = hashlib.md5(
            ''.join([values[n] for n in common_fields]).encode('utf-8'))
        key = key_results.hexdigest()
        if key not in results:
            results[key] = 'failed'
        context[exec_id]['results_bench'] = results[key]
        context[exec_id]['context_fields'] = common_fields
        exec_dir = "{}_execute".format(values['jube_wp_id'].zfill(6))
        map_dir[exec_dir] = results[key]

        job_file_path = os.path.join(values['jube_wp_abspath'], 'stdout')
        with open(job_file_path, 'r') as job_file:
            for line in job_file:
                re_result = re.findall(r'\d+', line)
                if re_result:
                    job_id = re_result[0]
                    values['job_id_ubench'] = job_id
                    if scheduler_interface:
                        job_info = scheduler_interface.get_job_info(job_id)
                        if job_info:
                            values.update(job_info[-1])
                    context[exec_id].update(values)
                    break

    try:
        with open(os.path.join(benchmark_rundir, 'ubench.log'), 'r') as logf:
            field_pattern = re.compile('(.*) : (.*)')
            fields = field_pattern.findall(logf.read())
            metadata = {name.strip(): val.strip() for name, val in fields}
    except IOError:
        metadata = {'Benchmark_name': self.benchmark,
                    'Date': time.strftime("%c"),
                    'Platform': self.platform,
                    'Run_directory': benchmark_rundir,
                    'cmdline': 'Campaign'}

    bench_data = data_store_yaml.DataStoreYAML()
    self.results_file = os.path.join(benchmark_rundir, 'bench_results.yaml')
    bench_data.write(metadata, context, self.results_file)

    return map_dir
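# The md5 key above groups executions sharing the same context; e.g.
# (hypothetical values) with common_fields == ['nodes', 'compiler'] and
# values == {'nodes': '4', 'compiler': 'gcc', ...}, the key is
# hashlib.md5('4gcc'.encode('utf-8')).hexdigest(), so workpackages with
# identical context map to the same results entry.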