def get_pbs_command(self, pbs_script_filename):
    """
    Build the ``qsub`` command line which submits the given PBS script.

    The requested resources are derived from the case: the number of nodes
    is NP / PPN rounded up, the memory limit is multiplied by PPN and the
    wall time is taken from the case time limit (in seconds).

    :param pbs_script_filename: path to the PBS script to be submitted
    :return: command as a list of arguments
    :rtype: list
    """
    # total number of parallel processes
    np = self.case.proc
    # processes per node
    ppn = self.ppn

    # number of physical machines to be reserved (rounded up)
    nodes = float(np) / ppn
    if int(nodes) != nodes:
        Printer.wrn('Warning: NP is not divisible by PPN')
    nodes = int(math.ceil(nodes))

    # memory limit
    mem = int(self.case.memory_limit * ppn)

    # wall time as H:MM:SS; hours are not wrapped into days, because
    # str(timedelta) would produce '1 day, 1:00:00' for limits >= 24 hours
    minutes, seconds = divmod(int(self.case.time_limit), 60)
    hours, minutes = divmod(minutes, 60)
    walltime = '{}:{:02d}:{:02d}'.format(hours, minutes, seconds)

    # if only -q is set (queue is not a string), the 'default' queue is used
    # otherwise the given string value is used
    queue = self.queue if isinstance(self.queue, str) else 'default'

    return [
        'qsub',
        '-l', 'nodes={}:ppn={}'.format(nodes, ppn),  # :nfs4 option may be set
        '-l', 'mem={}mb'.format(mem),
        '-l', 'walltime={}'.format(walltime),
        '-l', 'place=infiniband',
        '-q', queue,
        '-o', self.case.fs.pbs_output,
        pbs_script_filename
    ]
def get_result(self):
    """
    Print the result of a finished run.

    On a non-zero return code a status line with the case name is printed
    and the log is passed to ``RuntestParser.print_log_file``. On success
    the log is printed in batch mode only.
    """
    if self.returncode != 0:
        Printer.out("{} Run error, case: {p.name}", RuntestParser.get_status_line(self), p=self)
        RuntestParser.print_log_file(self.log, self.n_lines)
        return

    if self.batch:
        RuntestParser.print_log_file(self.log, self.n_lines)
def exit_usage(self, msg=None, exit_code=1, *args, **kwargs):
    """
    Print an optional error message followed by the usage text and abort
    argument processing.

    :param msg: error message; only the usage is printed when it is falsy
    :param exit_code: code carried by the raised exception, ``None``
        suppresses raising
    :param args: extra positional arguments forwarded to ``Printer.err``
    :param kwargs: extra keyword arguments forwarded to ``Printer.err``
    :raises ArgumentException: whenever ``exit_code`` is not ``None``
    """
    if msg:
        error_line = 'Error: {}'.format(msg)
        Printer.err(error_line, *args, **kwargs)

    usage_text = self.usage()
    Printer.err(usage_text)

    if exit_code is None:
        return
    raise ArgumentException(exit_code, msg)
def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
    """
    Leave the context: clear the ``running`` flag and refresh the output
    one last time (nothing is done for an inactive object).

    :return: always False, so an exception raised inside the ``with``
        block is never suppressed
    """
    if self.active:
        self.running = False
        self.update()
        # in dynamic mode terminate the line which was being rewritten
        if self.dynamic:
            Printer.out()
    return False
def on_complete(self, pypy=None):
    """
    Report how the command ended.

    A positive return code prints an error with the process details and the
    command itself; otherwise the ``end_fmt`` message is printed when set.
    When no progress is shown, the captured output is printed afterwards.

    :param pypy: not used, the monitor works with ``self.pypy``
    """
    finished = self.pypy
    if finished.returncode > 0:
        failure = 'Error! Command ({process.pid}) ended with {process.returncode}'
        Printer.err(failure.format(process=finished.executor.process))
        Printer.err(Command.to_string(finished.executor.command))
    elif self.end_fmt:
        Printer.out(self.end_fmt.format(self=self))

    if finished.progress:
        return

    Printer.separator()
    captured = finished.executor.output.read()
    if captured:
        Printer.out(captured)
def get_pbs_command(self, pbs_script_filename):
    """
    Assemble the ``qsub`` invocation for the given PBS script.

    Node count is NP / PPN rounded up (a warning is printed when NP is not
    divisible by PPN), memory is the case memory limit multiplied by PPN
    and the wall time comes from the case time limit given in seconds.

    :param pbs_script_filename: PBS script which will be submitted
    :return: ``qsub`` command as a list of arguments
    :rtype: list
    """
    np = self.case.proc  # total parallel processes
    ppn = self.ppn       # processes per node

    # number of physical machines to be reserved
    exact_nodes = float(np) / ppn
    if int(exact_nodes) != exact_nodes:
        Printer.wrn('Warning: NP is not divisible by PPN')
    nodes = int(math.ceil(exact_nodes))

    # memory limit
    mem = int(self.case.memory_limit * ppn)

    # time limit formatted as H:MM:SS with an unbounded hour field;
    # str(datetime.timedelta) switches to 'N day(s), H:MM:SS' from 24 hours
    # up, which is not a valid walltime value
    total_seconds = int(self.case.time_limit)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    walltime = '{}:{:02d}:{:02d}'.format(hours, minutes, seconds)

    # a non-string queue means only -q was set: use the 'default' queue
    queue = self.queue
    if not isinstance(queue, str):
        queue = 'default'

    command = [
        'qsub',
        '-l', 'nodes={}:ppn={}'.format(nodes, ppn),  # :nfs4 option may be set
        '-l', 'mem={}mb'.format(mem),
        '-l', 'walltime={}'.format(walltime),
        '-l', 'place=infiniband',
        '-q', queue,
        '-o', self.case.fs.pbs_output,
        pbs_script_filename
    ]
    return command
def get_pbs_module(hostname=None):
    """
    Import the PBS module which belongs to the given host.

    The module name is looked up in ``host_table.json`` located in the
    source directory. When there is no entry for the host, the name is
    derived from the hostname (dots replaced by underscores, prefixed
    with ``pbs_``) and a warning is printed.

    :param hostname: host name, ``platform.node()`` is used when not given
    :rtype : scripts.pbs.modules.pbs_tarkil_cesnet_cz
    """
    host = hostname if hostname else platform.node()
    module_name = None

    # try to get the module name from the json file
    table_path = Paths.join(Paths.source_dir(), 'host_table.json')
    if Paths.exists(table_path):
        with open(table_path, 'r') as fp:
            module_name = json.load(fp).get(host, None)

    # fall back to a name derived from the hostname
    if not module_name:
        module_name = 'pbs_{}'.format(host.replace('.', '_'))
        Printer.wrn('Warning! no host specified assuming module {}', module_name)

    # try to get pbs_module
    return importlib.import_module('scripts.pbs.modules.{}'.format(module_name))
def do_work(parser, args=None):
    """
    Run the given command under a time and/or memory limit.

    :type args: list
    :type parser: utils.argparser.ArgParser
    :return: return code of the executed process
    """
    options, others, rest = parser.parse(args)

    # a command is mandatory
    if not rest:
        parser.exit_usage('No command specified!', exit_code=1)

    # at least one limit must be set
    if options.time_limit is None and options.memory_limit is None:
        parser.exit_usage('No limits specified!', exit_code=2)

    # prepare executor
    pypy = PyPy(BinExecutor(rest), progress=not options.batch)

    # set limits
    pypy.error_monitor.message = None
    limits = pypy.limit_monitor
    limits.time_limit = options.time_limit
    limits.memory_limit = options.memory_limit

    # turn on output
    pypy.info_monitor.stdout_stderr = (
        None if options.batch else Paths.temp_file('exec-limit.log')
    )

    # start process and wait for it
    Printer.separator()
    pypy.start()
    pypy.join()
    return pypy.returncode
def print_log_file(cls, f, n_lines):
    """
    Print the content of a log file framed by ``format_n_lines``.

    Nothing is printed when ``IO.read`` returns no content.

    :param f: path of the log file
    :param n_lines: 0 announces the full log, any other value announces the
        last ``abs(n_lines)`` lines; the negated value is passed on to
        ``format_n_lines``
    """
    content = IO.read(f)
    if not content:
        return

    if n_lines == 0:
        Printer.out('Full log from file {}:', f)
    else:
        Printer.out('Last {} lines from file {}:', abs(n_lines), f)

    padding = Printer.indent * ' '
    Printer.wrn(format_n_lines(content.rstrip(), -n_lines, indent=padding))
def format_n_lines_(text, line_prefix='## ', line_suffix='',
                    first_line='#' * 60, last_line='#' * 60,
                    empty="<file is empty>"):
    """
    Frame the given text: every line gets a prefix and a suffix and the
    whole output is enclosed between ``first_line`` and ``last_line``.

    All lines are used unless ``Printer.batched`` is muted, in which case
    only the last 20 lines are kept. Every produced line starts with
    ``Printer.indent()``.

    :param text: string (split into lines) or an iterable of lines; None or
        an empty value is replaced by the centered ``empty`` placeholder
    :param line_prefix: text put before each line
    :param line_suffix: text put after each line; for empty input the
        reversed ``line_prefix`` is used instead
    :param first_line: opening line, skipped when falsy
    :param last_line: closing line, skipped when falsy
    :param empty: placeholder used when there is no text
    :return: formatted lines joined by a newline
    :rtype: str
    """
    # 0 means all lines, a negative value means the last n lines
    n_lines = 0 if not Printer.batched.is_muted() else -20
    indent = Printer.indent()

    # empty output
    if not text:
        text = '{:-^54s}'.format(empty)
        line_suffix = line_prefix[::-1]

    # ensure we have a list, so slicing works for any iterable
    lines = text.splitlines() if isinstance(text, str) else list(text)

    # negative n_lines (last n lines), otherwise all lines (0)
    if n_lines < 0:
        lines = lines[n_lines:]

    result = []
    if first_line:
        result.append(indent + first_line)
    result.extend(indent + line_prefix + line + line_suffix for line in lines)
    if last_line:
        result.append(indent + last_line)

    return '\n'.join(result)
def create_comparisons(self):
    """
    Create a ``ComparisonMultiThread`` with one ``PyPy`` process for every
    pair returned by ``self._get_ref_output_files`` for each check rule of
    the case.

    The first key of a check rule is taken as the comparison method; the
    class ``Compare<Method>`` is looked up in ``file_comparison``. Rules
    without a matching class are reported and skipped.

    :return: object holding all prepared comparison processes
    :rtype: ComparisonMultiThread
    """
    comparisons = ComparisonMultiThread(self.case.fs.ndiff_log)
    comparisons.thread_name_property = True

    for check_rule in self.case.check_rules:
        # list(...)[0] works with both list-like and view-like dict keys
        method = str(list(check_rule)[0])
        module = getattr(file_comparison, 'Compare{}'.format(method.capitalize()), None)
        comp_data = check_rule[method]
        if not module:
            Printer.err('Warning! No module for check_rule method "{}"', method)
            continue

        pairs = self._get_ref_output_files(comp_data)
        if not pairs:
            continue

        # the test name does not depend on the compared pair
        test_name = Paths.basename(Paths.dirname(Paths.dirname(self.case.fs.ref_output)))

        for pair in pairs:
            command = module.get_command(*pair, **comp_data)
            pm = PyPy(BinExecutor(command), progress=True)

            # if we fail, set error to 13
            pm.custom_error = 13
            pm.info_monitor.active = False
            pm.limit_monitor.active = False
            pm.progress_monitor.active = False
            pm.error_monitor.message = 'Error! Comparison using method {} failed!'.format(method)

            # catch output
            pm.executor.output = OutputMode.variable_output()
            pm.full_output = self.case.fs.ndiff_log

            path = Paths.path_end_until(pair[0], 'ref_output')
            size = Paths.filesize(pair[0], True)
            pm.name = '{}: {} ({})'.format(test_name, path, size)
            comparisons.add(pm)

    return comparisons
def print_log_file(cls, f, n_lines):
    """
    Announce and print a log file.

    The file is read using ``IO.read``; when there is no content, nothing
    is printed. The header says either that the full log follows
    (``n_lines == 0``) or that the last ``abs(n_lines)`` lines follow.

    :param f: log file location
    :param n_lines: number of lines; its negated value is handed over to
        ``format_n_lines``
    """
    log_text = IO.read(f)
    if log_text:
        if n_lines != 0:
            Printer.out('Last {} lines from file {}:', abs(n_lines), f)
        else:
            Printer.out('Full log from file {}:', f)

        framed = format_n_lines(log_text.rstrip(), -n_lines, indent=Printer.indent * ' ')
        Printer.wrn(framed)
def run_local_mode(debug=False):
    """
    Run the command locally, once for every requested number of processes.

    A single configuration is executed directly; for several of them a
    progress counter line is printed before each run and the highest
    return code is stored in ``GlobalResult.returncode``.

    :param debug: when set, the last executed ``PyPy`` object is returned
        instead of the return code
    :return: ``GlobalResult.returncode``, or in debug mode the last
        executed ``PyPy`` object (``None`` when nothing was executed)
    """
    proc, time_limit, memory_limit = get_args()
    total = len(proc)

    # stays None when there is nothing to run (empty proc)
    pypy = None

    if total == 1:
        pypy = run_local_mode_one(proc[0], time_limit, memory_limit)
        GlobalResult.returncode = pypy.returncode
    else:
        # optionally we use counter
        progress = ProgressCounter('Running {:02d} of {total:02d}')
        for p in proc:
            Printer.separator()
            # only 'total' is referenced by the counter format
            progress.next(dict(total=total))
            Printer.separator()

            Printer.open()
            pypy = run_local_mode_one(p, time_limit, memory_limit)
            Printer.close()
            GlobalResult.returncode = max(GlobalResult.returncode, pypy.returncode)

    return pypy if debug else GlobalResult.returncode
def on_update(self, pypy=None):
    """
    Check the monitored process against the configured limits.

    When the runtime or the memory usage exceeds its limit, an error is
    printed, the cause is stored in ``terminated_cause``, ``terminated`` is
    set and the process is killed using ``secure_kill``. An
    ``AttributeError`` raised during a check is ignored.

    :param pypy: not used
    """
    if self.terminated:
        return

    if self.time_limit:
        try:
            elapsed = self.process.runtime()
            if elapsed > self.time_limit:
                Printer.out()
                report = 'Error: Time limit exceeded! {:1.2f}s of runtime, {:1.2f}s allowed'
                Printer.err(report.format(elapsed, self.time_limit))
                self.terminated_cause = 'TIME_LIMIT'
                self.terminated = True
                self.process.secure_kill()
                return
        except AttributeError:
            pass

    if self.memory_limit:
        try:
            used = self.process.memory_usage()
            if used > self.memory_limit:
                Printer.out()
                report = 'Error: Memory limit exceeded! {:1.2f}MB used, {:1.2f}MB allowed'
                Printer.err(report.format(used, self.memory_limit))
                self.terminated_cause = 'MEMORY_LIMIT'
                self.terminated = True
                self.process.secure_kill()
                return
        except AttributeError:
            pass
def run_local_mode_one(proc, time_limit, memory_limit):
    """
    Execute the command from ``arg_rest`` once with the given limits.

    For ``proc == 0`` the first item of ``arg_rest`` is dropped, otherwise
    ``-np <proc>`` is inserted right after it. The output is captured into
    a variable; in batch mode or on failure it is written to a log file
    and printed.

    :param proc: number of processes (0 runs the command without the
        leading launcher)
    :param time_limit: value assigned to the limit monitor
    :param memory_limit: value assigned to the limit monitor
    :return: finished ``PyPy`` object
    """
    if proc == 0:
        command = arg_rest[1:]
    else:
        command = [arg_rest[0], '-np', proc] + arg_rest[1:]

    tail_length = 0 if arg_options.batch else 10
    pypy = PyPy(BinExecutor(command))

    # set limits
    limits = pypy.limit_monitor
    limits.time_limit = time_limit
    limits.memory_limit = memory_limit

    pypy.progress = not arg_options.batch
    pypy.info_monitor.deactivate()
    pypy.error_monitor.deactivate()

    # catch output to variable
    # in batch mode we will keep the files
    # otherwise we will keep logs only on error
    log_file = Paths.temp_file('exec-parallel-{date}-{time}-{rnd}.log')
    pypy.executor.output = OutputMode.variable_output()
    pypy.full_output = log_file

    # start and wait for exit
    pypy.start()
    pypy.join()

    # add result to global json result
    GlobalResult.add(pypy)

    # in batch mode or on error
    failed = not pypy.with_success()
    if failed or arg_options.batch:
        content = pypy.executor.output.read()
        IO.write(log_file, content)
        Printer.close()
        Printer.out(format_n_lines(content, indent=' ', n_lines=-tail_length))
        Printer.open()

    return pypy
def next(self, attributes):
    """
    Advance the counter by one and print the progress line.

    :param attributes: mapping of keyword arguments available to the
        format string; the counter value is the first positional argument
    """
    self.i += 1
    line = self.fmt.format(self.i, **attributes)
    Printer.out(line)
def get_result(self):
    """
    Print the result of one test: cleaning, run and comparison stage.

    The first failed stage is reported and the method returns. In batch
    mode the logs of the successful stages are printed as well.
    """
    # cleaning stage
    if self.clean.returncode != 0:
        Printer.out("{} Could not clean directory '{c[dir]}': {c[error]}",
                    self.get_status_line(self.clean), c=self.clean)
        return

    # run stage
    run_failed = self.pypy.returncode != 0
    if run_failed:
        Printer.out("{} Run error, case: {p[name]}",
                    self.get_status_line(self.pypy), p=self.pypy)
        self.print_log_file(self.pypy.log, self.n_lines)
        return
    if self.batch:
        self.print_log_file(self.pypy.log, self.n_lines)

    # comparison stage
    compare_failed = self.comp.returncode not in (0, None)
    if compare_failed:
        Printer.out("{} Compare error, case: {p[name]}, Details: ",
                    self.get_status_line(self.comp), p=self.pypy)
        # NOTE(review): the run log is printed here, not self.comp.log - confirm
        self.print_log_file(self.pypy.log, self.n_lines)
        Printer.open(2)
        for c in self.comp.tests:
            label = 'OK' if c.returncode == 0 else 'FAILED'
            Printer.out('[{:^6}]: {}', label, c.name)
        Printer.close(2)
        return
    if self.batch:
        self.print_log_file(self.comp.log, self.n_lines)
def run_pbs_mode(configs, debug=False):
    """
    Create a PBS script for every case, submit all of them using qsub and
    wait until all jobs finish. The process exits with the highest job
    return code (2 when no job was finished).

    :type debug: bool
    :type configs: scripts.config.yaml_config.ConfigPool
    """
    pbs_module = get_pbs_module(arg_options.host)
    Printer.dynamic_output = not arg_options.batch
    Printer.dyn('Parsing yaml files')

    # list of (qsub command, pbs module instance) pairs
    jobs = []
    for yaml_file, yaml_config in configs.files.items():
        for case in yaml_config.get_one(yaml_file):
            pbs_run = pbs_module.Module(case)
            pbs_run.queue = arg_options.get('queue', True)
            pbs_run.ppn = arg_options.get('ppn', 1)

            script_content = create_pbs_job_content(pbs_module, case)
            IO.write(case.fs.pbs_script, script_content)

            jobs.append((pbs_run.get_pbs_command(case.fs.pbs_script), pbs_run))

    # start jobs
    Printer.dyn('Starting jobs')
    total = len(jobs)
    multijob = MultiJob(pbs_module.ModuleJob)
    for job_id, (qsub_command, pbs_run) in enumerate(jobs, 1):
        Printer.dyn('Starting jobs {:02d} of {:02d}', job_id, total)

        qsub_output = subprocess.check_output(qsub_command)
        job = pbs_module.ModuleJob.create(qsub_output, pbs_run.case)
        job.full_name = "Case {}".format(pbs_run.case)
        multijob.add(job)

    Printer.out()
    Printer.out('{} job/s inserted into queue', total)

    # first update to get more info about multijob jobs
    Printer.out()
    Printer.separator()
    Printer.dyn('Updating job status')
    multijob.update()

    # print jobs statuses
    Printer.out()
    if not arg_options.batch:
        multijob.print_status()

    Printer.separator()
    Printer.dyn(multijob.get_status_line())
    returncodes = {}

    # wait for finish
    while multijob.is_running():
        Printer.dyn('Updating job status')
        multijob.update()
        Printer.dyn(multijob.get_status_line())

        # jobs whose status was updated to COMPLETED; the separators put
        # their reports on new lines after the dynamic output
        jobs_changed = multijob.get_all(status=JobState.COMPLETED)
        if jobs_changed:
            Printer.out()
            Printer.separator()
            for job in jobs_changed:
                returncodes[job] = finish_pbs_job(job, arg_options.batch)
            Printer.separator()
            Printer.out()

        # after printing update status lets sleep for a bit
        if multijob.is_running():
            time.sleep(5)

    Printer.out(multijob.get_status_line())
    Printer.out('All jobs finished')

    # get max return code or number 2 if there are no returncodes
    if returncodes:
        returncode = max(returncodes.values())
    else:
        returncode = 2
    sys.exit(returncode)
def do_work(parser, args=None, debug=False):
    """
    Parse the arguments, collect yaml files and run the tests either in
    PBS mode (a queue was given) or in LOCAL mode.

    The parsed arguments are stored in the module-level ``arg_options``,
    ``arg_others`` and ``arg_rest``. On invalid input the reason is
    stored in ``GlobalResult.error`` and the process exits.

    :type parser: utils.argparser.ArgParser
    :param args: arguments to parse, passed to ``parser.parse``
    :param debug: passed to the selected run mode
    :return: result of ``run_pbs_mode`` or ``run_local_mode``
    """
    # parse arguments
    global arg_options, arg_others, arg_rest
    arg_options, arg_others, arg_rest = parser.parse(args)
    Paths.format = PathFormat.ABSOLUTE
    Paths.base_dir(arg_options.root or '')

    # configure printer
    Printer.batch_output = arg_options.batch
    Printer.dynamic_output = not arg_options.batch

    # we need flow123d, mpiexec and ndiff to exists in LOCAL mode
    if not arg_options.queue and not Paths.test_paths('flow123d', 'mpiexec', 'ndiff'):
        Printer.err('Missing obligatory files! Exiting')
        GlobalResult.error = "missing obligatory files"
        sys.exit(1)

    # test yaml args
    if not arg_others:
        # record the error first: exit_usage may raise instead of
        # returning, in which case the lines after it are never reached
        GlobalResult.error = "no yaml files or folder given"
        parser.exit_usage('Error: No yaml files or folder given')
        sys.exit(2)

    all_yamls = []
    for path in arg_others:
        if not Paths.exists(path):
            Printer.err('Error! given path does not exists, ignoring path "{}"', path)
            GlobalResult.error = "path does not exist"
            sys.exit(3)

        if Paths.is_dir(path):
            # all yaml files in the folder except config.yaml
            all_yamls.extend(Paths.walk(path, filters=[
                PathFilters.filter_type_is_file(),
                PathFilters.filter_ext('.yaml'),
                PathFilters.filter_not(PathFilters.filter_name('config.yaml'))
            ]))
        else:
            all_yamls.append(path)

    Printer.out("Found {} .yaml file/s", len(all_yamls))
    if not all_yamls:
        Printer.wrn('Warning! No yaml files found in locations: \n {}', '\n '.join(arg_others))
        GlobalResult.error = "no yaml files or folders given"
        sys.exit(3)

    configs = read_configs(all_yamls)
    configs.update(
        proc=arg_options.cpu,
        time_limit=arg_options.time_limit,
        memory_limit=arg_options.memory_limit,
    )

    if arg_options.queue:
        Printer.out('Running in PBS mode')
        return run_pbs_mode(configs, debug)

    Printer.out('Running in LOCAL mode')
    return run_local_mode(configs, debug)
def run_local_mode(configs, debug=False):
    """
    Run all cases locally in parallel threads and print a summary.

    For every case a process is created by ``create_process_from_case``
    and added to the runner. Once the runner is done, one summary line is
    printed per thread, reporting the first stage which did not succeed
    (clean, run, compare) or that the test passed.

    :type debug: bool
    :type configs: scripts.config.yaml_config.ConfigPool
    :return: the runner itself in debug mode, otherwise its return code
    """
    runner = ParallelThreads(arg_options.parallel)
    runner.stop_on_error = not arg_options.keep_going

    for yaml_file, yaml_config in configs.files.items():
        for case in yaml_config.get_one(yaml_file):
            # create main process which first clean output dir
            # and then execute test following with comparisons
            runner.add(create_process_from_case(case))

    # run!
    runner.start()
    while runner.is_running():
        time.sleep(1)

    Printer.separator()
    Printer.out('Summary: ')

    Printer.open()
    for multithread in runner.threads:
        # :type multithread: RuntestMultiThread
        GlobalResult.add(multithread)

        if multithread.clean.with_error():
            Printer.out("[{:^6}]:{:3} | Could not clean directory '{}': {}", 'ERROR',
                        multithread.clean.returncode,
                        multithread.clean.dir,
                        multithread.clean.error)
            continue

        if not multithread.pypy.with_success():
            Printer.out("[{:^6}]:{:3} | Run error, case: {}",
                        multithread.pypy.returncode_map.get(str(multithread.pypy.returncode), 'ERROR'),
                        multithread.pypy.returncode,
                        multithread.pypy.case.to_string())
            continue

        if multithread.comp.with_error():
            Printer.out("[{:^6}]:{:3} | Compare error, case: {}, Details: ", 'FAILED',
                        multithread.comp.returncode,
                        multithread.pypy.case.to_string())
            Printer.open(2)
            for t in multithread.comp.threads:
                Printer.out('[{:^6}]: {}', 'OK' if t else 'FAILED', t.name)
            Printer.close(2)
            continue

        Printer.out("[{:^6}]:{:3} | Test passed: {}", 'PASSED',
                    multithread.pypy.returncode,
                    multithread.pypy.case.to_string())
    Printer.close()

    # exit with runner's exit code
    GlobalResult.returncode = runner.returncode
    return runner if debug else runner.returncode
def print_status(self):
    """Print the string form of every item, one ``Printer.out`` call each."""
    for entry in self.items:
        text = str(entry)
        Printer.out(text)
def update(self):
    """
    Print the current state using the dynamic printer.

    The start time is set on the first call. When the object is not in
    dynamic mode, an extra ``Printer.out()`` call follows the status.
    """
    if not self.start_time:
        self.start_time = time.time()

    Printer.dyn(self.format, self.elapsed, **self.format_args)

    if self.dynamic:
        return
    Printer.out()
def finish_pbs_job(job, batch):
    """
    Mark the job as finished and print its result.

    The detailed status is read from the json output of the case. When the
    file has no content, the job is considered failed and the pbs output
    is printed instead.

    :type job: scripts.pbs.job.Job
    :param batch: in batch mode the test results are printed for
        successful jobs as well
    :return: 0 when the job ended with ``JobState.EXIT_OK``, otherwise 1
    """
    # try to get more detailed job status
    job.is_active = False
    job_output = IO.read(job.case.fs.json_output)

    if not job_output:
        # no output file was generated assuming it went wrong
        job.status = JobState.EXIT_ERROR
        Printer.out('ERROR: Job {} ended (no output file found). Case: {}', job, job.full_name)
        Printer.out(' pbs output: ')
        Printer.out(format_n_lines(IO.read(job.case.fs.pbs_output), 0))
    else:
        job_json = JsonParser(json.loads(job_output), batch)
        if job_json.returncode != 0:
            job.status = JobState.EXIT_ERROR
            Printer.out('ERROR: Job {}({}) ended', job, job.full_name)
            # print all logs regardless of the mode
            Printer.open()
            for test in job_json.tests:
                test.get_result()
            Printer.close()
        else:
            job.status = JobState.EXIT_OK
            Printer.out('OK: Job {}({}) ended', job, job.full_name)
            Printer.open()
            # in batch mode print all logs
            if batch:
                Printer.open()
                for test in job_json.tests:
                    test.get_result()
                Printer.close()
            Printer.close()

    return 0 if job.status == JobState.EXIT_OK else 1
def run_pbs_mode(debug=False):
    """
    Prepare PBS files, submit all jobs using qsub and wait for them.

    :param debug: when set, the files are only prepared and 0 is returned
    :return: the highest return code of the finished jobs, 2 when there
        are no return codes
    """
    pbs_module = get_pbs_module()
    jobs = prepare_pbs_files(pbs_module)

    if debug:
        return 0

    # start jobs
    Printer.dyn('Starting jobs')
    total = len(jobs)
    multijob = MultiJob(pbs_module.ModuleJob)
    for job_id, (qsub_command, pbs_run) in enumerate(jobs, 1):
        Printer.dyn('Starting jobs {:02d} of {:02d}', job_id, total)

        qsub_output = subprocess.check_output(qsub_command)
        job = pbs_module.ModuleJob.create(qsub_output, pbs_run.case)
        job.full_name = "Case {}".format(pbs_run.case)
        multijob.add(job)

    Printer.out()
    Printer.out('{} job/s inserted into queue', total)

    # first update to get more info about multijob jobs
    Printer.out()
    Printer.separator()
    Printer.dyn('Updating job status')
    multijob.update()

    # print jobs statuses
    Printer.out()
    if not arg_options.batch:
        multijob.print_status()

    Printer.separator()
    Printer.dyn(multijob.get_status_line())
    returncodes = {}

    # wait for finish
    while multijob.is_running():
        Printer.dyn('Updating job status')
        multijob.update()
        Printer.dyn(multijob.get_status_line())

        # jobs whose status was updated to COMPLETED; the separators put
        # their reports on new lines after the dynamic output
        jobs_changed = multijob.get_all(status=JobState.COMPLETED)
        if jobs_changed:
            Printer.out()
            Printer.separator()
            for job in jobs_changed:
                returncodes[job] = finish_pbs_job(job, arg_options.batch)
            Printer.separator()
            Printer.out()

        # after printing update status lets sleep for a bit
        if multijob.is_running():
            time.sleep(5)

    Printer.out(multijob.get_status_line())
    Printer.out('All jobs finished')

    # get max return code or number 2 if there are no returncodes
    if not returncodes:
        return 2
    return max(returncodes.values())
def on_complete(self, pypy=None):
    """
    Report a failed command.

    Nothing is done unless the return code is positive. Otherwise the
    optional message is printed, followed by the tail of the captured
    output (when there is any).

    :param pypy: not used, the monitor works with ``self.pypy``
    """
    if not self.pypy.returncode > 0:
        return

    has_message = bool(self.message)
    if has_message:
        Printer.separator()
    Printer.open()
    if has_message:
        Printer.out(self.message)

    # if file pointer exist try to read errors and outputs
    output = self.pypy.executor.output.read()
    if output:
        if self.pypy.full_output:
            Printer.out('Output (last {} lines, rest in {}): ', self.tail, Paths.abspath(self.pypy.full_output))
        else:
            Printer.out('Output (last {} lines): ', self.tail)
        tail_text = format_n_lines(output, -self.tail, indent=Printer.indent * ' ')
        Printer.err(tail_text)
    Printer.close()
def get_result(self):
    """
    Report the outcome of a test stage by stage (clean, run, compare).

    Reporting stops at the first stage which failed. Logs of the stages
    which succeeded are printed in batch mode only.
    """
    clean_failed = self.clean.returncode != 0
    if clean_failed:
        Printer.out(
            "{} Could not clean directory '{c[dir]}': {c[error]}",
            self.get_status_line(self.clean),
            c=self.clean,
        )
        return

    if self.pypy.returncode == 0:
        if self.batch:
            self.print_log_file(self.pypy.log, self.n_lines)
    else:
        Printer.out("{} Run error, case: {p[name]}", self.get_status_line(self.pypy), p=self.pypy)
        self.print_log_file(self.pypy.log, self.n_lines)
        return

    if self.comp.returncode in (0, None):
        if self.batch:
            self.print_log_file(self.comp.log, self.n_lines)
        return

    Printer.out("{} Compare error, case: {p[name]}, Details: ", self.get_status_line(self.comp), p=self.pypy)
    # NOTE(review): the run log is printed here, not self.comp.log - confirm
    self.print_log_file(self.pypy.log, self.n_lines)

    # one line per comparison
    Printer.open(2)
    for c in self.comp.tests:
        verdict = 'OK' if c.returncode == 0 else 'FAILED'
        Printer.out('[{:^6}]: {}', verdict, c.name)
    Printer.close(2)
def on_start(self, pypy=None):
    """
    Print the start message when ``start_fmt`` is set; the format string
    may reference this monitor as ``self``.

    :param pypy: not used
    """
    if not self.start_fmt:
        return
    Printer.out(self.start_fmt.format(self=self))